1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FloatingPointMode.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
31 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
33 if (usesLayout<IEEEFloat>(getSemantics())) \
34 return U.IEEE.METHOD_CALL; \
35 if (usesLayout<DoubleAPFloat>(getSemantics())) \
36 return U.Double.METHOD_CALL; \
37 llvm_unreachable("Unexpected semantics"); \
/// Packs a pair of fcCategory values into one integer key, so that a single
/// switch statement can dispatch on how the categories of two APFloat operands
/// interact in an operation. The left category is scaled by four and the right
/// category is added to it.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((4 * (_lhs)) + (_rhs))
50 /* Assumed in hexadecimal significand parsing, and conversion to
51 hexadecimal strings. */
52 static_assert(APFloatBase::integerPartWidth
% 4 == 0, "Part width must be divisible by 4!");
56 // How the nonfinite values Inf and NaN are represented.
57 enum class fltNonfiniteBehavior
{
58 // Represents standard IEEE 754 behavior. A value is nonfinite if the
59 // exponent field is all 1s. In such cases, a value is Inf if the
60 // significand bits are all zero, and NaN otherwise
63 // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
64 // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
65 // representation for Inf, and operations that would ordinarily produce Inf
66 // produce NaN instead.
67 // The details of the NaN representation(s) in this form are determined by the
68 // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
69 // encodings do not distinguish between signalling and quiet NaN.
72 // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
73 // Float4E2M1FN types, which do not support Inf or NaN values.
// How NaN values are represented. This is currently only used in combination
78 // with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
79 // while having IEEE non-finite behavior is liable to lead to unexpected
81 enum class fltNanEncoding
{
82 // Represents the standard IEEE behavior where a value is NaN if its
83 // exponent is all 1s and the significand is non-zero.
86 // Represents the behavior in the Float8E4M3FN floating point type where NaN
87 // is represented by having the exponent and mantissa set to all 1s.
88 // This behavior matches the FP8 E4M3 type described in
89 // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
90 // as non-signalling, although the paper does not state whether the NaN
91 // values are signalling or not.
// Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
// where NaN is represented by a sign bit of 1 and all 0s in the exponent
// and mantissa (i.e. the negative zero encoding in an IEEE float). Since
97 // there is only one NaN value, it is treated as quiet NaN. This matches the
98 // behavior described in https://arxiv.org/abs/2206.02915 .
102 /* Represents floating point arithmetic semantics. */
103 struct fltSemantics
{
104 /* The largest E such that 2^E is representable; this matches the
105 definition of IEEE 754. */
106 APFloatBase::ExponentType maxExponent
;
108 /* The smallest E such that 2^E is a normalized number; this
109 matches the definition of IEEE 754. */
110 APFloatBase::ExponentType minExponent
;
112 /* Number of bits in the significand. This includes the integer
114 unsigned int precision
;
116 /* Number of bits actually used in the semantics. */
117 unsigned int sizeInBits
;
119 fltNonfiniteBehavior nonFiniteBehavior
= fltNonfiniteBehavior::IEEE754
;
121 fltNanEncoding nanEncoding
= fltNanEncoding::IEEE
;
123 /* Whether this semantics has an encoding for Zero */
126 /* Whether this semantics can represent signed values */
127 bool hasSignedRepr
= true;
129 // Returns true if any number described by this semantics can be precisely
130 // represented by the specified semantics. Does not take into account
131 // the value of fltNonfiniteBehavior.
132 bool isRepresentableBy(const fltSemantics
&S
) const {
133 return maxExponent
<= S
.maxExponent
&& minExponent
>= S
.minExponent
&&
134 precision
<= S
.precision
;
138 static constexpr fltSemantics semIEEEhalf
= {15, -14, 11, 16};
139 static constexpr fltSemantics semBFloat
= {127, -126, 8, 16};
140 static constexpr fltSemantics semIEEEsingle
= {127, -126, 24, 32};
141 static constexpr fltSemantics semIEEEdouble
= {1023, -1022, 53, 64};
142 static constexpr fltSemantics semIEEEquad
= {16383, -16382, 113, 128};
143 static constexpr fltSemantics semFloat8E5M2
= {15, -14, 3, 8};
144 static constexpr fltSemantics semFloat8E5M2FNUZ
= {
145 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly
, fltNanEncoding::NegativeZero
};
146 static constexpr fltSemantics semFloat8E4M3
= {7, -6, 4, 8};
147 static constexpr fltSemantics semFloat8E4M3FN
= {
148 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly
, fltNanEncoding::AllOnes
};
149 static constexpr fltSemantics semFloat8E4M3FNUZ
= {
150 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly
, fltNanEncoding::NegativeZero
};
151 static constexpr fltSemantics semFloat8E4M3B11FNUZ
= {
152 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly
, fltNanEncoding::NegativeZero
};
153 static constexpr fltSemantics semFloat8E3M4
= {3, -2, 5, 8};
154 static constexpr fltSemantics semFloatTF32
= {127, -126, 11, 19};
155 static constexpr fltSemantics semFloat8E8M0FNU
= {
156 127, -127, 1, 8, fltNonfiniteBehavior::NanOnly
, fltNanEncoding::AllOnes
,
159 static constexpr fltSemantics semFloat6E3M2FN
= {
160 4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly
};
161 static constexpr fltSemantics semFloat6E2M3FN
= {
162 2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly
};
163 static constexpr fltSemantics semFloat4E2M1FN
= {
164 2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly
};
165 static constexpr fltSemantics semX87DoubleExtended
= {16383, -16382, 64, 80};
166 static constexpr fltSemantics semBogus
= {0, 0, 0, 0};
167 static constexpr fltSemantics semPPCDoubleDouble
= {-1, 0, 0, 128};
168 static constexpr fltSemantics semPPCDoubleDoubleLegacy
= {1023, -1022 + 53,
171 const llvm::fltSemantics
&APFloatBase::EnumToSemantics(Semantics S
) {
183 case S_PPCDoubleDouble
:
184 return PPCDoubleDouble();
185 case S_PPCDoubleDoubleLegacy
:
186 return PPCDoubleDoubleLegacy();
189 case S_Float8E5M2FNUZ
:
190 return Float8E5M2FNUZ();
194 return Float8E4M3FN();
195 case S_Float8E4M3FNUZ
:
196 return Float8E4M3FNUZ();
197 case S_Float8E4M3B11FNUZ
:
198 return Float8E4M3B11FNUZ();
203 case S_Float8E8M0FNU
:
204 return Float8E8M0FNU();
206 return Float6E3M2FN();
208 return Float6E2M3FN();
210 return Float4E2M1FN();
211 case S_x87DoubleExtended
:
212 return x87DoubleExtended();
214 llvm_unreachable("Unrecognised floating semantics");
217 APFloatBase::Semantics
218 APFloatBase::SemanticsToEnum(const llvm::fltSemantics
&Sem
) {
219 if (&Sem
== &llvm::APFloat::IEEEhalf())
221 else if (&Sem
== &llvm::APFloat::BFloat())
223 else if (&Sem
== &llvm::APFloat::IEEEsingle())
225 else if (&Sem
== &llvm::APFloat::IEEEdouble())
227 else if (&Sem
== &llvm::APFloat::IEEEquad())
229 else if (&Sem
== &llvm::APFloat::PPCDoubleDouble())
230 return S_PPCDoubleDouble
;
231 else if (&Sem
== &llvm::APFloat::PPCDoubleDoubleLegacy())
232 return S_PPCDoubleDoubleLegacy
;
233 else if (&Sem
== &llvm::APFloat::Float8E5M2())
235 else if (&Sem
== &llvm::APFloat::Float8E5M2FNUZ())
236 return S_Float8E5M2FNUZ
;
237 else if (&Sem
== &llvm::APFloat::Float8E4M3())
239 else if (&Sem
== &llvm::APFloat::Float8E4M3FN())
240 return S_Float8E4M3FN
;
241 else if (&Sem
== &llvm::APFloat::Float8E4M3FNUZ())
242 return S_Float8E4M3FNUZ
;
243 else if (&Sem
== &llvm::APFloat::Float8E4M3B11FNUZ())
244 return S_Float8E4M3B11FNUZ
;
245 else if (&Sem
== &llvm::APFloat::Float8E3M4())
247 else if (&Sem
== &llvm::APFloat::FloatTF32())
249 else if (&Sem
== &llvm::APFloat::Float8E8M0FNU())
250 return S_Float8E8M0FNU
;
251 else if (&Sem
== &llvm::APFloat::Float6E3M2FN())
252 return S_Float6E3M2FN
;
253 else if (&Sem
== &llvm::APFloat::Float6E2M3FN())
254 return S_Float6E2M3FN
;
255 else if (&Sem
== &llvm::APFloat::Float4E2M1FN())
256 return S_Float4E2M1FN
;
257 else if (&Sem
== &llvm::APFloat::x87DoubleExtended())
258 return S_x87DoubleExtended
;
260 llvm_unreachable("Unknown floating semantics");
263 const fltSemantics
&APFloatBase::IEEEhalf() { return semIEEEhalf
; }
264 const fltSemantics
&APFloatBase::BFloat() { return semBFloat
; }
265 const fltSemantics
&APFloatBase::IEEEsingle() { return semIEEEsingle
; }
266 const fltSemantics
&APFloatBase::IEEEdouble() { return semIEEEdouble
; }
267 const fltSemantics
&APFloatBase::IEEEquad() { return semIEEEquad
; }
268 const fltSemantics
&APFloatBase::PPCDoubleDouble() {
269 return semPPCDoubleDouble
;
271 const fltSemantics
&APFloatBase::PPCDoubleDoubleLegacy() {
272 return semPPCDoubleDoubleLegacy
;
274 const fltSemantics
&APFloatBase::Float8E5M2() { return semFloat8E5M2
; }
275 const fltSemantics
&APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ
; }
276 const fltSemantics
&APFloatBase::Float8E4M3() { return semFloat8E4M3
; }
277 const fltSemantics
&APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN
; }
278 const fltSemantics
&APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ
; }
279 const fltSemantics
&APFloatBase::Float8E4M3B11FNUZ() {
280 return semFloat8E4M3B11FNUZ
;
282 const fltSemantics
&APFloatBase::Float8E3M4() { return semFloat8E3M4
; }
283 const fltSemantics
&APFloatBase::FloatTF32() { return semFloatTF32
; }
284 const fltSemantics
&APFloatBase::Float8E8M0FNU() { return semFloat8E8M0FNU
; }
285 const fltSemantics
&APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN
; }
286 const fltSemantics
&APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN
; }
287 const fltSemantics
&APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN
; }
288 const fltSemantics
&APFloatBase::x87DoubleExtended() {
289 return semX87DoubleExtended
;
291 const fltSemantics
&APFloatBase::Bogus() { return semBogus
; }
293 constexpr RoundingMode
APFloatBase::rmNearestTiesToEven
;
294 constexpr RoundingMode
APFloatBase::rmTowardPositive
;
295 constexpr RoundingMode
APFloatBase::rmTowardNegative
;
296 constexpr RoundingMode
APFloatBase::rmTowardZero
;
297 constexpr RoundingMode
APFloatBase::rmNearestTiesToAway
;
299 /* A tight upper bound on number of parts required to hold the value
302 power * 815 / (351 * integerPartWidth) + 1
304 However, whilst the result may require only this many parts,
305 because we are multiplying two values to get it, the
306 multiplication may require an extra part with the excess part
307 being zero (consider the trivial case of 1 * 1, tcFullMultiply
308 requires two parts to hold the single-part result). So we add an
309 extra one to guarantee enough space whilst multiplying. */
// Bounds that feed the sizing of the power-of-five scratch storage. The two
// values equal semIEEEquad's maxExponent and precision respectively.
const unsigned int maxExponent = 16383;
const unsigned int maxPrecision = 113;

// Largest power-of-five exponent derived from the two bounds above.
const unsigned int maxPowerOfFiveExponent = (maxExponent - 1) + maxPrecision;
313 const unsigned int maxPowerOfFiveParts
=
315 ((maxPowerOfFiveExponent
* 815) / (351 * APFloatBase::integerPartWidth
));
317 unsigned int APFloatBase::semanticsPrecision(const fltSemantics
&semantics
) {
318 return semantics
.precision
;
320 APFloatBase::ExponentType
321 APFloatBase::semanticsMaxExponent(const fltSemantics
&semantics
) {
322 return semantics
.maxExponent
;
324 APFloatBase::ExponentType
325 APFloatBase::semanticsMinExponent(const fltSemantics
&semantics
) {
326 return semantics
.minExponent
;
328 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics
&semantics
) {
329 return semantics
.sizeInBits
;
331 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics
&semantics
,
333 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
334 // at least one more bit than the MaxExponent to hold the max FP value.
335 unsigned int MinBitWidth
= semanticsMaxExponent(semantics
) + 1;
336 // Extra sign bit needed.
342 bool APFloatBase::semanticsHasZero(const fltSemantics
&semantics
) {
343 return semantics
.hasZero
;
346 bool APFloatBase::semanticsHasSignedRepr(const fltSemantics
&semantics
) {
347 return semantics
.hasSignedRepr
;
350 bool APFloatBase::semanticsHasInf(const fltSemantics
&semantics
) {
351 return semantics
.nonFiniteBehavior
== fltNonfiniteBehavior::IEEE754
;
354 bool APFloatBase::semanticsHasNaN(const fltSemantics
&semantics
) {
355 return semantics
.nonFiniteBehavior
!= fltNonfiniteBehavior::FiniteOnly
;
358 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics
&Src
,
359 const fltSemantics
&Dst
) {
360 // Exponent range must be larger.
361 if (Src
.maxExponent
>= Dst
.maxExponent
|| Src
.minExponent
<= Dst
.minExponent
)
364 // If the mantissa is long enough, the result value could still be denormal
365 // with a larger exponent range.
367 // FIXME: This condition is probably not accurate but also shouldn't be a
368 // practical concern with existing types.
369 return Dst
.precision
>= Src
.precision
;
372 unsigned APFloatBase::getSizeInBits(const fltSemantics
&Sem
) {
373 return Sem
.sizeInBits
;
376 static constexpr APFloatBase::ExponentType
377 exponentZero(const fltSemantics
&semantics
) {
378 return semantics
.minExponent
- 1;
381 static constexpr APFloatBase::ExponentType
382 exponentInf(const fltSemantics
&semantics
) {
383 return semantics
.maxExponent
+ 1;
386 static constexpr APFloatBase::ExponentType
387 exponentNaN(const fltSemantics
&semantics
) {
388 if (semantics
.nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
) {
389 if (semantics
.nanEncoding
== fltNanEncoding::NegativeZero
)
390 return exponentZero(semantics
);
391 if (semantics
.hasSignedRepr
)
392 return semantics
.maxExponent
;
394 return semantics
.maxExponent
+ 1;
397 /* A bunch of private, handy routines. */
399 static inline Error
createError(const Twine
&Err
) {
400 return make_error
<StringError
>(Err
, inconvertibleErrorCode());
403 static constexpr inline unsigned int partCountForBits(unsigned int bits
) {
404 return std::max(1u, (bits
+ APFloatBase::integerPartWidth
- 1) /
405 APFloatBase::integerPartWidth
);
408 /* Returns 0U-9U. Return values >= 10U are not digits. */
409 static inline unsigned int
410 decDigitValue(unsigned int c
)
415 /* Return the value of a decimal exponent of the form
418 If the exponent overflows, returns a large exponent with the
420 static Expected
<int> readExponent(StringRef::iterator begin
,
421 StringRef::iterator end
) {
423 unsigned int absExponent
;
424 const unsigned int overlargeExponent
= 24000; /* FIXME. */
425 StringRef::iterator p
= begin
;
427 // Treat no exponent as 0 to match binutils
428 if (p
== end
|| ((*p
== '-' || *p
== '+') && (p
+ 1) == end
)) {
432 isNegative
= (*p
== '-');
433 if (*p
== '-' || *p
== '+') {
436 return createError("Exponent has no digits");
439 absExponent
= decDigitValue(*p
++);
440 if (absExponent
>= 10U)
441 return createError("Invalid character in exponent");
443 for (; p
!= end
; ++p
) {
446 value
= decDigitValue(*p
);
448 return createError("Invalid character in exponent");
450 absExponent
= absExponent
* 10U + value
;
451 if (absExponent
>= overlargeExponent
) {
452 absExponent
= overlargeExponent
;
458 return -(int) absExponent
;
460 return (int) absExponent
;
463 /* This is ugly and needs cleaning up, but I don't immediately see
464 how whilst remaining safe. */
465 static Expected
<int> totalExponent(StringRef::iterator p
,
466 StringRef::iterator end
,
467 int exponentAdjustment
) {
468 int unsignedExponent
;
469 bool negative
, overflow
;
473 return createError("Exponent has no digits");
475 negative
= *p
== '-';
476 if (*p
== '-' || *p
== '+') {
479 return createError("Exponent has no digits");
482 unsignedExponent
= 0;
484 for (; p
!= end
; ++p
) {
487 value
= decDigitValue(*p
);
489 return createError("Invalid character in exponent");
491 unsignedExponent
= unsignedExponent
* 10 + value
;
492 if (unsignedExponent
> 32767) {
498 if (exponentAdjustment
> 32767 || exponentAdjustment
< -32768)
502 exponent
= unsignedExponent
;
504 exponent
= -exponent
;
505 exponent
+= exponentAdjustment
;
506 if (exponent
> 32767 || exponent
< -32768)
511 exponent
= negative
? -32768: 32767;
516 static Expected
<StringRef::iterator
>
517 skipLeadingZeroesAndAnyDot(StringRef::iterator begin
, StringRef::iterator end
,
518 StringRef::iterator
*dot
) {
519 StringRef::iterator p
= begin
;
521 while (p
!= end
&& *p
== '0')
524 if (p
!= end
&& *p
== '.') {
527 if (end
- begin
== 1)
528 return createError("Significand has no digits");
530 while (p
!= end
&& *p
== '0')
537 /* Given a normal decimal floating point number of the form
541 where the decimal point and exponent are optional, fill out the
542 structure D. Exponent is appropriate if the significand is
543 treated as an integer, and normalizedExponent if the significand
544 is taken to have the decimal point after a single leading
547 If the value is zero, V->firstSigDigit points to a non-digit, and
548 the return exponent is zero.
551 const char *firstSigDigit
;
552 const char *lastSigDigit
;
554 int normalizedExponent
;
557 static Error
interpretDecimal(StringRef::iterator begin
,
558 StringRef::iterator end
, decimalInfo
*D
) {
559 StringRef::iterator dot
= end
;
561 auto PtrOrErr
= skipLeadingZeroesAndAnyDot(begin
, end
, &dot
);
563 return PtrOrErr
.takeError();
564 StringRef::iterator p
= *PtrOrErr
;
566 D
->firstSigDigit
= p
;
568 D
->normalizedExponent
= 0;
570 for (; p
!= end
; ++p
) {
573 return createError("String contains multiple dots");
578 if (decDigitValue(*p
) >= 10U)
583 if (*p
!= 'e' && *p
!= 'E')
584 return createError("Invalid character in significand");
586 return createError("Significand has no digits");
587 if (dot
!= end
&& p
- begin
== 1)
588 return createError("Significand has no digits");
590 /* p points to the first non-digit in the string */
591 auto ExpOrErr
= readExponent(p
+ 1, end
);
593 return ExpOrErr
.takeError();
594 D
->exponent
= *ExpOrErr
;
596 /* Implied decimal point? */
601 /* If number is all zeroes accept any exponent. */
602 if (p
!= D
->firstSigDigit
) {
603 /* Drop insignificant trailing zeroes. */
608 while (p
!= begin
&& *p
== '0');
609 while (p
!= begin
&& *p
== '.');
612 /* Adjust the exponents for any decimal point. */
613 D
->exponent
+= static_cast<APFloat::ExponentType
>((dot
- p
) - (dot
> p
));
614 D
->normalizedExponent
= (D
->exponent
+
615 static_cast<APFloat::ExponentType
>((p
- D
->firstSigDigit
)
616 - (dot
> D
->firstSigDigit
&& dot
< p
)));
620 return Error::success();
623 /* Return the trailing fraction of a hexadecimal number.
624 DIGITVALUE is the first hex digit of the fraction, P points to
626 static Expected
<lostFraction
>
627 trailingHexadecimalFraction(StringRef::iterator p
, StringRef::iterator end
,
628 unsigned int digitValue
) {
629 unsigned int hexDigit
;
631 /* If the first trailing digit isn't 0 or 8 we can work out the
632 fraction immediately. */
634 return lfMoreThanHalf
;
635 else if (digitValue
< 8 && digitValue
> 0)
636 return lfLessThanHalf
;
638 // Otherwise we need to find the first non-zero digit.
639 while (p
!= end
&& (*p
== '0' || *p
== '.'))
643 return createError("Invalid trailing hexadecimal fraction!");
645 hexDigit
= hexDigitValue(*p
);
647 /* If we ran off the end it is exactly zero or one-half, otherwise
649 if (hexDigit
== UINT_MAX
)
650 return digitValue
== 0 ? lfExactlyZero
: lfExactlyHalf
;
652 return digitValue
== 0 ? lfLessThanHalf
: lfMoreThanHalf
;
655 /* Return the fraction lost were a bignum truncated losing the least
656 significant BITS bits. */
658 lostFractionThroughTruncation(const APFloatBase::integerPart
*parts
,
659 unsigned int partCount
,
664 lsb
= APInt::tcLSB(parts
, partCount
);
666 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
668 return lfExactlyZero
;
670 return lfExactlyHalf
;
671 if (bits
<= partCount
* APFloatBase::integerPartWidth
&&
672 APInt::tcExtractBit(parts
, bits
- 1))
673 return lfMoreThanHalf
;
675 return lfLessThanHalf
;
678 /* Shift DST right BITS bits noting lost fraction. */
680 shiftRight(APFloatBase::integerPart
*dst
, unsigned int parts
, unsigned int bits
)
682 lostFraction lost_fraction
;
684 lost_fraction
= lostFractionThroughTruncation(dst
, parts
, bits
);
686 APInt::tcShiftRight(dst
, parts
, bits
);
688 return lost_fraction
;
691 /* Combine the effect of two lost fractions. */
693 combineLostFractions(lostFraction moreSignificant
,
694 lostFraction lessSignificant
)
696 if (lessSignificant
!= lfExactlyZero
) {
697 if (moreSignificant
== lfExactlyZero
)
698 moreSignificant
= lfLessThanHalf
;
699 else if (moreSignificant
== lfExactlyHalf
)
700 moreSignificant
= lfMoreThanHalf
;
703 return moreSignificant
;
706 /* The error from the true value, in half-ulps, on multiplying two
707 floating point numbers, which differ from the value they
708 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
709 than the returned value.
711 See "How to Read Floating Point Numbers Accurately" by William D
714 HUerrBound(bool inexactMultiply
, unsigned int HUerr1
, unsigned int HUerr2
)
716 assert(HUerr1
< 2 || HUerr2
< 2 || (HUerr1
+ HUerr2
< 8));
718 if (HUerr1
+ HUerr2
== 0)
719 return inexactMultiply
* 2; /* <= inexactMultiply half-ulps. */
721 return inexactMultiply
+ 2 * (HUerr1
+ HUerr2
);
724 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
725 when the least significant BITS are truncated. BITS cannot be
727 static APFloatBase::integerPart
728 ulpsFromBoundary(const APFloatBase::integerPart
*parts
, unsigned int bits
,
730 unsigned int count
, partBits
;
731 APFloatBase::integerPart part
, boundary
;
736 count
= bits
/ APFloatBase::integerPartWidth
;
737 partBits
= bits
% APFloatBase::integerPartWidth
+ 1;
739 part
= parts
[count
] & (~(APFloatBase::integerPart
) 0 >> (APFloatBase::integerPartWidth
- partBits
));
742 boundary
= (APFloatBase::integerPart
) 1 << (partBits
- 1);
747 if (part
- boundary
<= boundary
- part
)
748 return part
- boundary
;
750 return boundary
- part
;
753 if (part
== boundary
) {
756 return ~(APFloatBase::integerPart
) 0; /* A lot. */
759 } else if (part
== boundary
- 1) {
762 return ~(APFloatBase::integerPart
) 0; /* A lot. */
767 return ~(APFloatBase::integerPart
) 0; /* A lot. */
770 /* Place pow(5, power) in DST, and return the number of parts used.
771 DST must be at least one part larger than size of the answer. */
773 powerOf5(APFloatBase::integerPart
*dst
, unsigned int power
) {
774 static const APFloatBase::integerPart firstEightPowers
[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
775 APFloatBase::integerPart pow5s
[maxPowerOfFiveParts
* 2 + 5];
776 pow5s
[0] = 78125 * 5;
778 unsigned int partsCount
= 1;
779 APFloatBase::integerPart scratch
[maxPowerOfFiveParts
], *p1
, *p2
, *pow5
;
781 assert(power
<= maxExponent
);
786 *p1
= firstEightPowers
[power
& 7];
792 for (unsigned int n
= 0; power
; power
>>= 1, n
++) {
793 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
795 APInt::tcFullMultiply(pow5
, pow5
- partsCount
, pow5
- partsCount
,
796 partsCount
, partsCount
);
798 if (pow5
[partsCount
- 1] == 0)
803 APFloatBase::integerPart
*tmp
;
805 APInt::tcFullMultiply(p2
, p1
, pow5
, result
, partsCount
);
806 result
+= partsCount
;
807 if (p2
[result
- 1] == 0)
810 /* Now result is in p1 with partsCount parts and p2 is scratch
821 APInt::tcAssign(dst
, p1
, result
);
/* Digit tables for hexadecimal output. Each carries an extra trailing '0'
   after 'f'/'F' so that adding one to a digit when rounding up needs no
   modular arithmetic. */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";

/* Lower- and upper-case spellings of infinity and NaN. */
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";
835 /* Write out an integerPart in hexadecimal, starting with the most
836 significant nibble. Write out exactly COUNT hexdigits, return
839 partAsHex (char *dst
, APFloatBase::integerPart part
, unsigned int count
,
840 const char *hexDigitChars
)
842 unsigned int result
= count
;
844 assert(count
!= 0 && count
<= APFloatBase::integerPartWidth
/ 4);
846 part
>>= (APFloatBase::integerPartWidth
- 4 * count
);
848 dst
[count
] = hexDigitChars
[part
& 0xf];
855 /* Write out an unsigned decimal integer. */
857 writeUnsignedDecimal (char *dst
, unsigned int n
)
873 /* Write out a signed decimal integer. */
875 writeSignedDecimal (char *dst
, int value
)
879 dst
= writeUnsignedDecimal(dst
, -(unsigned) value
);
881 dst
= writeUnsignedDecimal(dst
, value
);
888 void IEEEFloat::initialize(const fltSemantics
*ourSemantics
) {
891 semantics
= ourSemantics
;
894 significand
.parts
= new integerPart
[count
];
897 void IEEEFloat::freeSignificand() {
899 delete [] significand
.parts
;
902 void IEEEFloat::assign(const IEEEFloat
&rhs
) {
903 assert(semantics
== rhs
.semantics
);
906 category
= rhs
.category
;
907 exponent
= rhs
.exponent
;
908 if (isFiniteNonZero() || category
== fcNaN
)
909 copySignificand(rhs
);
912 void IEEEFloat::copySignificand(const IEEEFloat
&rhs
) {
913 assert(isFiniteNonZero() || category
== fcNaN
);
914 assert(rhs
.partCount() >= partCount());
916 APInt::tcAssign(significandParts(), rhs
.significandParts(),
920 /* Make this number a NaN, with an arbitrary but deterministic value
921 for the significand. If double or longer, this is a signalling NaN,
922 which may not be ideal. If float, this is QNaN(0). */
923 void IEEEFloat::makeNaN(bool SNaN
, bool Negative
, const APInt
*fill
) {
924 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::FiniteOnly
)
925 llvm_unreachable("This floating point format does not support NaN");
927 if (Negative
&& !semantics
->hasSignedRepr
)
929 "This floating point format does not support signed values");
933 exponent
= exponentNaN();
935 integerPart
*significand
= significandParts();
936 unsigned numParts
= partCount();
939 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
) {
// NaN-only types do not distinguish signalling and quiet NaN, so
// make them all quiet.
943 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
) {
945 fill_storage
= APInt::getZero(semantics
->precision
- 1);
947 fill_storage
= APInt::getAllOnes(semantics
->precision
- 1);
949 fill
= &fill_storage
;
952 // Set the significand bits to the fill.
953 if (!fill
|| fill
->getNumWords() < numParts
)
954 APInt::tcSet(significand
, 0, numParts
);
956 APInt::tcAssign(significand
, fill
->getRawData(),
957 std::min(fill
->getNumWords(), numParts
));
959 // Zero out the excess bits of the significand.
960 unsigned bitsToPreserve
= semantics
->precision
- 1;
961 unsigned part
= bitsToPreserve
/ 64;
962 bitsToPreserve
%= 64;
963 significand
[part
] &= ((1ULL << bitsToPreserve
) - 1);
964 for (part
++; part
!= numParts
; ++part
)
965 significand
[part
] = 0;
969 (semantics
->precision
>= 2) ? (semantics
->precision
- 2) : 0;
972 // We always have to clear the QNaN bit to make it an SNaN.
973 APInt::tcClearBit(significand
, QNaNBit
);
975 // If there are no bits set in the payload, we have to set
976 // *something* to make it a NaN instead of an infinity;
977 // conventionally, this is the next bit down from the QNaN bit.
978 if (APInt::tcIsZero(significand
, numParts
))
979 APInt::tcSetBit(significand
, QNaNBit
- 1);
980 } else if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
) {
// The only NaN is a quiet NaN, and it has no bits set in the significand.
984 // We always have to set the QNaN bit to make it a QNaN.
985 APInt::tcSetBit(significand
, QNaNBit
);
988 // For x87 extended precision, we want to make a NaN, not a
989 // pseudo-NaN. Maybe we should expose the ability to make
991 if (semantics
== &semX87DoubleExtended
)
992 APInt::tcSetBit(significand
, QNaNBit
+ 1);
995 IEEEFloat
&IEEEFloat::operator=(const IEEEFloat
&rhs
) {
997 if (semantics
!= rhs
.semantics
) {
999 initialize(rhs
.semantics
);
1007 IEEEFloat
&IEEEFloat::operator=(IEEEFloat
&&rhs
) {
1010 semantics
= rhs
.semantics
;
1011 significand
= rhs
.significand
;
1012 exponent
= rhs
.exponent
;
1013 category
= rhs
.category
;
1016 rhs
.semantics
= &semBogus
;
1020 bool IEEEFloat::isDenormal() const {
1021 return isFiniteNonZero() && (exponent
== semantics
->minExponent
) &&
1022 (APInt::tcExtractBit(significandParts(),
1023 semantics
->precision
- 1) == 0);
1026 bool IEEEFloat::isSmallest() const {
1027 // The smallest number by magnitude in our format will be the smallest
1028 // denormal, i.e. the floating point number with exponent being minimum
1029 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
1030 return isFiniteNonZero() && exponent
== semantics
->minExponent
&&
1031 significandMSB() == 0;
1034 bool IEEEFloat::isSmallestNormalized() const {
1035 return getCategory() == fcNormal
&& exponent
== semantics
->minExponent
&&
1036 isSignificandAllZerosExceptMSB();
1039 unsigned int IEEEFloat::getNumHighBits() const {
1040 const unsigned int PartCount
= partCountForBits(semantics
->precision
);
1041 const unsigned int Bits
= PartCount
* integerPartWidth
;
1043 // Compute how many bits are used in the final word.
1044 // When precision is just 1, it represents the 'Pth'
1045 // Precision bit and not the actual significand bit.
1046 const unsigned int NumHighBits
= (semantics
->precision
> 1)
1047 ? (Bits
- semantics
->precision
+ 1)
1048 : (Bits
- semantics
->precision
);
1052 bool IEEEFloat::isSignificandAllOnes() const {
1053 // Test if the significand excluding the integral bit is all ones. This allows
1054 // us to test for binade boundaries.
1055 const integerPart
*Parts
= significandParts();
1056 const unsigned PartCount
= partCountForBits(semantics
->precision
);
1057 for (unsigned i
= 0; i
< PartCount
- 1; i
++)
1061 // Set the unused high bits to all ones when we compare.
1062 const unsigned NumHighBits
= getNumHighBits();
1063 assert(NumHighBits
<= integerPartWidth
&& NumHighBits
> 0 &&
1064 "Can not have more high bits to fill than integerPartWidth");
1065 const integerPart HighBitFill
=
1066 ~integerPart(0) << (integerPartWidth
- NumHighBits
);
1067 if ((semantics
->precision
<= 1) || (~(Parts
[PartCount
- 1] | HighBitFill
)))
// Predicate over the significand (integral bit excluded): is every bit set
// apart from the least significant one? In the loop, the mask ~unsigned{!i}
// exempts bit 0 of part 0 from the check. The top part is tested with the
// unused high bits (HighBitFill) and bit 0 OR-ed in before inverting.
1073 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1074 // Test if the significand excluding the integral bit is all ones except for
1075 // the least significant bit.
1076 const integerPart
*Parts
= significandParts();
1081 const unsigned PartCount
= partCountForBits(semantics
->precision
);
1082 for (unsigned i
= 0; i
< PartCount
- 1; i
++) {
// NOTE(review): the mask below has type `unsigned`. If integerPart is wider
// than unsigned, the conversion zero-extends the mask and the upper bits of
// ~Parts[i] are ignored by this test. Confirm that is intended.
1083 if (~Parts
[i
] & ~unsigned{!i
})
1087 // Set the unused high bits to all ones when we compare.
1088 const unsigned NumHighBits
= getNumHighBits();
1089 assert(NumHighBits
<= integerPartWidth
&& NumHighBits
> 0 &&
1090 "Can not have more high bits to fill than integerPartWidth");
1091 const integerPart HighBitFill
= ~integerPart(0)
1092 << (integerPartWidth
- NumHighBits
);
1093 if (~(Parts
[PartCount
- 1] | HighBitFill
| 0x1))
// Predicate over the significand (integral bit excluded): is every bit clear?
// The parts below the top one are visited by the loop. For the top part,
// HighBitMask (all ones shifted right by getNumHighBits()) selects the bits
// that are inspected, and that check only applies when precision > 1.
1099 bool IEEEFloat::isSignificandAllZeros() const {
1100 // Test if the significand excluding the integral bit is all zeros. This
1101 // allows us to test for binade boundaries.
1102 const integerPart
*Parts
= significandParts();
1103 const unsigned PartCount
= partCountForBits(semantics
->precision
);
1105 for (unsigned i
= 0; i
< PartCount
- 1; i
++)
1109 // Compute how many bits are used in the final word.
1110 const unsigned NumHighBits
= getNumHighBits();
1111 assert(NumHighBits
< integerPartWidth
&& "Can not have more high bits to "
1112 "clear than integerPartWidth");
1113 const integerPart HighBitMask
= ~integerPart(0) >> NumHighBits
;
1115 if ((semantics
->precision
> 1) && (Parts
[PartCount
- 1] & HighBitMask
))
// Predicate over the significand: after the loop over the lower parts, the
// result is true when precision <= 1, or when the top part equals MSBMask,
// a single bit at position (integerPartWidth - getNumHighBits()).
1121 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1122 const integerPart
*Parts
= significandParts();
1123 const unsigned PartCount
= partCountForBits(semantics
->precision
);
1125 for (unsigned i
= 0; i
< PartCount
- 1; i
++) {
1130 const unsigned NumHighBits
= getNumHighBits();
1131 const integerPart MSBMask
= integerPart(1)
1132 << (integerPartWidth
- NumHighBits
);
1133 return ((semantics
->precision
<= 1) || (Parts
[PartCount
- 1] == MSBMask
));
// Reports whether this value is the largest-magnitude finite number of its
// format. The value must be finite, non-zero and at maxExponent. Formats
// with NanOnly non-finite behavior and an AllOnes NaN encoding are handled
// in a separate branch that uses isSignificandAllOnesExceptLSB() when the
// format has a significand; other formats require an all-ones significand.
1136 bool IEEEFloat::isLargest() const {
1137 bool IsMaxExp
= isFiniteNonZero() && exponent
== semantics
->maxExponent
;
1138 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
&&
1139 semantics
->nanEncoding
== fltNanEncoding::AllOnes
) {
1140 // The largest number by magnitude in our format will be the floating point
1141 // number with maximum exponent and with significand that is all ones except
1143 return (IsMaxExp
&& APFloat::hasSignificand(*semantics
))
1144 ? isSignificandAllOnesExceptLSB()
1147 // The largest number by magnitude in our format will be the floating point
1148 // number with maximum exponent and with significand that is all ones.
1149 return IsMaxExp
&& isSignificandAllOnes();
1153 bool IEEEFloat::isInteger() const {
1154 // This could be made more efficient; I'm going for obviously correct.
1155 if (!isFinite()) return false;
1156 IEEEFloat truncated
= *this;
1157 truncated
.roundToIntegral(rmTowardZero
);
1158 return compare(truncated
) == cmpEqual
;
// Structural equality test against rhs. The visible checks compare, in
// order: semantics and category (plus a further condition on the same
// expression), then a shortcut for fcZero / fcInfinity, then the exponent for
// finite non-zero values, and finally all partCount() significand parts.
1161 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat
&rhs
) const {
1164 if (semantics
!= rhs
.semantics
||
1165 category
!= rhs
.category
||
1168 if (category
==fcZero
|| category
==fcInfinity
)
1171 if (isFiniteNonZero() && exponent
!= rhs
.exponent
)
1174 return std::equal(significandParts(), significandParts() + partCount(),
1175 rhs
.significandParts());
// Constructs a value in `ourSemantics` from a single integer part. The
// object is set up as fcNormal with exponent precision - 1, `value` is stored
// in significand part 0, and the result is normalized using
// rmNearestTiesToEven with no lost fraction.
1178 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
, integerPart value
) {
1179 initialize(&ourSemantics
);
1181 category
= fcNormal
;
1183 exponent
= ourSemantics
.precision
- 1;
1184 significandParts()[0] = value
;
1185 normalize(rmNearestTiesToEven
, lfExactlyZero
);
1188 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
) {
1189 initialize(&ourSemantics
);
1190 // The Float8E8MOFNU format does not have a representation
1191 // for zero. So, use the closest representation instead.
1192 // Moreover, the all-zero encoding represents a valid
1193 // normal value (which is the smallestNormalized here).
1194 // Hence, we call makeSmallestNormalized (where category is
1195 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1196 ourSemantics
.hasZero
? makeZero(false) : makeSmallestNormalized(false);
1199 // Delegate to the previous constructor, because later copy constructor may
1200 // actually inspects category, which can't be garbage.
1201 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
, uninitializedTag tag
)
1202 : IEEEFloat(ourSemantics
) {}
// Copy constructor. Starts by calling initialize() with rhs's semantics.
1204 IEEEFloat::IEEEFloat(const IEEEFloat
&rhs
) {
1205 initialize(rhs
.semantics
);
1209 IEEEFloat::IEEEFloat(IEEEFloat
&&rhs
) : semantics(&semBogus
) {
1210 *this = std::move(rhs
);
1213 IEEEFloat::~IEEEFloat() { freeSignificand(); }
1215 unsigned int IEEEFloat::partCount() const {
1216 return partCountForBits(semantics
->precision
+ 1);
1219 const APFloat::integerPart
*IEEEFloat::significandParts() const {
1220 return const_cast<IEEEFloat
*>(this)->significandParts();
1223 APFloat::integerPart
*IEEEFloat::significandParts() {
1224 if (partCount() > 1)
1225 return significand
.parts
;
1227 return &significand
.part
;
1230 void IEEEFloat::zeroSignificand() {
1231 APInt::tcSet(significandParts(), 0, partCount());
1234 /* Increment an fcNormal floating point number's significand. */
// The increment runs over all partCount() parts through APInt::tcIncrement;
// its result is kept in `carry`.
1235 void IEEEFloat::incrementSignificand() {
1238 carry
= APInt::tcIncrement(significandParts(), partCount());
1240 /* Our callers should never cause us to overflow. */
1245 /* Add the significand of the RHS. Returns the carry flag. */
1246 APFloat::integerPart
IEEEFloat::addSignificand(const IEEEFloat
&rhs
) {
1249 parts
= significandParts();
1251 assert(semantics
== rhs
.semantics
);
1252 assert(exponent
== rhs
.exponent
);
1254 return APInt::tcAdd(parts
, rhs
.significandParts(), 0, partCount());
1257 /* Subtract the significand of the RHS with a borrow flag. Returns
1259 APFloat::integerPart
IEEEFloat::subtractSignificand(const IEEEFloat
&rhs
,
1260 integerPart borrow
) {
1263 parts
= significandParts();
1265 assert(semantics
== rhs
.semantics
);
1266 assert(exponent
== rhs
.exponent
);
1268 return APInt::tcSubtract(parts
, rhs
.significandParts(), borrow
,
// Multiplies this significand by rhs's significand and returns the lost
// fraction. The full-width product is built in `fullSignificand`, which is
// the on-stack `scratch` buffer unless more than 4 parts are needed, in which
// case it is heap-allocated and released at the end of the function.
// When ignoreAddend is false and addend is non-zero, the addend is converted
// to a temporary extended-precision semantics and added to the product
// before the result is narrowed back to `precision` bits.
1272 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1273 on to the full-precision result of the multiplication. Returns the
1275 lostFraction
IEEEFloat::multiplySignificand(const IEEEFloat
&rhs
,
1277 bool ignoreAddend
) {
1278 unsigned int omsb
; // One, not zero, based MSB.
1279 unsigned int partsCount
, newPartsCount
, precision
;
1280 integerPart
*lhsSignificand
;
1281 integerPart scratch
[4];
1282 integerPart
*fullSignificand
;
1283 lostFraction lost_fraction
;
1286 assert(semantics
== rhs
.semantics
);
1288 precision
= semantics
->precision
;
1290 // Allocate space for twice as many bits as the original significand, plus one
1291 // extra bit for the addition to overflow into.
1292 newPartsCount
= partCountForBits(precision
* 2 + 1);
1294 if (newPartsCount
> 4)
1295 fullSignificand
= new integerPart
[newPartsCount
];
1297 fullSignificand
= scratch
;
1299 lhsSignificand
= significandParts();
1300 partsCount
= partCount();
1302 APInt::tcFullMultiply(fullSignificand
, lhsSignificand
,
1303 rhs
.significandParts(), partsCount
, partsCount
);
1305 lost_fraction
= lfExactlyZero
;
1306 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
1307 exponent
+= rhs
.exponent
;
1309 // Assume the operands involved in the multiplication are single-precision
1310 // FP, and the two multiplicands are:
1311 // *this = a23 . a22 ... a0 * 2^e1
1312 // rhs = b23 . b22 ... b0 * 2^e2
1313 // the result of multiplication is:
1314 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1315 // Note that there are three significant bits at the left-hand side of the
1316 // radix point: two for the multiplication, and an overflow bit for the
1317 // addition (that will always be zero at this point). Move the radix point
1318 // toward left by two bits, and adjust exponent accordingly.
1321 if (!ignoreAddend
&& addend
.isNonZero()) {
1322 // The intermediate result of the multiplication has "2 * precision"
1323 // significant bits; adjust the addend to be consistent with mul result.
1325 Significand savedSignificand
= significand
;
1326 const fltSemantics
*savedSemantics
= semantics
;
1327 fltSemantics extendedSemantics
;
1329 unsigned int extendedPrecision
;
1331 // Normalize our MSB to one below the top bit to allow for overflow.
1332 extendedPrecision
= 2 * precision
+ 1;
1333 if (omsb
!= extendedPrecision
- 1) {
1334 assert(extendedPrecision
> omsb
);
1335 APInt::tcShiftLeft(fullSignificand
, newPartsCount
,
1336 (extendedPrecision
- 1) - omsb
);
1337 exponent
-= (extendedPrecision
- 1) - omsb
;
1340 /* Create new semantics. */
1341 extendedSemantics
= *semantics
;
1342 extendedSemantics
.precision
= extendedPrecision
;
// Temporarily point this object at the wide product and the extended
// semantics so the addition below operates at full precision; both are
// restored from the saved copies afterwards.
1344 if (newPartsCount
== 1)
1345 significand
.part
= fullSignificand
[0];
1347 significand
.parts
= fullSignificand
;
1348 semantics
= &extendedSemantics
;
1350 // Make a copy so we can convert it to the extended semantics.
1351 // Note that we cannot convert the addend directly, as the extendedSemantics
1352 // is a local variable (which we take a reference to).
1353 IEEEFloat
extendedAddend(addend
);
1354 status
= extendedAddend
.convert(extendedSemantics
, APFloat::rmTowardZero
,
1356 assert(status
== APFloat::opOK
);
1359 // Shift the significand of the addend right by one bit. This guarantees
1360 // that the high bit of the significand is zero (same as fullSignificand),
1361 // so the addition will overflow (if it does overflow at all) into the top bit.
1362 lost_fraction
= extendedAddend
.shiftSignificandRight(1);
1363 assert(lost_fraction
== lfExactlyZero
&&
1364 "Lost precision while shifting addend for fused-multiply-add.");
1366 lost_fraction
= addOrSubtractSignificand(extendedAddend
, false);
1368 /* Restore our state. */
1369 if (newPartsCount
== 1)
1370 fullSignificand
[0] = significand
.part
;
1371 significand
= savedSignificand
;
1372 semantics
= savedSemantics
;
1374 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
1377 // Convert the result having "2 * precision" significant-bits back to the one
1378 // having "precision" significant-bits. First, move the radix point from
1379 // position "2*precision - 1" to "precision - 1". The exponent needs to be
1380 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
// NOTE(review): the statement below subtracts precision + 1, while the
// comment above derives an adjustment of precision. Presumably the extra 1
// accounts for the overflow bit mentioned earlier; confirm.
1381 exponent
-= precision
+ 1;
1383 // In case MSB resides at the left-hand side of radix point, shift the
1384 // mantissa right by some amount to make sure the MSB reside right before
1385 // the radix point (i.e. "MSB . rest-significant-bits").
1387 // Note that the result is not normalized when "omsb < precision". So, the
1388 // caller needs to call IEEEFloat::normalize() if normalized value is
1390 if (omsb
> precision
) {
1391 unsigned int bits
, significantParts
;
1394 bits
= omsb
- precision
;
1395 significantParts
= partCountForBits(omsb
);
1396 lf
= shiftRight(fullSignificand
, significantParts
, bits
);
1397 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1401 APInt::tcAssign(lhsSignificand
, fullSignificand
, partsCount
);
1403 if (newPartsCount
> 4)
1404 delete [] fullSignificand
;
1406 return lost_fraction
;
1409 lostFraction
IEEEFloat::multiplySignificand(const IEEEFloat
&rhs
) {
1410 // When the given semantics has zero, the addend here is a zero.
1411 // i.e . it belongs to the 'fcZero' category.
1412 // But when the semantics does not support zero, we need to
1413 // explicitly convey that this addend should be ignored
1414 // for multiplication.
1415 return multiplySignificand(rhs
, IEEEFloat(*semantics
), !semantics
->hasZero
);
1418 /* Divide the significand of LHS by the significand of RHS. */
// Works on copies of both significands (dividend and divisor share one
// buffer, divisor starting partsCount parts in). The original significand is
// zeroed and then receives the quotient bit by bit from the long-division
// loop. Returns the lost fraction derived from the final remainder.
1419 lostFraction
IEEEFloat::divideSignificand(const IEEEFloat
&rhs
) {
1420 unsigned int bit
, i
, partsCount
;
1421 const integerPart
*rhsSignificand
;
1422 integerPart
*lhsSignificand
, *dividend
, *divisor
;
1423 integerPart scratch
[4];
1424 lostFraction lost_fraction
;
1426 assert(semantics
== rhs
.semantics
);
1428 lhsSignificand
= significandParts();
1429 rhsSignificand
= rhs
.significandParts();
1430 partsCount
= partCount();
1433 dividend
= new integerPart
[partsCount
* 2];
1437 divisor
= dividend
+ partsCount
;
1439 /* Copy the dividend and divisor as they will be modified in-place. */
1440 for (i
= 0; i
< partsCount
; i
++) {
1441 dividend
[i
] = lhsSignificand
[i
];
1442 divisor
[i
] = rhsSignificand
[i
];
1443 lhsSignificand
[i
] = 0;
1446 exponent
-= rhs
.exponent
;
1448 unsigned int precision
= semantics
->precision
;
1450 /* Normalize the divisor. */
1451 bit
= precision
- APInt::tcMSB(divisor
, partsCount
) - 1;
1454 APInt::tcShiftLeft(divisor
, partsCount
, bit
);
1457 /* Normalize the dividend. */
1458 bit
= precision
- APInt::tcMSB(dividend
, partsCount
) - 1;
1461 APInt::tcShiftLeft(dividend
, partsCount
, bit
);
1464 /* Ensure the dividend >= divisor initially for the loop below.
1465 Incidentally, this means that the division loop below is
1466 guaranteed to set the integer bit to one. */
1467 if (APInt::tcCompare(dividend
, divisor
, partsCount
) < 0) {
1469 APInt::tcShiftLeft(dividend
, partsCount
, 1);
1470 assert(APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0);
1473 /* Long division. */
// One quotient bit per iteration, from bit (precision - 1) down to bit 0.
1474 for (bit
= precision
; bit
; bit
-= 1) {
1475 if (APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0) {
1476 APInt::tcSubtract(dividend
, divisor
, 0, partsCount
);
1477 APInt::tcSetBit(lhsSignificand
, bit
- 1);
1480 APInt::tcShiftLeft(dividend
, partsCount
, 1);
1483 /* Figure out the lost fraction. */
1484 int cmp
= APInt::tcCompare(dividend
, divisor
, partsCount
);
1487 lost_fraction
= lfMoreThanHalf
;
1489 lost_fraction
= lfExactlyHalf
;
1490 else if (APInt::tcIsZero(dividend
, partsCount
))
1491 lost_fraction
= lfExactlyZero
;
1493 lost_fraction
= lfLessThanHalf
;
1498 return lost_fraction
;
1501 unsigned int IEEEFloat::significandMSB() const {
1502 return APInt::tcMSB(significandParts(), partCount());
1505 unsigned int IEEEFloat::significandLSB() const {
1506 return APInt::tcLSB(significandParts(), partCount());
1509 /* Note that a zero result is NOT normalized to fcZero. */
// Shifts the significand right by `bits` and returns the lost fraction
// reported by shiftRight(). The assertion guards against the exponent
// wrapping when `bits` is added to it.
1510 lostFraction
IEEEFloat::shiftSignificandRight(unsigned int bits
) {
1511 /* Our exponent should not overflow. */
1512 assert((ExponentType
) (exponent
+ bits
) >= exponent
);
1516 return shiftRight(significandParts(), partCount(), bits
);
1519 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
// Precondition (asserted): bits < precision, or precision == 1 and bits <= 1.
// The significand must still be non-zero after the shift (asserted).
1520 void IEEEFloat::shiftSignificandLeft(unsigned int bits
) {
1521 assert(bits
< semantics
->precision
||
1522 (semantics
->precision
== 1 && bits
<= 1));
1525 unsigned int partsCount
= partCount();
1527 APInt::tcShiftLeft(significandParts(), partsCount
, bits
);
1530 assert(!APInt::tcIsZero(significandParts(), partsCount
));
// Compares the magnitudes of *this and rhs. Both must be finite, non-zero and
// share semantics (asserted). The exponent difference is computed first, and
// the significands are compared with APInt::tcCompare.
1534 APFloat::cmpResult
IEEEFloat::compareAbsoluteValue(const IEEEFloat
&rhs
) const {
1537 assert(semantics
== rhs
.semantics
);
1538 assert(isFiniteNonZero());
1539 assert(rhs
.isFiniteNonZero());
1541 compare
= exponent
- rhs
.exponent
;
1543 /* If exponents are equal, do an unsigned bignum comparison of the
1546 compare
= APInt::tcCompare(significandParts(), rhs
.significandParts(),
1550 return cmpGreaterThan
;
1551 else if (compare
< 0)
// File-local helper operating on a raw word array. Whole words are filled
// with all ones while more than APINT_BITS_PER_WORD bits remain; the next
// word then receives a mask of the remaining low `bits` bits.
1557 /* Set the least significant BITS bits of a bignum, clear the
1559 static void tcSetLeastSignificantBits(APInt::WordType
*dst
, unsigned parts
,
1562 while (bits
> APInt::APINT_BITS_PER_WORD
) {
1563 dst
[i
++] = ~(APInt::WordType
)0;
1564 bits
-= APInt::APINT_BITS_PER_WORD
;
1568 dst
[i
++] = ~(APInt::WordType
)0 >> (APInt::APINT_BITS_PER_WORD
- bits
);
1574 /* Handle overflow. Sign is preserved. We either become infinity or
1575 the largest finite number. */
// For formats that are not FiniteOnly, the nearest modes and the directed
// mode pointing away from zero for the current sign produce the non-finite
// result (a NaN for NanOnly formats, infinity otherwise) and return
// opOverflow | opInexact. In the remaining cases the value becomes the
// largest finite number: maxExponent with `precision` low significand bits
// set, and bit 0 cleared again for NanOnly formats with an AllOnes NaN
// encoding.
1576 APFloat::opStatus
IEEEFloat::handleOverflow(roundingMode rounding_mode
) {
1577 if (semantics
->nonFiniteBehavior
!= fltNonfiniteBehavior::FiniteOnly
) {
1579 if (rounding_mode
== rmNearestTiesToEven
||
1580 rounding_mode
== rmNearestTiesToAway
||
1581 (rounding_mode
== rmTowardPositive
&& !sign
) ||
1582 (rounding_mode
== rmTowardNegative
&& sign
)) {
1583 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
)
1584 makeNaN(false, sign
);
1586 category
= fcInfinity
;
1587 return static_cast<opStatus
>(opOverflow
| opInexact
);
1591 /* Otherwise we become the largest finite number. */
1592 category
= fcNormal
;
1593 exponent
= semantics
->maxExponent
;
1594 tcSetLeastSignificantBits(significandParts(), partCount(),
1595 semantics
->precision
);
1596 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
&&
1597 semantics
->nanEncoding
== fltNanEncoding::AllOnes
)
1598 APInt::tcClearBit(significandParts(), 0);
1603 /* Returns TRUE if, when truncating the current number, with BIT the
1604 new LSB, with the given lost fraction and rounding mode, the result
1605 would need to be rounded away from zero (i.e., by increasing the
1606 significand). This routine must work for fcZero of both signs, and
1607 fcNormal numbers. */
// Preconditions (asserted): the value is finite non-zero or fcZero, and
// lost_fraction is not lfExactlyZero.
1608 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode
,
1609 lostFraction lost_fraction
,
1610 unsigned int bit
) const {
1611 /* NaNs and infinities should not have lost fractions. */
1612 assert(isFiniteNonZero() || category
== fcZero
);
1614 /* Current callers never pass this so we don't handle it. */
1615 assert(lost_fraction
!= lfExactlyZero
);
1617 switch (rounding_mode
) {
1618 case rmNearestTiesToAway
:
1619 return lost_fraction
== lfExactlyHalf
|| lost_fraction
== lfMoreThanHalf
;
1621 case rmNearestTiesToEven
:
1622 if (lost_fraction
== lfMoreThanHalf
)
1625 /* Our zeroes don't have a significand to test. */
// On an exact tie the decision is the value of bit `bit` of the significand.
1626 if (lost_fraction
== lfExactlyHalf
&& category
!= fcZero
)
1627 return APInt::tcExtractBit(significandParts(), bit
);
1634 case rmTowardPositive
:
1637 case rmTowardNegative
:
1643 llvm_unreachable("Invalid rounding mode found");
// Brings a finite non-zero value into canonical form and rounds it according
// to rounding_mode, taking into account the fraction already lost by the
// caller (lost_fraction). Visible stages: adjust the exponent so the MSB
// sits at bit `precision` (clamped at minExponent for subnormals), detect
// overflow, shift and accumulate the lost fraction, round by incrementing the
// significand when roundAwayFromZero() says so, and finally classify the
// result. The last visible return is opUnderflow | opInexact.
1646 APFloat::opStatus
IEEEFloat::normalize(roundingMode rounding_mode
,
1647 lostFraction lost_fraction
) {
1648 unsigned int omsb
; /* One, not zero, based MSB. */
1651 if (!isFiniteNonZero())
1654 /* Before rounding normalize the exponent of fcNormal numbers. */
1655 omsb
= significandMSB() + 1;
1658 /* OMSB is numbered from 1. We want to place it in the integer
1659 bit numbered PRECISION if possible, with a compensating change in
1661 exponentChange
= omsb
- semantics
->precision
;
1663 /* If the resulting exponent is too high, overflow according to
1664 the rounding mode. */
1665 if (exponent
+ exponentChange
> semantics
->maxExponent
)
1666 return handleOverflow(rounding_mode
);
1668 /* Subnormal numbers have exponent minExponent, and their MSB
1669 is forced based on that. */
1670 if (exponent
+ exponentChange
< semantics
->minExponent
)
1671 exponentChange
= semantics
->minExponent
- exponent
;
1673 /* Shifting left is easy as we don't lose precision. */
1674 if (exponentChange
< 0) {
1675 assert(lost_fraction
== lfExactlyZero
);
1677 shiftSignificandLeft(-exponentChange
);
1682 if (exponentChange
> 0) {
1685 /* Shift right and capture any new lost fraction. */
1686 lf
= shiftSignificandRight(exponentChange
);
1688 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1690 /* Keep OMSB up-to-date. */
1691 if (omsb
> (unsigned) exponentChange
)
1692 omsb
-= exponentChange
;
1698 // The all-ones values is an overflow if NaN is all ones. If NaN is
1699 // represented by negative zero, then it is a valid finite value.
1700 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
&&
1701 semantics
->nanEncoding
== fltNanEncoding::AllOnes
&&
1702 exponent
== semantics
->maxExponent
&& isSignificandAllOnes())
1703 return handleOverflow(rounding_mode
);
1705 /* Now round the number according to rounding_mode given the lost
1708 /* As specified in IEEE 754, since we do not trap we do not report
1709 underflow for exact results. */
1710 if (lost_fraction
== lfExactlyZero
) {
1711 /* Canonicalize zeroes. */
1714 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
1716 if (!semantics
->hasZero
)
1717 makeSmallestNormalized(false);
1723 /* Increment the significand if we're rounding away from zero. */
1724 if (roundAwayFromZero(rounding_mode
, lost_fraction
, 0)) {
1726 exponent
= semantics
->minExponent
;
1728 incrementSignificand();
1729 omsb
= significandMSB() + 1;
1731 /* Did the significand increment overflow? */
1732 if (omsb
== (unsigned) semantics
->precision
+ 1) {
1733 /* Renormalize by incrementing the exponent and shifting our
1734 significand right one. However if we already have the
1735 maximum exponent we overflow to infinity. */
1736 if (exponent
== semantics
->maxExponent
)
1737 // Invoke overflow handling with a rounding mode that will guarantee
1738 // that the result gets turned into the correct infinity representation.
1739 // This is needed instead of just setting the category to infinity to
1740 // account for 8-bit floating point types that have no inf, only NaN.
1741 return handleOverflow(sign
? rmTowardNegative
: rmTowardPositive
);
1743 shiftSignificandRight(1);
1748 // The all-ones values is an overflow if NaN is all ones. If NaN is
1749 // represented by negative zero, then it is a valid finite value.
1750 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
&&
1751 semantics
->nanEncoding
== fltNanEncoding::AllOnes
&&
1752 exponent
== semantics
->maxExponent
&& isSignificandAllOnes())
1753 return handleOverflow(rounding_mode
);
1756 /* The normal case - we were and are not denormal, and any
1757 significand increment above didn't overflow. */
1758 if (omsb
== semantics
->precision
)
1761 /* We have a non-zero denormal. */
1762 assert(omsb
< semantics
->precision
);
1764 /* Canonicalize zeroes. */
1767 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
1769 // This condition handles the case where the semantics
1770 // does not have zero but uses the all-zero encoding
1771 // to represent the smallest normal value.
1772 if (!semantics
->hasZero
)
1773 makeSmallestNormalized(false);
1776 /* The fcZero case is a denormal that underflowed to zero. */
1777 return (opStatus
) (opUnderflow
| opInexact
);
// Handles the add/subtract cases that can be decided from the operand
// categories alone, dispatching on PackCategoriesIntoKey(category,
// rhs.category). The caller (addOrSubtract) treats a result of opDivByZero as
// "not a simple case" and then performs the real arithmetic.
1780 APFloat::opStatus
IEEEFloat::addOrSubtractSpecials(const IEEEFloat
&rhs
,
1782 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1784 llvm_unreachable(nullptr);
1786 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1787 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1788 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1791 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1792 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1793 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1794 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1795 if (isSignaling()) {
1799 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
1801 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1802 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1803 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1806 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1807 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
// The result is an infinity carrying rhs's sign, flipped for subtraction.
1808 category
= fcInfinity
;
1809 sign
= rhs
.sign
^ subtract
;
1812 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1814 sign
= rhs
.sign
^ subtract
;
1817 case PackCategoriesIntoKey(fcZero
, fcZero
):
1818 /* Sign depends on rounding mode; handled by caller. */
1821 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1822 /* Differently signed infinities can only be validly
1824 if (((sign
^ rhs
.sign
)!=0) != subtract
) {
1831 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1836 /* Add or subtract two normal numbers. */
// Operates on the significands only and returns the lost fraction. `subtract`
// is first folded with the sign difference of the operands so that it
// describes the operation on magnitudes. The operands are aligned using
// bits = exponent - rhs.exponent, shifting a copy of rhs or *this as needed.
// For an effective subtraction the operand order is reversed when *this is
// the smaller magnitude, and the lost fraction is inverted afterwards.
1837 lostFraction
IEEEFloat::addOrSubtractSignificand(const IEEEFloat
&rhs
,
1840 lostFraction lost_fraction
;
1843 /* Determine if the operation on the absolute values is effectively
1844 an addition or subtraction. */
1845 subtract
^= static_cast<bool>(sign
^ rhs
.sign
);
1847 /* Are we bigger exponent-wise than the RHS? */
1848 bits
= exponent
- rhs
.exponent
;
1850 /* Subtraction is more subtle than one might naively expect. */
1852 if ((bits
< 0) && !semantics
->hasSignedRepr
)
1854 "This floating point format does not support signed values");
1856 IEEEFloat
temp_rhs(rhs
);
1859 lost_fraction
= lfExactlyZero
;
1860 else if (bits
> 0) {
1861 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
- 1);
1862 shiftSignificandLeft(1);
1864 lost_fraction
= shiftSignificandRight(-bits
- 1);
1865 temp_rhs
.shiftSignificandLeft(1);
1868 // Should we reverse the subtraction.
1869 if (compareAbsoluteValue(temp_rhs
) == cmpLessThan
) {
1870 carry
= temp_rhs
.subtractSignificand
1871 (*this, lost_fraction
!= lfExactlyZero
);
1872 copySignificand(temp_rhs
);
1875 carry
= subtractSignificand
1876 (temp_rhs
, lost_fraction
!= lfExactlyZero
);
1879 /* Invert the lost fraction - it was on the RHS and
1881 if (lost_fraction
== lfLessThanHalf
)
1882 lost_fraction
= lfMoreThanHalf
;
1883 else if (lost_fraction
== lfMoreThanHalf
)
1884 lost_fraction
= lfLessThanHalf
;
1886 /* The code above is intended to ensure that no borrow is
1892 IEEEFloat
temp_rhs(rhs
);
1894 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
);
1895 carry
= addSignificand(temp_rhs
);
1897 lost_fraction
= shiftSignificandRight(-bits
);
1898 carry
= addSignificand(rhs
);
1901 /* We have a guard bit; generating a carry cannot happen. */
1906 return lost_fraction
;
// Handles the multiplication cases that are decided by the operand
// categories, dispatching on PackCategoriesIntoKey(category, rhs.category).
// In the NaN cases the sign is XOR-ed with rhs.sign again to undo the sign
// combination done by the caller (see the "restore the original sign" note).
1909 APFloat::opStatus
IEEEFloat::multiplySpecials(const IEEEFloat
&rhs
) {
1910 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1912 llvm_unreachable(nullptr);
1914 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1915 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1916 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1920 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1921 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1922 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1923 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1924 sign
^= rhs
.sign
; // restore the original sign
1925 if (isSignaling()) {
1929 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
1931 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1932 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1933 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1934 category
= fcInfinity
;
1937 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1938 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1939 case PackCategoriesIntoKey(fcZero
, fcZero
):
1943 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1944 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1948 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
// Handles the division cases that are decided by the operand categories,
// dispatching on PackCategoriesIntoKey(category, rhs.category). In the NaN
// cases the sign is XOR-ed with rhs.sign again to restore the original sign.
// A finite non-zero value divided by zero becomes a NaN for NanOnly formats
// and an infinity otherwise.
1953 APFloat::opStatus
IEEEFloat::divideSpecials(const IEEEFloat
&rhs
) {
1954 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1956 llvm_unreachable(nullptr);
1958 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1959 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1960 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1964 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1965 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1966 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1967 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1968 sign
^= rhs
.sign
; // restore the original sign
1969 if (isSignaling()) {
1973 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
1975 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1976 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1977 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1978 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1981 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1985 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1986 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
)
1987 makeNaN(false, sign
);
1989 category
= fcInfinity
;
1992 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1993 case PackCategoriesIntoKey(fcZero
, fcZero
):
1997 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
// Handles the mod cases that are decided by the operand categories,
// dispatching on PackCategoriesIntoKey(category, rhs.category). The case
// groups mirror those of remainderSpecials.
2002 APFloat::opStatus
IEEEFloat::modSpecials(const IEEEFloat
&rhs
) {
2003 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
2005 llvm_unreachable(nullptr);
2007 case PackCategoriesIntoKey(fcZero
, fcNaN
):
2008 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
2009 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
2012 case PackCategoriesIntoKey(fcNaN
, fcZero
):
2013 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
2014 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
2015 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
2016 if (isSignaling()) {
2020 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
2022 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
2023 case PackCategoriesIntoKey(fcZero
, fcNormal
):
2024 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
2027 case PackCategoriesIntoKey(fcNormal
, fcZero
):
2028 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
2029 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
2030 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
2031 case PackCategoriesIntoKey(fcZero
, fcZero
):
2035 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
// Handles the remainder cases that are decided by the operand categories,
// dispatching on PackCategoriesIntoKey(category, rhs.category). When both
// operands are fcNormal it returns opDivByZero as a sentinel meaning "not a
// special case".
2040 APFloat::opStatus
IEEEFloat::remainderSpecials(const IEEEFloat
&rhs
) {
2041 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
2043 llvm_unreachable(nullptr);
2045 case PackCategoriesIntoKey(fcZero
, fcNaN
):
2046 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
2047 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
2050 case PackCategoriesIntoKey(fcNaN
, fcZero
):
2051 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
2052 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
2053 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
2054 if (isSignaling()) {
2058 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
2060 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
2061 case PackCategoriesIntoKey(fcZero
, fcNormal
):
2062 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
2065 case PackCategoriesIntoKey(fcNormal
, fcZero
):
2066 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
2067 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
2068 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
2069 case PackCategoriesIntoKey(fcZero
, fcZero
):
2073 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
2074 return opDivByZero
; // fake status, indicating this is not a special case
// Sign-flip entry point. Formats whose NaN encoding is NegativeZero get a
// dedicated check for zero and NaN values before the general path.
2079 void IEEEFloat::changeSign() {
2080 // With NaN-as-negative-zero, neither NaN or negative zero can change
2082 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
&&
2083 (isZero() || isNaN()))
2085 /* Look mummy, this one's easy. */
2089 /* Normalized addition or subtraction. */
// Shared implementation behind add() and subtract(). The category-only cases
// are tried first through addOrSubtractSpecials(); a status of opDivByZero
// from it means real arithmetic is needed, which is done on the significands
// and then normalized. A zero result finally gets its sign fixed up.
2090 APFloat::opStatus
IEEEFloat::addOrSubtract(const IEEEFloat
&rhs
,
2091 roundingMode rounding_mode
,
2095 fs
= addOrSubtractSpecials(rhs
, subtract
);
2097 /* This return code means it was not a simple case. */
2098 if (fs
== opDivByZero
) {
2099 lostFraction lost_fraction
;
2101 lost_fraction
= addOrSubtractSignificand(rhs
, subtract
);
2102 fs
= normalize(rounding_mode
, lost_fraction
);
2104 /* Can only be zero if we lost no fraction. */
2105 assert(category
!= fcZero
|| lost_fraction
== lfExactlyZero
);
2108 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2109 positive zero unless rounding to minus infinity, except that
2110 adding two like-signed zeroes gives that zero. */
2111 if (category
== fcZero
) {
2112 if (rhs
.category
!= fcZero
|| (sign
== rhs
.sign
) == subtract
)
2113 sign
= (rounding_mode
== rmTowardNegative
);
2114 // NaN-in-negative-zero means zeros need to be normalized to +0.
2115 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
2122 /* Normalized addition. */
2123 APFloat::opStatus
IEEEFloat::add(const IEEEFloat
&rhs
,
2124 roundingMode rounding_mode
) {
2125 return addOrSubtract(rhs
, rounding_mode
, false);
2128 /* Normalized subtraction. */
2129 APFloat::opStatus
IEEEFloat::subtract(const IEEEFloat
&rhs
,
2130 roundingMode rounding_mode
) {
2131 return addOrSubtract(rhs
, rounding_mode
, true);
2134 /* Normalized multiply. */
// The category-only cases go through multiplySpecials(). A value that is
// still finite and non-zero afterwards has its significand multiplied and is
// normalized; opInexact is OR-ed into the status whenever a fraction was
// lost. Zero results get extra handling for NegativeZero NaN encodings.
2135 APFloat::opStatus
IEEEFloat::multiply(const IEEEFloat
&rhs
,
2136 roundingMode rounding_mode
) {
2140 fs
= multiplySpecials(rhs
);
2142 if (isZero() && semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
2144 if (isFiniteNonZero()) {
2145 lostFraction lost_fraction
= multiplySignificand(rhs
);
2146 fs
= normalize(rounding_mode
, lost_fraction
);
2147 if (lost_fraction
!= lfExactlyZero
)
2148 fs
= (opStatus
) (fs
| opInexact
);
2154 /* Normalized divide. */
// The category-only cases go through divideSpecials(). A value that is still
// finite and non-zero afterwards has its significand divided and is
// normalized; opInexact is OR-ed into the status whenever a fraction was
// lost. Zero results get extra handling for NegativeZero NaN encodings.
2155 APFloat::opStatus
IEEEFloat::divide(const IEEEFloat
&rhs
,
2156 roundingMode rounding_mode
) {
2160 fs
= divideSpecials(rhs
);
2162 if (isZero() && semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
2164 if (isFiniteNonZero()) {
2165 lostFraction lost_fraction
= divideSignificand(rhs
);
2166 fs
= normalize(rounding_mode
, lost_fraction
);
2167 if (lost_fraction
!= lfExactlyZero
)
2168 fs
= (opStatus
) (fs
| opInexact
);
2174 /* Normalized remainder. */
2175 APFloat::opStatus
IEEEFloat::remainder(const IEEEFloat
&rhs
) {
2177 unsigned int origSign
= sign
;
2179 // First handle the special cases.
2180 fs
= remainderSpecials(rhs
);
2181 if (fs
!= opDivByZero
)
2186 // Make sure the current value is less than twice the denom. If the addition
2187 // did not succeed (an overflow has happened), then the finite
2188 // value we currently possess must be less than twice the denom (as we are
2189 // using the same semantics).
2191 if (P2
.add(rhs
, rmNearestTiesToEven
) == opOK
) {
2196 // Let's work with absolute numbers.
2202 // To calculate the remainder we use the following scheme.
2204 // The remainder is defined as follows:
2206 // remainder = numer - rquot * denom = x - r * p
2208 // Where r is the result of: x/p, rounded toward the nearest integral value
2209 // (with halfway cases rounded toward the even number).
2211 // Currently, (after x mod 2p):
2212 // r is the number of 2p's present inside x, which is inherently, an even
2215 // We may split the remaining calculation into 4 options:
2216 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2217 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2218 // are done as well.
2219 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2220 // to subtract 1p at least once.
2221 // - if x >= p then we must subtract p at least once, as x must be a
2224 // By now, either we are done, or we have added 1 to r, which in turn is now an odd number.
2226 // We can now split the remaining calculation to the following 3 options:
2227 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2228 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2229 // must round up to the next even number. so we must subtract p once more.
2230 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2231 // integral, and subtract p once more.
2234 // Extend the semantics to prevent an overflow/underflow or inexact result.
2236 fltSemantics extendedSemantics
= *semantics
;
2237 extendedSemantics
.maxExponent
++;
2238 extendedSemantics
.minExponent
--;
2239 extendedSemantics
.precision
+= 2;
2241 IEEEFloat VEx
= *this;
2242 fs
= VEx
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
2243 assert(fs
== opOK
&& !losesInfo
);
2245 fs
= PEx
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
2246 assert(fs
== opOK
&& !losesInfo
);
2248 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2250 fs
= VEx
.add(VEx
, rmNearestTiesToEven
);
2253 if (VEx
.compare(PEx
) == cmpGreaterThan
) {
2254 fs
= subtract(P
, rmNearestTiesToEven
);
2257 // Make VEx = this.add(this), but because we have different semantics, we do
2258 // not want to `convert` again, so we just subtract PEx twice (which equals
2259 // to the desired value).
2260 fs
= VEx
.subtract(PEx
, rmNearestTiesToEven
);
2262 fs
= VEx
.subtract(PEx
, rmNearestTiesToEven
);
2265 cmpResult result
= VEx
.compare(PEx
);
2266 if (result
== cmpGreaterThan
|| result
== cmpEqual
) {
2267 fs
= subtract(P
, rmNearestTiesToEven
);
2273 sign
= origSign
; // IEEE754 requires this
2274 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
2275 // But some 8-bit floats only have positive 0.
2284 /* Normalized llvm frem (C fmod). */
2285 APFloat::opStatus
IEEEFloat::mod(const IEEEFloat
&rhs
) {
2287 fs
= modSpecials(rhs
);
2288 unsigned int origSign
= sign
;
2290 while (isFiniteNonZero() && rhs
.isFiniteNonZero() &&
2291 compareAbsoluteValue(rhs
) != cmpLessThan
) {
2292 int Exp
= ilogb(*this) - ilogb(rhs
);
2293 IEEEFloat V
= scalbn(rhs
, Exp
, rmNearestTiesToEven
);
2294 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2296 if (V
.isNaN() || compareAbsoluteValue(V
) == cmpLessThan
)
2297 V
= scalbn(rhs
, Exp
- 1, rmNearestTiesToEven
);
2300 fs
= subtract(V
, rmNearestTiesToEven
);
2302 // When the semantics supports zero, this loop's
2303 // exit-condition is handled by the 'isFiniteNonZero'
2304 // category check above. However, when the semantics
2305 // does not have 'fcZero' and we have reached the
2306 // minimum possible value, (and any further subtract
2307 // will underflow to the same value) explicitly
2308 // provide an exit-path here.
2309 if (!semantics
->hasZero
&& this->isSmallest())
2315 sign
= origSign
; // fmod requires this
2316 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
2322 /* Normalized fused-multiply-add. */
2323 APFloat::opStatus
IEEEFloat::fusedMultiplyAdd(const IEEEFloat
&multiplicand
,
2324 const IEEEFloat
&addend
,
2325 roundingMode rounding_mode
) {
2328 /* Post-multiplication sign, before addition. */
2329 sign
^= multiplicand
.sign
;
2331 /* If and only if all arguments are normal do we need to do an
2332 extended-precision calculation. */
2333 if (isFiniteNonZero() &&
2334 multiplicand
.isFiniteNonZero() &&
2335 addend
.isFinite()) {
2336 lostFraction lost_fraction
;
2338 lost_fraction
= multiplySignificand(multiplicand
, addend
);
2339 fs
= normalize(rounding_mode
, lost_fraction
);
2340 if (lost_fraction
!= lfExactlyZero
)
2341 fs
= (opStatus
) (fs
| opInexact
);
2343 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2344 positive zero unless rounding to minus infinity, except that
2345 adding two like-signed zeroes gives that zero. */
2346 if (category
== fcZero
&& !(fs
& opUnderflow
) && sign
!= addend
.sign
) {
2347 sign
= (rounding_mode
== rmTowardNegative
);
2348 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
2352 fs
= multiplySpecials(multiplicand
);
2354 /* FS can only be opOK or opInvalidOp. There is no more work
2355 to do in the latter case. The IEEE-754R standard says it is
2356 implementation-defined in this case whether, if ADDEND is a
2357 quiet NaN, we raise invalid op; this implementation does so.
2359 If we need to do the addition we can do so with normal
2362 fs
= addOrSubtract(addend
, rounding_mode
, false);
2368 /* Rounding-mode correct round to integral value. */
2369 APFloat::opStatus
IEEEFloat::roundToIntegral(roundingMode rounding_mode
) {
2373 // [IEEE Std 754-2008 6.1]:
2374 // The behavior of infinity in floating-point arithmetic is derived from the
2375 // limiting cases of real arithmetic with operands of arbitrarily
2376 // large magnitude, when such a limit exists.
2378 // Operations on infinite operands are usually exact and therefore signal no
2383 if (isSignaling()) {
2384 // [IEEE Std 754-2008 6.2]:
2385 // Under default exception handling, any operation signaling an invalid
2386 // operation exception and for which a floating-point result is to be
2387 // delivered shall deliver a quiet NaN.
2389 // [IEEE Std 754-2008 6.2]:
2390 // Signaling NaNs shall be reserved operands that, under default exception
2391 // handling, signal the invalid operation exception(see 7.2) for every
2392 // general-computational and signaling-computational operation except for
2393 // the conversions described in 5.12.
2396 // [IEEE Std 754-2008 6.2]:
2397 // For an operation with quiet NaN inputs, other than maximum and minimum
2398 // operations, if a floating-point result is to be delivered the result
2399 // shall be a quiet NaN which should be one of the input NaNs.
2401 // Every general-computational and quiet-computational operation involving
2402 // one or more input NaNs, none of them signaling, shall signal no
2403 // exception, except fusedMultiplyAdd might signal the invalid operation
2404 // exception(see 7.2).
2410 // [IEEE Std 754-2008 6.3]:
2411 // ... the sign of the result of conversions, the quantize operation, the
2412 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2413 // the sign of the first or only operand.
2417 // If the exponent is large enough, we know that this value is already
2418 // integral, and the arithmetic below would potentially cause it to saturate
2419 // to +/-Inf. Bail out early instead.
2420 if (exponent
+ 1 >= (int)APFloat::semanticsPrecision(*semantics
))
2423 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2424 // precision of our format, and then subtract it back off again. The choice
2425 // of rounding modes for the addition/subtraction determines the rounding mode
2426 // for our integral rounding as well.
2427 // NOTE: When the input value is negative, we do subtraction followed by
2428 // addition instead.
2429 APInt
IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics
)),
2431 IntegerConstant
<<= APFloat::semanticsPrecision(*semantics
) - 1;
2432 IEEEFloat
MagicConstant(*semantics
);
2433 fs
= MagicConstant
.convertFromAPInt(IntegerConstant
, false,
2434 rmNearestTiesToEven
);
2436 MagicConstant
.sign
= sign
;
2438 // Preserve the input sign so that we can handle the case of zero result
2440 bool inputSign
= isNegative();
2442 fs
= add(MagicConstant
, rounding_mode
);
2444 // Current value and 'MagicConstant' are both integers, so the result of the
2445 // subtraction is always exact according to Sterbenz' lemma.
2446 subtract(MagicConstant
, rounding_mode
);
2448 // Restore the input sign.
2449 if (inputSign
!= isNegative())
2455 /* Comparison requires normalized numbers. */
2456 APFloat::cmpResult
IEEEFloat::compare(const IEEEFloat
&rhs
) const {
2459 assert(semantics
== rhs
.semantics
);
2461 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
2463 llvm_unreachable(nullptr);
2465 case PackCategoriesIntoKey(fcNaN
, fcZero
):
2466 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
2467 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
2468 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
2469 case PackCategoriesIntoKey(fcZero
, fcNaN
):
2470 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
2471 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
2472 return cmpUnordered
;
2474 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
2475 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
2476 case PackCategoriesIntoKey(fcNormal
, fcZero
):
2480 return cmpGreaterThan
;
2482 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
2483 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
2484 case PackCategoriesIntoKey(fcZero
, fcNormal
):
2486 return cmpGreaterThan
;
2490 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
2491 if (sign
== rhs
.sign
)
2496 return cmpGreaterThan
;
2498 case PackCategoriesIntoKey(fcZero
, fcZero
):
2501 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
2505 /* Two normal numbers. Do they have the same sign? */
2506 if (sign
!= rhs
.sign
) {
2508 result
= cmpLessThan
;
2510 result
= cmpGreaterThan
;
2512 /* Compare absolute values; invert result if negative. */
2513 result
= compareAbsoluteValue(rhs
);
2516 if (result
== cmpLessThan
)
2517 result
= cmpGreaterThan
;
2518 else if (result
== cmpGreaterThan
)
2519 result
= cmpLessThan
;
2526 /// IEEEFloat::convert - convert a value of one floating point type to another.
2527 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
2528 /// records whether the transformation lost information, i.e. whether
2529 /// converting the result back to the original type will produce the
2530 /// original value (this is almost the same as return value==opOK, but there
2531 /// are edge cases where this is not so).
2533 APFloat::opStatus
IEEEFloat::convert(const fltSemantics
&toSemantics
,
2534 roundingMode rounding_mode
,
2536 lostFraction lostFraction
;
2537 unsigned int newPartCount
, oldPartCount
;
2540 const fltSemantics
&fromSemantics
= *semantics
;
2541 bool is_signaling
= isSignaling();
2543 lostFraction
= lfExactlyZero
;
2544 newPartCount
= partCountForBits(toSemantics
.precision
+ 1);
2545 oldPartCount
= partCount();
2546 shift
= toSemantics
.precision
- fromSemantics
.precision
;
2548 bool X86SpecialNan
= false;
2549 if (&fromSemantics
== &semX87DoubleExtended
&&
2550 &toSemantics
!= &semX87DoubleExtended
&& category
== fcNaN
&&
2551 (!(*significandParts() & 0x8000000000000000ULL
) ||
2552 !(*significandParts() & 0x4000000000000000ULL
))) {
2553 // x86 has some unusual NaNs which cannot be represented in any other
2554 // format; note them here.
2555 X86SpecialNan
= true;
2558 // If this is a truncation of a denormal number, and the target semantics
2559 // has larger exponent range than the source semantics (this can happen
2560 // when truncating from PowerPC double-double to double format), the
2561 // right shift could lose result mantissa bits. Adjust exponent instead
2562 // of performing excessive shift.
2563 // Also do a similar trick in case shifting denormal would produce zero
2564 // significand as this case isn't handled correctly by normalize.
2565 if (shift
< 0 && isFiniteNonZero()) {
2566 int omsb
= significandMSB() + 1;
2567 int exponentChange
= omsb
- fromSemantics
.precision
;
2568 if (exponent
+ exponentChange
< toSemantics
.minExponent
)
2569 exponentChange
= toSemantics
.minExponent
- exponent
;
2570 if (exponentChange
< shift
)
2571 exponentChange
= shift
;
2572 if (exponentChange
< 0) {
2573 shift
-= exponentChange
;
2574 exponent
+= exponentChange
;
2575 } else if (omsb
<= -shift
) {
2576 exponentChange
= omsb
+ shift
- 1; // leave at least one bit set
2577 shift
-= exponentChange
;
2578 exponent
+= exponentChange
;
2582 // If this is a truncation, perform the shift before we narrow the storage.
2583 if (shift
< 0 && (isFiniteNonZero() ||
2584 (category
== fcNaN
&& semantics
->nonFiniteBehavior
!=
2585 fltNonfiniteBehavior::NanOnly
)))
2586 lostFraction
= shiftRight(significandParts(), oldPartCount
, -shift
);
2588 // Fix the storage so it can hold to new value.
2589 if (newPartCount
> oldPartCount
) {
2590 // The new type requires more storage; make it available.
2591 integerPart
*newParts
;
2592 newParts
= new integerPart
[newPartCount
];
2593 APInt::tcSet(newParts
, 0, newPartCount
);
2594 if (isFiniteNonZero() || category
==fcNaN
)
2595 APInt::tcAssign(newParts
, significandParts(), oldPartCount
);
2597 significand
.parts
= newParts
;
2598 } else if (newPartCount
== 1 && oldPartCount
!= 1) {
2599 // Switch to built-in storage for a single part.
2600 integerPart newPart
= 0;
2601 if (isFiniteNonZero() || category
==fcNaN
)
2602 newPart
= significandParts()[0];
2604 significand
.part
= newPart
;
2607 // Now that we have the right storage, switch the semantics.
2608 semantics
= &toSemantics
;
2610 // If this is an extension, perform the shift now that the storage is
2612 if (shift
> 0 && (isFiniteNonZero() || category
==fcNaN
))
2613 APInt::tcShiftLeft(significandParts(), newPartCount
, shift
);
2615 if (isFiniteNonZero()) {
2616 fs
= normalize(rounding_mode
, lostFraction
);
2617 *losesInfo
= (fs
!= opOK
);
2618 } else if (category
== fcNaN
) {
2619 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
) {
2621 fromSemantics
.nonFiniteBehavior
!= fltNonfiniteBehavior::NanOnly
;
2622 makeNaN(false, sign
);
2623 return is_signaling
? opInvalidOp
: opOK
;
2626 // If NaN is negative zero, we need to create a new NaN to avoid converting
2628 if (fromSemantics
.nanEncoding
== fltNanEncoding::NegativeZero
&&
2629 semantics
->nanEncoding
!= fltNanEncoding::NegativeZero
)
2630 makeNaN(false, false);
2632 *losesInfo
= lostFraction
!= lfExactlyZero
|| X86SpecialNan
;
2634 // For x87 extended precision, we want to make a NaN, not a special NaN if
2635 // the input wasn't special either.
2636 if (!X86SpecialNan
&& semantics
== &semX87DoubleExtended
)
2637 APInt::tcSetBit(significandParts(), semantics
->precision
- 1);
2639 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2640 // This also guarantees that a sNaN does not become Inf on a truncation
2641 // that loses all payload bits.
2648 } else if (category
== fcInfinity
&&
2649 semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
) {
2650 makeNaN(false, sign
);
2653 } else if (category
== fcZero
&&
2654 semantics
->nanEncoding
== fltNanEncoding::NegativeZero
) {
2655 // Negative zero loses info, but positive zero doesn't.
2657 fromSemantics
.nanEncoding
!= fltNanEncoding::NegativeZero
&& sign
;
2658 fs
= *losesInfo
? opInexact
: opOK
;
2659 // NaN is negative zero means -0 -> +0, which can lose information
2666 if (category
== fcZero
&& !semantics
->hasZero
)
2667 makeSmallestNormalized(false);
2671 /* Convert a floating point number to an integer according to the
2672 rounding mode. If the rounded integer value is out of range this
2673 returns an invalid operation exception and the contents of the
2674 destination parts are unspecified. If the rounded value is in
2675 range but the floating point number is not the exact integer, the C
2676 standard doesn't require an inexact exception to be raised. IEEE
2677 854 does require it so we do that.
2679 Note that for conversions to integer type the C standard requires
2680 round-to-zero to always be used. */
2681 APFloat::opStatus
IEEEFloat::convertToSignExtendedInteger(
2682 MutableArrayRef
<integerPart
> parts
, unsigned int width
, bool isSigned
,
2683 roundingMode rounding_mode
, bool *isExact
) const {
2684 lostFraction lost_fraction
;
2685 const integerPart
*src
;
2686 unsigned int dstPartsCount
, truncatedBits
;
2690 /* Handle the three special cases first. */
2691 if (category
== fcInfinity
|| category
== fcNaN
)
2694 dstPartsCount
= partCountForBits(width
);
2695 assert(dstPartsCount
<= parts
.size() && "Integer too big");
2697 if (category
== fcZero
) {
2698 APInt::tcSet(parts
.data(), 0, dstPartsCount
);
2699 // Negative zero can't be represented as an int.
2704 src
= significandParts();
2706 /* Step 1: place our absolute value, with any fraction truncated, in
2709 /* Our absolute value is less than one; truncate everything. */
2710 APInt::tcSet(parts
.data(), 0, dstPartsCount
);
2711 /* For exponent -1 the integer bit represents .5, look at that.
2712 For smaller exponents leftmost truncated bit is 0. */
2713 truncatedBits
= semantics
->precision
-1U - exponent
;
2715 /* We want the most significant (exponent + 1) bits; the rest are
2717 unsigned int bits
= exponent
+ 1U;
2719 /* Hopelessly large in magnitude? */
2723 if (bits
< semantics
->precision
) {
2724 /* We truncate (semantics->precision - bits) bits. */
2725 truncatedBits
= semantics
->precision
- bits
;
2726 APInt::tcExtract(parts
.data(), dstPartsCount
, src
, bits
, truncatedBits
);
2728 /* We want at least as many bits as are available. */
2729 APInt::tcExtract(parts
.data(), dstPartsCount
, src
, semantics
->precision
,
2731 APInt::tcShiftLeft(parts
.data(), dstPartsCount
,
2732 bits
- semantics
->precision
);
2737 /* Step 2: work out any lost fraction, and increment the absolute
2738 value if we would round away from zero. */
2739 if (truncatedBits
) {
2740 lost_fraction
= lostFractionThroughTruncation(src
, partCount(),
2742 if (lost_fraction
!= lfExactlyZero
&&
2743 roundAwayFromZero(rounding_mode
, lost_fraction
, truncatedBits
)) {
2744 if (APInt::tcIncrement(parts
.data(), dstPartsCount
))
2745 return opInvalidOp
; /* Overflow. */
2748 lost_fraction
= lfExactlyZero
;
2751 /* Step 3: check if we fit in the destination. */
2752 unsigned int omsb
= APInt::tcMSB(parts
.data(), dstPartsCount
) + 1;
2756 /* Negative numbers cannot be represented as unsigned. */
2760 /* It takes omsb bits to represent the unsigned integer value.
2761 We lose a bit for the sign, but care is needed as the
2762 maximally negative integer is a special case. */
2763 if (omsb
== width
&&
2764 APInt::tcLSB(parts
.data(), dstPartsCount
) + 1 != omsb
)
2767 /* This case can happen because of rounding. */
2772 APInt::tcNegate (parts
.data(), dstPartsCount
);
2774 if (omsb
>= width
+ !isSigned
)
2778 if (lost_fraction
== lfExactlyZero
) {
2785 /* Same as convertToSignExtendedInteger, except we provide
2786 deterministic values in case of an invalid operation exception,
2787 namely zero for NaNs and the minimal or maximal value respectively
2788 for underflow or overflow.
2789 The *isExact output tells whether the result is exact, in the sense
2790 that converting it back to the original floating point type produces
2791 the original value. This is almost equivalent to result==opOK,
2792 except for negative zeroes.
2795 IEEEFloat::convertToInteger(MutableArrayRef
<integerPart
> parts
,
2796 unsigned int width
, bool isSigned
,
2797 roundingMode rounding_mode
, bool *isExact
) const {
2800 fs
= convertToSignExtendedInteger(parts
, width
, isSigned
, rounding_mode
,
2803 if (fs
== opInvalidOp
) {
2804 unsigned int bits
, dstPartsCount
;
2806 dstPartsCount
= partCountForBits(width
);
2807 assert(dstPartsCount
<= parts
.size() && "Integer too big");
2809 if (category
== fcNaN
)
2814 bits
= width
- isSigned
;
2816 tcSetLeastSignificantBits(parts
.data(), dstPartsCount
, bits
);
2817 if (sign
&& isSigned
)
2818 APInt::tcShiftLeft(parts
.data(), dstPartsCount
, width
- 1);
2824 /* Convert an unsigned integer SRC to a floating point number,
2825 rounding according to ROUNDING_MODE. The sign of the floating
2826 point number is not modified. */
2827 APFloat::opStatus
IEEEFloat::convertFromUnsignedParts(
2828 const integerPart
*src
, unsigned int srcCount
, roundingMode rounding_mode
) {
2829 unsigned int omsb
, precision
, dstCount
;
2831 lostFraction lost_fraction
;
2833 category
= fcNormal
;
2834 omsb
= APInt::tcMSB(src
, srcCount
) + 1;
2835 dst
= significandParts();
2836 dstCount
= partCount();
2837 precision
= semantics
->precision
;
2839 /* We want the most significant PRECISION bits of SRC. There may not
2840 be that many; extract what we can. */
2841 if (precision
<= omsb
) {
2842 exponent
= omsb
- 1;
2843 lost_fraction
= lostFractionThroughTruncation(src
, srcCount
,
2845 APInt::tcExtract(dst
, dstCount
, src
, precision
, omsb
- precision
);
2847 exponent
= precision
- 1;
2848 lost_fraction
= lfExactlyZero
;
2849 APInt::tcExtract(dst
, dstCount
, src
, omsb
, 0);
2852 return normalize(rounding_mode
, lost_fraction
);
2855 APFloat::opStatus
IEEEFloat::convertFromAPInt(const APInt
&Val
, bool isSigned
,
2856 roundingMode rounding_mode
) {
2857 unsigned int partCount
= Val
.getNumWords();
2861 if (isSigned
&& api
.isNegative()) {
2866 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2869 /* Convert a two's complement integer SRC to a floating point number,
2870 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2871 integer is signed, in which case it must be sign-extended. */
2873 IEEEFloat::convertFromSignExtendedInteger(const integerPart
*src
,
2874 unsigned int srcCount
, bool isSigned
,
2875 roundingMode rounding_mode
) {
2879 APInt::tcExtractBit(src
, srcCount
* integerPartWidth
- 1)) {
2882 /* If we're signed and negative negate a copy. */
2884 copy
= new integerPart
[srcCount
];
2885 APInt::tcAssign(copy
, src
, srcCount
);
2886 APInt::tcNegate(copy
, srcCount
);
2887 status
= convertFromUnsignedParts(copy
, srcCount
, rounding_mode
);
2891 status
= convertFromUnsignedParts(src
, srcCount
, rounding_mode
);
2897 /* FIXME: should this just take a const APInt reference? */
2899 IEEEFloat::convertFromZeroExtendedInteger(const integerPart
*parts
,
2900 unsigned int width
, bool isSigned
,
2901 roundingMode rounding_mode
) {
2902 unsigned int partCount
= partCountForBits(width
);
2903 APInt api
= APInt(width
, ArrayRef(parts
, partCount
));
2906 if (isSigned
&& APInt::tcExtractBit(parts
, width
- 1)) {
2911 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2914 Expected
<APFloat::opStatus
>
2915 IEEEFloat::convertFromHexadecimalString(StringRef s
,
2916 roundingMode rounding_mode
) {
2917 lostFraction lost_fraction
= lfExactlyZero
;
2919 category
= fcNormal
;
2923 integerPart
*significand
= significandParts();
2924 unsigned partsCount
= partCount();
2925 unsigned bitPos
= partsCount
* integerPartWidth
;
2926 bool computedTrailingFraction
= false;
2928 // Skip leading zeroes and any (hexa)decimal point.
2929 StringRef::iterator begin
= s
.begin();
2930 StringRef::iterator end
= s
.end();
2931 StringRef::iterator dot
;
2932 auto PtrOrErr
= skipLeadingZeroesAndAnyDot(begin
, end
, &dot
);
2934 return PtrOrErr
.takeError();
2935 StringRef::iterator p
= *PtrOrErr
;
2936 StringRef::iterator firstSignificantDigit
= p
;
2939 integerPart hex_value
;
2943 return createError("String contains multiple dots");
2948 hex_value
= hexDigitValue(*p
);
2949 if (hex_value
== UINT_MAX
)
2954 // Store the number while we have space.
2957 hex_value
<<= bitPos
% integerPartWidth
;
2958 significand
[bitPos
/ integerPartWidth
] |= hex_value
;
2959 } else if (!computedTrailingFraction
) {
2960 auto FractOrErr
= trailingHexadecimalFraction(p
, end
, hex_value
);
2962 return FractOrErr
.takeError();
2963 lost_fraction
= *FractOrErr
;
2964 computedTrailingFraction
= true;
2968 /* Hex floats require an exponent but not a hexadecimal point. */
2970 return createError("Hex strings require an exponent");
2971 if (*p
!= 'p' && *p
!= 'P')
2972 return createError("Invalid character in significand");
2974 return createError("Significand has no digits");
2975 if (dot
!= end
&& p
- begin
== 1)
2976 return createError("Significand has no digits");
2978 /* Ignore the exponent if we are zero. */
2979 if (p
!= firstSignificantDigit
) {
2982 /* Implicit hexadecimal point? */
2986 /* Calculate the exponent adjustment implicit in the number of
2987 significant digits. */
2988 expAdjustment
= static_cast<int>(dot
- firstSignificantDigit
);
2989 if (expAdjustment
< 0)
2991 expAdjustment
= expAdjustment
* 4 - 1;
2993 /* Adjust for writing the significand starting at the most
2994 significant nibble. */
2995 expAdjustment
+= semantics
->precision
;
2996 expAdjustment
-= partsCount
* integerPartWidth
;
2998 /* Adjust for the given exponent. */
2999 auto ExpOrErr
= totalExponent(p
+ 1, end
, expAdjustment
);
3001 return ExpOrErr
.takeError();
3002 exponent
= *ExpOrErr
;
3005 return normalize(rounding_mode
, lost_fraction
);
3009 IEEEFloat::roundSignificandWithExponent(const integerPart
*decSigParts
,
3010 unsigned sigPartCount
, int exp
,
3011 roundingMode rounding_mode
) {
3012 unsigned int parts
, pow5PartCount
;
3013 fltSemantics calcSemantics
= { 32767, -32767, 0, 0 };
3014 integerPart pow5Parts
[maxPowerOfFiveParts
];
3017 isNearest
= (rounding_mode
== rmNearestTiesToEven
||
3018 rounding_mode
== rmNearestTiesToAway
);
3020 parts
= partCountForBits(semantics
->precision
+ 11);
3022 /* Calculate pow(5, abs(exp)). */
3023 pow5PartCount
= powerOf5(pow5Parts
, exp
>= 0 ? exp
: -exp
);
3025 for (;; parts
*= 2) {
3026 opStatus sigStatus
, powStatus
;
3027 unsigned int excessPrecision
, truncatedBits
;
3029 calcSemantics
.precision
= parts
* integerPartWidth
- 1;
3030 excessPrecision
= calcSemantics
.precision
- semantics
->precision
;
3031 truncatedBits
= excessPrecision
;
3033 IEEEFloat
decSig(calcSemantics
, uninitialized
);
3034 decSig
.makeZero(sign
);
3035 IEEEFloat
pow5(calcSemantics
);
3037 sigStatus
= decSig
.convertFromUnsignedParts(decSigParts
, sigPartCount
,
3038 rmNearestTiesToEven
);
3039 powStatus
= pow5
.convertFromUnsignedParts(pow5Parts
, pow5PartCount
,
3040 rmNearestTiesToEven
);
3041 /* Add exp, as 10^n = 5^n * 2^n. */
3042 decSig
.exponent
+= exp
;
3044 lostFraction calcLostFraction
;
3045 integerPart HUerr
, HUdistance
;
3046 unsigned int powHUerr
;
3049 /* multiplySignificand leaves the precision-th bit set to 1. */
3050 calcLostFraction
= decSig
.multiplySignificand(pow5
);
3051 powHUerr
= powStatus
!= opOK
;
3053 calcLostFraction
= decSig
.divideSignificand(pow5
);
3054 /* Denormal numbers have less precision. */
3055 if (decSig
.exponent
< semantics
->minExponent
) {
3056 excessPrecision
+= (semantics
->minExponent
- decSig
.exponent
);
3057 truncatedBits
= excessPrecision
;
3058 if (excessPrecision
> calcSemantics
.precision
)
3059 excessPrecision
= calcSemantics
.precision
;
3061 /* Extra half-ulp lost in reciprocal of exponent. */
3062 powHUerr
= (powStatus
== opOK
&& calcLostFraction
== lfExactlyZero
) ? 0:2;
3065 /* Both multiplySignificand and divideSignificand return the
3066 result with the integer bit set. */
3067 assert(APInt::tcExtractBit
3068 (decSig
.significandParts(), calcSemantics
.precision
- 1) == 1);
3070 HUerr
= HUerrBound(calcLostFraction
!= lfExactlyZero
, sigStatus
!= opOK
,
3072 HUdistance
= 2 * ulpsFromBoundary(decSig
.significandParts(),
3073 excessPrecision
, isNearest
);
3075 /* Are we guaranteed to round correctly if we truncate? */
3076 if (HUdistance
>= HUerr
) {
3077 APInt::tcExtract(significandParts(), partCount(), decSig
.significandParts(),
3078 calcSemantics
.precision
- excessPrecision
,
3080 /* Take the exponent of decSig. If we tcExtract-ed fewer bits
3081 above we must adjust our exponent to compensate for the
3082 implicit right shift. */
3083 exponent
= (decSig
.exponent
+ semantics
->precision
3084 - (calcSemantics
.precision
- excessPrecision
));
3085 calcLostFraction
= lostFractionThroughTruncation(decSig
.significandParts(),
3088 return normalize(rounding_mode
, calcLostFraction
);
3093 Expected
<APFloat::opStatus
>
3094 IEEEFloat::convertFromDecimalString(StringRef str
, roundingMode rounding_mode
) {
3098 /* Scan the text. */
3099 StringRef::iterator p
= str
.begin();
3100 if (Error Err
= interpretDecimal(p
, str
.end(), &D
))
3101 return std::move(Err
);
3103 /* Handle the quick cases. First the case of no significant digits,
3104 i.e. zero, and then exponents that are obviously too large or too
3105 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
3106 definitely overflows if
3108 (exp - 1) * L >= maxExponent
3110 and definitely underflows to zero where
3112 (exp + 1) * L <= minExponent - precision
3114 With integer arithmetic the tightest bounds for L are
3116 93/28 < L < 196/59 [ numerator <= 256 ]
3117 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
3120 // Test if we have a zero number allowing for strings with no null terminators
3121 // and zero decimals with non-zero exponents.
3123 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3124 // D.firstSigDigit equals str.end(), every digit must be a zero and there can
3125 // be at most one dot. On the other hand, if we have a zero with a non-zero
3126 // exponent, then we know that D.firstSigDigit will be non-numeric.
3127 if (D
.firstSigDigit
== str
.end() || decDigitValue(*D
.firstSigDigit
) >= 10U) {
3130 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
3132 if (!semantics
->hasZero
)
3133 makeSmallestNormalized(false);
3135 /* Check whether the normalized exponent is high enough to overflow
3136 max during the log-rebasing in the max-exponent check below. */
3137 } else if (D
.normalizedExponent
- 1 > INT_MAX
/ 42039) {
3138 fs
= handleOverflow(rounding_mode
);
3140 /* If it wasn't, then it also wasn't high enough to overflow max
3141 during the log-rebasing in the min-exponent check. Check that it
3142 won't overflow min in either check, then perform the min-exponent
3144 } else if (D
.normalizedExponent
- 1 < INT_MIN
/ 42039 ||
3145 (D
.normalizedExponent
+ 1) * 28738 <=
3146 8651 * (semantics
->minExponent
- (int) semantics
->precision
)) {
3147 /* Underflow to zero and round. */
3148 category
= fcNormal
;
3150 fs
= normalize(rounding_mode
, lfLessThanHalf
);
3152 /* We can finally safely perform the max-exponent check. */
3153 } else if ((D
.normalizedExponent
- 1) * 42039
3154 >= 12655 * semantics
->maxExponent
) {
3155 /* Overflow and round. */
3156 fs
= handleOverflow(rounding_mode
);
3158 integerPart
*decSignificand
;
3159 unsigned int partCount
;
3161 /* A tight upper bound on number of bits required to hold an
3162 N-digit decimal integer is N * 196 / 59. Allocate enough space
3163 to hold the full significand, and an extra part required by
3165 partCount
= static_cast<unsigned int>(D
.lastSigDigit
- D
.firstSigDigit
) + 1;
3166 partCount
= partCountForBits(1 + 196 * partCount
/ 59);
3167 decSignificand
= new integerPart
[partCount
+ 1];
3170 /* Convert to binary efficiently - we do almost all multiplication
3171 in an integerPart. When this would overflow do we do a single
3172 bignum multiplication, and then revert again to multiplication
3173 in an integerPart. */
3175 integerPart decValue
, val
, multiplier
;
3183 if (p
== str
.end()) {
3187 decValue
= decDigitValue(*p
++);
3188 if (decValue
>= 10U) {
3189 delete[] decSignificand
;
3190 return createError("Invalid character in significand");
3193 val
= val
* 10 + decValue
;
3194 /* The maximum number that can be multiplied by ten with any
3195 digit added without overflowing an integerPart. */
3196 } while (p
<= D
.lastSigDigit
&& multiplier
<= (~ (integerPart
) 0 - 9) / 10);
3198 /* Multiply out the current part. */
3199 APInt::tcMultiplyPart(decSignificand
, decSignificand
, multiplier
, val
,
3200 partCount
, partCount
+ 1, false);
3202 /* If we used another part (likely but not guaranteed), increase
3204 if (decSignificand
[partCount
])
3206 } while (p
<= D
.lastSigDigit
);
3208 category
= fcNormal
;
3209 fs
= roundSignificandWithExponent(decSignificand
, partCount
,
3210 D
.exponent
, rounding_mode
);
3212 delete [] decSignificand
;
3218 bool IEEEFloat::convertFromStringSpecials(StringRef str
) {
3219 const size_t MIN_NAME_SIZE
= 3;
3221 if (str
.size() < MIN_NAME_SIZE
)
3224 if (str
== "inf" || str
== "INFINITY" || str
== "+Inf") {
3229 bool IsNegative
= str
.front() == '-';
3231 str
= str
.drop_front();
3232 if (str
.size() < MIN_NAME_SIZE
)
3235 if (str
== "inf" || str
== "INFINITY" || str
== "Inf") {
3241 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3242 bool IsSignaling
= str
.front() == 's' || str
.front() == 'S';
3244 str
= str
.drop_front();
3245 if (str
.size() < MIN_NAME_SIZE
)
3249 if (str
.starts_with("nan") || str
.starts_with("NaN")) {
3250 str
= str
.drop_front(3);
3252 // A NaN without payload.
3254 makeNaN(IsSignaling
, IsNegative
);
3258 // Allow the payload to be inside parentheses.
3259 if (str
.front() == '(') {
3260 // Parentheses should be balanced (and not empty).
3261 if (str
.size() <= 2 || str
.back() != ')')
3264 str
= str
.slice(1, str
.size() - 1);
3267 // Determine the payload number's radix.
3268 unsigned Radix
= 10;
3269 if (str
[0] == '0') {
3270 if (str
.size() > 1 && tolower(str
[1]) == 'x') {
3271 str
= str
.drop_front(2);
3277 // Parse the payload and make the NaN.
3279 if (!str
.getAsInteger(Radix
, Payload
)) {
3280 makeNaN(IsSignaling
, IsNegative
, &Payload
);
3288 Expected
<APFloat::opStatus
>
3289 IEEEFloat::convertFromString(StringRef str
, roundingMode rounding_mode
) {
3291 return createError("Invalid string length");
3293 // Handle special cases.
3294 if (convertFromStringSpecials(str
))
3297 /* Handle a leading minus sign. */
3298 StringRef::iterator p
= str
.begin();
3299 size_t slen
= str
.size();
3300 sign
= *p
== '-' ? 1 : 0;
3301 if (sign
&& !semantics
->hasSignedRepr
)
3303 "This floating point format does not support signed values");
3305 if (*p
== '-' || *p
== '+') {
3309 return createError("String has no digits");
3312 if (slen
>= 2 && p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
3314 return createError("Invalid string");
3315 return convertFromHexadecimalString(StringRef(p
+ 2, slen
- 2),
3319 return convertFromDecimalString(StringRef(p
, slen
), rounding_mode
);
3322 /* Write out a hexadecimal representation of the floating point value
3323 to DST, which must be of sufficient size, in the C99 form
3324 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3325 excluding the terminating NUL.
3327 If UPPERCASE, the output is in upper case, otherwise in lower case.
3329 HEXDIGITS digits appear altogether, rounding the value if
3330 necessary. If HEXDIGITS is 0, the minimal precision to display the
3331 number precisely is used instead. If nothing would appear after
3332 the decimal point it is suppressed.
3334 The decimal exponent is always printed and has at least one digit.
3335 Zero values display an exponent of zero. Infinities and NaNs
3336 appear as "infinity" or "nan" respectively.
3338 The above rules are as specified by C99. There is ambiguity about
3339 what the leading hexadecimal digit should be. This implementation
3340 uses whatever is necessary so that the exponent is displayed as
3341 stored. This implies the exponent will fall within the IEEE format
3342 range, and the leading hexadecimal digit will be 0 (for denormals),
3343 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3344 any other digits zero).
3346 unsigned int IEEEFloat::convertToHexString(char *dst
, unsigned int hexDigits
,
3348 roundingMode rounding_mode
) const {
3357 memcpy (dst
, upperCase
? infinityU
: infinityL
, sizeof infinityU
- 1);
3358 dst
+= sizeof infinityL
- 1;
3362 memcpy (dst
, upperCase
? NaNU
: NaNL
, sizeof NaNU
- 1);
3363 dst
+= sizeof NaNU
- 1;
3368 *dst
++ = upperCase
? 'X': 'x';
3370 if (hexDigits
> 1) {
3372 memset (dst
, '0', hexDigits
- 1);
3373 dst
+= hexDigits
- 1;
3375 *dst
++ = upperCase
? 'P': 'p';
3380 dst
= convertNormalToHexString (dst
, hexDigits
, upperCase
, rounding_mode
);
3386 return static_cast<unsigned int>(dst
- p
);
3389 /* Does the hard work of outputting the correctly rounded hexadecimal
3390 form of a normal floating point number with the specified number of
3391 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3392 digits necessary to print the value precisely is output. */
3393 char *IEEEFloat::convertNormalToHexString(char *dst
, unsigned int hexDigits
,
3395 roundingMode rounding_mode
) const {
3396 unsigned int count
, valueBits
, shift
, partsCount
, outputDigits
;
3397 const char *hexDigitChars
;
3398 const integerPart
*significand
;
3403 *dst
++ = upperCase
? 'X': 'x';
3406 hexDigitChars
= upperCase
? hexDigitsUpper
: hexDigitsLower
;
3408 significand
= significandParts();
3409 partsCount
= partCount();
3411 /* +3 because the first digit only uses the single integer bit, so
3412 we have 3 virtual zero most-significant-bits. */
3413 valueBits
= semantics
->precision
+ 3;
3414 shift
= integerPartWidth
- valueBits
% integerPartWidth
;
3416 /* The natural number of digits required ignoring trailing
3417 insignificant zeroes. */
3418 outputDigits
= (valueBits
- significandLSB () + 3) / 4;
3420 /* hexDigits of zero means use the required number for the
3421 precision. Otherwise, see if we are truncating. If we are,
3422 find out if we need to round away from zero. */
3424 if (hexDigits
< outputDigits
) {
3425 /* We are dropping non-zero bits, so need to check how to round.
3426 "bits" is the number of dropped bits. */
3428 lostFraction fraction
;
3430 bits
= valueBits
- hexDigits
* 4;
3431 fraction
= lostFractionThroughTruncation (significand
, partsCount
, bits
);
3432 roundUp
= roundAwayFromZero(rounding_mode
, fraction
, bits
);
3434 outputDigits
= hexDigits
;
3437 /* Write the digits consecutively, and start writing in the location
3438 of the hexadecimal point. We move the most significant digit
3439 left and add the hexadecimal point later. */
3442 count
= (valueBits
+ integerPartWidth
- 1) / integerPartWidth
;
3444 while (outputDigits
&& count
) {
3447 /* Put the most significant integerPartWidth bits in "part". */
3448 if (--count
== partsCount
)
3449 part
= 0; /* An imaginary higher zero part. */
3451 part
= significand
[count
] << shift
;
3454 part
|= significand
[count
- 1] >> (integerPartWidth
- shift
);
3456 /* Convert as much of "part" to hexdigits as we can. */
3457 unsigned int curDigits
= integerPartWidth
/ 4;
3459 if (curDigits
> outputDigits
)
3460 curDigits
= outputDigits
;
3461 dst
+= partAsHex (dst
, part
, curDigits
, hexDigitChars
);
3462 outputDigits
-= curDigits
;
3468 /* Note that hexDigitChars has a trailing '0'. */
3471 *q
= hexDigitChars
[hexDigitValue (*q
) + 1];
3472 } while (*q
== '0');
3475 /* Add trailing zeroes. */
3476 memset (dst
, '0', outputDigits
);
3477 dst
+= outputDigits
;
3480 /* Move the most significant digit to before the point, and if there
3481 is something after the decimal point add it. This must come
3482 after rounding above. */
3489 /* Finally output the exponent. */
3490 *dst
++ = upperCase
? 'P': 'p';
3492 return writeSignedDecimal (dst
, exponent
);
3495 hash_code
hash_value(const IEEEFloat
&Arg
) {
3496 if (!Arg
.isFiniteNonZero())
3497 return hash_combine((uint8_t)Arg
.category
,
3498 // NaN has no sign, fix it at zero.
3499 Arg
.isNaN() ? (uint8_t)0 : (uint8_t)Arg
.sign
,
3500 Arg
.semantics
->precision
);
3502 // Normal floats need their exponent and significand hashed.
3503 return hash_combine((uint8_t)Arg
.category
, (uint8_t)Arg
.sign
,
3504 Arg
.semantics
->precision
, Arg
.exponent
,
3506 Arg
.significandParts(),
3507 Arg
.significandParts() + Arg
.partCount()));
3510 // Conversion from APFloat to/from host float/double. It may eventually be
3511 // possible to eliminate these and have everybody deal with APFloats, but that
3512 // will take a while. This approach will not easily extend to long double.
3513 // Current implementation requires integerPartWidth==64, which is correct at
3514 // the moment but could be made more general.
3516 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
3517 // the actual IEEE respresentations. We compensate for that here.
3519 APInt
IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3520 assert(semantics
== (const llvm::fltSemantics
*)&semX87DoubleExtended
);
3521 assert(partCount()==2);
3523 uint64_t myexponent
, mysignificand
;
3525 if (isFiniteNonZero()) {
3526 myexponent
= exponent
+16383; //bias
3527 mysignificand
= significandParts()[0];
3528 if (myexponent
==1 && !(mysignificand
& 0x8000000000000000ULL
))
3529 myexponent
= 0; // denormal
3530 } else if (category
==fcZero
) {
3533 } else if (category
==fcInfinity
) {
3534 myexponent
= 0x7fff;
3535 mysignificand
= 0x8000000000000000ULL
;
3537 assert(category
== fcNaN
&& "Unknown category");
3538 myexponent
= 0x7fff;
3539 mysignificand
= significandParts()[0];
3543 words
[0] = mysignificand
;
3544 words
[1] = ((uint64_t)(sign
& 1) << 15) |
3545 (myexponent
& 0x7fffLL
);
3546 return APInt(80, words
);
3549 APInt
IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3550 assert(semantics
== (const llvm::fltSemantics
*)&semPPCDoubleDoubleLegacy
);
3551 assert(partCount()==2);
3557 // Convert number to double. To avoid spurious underflows, we re-
3558 // normalize against the "double" minExponent first, and only *then*
3559 // truncate the mantissa. The result of that second conversion
3560 // may be inexact, but should never underflow.
3561 // Declare fltSemantics before APFloat that uses it (and
3562 // saves pointer to it) to ensure correct destruction order.
3563 fltSemantics extendedSemantics
= *semantics
;
3564 extendedSemantics
.minExponent
= semIEEEdouble
.minExponent
;
3565 IEEEFloat
extended(*this);
3566 fs
= extended
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
3567 assert(fs
== opOK
&& !losesInfo
);
3570 IEEEFloat
u(extended
);
3571 fs
= u
.convert(semIEEEdouble
, rmNearestTiesToEven
, &losesInfo
);
3572 assert(fs
== opOK
|| fs
== opInexact
);
3574 words
[0] = *u
.convertDoubleAPFloatToAPInt().getRawData();
3576 // If conversion was exact or resulted in a special case, we're done;
3577 // just set the second double to zero. Otherwise, re-convert back to
3578 // the extended format and compute the difference. This now should
3579 // convert exactly to double.
3580 if (u
.isFiniteNonZero() && losesInfo
) {
3581 fs
= u
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
3582 assert(fs
== opOK
&& !losesInfo
);
3585 IEEEFloat
v(extended
);
3586 v
.subtract(u
, rmNearestTiesToEven
);
3587 fs
= v
.convert(semIEEEdouble
, rmNearestTiesToEven
, &losesInfo
);
3588 assert(fs
== opOK
&& !losesInfo
);
3590 words
[1] = *v
.convertDoubleAPFloatToAPInt().getRawData();
3595 return APInt(128, words
);
3598 template <const fltSemantics
&S
>
3599 APInt
IEEEFloat::convertIEEEFloatToAPInt() const {
3600 assert(semantics
== &S
);
3602 (semantics
== &semFloat8E8M0FNU
) ? -S
.minExponent
: -(S
.minExponent
- 1);
3603 constexpr unsigned int trailing_significand_bits
= S
.precision
- 1;
3604 constexpr int integer_bit_part
= trailing_significand_bits
/ integerPartWidth
;
3605 constexpr integerPart integer_bit
=
3606 integerPart
{1} << (trailing_significand_bits
% integerPartWidth
);
3607 constexpr uint64_t significand_mask
= integer_bit
- 1;
3608 constexpr unsigned int exponent_bits
=
3609 trailing_significand_bits
? (S
.sizeInBits
- 1 - trailing_significand_bits
)
3611 static_assert(exponent_bits
< 64);
3612 constexpr uint64_t exponent_mask
= (uint64_t{1} << exponent_bits
) - 1;
3614 uint64_t myexponent
;
3615 std::array
<integerPart
, partCountForBits(trailing_significand_bits
)>
3618 if (isFiniteNonZero()) {
3619 myexponent
= exponent
+ bias
;
3620 std::copy_n(significandParts(), mysignificand
.size(),
3621 mysignificand
.begin());
3622 if (myexponent
== 1 &&
3623 !(significandParts()[integer_bit_part
] & integer_bit
))
3624 myexponent
= 0; // denormal
3625 } else if (category
== fcZero
) {
3627 llvm_unreachable("semantics does not support zero!");
3628 myexponent
= ::exponentZero(S
) + bias
;
3629 mysignificand
.fill(0);
3630 } else if (category
== fcInfinity
) {
3631 if (S
.nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
||
3632 S
.nonFiniteBehavior
== fltNonfiniteBehavior::FiniteOnly
)
3633 llvm_unreachable("semantics don't support inf!");
3634 myexponent
= ::exponentInf(S
) + bias
;
3635 mysignificand
.fill(0);
3637 assert(category
== fcNaN
&& "Unknown category!");
3638 if (S
.nonFiniteBehavior
== fltNonfiniteBehavior::FiniteOnly
)
3639 llvm_unreachable("semantics don't support NaN!");
3640 myexponent
= ::exponentNaN(S
) + bias
;
3641 std::copy_n(significandParts(), mysignificand
.size(),
3642 mysignificand
.begin());
3644 std::array
<uint64_t, (S
.sizeInBits
+ 63) / 64> words
;
3646 std::copy_n(mysignificand
.begin(), mysignificand
.size(), words
.begin());
3647 if constexpr (significand_mask
!= 0 || trailing_significand_bits
== 0) {
3648 // Clear the integer bit.
3649 words
[mysignificand
.size() - 1] &= significand_mask
;
3651 std::fill(words_iter
, words
.end(), uint64_t{0});
3652 constexpr size_t last_word
= words
.size() - 1;
3653 uint64_t shifted_sign
= static_cast<uint64_t>(sign
& 1)
3654 << ((S
.sizeInBits
- 1) % 64);
3655 words
[last_word
] |= shifted_sign
;
3656 uint64_t shifted_exponent
= (myexponent
& exponent_mask
)
3657 << (trailing_significand_bits
% 64);
3658 words
[last_word
] |= shifted_exponent
;
3659 if constexpr (last_word
== 0) {
3660 return APInt(S
.sizeInBits
, words
[0]);
3662 return APInt(S
.sizeInBits
, words
);
3665 APInt
IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3666 assert(partCount() == 2);
3667 return convertIEEEFloatToAPInt
<semIEEEquad
>();
3670 APInt
IEEEFloat::convertDoubleAPFloatToAPInt() const {
3671 assert(partCount()==1);
3672 return convertIEEEFloatToAPInt
<semIEEEdouble
>();
3675 APInt
IEEEFloat::convertFloatAPFloatToAPInt() const {
3676 assert(partCount()==1);
3677 return convertIEEEFloatToAPInt
<semIEEEsingle
>();
3680 APInt
IEEEFloat::convertBFloatAPFloatToAPInt() const {
3681 assert(partCount() == 1);
3682 return convertIEEEFloatToAPInt
<semBFloat
>();
3685 APInt
IEEEFloat::convertHalfAPFloatToAPInt() const {
3686 assert(partCount()==1);
3687 return convertIEEEFloatToAPInt
<semIEEEhalf
>();
3690 APInt
IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3691 assert(partCount() == 1);
3692 return convertIEEEFloatToAPInt
<semFloat8E5M2
>();
3695 APInt
IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3696 assert(partCount() == 1);
3697 return convertIEEEFloatToAPInt
<semFloat8E5M2FNUZ
>();
3700 APInt
IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3701 assert(partCount() == 1);
3702 return convertIEEEFloatToAPInt
<semFloat8E4M3
>();
3705 APInt
IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3706 assert(partCount() == 1);
3707 return convertIEEEFloatToAPInt
<semFloat8E4M3FN
>();
3710 APInt
IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3711 assert(partCount() == 1);
3712 return convertIEEEFloatToAPInt
<semFloat8E4M3FNUZ
>();
3715 APInt
IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3716 assert(partCount() == 1);
3717 return convertIEEEFloatToAPInt
<semFloat8E4M3B11FNUZ
>();
3720 APInt
IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3721 assert(partCount() == 1);
3722 return convertIEEEFloatToAPInt
<semFloat8E3M4
>();
3725 APInt
IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3726 assert(partCount() == 1);
3727 return convertIEEEFloatToAPInt
<semFloatTF32
>();
3730 APInt
IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3731 assert(partCount() == 1);
3732 return convertIEEEFloatToAPInt
<semFloat8E8M0FNU
>();
3735 APInt
IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3736 assert(partCount() == 1);
3737 return convertIEEEFloatToAPInt
<semFloat6E3M2FN
>();
3740 APInt
IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3741 assert(partCount() == 1);
3742 return convertIEEEFloatToAPInt
<semFloat6E2M3FN
>();
3745 APInt
IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3746 assert(partCount() == 1);
3747 return convertIEEEFloatToAPInt
<semFloat4E2M1FN
>();
3750 // This function creates an APInt that is just a bit map of the floating
3751 // point constant as it would appear in memory. It is not a conversion,
3752 // and treating the result as a normal integer is unlikely to be useful.
3754 APInt
IEEEFloat::bitcastToAPInt() const {
3755 if (semantics
== (const llvm::fltSemantics
*)&semIEEEhalf
)
3756 return convertHalfAPFloatToAPInt();
3758 if (semantics
== (const llvm::fltSemantics
*)&semBFloat
)
3759 return convertBFloatAPFloatToAPInt();
3761 if (semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
)
3762 return convertFloatAPFloatToAPInt();
3764 if (semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
)
3765 return convertDoubleAPFloatToAPInt();
3767 if (semantics
== (const llvm::fltSemantics
*)&semIEEEquad
)
3768 return convertQuadrupleAPFloatToAPInt();
3770 if (semantics
== (const llvm::fltSemantics
*)&semPPCDoubleDoubleLegacy
)
3771 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3773 if (semantics
== (const llvm::fltSemantics
*)&semFloat8E5M2
)
3774 return convertFloat8E5M2APFloatToAPInt();
3776 if (semantics
== (const llvm::fltSemantics
*)&semFloat8E5M2FNUZ
)
3777 return convertFloat8E5M2FNUZAPFloatToAPInt();
3779 if (semantics
== (const llvm::fltSemantics
*)&semFloat8E4M3
)
3780 return convertFloat8E4M3APFloatToAPInt();
3782 if (semantics
== (const llvm::fltSemantics
*)&semFloat8E4M3FN
)
3783 return convertFloat8E4M3FNAPFloatToAPInt();
3785 if (semantics
== (const llvm::fltSemantics
*)&semFloat8E4M3FNUZ
)
3786 return convertFloat8E4M3FNUZAPFloatToAPInt();
3788 if (semantics
== (const llvm::fltSemantics
*)&semFloat8E4M3B11FNUZ
)
3789 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3791 if (semantics
== (const llvm::fltSemantics
*)&semFloat8E3M4
)
3792 return convertFloat8E3M4APFloatToAPInt();
3794 if (semantics
== (const llvm::fltSemantics
*)&semFloatTF32
)
3795 return convertFloatTF32APFloatToAPInt();
3797 if (semantics
== (const llvm::fltSemantics
*)&semFloat8E8M0FNU
)
3798 return convertFloat8E8M0FNUAPFloatToAPInt();
3800 if (semantics
== (const llvm::fltSemantics
*)&semFloat6E3M2FN
)
3801 return convertFloat6E3M2FNAPFloatToAPInt();
3803 if (semantics
== (const llvm::fltSemantics
*)&semFloat6E2M3FN
)
3804 return convertFloat6E2M3FNAPFloatToAPInt();
3806 if (semantics
== (const llvm::fltSemantics
*)&semFloat4E2M1FN
)
3807 return convertFloat4E2M1FNAPFloatToAPInt();
3809 assert(semantics
== (const llvm::fltSemantics
*)&semX87DoubleExtended
&&
3811 return convertF80LongDoubleAPFloatToAPInt();
3814 float IEEEFloat::convertToFloat() const {
3815 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
&&
3816 "Float semantics are not IEEEsingle");
3817 APInt api
= bitcastToAPInt();
3818 return api
.bitsToFloat();
3821 double IEEEFloat::convertToDouble() const {
3822 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
&&
3823 "Float semantics are not IEEEdouble");
3824 APInt api
= bitcastToAPInt();
3825 return api
.bitsToDouble();
#ifdef HAS_IEE754_FLOAT128
/// Return this value as a host float128 by reinterpreting its bit pattern.
/// The value must have IEEEquad semantics. Only compiled when
/// HAS_IEE754_FLOAT128 is defined.
float128 IEEEFloat::convertToQuad() const {
  assert(semantics == &semIEEEquad && "Float semantics are not IEEEquads");
  APInt api = bitcastToAPInt();
  return api.bitsToQuad();
}
#endif
3837 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3838 /// does not support these bit patterns:
3839 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3840 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3841 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3842 /// exponent = 0, integer bit 1 ("pseudodenormal")
3843 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3844 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt
&api
) {
3845 uint64_t i1
= api
.getRawData()[0];
3846 uint64_t i2
= api
.getRawData()[1];
3847 uint64_t myexponent
= (i2
& 0x7fff);
3848 uint64_t mysignificand
= i1
;
3849 uint8_t myintegerbit
= mysignificand
>> 63;
3851 initialize(&semX87DoubleExtended
);
3852 assert(partCount()==2);
3854 sign
= static_cast<unsigned int>(i2
>>15);
3855 if (myexponent
== 0 && mysignificand
== 0) {
3857 } else if (myexponent
==0x7fff && mysignificand
==0x8000000000000000ULL
) {
3859 } else if ((myexponent
== 0x7fff && mysignificand
!= 0x8000000000000000ULL
) ||
3860 (myexponent
!= 0x7fff && myexponent
!= 0 && myintegerbit
== 0)) {
3862 exponent
= exponentNaN();
3863 significandParts()[0] = mysignificand
;
3864 significandParts()[1] = 0;
3866 category
= fcNormal
;
3867 exponent
= myexponent
- 16383;
3868 significandParts()[0] = mysignificand
;
3869 significandParts()[1] = 0;
3870 if (myexponent
==0) // denormal
3875 void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt
&api
) {
3876 uint64_t i1
= api
.getRawData()[0];
3877 uint64_t i2
= api
.getRawData()[1];
3881 // Get the first double and convert to our format.
3882 initFromDoubleAPInt(APInt(64, i1
));
3883 fs
= convert(semPPCDoubleDoubleLegacy
, rmNearestTiesToEven
, &losesInfo
);
3884 assert(fs
== opOK
&& !losesInfo
);
3887 // Unless we have a special case, add in second double.
3888 if (isFiniteNonZero()) {
3889 IEEEFloat
v(semIEEEdouble
, APInt(64, i2
));
3890 fs
= v
.convert(semPPCDoubleDoubleLegacy
, rmNearestTiesToEven
, &losesInfo
);
3891 assert(fs
== opOK
&& !losesInfo
);
3894 add(v
, rmNearestTiesToEven
);
3898 // The E8M0 format has the following characteristics:
3899 // It is an 8-bit unsigned format with only exponents (no actual significand).
3900 // No encodings for {zero, infinities or denorms}.
3901 // NaN is represented by all 1's.
3903 void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt
&api
) {
3904 const uint64_t exponent_mask
= 0xff;
3905 uint64_t val
= api
.getRawData()[0];
3906 uint64_t myexponent
= (val
& exponent_mask
);
3908 initialize(&semFloat8E8M0FNU
);
3909 assert(partCount() == 1);
3911 // This format has unsigned representation only
3914 // Set the significand
3915 // This format does not have any significand but the 'Pth' precision bit is
3916 // always set to 1 for consistency in APFloat's internal representation.
3917 uint64_t mysignificand
= 1;
3918 significandParts()[0] = mysignificand
;
3920 // This format can either have a NaN or fcNormal
3921 // All 1's i.e. 255 is a NaN
3922 if (val
== exponent_mask
) {
3924 exponent
= exponentNaN();
3927 // Handle fcNormal...
3928 category
= fcNormal
;
3929 exponent
= myexponent
- 127; // 127 is bias
3931 template <const fltSemantics
&S
>
3932 void IEEEFloat::initFromIEEEAPInt(const APInt
&api
) {
3933 assert(api
.getBitWidth() == S
.sizeInBits
);
3934 constexpr integerPart integer_bit
= integerPart
{1}
3935 << ((S
.precision
- 1) % integerPartWidth
);
3936 constexpr uint64_t significand_mask
= integer_bit
- 1;
3937 constexpr unsigned int trailing_significand_bits
= S
.precision
- 1;
3938 constexpr unsigned int stored_significand_parts
=
3939 partCountForBits(trailing_significand_bits
);
3940 constexpr unsigned int exponent_bits
=
3941 S
.sizeInBits
- 1 - trailing_significand_bits
;
3942 static_assert(exponent_bits
< 64);
3943 constexpr uint64_t exponent_mask
= (uint64_t{1} << exponent_bits
) - 1;
3944 constexpr int bias
= -(S
.minExponent
- 1);
3946 // Copy the bits of the significand. We need to clear out the exponent and
3947 // sign bit in the last word.
3948 std::array
<integerPart
, stored_significand_parts
> mysignificand
;
3949 std::copy_n(api
.getRawData(), mysignificand
.size(), mysignificand
.begin());
3950 if constexpr (significand_mask
!= 0) {
3951 mysignificand
[mysignificand
.size() - 1] &= significand_mask
;
3954 // We assume the last word holds the sign bit, the exponent, and potentially
3955 // some of the trailing significand field.
3956 uint64_t last_word
= api
.getRawData()[api
.getNumWords() - 1];
3957 uint64_t myexponent
=
3958 (last_word
>> (trailing_significand_bits
% 64)) & exponent_mask
;
3961 assert(partCount() == mysignificand
.size());
3963 sign
= static_cast<unsigned int>(last_word
>> ((S
.sizeInBits
- 1) % 64));
3965 bool all_zero_significand
=
3966 llvm::all_of(mysignificand
, [](integerPart bits
) { return bits
== 0; });
3968 bool is_zero
= myexponent
== 0 && all_zero_significand
;
3970 if constexpr (S
.nonFiniteBehavior
== fltNonfiniteBehavior::IEEE754
) {
3971 if (myexponent
- bias
== ::exponentInf(S
) && all_zero_significand
) {
3977 bool is_nan
= false;
3979 if constexpr (S
.nanEncoding
== fltNanEncoding::IEEE
) {
3980 is_nan
= myexponent
- bias
== ::exponentNaN(S
) && !all_zero_significand
;
3981 } else if constexpr (S
.nanEncoding
== fltNanEncoding::AllOnes
) {
3982 bool all_ones_significand
=
3983 std::all_of(mysignificand
.begin(), mysignificand
.end() - 1,
3984 [](integerPart bits
) { return bits
== ~integerPart
{0}; }) &&
3985 (!significand_mask
||
3986 mysignificand
[mysignificand
.size() - 1] == significand_mask
);
3987 is_nan
= myexponent
- bias
== ::exponentNaN(S
) && all_ones_significand
;
3988 } else if constexpr (S
.nanEncoding
== fltNanEncoding::NegativeZero
) {
3989 is_nan
= is_zero
&& sign
;
3994 exponent
= ::exponentNaN(S
);
3995 std::copy_n(mysignificand
.begin(), mysignificand
.size(),
3996 significandParts());
4005 category
= fcNormal
;
4006 exponent
= myexponent
- bias
;
4007 std::copy_n(mysignificand
.begin(), mysignificand
.size(), significandParts());
4008 if (myexponent
== 0) // denormal
4009 exponent
= S
.minExponent
;
4011 significandParts()[mysignificand
.size()-1] |= integer_bit
; // integer bit
4014 void IEEEFloat::initFromQuadrupleAPInt(const APInt
&api
) {
4015 initFromIEEEAPInt
<semIEEEquad
>(api
);
4018 void IEEEFloat::initFromDoubleAPInt(const APInt
&api
) {
4019 initFromIEEEAPInt
<semIEEEdouble
>(api
);
4022 void IEEEFloat::initFromFloatAPInt(const APInt
&api
) {
4023 initFromIEEEAPInt
<semIEEEsingle
>(api
);
4026 void IEEEFloat::initFromBFloatAPInt(const APInt
&api
) {
4027 initFromIEEEAPInt
<semBFloat
>(api
);
4030 void IEEEFloat::initFromHalfAPInt(const APInt
&api
) {
4031 initFromIEEEAPInt
<semIEEEhalf
>(api
);
4034 void IEEEFloat::initFromFloat8E5M2APInt(const APInt
&api
) {
4035 initFromIEEEAPInt
<semFloat8E5M2
>(api
);
4038 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt
&api
) {
4039 initFromIEEEAPInt
<semFloat8E5M2FNUZ
>(api
);
4042 void IEEEFloat::initFromFloat8E4M3APInt(const APInt
&api
) {
4043 initFromIEEEAPInt
<semFloat8E4M3
>(api
);
4046 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt
&api
) {
4047 initFromIEEEAPInt
<semFloat8E4M3FN
>(api
);
4050 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt
&api
) {
4051 initFromIEEEAPInt
<semFloat8E4M3FNUZ
>(api
);
4054 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt
&api
) {
4055 initFromIEEEAPInt
<semFloat8E4M3B11FNUZ
>(api
);
4058 void IEEEFloat::initFromFloat8E3M4APInt(const APInt
&api
) {
4059 initFromIEEEAPInt
<semFloat8E3M4
>(api
);
4062 void IEEEFloat::initFromFloatTF32APInt(const APInt
&api
) {
4063 initFromIEEEAPInt
<semFloatTF32
>(api
);
4066 void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt
&api
) {
4067 initFromIEEEAPInt
<semFloat6E3M2FN
>(api
);
4070 void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt
&api
) {
4071 initFromIEEEAPInt
<semFloat6E2M3FN
>(api
);
4074 void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt
&api
) {
4075 initFromIEEEAPInt
<semFloat4E2M1FN
>(api
);
4078 /// Treat api as containing the bits of a floating point number.
4079 void IEEEFloat::initFromAPInt(const fltSemantics
*Sem
, const APInt
&api
) {
4080 assert(api
.getBitWidth() == Sem
->sizeInBits
);
4081 if (Sem
== &semIEEEhalf
)
4082 return initFromHalfAPInt(api
);
4083 if (Sem
== &semBFloat
)
4084 return initFromBFloatAPInt(api
);
4085 if (Sem
== &semIEEEsingle
)
4086 return initFromFloatAPInt(api
);
4087 if (Sem
== &semIEEEdouble
)
4088 return initFromDoubleAPInt(api
);
4089 if (Sem
== &semX87DoubleExtended
)
4090 return initFromF80LongDoubleAPInt(api
);
4091 if (Sem
== &semIEEEquad
)
4092 return initFromQuadrupleAPInt(api
);
4093 if (Sem
== &semPPCDoubleDoubleLegacy
)
4094 return initFromPPCDoubleDoubleLegacyAPInt(api
);
4095 if (Sem
== &semFloat8E5M2
)
4096 return initFromFloat8E5M2APInt(api
);
4097 if (Sem
== &semFloat8E5M2FNUZ
)
4098 return initFromFloat8E5M2FNUZAPInt(api
);
4099 if (Sem
== &semFloat8E4M3
)
4100 return initFromFloat8E4M3APInt(api
);
4101 if (Sem
== &semFloat8E4M3FN
)
4102 return initFromFloat8E4M3FNAPInt(api
);
4103 if (Sem
== &semFloat8E4M3FNUZ
)
4104 return initFromFloat8E4M3FNUZAPInt(api
);
4105 if (Sem
== &semFloat8E4M3B11FNUZ
)
4106 return initFromFloat8E4M3B11FNUZAPInt(api
);
4107 if (Sem
== &semFloat8E3M4
)
4108 return initFromFloat8E3M4APInt(api
);
4109 if (Sem
== &semFloatTF32
)
4110 return initFromFloatTF32APInt(api
);
4111 if (Sem
== &semFloat8E8M0FNU
)
4112 return initFromFloat8E8M0FNUAPInt(api
);
4113 if (Sem
== &semFloat6E3M2FN
)
4114 return initFromFloat6E3M2FNAPInt(api
);
4115 if (Sem
== &semFloat6E2M3FN
)
4116 return initFromFloat6E2M3FNAPInt(api
);
4117 if (Sem
== &semFloat4E2M1FN
)
4118 return initFromFloat4E2M1FNAPInt(api
);
4120 llvm_unreachable("unsupported semantics");
4123 /// Make this number the largest magnitude normal number in the given
4125 void IEEEFloat::makeLargest(bool Negative
) {
4126 if (Negative
&& !semantics
->hasSignedRepr
)
4128 "This floating point format does not support signed values");
4129 // We want (in interchange format):
4130 // sign = {Negative}
4132 // significand = 1..1
4133 category
= fcNormal
;
4135 exponent
= semantics
->maxExponent
;
4137 // Use memset to set all but the highest integerPart to all ones.
4138 integerPart
*significand
= significandParts();
4139 unsigned PartCount
= partCount();
4140 memset(significand
, 0xFF, sizeof(integerPart
)*(PartCount
- 1));
4142 // Set the high integerPart especially setting all unused top bits for
4143 // internal consistency.
4144 const unsigned NumUnusedHighBits
=
4145 PartCount
*integerPartWidth
- semantics
->precision
;
4146 significand
[PartCount
- 1] = (NumUnusedHighBits
< integerPartWidth
)
4147 ? (~integerPart(0) >> NumUnusedHighBits
)
4149 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
&&
4150 semantics
->nanEncoding
== fltNanEncoding::AllOnes
&&
4151 (semantics
->precision
> 1))
4152 significand
[0] &= ~integerPart(1);
4155 /// Make this number the smallest magnitude denormal number in the given
4157 void IEEEFloat::makeSmallest(bool Negative
) {
4158 if (Negative
&& !semantics
->hasSignedRepr
)
4160 "This floating point format does not support signed values");
4161 // We want (in interchange format):
4162 // sign = {Negative}
4164 // significand = 0..01
4165 category
= fcNormal
;
4167 exponent
= semantics
->minExponent
;
4168 APInt::tcSet(significandParts(), 1, partCount());
4171 void IEEEFloat::makeSmallestNormalized(bool Negative
) {
4172 if (Negative
&& !semantics
->hasSignedRepr
)
4174 "This floating point format does not support signed values");
4175 // We want (in interchange format):
4176 // sign = {Negative}
4178 // significand = 10..0
4180 category
= fcNormal
;
4183 exponent
= semantics
->minExponent
;
4184 APInt::tcSetBit(significandParts(), semantics
->precision
- 1);
4187 IEEEFloat::IEEEFloat(const fltSemantics
&Sem
, const APInt
&API
) {
4188 initFromAPInt(&Sem
, API
);
4191 IEEEFloat::IEEEFloat(float f
) {
4192 initFromAPInt(&semIEEEsingle
, APInt::floatToBits(f
));
4195 IEEEFloat::IEEEFloat(double d
) {
4196 initFromAPInt(&semIEEEdouble
, APInt::doubleToBits(d
));
4200 void append(SmallVectorImpl
<char> &Buffer
, StringRef Str
) {
4201 Buffer
.append(Str
.begin(), Str
.end());
4204 /// Removes data from the given significand until it is no more
4205 /// precise than is required for the desired precision.
4206 void AdjustToPrecision(APInt
&significand
,
4207 int &exp
, unsigned FormatPrecision
) {
4208 unsigned bits
= significand
.getActiveBits();
4210 // 196/59 is a very slight overestimate of lg_2(10).
4211 unsigned bitsRequired
= (FormatPrecision
* 196 + 58) / 59;
4213 if (bits
<= bitsRequired
) return;
4215 unsigned tensRemovable
= (bits
- bitsRequired
) * 59 / 196;
4216 if (!tensRemovable
) return;
4218 exp
+= tensRemovable
;
4220 APInt
divisor(significand
.getBitWidth(), 1);
4221 APInt
powten(significand
.getBitWidth(), 10);
4223 if (tensRemovable
& 1)
4225 tensRemovable
>>= 1;
4226 if (!tensRemovable
) break;
4230 significand
= significand
.udiv(divisor
);
4232 // Truncate the significand down to its active bit count.
4233 significand
= significand
.trunc(significand
.getActiveBits());
4237 void AdjustToPrecision(SmallVectorImpl
<char> &buffer
,
4238 int &exp
, unsigned FormatPrecision
) {
4239 unsigned N
= buffer
.size();
4240 if (N
<= FormatPrecision
) return;
4242 // The most significant figures are the last ones in the buffer.
4243 unsigned FirstSignificant
= N
- FormatPrecision
;
4246 // FIXME: this probably shouldn't use 'round half up'.
4248 // Rounding down is just a truncation, except we also want to drop
4249 // trailing zeros from the new result.
4250 if (buffer
[FirstSignificant
- 1] < '5') {
4251 while (FirstSignificant
< N
&& buffer
[FirstSignificant
] == '0')
4254 exp
+= FirstSignificant
;
4255 buffer
.erase(&buffer
[0], &buffer
[FirstSignificant
]);
4259 // Rounding up requires a decimal add-with-carry. If we continue
4260 // the carry, the newly-introduced zeros will just be truncated.
4261 for (unsigned I
= FirstSignificant
; I
!= N
; ++I
) {
4262 if (buffer
[I
] == '9') {
4270 // If we carried through, we have exactly one digit of precision.
4271 if (FirstSignificant
== N
) {
4272 exp
+= FirstSignificant
;
4274 buffer
.push_back('1');
4278 exp
+= FirstSignificant
;
4279 buffer
.erase(&buffer
[0], &buffer
[FirstSignificant
]);
4282 void toStringImpl(SmallVectorImpl
<char> &Str
, const bool isNeg
, int exp
,
4283 APInt significand
, unsigned FormatPrecision
,
4284 unsigned FormatMaxPadding
, bool TruncateZero
) {
4285 const int semanticsPrecision
= significand
.getBitWidth();
4290 // Set FormatPrecision if zero. We want to do this before we
4291 // truncate trailing zeros, as those are part of the precision.
4292 if (!FormatPrecision
) {
4293 // We use enough digits so the number can be round-tripped back to an
4294 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4295 // Accurately" by Steele and White.
4296 // FIXME: Using a formula based purely on the precision is conservative;
4297 // we can print fewer digits depending on the actual value being printed.
4299 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4300 FormatPrecision
= 2 + semanticsPrecision
* 59 / 196;
4303 // Ignore trailing binary zeros.
4304 int trailingZeros
= significand
.countr_zero();
4305 exp
+= trailingZeros
;
4306 significand
.lshrInPlace(trailingZeros
);
4308 // Change the exponent from 2^e to 10^e.
4311 } else if (exp
> 0) {
4313 significand
= significand
.zext(semanticsPrecision
+ exp
);
4314 significand
<<= exp
;
4316 } else { /* exp < 0 */
4319 // We transform this using the identity:
4320 // (N)(2^-e) == (N)(5^e)(10^-e)
4321 // This means we have to multiply N (the significand) by 5^e.
4322 // To avoid overflow, we have to operate on numbers large
4323 // enough to store N * 5^e:
4324 // log2(N * 5^e) == log2(N) + e * log2(5)
4325 // <= semantics->precision + e * 137 / 59
4326 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4328 unsigned precision
= semanticsPrecision
+ (137 * texp
+ 136) / 59;
4330 // Multiply significand by 5^e.
4331 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4332 significand
= significand
.zext(precision
);
4333 APInt
five_to_the_i(precision
, 5);
4336 significand
*= five_to_the_i
;
4341 five_to_the_i
*= five_to_the_i
;
4345 AdjustToPrecision(significand
, exp
, FormatPrecision
);
4347 SmallVector
<char, 256> buffer
;
4350 unsigned precision
= significand
.getBitWidth();
4351 if (precision
< 4) {
4352 // We need enough precision to store the value 10.
4354 significand
= significand
.zext(precision
);
4356 APInt
ten(precision
, 10);
4357 APInt
digit(precision
, 0);
4359 bool inTrail
= true;
4360 while (significand
!= 0) {
4361 // digit <- significand % 10
4362 // significand <- significand / 10
4363 APInt::udivrem(significand
, ten
, significand
, digit
);
4365 unsigned d
= digit
.getZExtValue();
4367 // Drop trailing zeros.
4371 buffer
.push_back((char) ('0' + d
));
4376 assert(!buffer
.empty() && "no characters in buffer!");
4378 // Drop down to FormatPrecision.
4379 // TODO: don't do more precise calculations above than are required.
4380 AdjustToPrecision(buffer
, exp
, FormatPrecision
);
4382 unsigned NDigits
= buffer
.size();
4384 // Check whether we should use scientific notation.
4385 bool FormatScientific
;
4386 if (!FormatMaxPadding
)
4387 FormatScientific
= true;
4392 // But we shouldn't make the number look more precise than it is.
4393 FormatScientific
= ((unsigned) exp
> FormatMaxPadding
||
4394 NDigits
+ (unsigned) exp
> FormatPrecision
);
4396 // Power of the most significant digit.
4397 int MSD
= exp
+ (int) (NDigits
- 1);
4400 FormatScientific
= false;
4402 // 765e-5 == 0.00765
4404 FormatScientific
= ((unsigned) -MSD
) > FormatMaxPadding
;
4409 // Scientific formatting is pretty straightforward.
4410 if (FormatScientific
) {
4411 exp
+= (NDigits
- 1);
4413 Str
.push_back(buffer
[NDigits
-1]);
4415 if (NDigits
== 1 && TruncateZero
)
4418 for (unsigned I
= 1; I
!= NDigits
; ++I
)
4419 Str
.push_back(buffer
[NDigits
-1-I
]);
4420 // Fill with zeros up to FormatPrecision.
4421 if (!TruncateZero
&& FormatPrecision
> NDigits
- 1)
4422 Str
.append(FormatPrecision
- NDigits
+ 1, '0');
4423 // For !TruncateZero we use lower 'e'.
4424 Str
.push_back(TruncateZero
? 'E' : 'e');
4426 Str
.push_back(exp
>= 0 ? '+' : '-');
4429 SmallVector
<char, 6> expbuf
;
4431 expbuf
.push_back((char) ('0' + (exp
% 10)));
4434 // Exponent always at least two digits if we do not truncate zeros.
4435 if (!TruncateZero
&& expbuf
.size() < 2)
4436 expbuf
.push_back('0');
4437 for (unsigned I
= 0, E
= expbuf
.size(); I
!= E
; ++I
)
4438 Str
.push_back(expbuf
[E
-1-I
]);
4442 // Non-scientific, positive exponents.
4444 for (unsigned I
= 0; I
!= NDigits
; ++I
)
4445 Str
.push_back(buffer
[NDigits
-1-I
]);
4446 for (unsigned I
= 0; I
!= (unsigned) exp
; ++I
)
4451 // Non-scientific, negative exponents.
4453 // The number of digits to the left of the decimal point.
4454 int NWholeDigits
= exp
+ (int) NDigits
;
4457 if (NWholeDigits
> 0) {
4458 for (; I
!= (unsigned) NWholeDigits
; ++I
)
4459 Str
.push_back(buffer
[NDigits
-I
-1]);
4462 unsigned NZeros
= 1 + (unsigned) -NWholeDigits
;
4466 for (unsigned Z
= 1; Z
!= NZeros
; ++Z
)
4470 for (; I
!= NDigits
; ++I
)
4471 Str
.push_back(buffer
[NDigits
-I
-1]);
4476 void IEEEFloat::toString(SmallVectorImpl
<char> &Str
, unsigned FormatPrecision
,
4477 unsigned FormatMaxPadding
, bool TruncateZero
) const {
4481 return append(Str
, "-Inf");
4483 return append(Str
, "+Inf");
4485 case fcNaN
: return append(Str
, "NaN");
4491 if (!FormatMaxPadding
) {
4493 append(Str
, "0.0E+0");
4496 if (FormatPrecision
> 1)
4497 Str
.append(FormatPrecision
- 1, '0');
4498 append(Str
, "e+00");
4508 // Decompose the number into an APInt and an exponent.
4509 int exp
= exponent
- ((int) semantics
->precision
- 1);
4511 semantics
->precision
,
4512 ArrayRef(significandParts(), partCountForBits(semantics
->precision
)));
4514 toStringImpl(Str
, isNegative(), exp
, significand
, FormatPrecision
,
4515 FormatMaxPadding
, TruncateZero
);
4519 bool IEEEFloat::getExactInverse(APFloat
*inv
) const {
4520 // Special floats and denormals have no exact inverse.
4521 if (!isFiniteNonZero())
4524 // Check that the number is a power of two by making sure that only the
4525 // integer bit is set in the significand.
4526 if (significandLSB() != semantics
->precision
- 1)
4530 IEEEFloat
reciprocal(*semantics
, 1ULL);
4531 if (reciprocal
.divide(*this, rmNearestTiesToEven
) != opOK
)
4534 // Avoid multiplication with a denormal, it is not safe on all platforms and
4535 // may be slower than a normal division.
4536 if (reciprocal
.isDenormal())
4539 assert(reciprocal
.isFiniteNonZero() &&
4540 reciprocal
.significandLSB() == reciprocal
.semantics
->precision
- 1);
4543 *inv
= APFloat(reciprocal
, *semantics
);
4548 int IEEEFloat::getExactLog2Abs() const {
4549 if (!isFinite() || isZero())
4552 const integerPart
*Parts
= significandParts();
4553 const int PartCount
= partCountForBits(semantics
->precision
);
4556 for (int i
= 0; i
< PartCount
; ++i
) {
4557 PopCount
+= llvm::popcount(Parts
[i
]);
4562 if (exponent
!= semantics
->minExponent
)
4565 int CountrParts
= 0;
4566 for (int i
= 0; i
< PartCount
;
4567 ++i
, CountrParts
+= APInt::APINT_BITS_PER_WORD
) {
4568 if (Parts
[i
] != 0) {
4569 return exponent
- semantics
->precision
+ CountrParts
+
4570 llvm::countr_zero(Parts
[i
]) + 1;
4574 llvm_unreachable("didn't find the set bit");
4577 bool IEEEFloat::isSignaling() const {
4580 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
||
4581 semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::FiniteOnly
)
4584 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4585 // first bit of the trailing significand being 0.
4586 return !APInt::tcExtractBit(significandParts(), semantics
->precision
- 2);
4589 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4591 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4592 /// appropriate sign switching before/after the computation.
4593 APFloat::opStatus
IEEEFloat::next(bool nextDown
) {
4594 // If we are performing nextDown, swap sign so we have -x.
4598 // Compute nextUp(x)
4599 opStatus result
= opOK
;
4601 // Handle each float category separately.
4604 // nextUp(+inf) = +inf
4607 // nextUp(-inf) = -getLargest()
4611 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4612 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4613 // change the payload.
4614 if (isSignaling()) {
4615 result
= opInvalidOp
;
4616 // For consistency, propagate the sign of the sNaN to the qNaN.
4617 makeNaN(false, isNegative(), nullptr);
4621 // nextUp(pm 0) = +getSmallest()
4622 makeSmallest(false);
4625 // nextUp(-getSmallest()) = -0
4626 if (isSmallest() && isNegative()) {
4627 APInt::tcSet(significandParts(), 0, partCount());
4630 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
4632 if (!semantics
->hasZero
)
4633 makeSmallestNormalized(false);
4637 if (isLargest() && !isNegative()) {
4638 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
) {
4639 // nextUp(getLargest()) == NAN
4642 } else if (semantics
->nonFiniteBehavior
==
4643 fltNonfiniteBehavior::FiniteOnly
) {
4644 // nextUp(getLargest()) == getLargest()
4647 // nextUp(getLargest()) == INFINITY
4648 APInt::tcSet(significandParts(), 0, partCount());
4649 category
= fcInfinity
;
4650 exponent
= semantics
->maxExponent
+ 1;
4655 // nextUp(normal) == normal + inc.
4657 // If we are negative, we need to decrement the significand.
4659 // We only cross a binade boundary that requires adjusting the exponent
4661 // 1. exponent != semantics->minExponent. This implies we are not in the
4662 // smallest binade or are dealing with denormals.
4663 // 2. Our significand excluding the integral bit is all zeros.
4664 bool WillCrossBinadeBoundary
=
4665 exponent
!= semantics
->minExponent
&& isSignificandAllZeros();
4667 // Decrement the significand.
4669 // We always do this since:
4670 // 1. If we are dealing with a non-binade decrement, by definition we
4671 // just decrement the significand.
4672 // 2. If we are dealing with a normal -> normal binade decrement, since
4673 // we have an explicit integral bit the fact that all bits but the
4674 // integral bit are zero implies that subtracting one will yield a
4675 // significand with 0 integral bit and 1 in all other spots. Thus we
4676 // must just adjust the exponent and set the integral bit to 1.
4677 // 3. If we are dealing with a normal -> denormal binade decrement,
4678 // since we set the integral bit to 0 when we represent denormals, we
4679 // just decrement the significand.
4680 integerPart
*Parts
= significandParts();
4681 APInt::tcDecrement(Parts
, partCount());
4683 if (WillCrossBinadeBoundary
) {
4684 // Our result is a normal number. Do the following:
4685 // 1. Set the integral bit to 1.
4686 // 2. Decrement the exponent.
4687 APInt::tcSetBit(Parts
, semantics
->precision
- 1);
4691 // If we are positive, we need to increment the significand.
4693 // We only cross a binade boundary that requires adjusting the exponent if
4694 // the input is not a denormal and all of said input's significand bits
4695 // are set. If all of said conditions are true: clear the significand, set
4696 // the integral bit to 1, and increment the exponent. If we have a
4697 // denormal always increment since moving denormals and the numbers in the
4698 // smallest normal binade have the same exponent in our representation.
4699 // If there are only exponents, any increment always crosses the
4701 bool WillCrossBinadeBoundary
= !APFloat::hasSignificand(*semantics
) ||
4702 (!isDenormal() && isSignificandAllOnes());
4704 if (WillCrossBinadeBoundary
) {
4705 integerPart
*Parts
= significandParts();
4706 APInt::tcSet(Parts
, 0, partCount());
4707 APInt::tcSetBit(Parts
, semantics
->precision
- 1);
4708 assert(exponent
!= semantics
->maxExponent
&&
4709 "We can not increment an exponent beyond the maxExponent allowed"
4710 " by the given floating point semantics.");
4713 incrementSignificand();
4719 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4726 APFloatBase::ExponentType
IEEEFloat::exponentNaN() const {
4727 return ::exponentNaN(*semantics
);
4730 APFloatBase::ExponentType
IEEEFloat::exponentInf() const {
4731 return ::exponentInf(*semantics
);
4734 APFloatBase::ExponentType
IEEEFloat::exponentZero() const {
4735 return ::exponentZero(*semantics
);
4738 void IEEEFloat::makeInf(bool Negative
) {
4739 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::FiniteOnly
)
4740 llvm_unreachable("This floating point format does not support Inf");
4742 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
) {
4743 // There is no Inf, so make NaN instead.
4744 makeNaN(false, Negative
);
4747 category
= fcInfinity
;
4749 exponent
= exponentInf();
4750 APInt::tcSet(significandParts(), 0, partCount());
4753 void IEEEFloat::makeZero(bool Negative
) {
4754 if (!semantics
->hasZero
)
4755 llvm_unreachable("This floating point format does not support Zero");
4759 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
) {
4760 // Merge negative zero to positive because 0b10000...000 is used for NaN
4763 exponent
= exponentZero();
4764 APInt::tcSet(significandParts(), 0, partCount());
4767 void IEEEFloat::makeQuiet() {
4769 if (semantics
->nonFiniteBehavior
!= fltNonfiniteBehavior::NanOnly
)
4770 APInt::tcSetBit(significandParts(), semantics
->precision
- 2);
4773 int ilogb(const IEEEFloat
&Arg
) {
4775 return APFloat::IEK_NaN
;
4777 return APFloat::IEK_Zero
;
4778 if (Arg
.isInfinity())
4779 return APFloat::IEK_Inf
;
4780 if (!Arg
.isDenormal())
4781 return Arg
.exponent
;
4783 IEEEFloat
Normalized(Arg
);
4784 int SignificandBits
= Arg
.getSemantics().precision
- 1;
4786 Normalized
.exponent
+= SignificandBits
;
4787 Normalized
.normalize(APFloat::rmNearestTiesToEven
, lfExactlyZero
);
4788 return Normalized
.exponent
- SignificandBits
;
4791 IEEEFloat
scalbn(IEEEFloat X
, int Exp
, roundingMode RoundingMode
) {
4792 auto MaxExp
= X
.getSemantics().maxExponent
;
4793 auto MinExp
= X
.getSemantics().minExponent
;
4795 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4796 // overflow; clamp it to a safe range before adding, but ensure that the range
4797 // is large enough that the clamp does not change the result. The range we
4798 // need to support is the difference between the largest possible exponent and
4799 // the normalized exponent of half the smallest denormal.
4801 int SignificandBits
= X
.getSemantics().precision
- 1;
4802 int MaxIncrement
= MaxExp
- (MinExp
- SignificandBits
) + 1;
4804 // Clamp to one past the range ends to let normalize handle overlflow.
4805 X
.exponent
+= std::clamp(Exp
, -MaxIncrement
- 1, MaxIncrement
);
4806 X
.normalize(RoundingMode
, lfExactlyZero
);
4812 IEEEFloat
frexp(const IEEEFloat
&Val
, int &Exp
, roundingMode RM
) {
4815 // Quiet signalling nans.
4816 if (Exp
== APFloat::IEK_NaN
) {
4817 IEEEFloat
Quiet(Val
);
4822 if (Exp
== APFloat::IEK_Inf
)
4825 // 1 is added because frexp is defined to return a normalized fraction in
4826 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4827 Exp
= Exp
== APFloat::IEK_Zero
? 0 : Exp
+ 1;
4828 return scalbn(Val
, -Exp
, RM
);
4831 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
)
4833 Floats(new APFloat
[2]{APFloat(semIEEEdouble
), APFloat(semIEEEdouble
)}) {
4834 assert(Semantics
== &semPPCDoubleDouble
);
4837 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, uninitializedTag
)
4839 Floats(new APFloat
[2]{APFloat(semIEEEdouble
, uninitialized
),
4840 APFloat(semIEEEdouble
, uninitialized
)}) {
4841 assert(Semantics
== &semPPCDoubleDouble
);
4844 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, integerPart I
)
4845 : Semantics(&S
), Floats(new APFloat
[2]{APFloat(semIEEEdouble
, I
),
4846 APFloat(semIEEEdouble
)}) {
4847 assert(Semantics
== &semPPCDoubleDouble
);
4850 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, const APInt
&I
)
4852 Floats(new APFloat
[2]{
4853 APFloat(semIEEEdouble
, APInt(64, I
.getRawData()[0])),
4854 APFloat(semIEEEdouble
, APInt(64, I
.getRawData()[1]))}) {
4855 assert(Semantics
== &semPPCDoubleDouble
);
4858 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, APFloat
&&First
,
4861 Floats(new APFloat
[2]{std::move(First
), std::move(Second
)}) {
4862 assert(Semantics
== &semPPCDoubleDouble
);
4863 assert(&Floats
[0].getSemantics() == &semIEEEdouble
);
4864 assert(&Floats
[1].getSemantics() == &semIEEEdouble
);
4867 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat
&RHS
)
4868 : Semantics(RHS
.Semantics
),
4869 Floats(RHS
.Floats
? new APFloat
[2]{APFloat(RHS
.Floats
[0]),
4870 APFloat(RHS
.Floats
[1])}
4872 assert(Semantics
== &semPPCDoubleDouble
);
4875 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat
&&RHS
)
4876 : Semantics(RHS
.Semantics
), Floats(std::move(RHS
.Floats
)) {
4877 RHS
.Semantics
= &semBogus
;
4878 assert(Semantics
== &semPPCDoubleDouble
);
4881 DoubleAPFloat
&DoubleAPFloat::operator=(const DoubleAPFloat
&RHS
) {
4882 if (Semantics
== RHS
.Semantics
&& RHS
.Floats
) {
4883 Floats
[0] = RHS
.Floats
[0];
4884 Floats
[1] = RHS
.Floats
[1];
4885 } else if (this != &RHS
) {
4886 this->~DoubleAPFloat();
4887 new (this) DoubleAPFloat(RHS
);
4892 // Implement addition, subtraction, multiplication and division based on:
4893 // "Software for Doubled-Precision Floating-Point Computations",
4894 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4895 APFloat::opStatus
DoubleAPFloat::addImpl(const APFloat
&a
, const APFloat
&aa
,
4896 const APFloat
&c
, const APFloat
&cc
,
4900 Status
|= z
.add(c
, RM
);
4901 if (!z
.isFinite()) {
4902 if (!z
.isInfinity()) {
4903 Floats
[0] = std::move(z
);
4904 Floats
[1].makeZero(/* Neg = */ false);
4905 return (opStatus
)Status
;
4908 auto AComparedToC
= a
.compareAbsoluteValue(c
);
4910 Status
|= z
.add(aa
, RM
);
4911 if (AComparedToC
== APFloat::cmpGreaterThan
) {
4912 // z = cc + aa + c + a;
4913 Status
|= z
.add(c
, RM
);
4914 Status
|= z
.add(a
, RM
);
4916 // z = cc + aa + a + c;
4917 Status
|= z
.add(a
, RM
);
4918 Status
|= z
.add(c
, RM
);
4920 if (!z
.isFinite()) {
4921 Floats
[0] = std::move(z
);
4922 Floats
[1].makeZero(/* Neg = */ false);
4923 return (opStatus
)Status
;
4927 Status
|= zz
.add(cc
, RM
);
4928 if (AComparedToC
== APFloat::cmpGreaterThan
) {
4929 // Floats[1] = a - z + c + zz;
4931 Status
|= Floats
[1].subtract(z
, RM
);
4932 Status
|= Floats
[1].add(c
, RM
);
4933 Status
|= Floats
[1].add(zz
, RM
);
4935 // Floats[1] = c - z + a + zz;
4937 Status
|= Floats
[1].subtract(z
, RM
);
4938 Status
|= Floats
[1].add(a
, RM
);
4939 Status
|= Floats
[1].add(zz
, RM
);
4944 Status
|= q
.subtract(z
, RM
);
4946 // zz = q + c + (a - (q + z)) + aa + cc;
4947 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4949 Status
|= zz
.add(c
, RM
);
4950 Status
|= q
.add(z
, RM
);
4951 Status
|= q
.subtract(a
, RM
);
4953 Status
|= zz
.add(q
, RM
);
4954 Status
|= zz
.add(aa
, RM
);
4955 Status
|= zz
.add(cc
, RM
);
4956 if (zz
.isZero() && !zz
.isNegative()) {
4957 Floats
[0] = std::move(z
);
4958 Floats
[1].makeZero(/* Neg = */ false);
4962 Status
|= Floats
[0].add(zz
, RM
);
4963 if (!Floats
[0].isFinite()) {
4964 Floats
[1].makeZero(/* Neg = */ false);
4965 return (opStatus
)Status
;
4967 Floats
[1] = std::move(z
);
4968 Status
|= Floats
[1].subtract(Floats
[0], RM
);
4969 Status
|= Floats
[1].add(zz
, RM
);
4971 return (opStatus
)Status
;
4974 APFloat::opStatus
DoubleAPFloat::addWithSpecial(const DoubleAPFloat
&LHS
,
4975 const DoubleAPFloat
&RHS
,
4978 if (LHS
.getCategory() == fcNaN
) {
4982 if (RHS
.getCategory() == fcNaN
) {
4986 if (LHS
.getCategory() == fcZero
) {
4990 if (RHS
.getCategory() == fcZero
) {
4994 if (LHS
.getCategory() == fcInfinity
&& RHS
.getCategory() == fcInfinity
&&
4995 LHS
.isNegative() != RHS
.isNegative()) {
4996 Out
.makeNaN(false, Out
.isNegative(), nullptr);
4999 if (LHS
.getCategory() == fcInfinity
) {
5003 if (RHS
.getCategory() == fcInfinity
) {
5007 assert(LHS
.getCategory() == fcNormal
&& RHS
.getCategory() == fcNormal
);
5009 APFloat
A(LHS
.Floats
[0]), AA(LHS
.Floats
[1]), C(RHS
.Floats
[0]),
5011 assert(&A
.getSemantics() == &semIEEEdouble
);
5012 assert(&AA
.getSemantics() == &semIEEEdouble
);
5013 assert(&C
.getSemantics() == &semIEEEdouble
);
5014 assert(&CC
.getSemantics() == &semIEEEdouble
);
5015 assert(&Out
.Floats
[0].getSemantics() == &semIEEEdouble
);
5016 assert(&Out
.Floats
[1].getSemantics() == &semIEEEdouble
);
5017 return Out
.addImpl(A
, AA
, C
, CC
, RM
);
5020 APFloat::opStatus
DoubleAPFloat::add(const DoubleAPFloat
&RHS
,
5022 return addWithSpecial(*this, RHS
, *this, RM
);
5025 APFloat::opStatus
DoubleAPFloat::subtract(const DoubleAPFloat
&RHS
,
5028 auto Ret
= add(RHS
, RM
);
5033 APFloat::opStatus
DoubleAPFloat::multiply(const DoubleAPFloat
&RHS
,
5034 APFloat::roundingMode RM
) {
5035 const auto &LHS
= *this;
5037 /* Interesting observation: For special categories, finding the lowest
5038 common ancestor of the following layered graph gives the correct
5047 e.g. NaN * NaN = NaN
5049 Normal * Zero = Zero
5052 if (LHS
.getCategory() == fcNaN
) {
5056 if (RHS
.getCategory() == fcNaN
) {
5060 if ((LHS
.getCategory() == fcZero
&& RHS
.getCategory() == fcInfinity
) ||
5061 (LHS
.getCategory() == fcInfinity
&& RHS
.getCategory() == fcZero
)) {
5062 Out
.makeNaN(false, false, nullptr);
5065 if (LHS
.getCategory() == fcZero
|| LHS
.getCategory() == fcInfinity
) {
5069 if (RHS
.getCategory() == fcZero
|| RHS
.getCategory() == fcInfinity
) {
5073 assert(LHS
.getCategory() == fcNormal
&& RHS
.getCategory() == fcNormal
&&
5074 "Special cases not handled exhaustively");
5077 APFloat A
= Floats
[0], B
= Floats
[1], C
= RHS
.Floats
[0], D
= RHS
.Floats
[1];
5080 Status
|= T
.multiply(C
, RM
);
5081 if (!T
.isFiniteNonZero()) {
5083 Floats
[1].makeZero(/* Neg = */ false);
5084 return (opStatus
)Status
;
5087 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
5090 Status
|= Tau
.fusedMultiplyAdd(C
, T
, RM
);
5095 Status
|= V
.multiply(D
, RM
);
5098 Status
|= W
.multiply(C
, RM
);
5099 Status
|= V
.add(W
, RM
);
5101 Status
|= Tau
.add(V
, RM
);
5105 Status
|= U
.add(Tau
, RM
);
5108 if (!U
.isFinite()) {
5109 Floats
[1].makeZero(/* Neg = */ false);
5111 // Floats[1] = (t - u) + tau
5112 Status
|= T
.subtract(U
, RM
);
5113 Status
|= T
.add(Tau
, RM
);
5116 return (opStatus
)Status
;
5119 APFloat::opStatus
DoubleAPFloat::divide(const DoubleAPFloat
&RHS
,
5120 APFloat::roundingMode RM
) {
5121 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5122 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
5124 Tmp
.divide(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()), RM
);
5125 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5129 APFloat::opStatus
DoubleAPFloat::remainder(const DoubleAPFloat
&RHS
) {
5130 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5131 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
5133 Tmp
.remainder(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()));
5134 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5138 APFloat::opStatus
DoubleAPFloat::mod(const DoubleAPFloat
&RHS
) {
5139 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5140 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
5141 auto Ret
= Tmp
.mod(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()));
5142 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5147 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat
&Multiplicand
,
5148 const DoubleAPFloat
&Addend
,
5149 APFloat::roundingMode RM
) {
5150 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5151 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
5152 auto Ret
= Tmp
.fusedMultiplyAdd(
5153 APFloat(semPPCDoubleDoubleLegacy
, Multiplicand
.bitcastToAPInt()),
5154 APFloat(semPPCDoubleDoubleLegacy
, Addend
.bitcastToAPInt()), RM
);
5155 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5159 APFloat::opStatus
DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM
) {
5160 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5161 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
5162 auto Ret
= Tmp
.roundToIntegral(RM
);
5163 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5167 void DoubleAPFloat::changeSign() {
5168 Floats
[0].changeSign();
5169 Floats
[1].changeSign();
5173 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat
&RHS
) const {
5174 auto Result
= Floats
[0].compareAbsoluteValue(RHS
.Floats
[0]);
5175 if (Result
!= cmpEqual
)
5177 Result
= Floats
[1].compareAbsoluteValue(RHS
.Floats
[1]);
5178 if (Result
== cmpLessThan
|| Result
== cmpGreaterThan
) {
5179 auto Against
= Floats
[0].isNegative() ^ Floats
[1].isNegative();
5180 auto RHSAgainst
= RHS
.Floats
[0].isNegative() ^ RHS
.Floats
[1].isNegative();
5181 if (Against
&& !RHSAgainst
)
5183 if (!Against
&& RHSAgainst
)
5184 return cmpGreaterThan
;
5185 if (!Against
&& !RHSAgainst
)
5187 if (Against
&& RHSAgainst
)
5188 return (cmpResult
)(cmpLessThan
+ cmpGreaterThan
- Result
);
5193 APFloat::fltCategory
DoubleAPFloat::getCategory() const {
5194 return Floats
[0].getCategory();
5197 bool DoubleAPFloat::isNegative() const { return Floats
[0].isNegative(); }
5199 void DoubleAPFloat::makeInf(bool Neg
) {
5200 Floats
[0].makeInf(Neg
);
5201 Floats
[1].makeZero(/* Neg = */ false);
5204 void DoubleAPFloat::makeZero(bool Neg
) {
5205 Floats
[0].makeZero(Neg
);
5206 Floats
[1].makeZero(/* Neg = */ false);
5209 void DoubleAPFloat::makeLargest(bool Neg
) {
5210 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5211 Floats
[0] = APFloat(semIEEEdouble
, APInt(64, 0x7fefffffffffffffull
));
5212 Floats
[1] = APFloat(semIEEEdouble
, APInt(64, 0x7c8ffffffffffffeull
));
5217 void DoubleAPFloat::makeSmallest(bool Neg
) {
5218 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5219 Floats
[0].makeSmallest(Neg
);
5220 Floats
[1].makeZero(/* Neg = */ false);
5223 void DoubleAPFloat::makeSmallestNormalized(bool Neg
) {
5224 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5225 Floats
[0] = APFloat(semIEEEdouble
, APInt(64, 0x0360000000000000ull
));
5227 Floats
[0].changeSign();
5228 Floats
[1].makeZero(/* Neg = */ false);
5231 void DoubleAPFloat::makeNaN(bool SNaN
, bool Neg
, const APInt
*fill
) {
5232 Floats
[0].makeNaN(SNaN
, Neg
, fill
);
5233 Floats
[1].makeZero(/* Neg = */ false);
5236 APFloat::cmpResult
DoubleAPFloat::compare(const DoubleAPFloat
&RHS
) const {
5237 auto Result
= Floats
[0].compare(RHS
.Floats
[0]);
5238 // |Float[0]| > |Float[1]|
5239 if (Result
== APFloat::cmpEqual
)
5240 return Floats
[1].compare(RHS
.Floats
[1]);
5244 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat
&RHS
) const {
5245 return Floats
[0].bitwiseIsEqual(RHS
.Floats
[0]) &&
5246 Floats
[1].bitwiseIsEqual(RHS
.Floats
[1]);
5249 hash_code
hash_value(const DoubleAPFloat
&Arg
) {
5251 return hash_combine(hash_value(Arg
.Floats
[0]), hash_value(Arg
.Floats
[1]));
5252 return hash_combine(Arg
.Semantics
);
5255 APInt
DoubleAPFloat::bitcastToAPInt() const {
5256 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5258 Floats
[0].bitcastToAPInt().getRawData()[0],
5259 Floats
[1].bitcastToAPInt().getRawData()[0],
5261 return APInt(128, 2, Data
);
5264 Expected
<APFloat::opStatus
> DoubleAPFloat::convertFromString(StringRef S
,
5266 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5267 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
5268 auto Ret
= Tmp
.convertFromString(S
, RM
);
5269 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5273 APFloat::opStatus
DoubleAPFloat::next(bool nextDown
) {
5274 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5275 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
5276 auto Ret
= Tmp
.next(nextDown
);
5277 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5282 DoubleAPFloat::convertToInteger(MutableArrayRef
<integerPart
> Input
,
5283 unsigned int Width
, bool IsSigned
,
5284 roundingMode RM
, bool *IsExact
) const {
5285 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5286 return APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
5287 .convertToInteger(Input
, Width
, IsSigned
, RM
, IsExact
);
5290 APFloat::opStatus
DoubleAPFloat::convertFromAPInt(const APInt
&Input
,
5293 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5294 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
5295 auto Ret
= Tmp
.convertFromAPInt(Input
, IsSigned
, RM
);
5296 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5301 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart
*Input
,
5302 unsigned int InputSize
,
5303 bool IsSigned
, roundingMode RM
) {
5304 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5305 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
5306 auto Ret
= Tmp
.convertFromSignExtendedInteger(Input
, InputSize
, IsSigned
, RM
);
5307 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5312 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart
*Input
,
5313 unsigned int InputSize
,
5314 bool IsSigned
, roundingMode RM
) {
5315 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5316 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
5317 auto Ret
= Tmp
.convertFromZeroExtendedInteger(Input
, InputSize
, IsSigned
, RM
);
5318 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5322 unsigned int DoubleAPFloat::convertToHexString(char *DST
,
5323 unsigned int HexDigits
,
5325 roundingMode RM
) const {
5326 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5327 return APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
5328 .convertToHexString(DST
, HexDigits
, UpperCase
, RM
);
5331 bool DoubleAPFloat::isDenormal() const {
5332 return getCategory() == fcNormal
&&
5333 (Floats
[0].isDenormal() || Floats
[1].isDenormal() ||
5334 // (double)(Hi + Lo) == Hi defines a normal number.
5335 Floats
[0] != Floats
[0] + Floats
[1]);
5338 bool DoubleAPFloat::isSmallest() const {
5339 if (getCategory() != fcNormal
)
5341 DoubleAPFloat
Tmp(*this);
5342 Tmp
.makeSmallest(this->isNegative());
5343 return Tmp
.compare(*this) == cmpEqual
;
5346 bool DoubleAPFloat::isSmallestNormalized() const {
5347 if (getCategory() != fcNormal
)
5350 DoubleAPFloat
Tmp(*this);
5351 Tmp
.makeSmallestNormalized(this->isNegative());
5352 return Tmp
.compare(*this) == cmpEqual
;
5355 bool DoubleAPFloat::isLargest() const {
5356 if (getCategory() != fcNormal
)
5358 DoubleAPFloat
Tmp(*this);
5359 Tmp
.makeLargest(this->isNegative());
5360 return Tmp
.compare(*this) == cmpEqual
;
5363 bool DoubleAPFloat::isInteger() const {
5364 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5365 return Floats
[0].isInteger() && Floats
[1].isInteger();
5368 void DoubleAPFloat::toString(SmallVectorImpl
<char> &Str
,
5369 unsigned FormatPrecision
,
5370 unsigned FormatMaxPadding
,
5371 bool TruncateZero
) const {
5372 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5373 APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
5374 .toString(Str
, FormatPrecision
, FormatMaxPadding
, TruncateZero
);
5377 bool DoubleAPFloat::getExactInverse(APFloat
*inv
) const {
5378 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5379 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
5381 return Tmp
.getExactInverse(nullptr);
5382 APFloat
Inv(semPPCDoubleDoubleLegacy
);
5383 auto Ret
= Tmp
.getExactInverse(&Inv
);
5384 *inv
= APFloat(semPPCDoubleDouble
, Inv
.bitcastToAPInt());
// Exact base-2 logarithm query for the double-double representation.
// Not implemented yet (see the TODO below).
// NOTE(review): the statement producing the return value is not visible in
// this excerpt -- confirm which sentinel callers receive.
5388 int DoubleAPFloat::getExactLog2() const {
5389 // TODO: Implement me
// Exact base-2 logarithm query on the absolute value, going by the name, for
// the double-double representation. Not implemented yet (see the TODO below).
// NOTE(review): the statement producing the return value is not visible in
// this excerpt -- confirm which sentinel callers receive.
5393 int DoubleAPFloat::getExactLog2Abs() const {
5394 // TODO: Implement me
5398 DoubleAPFloat
scalbn(const DoubleAPFloat
&Arg
, int Exp
,
5399 APFloat::roundingMode RM
) {
5400 assert(Arg
.Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5401 return DoubleAPFloat(semPPCDoubleDouble
, scalbn(Arg
.Floats
[0], Exp
, RM
),
5402 scalbn(Arg
.Floats
[1], Exp
, RM
));
5405 DoubleAPFloat
frexp(const DoubleAPFloat
&Arg
, int &Exp
,
5406 APFloat::roundingMode RM
) {
5407 assert(Arg
.Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5408 APFloat First
= frexp(Arg
.Floats
[0], Exp
, RM
);
5409 APFloat Second
= Arg
.Floats
[1];
5410 if (Arg
.getCategory() == APFloat::fcNormal
)
5411 Second
= scalbn(Second
, -Exp
, RM
);
5412 return DoubleAPFloat(semPPCDoubleDouble
, std::move(First
), std::move(Second
));
5415 } // namespace detail
5417 APFloat::Storage::Storage(IEEEFloat F
, const fltSemantics
&Semantics
) {
5418 if (usesLayout
<IEEEFloat
>(Semantics
)) {
5419 new (&IEEE
) IEEEFloat(std::move(F
));
5422 if (usesLayout
<DoubleAPFloat
>(Semantics
)) {
5423 const fltSemantics
& S
= F
.getSemantics();
5425 DoubleAPFloat(Semantics
, APFloat(std::move(F
), S
),
5426 APFloat(semIEEEdouble
));
5429 llvm_unreachable("Unexpected semantics");
5432 Expected
<APFloat::opStatus
> APFloat::convertFromString(StringRef Str
,
5434 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str
, RM
));
5437 hash_code
hash_value(const APFloat
&Arg
) {
5438 if (APFloat::usesLayout
<detail::IEEEFloat
>(Arg
.getSemantics()))
5439 return hash_value(Arg
.U
.IEEE
);
5440 if (APFloat::usesLayout
<detail::DoubleAPFloat
>(Arg
.getSemantics()))
5441 return hash_value(Arg
.U
.Double
);
5442 llvm_unreachable("Unexpected semantics");
5445 APFloat::APFloat(const fltSemantics
&Semantics
, StringRef S
)
5446 : APFloat(Semantics
) {
5447 auto StatusOrErr
= convertFromString(S
, rmNearestTiesToEven
);
5448 assert(StatusOrErr
&& "Invalid floating point representation");
5449 consumeError(StatusOrErr
.takeError());
5452 FPClassTest
APFloat::classify() const {
5454 return isNegative() ? fcNegZero
: fcPosZero
;
5456 return isNegative() ? fcNegNormal
: fcPosNormal
;
5458 return isNegative() ? fcNegSubnormal
: fcPosSubnormal
;
5460 return isNegative() ? fcNegInf
: fcPosInf
;
5461 assert(isNaN() && "Other class of FP constant");
5462 return isSignaling() ? fcSNan
: fcQNan
;
5465 APFloat::opStatus
APFloat::convert(const fltSemantics
&ToSemantics
,
5466 roundingMode RM
, bool *losesInfo
) {
5467 if (&getSemantics() == &ToSemantics
) {
5471 if (usesLayout
<IEEEFloat
>(getSemantics()) &&
5472 usesLayout
<IEEEFloat
>(ToSemantics
))
5473 return U
.IEEE
.convert(ToSemantics
, RM
, losesInfo
);
5474 if (usesLayout
<IEEEFloat
>(getSemantics()) &&
5475 usesLayout
<DoubleAPFloat
>(ToSemantics
)) {
5476 assert(&ToSemantics
== &semPPCDoubleDouble
);
5477 auto Ret
= U
.IEEE
.convert(semPPCDoubleDoubleLegacy
, RM
, losesInfo
);
5478 *this = APFloat(ToSemantics
, U
.IEEE
.bitcastToAPInt());
5481 if (usesLayout
<DoubleAPFloat
>(getSemantics()) &&
5482 usesLayout
<IEEEFloat
>(ToSemantics
)) {
5483 auto Ret
= getIEEE().convert(ToSemantics
, RM
, losesInfo
);
5484 *this = APFloat(std::move(getIEEE()), ToSemantics
);
5487 llvm_unreachable("Unexpected semantics");
5490 APFloat
APFloat::getAllOnesValue(const fltSemantics
&Semantics
) {
5491 return APFloat(Semantics
, APInt::getAllOnes(Semantics
.sizeInBits
));
// Print this value to the stream OS.
// A small character buffer is declared for the textual form.
// NOTE(review): the statements that fill the buffer and write it to OS are
// not visible in this excerpt -- confirm whether a trailing newline is
// emitted.
5494 void APFloat::print(raw_ostream
&OS
) const {
5495 SmallVector
<char, 16> Buffer
;
// Debug dump helper; only compiled when assertions are enabled (NDEBUG not
// defined) or LLVM_ENABLE_DUMP is defined.
// NOTE(review): the body is not visible in this excerpt -- presumably it
// prints to the debug stream; confirm.
5500 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5501 LLVM_DUMP_METHOD
void APFloat::dump() const {
5507 void APFloat::Profile(FoldingSetNodeID
&NID
) const {
5508 NID
.Add(bitcastToAPInt());
5511 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5512 an APSInt, whose initial bit-width and signed-ness are used to determine the
5513 precision of the conversion.
5515 APFloat::opStatus
APFloat::convertToInteger(APSInt
&result
,
5516 roundingMode rounding_mode
,
5517 bool *isExact
) const {
5518 unsigned bitWidth
= result
.getBitWidth();
5519 SmallVector
<uint64_t, 4> parts(result
.getNumWords());
5520 opStatus status
= convertToInteger(parts
, bitWidth
, result
.isSigned(),
5521 rounding_mode
, isExact
);
5522 // Keeps the original signed-ness.
5523 result
= APInt(bitWidth
, parts
);
5527 double APFloat::convertToDouble() const {
5528 if (&getSemantics() == (const llvm::fltSemantics
*)&semIEEEdouble
)
5529 return getIEEE().convertToDouble();
5530 assert(getSemantics().isRepresentableBy(semIEEEdouble
) &&
5531 "Float semantics is not representable by IEEEdouble");
5532 APFloat Temp
= *this;
5534 opStatus St
= Temp
.convert(semIEEEdouble
, rmNearestTiesToEven
, &LosesInfo
);
5535 assert(!(St
& opInexact
) && !LosesInfo
&& "Unexpected imprecision");
5537 return Temp
.getIEEE().convertToDouble();
#ifdef HAS_IEE754_FLOAT128
/// Convert this value to a host float128 (only built when
/// HAS_IEE754_FLOAT128 is defined).
///
/// Values already in semIEEEquad are converted directly. Any other semantics
/// must be representable by semIEEEquad (asserted); a copy is converted with
/// rmNearestTiesToEven and the conversion is asserted to be exact before the
/// copy is turned into a float128.
float128 APFloat::convertToQuad() const {
  if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad)
    return getIEEE().convertToQuad();
  assert(getSemantics().isRepresentableBy(semIEEEquad) &&
         "Float semantics is not representable by IEEEquad");
  APFloat Temp = *this;
  bool LosesInfo;
  opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo);
  assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
  // St is only read by the assertion above; keep release builds warning-free.
  (void)St;
  return Temp.getIEEE().convertToQuad();
}
#endif
5555 float APFloat::convertToFloat() const {
5556 if (&getSemantics() == (const llvm::fltSemantics
*)&semIEEEsingle
)
5557 return getIEEE().convertToFloat();
5558 assert(getSemantics().isRepresentableBy(semIEEEsingle
) &&
5559 "Float semantics is not representable by IEEEsingle");
5560 APFloat Temp
= *this;
5562 opStatus St
= Temp
.convert(semIEEEsingle
, rmNearestTiesToEven
, &LosesInfo
);
5563 assert(!(St
& opInexact
) && !LosesInfo
&& "Unexpected imprecision");
5565 return Temp
.getIEEE().convertToFloat();
5570 #undef APFLOAT_DISPATCH_ON_SEMANTICS