[RISCV] Fix mgather -> riscv.masked.strided.load combine not extending indices (...
[llvm-project.git] / llvm / lib / Support / APFloat.cpp
blob0a4f5ac01553f10272257492af7a70f2d01953f7
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FloatingPointMode.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cstring>
29 #include <limits.h>
/// Forward METHOD_CALL to whichever union member matches the current
/// semantics (U.IEEE for IEEEFloat layouts, U.Double for DoubleAPFloat
/// layouts) and return its result from the enclosing function.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)
40 using namespace llvm;
/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// The key is `_lhs * 4 + _rhs`, so it is unique as long as `_rhs` is in the
/// range [0, 4).
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
50 /* Assumed in hexadecimal significand parsing, and conversion to
51 hexadecimal strings. */
52 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
54 namespace llvm {
// How the nonfinite values Inf and NaN are represented.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by the
  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,
};
// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3 floating point type where NaN is
  // represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in a IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};
98 /* Represents floating point arithmetic semantics. */
99 struct fltSemantics {
100 /* The largest E such that 2^E is representable; this matches the
101 definition of IEEE 754. */
102 APFloatBase::ExponentType maxExponent;
104 /* The smallest E such that 2^E is a normalized number; this
105 matches the definition of IEEE 754. */
106 APFloatBase::ExponentType minExponent;
108 /* Number of bits in the significand. This includes the integer
109 bit. */
110 unsigned int precision;
112 /* Number of bits actually used in the semantics. */
113 unsigned int sizeInBits;
115 fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
117 fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
118 // Returns true if any number described by this semantics can be precisely
119 // represented by the specified semantics. Does not take into account
120 // the value of fltNonfiniteBehavior.
121 bool isRepresentableBy(const fltSemantics &S) const {
122 return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
123 precision <= S.precision;
127 static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
128 static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
129 static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
130 static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
131 static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
132 static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
133 static constexpr fltSemantics semFloat8E5M2FNUZ = {
134 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
135 static constexpr fltSemantics semFloat8E4M3FN = {
136 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
137 static constexpr fltSemantics semFloat8E4M3FNUZ = {
138 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
139 static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
140 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
141 static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
142 static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
143 static constexpr fltSemantics semBogus = {0, 0, 0, 0};
145 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
146 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
147 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
148 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
149 to each other, and two 11-bit exponents.
151 Note: we need to make the value different from semBogus as otherwise
152 an unsafe optimization may collapse both values to a single address,
153 and we heavily rely on them having distinct addresses. */
154 static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
156 /* These are legacy semantics for the fallback, inaccrurate implementation of
157 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
158 operation. It's equivalent to having an IEEE number with consecutive 106
159 bits of mantissa and 11 bits of exponent.
161 It's not equivalent to IBM double-double. For example, a legit IBM
162 double-double, 1 + epsilon:
164 1 + epsilon = 1 + (1 >> 1076)
166 is not representable by a consecutive 106 bits of mantissa.
168 Currently, these semantics are used in the following way:
170 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
171 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
172 semPPCDoubleDoubleLegacy -> IEEE operations
174 We use bitcastToAPInt() to get the bit representation (in APInt) of the
175 underlying IEEEdouble, then use the APInt constructor to construct the
176 legacy IEEE float.
178 TODO: Implement all operations in semPPCDoubleDouble, and delete these
179 semantics. */
180 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
181 53 + 53, 128};
183 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
184 switch (S) {
185 case S_IEEEhalf:
186 return IEEEhalf();
187 case S_BFloat:
188 return BFloat();
189 case S_IEEEsingle:
190 return IEEEsingle();
191 case S_IEEEdouble:
192 return IEEEdouble();
193 case S_IEEEquad:
194 return IEEEquad();
195 case S_PPCDoubleDouble:
196 return PPCDoubleDouble();
197 case S_Float8E5M2:
198 return Float8E5M2();
199 case S_Float8E5M2FNUZ:
200 return Float8E5M2FNUZ();
201 case S_Float8E4M3FN:
202 return Float8E4M3FN();
203 case S_Float8E4M3FNUZ:
204 return Float8E4M3FNUZ();
205 case S_Float8E4M3B11FNUZ:
206 return Float8E4M3B11FNUZ();
207 case S_FloatTF32:
208 return FloatTF32();
209 case S_x87DoubleExtended:
210 return x87DoubleExtended();
212 llvm_unreachable("Unrecognised floating semantics");
215 APFloatBase::Semantics
216 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
217 if (&Sem == &llvm::APFloat::IEEEhalf())
218 return S_IEEEhalf;
219 else if (&Sem == &llvm::APFloat::BFloat())
220 return S_BFloat;
221 else if (&Sem == &llvm::APFloat::IEEEsingle())
222 return S_IEEEsingle;
223 else if (&Sem == &llvm::APFloat::IEEEdouble())
224 return S_IEEEdouble;
225 else if (&Sem == &llvm::APFloat::IEEEquad())
226 return S_IEEEquad;
227 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
228 return S_PPCDoubleDouble;
229 else if (&Sem == &llvm::APFloat::Float8E5M2())
230 return S_Float8E5M2;
231 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
232 return S_Float8E5M2FNUZ;
233 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
234 return S_Float8E4M3FN;
235 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
236 return S_Float8E4M3FNUZ;
237 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
238 return S_Float8E4M3B11FNUZ;
239 else if (&Sem == &llvm::APFloat::FloatTF32())
240 return S_FloatTF32;
241 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
242 return S_x87DoubleExtended;
243 else
244 llvm_unreachable("Unknown floating semantics");
247 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
248 const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
249 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
250 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
251 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
252 const fltSemantics &APFloatBase::PPCDoubleDouble() {
253 return semPPCDoubleDouble;
255 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
256 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
257 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
258 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
259 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
260 return semFloat8E4M3B11FNUZ;
262 const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
263 const fltSemantics &APFloatBase::x87DoubleExtended() {
264 return semX87DoubleExtended;
266 const fltSemantics &APFloatBase::Bogus() { return semBogus; }
268 constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
269 constexpr RoundingMode APFloatBase::rmTowardPositive;
270 constexpr RoundingMode APFloatBase::rmTowardNegative;
271 constexpr RoundingMode APFloatBase::rmTowardZero;
272 constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
274 /* A tight upper bound on number of parts required to hold the value
275 pow(5, power) is
277 power * 815 / (351 * integerPartWidth) + 1
279 However, whilst the result may require only this many parts,
280 because we are multiplying two values to get it, the
281 multiplication may require an extra part with the excess part
282 being zero (consider the trivial case of 1 * 1, tcFullMultiply
283 requires two parts to hold the single-part result). So we add an
284 extra one to guarantee enough space whilst multiplying. */
285 const unsigned int maxExponent = 16383;
286 const unsigned int maxPrecision = 113;
287 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
288 const unsigned int maxPowerOfFiveParts =
290 ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
292 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
293 return semantics.precision;
295 APFloatBase::ExponentType
296 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
297 return semantics.maxExponent;
299 APFloatBase::ExponentType
300 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
301 return semantics.minExponent;
303 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
304 return semantics.sizeInBits;
306 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
307 bool isSigned) {
308 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
309 // at least one more bit than the MaxExponent to hold the max FP value.
310 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
311 // Extra sign bit needed.
312 if (isSigned)
313 ++MinBitWidth;
314 return MinBitWidth;
317 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
318 const fltSemantics &Dst) {
319 // Exponent range must be larger.
320 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
321 return false;
323 // If the mantissa is long enough, the result value could still be denormal
324 // with a larger exponent range.
326 // FIXME: This condition is probably not accurate but also shouldn't be a
327 // practical concern with existing types.
328 return Dst.precision >= Src.precision;
331 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
332 return Sem.sizeInBits;
335 static constexpr APFloatBase::ExponentType
336 exponentZero(const fltSemantics &semantics) {
337 return semantics.minExponent - 1;
340 static constexpr APFloatBase::ExponentType
341 exponentInf(const fltSemantics &semantics) {
342 return semantics.maxExponent + 1;
345 static constexpr APFloatBase::ExponentType
346 exponentNaN(const fltSemantics &semantics) {
347 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
348 if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
349 return exponentZero(semantics);
350 return semantics.maxExponent;
352 return semantics.maxExponent + 1;
355 /* A bunch of private, handy routines. */
357 static inline Error createError(const Twine &Err) {
358 return make_error<StringError>(Err, inconvertibleErrorCode());
361 static constexpr inline unsigned int partCountForBits(unsigned int bits) {
362 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
/* Returns 0U-9U.  Return values >= 10U are not digits.  Characters below
   '0' wrap around to large unsigned values, so they are rejected by the
   same ">= 10U" test.  */
static inline unsigned int
decDigitValue(unsigned int c)
{
  return c - '0';
}
372 /* Return the value of a decimal exponent of the form
373 [+-]ddddddd.
375 If the exponent overflows, returns a large exponent with the
376 appropriate sign. */
377 static Expected<int> readExponent(StringRef::iterator begin,
378 StringRef::iterator end) {
379 bool isNegative;
380 unsigned int absExponent;
381 const unsigned int overlargeExponent = 24000; /* FIXME. */
382 StringRef::iterator p = begin;
384 // Treat no exponent as 0 to match binutils
385 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
386 return 0;
389 isNegative = (*p == '-');
390 if (*p == '-' || *p == '+') {
391 p++;
392 if (p == end)
393 return createError("Exponent has no digits");
396 absExponent = decDigitValue(*p++);
397 if (absExponent >= 10U)
398 return createError("Invalid character in exponent");
400 for (; p != end; ++p) {
401 unsigned int value;
403 value = decDigitValue(*p);
404 if (value >= 10U)
405 return createError("Invalid character in exponent");
407 absExponent = absExponent * 10U + value;
408 if (absExponent >= overlargeExponent) {
409 absExponent = overlargeExponent;
410 break;
414 if (isNegative)
415 return -(int) absExponent;
416 else
417 return (int) absExponent;
420 /* This is ugly and needs cleaning up, but I don't immediately see
421 how whilst remaining safe. */
422 static Expected<int> totalExponent(StringRef::iterator p,
423 StringRef::iterator end,
424 int exponentAdjustment) {
425 int unsignedExponent;
426 bool negative, overflow;
427 int exponent = 0;
429 if (p == end)
430 return createError("Exponent has no digits");
432 negative = *p == '-';
433 if (*p == '-' || *p == '+') {
434 p++;
435 if (p == end)
436 return createError("Exponent has no digits");
439 unsignedExponent = 0;
440 overflow = false;
441 for (; p != end; ++p) {
442 unsigned int value;
444 value = decDigitValue(*p);
445 if (value >= 10U)
446 return createError("Invalid character in exponent");
448 unsignedExponent = unsignedExponent * 10 + value;
449 if (unsignedExponent > 32767) {
450 overflow = true;
451 break;
455 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
456 overflow = true;
458 if (!overflow) {
459 exponent = unsignedExponent;
460 if (negative)
461 exponent = -exponent;
462 exponent += exponentAdjustment;
463 if (exponent > 32767 || exponent < -32768)
464 overflow = true;
467 if (overflow)
468 exponent = negative ? -32768: 32767;
470 return exponent;
473 static Expected<StringRef::iterator>
474 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
475 StringRef::iterator *dot) {
476 StringRef::iterator p = begin;
477 *dot = end;
478 while (p != end && *p == '0')
479 p++;
481 if (p != end && *p == '.') {
482 *dot = p++;
484 if (end - begin == 1)
485 return createError("Significand has no digits");
487 while (p != end && *p == '0')
488 p++;
491 return p;
/* Given a normal decimal floating point number of the form

     dddd.dddd[eE][+-]ddd

   where the decimal point and exponent are optional, fill out the
   structure D.  Exponent is appropriate if the significand is
   treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, V->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  const char *firstSigDigit;
  const char *lastSigDigit;
  int exponent;
  int normalizedExponent;
};
514 static Error interpretDecimal(StringRef::iterator begin,
515 StringRef::iterator end, decimalInfo *D) {
516 StringRef::iterator dot = end;
518 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
519 if (!PtrOrErr)
520 return PtrOrErr.takeError();
521 StringRef::iterator p = *PtrOrErr;
523 D->firstSigDigit = p;
524 D->exponent = 0;
525 D->normalizedExponent = 0;
527 for (; p != end; ++p) {
528 if (*p == '.') {
529 if (dot != end)
530 return createError("String contains multiple dots");
531 dot = p++;
532 if (p == end)
533 break;
535 if (decDigitValue(*p) >= 10U)
536 break;
539 if (p != end) {
540 if (*p != 'e' && *p != 'E')
541 return createError("Invalid character in significand");
542 if (p == begin)
543 return createError("Significand has no digits");
544 if (dot != end && p - begin == 1)
545 return createError("Significand has no digits");
547 /* p points to the first non-digit in the string */
548 auto ExpOrErr = readExponent(p + 1, end);
549 if (!ExpOrErr)
550 return ExpOrErr.takeError();
551 D->exponent = *ExpOrErr;
553 /* Implied decimal point? */
554 if (dot == end)
555 dot = p;
558 /* If number is all zeroes accept any exponent. */
559 if (p != D->firstSigDigit) {
560 /* Drop insignificant trailing zeroes. */
561 if (p != begin) {
564 p--;
565 while (p != begin && *p == '0');
566 while (p != begin && *p == '.');
569 /* Adjust the exponents for any decimal point. */
570 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
571 D->normalizedExponent = (D->exponent +
572 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
573 - (dot > D->firstSigDigit && dot < p)));
576 D->lastSigDigit = p;
577 return Error::success();
580 /* Return the trailing fraction of a hexadecimal number.
581 DIGITVALUE is the first hex digit of the fraction, P points to
582 the next digit. */
583 static Expected<lostFraction>
584 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
585 unsigned int digitValue) {
586 unsigned int hexDigit;
588 /* If the first trailing digit isn't 0 or 8 we can work out the
589 fraction immediately. */
590 if (digitValue > 8)
591 return lfMoreThanHalf;
592 else if (digitValue < 8 && digitValue > 0)
593 return lfLessThanHalf;
595 // Otherwise we need to find the first non-zero digit.
596 while (p != end && (*p == '0' || *p == '.'))
597 p++;
599 if (p == end)
600 return createError("Invalid trailing hexadecimal fraction!");
602 hexDigit = hexDigitValue(*p);
604 /* If we ran off the end it is exactly zero or one-half, otherwise
605 a little more. */
606 if (hexDigit == UINT_MAX)
607 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
608 else
609 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
612 /* Return the fraction lost were a bignum truncated losing the least
613 significant BITS bits. */
614 static lostFraction
615 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
616 unsigned int partCount,
617 unsigned int bits)
619 unsigned int lsb;
621 lsb = APInt::tcLSB(parts, partCount);
623 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
624 if (bits <= lsb)
625 return lfExactlyZero;
626 if (bits == lsb + 1)
627 return lfExactlyHalf;
628 if (bits <= partCount * APFloatBase::integerPartWidth &&
629 APInt::tcExtractBit(parts, bits - 1))
630 return lfMoreThanHalf;
632 return lfLessThanHalf;
635 /* Shift DST right BITS bits noting lost fraction. */
636 static lostFraction
637 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
639 lostFraction lost_fraction;
641 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
643 APInt::tcShiftRight(dst, parts, bits);
645 return lost_fraction;
648 /* Combine the effect of two lost fractions. */
649 static lostFraction
650 combineLostFractions(lostFraction moreSignificant,
651 lostFraction lessSignificant)
653 if (lessSignificant != lfExactlyZero) {
654 if (moreSignificant == lfExactlyZero)
655 moreSignificant = lfLessThanHalf;
656 else if (moreSignificant == lfExactlyHalf)
657 moreSignificant = lfMoreThanHalf;
660 return moreSignificant;
/* The error from the true value, in half-ulps, on multiplying two
   floating point numbers, which differ from the value they
   approximate by at most HUE1 and HUE2 half-ulps, is strictly less
   than the returned value.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  if (HUerr1 + HUerr2 == 0)
    return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
  else
    return inexactMultiply + 2 * (HUerr1 + HUerr2);
}
681 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
682 when the least significant BITS are truncated. BITS cannot be
683 zero. */
684 static APFloatBase::integerPart
685 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
686 bool isNearest) {
687 unsigned int count, partBits;
688 APFloatBase::integerPart part, boundary;
690 assert(bits != 0);
692 bits--;
693 count = bits / APFloatBase::integerPartWidth;
694 partBits = bits % APFloatBase::integerPartWidth + 1;
696 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
698 if (isNearest)
699 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
700 else
701 boundary = 0;
703 if (count == 0) {
704 if (part - boundary <= boundary - part)
705 return part - boundary;
706 else
707 return boundary - part;
710 if (part == boundary) {
711 while (--count)
712 if (parts[count])
713 return ~(APFloatBase::integerPart) 0; /* A lot. */
715 return parts[0];
716 } else if (part == boundary - 1) {
717 while (--count)
718 if (~parts[count])
719 return ~(APFloatBase::integerPart) 0; /* A lot. */
721 return -parts[0];
724 return ~(APFloatBase::integerPart) 0; /* A lot. */
727 /* Place pow(5, power) in DST, and return the number of parts used.
728 DST must be at least one part larger than size of the answer. */
729 static unsigned int
730 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
731 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
732 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
733 pow5s[0] = 78125 * 5;
735 unsigned int partsCount[16] = { 1 };
736 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
737 unsigned int result;
738 assert(power <= maxExponent);
740 p1 = dst;
741 p2 = scratch;
743 *p1 = firstEightPowers[power & 7];
744 power >>= 3;
746 result = 1;
747 pow5 = pow5s;
749 for (unsigned int n = 0; power; power >>= 1, n++) {
750 unsigned int pc;
752 pc = partsCount[n];
754 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
755 if (pc == 0) {
756 pc = partsCount[n - 1];
757 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
758 pc *= 2;
759 if (pow5[pc - 1] == 0)
760 pc--;
761 partsCount[n] = pc;
764 if (power & 1) {
765 APFloatBase::integerPart *tmp;
767 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
768 result += pc;
769 if (p2[result - 1] == 0)
770 result--;
772 /* Now result is in p1 with partsCount parts and p2 is scratch
773 space. */
774 tmp = p1;
775 p1 = p2;
776 p2 = tmp;
779 pow5 += pc;
782 if (p1 != dst)
783 APInt::tcAssign(dst, p1, result);
785 return result;
/* Zero at the end to avoid modular arithmetic when adding one; used
   when rounding up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
/* Lower- and upper-case spellings of infinity and NaN.  */
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";
797 /* Write out an integerPart in hexadecimal, starting with the most
798 significant nibble. Write out exactly COUNT hexdigits, return
799 COUNT. */
800 static unsigned int
801 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
802 const char *hexDigitChars)
804 unsigned int result = count;
806 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
808 part >>= (APFloatBase::integerPartWidth - 4 * count);
809 while (count--) {
810 dst[count] = hexDigitChars[part & 0xf];
811 part >>= 4;
814 return result;
/* Write out an unsigned decimal integer.  Returns a pointer just past the
   last digit written; no terminating NUL is added.  */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char buff[40], *p;

  /* Generate the digits least significant first ...  */
  p = buff;
  do
    *p++ = '0' + n % 10;
  while (n /= 10);

  /* ... then copy them out in reverse.  */
  do
    *dst++ = *--p;
  while (p != buff);

  return dst;
}
835 /* Write out a signed decimal integer. */
836 static char *
837 writeSignedDecimal (char *dst, int value)
839 if (value < 0) {
840 *dst++ = '-';
841 dst = writeUnsignedDecimal(dst, -(unsigned) value);
842 } else
843 dst = writeUnsignedDecimal(dst, value);
845 return dst;
848 namespace detail {
849 /* Constructors. */
850 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
851 unsigned int count;
853 semantics = ourSemantics;
854 count = partCount();
855 if (count > 1)
856 significand.parts = new integerPart[count];
859 void IEEEFloat::freeSignificand() {
860 if (needsCleanup())
861 delete [] significand.parts;
864 void IEEEFloat::assign(const IEEEFloat &rhs) {
865 assert(semantics == rhs.semantics);
867 sign = rhs.sign;
868 category = rhs.category;
869 exponent = rhs.exponent;
870 if (isFiniteNonZero() || category == fcNaN)
871 copySignificand(rhs);
874 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
875 assert(isFiniteNonZero() || category == fcNaN);
876 assert(rhs.partCount() >= partCount());
878 APInt::tcAssign(significandParts(), rhs.significandParts(),
879 partCount());
882 /* Make this number a NaN, with an arbitrary but deterministic value
883 for the significand. If double or longer, this is a signalling NaN,
884 which may not be ideal. If float, this is QNaN(0). */
885 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
886 category = fcNaN;
887 sign = Negative;
888 exponent = exponentNaN();
890 integerPart *significand = significandParts();
891 unsigned numParts = partCount();
893 APInt fill_storage;
894 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
895 // Finite-only types do not distinguish signalling and quiet NaN, so
896 // make them all signalling.
897 SNaN = false;
898 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
899 sign = true;
900 fill_storage = APInt::getZero(semantics->precision - 1);
901 } else {
902 fill_storage = APInt::getAllOnes(semantics->precision - 1);
904 fill = &fill_storage;
907 // Set the significand bits to the fill.
908 if (!fill || fill->getNumWords() < numParts)
909 APInt::tcSet(significand, 0, numParts);
910 if (fill) {
911 APInt::tcAssign(significand, fill->getRawData(),
912 std::min(fill->getNumWords(), numParts));
914 // Zero out the excess bits of the significand.
915 unsigned bitsToPreserve = semantics->precision - 1;
916 unsigned part = bitsToPreserve / 64;
917 bitsToPreserve %= 64;
918 significand[part] &= ((1ULL << bitsToPreserve) - 1);
919 for (part++; part != numParts; ++part)
920 significand[part] = 0;
923 unsigned QNaNBit = semantics->precision - 2;
925 if (SNaN) {
926 // We always have to clear the QNaN bit to make it an SNaN.
927 APInt::tcClearBit(significand, QNaNBit);
929 // If there are no bits set in the payload, we have to set
930 // *something* to make it a NaN instead of an infinity;
931 // conventionally, this is the next bit down from the QNaN bit.
932 if (APInt::tcIsZero(significand, numParts))
933 APInt::tcSetBit(significand, QNaNBit - 1);
934 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
935 // The only NaN is a quiet NaN, and it has no bits sets in the significand.
936 // Do nothing.
937 } else {
938 // We always have to set the QNaN bit to make it a QNaN.
939 APInt::tcSetBit(significand, QNaNBit);
942 // For x87 extended precision, we want to make a NaN, not a
943 // pseudo-NaN. Maybe we should expose the ability to make
944 // pseudo-NaNs?
945 if (semantics == &semX87DoubleExtended)
946 APInt::tcSetBit(significand, QNaNBit + 1);
949 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
950 if (this != &rhs) {
951 if (semantics != rhs.semantics) {
952 freeSignificand();
953 initialize(rhs.semantics);
955 assign(rhs);
958 return *this;
961 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
962 freeSignificand();
964 semantics = rhs.semantics;
965 significand = rhs.significand;
966 exponent = rhs.exponent;
967 category = rhs.category;
968 sign = rhs.sign;
970 rhs.semantics = &semBogus;
971 return *this;
974 bool IEEEFloat::isDenormal() const {
975 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
976 (APInt::tcExtractBit(significandParts(),
977 semantics->precision - 1) == 0);
980 bool IEEEFloat::isSmallest() const {
981 // The smallest number by magnitude in our format will be the smallest
982 // denormal, i.e. the floating point number with exponent being minimum
983 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
984 return isFiniteNonZero() && exponent == semantics->minExponent &&
985 significandMSB() == 0;
988 bool IEEEFloat::isSmallestNormalized() const {
989 return getCategory() == fcNormal && exponent == semantics->minExponent &&
990 isSignificandAllZerosExceptMSB();
993 bool IEEEFloat::isSignificandAllOnes() const {
994 // Test if the significand excluding the integral bit is all ones. This allows
995 // us to test for binade boundaries.
996 const integerPart *Parts = significandParts();
997 const unsigned PartCount = partCountForBits(semantics->precision);
998 for (unsigned i = 0; i < PartCount - 1; i++)
999 if (~Parts[i])
1000 return false;
1002 // Set the unused high bits to all ones when we compare.
1003 const unsigned NumHighBits =
1004 PartCount*integerPartWidth - semantics->precision + 1;
1005 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1006 "Can not have more high bits to fill than integerPartWidth");
1007 const integerPart HighBitFill =
1008 ~integerPart(0) << (integerPartWidth - NumHighBits);
1009 if (~(Parts[PartCount - 1] | HighBitFill))
1010 return false;
1012 return true;
1015 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1016 // Test if the significand excluding the integral bit is all ones except for
1017 // the least significant bit.
1018 const integerPart *Parts = significandParts();
1020 if (Parts[0] & 1)
1021 return false;
1023 const unsigned PartCount = partCountForBits(semantics->precision);
1024 for (unsigned i = 0; i < PartCount - 1; i++) {
1025 if (~Parts[i] & ~unsigned{!i})
1026 return false;
1029 // Set the unused high bits to all ones when we compare.
1030 const unsigned NumHighBits =
1031 PartCount * integerPartWidth - semantics->precision + 1;
1032 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1033 "Can not have more high bits to fill than integerPartWidth");
1034 const integerPart HighBitFill = ~integerPart(0)
1035 << (integerPartWidth - NumHighBits);
1036 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1037 return false;
1039 return true;
1042 bool IEEEFloat::isSignificandAllZeros() const {
1043 // Test if the significand excluding the integral bit is all zeros. This
1044 // allows us to test for binade boundaries.
1045 const integerPart *Parts = significandParts();
1046 const unsigned PartCount = partCountForBits(semantics->precision);
1048 for (unsigned i = 0; i < PartCount - 1; i++)
1049 if (Parts[i])
1050 return false;
1052 // Compute how many bits are used in the final word.
1053 const unsigned NumHighBits =
1054 PartCount*integerPartWidth - semantics->precision + 1;
1055 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1056 "clear than integerPartWidth");
1057 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1059 if (Parts[PartCount - 1] & HighBitMask)
1060 return false;
1062 return true;
1065 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1066 const integerPart *Parts = significandParts();
1067 const unsigned PartCount = partCountForBits(semantics->precision);
1069 for (unsigned i = 0; i < PartCount - 1; i++) {
1070 if (Parts[i])
1071 return false;
1074 const unsigned NumHighBits =
1075 PartCount * integerPartWidth - semantics->precision + 1;
1076 return Parts[PartCount - 1] == integerPart(1)
1077 << (integerPartWidth - NumHighBits);
1080 bool IEEEFloat::isLargest() const {
1081 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1082 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1083 // The largest number by magnitude in our format will be the floating point
1084 // number with maximum exponent and with significand that is all ones except
1085 // the LSB.
1086 return isFiniteNonZero() && exponent == semantics->maxExponent &&
1087 isSignificandAllOnesExceptLSB();
1088 } else {
1089 // The largest number by magnitude in our format will be the floating point
1090 // number with maximum exponent and with significand that is all ones.
1091 return isFiniteNonZero() && exponent == semantics->maxExponent &&
1092 isSignificandAllOnes();
1096 bool IEEEFloat::isInteger() const {
1097 // This could be made more efficient; I'm going for obviously correct.
1098 if (!isFinite()) return false;
1099 IEEEFloat truncated = *this;
1100 truncated.roundToIntegral(rmTowardZero);
1101 return compare(truncated) == cmpEqual;
1104 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1105 if (this == &rhs)
1106 return true;
1107 if (semantics != rhs.semantics ||
1108 category != rhs.category ||
1109 sign != rhs.sign)
1110 return false;
1111 if (category==fcZero || category==fcInfinity)
1112 return true;
1114 if (isFiniteNonZero() && exponent != rhs.exponent)
1115 return false;
1117 return std::equal(significandParts(), significandParts() + partCount(),
1118 rhs.significandParts());
1121 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1122 initialize(&ourSemantics);
1123 sign = 0;
1124 category = fcNormal;
1125 zeroSignificand();
1126 exponent = ourSemantics.precision - 1;
1127 significandParts()[0] = value;
1128 normalize(rmNearestTiesToEven, lfExactlyZero);
1131 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
1132 initialize(&ourSemantics);
1133 makeZero(false);
1136 // Delegate to the previous constructor, because later copy constructor may
1137 // actually inspects category, which can't be garbage.
1138 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
1139 : IEEEFloat(ourSemantics) {}
1141 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
1142 initialize(rhs.semantics);
1143 assign(rhs);
1146 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
1147 *this = std::move(rhs);
1150 IEEEFloat::~IEEEFloat() { freeSignificand(); }
1152 unsigned int IEEEFloat::partCount() const {
1153 return partCountForBits(semantics->precision + 1);
1156 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
1157 return const_cast<IEEEFloat *>(this)->significandParts();
1160 IEEEFloat::integerPart *IEEEFloat::significandParts() {
1161 if (partCount() > 1)
1162 return significand.parts;
1163 else
1164 return &significand.part;
1167 void IEEEFloat::zeroSignificand() {
1168 APInt::tcSet(significandParts(), 0, partCount());
1171 /* Increment an fcNormal floating point number's significand. */
1172 void IEEEFloat::incrementSignificand() {
1173 integerPart carry;
1175 carry = APInt::tcIncrement(significandParts(), partCount());
1177 /* Our callers should never cause us to overflow. */
1178 assert(carry == 0);
1179 (void)carry;
1182 /* Add the significand of the RHS. Returns the carry flag. */
1183 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1184 integerPart *parts;
1186 parts = significandParts();
1188 assert(semantics == rhs.semantics);
1189 assert(exponent == rhs.exponent);
1191 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1194 /* Subtract the significand of the RHS with a borrow flag. Returns
1195 the borrow flag. */
1196 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1197 integerPart borrow) {
1198 integerPart *parts;
1200 parts = significandParts();
1202 assert(semantics == rhs.semantics);
1203 assert(exponent == rhs.exponent);
1205 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1206 partCount());
1209 /* Multiply our significand by the significand of the RHS. If ADDEND is
1210 non-zero, add it on to the full-precision result of the multiplication
1211 (fused multiply-add). Returns the lost fraction. The result is left
     un-normalized when the product's MSB falls below PRECISION; the caller
     is expected to call normalize() if a normalized value is needed. */
1212 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1213 IEEEFloat addend) {
1214 unsigned int omsb; // One, not zero, based MSB.
1215 unsigned int partsCount, newPartsCount, precision;
1216 integerPart *lhsSignificand;
// Stack buffer used for the full product whenever it fits in four parts.
1217 integerPart scratch[4];
1218 integerPart *fullSignificand;
1219 lostFraction lost_fraction;
1220 bool ignored;
1222 assert(semantics == rhs.semantics);
1224 precision = semantics->precision;
1226 // Allocate space for twice as many bits as the original significand, plus one
1227 // extra bit for the addition to overflow into.
1228 newPartsCount = partCountForBits(precision * 2 + 1);
// Larger products live on the heap; released by the matching delete [] at the
// end of this function.
1230 if (newPartsCount > 4)
1231 fullSignificand = new integerPart[newPartsCount];
1232 else
1233 fullSignificand = scratch;
1235 lhsSignificand = significandParts();
1236 partsCount = partCount();
1238 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1239 rhs.significandParts(), partsCount, partsCount);
1241 lost_fraction = lfExactlyZero;
1242 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1243 exponent += rhs.exponent;
1245 // Assume the operands involved in the multiplication are single-precision
1246 // FP, and the two multiplicants are:
1247 // *this = a23 . a22 ... a0 * 2^e1
1248 // rhs = b23 . b22 ... b0 * 2^e2
1249 // the result of multiplication is:
1250 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1251 // Note that there are three significant bits at the left-hand side of the
1252 // radix point: two for the multiplication, and an overflow bit for the
1253 // addition (that will always be zero at this point). Move the radix point
1254 // toward left by two bits, and adjust exponent accordingly.
1255 exponent += 2;
1257 if (addend.isNonZero()) {
1258 // The intermediate result of the multiplication has "2 * precision"
1259 // significant bits; adjust the addend to be consistent with mul result.
1261 Significand savedSignificand = significand;
1262 const fltSemantics *savedSemantics = semantics;
1263 fltSemantics extendedSemantics;
1264 opStatus status;
1265 unsigned int extendedPrecision;
1267 // Normalize our MSB to one below the top bit to allow for overflow.
1268 extendedPrecision = 2 * precision + 1;
1269 if (omsb != extendedPrecision - 1) {
1270 assert(extendedPrecision > omsb);
1271 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1272 (extendedPrecision - 1) - omsb);
1273 exponent -= (extendedPrecision - 1) - omsb;
1276 /* Create new semantics. */
1277 extendedSemantics = *semantics;
1278 extendedSemantics.precision = extendedPrecision;
// Temporarily make *this look like a float of the extended precision whose
// significand is the full product, so that addOrSubtractSignificand() below
// operates on the wide intermediate. The saved state is restored afterwards.
1280 if (newPartsCount == 1)
1281 significand.part = fullSignificand[0];
1282 else
1283 significand.parts = fullSignificand;
1284 semantics = &extendedSemantics;
1286 // Make a copy so we can convert it to the extended semantics.
1287 // Note that we cannot convert the addend directly, as the extendedSemantics
1288 // is a local variable (which we take a reference to).
1289 IEEEFloat extendedAddend(addend);
1290 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1291 assert(status == opOK);
1292 (void)status;
1294 // Shift the significand of the addend right by one bit. This guarantees
1295 // that the high bit of the significand is zero (same as fullSignificand),
1296 // so the addition will overflow (if it does overflow at all) into the top bit.
1297 lost_fraction = extendedAddend.shiftSignificandRight(1);
1298 assert(lost_fraction == lfExactlyZero &&
1299 "Lost precision while shifting addend for fused-multiply-add.");
1301 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1303 /* Restore our state. */
// With a single part the sum was computed in significand.part itself, so it
// has to be copied back into the product buffer before restoring.
1304 if (newPartsCount == 1)
1305 fullSignificand[0] = significand.part;
1306 significand = savedSignificand;
1307 semantics = savedSemantics;
1309 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1312 // Convert the result having "2 * precision" significant-bits back to the one
1313 // having "precision" significant-bits. First, move the radix point from
1314 // position "2*precision" to "precision - 1". The exponent needs to be
1315 // adjusted by "2*precision" - "precision - 1" = "precision + 1".
1316 exponent -= precision + 1;
1318 // In case MSB resides at the left-hand side of radix point, shift the
1319 // mantissa right by some amount to make sure the MSB reside right before
1320 // the radix point (i.e. "MSB . rest-significant-bits").
1322 // Note that the result is not normalized when "omsb < precision". So, the
1323 // caller needs to call IEEEFloat::normalize() if normalized value is
1324 // expected.
1325 if (omsb > precision) {
1326 unsigned int bits, significantParts;
1327 lostFraction lf;
1329 bits = omsb - precision;
1330 significantParts = partCountForBits(omsb);
1331 lf = shiftRight(fullSignificand, significantParts, bits);
1332 lost_fraction = combineLostFractions(lf, lost_fraction);
1333 exponent += bits;
// Copy the low partsCount parts of the (possibly shifted) product back into
// our own significand.
1336 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1338 if (newPartsCount > 4)
1339 delete [] fullSignificand;
1341 return lost_fraction;
1344 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1345 return multiplySignificand(rhs, IEEEFloat(*semantics));
1348 /* Divide our significand by the significand of RHS using bit-by-bit long
     division. The quotient replaces our significand, the exponent is
     adjusted accordingly, and the lost fraction is returned. */
1349 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1350 unsigned int bit, i, partsCount;
1351 const integerPart *rhsSignificand;
1352 integerPart *lhsSignificand, *dividend, *divisor;
// Stack buffer holding both working copies when partsCount <= 2.
1353 integerPart scratch[4];
1354 lostFraction lost_fraction;
1356 assert(semantics == rhs.semantics);
1358 lhsSignificand = significandParts();
1359 rhsSignificand = rhs.significandParts();
1360 partsCount = partCount();
// One buffer holds the dividend followed by the divisor (2 * partsCount
// parts); it is heap-allocated only when it does not fit in scratch.
1362 if (partsCount > 2)
1363 dividend = new integerPart[partsCount * 2];
1364 else
1365 dividend = scratch;
1367 divisor = dividend + partsCount;
1369 /* Copy the dividend and divisor as they will be modified in-place. */
// Our own significand is cleared at the same time; quotient bits are set
// into it by the long-division loop below.
1370 for (i = 0; i < partsCount; i++) {
1371 dividend[i] = lhsSignificand[i];
1372 divisor[i] = rhsSignificand[i];
1373 lhsSignificand[i] = 0;
1376 exponent -= rhs.exponent;
1378 unsigned int precision = semantics->precision;
1380 /* Normalize the divisor. */
1381 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1382 if (bit) {
1383 exponent += bit;
1384 APInt::tcShiftLeft(divisor, partsCount, bit);
1387 /* Normalize the dividend. */
1388 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1389 if (bit) {
1390 exponent -= bit;
1391 APInt::tcShiftLeft(dividend, partsCount, bit);
1394 /* Ensure the dividend >= divisor initially for the loop below.
1395 Incidentally, this means that the division loop below is
1396 guaranteed to set the integer bit to one. */
1397 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1398 exponent--;
1399 APInt::tcShiftLeft(dividend, partsCount, 1);
1400 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1403 /* Long division. */
// Produces one quotient bit per iteration, from bit (precision - 1) down to
// bit 0; the running remainder is shifted left after every step.
1404 for (bit = precision; bit; bit -= 1) {
1405 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1406 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1407 APInt::tcSetBit(lhsSignificand, bit - 1);
1410 APInt::tcShiftLeft(dividend, partsCount, 1);
1413 /* Figure out the lost fraction. */
// dividend now holds the remainder shifted left by one (the loop's trailing
// shift), so comparing it with the divisor compares the remainder with half
// of the divisor.
1414 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1416 if (cmp > 0)
1417 lost_fraction = lfMoreThanHalf;
1418 else if (cmp == 0)
1419 lost_fraction = lfExactlyHalf;
1420 else if (APInt::tcIsZero(dividend, partsCount))
1421 lost_fraction = lfExactlyZero;
1422 else
1423 lost_fraction = lfLessThanHalf;
// Release the working buffer if it was heap-allocated above.
1425 if (partsCount > 2)
1426 delete [] dividend;
1428 return lost_fraction;
1431 unsigned int IEEEFloat::significandMSB() const {
1432 return APInt::tcMSB(significandParts(), partCount());
1435 unsigned int IEEEFloat::significandLSB() const {
1436 return APInt::tcLSB(significandParts(), partCount());
1439 /* Note that a zero result is NOT normalized to fcZero. */
1440 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1441 /* Our exponent should not overflow. */
1442 assert((ExponentType) (exponent + bits) >= exponent);
1444 exponent += bits;
1446 return shiftRight(significandParts(), partCount(), bits);
1449 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1450 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1451 assert(bits < semantics->precision);
1453 if (bits) {
1454 unsigned int partsCount = partCount();
1456 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1457 exponent -= bits;
1459 assert(!APInt::tcIsZero(significandParts(), partsCount));
1463 IEEEFloat::cmpResult
1464 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1465 int compare;
1467 assert(semantics == rhs.semantics);
1468 assert(isFiniteNonZero());
1469 assert(rhs.isFiniteNonZero());
1471 compare = exponent - rhs.exponent;
1473 /* If exponents are equal, do an unsigned bignum comparison of the
1474 significands. */
1475 if (compare == 0)
1476 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1477 partCount());
1479 if (compare > 0)
1480 return cmpGreaterThan;
1481 else if (compare < 0)
1482 return cmpLessThan;
1483 else
1484 return cmpEqual;
1487 /* Set the least significant BITS bits of a bignum, clear the
1488 rest. */
1489 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1490 unsigned bits) {
1491 unsigned i = 0;
1492 while (bits > APInt::APINT_BITS_PER_WORD) {
1493 dst[i++] = ~(APInt::WordType)0;
1494 bits -= APInt::APINT_BITS_PER_WORD;
1497 if (bits)
1498 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1500 while (i < parts)
1501 dst[i++] = 0;
1504 /* Handle overflow. Sign is preserved. We either become infinity or
1505 the largest finite number. */
1506 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1507 /* Infinity? */
1508 if (rounding_mode == rmNearestTiesToEven ||
1509 rounding_mode == rmNearestTiesToAway ||
1510 (rounding_mode == rmTowardPositive && !sign) ||
1511 (rounding_mode == rmTowardNegative && sign)) {
1512 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1513 makeNaN(false, sign);
1514 else
1515 category = fcInfinity;
1516 return (opStatus) (opOverflow | opInexact);
1519 /* Otherwise we become the largest finite number. */
1520 category = fcNormal;
1521 exponent = semantics->maxExponent;
1522 tcSetLeastSignificantBits(significandParts(), partCount(),
1523 semantics->precision);
1524 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1525 semantics->nanEncoding == fltNanEncoding::AllOnes)
1526 APInt::tcClearBit(significandParts(), 0);
1528 return opInexact;
1531 /* Returns TRUE if, when truncating the current number, with BIT the
1532 new LSB, with the given lost fraction and rounding mode, the result
1533 would need to be rounded away from zero (i.e., by increasing the
1534 signficand). This routine must work for fcZero of both signs, and
1535 fcNormal numbers. */
1536 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1537 lostFraction lost_fraction,
1538 unsigned int bit) const {
1539 /* NaNs and infinities should not have lost fractions. */
1540 assert(isFiniteNonZero() || category == fcZero);
1542 /* Current callers never pass this so we don't handle it. */
1543 assert(lost_fraction != lfExactlyZero);
1545 switch (rounding_mode) {
1546 case rmNearestTiesToAway:
1547 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1549 case rmNearestTiesToEven:
1550 if (lost_fraction == lfMoreThanHalf)
1551 return true;
1553 /* Our zeroes don't have a significand to test. */
1554 if (lost_fraction == lfExactlyHalf && category != fcZero)
1555 return APInt::tcExtractBit(significandParts(), bit);
1557 return false;
1559 case rmTowardZero:
1560 return false;
1562 case rmTowardPositive:
1563 return !sign;
1565 case rmTowardNegative:
1566 return sign;
1568 default:
1569 break;
1571 llvm_unreachable("Invalid rounding mode found");
// Normalizes a finite non-zero value and rounds it according to
// ROUNDING_MODE, taking into account the fraction LOST_FRACTION that earlier
// steps already dropped. Values that are not finite non-zero are returned
// untouched with opOK. Returns the resulting status flags (opOK, opInexact,
// opUnderflow | opInexact, or whatever handleOverflow() reports).
1574 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1575 lostFraction lost_fraction) {
1576 unsigned int omsb; /* One, not zero, based MSB. */
1577 int exponentChange;
1579 if (!isFiniteNonZero())
1580 return opOK;
1582 /* Before rounding normalize the exponent of fcNormal numbers. */
1583 omsb = significandMSB() + 1;
1585 if (omsb) {
1586 /* OMSB is numbered from 1. We want to place it in the integer
1587 bit numbered PRECISION if possible, with a compensating change in
1588 the exponent. */
1589 exponentChange = omsb - semantics->precision;
1591 /* If the resulting exponent is too high, overflow according to
1592 the rounding mode. */
1593 if (exponent + exponentChange > semantics->maxExponent)
1594 return handleOverflow(rounding_mode);
1596 /* Subnormal numbers have exponent minExponent, and their MSB
1597 is forced based on that. */
1598 if (exponent + exponentChange < semantics->minExponent)
1599 exponentChange = semantics->minExponent - exponent;
1601 /* Shifting left is easy as we don't lose precision. */
1602 if (exponentChange < 0) {
1603 assert(lost_fraction == lfExactlyZero);
1605 shiftSignificandLeft(-exponentChange);
1607 return opOK;
1610 if (exponentChange > 0) {
1611 lostFraction lf;
1613 /* Shift right and capture any new lost fraction. */
1614 lf = shiftSignificandRight(exponentChange);
1616 lost_fraction = combineLostFractions(lf, lost_fraction);
1618 /* Keep OMSB up-to-date. */
1619 if (omsb > (unsigned) exponentChange)
1620 omsb -= exponentChange;
1621 else
1622 omsb = 0;
1626 // The all-ones values is an overflow if NaN is all ones. If NaN is
1627 // represented by negative zero, then it is a valid finite value.
1628 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1629 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1630 exponent == semantics->maxExponent && isSignificandAllOnes())
1631 return handleOverflow(rounding_mode);
1633 /* Now round the number according to rounding_mode given the lost
1634 fraction. */
1636 /* As specified in IEEE 754, since we do not trap we do not report
1637 underflow for exact results. */
1638 if (lost_fraction == lfExactlyZero) {
1639 /* Canonicalize zeroes. */
1640 if (omsb == 0) {
1641 category = fcZero;
// Formats that encode NaN as negative zero only have a positive zero.
1642 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1643 sign = false;
1646 return opOK;
1649 /* Increment the significand if we're rounding away from zero. */
1650 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
// A significand that was shifted out entirely is rounded up to the smallest
// denormal, so pin the exponent to the minimum first.
1651 if (omsb == 0)
1652 exponent = semantics->minExponent;
1654 incrementSignificand();
1655 omsb = significandMSB() + 1;
1657 /* Did the significand increment overflow? */
1658 if (omsb == (unsigned) semantics->precision + 1) {
1659 /* Renormalize by incrementing the exponent and shifting our
1660 significand right one. However if we already have the
1661 maximum exponent we overflow to infinity. */
1662 if (exponent == semantics->maxExponent)
1663 // Invoke overflow handling with a rounding mode that will guarantee
1664 // that the result gets turned into the correct infinity representation.
1665 // This is needed instead of just setting the category to infinity to
1666 // account for 8-bit floating point types that have no inf, only NaN.
1667 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1669 shiftSignificandRight(1);
1671 return opInexact;
1674 // The all-ones values is an overflow if NaN is all ones. If NaN is
1675 // represented by negative zero, then it is a valid finite value.
1676 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1677 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1678 exponent == semantics->maxExponent && isSignificandAllOnes())
1679 return handleOverflow(rounding_mode);
1682 /* The normal case - we were and are not denormal, and any
1683 significand increment above didn't overflow. */
1684 if (omsb == semantics->precision)
1685 return opInexact;
1687 /* We have a non-zero denormal. */
1688 assert(omsb < semantics->precision);
1690 /* Canonicalize zeroes. */
1691 if (omsb == 0) {
1692 category = fcZero;
1693 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1694 sign = false;
1697 /* The fcZero case is a denormal that underflowed to zero. */
1698 return (opStatus) (opUnderflow | opInexact);
1701 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1702 bool subtract) {
1703 switch (PackCategoriesIntoKey(category, rhs.category)) {
1704 default:
1705 llvm_unreachable(nullptr);
1707 case PackCategoriesIntoKey(fcZero, fcNaN):
1708 case PackCategoriesIntoKey(fcNormal, fcNaN):
1709 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1710 assign(rhs);
1711 [[fallthrough]];
1712 case PackCategoriesIntoKey(fcNaN, fcZero):
1713 case PackCategoriesIntoKey(fcNaN, fcNormal):
1714 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1715 case PackCategoriesIntoKey(fcNaN, fcNaN):
1716 if (isSignaling()) {
1717 makeQuiet();
1718 return opInvalidOp;
1720 return rhs.isSignaling() ? opInvalidOp : opOK;
1722 case PackCategoriesIntoKey(fcNormal, fcZero):
1723 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1724 case PackCategoriesIntoKey(fcInfinity, fcZero):
1725 return opOK;
1727 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1728 case PackCategoriesIntoKey(fcZero, fcInfinity):
1729 category = fcInfinity;
1730 sign = rhs.sign ^ subtract;
1731 return opOK;
1733 case PackCategoriesIntoKey(fcZero, fcNormal):
1734 assign(rhs);
1735 sign = rhs.sign ^ subtract;
1736 return opOK;
1738 case PackCategoriesIntoKey(fcZero, fcZero):
1739 /* Sign depends on rounding mode; handled by caller. */
1740 return opOK;
1742 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1743 /* Differently signed infinities can only be validly
1744 subtracted. */
1745 if (((sign ^ rhs.sign)!=0) != subtract) {
1746 makeNaN();
1747 return opInvalidOp;
1750 return opOK;
1752 case PackCategoriesIntoKey(fcNormal, fcNormal):
1753 return opDivByZero;
1757 /* Add or subtract two normal numbers. The exponents are aligned first by
     shifting one of the significands, then the significands are added or
     subtracted in place. Returns the lost fraction; the result is not
     normalized here. */
1758 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1759 bool subtract) {
1760 integerPart carry;
1761 lostFraction lost_fraction;
1762 int bits;
1764 /* Determine if the operation on the absolute values is effectively
1765 an addition or subtraction. */
1766 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1768 /* Are we bigger exponent-wise than the RHS? */
1769 bits = exponent - rhs.exponent;
1771 /* Subtraction is more subtle than one might naively expect. */
1772 if (subtract) {
1773 IEEEFloat temp_rhs(rhs);
// Align the operands: the one with the smaller exponent is shifted right by
// one bit less than the exponent difference, and the other is shifted left
// by one bit to make up for it.
1775 if (bits == 0)
1776 lost_fraction = lfExactlyZero;
1777 else if (bits > 0) {
1778 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1779 shiftSignificandLeft(1);
1780 } else {
1781 lost_fraction = shiftSignificandRight(-bits - 1);
1782 temp_rhs.shiftSignificandLeft(1);
1785 // Should we reverse the subtraction.
// A non-zero lost fraction is passed as an incoming borrow to the
// subtraction.
1786 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1787 carry = temp_rhs.subtractSignificand
1788 (*this, lost_fraction != lfExactlyZero);
1789 copySignificand(temp_rhs);
1790 sign = !sign;
1791 } else {
1792 carry = subtractSignificand
1793 (temp_rhs, lost_fraction != lfExactlyZero);
1796 /* Invert the lost fraction - it was on the RHS and
1797 subtracted. */
1798 if (lost_fraction == lfLessThanHalf)
1799 lost_fraction = lfMoreThanHalf;
1800 else if (lost_fraction == lfMoreThanHalf)
1801 lost_fraction = lfLessThanHalf;
1803 /* The code above is intended to ensure that no borrow is
1804 necessary. */
1805 assert(!carry);
1806 (void)carry;
1807 } else {
// Addition: shift whichever operand has the smaller exponent right by the
// full exponent difference, then add.
1808 if (bits > 0) {
1809 IEEEFloat temp_rhs(rhs);
1811 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1812 carry = addSignificand(temp_rhs);
1813 } else {
1814 lost_fraction = shiftSignificandRight(-bits);
1815 carry = addSignificand(rhs);
1818 /* We have a guard bit; generating a carry cannot happen. */
1819 assert(!carry);
1820 (void)carry;
1823 return lost_fraction;
1826 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1827 switch (PackCategoriesIntoKey(category, rhs.category)) {
1828 default:
1829 llvm_unreachable(nullptr);
1831 case PackCategoriesIntoKey(fcZero, fcNaN):
1832 case PackCategoriesIntoKey(fcNormal, fcNaN):
1833 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1834 assign(rhs);
1835 sign = false;
1836 [[fallthrough]];
1837 case PackCategoriesIntoKey(fcNaN, fcZero):
1838 case PackCategoriesIntoKey(fcNaN, fcNormal):
1839 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1840 case PackCategoriesIntoKey(fcNaN, fcNaN):
1841 sign ^= rhs.sign; // restore the original sign
1842 if (isSignaling()) {
1843 makeQuiet();
1844 return opInvalidOp;
1846 return rhs.isSignaling() ? opInvalidOp : opOK;
1848 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1849 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1850 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1851 category = fcInfinity;
1852 return opOK;
1854 case PackCategoriesIntoKey(fcZero, fcNormal):
1855 case PackCategoriesIntoKey(fcNormal, fcZero):
1856 case PackCategoriesIntoKey(fcZero, fcZero):
1857 category = fcZero;
1858 return opOK;
1860 case PackCategoriesIntoKey(fcZero, fcInfinity):
1861 case PackCategoriesIntoKey(fcInfinity, fcZero):
1862 makeNaN();
1863 return opInvalidOp;
1865 case PackCategoriesIntoKey(fcNormal, fcNormal):
1866 return opOK;
1870 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1871 switch (PackCategoriesIntoKey(category, rhs.category)) {
1872 default:
1873 llvm_unreachable(nullptr);
1875 case PackCategoriesIntoKey(fcZero, fcNaN):
1876 case PackCategoriesIntoKey(fcNormal, fcNaN):
1877 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1878 assign(rhs);
1879 sign = false;
1880 [[fallthrough]];
1881 case PackCategoriesIntoKey(fcNaN, fcZero):
1882 case PackCategoriesIntoKey(fcNaN, fcNormal):
1883 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1884 case PackCategoriesIntoKey(fcNaN, fcNaN):
1885 sign ^= rhs.sign; // restore the original sign
1886 if (isSignaling()) {
1887 makeQuiet();
1888 return opInvalidOp;
1890 return rhs.isSignaling() ? opInvalidOp : opOK;
1892 case PackCategoriesIntoKey(fcInfinity, fcZero):
1893 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1894 case PackCategoriesIntoKey(fcZero, fcInfinity):
1895 case PackCategoriesIntoKey(fcZero, fcNormal):
1896 return opOK;
1898 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1899 category = fcZero;
1900 return opOK;
1902 case PackCategoriesIntoKey(fcNormal, fcZero):
1903 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1904 makeNaN(false, sign);
1905 else
1906 category = fcInfinity;
1907 return opDivByZero;
1909 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1910 case PackCategoriesIntoKey(fcZero, fcZero):
1911 makeNaN();
1912 return opInvalidOp;
1914 case PackCategoriesIntoKey(fcNormal, fcNormal):
1915 return opOK;
1919 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1920 switch (PackCategoriesIntoKey(category, rhs.category)) {
1921 default:
1922 llvm_unreachable(nullptr);
1924 case PackCategoriesIntoKey(fcZero, fcNaN):
1925 case PackCategoriesIntoKey(fcNormal, fcNaN):
1926 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1927 assign(rhs);
1928 [[fallthrough]];
1929 case PackCategoriesIntoKey(fcNaN, fcZero):
1930 case PackCategoriesIntoKey(fcNaN, fcNormal):
1931 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1932 case PackCategoriesIntoKey(fcNaN, fcNaN):
1933 if (isSignaling()) {
1934 makeQuiet();
1935 return opInvalidOp;
1937 return rhs.isSignaling() ? opInvalidOp : opOK;
1939 case PackCategoriesIntoKey(fcZero, fcInfinity):
1940 case PackCategoriesIntoKey(fcZero, fcNormal):
1941 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1942 return opOK;
1944 case PackCategoriesIntoKey(fcNormal, fcZero):
1945 case PackCategoriesIntoKey(fcInfinity, fcZero):
1946 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1947 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1948 case PackCategoriesIntoKey(fcZero, fcZero):
1949 makeNaN();
1950 return opInvalidOp;
1952 case PackCategoriesIntoKey(fcNormal, fcNormal):
1953 return opOK;
1957 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1958 switch (PackCategoriesIntoKey(category, rhs.category)) {
1959 default:
1960 llvm_unreachable(nullptr);
1962 case PackCategoriesIntoKey(fcZero, fcNaN):
1963 case PackCategoriesIntoKey(fcNormal, fcNaN):
1964 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1965 assign(rhs);
1966 [[fallthrough]];
1967 case PackCategoriesIntoKey(fcNaN, fcZero):
1968 case PackCategoriesIntoKey(fcNaN, fcNormal):
1969 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1970 case PackCategoriesIntoKey(fcNaN, fcNaN):
1971 if (isSignaling()) {
1972 makeQuiet();
1973 return opInvalidOp;
1975 return rhs.isSignaling() ? opInvalidOp : opOK;
1977 case PackCategoriesIntoKey(fcZero, fcInfinity):
1978 case PackCategoriesIntoKey(fcZero, fcNormal):
1979 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1980 return opOK;
1982 case PackCategoriesIntoKey(fcNormal, fcZero):
1983 case PackCategoriesIntoKey(fcInfinity, fcZero):
1984 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1985 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1986 case PackCategoriesIntoKey(fcZero, fcZero):
1987 makeNaN();
1988 return opInvalidOp;
1990 case PackCategoriesIntoKey(fcNormal, fcNormal):
1991 return opDivByZero; // fake status, indicating this is not a special case
1995 /* Change sign. */
1996 void IEEEFloat::changeSign() {
1997 // With NaN-as-negative-zero, neither NaN or negative zero can change
1998 // their signs.
1999 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2000 (isZero() || isNaN()))
2001 return;
2002 /* Look mummy, this one's easy. */
2003 sign = !sign;
2006 /* Normalized addition or subtraction. */
2007 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2008 roundingMode rounding_mode,
2009 bool subtract) {
2010 opStatus fs;
2012 fs = addOrSubtractSpecials(rhs, subtract);
2014 /* This return code means it was not a simple case. */
2015 if (fs == opDivByZero) {
2016 lostFraction lost_fraction;
2018 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2019 fs = normalize(rounding_mode, lost_fraction);
2021 /* Can only be zero if we lost no fraction. */
2022 assert(category != fcZero || lost_fraction == lfExactlyZero);
2025 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2026 positive zero unless rounding to minus infinity, except that
2027 adding two like-signed zeroes gives that zero. */
2028 if (category == fcZero) {
2029 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2030 sign = (rounding_mode == rmTowardNegative);
2031 // NaN-in-negative-zero means zeros need to be normalized to +0.
2032 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2033 sign = false;
2036 return fs;
2039 /* Normalized addition. */
2040 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
2041 roundingMode rounding_mode) {
2042 return addOrSubtract(rhs, rounding_mode, false);
2045 /* Normalized subtraction. */
2046 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
2047 roundingMode rounding_mode) {
2048 return addOrSubtract(rhs, rounding_mode, true);
2051 /* Normalized multiply. */
2052 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
2053 roundingMode rounding_mode) {
2054 opStatus fs;
2056 sign ^= rhs.sign;
2057 fs = multiplySpecials(rhs);
2059 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2060 sign = false;
2061 if (isFiniteNonZero()) {
2062 lostFraction lost_fraction = multiplySignificand(rhs);
2063 fs = normalize(rounding_mode, lost_fraction);
2064 if (lost_fraction != lfExactlyZero)
2065 fs = (opStatus) (fs | opInexact);
2068 return fs;
2071 /* Normalized divide. */
2072 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
2073 roundingMode rounding_mode) {
2074 opStatus fs;
2076 sign ^= rhs.sign;
2077 fs = divideSpecials(rhs);
2079 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2080 sign = false;
2081 if (isFiniteNonZero()) {
2082 lostFraction lost_fraction = divideSignificand(rhs);
2083 fs = normalize(rounding_mode, lost_fraction);
2084 if (lost_fraction != lfExactlyZero)
2085 fs = (opStatus) (fs | opInexact);
2088 return fs;
2091 /* Normalized remainder. */
2092 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
2093 opStatus fs;
2094 unsigned int origSign = sign;
2096 // First handle the special cases.
2097 fs = remainderSpecials(rhs);
2098 if (fs != opDivByZero)
2099 return fs;
2101 fs = opOK;
2103 // Make sure the current value is less than twice the denom. If the addition
2104 // did not succeed (an overflow has happened), which means that the finite
2105 // value we currently posses must be less than twice the denom (as we are
2106 // using the same semantics).
2107 IEEEFloat P2 = rhs;
2108 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2109 fs = mod(P2);
2110 assert(fs == opOK);
2113 // Lets work with absolute numbers.
2114 IEEEFloat P = rhs;
2115 P.sign = false;
2116 sign = false;
2119 // To calculate the remainder we use the following scheme.
2121 // The remainder is defained as follows:
2123 // remainder = numer - rquot * denom = x - r * p
2125 // Where r is the result of: x/p, rounded toward the nearest integral value
2126 // (with halfway cases rounded toward the even number).
2128 // Currently, (after x mod 2p):
2129 // r is the number of 2p's present inside x, which is inherently, an even
2130 // number of p's.
2132 // We may split the remaining calculation into 4 options:
2133 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2134 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2135 // are done as well.
2136 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2137 // to subtract 1p at least once.
2138 // - if x >= p then we must subtract p at least once, as x must be a
2139 // remainder.
2141 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2143 // We can now split the remaining calculation to the following 3 options:
2144 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2145 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2146 // must round up to the next even number. so we must subtract p once more.
2147 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2148 // integral, and subtract p once more.
2151 // Extend the semantics to prevent an overflow/underflow or inexact result.
2152 bool losesInfo;
2153 fltSemantics extendedSemantics = *semantics;
2154 extendedSemantics.maxExponent++;
2155 extendedSemantics.minExponent--;
2156 extendedSemantics.precision += 2;
2158 IEEEFloat VEx = *this;
2159 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2160 assert(fs == opOK && !losesInfo);
2161 IEEEFloat PEx = P;
2162 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2163 assert(fs == opOK && !losesInfo);
2165 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2166 // any fraction.
2167 fs = VEx.add(VEx, rmNearestTiesToEven);
2168 assert(fs == opOK);
2170 if (VEx.compare(PEx) == cmpGreaterThan) {
2171 fs = subtract(P, rmNearestTiesToEven);
2172 assert(fs == opOK);
2174 // Make VEx = this.add(this), but because we have different semantics, we do
2175 // not want to `convert` again, so we just subtract PEx twice (which equals
2176 // to the desired value).
2177 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2178 assert(fs == opOK);
2179 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2180 assert(fs == opOK);
2182 cmpResult result = VEx.compare(PEx);
2183 if (result == cmpGreaterThan || result == cmpEqual) {
2184 fs = subtract(P, rmNearestTiesToEven);
2185 assert(fs == opOK);
2189 if (isZero()) {
2190 sign = origSign; // IEEE754 requires this
2191 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2192 // But some 8-bit floats only have positive 0.
2193 sign = false;
2196 else
2197 sign ^= origSign;
2198 return fs;
2201 /* Normalized llvm frem (C fmod). */
2202 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2203 opStatus fs;
2204 fs = modSpecials(rhs);
2205 unsigned int origSign = sign;
2207 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2208 compareAbsoluteValue(rhs) != cmpLessThan) {
2209 int Exp = ilogb(*this) - ilogb(rhs);
2210 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2211 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2212 // check for it.
2213 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2214 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2215 V.sign = sign;
2217 fs = subtract(V, rmNearestTiesToEven);
2218 assert(fs==opOK);
2220 if (isZero()) {
2221 sign = origSign; // fmod requires this
2222 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2223 sign = false;
2225 return fs;
2228 /* Normalized fused-multiply-add. */
2229 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2230 const IEEEFloat &addend,
2231 roundingMode rounding_mode) {
2232 opStatus fs;
2234 /* Post-multiplication sign, before addition. */
2235 sign ^= multiplicand.sign;
2237 /* If and only if all arguments are normal do we need to do an
2238 extended-precision calculation. */
2239 if (isFiniteNonZero() &&
2240 multiplicand.isFiniteNonZero() &&
2241 addend.isFinite()) {
2242 lostFraction lost_fraction;
2244 lost_fraction = multiplySignificand(multiplicand, addend);
2245 fs = normalize(rounding_mode, lost_fraction);
2246 if (lost_fraction != lfExactlyZero)
2247 fs = (opStatus) (fs | opInexact);
2249 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2250 positive zero unless rounding to minus infinity, except that
2251 adding two like-signed zeroes gives that zero. */
2252 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2253 sign = (rounding_mode == rmTowardNegative);
2254 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2255 sign = false;
2257 } else {
2258 fs = multiplySpecials(multiplicand);
2260 /* FS can only be opOK or opInvalidOp. There is no more work
2261 to do in the latter case. The IEEE-754R standard says it is
2262 implementation-defined in this case whether, if ADDEND is a
2263 quiet NaN, we raise invalid op; this implementation does so.
2265 If we need to do the addition we can do so with normal
2266 precision. */
2267 if (fs == opOK)
2268 fs = addOrSubtract(addend, rounding_mode, false);
2271 return fs;
2274 /* Rounding-mode correct round to integral value. */
2275 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2276 opStatus fs;
2278 if (isInfinity())
2279 // [IEEE Std 754-2008 6.1]:
2280 // The behavior of infinity in floating-point arithmetic is derived from the
2281 // limiting cases of real arithmetic with operands of arbitrarily
2282 // large magnitude, when such a limit exists.
2283 // ...
2284 // Operations on infinite operands are usually exact and therefore signal no
2285 // exceptions ...
2286 return opOK;
2288 if (isNaN()) {
2289 if (isSignaling()) {
2290 // [IEEE Std 754-2008 6.2]:
2291 // Under default exception handling, any operation signaling an invalid
2292 // operation exception and for which a floating-point result is to be
2293 // delivered shall deliver a quiet NaN.
2294 makeQuiet();
2295 // [IEEE Std 754-2008 6.2]:
2296 // Signaling NaNs shall be reserved operands that, under default exception
2297 // handling, signal the invalid operation exception(see 7.2) for every
2298 // general-computational and signaling-computational operation except for
2299 // the conversions described in 5.12.
2300 return opInvalidOp;
2301 } else {
2302 // [IEEE Std 754-2008 6.2]:
2303 // For an operation with quiet NaN inputs, other than maximum and minimum
2304 // operations, if a floating-point result is to be delivered the result
2305 // shall be a quiet NaN which should be one of the input NaNs.
2306 // ...
2307 // Every general-computational and quiet-computational operation involving
2308 // one or more input NaNs, none of them signaling, shall signal no
2309 // exception, except fusedMultiplyAdd might signal the invalid operation
2310 // exception(see 7.2).
2311 return opOK;
2315 if (isZero()) {
2316 // [IEEE Std 754-2008 6.3]:
2317 // ... the sign of the result of conversions, the quantize operation, the
2318 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2319 // the sign of the first or only operand.
2320 return opOK;
2323 // If the exponent is large enough, we know that this value is already
2324 // integral, and the arithmetic below would potentially cause it to saturate
2325 // to +/-Inf. Bail out early instead.
2326 if (exponent+1 >= (int)semanticsPrecision(*semantics))
2327 return opOK;
2329 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2330 // precision of our format, and then subtract it back off again. The choice
2331 // of rounding modes for the addition/subtraction determines the rounding mode
2332 // for our integral rounding as well.
2333 // NOTE: When the input value is negative, we do subtraction followed by
2334 // addition instead.
2335 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2336 IntegerConstant <<= semanticsPrecision(*semantics)-1;
2337 IEEEFloat MagicConstant(*semantics);
2338 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2339 rmNearestTiesToEven);
2340 assert(fs == opOK);
2341 MagicConstant.sign = sign;
2343 // Preserve the input sign so that we can handle the case of zero result
2344 // correctly.
2345 bool inputSign = isNegative();
2347 fs = add(MagicConstant, rounding_mode);
2349 // Current value and 'MagicConstant' are both integers, so the result of the
2350 // subtraction is always exact according to Sterbenz' lemma.
2351 subtract(MagicConstant, rounding_mode);
2353 // Restore the input sign.
2354 if (inputSign != isNegative())
2355 changeSign();
2357 return fs;
2361 /* Comparison requires normalized numbers. */
2362 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2363 cmpResult result;
2365 assert(semantics == rhs.semantics);
2367 switch (PackCategoriesIntoKey(category, rhs.category)) {
2368 default:
2369 llvm_unreachable(nullptr);
2371 case PackCategoriesIntoKey(fcNaN, fcZero):
2372 case PackCategoriesIntoKey(fcNaN, fcNormal):
2373 case PackCategoriesIntoKey(fcNaN, fcInfinity):
2374 case PackCategoriesIntoKey(fcNaN, fcNaN):
2375 case PackCategoriesIntoKey(fcZero, fcNaN):
2376 case PackCategoriesIntoKey(fcNormal, fcNaN):
2377 case PackCategoriesIntoKey(fcInfinity, fcNaN):
2378 return cmpUnordered;
2380 case PackCategoriesIntoKey(fcInfinity, fcNormal):
2381 case PackCategoriesIntoKey(fcInfinity, fcZero):
2382 case PackCategoriesIntoKey(fcNormal, fcZero):
2383 if (sign)
2384 return cmpLessThan;
2385 else
2386 return cmpGreaterThan;
2388 case PackCategoriesIntoKey(fcNormal, fcInfinity):
2389 case PackCategoriesIntoKey(fcZero, fcInfinity):
2390 case PackCategoriesIntoKey(fcZero, fcNormal):
2391 if (rhs.sign)
2392 return cmpGreaterThan;
2393 else
2394 return cmpLessThan;
2396 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2397 if (sign == rhs.sign)
2398 return cmpEqual;
2399 else if (sign)
2400 return cmpLessThan;
2401 else
2402 return cmpGreaterThan;
2404 case PackCategoriesIntoKey(fcZero, fcZero):
2405 return cmpEqual;
2407 case PackCategoriesIntoKey(fcNormal, fcNormal):
2408 break;
2411 /* Two normal numbers. Do they have the same sign? */
2412 if (sign != rhs.sign) {
2413 if (sign)
2414 result = cmpLessThan;
2415 else
2416 result = cmpGreaterThan;
2417 } else {
2418 /* Compare absolute values; invert result if negative. */
2419 result = compareAbsoluteValue(rhs);
2421 if (sign) {
2422 if (result == cmpLessThan)
2423 result = cmpGreaterThan;
2424 else if (result == cmpGreaterThan)
2425 result = cmpLessThan;
2429 return result;
2432 /// IEEEFloat::convert - convert a value of one floating point type to another.
2433 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
2434 /// records whether the transformation lost information, i.e. whether
2435 /// converting the result back to the original type will produce the
2436 /// original value (this is almost the same as return value==fsOK, but there
2437 /// are edge cases where this is not so).
2439 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2440 roundingMode rounding_mode,
2441 bool *losesInfo) {
2442 lostFraction lostFraction;
2443 unsigned int newPartCount, oldPartCount;
2444 opStatus fs;
2445 int shift;
2446 const fltSemantics &fromSemantics = *semantics;
2447 bool is_signaling = isSignaling();
2449 lostFraction = lfExactlyZero;
2450 newPartCount = partCountForBits(toSemantics.precision + 1);
2451 oldPartCount = partCount();
2452 shift = toSemantics.precision - fromSemantics.precision;
2454 bool X86SpecialNan = false;
2455 if (&fromSemantics == &semX87DoubleExtended &&
2456 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2457 (!(*significandParts() & 0x8000000000000000ULL) ||
2458 !(*significandParts() & 0x4000000000000000ULL))) {
2459 // x86 has some unusual NaNs which cannot be represented in any other
2460 // format; note them here.
2461 X86SpecialNan = true;
2464 // If this is a truncation of a denormal number, and the target semantics
2465 // has larger exponent range than the source semantics (this can happen
2466 // when truncating from PowerPC double-double to double format), the
2467 // right shift could lose result mantissa bits. Adjust exponent instead
2468 // of performing excessive shift.
2469 // Also do a similar trick in case shifting denormal would produce zero
2470 // significand as this case isn't handled correctly by normalize.
2471 if (shift < 0 && isFiniteNonZero()) {
2472 int omsb = significandMSB() + 1;
2473 int exponentChange = omsb - fromSemantics.precision;
2474 if (exponent + exponentChange < toSemantics.minExponent)
2475 exponentChange = toSemantics.minExponent - exponent;
2476 if (exponentChange < shift)
2477 exponentChange = shift;
2478 if (exponentChange < 0) {
2479 shift -= exponentChange;
2480 exponent += exponentChange;
2481 } else if (omsb <= -shift) {
2482 exponentChange = omsb + shift - 1; // leave at least one bit set
2483 shift -= exponentChange;
2484 exponent += exponentChange;
2488 // If this is a truncation, perform the shift before we narrow the storage.
2489 if (shift < 0 && (isFiniteNonZero() ||
2490 (category == fcNaN && semantics->nonFiniteBehavior !=
2491 fltNonfiniteBehavior::NanOnly)))
2492 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2494 // Fix the storage so it can hold to new value.
2495 if (newPartCount > oldPartCount) {
2496 // The new type requires more storage; make it available.
2497 integerPart *newParts;
2498 newParts = new integerPart[newPartCount];
2499 APInt::tcSet(newParts, 0, newPartCount);
2500 if (isFiniteNonZero() || category==fcNaN)
2501 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2502 freeSignificand();
2503 significand.parts = newParts;
2504 } else if (newPartCount == 1 && oldPartCount != 1) {
2505 // Switch to built-in storage for a single part.
2506 integerPart newPart = 0;
2507 if (isFiniteNonZero() || category==fcNaN)
2508 newPart = significandParts()[0];
2509 freeSignificand();
2510 significand.part = newPart;
2513 // Now that we have the right storage, switch the semantics.
2514 semantics = &toSemantics;
2516 // If this is an extension, perform the shift now that the storage is
2517 // available.
2518 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2519 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2521 if (isFiniteNonZero()) {
2522 fs = normalize(rounding_mode, lostFraction);
2523 *losesInfo = (fs != opOK);
2524 } else if (category == fcNaN) {
2525 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2526 *losesInfo =
2527 fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2528 makeNaN(false, sign);
2529 return is_signaling ? opInvalidOp : opOK;
2532 // If NaN is negative zero, we need to create a new NaN to avoid converting
2533 // NaN to -Inf.
2534 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2535 semantics->nanEncoding != fltNanEncoding::NegativeZero)
2536 makeNaN(false, false);
2538 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2540 // For x87 extended precision, we want to make a NaN, not a special NaN if
2541 // the input wasn't special either.
2542 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2543 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2545 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2546 // This also guarantees that a sNaN does not become Inf on a truncation
2547 // that loses all payload bits.
2548 if (is_signaling) {
2549 makeQuiet();
2550 fs = opInvalidOp;
2551 } else {
2552 fs = opOK;
2554 } else if (category == fcInfinity &&
2555 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2556 makeNaN(false, sign);
2557 *losesInfo = true;
2558 fs = opInexact;
2559 } else if (category == fcZero &&
2560 semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2561 // Negative zero loses info, but positive zero doesn't.
2562 *losesInfo =
2563 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2564 fs = *losesInfo ? opInexact : opOK;
2565 // NaN is negative zero means -0 -> +0, which can lose information
2566 sign = false;
2567 } else {
2568 *losesInfo = false;
2569 fs = opOK;
2572 return fs;
2575 /* Convert a floating point number to an integer according to the
2576 rounding mode. If the rounded integer value is out of range this
2577 returns an invalid operation exception and the contents of the
2578 destination parts are unspecified. If the rounded value is in
2579 range but the floating point number is not the exact integer, the C
2580 standard doesn't require an inexact exception to be raised. IEEE
2581 854 does require it so we do that.
2583 Note that for conversions to integer type the C standard requires
2584 round-to-zero to always be used. */
2585 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2586 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2587 roundingMode rounding_mode, bool *isExact) const {
2588 lostFraction lost_fraction;
2589 const integerPart *src;
2590 unsigned int dstPartsCount, truncatedBits;
2592 *isExact = false;
2594 /* Handle the three special cases first. */
2595 if (category == fcInfinity || category == fcNaN)
2596 return opInvalidOp;
2598 dstPartsCount = partCountForBits(width);
2599 assert(dstPartsCount <= parts.size() && "Integer too big");
2601 if (category == fcZero) {
2602 APInt::tcSet(parts.data(), 0, dstPartsCount);
2603 // Negative zero can't be represented as an int.
2604 *isExact = !sign;
2605 return opOK;
2608 src = significandParts();
2610 /* Step 1: place our absolute value, with any fraction truncated, in
2611 the destination. */
2612 if (exponent < 0) {
2613 /* Our absolute value is less than one; truncate everything. */
2614 APInt::tcSet(parts.data(), 0, dstPartsCount);
2615 /* For exponent -1 the integer bit represents .5, look at that.
2616 For smaller exponents leftmost truncated bit is 0. */
2617 truncatedBits = semantics->precision -1U - exponent;
2618 } else {
2619 /* We want the most significant (exponent + 1) bits; the rest are
2620 truncated. */
2621 unsigned int bits = exponent + 1U;
2623 /* Hopelessly large in magnitude? */
2624 if (bits > width)
2625 return opInvalidOp;
2627 if (bits < semantics->precision) {
2628 /* We truncate (semantics->precision - bits) bits. */
2629 truncatedBits = semantics->precision - bits;
2630 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2631 } else {
2632 /* We want at least as many bits as are available. */
2633 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2635 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2636 bits - semantics->precision);
2637 truncatedBits = 0;
2641 /* Step 2: work out any lost fraction, and increment the absolute
2642 value if we would round away from zero. */
2643 if (truncatedBits) {
2644 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2645 truncatedBits);
2646 if (lost_fraction != lfExactlyZero &&
2647 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2648 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2649 return opInvalidOp; /* Overflow. */
2651 } else {
2652 lost_fraction = lfExactlyZero;
2655 /* Step 3: check if we fit in the destination. */
2656 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2658 if (sign) {
2659 if (!isSigned) {
2660 /* Negative numbers cannot be represented as unsigned. */
2661 if (omsb != 0)
2662 return opInvalidOp;
2663 } else {
2664 /* It takes omsb bits to represent the unsigned integer value.
2665 We lose a bit for the sign, but care is needed as the
2666 maximally negative integer is a special case. */
2667 if (omsb == width &&
2668 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2669 return opInvalidOp;
2671 /* This case can happen because of rounding. */
2672 if (omsb > width)
2673 return opInvalidOp;
2676 APInt::tcNegate (parts.data(), dstPartsCount);
2677 } else {
2678 if (omsb >= width + !isSigned)
2679 return opInvalidOp;
2682 if (lost_fraction == lfExactlyZero) {
2683 *isExact = true;
2684 return opOK;
2685 } else
2686 return opInexact;
2689 /* Same as convertToSignExtendedInteger, except we provide
2690 deterministic values in case of an invalid operation exception,
2691 namely zero for NaNs and the minimal or maximal value respectively
2692 for underflow or overflow.
2693 The *isExact output tells whether the result is exact, in the sense
2694 that converting it back to the original floating point type produces
2695 the original value. This is almost equivalent to result==opOK,
2696 except for negative zeroes.
2698 IEEEFloat::opStatus
2699 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2700 unsigned int width, bool isSigned,
2701 roundingMode rounding_mode, bool *isExact) const {
2702 opStatus fs;
2704 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2705 isExact);
2707 if (fs == opInvalidOp) {
2708 unsigned int bits, dstPartsCount;
2710 dstPartsCount = partCountForBits(width);
2711 assert(dstPartsCount <= parts.size() && "Integer too big");
2713 if (category == fcNaN)
2714 bits = 0;
2715 else if (sign)
2716 bits = isSigned;
2717 else
2718 bits = width - isSigned;
2720 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2721 if (sign && isSigned)
2722 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2725 return fs;
2728 /* Convert an unsigned integer SRC to a floating point number,
2729 rounding according to ROUNDING_MODE. The sign of the floating
2730 point number is not modified. */
2731 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2732 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2733 unsigned int omsb, precision, dstCount;
2734 integerPart *dst;
2735 lostFraction lost_fraction;
2737 category = fcNormal;
2738 omsb = APInt::tcMSB(src, srcCount) + 1;
2739 dst = significandParts();
2740 dstCount = partCount();
2741 precision = semantics->precision;
2743 /* We want the most significant PRECISION bits of SRC. There may not
2744 be that many; extract what we can. */
2745 if (precision <= omsb) {
2746 exponent = omsb - 1;
2747 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2748 omsb - precision);
2749 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2750 } else {
2751 exponent = precision - 1;
2752 lost_fraction = lfExactlyZero;
2753 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2756 return normalize(rounding_mode, lost_fraction);
2759 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2760 roundingMode rounding_mode) {
2761 unsigned int partCount = Val.getNumWords();
2762 APInt api = Val;
2764 sign = false;
2765 if (isSigned && api.isNegative()) {
2766 sign = true;
2767 api = -api;
2770 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2773 /* Convert a two's complement integer SRC to a floating point number,
2774 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2775 integer is signed, in which case it must be sign-extended. */
2776 IEEEFloat::opStatus
2777 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2778 unsigned int srcCount, bool isSigned,
2779 roundingMode rounding_mode) {
2780 opStatus status;
2782 if (isSigned &&
2783 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2784 integerPart *copy;
2786 /* If we're signed and negative negate a copy. */
2787 sign = true;
2788 copy = new integerPart[srcCount];
2789 APInt::tcAssign(copy, src, srcCount);
2790 APInt::tcNegate(copy, srcCount);
2791 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2792 delete [] copy;
2793 } else {
2794 sign = false;
2795 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2798 return status;
2801 /* FIXME: should this just take a const APInt reference? */
2802 IEEEFloat::opStatus
2803 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2804 unsigned int width, bool isSigned,
2805 roundingMode rounding_mode) {
2806 unsigned int partCount = partCountForBits(width);
2807 APInt api = APInt(width, ArrayRef(parts, partCount));
2809 sign = false;
2810 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2811 sign = true;
2812 api = -api;
2815 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2818 Expected<IEEEFloat::opStatus>
2819 IEEEFloat::convertFromHexadecimalString(StringRef s,
2820 roundingMode rounding_mode) {
2821 lostFraction lost_fraction = lfExactlyZero;
2823 category = fcNormal;
2824 zeroSignificand();
2825 exponent = 0;
2827 integerPart *significand = significandParts();
2828 unsigned partsCount = partCount();
2829 unsigned bitPos = partsCount * integerPartWidth;
2830 bool computedTrailingFraction = false;
2832 // Skip leading zeroes and any (hexa)decimal point.
2833 StringRef::iterator begin = s.begin();
2834 StringRef::iterator end = s.end();
2835 StringRef::iterator dot;
2836 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2837 if (!PtrOrErr)
2838 return PtrOrErr.takeError();
2839 StringRef::iterator p = *PtrOrErr;
2840 StringRef::iterator firstSignificantDigit = p;
2842 while (p != end) {
2843 integerPart hex_value;
2845 if (*p == '.') {
2846 if (dot != end)
2847 return createError("String contains multiple dots");
2848 dot = p++;
2849 continue;
2852 hex_value = hexDigitValue(*p);
2853 if (hex_value == UINT_MAX)
2854 break;
2856 p++;
2858 // Store the number while we have space.
2859 if (bitPos) {
2860 bitPos -= 4;
2861 hex_value <<= bitPos % integerPartWidth;
2862 significand[bitPos / integerPartWidth] |= hex_value;
2863 } else if (!computedTrailingFraction) {
2864 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2865 if (!FractOrErr)
2866 return FractOrErr.takeError();
2867 lost_fraction = *FractOrErr;
2868 computedTrailingFraction = true;
2872 /* Hex floats require an exponent but not a hexadecimal point. */
2873 if (p == end)
2874 return createError("Hex strings require an exponent");
2875 if (*p != 'p' && *p != 'P')
2876 return createError("Invalid character in significand");
2877 if (p == begin)
2878 return createError("Significand has no digits");
2879 if (dot != end && p - begin == 1)
2880 return createError("Significand has no digits");
2882 /* Ignore the exponent if we are zero. */
2883 if (p != firstSignificantDigit) {
2884 int expAdjustment;
2886 /* Implicit hexadecimal point? */
2887 if (dot == end)
2888 dot = p;
2890 /* Calculate the exponent adjustment implicit in the number of
2891 significant digits. */
2892 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2893 if (expAdjustment < 0)
2894 expAdjustment++;
2895 expAdjustment = expAdjustment * 4 - 1;
2897 /* Adjust for writing the significand starting at the most
2898 significant nibble. */
2899 expAdjustment += semantics->precision;
2900 expAdjustment -= partsCount * integerPartWidth;
2902 /* Adjust for the given exponent. */
2903 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2904 if (!ExpOrErr)
2905 return ExpOrErr.takeError();
2906 exponent = *ExpOrErr;
2909 return normalize(rounding_mode, lost_fraction);
// Sets this value to (decSigParts * 10^exp), rounded per rounding_mode, and
// returns the status produced by normalize().
//
// decSigParts / sigPartCount describe the decimal significand as a multi-part
// unsigned integer (it is handed to convertFromUnsignedParts below); exp is
// the power of ten to scale it by.  Since 10^n = 5^n * 2^n, the code
// multiplies or divides by 5^|exp| and adds exp to the binary exponent.
//
// The arithmetic is done in a scratch format whose width doubles on each loop
// iteration.  The loop exits once the accumulated error bound (HUerr) is no
// larger than the distance to the nearest rounding boundary (HUdistance);
// both appear to be measured in half-ulps, judging by the names and the
// "half-ulp" note below.
2912 IEEEFloat::opStatus
2913 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2914 unsigned sigPartCount, int exp,
2915 roundingMode rounding_mode) {
2916 unsigned int parts, pow5PartCount;
// Scratch semantics; its precision field is rewritten on every iteration.
2917 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2918 integerPart pow5Parts[maxPowerOfFiveParts];
2919 bool isNearest;
2921 isNearest = (rounding_mode == rmNearestTiesToEven ||
2922 rounding_mode == rmNearestTiesToAway);
// Initial working width: enough parts for the target precision plus 11 bits.
2924 parts = partCountForBits(semantics->precision + 11);
2926 /* Calculate pow(5, abs(exp)). */
2927 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
// No loop condition: the only exit is the return once truncation is safe.
2929 for (;; parts *= 2) {
2930 opStatus sigStatus, powStatus;
2931 unsigned int excessPrecision, truncatedBits;
2933 calcSemantics.precision = parts * integerPartWidth - 1;
2934 excessPrecision = calcSemantics.precision - semantics->precision;
2935 truncatedBits = excessPrecision;
// decSig starts as a zero carrying this object's sign.
2937 IEEEFloat decSig(calcSemantics, uninitialized);
2938 decSig.makeZero(sign);
2939 IEEEFloat pow5(calcSemantics);
// A status other than opOK from either conversion feeds the error bound below.
2941 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2942 rmNearestTiesToEven);
2943 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2944 rmNearestTiesToEven);
2945 /* Add exp, as 10^n = 5^n * 2^n. */
2946 decSig.exponent += exp;
2948 lostFraction calcLostFraction;
2949 integerPart HUerr, HUdistance;
2950 unsigned int powHUerr;
2952 if (exp >= 0) {
2953 /* multiplySignificand leaves the precision-th bit set to 1. */
2954 calcLostFraction = decSig.multiplySignificand(pow5);
2955 powHUerr = powStatus != opOK;
2956 } else {
2957 calcLostFraction = decSig.divideSignificand(pow5);
2958 /* Denormal numbers have less precision. */
2959 if (decSig.exponent < semantics->minExponent) {
2960 excessPrecision += (semantics->minExponent - decSig.exponent);
2961 truncatedBits = excessPrecision;
// Only excessPrecision is clamped; truncatedBits keeps the unclamped count.
2962 if (excessPrecision > calcSemantics.precision)
2963 excessPrecision = calcSemantics.precision;
2965 /* Extra half-ulp lost in reciprocal of exponent. */
2966 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2969 /* Both multiplySignificand and divideSignificand return the
2970 result with the integer bit set. */
2971 assert(APInt::tcExtractBit
2972 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2974 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2975 powHUerr);
2976 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2977 excessPrecision, isNearest);
2979 /* Are we guaranteed to round correctly if we truncate? */
2980 if (HUdistance >= HUerr) {
// Copy the bits above the excess low-order bits into our own significand.
2981 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2982 calcSemantics.precision - excessPrecision,
2983 excessPrecision);
2984 /* Take the exponent of decSig. If we tcExtract-ed less bits
2985 above we must adjust our exponent to compensate for the
2986 implicit right shift. */
2987 exponent = (decSig.exponent + semantics->precision
2988 - (calcSemantics.precision - excessPrecision));
// The discarded low bits become the lost fraction that drives final rounding.
2989 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2990 decSig.partCount(),
2991 truncatedBits);
2992 return normalize(rounding_mode, calcLostFraction);
// Parses a decimal floating-point literal from str into this object.
//
// The text is first scanned by interpretDecimal into a decimalInfo record.
// Zero, certain overflow and certain underflow are then settled with cheap
// integer comparisons.  Anything else has its digits accumulated into a
// heap-allocated multi-part integer that is passed to
// roundSignificandWithExponent.
//
// Returns the resulting opStatus, or an Error if interpretDecimal fails or a
// non-digit character is found inside the significand.
2997 Expected<IEEEFloat::opStatus>
2998 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2999 decimalInfo D;
3000 opStatus fs;
3002 /* Scan the text. */
3003 StringRef::iterator p = str.begin();
3004 if (Error Err = interpretDecimal(p, str.end(), &D))
3005 return std::move(Err);
3007 /* Handle the quick cases. First the case of no significant digits,
3008 i.e. zero, and then exponents that are obviously too large or too
3009 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
3010 definitely overflows if
3012 (exp - 1) * L >= maxExponent
3014 and definitely underflows to zero where
3016 (exp + 1) * L <= minExponent - precision
3018 With integer arithmetic the tightest bounds for L are
3020 93/28 < L < 196/59 [ numerator <= 256 ]
3021 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
3024 // Test if we have a zero number allowing for strings with no null terminators
3025 // and zero decimals with non-zero exponents.
3027 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3028 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3029 // be at most one dot. On the other hand, if we have a zero with a non-zero
3030 // exponent, then we know that D.firstSigDigit will be non-numeric.
3031 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3032 category = fcZero;
3033 fs = opOK;
// With the NegativeZero NaN encoding the sign of a zero result is cleared,
// presumably because the negative-zero bit pattern denotes NaN there.
3034 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3035 sign = false;
3037 /* Check whether the normalized exponent is high enough to overflow
3038 max during the log-rebasing in the max-exponent check below. */
3039 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3040 fs = handleOverflow(rounding_mode);
3042 /* If it wasn't, then it also wasn't high enough to overflow max
3043 during the log-rebasing in the min-exponent check. Check that it
3044 won't overflow min in either check, then perform the min-exponent
3045 check. */
3046 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3047 (D.normalizedExponent + 1) * 28738 <=
3048 8651 * (semantics->minExponent - (int) semantics->precision)) {
3049 /* Underflow to zero and round. */
// A zero significand with a "less than half" lost fraction lets normalize()
// pick the final value according to the rounding mode.
3050 category = fcNormal;
3051 zeroSignificand();
3052 fs = normalize(rounding_mode, lfLessThanHalf);
3054 /* We can finally safely perform the max-exponent check. */
3055 } else if ((D.normalizedExponent - 1) * 42039
3056 >= 12655 * semantics->maxExponent) {
3057 /* Overflow and round. */
3058 fs = handleOverflow(rounding_mode);
3059 } else {
3060 integerPart *decSignificand;
3061 unsigned int partCount;
3063 /* A tight upper bound on number of bits required to hold an
3064 N-digit decimal integer is N * 196 / 59. Allocate enough space
3065 to hold the full significand, and an extra part required by
3066 tcMultiplyPart. */
// partCount is first the character span from first to last significant
// digit, then the number of parts needed for it.  It is reset to 0 below and
// from then on counts the parts actually in use.
3067 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3068 partCount = partCountForBits(1 + 196 * partCount / 59);
// Manually owned buffer: released on the error path and after rounding.
3069 decSignificand = new integerPart[partCount + 1];
3070 partCount = 0;
3072 /* Convert to binary efficiently - we do almost all multiplication
3073 in an integerPart. When this would overflow do we do a single
3074 bignum multiplication, and then revert again to multiplication
3075 in an integerPart. */
3076 do {
3077 integerPart decValue, val, multiplier;
// val collects a run of digits and multiplier is 10^(digits in the run), so
// the bignum update below computes decSignificand * multiplier + val.
3079 val = 0;
3080 multiplier = 1;
3082 do {
// Step over the decimal point; stop if it was the last character of str.
3083 if (*p == '.') {
3084 p++;
3085 if (p == str.end()) {
3086 break;
3089 decValue = decDigitValue(*p++);
3090 if (decValue >= 10U) {
3091 delete[] decSignificand;
3092 return createError("Invalid character in significand");
3094 multiplier *= 10;
3095 val = val * 10 + decValue;
3096 /* The maximum number that can be multiplied by ten with any
3097 digit added without overflowing an integerPart. */
3098 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3100 /* Multiply out the current part. */
3101 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3102 partCount, partCount + 1, false);
3104 /* If we used another part (likely but not guaranteed), increase
3105 the count. */
3106 if (decSignificand[partCount])
3107 partCount++;
3108 } while (p <= D.lastSigDigit);
3110 category = fcNormal;
3111 fs = roundSignificandWithExponent(decSignificand, partCount,
3112 D.exponent, rounding_mode);
3114 delete [] decSignificand;
3117 return fs;
3120 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3121 const size_t MIN_NAME_SIZE = 3;
3123 if (str.size() < MIN_NAME_SIZE)
3124 return false;
3126 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
3127 makeInf(false);
3128 return true;
3131 bool IsNegative = str.front() == '-';
3132 if (IsNegative) {
3133 str = str.drop_front();
3134 if (str.size() < MIN_NAME_SIZE)
3135 return false;
3137 if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) {
3138 makeInf(true);
3139 return true;
3143 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3144 bool IsSignaling = str.front() == 's' || str.front() == 'S';
3145 if (IsSignaling) {
3146 str = str.drop_front();
3147 if (str.size() < MIN_NAME_SIZE)
3148 return false;
3151 if (str.starts_with("nan") || str.starts_with("NaN")) {
3152 str = str.drop_front(3);
3154 // A NaN without payload.
3155 if (str.empty()) {
3156 makeNaN(IsSignaling, IsNegative);
3157 return true;
3160 // Allow the payload to be inside parentheses.
3161 if (str.front() == '(') {
3162 // Parentheses should be balanced (and not empty).
3163 if (str.size() <= 2 || str.back() != ')')
3164 return false;
3166 str = str.slice(1, str.size() - 1);
3169 // Determine the payload number's radix.
3170 unsigned Radix = 10;
3171 if (str[0] == '0') {
3172 if (str.size() > 1 && tolower(str[1]) == 'x') {
3173 str = str.drop_front(2);
3174 Radix = 16;
3175 } else
3176 Radix = 8;
3179 // Parse the payload and make the NaN.
3180 APInt Payload;
3181 if (!str.getAsInteger(Radix, Payload)) {
3182 makeNaN(IsSignaling, IsNegative, &Payload);
3183 return true;
3187 return false;
3190 Expected<IEEEFloat::opStatus>
3191 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3192 if (str.empty())
3193 return createError("Invalid string length");
3195 // Handle special cases.
3196 if (convertFromStringSpecials(str))
3197 return opOK;
3199 /* Handle a leading minus sign. */
3200 StringRef::iterator p = str.begin();
3201 size_t slen = str.size();
3202 sign = *p == '-' ? 1 : 0;
3203 if (*p == '-' || *p == '+') {
3204 p++;
3205 slen--;
3206 if (!slen)
3207 return createError("String has no digits");
3210 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3211 if (slen == 2)
3212 return createError("Invalid string");
3213 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3214 rounding_mode);
3217 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3220 /* Write out a hexadecimal representation of the floating point value
3221 to DST, which must be of sufficient size, in the C99 form
3222 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3223 excluding the terminating NUL.
3225 If UPPERCASE, the output is in upper case, otherwise in lower case.
3227 HEXDIGITS digits appear altogether, rounding the value if
3228 necessary. If HEXDIGITS is 0, the minimal precision to display the
3229 number precisely is used instead. If nothing would appear after
3230 the decimal point it is suppressed.
3232 The decimal exponent is always printed and has at least one digit.
3233 Zero values display an exponent of zero. Infinities and NaNs
3234 appear as "infinity" or "nan" respectively.
3236 The above rules are as specified by C99. There is ambiguity about
3237 what the leading hexadecimal digit should be. This implementation
3238 uses whatever is necessary so that the exponent is displayed as
3239 stored. This implies the exponent will fall within the IEEE format
3240 range, and the leading hexadecimal digit will be 0 (for denormals),
3241 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3242 any other digits zero).
3244 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3245 bool upperCase,
3246 roundingMode rounding_mode) const {
3247 char *p;
3249 p = dst;
3250 if (sign)
3251 *dst++ = '-';
3253 switch (category) {
3254 case fcInfinity:
3255 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3256 dst += sizeof infinityL - 1;
3257 break;
3259 case fcNaN:
3260 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3261 dst += sizeof NaNU - 1;
3262 break;
3264 case fcZero:
3265 *dst++ = '0';
3266 *dst++ = upperCase ? 'X': 'x';
3267 *dst++ = '0';
3268 if (hexDigits > 1) {
3269 *dst++ = '.';
3270 memset (dst, '0', hexDigits - 1);
3271 dst += hexDigits - 1;
3273 *dst++ = upperCase ? 'P': 'p';
3274 *dst++ = '0';
3275 break;
3277 case fcNormal:
3278 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3279 break;
3282 *dst = 0;
3284 return static_cast<unsigned int>(dst - p);
3287 /* Does the hard work of outputting the correctly rounded hexadecimal
3288 form of a normal floating point number with the specified number of
3289 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3290 digits necessary to print the value precisely is output. */
// Writes "0x" (or "0X"), the hexadecimal digits of the significand, 'p' (or
// 'P') and finally the exponent via writeSignedDecimal, starting at dst.
//
// hexDigits == 0 requests exactly as many digits as the value needs.  A
// non-zero hexDigits fixes the digit count: the value is rounded with
// rounding_mode when that drops non-zero bits, and padded with zeroes when
// more digits are requested than needed.
//
// Returns whatever writeSignedDecimal returns (presumably the position just
// past the last character written - confirm against that helper).
3291 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3292 bool upperCase,
3293 roundingMode rounding_mode) const {
3294 unsigned int count, valueBits, shift, partsCount, outputDigits;
3295 const char *hexDigitChars;
3296 const integerPart *significand;
3297 char *p;
3298 bool roundUp;
3300 *dst++ = '0';
3301 *dst++ = upperCase ? 'X': 'x';
3303 roundUp = false;
3304 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3306 significand = significandParts();
3307 partsCount = partCount();
3309 /* +3 because the first digit only uses the single integer bit, so
3310 we have 3 virtual zero most-significant-bits. */
3311 valueBits = semantics->precision + 3;
// Left shift that moves the top of the valueBits-wide value to the top of a part.
3312 shift = integerPartWidth - valueBits % integerPartWidth;
3314 /* The natural number of digits required ignoring trailing
3315 insignificant zeroes. */
3316 outputDigits = (valueBits - significandLSB () + 3) / 4;
3318 /* hexDigits of zero means use the required number for the
3319 precision. Otherwise, see if we are truncating. If we are,
3320 find out if we need to round away from zero. */
3321 if (hexDigits) {
3322 if (hexDigits < outputDigits) {
3323 /* We are dropping non-zero bits, so need to check how to round.
3324 "bits" is the number of dropped bits. */
3325 unsigned int bits;
3326 lostFraction fraction;
3328 bits = valueBits - hexDigits * 4;
3329 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3330 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3332 outputDigits = hexDigits;
3335 /* Write the digits consecutively, and start writing in the location
3336 of the hexadecimal point. We move the most significant digit
3337 left and add the hexadecimal point later. */
// p marks the first digit written; the slot at p[-1] is filled in further down.
3338 p = ++dst;
// Number of parts needed to hold valueBits bits.
3340 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3342 while (outputDigits && count) {
3343 integerPart part;
3345 /* Put the most significant integerPartWidth bits in "part". */
3346 if (--count == partsCount)
3347 part = 0; /* An imaginary higher zero part. */
3348 else
3349 part = significand[count] << shift;
// Fill the vacated low bits from the next lower part.
3351 if (count && shift)
3352 part |= significand[count - 1] >> (integerPartWidth - shift);
3354 /* Convert as much of "part" to hexdigits as we can. */
3355 unsigned int curDigits = integerPartWidth / 4;
3357 if (curDigits > outputDigits)
3358 curDigits = outputDigits;
3359 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3360 outputDigits -= curDigits;
3363 if (roundUp) {
3364 char *q = dst;
// Increment the last digit and keep carrying leftwards while a digit wraps
// round to '0'.
3366 /* Note that hexDigitChars has a trailing '0'. */
3367 do {
3368 q--;
3369 *q = hexDigitChars[hexDigitValue (*q) + 1];
3370 } while (*q == '0');
3371 assert(q >= p);
3372 } else {
3373 /* Add trailing zeroes. */
3374 memset (dst, '0', outputDigits);
3375 dst += outputDigits;
3378 /* Move the most significant digit to before the point, and if there
3379 is something after the decimal point add it. This must come
3380 after rounding above. */
3381 p[-1] = p[0];
// With a single digit there is no fraction: give back the slot instead of
// writing a point.
3382 if (dst -1 == p)
3383 dst--;
3384 else
3385 p[0] = '.';
3387 /* Finally output the exponent. */
3388 *dst++ = upperCase ? 'P': 'p';
3390 return writeSignedDecimal (dst, exponent);
3393 hash_code hash_value(const IEEEFloat &Arg) {
3394 if (!Arg.isFiniteNonZero())
3395 return hash_combine((uint8_t)Arg.category,
3396 // NaN has no sign, fix it at zero.
3397 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3398 Arg.semantics->precision);
3400 // Normal floats need their exponent and significand hashed.
3401 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3402 Arg.semantics->precision, Arg.exponent,
3403 hash_combine_range(
3404 Arg.significandParts(),
3405 Arg.significandParts() + Arg.partCount()));
3408 // Conversion from APFloat to/from host float/double. It may eventually be
3409 // possible to eliminate these and have everybody deal with APFloats, but that
3410 // will take a while. This approach will not easily extend to long double.
3411 // Current implementation requires integerPartWidth==64, which is correct at
3412 // the moment but could be made more general.
3414 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
3415 // the actual IEEE representations. We compensate for that here.
3417 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3418 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3419 assert(partCount()==2);
3421 uint64_t myexponent, mysignificand;
3423 if (isFiniteNonZero()) {
3424 myexponent = exponent+16383; //bias
3425 mysignificand = significandParts()[0];
3426 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3427 myexponent = 0; // denormal
3428 } else if (category==fcZero) {
3429 myexponent = 0;
3430 mysignificand = 0;
3431 } else if (category==fcInfinity) {
3432 myexponent = 0x7fff;
3433 mysignificand = 0x8000000000000000ULL;
3434 } else {
3435 assert(category == fcNaN && "Unknown category");
3436 myexponent = 0x7fff;
3437 mysignificand = significandParts()[0];
3440 uint64_t words[2];
3441 words[0] = mysignificand;
3442 words[1] = ((uint64_t)(sign & 1) << 15) |
3443 (myexponent & 0x7fffLL);
3444 return APInt(80, words);
3447 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3448 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3449 assert(partCount()==2);
3451 uint64_t words[2];
3452 opStatus fs;
3453 bool losesInfo;
3455 // Convert number to double. To avoid spurious underflows, we re-
3456 // normalize against the "double" minExponent first, and only *then*
3457 // truncate the mantissa. The result of that second conversion
3458 // may be inexact, but should never underflow.
3459 // Declare fltSemantics before APFloat that uses it (and
3460 // saves pointer to it) to ensure correct destruction order.
3461 fltSemantics extendedSemantics = *semantics;
3462 extendedSemantics.minExponent = semIEEEdouble.minExponent;
3463 IEEEFloat extended(*this);
3464 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3465 assert(fs == opOK && !losesInfo);
3466 (void)fs;
3468 IEEEFloat u(extended);
3469 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3470 assert(fs == opOK || fs == opInexact);
3471 (void)fs;
3472 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3474 // If conversion was exact or resulted in a special case, we're done;
3475 // just set the second double to zero. Otherwise, re-convert back to
3476 // the extended format and compute the difference. This now should
3477 // convert exactly to double.
3478 if (u.isFiniteNonZero() && losesInfo) {
3479 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3480 assert(fs == opOK && !losesInfo);
3481 (void)fs;
3483 IEEEFloat v(extended);
3484 v.subtract(u, rmNearestTiesToEven);
3485 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3486 assert(fs == opOK && !losesInfo);
3487 (void)fs;
3488 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3489 } else {
3490 words[1] = 0;
3493 return APInt(128, words);
// Packs this value into an APInt of S.sizeInBits bits for semantics S: the
// trailing significand in the low bits, the biased exponent above it, and the
// sign in the top bit.  All layout constants are derived from S at compile
// time.
3496 template <const fltSemantics &S>
3497 APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3498 assert(semantics == &S);
// bias is added to the internal exponent to obtain the stored exponent field.
3500 constexpr int bias = -(S.minExponent - 1);
3501 constexpr unsigned int trailing_significand_bits = S.precision - 1;
// Index of the significand part holding the integer bit, and that bit's mask.
3502 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3503 constexpr integerPart integer_bit =
3504 integerPart{1} << (trailing_significand_bits % integerPartWidth);
// Mask of the significand bits below the integer bit within that part.
3505 constexpr uint64_t significand_mask = integer_bit - 1;
3506 constexpr unsigned int exponent_bits =
3507 S.sizeInBits - 1 - trailing_significand_bits;
3508 static_assert(exponent_bits < 64);
3509 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3511 uint64_t myexponent;
3512 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3513 mysignificand;
3515 if (isFiniteNonZero()) {
3516 myexponent = exponent + bias;
3517 std::copy_n(significandParts(), mysignificand.size(),
3518 mysignificand.begin());
// Biased exponent 1 with a clear integer bit means a denormal, which is
// stored with an exponent field of 0.
3519 if (myexponent == 1 &&
3520 !(significandParts()[integer_bit_part] & integer_bit))
3521 myexponent = 0; // denormal
3522 } else if (category == fcZero) {
3523 myexponent = ::exponentZero(S) + bias;
3524 mysignificand.fill(0);
3525 } else if (category == fcInfinity) {
3526 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
3527 llvm_unreachable("semantics don't support inf!");
3529 myexponent = ::exponentInf(S) + bias;
3530 mysignificand.fill(0);
3531 } else {
3532 assert(category == fcNaN && "Unknown category!");
3533 myexponent = ::exponentNaN(S) + bias;
3534 std::copy_n(significandParts(), mysignificand.size(),
3535 mysignificand.begin());
// Assemble the output words: significand parts first, remaining words zeroed.
3537 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3538 auto words_iter =
3539 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3540 if constexpr (significand_mask != 0) {
3541 // Clear the integer bit.
3542 words[mysignificand.size() - 1] &= significand_mask;
3544 std::fill(words_iter, words.end(), uint64_t{0});
// The sign and the exponent field are both OR-ed into the last word.
3545 constexpr size_t last_word = words.size() - 1;
3546 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3547 << ((S.sizeInBits - 1) % 64);
3548 words[last_word] |= shifted_sign;
3549 uint64_t shifted_exponent = (myexponent & exponent_mask)
3550 << (trailing_significand_bits % 64);
3551 words[last_word] |= shifted_exponent;
// A result that fits in one word uses the single-value APInt constructor.
3552 if constexpr (last_word == 0) {
3553 return APInt(S.sizeInBits, words[0]);
3555 return APInt(S.sizeInBits, words);
3558 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3559 assert(partCount() == 2);
3560 return convertIEEEFloatToAPInt<semIEEEquad>();
3563 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3564 assert(partCount()==1);
3565 return convertIEEEFloatToAPInt<semIEEEdouble>();
3568 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3569 assert(partCount()==1);
3570 return convertIEEEFloatToAPInt<semIEEEsingle>();
3573 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3574 assert(partCount() == 1);
3575 return convertIEEEFloatToAPInt<semBFloat>();
3578 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3579 assert(partCount()==1);
3580 return convertIEEEFloatToAPInt<semIEEEhalf>();
3583 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3584 assert(partCount() == 1);
3585 return convertIEEEFloatToAPInt<semFloat8E5M2>();
3588 APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3589 assert(partCount() == 1);
3590 return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
3593 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3594 assert(partCount() == 1);
3595 return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
3598 APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3599 assert(partCount() == 1);
3600 return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
3603 APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3604 assert(partCount() == 1);
3605 return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
3608 APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3609 assert(partCount() == 1);
3610 return convertIEEEFloatToAPInt<semFloatTF32>();
3613 // This function creates an APInt that is just a bit map of the floating
3614 // point constant as it would appear in memory. It is not a conversion,
3615 // and treating the result as a normal integer is unlikely to be useful.
3617 APInt IEEEFloat::bitcastToAPInt() const {
3618 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3619 return convertHalfAPFloatToAPInt();
3621 if (semantics == (const llvm::fltSemantics *)&semBFloat)
3622 return convertBFloatAPFloatToAPInt();
3624 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3625 return convertFloatAPFloatToAPInt();
3627 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3628 return convertDoubleAPFloatToAPInt();
3630 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3631 return convertQuadrupleAPFloatToAPInt();
3633 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3634 return convertPPCDoubleDoubleAPFloatToAPInt();
3636 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3637 return convertFloat8E5M2APFloatToAPInt();
3639 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3640 return convertFloat8E5M2FNUZAPFloatToAPInt();
3642 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3643 return convertFloat8E4M3FNAPFloatToAPInt();
3645 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3646 return convertFloat8E4M3FNUZAPFloatToAPInt();
3648 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3649 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3651 if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3652 return convertFloatTF32APFloatToAPInt();
3654 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3655 "unknown format!");
3656 return convertF80LongDoubleAPFloatToAPInt();
3659 float IEEEFloat::convertToFloat() const {
3660 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3661 "Float semantics are not IEEEsingle");
3662 APInt api = bitcastToAPInt();
3663 return api.bitsToFloat();
3666 double IEEEFloat::convertToDouble() const {
3667 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3668 "Float semantics are not IEEEdouble");
3669 APInt api = bitcastToAPInt();
3670 return api.bitsToDouble();
3673 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3674 /// does not support these bit patterns:
3675 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3676 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3677 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3678 /// exponent = 0, integer bit 1 ("pseudodenormal")
3679 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3680 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3681 uint64_t i1 = api.getRawData()[0];
3682 uint64_t i2 = api.getRawData()[1];
3683 uint64_t myexponent = (i2 & 0x7fff);
3684 uint64_t mysignificand = i1;
3685 uint8_t myintegerbit = mysignificand >> 63;
3687 initialize(&semX87DoubleExtended);
3688 assert(partCount()==2);
3690 sign = static_cast<unsigned int>(i2>>15);
3691 if (myexponent == 0 && mysignificand == 0) {
3692 makeZero(sign);
3693 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3694 makeInf(sign);
3695 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3696 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3697 category = fcNaN;
3698 exponent = exponentNaN();
3699 significandParts()[0] = mysignificand;
3700 significandParts()[1] = 0;
3701 } else {
3702 category = fcNormal;
3703 exponent = myexponent - 16383;
3704 significandParts()[0] = mysignificand;
3705 significandParts()[1] = 0;
3706 if (myexponent==0) // denormal
3707 exponent = -16382;
3711 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3712 uint64_t i1 = api.getRawData()[0];
3713 uint64_t i2 = api.getRawData()[1];
3714 opStatus fs;
3715 bool losesInfo;
3717 // Get the first double and convert to our format.
3718 initFromDoubleAPInt(APInt(64, i1));
3719 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3720 assert(fs == opOK && !losesInfo);
3721 (void)fs;
3723 // Unless we have a special case, add in second double.
3724 if (isFiniteNonZero()) {
3725 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3726 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3727 assert(fs == opOK && !losesInfo);
3728 (void)fs;
3730 add(v, rmNearestTiesToEven);
// Initializes this object from the bit image in api, interpreted according to
// semantics S.  The sign, exponent field and trailing significand are
// extracted, then classified as infinity, NaN, zero, denormal or normal
// according to S.nonFiniteBehavior and S.nanEncoding.
3734 template <const fltSemantics &S>
3735 void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3736 assert(api.getBitWidth() == S.sizeInBits);
// Layout constants derived from S at compile time.
3737 constexpr integerPart integer_bit = integerPart{1}
3738 << ((S.precision - 1) % integerPartWidth);
3739 constexpr uint64_t significand_mask = integer_bit - 1;
3740 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3741 constexpr unsigned int stored_significand_parts =
3742 partCountForBits(trailing_significand_bits);
3743 constexpr unsigned int exponent_bits =
3744 S.sizeInBits - 1 - trailing_significand_bits;
3745 static_assert(exponent_bits < 64);
3746 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3747 constexpr int bias = -(S.minExponent - 1);
3749 // Copy the bits of the significand. We need to clear out the exponent and
3750 // sign bit in the last word.
3751 std::array<integerPart, stored_significand_parts> mysignificand;
3752 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3753 if constexpr (significand_mask != 0) {
3754 mysignificand[mysignificand.size() - 1] &= significand_mask;
3757 // We assume the last word holds the sign bit, the exponent, and potentially
3758 // some of the trailing significand field.
3759 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3760 uint64_t myexponent =
3761 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3763 initialize(&S);
3764 assert(partCount() == mysignificand.size());
3766 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3768 bool all_zero_significand =
3769 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3771 bool is_zero = myexponent == 0 && all_zero_significand;
// Infinity is only recognised for semantics with IEEE754 non-finite behaviour.
3773 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3774 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3775 makeInf(sign);
3776 return;
3780 bool is_nan = false;
// NaN detection depends on the encoding: IEEE (NaN exponent with a non-zero
// significand), AllOnes (NaN exponent with every significand bit set), or
// NegativeZero (the negative-zero bit pattern).
3782 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3783 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3784 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3785 bool all_ones_significand =
3786 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3787 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3788 (!significand_mask ||
3789 mysignificand[mysignificand.size() - 1] == significand_mask);
3790 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3791 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3792 is_nan = is_zero && sign;
// NaN is tested before zero, so the NegativeZero pattern becomes NaN, not zero.
3795 if (is_nan) {
3796 category = fcNaN;
3797 exponent = ::exponentNaN(S);
3798 std::copy_n(mysignificand.begin(), mysignificand.size(),
3799 significandParts());
3800 return;
3803 if (is_zero) {
3804 makeZero(sign);
3805 return;
// Everything else is a finite non-zero value.
3808 category = fcNormal;
3809 exponent = myexponent - bias;
3810 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
// A zero exponent field marks a denormal: it uses minExponent and gets no
// implicit integer bit.  Otherwise the integer bit is restored.
3811 if (myexponent == 0) // denormal
3812 exponent = S.minExponent;
3813 else
3814 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3817 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3818 initFromIEEEAPInt<semIEEEquad>(api);
3821 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3822 initFromIEEEAPInt<semIEEEdouble>(api);
3825 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3826 initFromIEEEAPInt<semIEEEsingle>(api);
3829 void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3830 initFromIEEEAPInt<semBFloat>(api);
3833 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3834 initFromIEEEAPInt<semIEEEhalf>(api);
3837 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3838 initFromIEEEAPInt<semFloat8E5M2>(api);
3841 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3842 initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
3845 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3846 initFromIEEEAPInt<semFloat8E4M3FN>(api);
3849 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3850 initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
3853 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3854 initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
3857 void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3858 initFromIEEEAPInt<semFloatTF32>(api);
3861 /// Treat api as containing the bits of a floating point number.
3862 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3863 assert(api.getBitWidth() == Sem->sizeInBits);
3864 if (Sem == &semIEEEhalf)
3865 return initFromHalfAPInt(api);
3866 if (Sem == &semBFloat)
3867 return initFromBFloatAPInt(api);
3868 if (Sem == &semIEEEsingle)
3869 return initFromFloatAPInt(api);
3870 if (Sem == &semIEEEdouble)
3871 return initFromDoubleAPInt(api);
3872 if (Sem == &semX87DoubleExtended)
3873 return initFromF80LongDoubleAPInt(api);
3874 if (Sem == &semIEEEquad)
3875 return initFromQuadrupleAPInt(api);
3876 if (Sem == &semPPCDoubleDoubleLegacy)
3877 return initFromPPCDoubleDoubleAPInt(api);
3878 if (Sem == &semFloat8E5M2)
3879 return initFromFloat8E5M2APInt(api);
3880 if (Sem == &semFloat8E5M2FNUZ)
3881 return initFromFloat8E5M2FNUZAPInt(api);
3882 if (Sem == &semFloat8E4M3FN)
3883 return initFromFloat8E4M3FNAPInt(api);
3884 if (Sem == &semFloat8E4M3FNUZ)
3885 return initFromFloat8E4M3FNUZAPInt(api);
3886 if (Sem == &semFloat8E4M3B11FNUZ)
3887 return initFromFloat8E4M3B11FNUZAPInt(api);
3888 if (Sem == &semFloatTF32)
3889 return initFromFloatTF32APInt(api);
3891 llvm_unreachable(nullptr);
3894 /// Make this number the largest magnitude normal number in the given
3895 /// semantics.
3896 void IEEEFloat::makeLargest(bool Negative) {
3897 // We want (in interchange format):
3898 // sign = {Negative}
3899 // exponent = 1..10
3900 // significand = 1..1
3901 category = fcNormal;
3902 sign = Negative;
3903 exponent = semantics->maxExponent;
3905 // Use memset to set all but the highest integerPart to all ones.
3906 integerPart *significand = significandParts();
3907 unsigned PartCount = partCount();
3908 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3910 // Set the high integerPart especially setting all unused top bits for
3911 // internal consistency.
3912 const unsigned NumUnusedHighBits =
3913 PartCount*integerPartWidth - semantics->precision;
3914 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3915 ? (~integerPart(0) >> NumUnusedHighBits)
3916 : 0;
3918 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3919 semantics->nanEncoding == fltNanEncoding::AllOnes)
3920 significand[0] &= ~integerPart(1);
3923 /// Make this number the smallest magnitude denormal number in the given
3924 /// semantics.
3925 void IEEEFloat::makeSmallest(bool Negative) {
3926 // We want (in interchange format):
3927 // sign = {Negative}
3928 // exponent = 0..0
3929 // significand = 0..01
3930 category = fcNormal;
3931 sign = Negative;
3932 exponent = semantics->minExponent;
3933 APInt::tcSet(significandParts(), 1, partCount());
3936 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3937 // We want (in interchange format):
3938 // sign = {Negative}
3939 // exponent = 0..0
3940 // significand = 10..0
3942 category = fcNormal;
3943 zeroSignificand();
3944 sign = Negative;
3945 exponent = semantics->minExponent;
3946 APInt::tcSetBit(significandParts(), semantics->precision - 1);
3949 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3950 initFromAPInt(&Sem, API);
3953 IEEEFloat::IEEEFloat(float f) {
3954 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3957 IEEEFloat::IEEEFloat(double d) {
3958 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3961 namespace {
3962 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3963 Buffer.append(Str.begin(), Str.end());
3966 /// Removes data from the given significand until it is no more
3967 /// precise than is required for the desired precision.
3968 void AdjustToPrecision(APInt &significand,
3969 int &exp, unsigned FormatPrecision) {
3970 unsigned bits = significand.getActiveBits();
3972 // 196/59 is a very slight overestimate of lg_2(10).
3973 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3975 if (bits <= bitsRequired) return;
3977 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3978 if (!tensRemovable) return;
3980 exp += tensRemovable;
3982 APInt divisor(significand.getBitWidth(), 1);
3983 APInt powten(significand.getBitWidth(), 10);
3984 while (true) {
3985 if (tensRemovable & 1)
3986 divisor *= powten;
3987 tensRemovable >>= 1;
3988 if (!tensRemovable) break;
3989 powten *= powten;
3992 significand = significand.udiv(divisor);
3994 // Truncate the significand down to its active bit count.
3995 significand = significand.trunc(significand.getActiveBits());
3999 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4000 int &exp, unsigned FormatPrecision) {
4001 unsigned N = buffer.size();
4002 if (N <= FormatPrecision) return;
4004 // The most significant figures are the last ones in the buffer.
4005 unsigned FirstSignificant = N - FormatPrecision;
4007 // Round.
4008 // FIXME: this probably shouldn't use 'round half up'.
4010 // Rounding down is just a truncation, except we also want to drop
4011 // trailing zeros from the new result.
4012 if (buffer[FirstSignificant - 1] < '5') {
4013 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4014 FirstSignificant++;
4016 exp += FirstSignificant;
4017 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4018 return;
4021 // Rounding up requires a decimal add-with-carry. If we continue
4022 // the carry, the newly-introduced zeros will just be truncated.
4023 for (unsigned I = FirstSignificant; I != N; ++I) {
4024 if (buffer[I] == '9') {
4025 FirstSignificant++;
4026 } else {
4027 buffer[I]++;
4028 break;
4032 // If we carried through, we have exactly one digit of precision.
4033 if (FirstSignificant == N) {
4034 exp += FirstSignificant;
4035 buffer.clear();
4036 buffer.push_back('1');
4037 return;
4040 exp += FirstSignificant;
4041 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4043 } // namespace
/// Append a decimal rendering of this value to \p Str.
///
/// Infinities are written as "+Inf" / "-Inf" and NaNs as "NaN". A zero is
/// written (after a '-' if negative) as "0" when \p FormatMaxPadding is
/// nonzero; otherwise as "0.0E+0" when \p TruncateZero is set, or as "0.0"
/// followed by FormatPrecision - 1 extra zeros and "e+00" when it is not.
///
/// \param FormatPrecision maximum number of significant decimal digits; 0
///        selects a digit count derived from the semantics' precision.
/// \param FormatMaxPadding 0 forces scientific notation; otherwise it bounds
///        how many padding zeros are tolerated before scientific notation is
///        chosen.
/// \param TruncateZero when true, scientific output uses 'E' and no zero
///        fill; when false it uses 'e', is zero-filled up to FormatPrecision
///        and prints at least two exponent digits.
4045 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4046 unsigned FormatMaxPadding, bool TruncateZero) const {
// Non-normal categories are fully handled (and return) inside this switch.
4047 switch (category) {
4048 case fcInfinity:
4049 if (isNegative())
4050 return append(Str, "-Inf");
4051 else
4052 return append(Str, "+Inf");
4054 case fcNaN: return append(Str, "NaN");
4056 case fcZero:
4057 if (isNegative())
4058 Str.push_back('-');
4060 if (!FormatMaxPadding) {
4061 if (TruncateZero)
4062 append(Str, "0.0E+0");
4063 else {
4064 append(Str, "0.0");
4065 if (FormatPrecision > 1)
4066 Str.append(FormatPrecision - 1, '0');
4067 append(Str, "e+00");
4069 } else
4070 Str.push_back('0');
4071 return;
4073 case fcNormal:
4074 break;
4077 if (isNegative())
4078 Str.push_back('-');
4080 // Decompose the number into an APInt and an exponent.
4081 int exp = exponent - ((int) semantics->precision - 1);
4082 APInt significand(
4083 semantics->precision,
4084 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4086 // Set FormatPrecision if zero. We want to do this before we
4087 // truncate trailing zeros, as those are part of the precision.
4088 if (!FormatPrecision) {
4089 // We use enough digits so the number can be round-tripped back to an
4090 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4091 // Accurately" by Steele and White.
4092 // FIXME: Using a formula based purely on the precision is conservative;
4093 // we can print fewer digits depending on the actual value being printed.
4095 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4096 FormatPrecision = 2 + semantics->precision * 59 / 196;
4099 // Ignore trailing binary zeros.
4100 int trailingZeros = significand.countr_zero();
4101 exp += trailingZeros;
4102 significand.lshrInPlace(trailingZeros);
4104 // Change the exponent from 2^e to 10^e.
4105 if (exp == 0) {
4106 // Nothing to do.
4107 } else if (exp > 0) {
4108 // Just shift left.
4109 significand = significand.zext(semantics->precision + exp);
4110 significand <<= exp;
4111 exp = 0;
4112 } else { /* exp < 0 */
4113 int texp = -exp;
4115 // We transform this using the identity:
4116 // (N)(2^-e) == (N)(5^e)(10^-e)
4117 // This means we have to multiply N (the significand) by 5^e.
4118 // To avoid overflow, we have to operate on numbers large
4119 // enough to store N * 5^e:
4120 // log2(N * 5^e) == log2(N) + e * log2(5)
4121 // <= semantics->precision + e * 137 / 59
4122 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4124 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
4126 // Multiply significand by 5^e.
4127 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4128 significand = significand.zext(precision);
4129 APInt five_to_the_i(precision, 5);
4130 while (true) {
4131 if (texp & 1) significand *= five_to_the_i;
4133 texp >>= 1;
4134 if (!texp) break;
4135 five_to_the_i *= five_to_the_i;
// First precision cut, done on the integer before any digits are produced.
4139 AdjustToPrecision(significand, exp, FormatPrecision);
// Decimal digits are collected least significant first.
4141 SmallVector<char, 256> buffer;
4143 // Fill the buffer.
4144 unsigned precision = significand.getBitWidth();
4145 if (precision < 4) {
4146 // We need enough precision to store the value 10.
4147 precision = 4;
4148 significand = significand.zext(precision);
4150 APInt ten(precision, 10);
4151 APInt digit(precision, 0);
4153 bool inTrail = true;
4154 while (significand != 0) {
4155 // digit <- significand % 10
4156 // significand <- significand / 10
4157 APInt::udivrem(significand, ten, significand, digit);
4159 unsigned d = digit.getZExtValue();
4161 // Drop trailing zeros.
4162 if (inTrail && !d) exp++;
4163 else {
4164 buffer.push_back((char) ('0' + d));
4165 inTrail = false;
4169 assert(!buffer.empty() && "no characters in buffer!");
4171 // Drop down to FormatPrecision.
4172 // TODO: don't do more precise calculations above than are required.
4173 AdjustToPrecision(buffer, exp, FormatPrecision);
4175 unsigned NDigits = buffer.size();
4177 // Check whether we should use scientific notation.
4178 bool FormatScientific;
4179 if (!FormatMaxPadding)
4180 FormatScientific = true;
4181 else {
4182 if (exp >= 0) {
4183 // 765e3 --> 765000
4184 // ^^^
4185 // But we shouldn't make the number look more precise than it is.
4186 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4187 NDigits + (unsigned) exp > FormatPrecision);
4188 } else {
4189 // Power of the most significant digit.
4190 int MSD = exp + (int) (NDigits - 1);
4191 if (MSD >= 0) {
4192 // 765e-2 == 7.65
4193 FormatScientific = false;
4194 } else {
4195 // 765e-5 == 0.00765
4196 // ^ ^^
4197 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4202 // Scientific formatting is pretty straightforward.
4203 if (FormatScientific) {
// exp becomes the power of ten of the leading (most significant) digit.
4204 exp += (NDigits - 1);
4206 Str.push_back(buffer[NDigits-1]);
4207 Str.push_back('.');
4208 if (NDigits == 1 && TruncateZero)
4209 Str.push_back('0');
4210 else
4211 for (unsigned I = 1; I != NDigits; ++I)
4212 Str.push_back(buffer[NDigits-1-I]);
4213 // Fill with zeros up to FormatPrecision.
4214 if (!TruncateZero && FormatPrecision > NDigits - 1)
4215 Str.append(FormatPrecision - NDigits + 1, '0');
4216 // For !TruncateZero we use lower 'e'.
4217 Str.push_back(TruncateZero ? 'E' : 'e');
4219 Str.push_back(exp >= 0 ? '+' : '-');
4220 if (exp < 0) exp = -exp;
// Exponent digits are produced least significant first, then emitted reversed.
4221 SmallVector<char, 6> expbuf;
4222 do {
4223 expbuf.push_back((char) ('0' + (exp % 10)));
4224 exp /= 10;
4225 } while (exp);
4226 // Exponent always at least two digits if we do not truncate zeros.
4227 if (!TruncateZero && expbuf.size() < 2)
4228 expbuf.push_back('0');
4229 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4230 Str.push_back(expbuf[E-1-I]);
4231 return;
4234 // Non-scientific, positive exponents.
4235 if (exp >= 0) {
4236 for (unsigned I = 0; I != NDigits; ++I)
4237 Str.push_back(buffer[NDigits-1-I]);
4238 for (unsigned I = 0; I != (unsigned) exp; ++I)
4239 Str.push_back('0');
4240 return;
4243 // Non-scientific, negative exponents.
4245 // The number of digits to the left of the decimal point.
4246 int NWholeDigits = exp + (int) NDigits;
4248 unsigned I = 0;
4249 if (NWholeDigits > 0) {
4250 for (; I != (unsigned) NWholeDigits; ++I)
4251 Str.push_back(buffer[NDigits-I-1]);
4252 Str.push_back('.');
4253 } else {
4254 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4256 Str.push_back('0');
4257 Str.push_back('.');
4258 for (unsigned Z = 1; Z != NZeros; ++Z)
4259 Str.push_back('0');
// Emit whatever digits remain after the decimal point.
4262 for (; I != NDigits; ++I)
4263 Str.push_back(buffer[NDigits-I-1]);
4266 bool IEEEFloat::getExactInverse(APFloat *inv) const {
4267 // Special floats and denormals have no exact inverse.
4268 if (!isFiniteNonZero())
4269 return false;
4271 // Check that the number is a power of two by making sure that only the
4272 // integer bit is set in the significand.
4273 if (significandLSB() != semantics->precision - 1)
4274 return false;
4276 // Get the inverse.
4277 IEEEFloat reciprocal(*semantics, 1ULL);
4278 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4279 return false;
4281 // Avoid multiplication with a denormal, it is not safe on all platforms and
4282 // may be slower than a normal division.
4283 if (reciprocal.isDenormal())
4284 return false;
4286 assert(reciprocal.isFiniteNonZero() &&
4287 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4289 if (inv)
4290 *inv = APFloat(reciprocal, *semantics);
4292 return true;
4295 int IEEEFloat::getExactLog2Abs() const {
4296 if (!isFinite() || isZero())
4297 return INT_MIN;
4299 const integerPart *Parts = significandParts();
4300 const int PartCount = partCountForBits(semantics->precision);
4302 int PopCount = 0;
4303 for (int i = 0; i < PartCount; ++i) {
4304 PopCount += llvm::popcount(Parts[i]);
4305 if (PopCount > 1)
4306 return INT_MIN;
4309 if (exponent != semantics->minExponent)
4310 return exponent;
4312 int CountrParts = 0;
4313 for (int i = 0; i < PartCount;
4314 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4315 if (Parts[i] != 0) {
4316 return exponent - semantics->precision + CountrParts +
4317 llvm::countr_zero(Parts[i]) + 1;
4321 llvm_unreachable("didn't find the set bit");
4324 bool IEEEFloat::isSignaling() const {
4325 if (!isNaN())
4326 return false;
4327 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
4328 return false;
4330 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4331 // first bit of the trailing significand being 0.
4332 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4335 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4337 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4338 /// appropriate sign switching before/after the computation.
/// Steps this value in place; \p nextDown selects nextDown instead of nextUp.
/// \returns opInvalidOp when the input is a signaling NaN, opOK otherwise.
4339 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4340 // If we are performing nextDown, swap sign so we have -x.
4341 if (nextDown)
4342 changeSign();
4344 // Compute nextUp(x)
4345 opStatus result = opOK;
4347 // Handle each float category separately.
4348 switch (category) {
4349 case fcInfinity:
4350 // nextUp(+inf) = +inf
4351 if (!isNegative())
4352 break;
4353 // nextUp(-inf) = -getLargest()
4354 makeLargest(true);
4355 break;
4356 case fcNaN:
4357 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4358 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4359 // change the payload.
4360 if (isSignaling()) {
4361 result = opInvalidOp;
4362 // For consistency, propagate the sign of the sNaN to the qNaN.
4363 makeNaN(false, isNegative(), nullptr);
4365 break;
4366 case fcZero:
4367 // nextUp(pm 0) = +getSmallest()
4368 makeSmallest(false);
4369 break;
4370 case fcNormal:
4371 // nextUp(-getSmallest()) = -0
4372 if (isSmallest() && isNegative()) {
4373 APInt::tcSet(significandParts(), 0, partCount());
4374 category = fcZero;
4375 exponent = 0;
// Semantics with the NegativeZero NaN encoding get +0 here instead of -0.
4376 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4377 sign = false;
4378 break;
4381 if (isLargest() && !isNegative()) {
4382 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4383 // nextUp(getLargest()) == NAN
4384 makeNaN();
4385 break;
4386 } else {
4387 // nextUp(getLargest()) == INFINITY
4388 APInt::tcSet(significandParts(), 0, partCount());
4389 category = fcInfinity;
4390 exponent = semantics->maxExponent + 1;
4391 break;
4395 // nextUp(normal) == normal + inc.
4396 if (isNegative()) {
4397 // If we are negative, we need to decrement the significand.
4399 // We only cross a binade boundary that requires adjusting the exponent
4400 // if:
4401 // 1. exponent != semantics->minExponent. This implies we are not in the
4402 // smallest binade or are dealing with denormals.
4403 // 2. Our significand excluding the integral bit is all zeros.
4404 bool WillCrossBinadeBoundary =
4405 exponent != semantics->minExponent && isSignificandAllZeros();
4407 // Decrement the significand.
4409 // We always do this since:
4410 // 1. If we are dealing with a non-binade decrement, by definition we
4411 // just decrement the significand.
4412 // 2. If we are dealing with a normal -> normal binade decrement, since
4413 // we have an explicit integral bit the fact that all bits but the
4414 // integral bit are zero implies that subtracting one will yield a
4415 // significand with 0 integral bit and 1 in all other spots. Thus we
4416 // must just adjust the exponent and set the integral bit to 1.
4417 // 3. If we are dealing with a normal -> denormal binade decrement,
4418 // since we set the integral bit to 0 when we represent denormals, we
4419 // just decrement the significand.
4420 integerPart *Parts = significandParts();
4421 APInt::tcDecrement(Parts, partCount());
4423 if (WillCrossBinadeBoundary) {
4424 // Our result is a normal number. Do the following:
4425 // 1. Set the integral bit to 1.
4426 // 2. Decrement the exponent.
4427 APInt::tcSetBit(Parts, semantics->precision - 1);
4428 exponent--;
4430 } else {
4431 // If we are positive, we need to increment the significand.
4433 // We only cross a binade boundary that requires adjusting the exponent if
4434 // the input is not a denormal and all of said input's significand bits
4435 // are set. If all of said conditions are true: clear the significand, set
4436 // the integral bit to 1, and increment the exponent. If we have a
4437 // denormal always increment since moving denormals and the numbers in the
4438 // smallest normal binade have the same exponent in our representation.
4439 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4441 if (WillCrossBinadeBoundary) {
4442 integerPart *Parts = significandParts();
4443 APInt::tcSet(Parts, 0, partCount());
4444 APInt::tcSetBit(Parts, semantics->precision - 1);
4445 assert(exponent != semantics->maxExponent &&
4446 "We can not increment an exponent beyond the maxExponent allowed"
4447 " by the given floating point semantics.");
4448 exponent++;
4449 } else {
4450 incrementSignificand();
4453 break;
4456 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4457 if (nextDown)
4458 changeSign();
4460 return result;
4463 APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4464 return ::exponentNaN(*semantics);
4467 APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4468 return ::exponentInf(*semantics);
4471 APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4472 return ::exponentZero(*semantics);
4475 void IEEEFloat::makeInf(bool Negative) {
4476 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4477 // There is no Inf, so make NaN instead.
4478 makeNaN(false, Negative);
4479 return;
4481 category = fcInfinity;
4482 sign = Negative;
4483 exponent = exponentInf();
4484 APInt::tcSet(significandParts(), 0, partCount());
4487 void IEEEFloat::makeZero(bool Negative) {
4488 category = fcZero;
4489 sign = Negative;
4490 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4491 // Merge negative zero to positive because 0b10000...000 is used for NaN
4492 sign = false;
4494 exponent = exponentZero();
4495 APInt::tcSet(significandParts(), 0, partCount());
4498 void IEEEFloat::makeQuiet() {
4499 assert(isNaN());
4500 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4501 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4504 int ilogb(const IEEEFloat &Arg) {
4505 if (Arg.isNaN())
4506 return IEEEFloat::IEK_NaN;
4507 if (Arg.isZero())
4508 return IEEEFloat::IEK_Zero;
4509 if (Arg.isInfinity())
4510 return IEEEFloat::IEK_Inf;
4511 if (!Arg.isDenormal())
4512 return Arg.exponent;
4514 IEEEFloat Normalized(Arg);
4515 int SignificandBits = Arg.getSemantics().precision - 1;
4517 Normalized.exponent += SignificandBits;
4518 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4519 return Normalized.exponent - SignificandBits;
4522 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4523 auto MaxExp = X.getSemantics().maxExponent;
4524 auto MinExp = X.getSemantics().minExponent;
4526 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4527 // overflow; clamp it to a safe range before adding, but ensure that the range
4528 // is large enough that the clamp does not change the result. The range we
4529 // need to support is the difference between the largest possible exponent and
4530 // the normalized exponent of half the smallest denormal.
4532 int SignificandBits = X.getSemantics().precision - 1;
4533 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4535 // Clamp to one past the range ends to let normalize handle overlflow.
4536 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4537 X.normalize(RoundingMode, lfExactlyZero);
4538 if (X.isNaN())
4539 X.makeQuiet();
4540 return X;
4543 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
4544 Exp = ilogb(Val);
4546 // Quiet signalling nans.
4547 if (Exp == IEEEFloat::IEK_NaN) {
4548 IEEEFloat Quiet(Val);
4549 Quiet.makeQuiet();
4550 return Quiet;
4553 if (Exp == IEEEFloat::IEK_Inf)
4554 return Val;
4556 // 1 is added because frexp is defined to return a normalized fraction in
4557 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4558 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4559 return scalbn(Val, -Exp, RM);
4562 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4563 : Semantics(&S),
4564 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4565 assert(Semantics == &semPPCDoubleDouble);
4568 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4569 : Semantics(&S),
4570 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4571 APFloat(semIEEEdouble, uninitialized)}) {
4572 assert(Semantics == &semPPCDoubleDouble);
4575 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4576 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4577 APFloat(semIEEEdouble)}) {
4578 assert(Semantics == &semPPCDoubleDouble);
4581 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4582 : Semantics(&S),
4583 Floats(new APFloat[2]{
4584 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4585 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4586 assert(Semantics == &semPPCDoubleDouble);
4589 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4590 APFloat &&Second)
4591 : Semantics(&S),
4592 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4593 assert(Semantics == &semPPCDoubleDouble);
4594 assert(&Floats[0].getSemantics() == &semIEEEdouble);
4595 assert(&Floats[1].getSemantics() == &semIEEEdouble);
4598 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4599 : Semantics(RHS.Semantics),
4600 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4601 APFloat(RHS.Floats[1])}
4602 : nullptr) {
4603 assert(Semantics == &semPPCDoubleDouble);
4606 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4607 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4608 RHS.Semantics = &semBogus;
4609 assert(Semantics == &semPPCDoubleDouble);
4612 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4613 if (Semantics == RHS.Semantics && RHS.Floats) {
4614 Floats[0] = RHS.Floats[0];
4615 Floats[1] = RHS.Floats[1];
4616 } else if (this != &RHS) {
4617 this->~DoubleAPFloat();
4618 new (this) DoubleAPFloat(RHS);
4620 return *this;
4623 // Implement addition, subtraction, multiplication and division based on:
4624 // "Software for Doubled-Precision Floating-Point Computations",
4625 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
//
// addImpl stores the sum of the pairs (a, aa) and (c, cc) into Floats[0] and
// Floats[1]. addWithSpecial passes Floats[0] of each operand as a / c and
// Floats[1] as aa / cc. The return value is the bitwise OR of the statuses of
// the individual APFloat operations, except on the paths below that reset it
// or return opOK directly.
4626 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4627 const APFloat &c, const APFloat &cc,
4628 roundingMode RM) {
4629 int Status = opOK;
4630 APFloat z = a;
4631 Status |= z.add(c, RM);
// a + c is not finite: either it is a NaN, which is stored as is, or it is an
// infinity, in which case the sum is redone in a different order.
4632 if (!z.isFinite()) {
4633 if (!z.isInfinity()) {
4634 Floats[0] = std::move(z);
4635 Floats[1].makeZero(/* Neg = */ false);
4636 return (opStatus)Status;
// Statuses collected so far are discarded before the re-ordered attempt.
4638 Status = opOK;
4639 auto AComparedToC = a.compareAbsoluteValue(c);
4640 z = cc;
4641 Status |= z.add(aa, RM);
4642 if (AComparedToC == APFloat::cmpGreaterThan) {
4643 // z = cc + aa + c + a;
4644 Status |= z.add(c, RM);
4645 Status |= z.add(a, RM);
4646 } else {
4647 // z = cc + aa + a + c;
4648 Status |= z.add(a, RM);
4649 Status |= z.add(c, RM);
4651 if (!z.isFinite()) {
4652 Floats[0] = std::move(z);
4653 Floats[1].makeZero(/* Neg = */ false);
4654 return (opStatus)Status;
4656 Floats[0] = z;
4657 APFloat zz = aa;
4658 Status |= zz.add(cc, RM);
4659 if (AComparedToC == APFloat::cmpGreaterThan) {
4660 // Floats[1] = a - z + c + zz;
4661 Floats[1] = a;
4662 Status |= Floats[1].subtract(z, RM);
4663 Status |= Floats[1].add(c, RM);
4664 Status |= Floats[1].add(zz, RM);
4665 } else {
4666 // Floats[1] = c - z + a + zz;
4667 Floats[1] = c;
4668 Status |= Floats[1].subtract(z, RM);
4669 Status |= Floats[1].add(a, RM);
4670 Status |= Floats[1].add(zz, RM);
4672 } else {
4673 // q = a - z;
4674 APFloat q = a;
4675 Status |= q.subtract(z, RM);
4677 // zz = q + c + (a - (q + z)) + aa + cc;
4678 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4679 auto zz = q;
4680 Status |= zz.add(c, RM);
4681 Status |= q.add(z, RM);
4682 Status |= q.subtract(a, RM);
4683 q.changeSign();
4684 Status |= zz.add(q, RM);
4685 Status |= zz.add(aa, RM);
4686 Status |= zz.add(cc, RM);
// A +0 correction term means z is already the result; this path reports opOK
// regardless of the statuses accumulated above.
4687 if (zz.isZero() && !zz.isNegative()) {
4688 Floats[0] = std::move(z);
4689 Floats[1].makeZero(/* Neg = */ false);
4690 return opOK;
4692 Floats[0] = z;
4693 Status |= Floats[0].add(zz, RM);
4694 if (!Floats[0].isFinite()) {
4695 Floats[1].makeZero(/* Neg = */ false);
4696 return (opStatus)Status;
// Floats[1] = z - Floats[0] + zz; z is not used again, so it can be moved.
4698 Floats[1] = std::move(z);
4699 Status |= Floats[1].subtract(Floats[0], RM);
4700 Status |= Floats[1].add(zz, RM);
4702 return (opStatus)Status;
4705 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4706 const DoubleAPFloat &RHS,
4707 DoubleAPFloat &Out,
4708 roundingMode RM) {
4709 if (LHS.getCategory() == fcNaN) {
4710 Out = LHS;
4711 return opOK;
4713 if (RHS.getCategory() == fcNaN) {
4714 Out = RHS;
4715 return opOK;
4717 if (LHS.getCategory() == fcZero) {
4718 Out = RHS;
4719 return opOK;
4721 if (RHS.getCategory() == fcZero) {
4722 Out = LHS;
4723 return opOK;
4725 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4726 LHS.isNegative() != RHS.isNegative()) {
4727 Out.makeNaN(false, Out.isNegative(), nullptr);
4728 return opInvalidOp;
4730 if (LHS.getCategory() == fcInfinity) {
4731 Out = LHS;
4732 return opOK;
4734 if (RHS.getCategory() == fcInfinity) {
4735 Out = RHS;
4736 return opOK;
4738 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4740 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4741 CC(RHS.Floats[1]);
4742 assert(&A.getSemantics() == &semIEEEdouble);
4743 assert(&AA.getSemantics() == &semIEEEdouble);
4744 assert(&C.getSemantics() == &semIEEEdouble);
4745 assert(&CC.getSemantics() == &semIEEEdouble);
4746 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4747 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4748 return Out.addImpl(A, AA, C, CC, RM);
4751 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4752 roundingMode RM) {
4753 return addWithSpecial(*this, RHS, *this, RM);
4756 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4757 roundingMode RM) {
4758 changeSign();
4759 auto Ret = add(RHS, RM);
4760 changeSign();
4761 return Ret;
4764 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4765 APFloat::roundingMode RM) {
4766 const auto &LHS = *this;
4767 auto &Out = *this;
4768 /* Interesting observation: For special categories, finding the lowest
4769 common ancestor of the following layered graph gives the correct
4770 return category:
4774 Zero Inf
4776 Normal
4778 e.g. NaN * NaN = NaN
4779 Zero * Inf = NaN
4780 Normal * Zero = Zero
4781 Normal * Inf = Inf
4783 if (LHS.getCategory() == fcNaN) {
4784 Out = LHS;
4785 return opOK;
4787 if (RHS.getCategory() == fcNaN) {
4788 Out = RHS;
4789 return opOK;
4791 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4792 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4793 Out.makeNaN(false, false, nullptr);
4794 return opOK;
4796 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4797 Out = LHS;
4798 return opOK;
4800 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4801 Out = RHS;
4802 return opOK;
4804 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4805 "Special cases not handled exhaustively");
4807 int Status = opOK;
4808 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4809 // t = a * c
4810 APFloat T = A;
4811 Status |= T.multiply(C, RM);
4812 if (!T.isFiniteNonZero()) {
4813 Floats[0] = T;
4814 Floats[1].makeZero(/* Neg = */ false);
4815 return (opStatus)Status;
4818 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4819 APFloat Tau = A;
4820 T.changeSign();
4821 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4822 T.changeSign();
4824 // v = a * d
4825 APFloat V = A;
4826 Status |= V.multiply(D, RM);
4827 // w = b * c
4828 APFloat W = B;
4829 Status |= W.multiply(C, RM);
4830 Status |= V.add(W, RM);
4831 // tau += v + w
4832 Status |= Tau.add(V, RM);
4834 // u = t + tau
4835 APFloat U = T;
4836 Status |= U.add(Tau, RM);
4838 Floats[0] = U;
4839 if (!U.isFinite()) {
4840 Floats[1].makeZero(/* Neg = */ false);
4841 } else {
4842 // Floats[1] = (t - u) + tau
4843 Status |= T.subtract(U, RM);
4844 Status |= T.add(Tau, RM);
4845 Floats[1] = T;
4847 return (opStatus)Status;
4850 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4851 APFloat::roundingMode RM) {
4852 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4853 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4854 auto Ret =
4855 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4856 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4857 return Ret;
4860 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4861 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4862 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4863 auto Ret =
4864 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4865 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4866 return Ret;
4869 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4870 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4871 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4872 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4873 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4874 return Ret;
4877 APFloat::opStatus
4878 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4879 const DoubleAPFloat &Addend,
4880 APFloat::roundingMode RM) {
4881 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4882 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4883 auto Ret = Tmp.fusedMultiplyAdd(
4884 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4885 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4886 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4887 return Ret;
4890 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4891 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4892 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4893 auto Ret = Tmp.roundToIntegral(RM);
4894 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4895 return Ret;
4898 void DoubleAPFloat::changeSign() {
4899 Floats[0].changeSign();
4900 Floats[1].changeSign();
4903 APFloat::cmpResult
4904 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4905 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4906 if (Result != cmpEqual)
4907 return Result;
4908 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4909 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4910 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4911 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4912 if (Against && !RHSAgainst)
4913 return cmpLessThan;
4914 if (!Against && RHSAgainst)
4915 return cmpGreaterThan;
4916 if (!Against && !RHSAgainst)
4917 return Result;
4918 if (Against && RHSAgainst)
4919 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4921 return Result;
4924 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4925 return Floats[0].getCategory();
4928 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4930 void DoubleAPFloat::makeInf(bool Neg) {
4931 Floats[0].makeInf(Neg);
4932 Floats[1].makeZero(/* Neg = */ false);
4935 void DoubleAPFloat::makeZero(bool Neg) {
4936 Floats[0].makeZero(Neg);
4937 Floats[1].makeZero(/* Neg = */ false);
4940 void DoubleAPFloat::makeLargest(bool Neg) {
4941 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4942 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4943 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4944 if (Neg)
4945 changeSign();
4948 void DoubleAPFloat::makeSmallest(bool Neg) {
4949 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4950 Floats[0].makeSmallest(Neg);
4951 Floats[1].makeZero(/* Neg = */ false);
4954 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4955 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4956 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4957 if (Neg)
4958 Floats[0].changeSign();
4959 Floats[1].makeZero(/* Neg = */ false);
4962 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4963 Floats[0].makeNaN(SNaN, Neg, fill);
4964 Floats[1].makeZero(/* Neg = */ false);
4967 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4968 auto Result = Floats[0].compare(RHS.Floats[0]);
4969 // |Float[0]| > |Float[1]|
4970 if (Result == APFloat::cmpEqual)
4971 return Floats[1].compare(RHS.Floats[1]);
4972 return Result;
4975 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4976 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4977 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4980 hash_code hash_value(const DoubleAPFloat &Arg) {
4981 if (Arg.Floats)
4982 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4983 return hash_combine(Arg.Semantics);
4986 APInt DoubleAPFloat::bitcastToAPInt() const {
4987 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4988 uint64_t Data[] = {
4989 Floats[0].bitcastToAPInt().getRawData()[0],
4990 Floats[1].bitcastToAPInt().getRawData()[0],
4992 return APInt(128, 2, Data);
4995 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
4996 roundingMode RM) {
4997 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4998 APFloat Tmp(semPPCDoubleDoubleLegacy);
4999 auto Ret = Tmp.convertFromString(S, RM);
5000 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5001 return Ret;
5004 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
5005 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5006 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5007 auto Ret = Tmp.next(nextDown);
5008 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5009 return Ret;
5012 APFloat::opStatus
5013 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
5014 unsigned int Width, bool IsSigned,
5015 roundingMode RM, bool *IsExact) const {
5016 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5017 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5018 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
5021 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
5022 bool IsSigned,
5023 roundingMode RM) {
5024 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5025 APFloat Tmp(semPPCDoubleDoubleLegacy);
5026 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5027 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5028 return Ret;
5031 APFloat::opStatus
5032 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
5033 unsigned int InputSize,
5034 bool IsSigned, roundingMode RM) {
5035 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5036 APFloat Tmp(semPPCDoubleDoubleLegacy);
5037 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5038 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5039 return Ret;
5042 APFloat::opStatus
5043 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
5044 unsigned int InputSize,
5045 bool IsSigned, roundingMode RM) {
5046 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5047 APFloat Tmp(semPPCDoubleDoubleLegacy);
5048 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5049 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5050 return Ret;
5053 unsigned int DoubleAPFloat::convertToHexString(char *DST,
5054 unsigned int HexDigits,
5055 bool UpperCase,
5056 roundingMode RM) const {
5057 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5058 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5059 .convertToHexString(DST, HexDigits, UpperCase, RM);
5062 bool DoubleAPFloat::isDenormal() const {
5063 return getCategory() == fcNormal &&
5064 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5065 // (double)(Hi + Lo) == Hi defines a normal number.
5066 Floats[0] != Floats[0] + Floats[1]);
5069 bool DoubleAPFloat::isSmallest() const {
5070 if (getCategory() != fcNormal)
5071 return false;
5072 DoubleAPFloat Tmp(*this);
5073 Tmp.makeSmallest(this->isNegative());
5074 return Tmp.compare(*this) == cmpEqual;
5077 bool DoubleAPFloat::isSmallestNormalized() const {
5078 if (getCategory() != fcNormal)
5079 return false;
5081 DoubleAPFloat Tmp(*this);
5082 Tmp.makeSmallestNormalized(this->isNegative());
5083 return Tmp.compare(*this) == cmpEqual;
5086 bool DoubleAPFloat::isLargest() const {
5087 if (getCategory() != fcNormal)
5088 return false;
5089 DoubleAPFloat Tmp(*this);
5090 Tmp.makeLargest(this->isNegative());
5091 return Tmp.compare(*this) == cmpEqual;
5094 bool DoubleAPFloat::isInteger() const {
5095 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5096 return Floats[0].isInteger() && Floats[1].isInteger();
5099 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5100 unsigned FormatPrecision,
5101 unsigned FormatMaxPadding,
5102 bool TruncateZero) const {
5103 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5104 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5105 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5108 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5109 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5110 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5111 if (!inv)
5112 return Tmp.getExactInverse(nullptr);
5113 APFloat Inv(semPPCDoubleDoubleLegacy);
5114 auto Ret = Tmp.getExactInverse(&Inv);
5115 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5116 return Ret;
5119 int DoubleAPFloat::getExactLog2() const {
5120 // TODO: Implement me
5121 return INT_MIN;
5124 int DoubleAPFloat::getExactLog2Abs() const {
5125 // TODO: Implement me
5126 return INT_MIN;
5129 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
5130 APFloat::roundingMode RM) {
5131 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5132 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
5133 scalbn(Arg.Floats[1], Exp, RM));
5136 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5137 APFloat::roundingMode RM) {
5138 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5139 APFloat First = frexp(Arg.Floats[0], Exp, RM);
5140 APFloat Second = Arg.Floats[1];
5141 if (Arg.getCategory() == APFloat::fcNormal)
5142 Second = scalbn(Second, -Exp, RM);
5143 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5146 } // namespace detail
5148 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5149 if (usesLayout<IEEEFloat>(Semantics)) {
5150 new (&IEEE) IEEEFloat(std::move(F));
5151 return;
5153 if (usesLayout<DoubleAPFloat>(Semantics)) {
5154 const fltSemantics& S = F.getSemantics();
5155 new (&Double)
5156 DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5157 APFloat(semIEEEdouble));
5158 return;
5160 llvm_unreachable("Unexpected semantics");
5163 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
5164 roundingMode RM) {
5165 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
5168 hash_code hash_value(const APFloat &Arg) {
5169 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5170 return hash_value(Arg.U.IEEE);
5171 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5172 return hash_value(Arg.U.Double);
5173 llvm_unreachable("Unexpected semantics");
5176 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5177 : APFloat(Semantics) {
5178 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5179 assert(StatusOrErr && "Invalid floating point representation");
5180 consumeError(StatusOrErr.takeError());
5183 FPClassTest APFloat::classify() const {
5184 if (isZero())
5185 return isNegative() ? fcNegZero : fcPosZero;
5186 if (isNormal())
5187 return isNegative() ? fcNegNormal : fcPosNormal;
5188 if (isDenormal())
5189 return isNegative() ? fcNegSubnormal : fcPosSubnormal;
5190 if (isInfinity())
5191 return isNegative() ? fcNegInf : fcPosInf;
5192 assert(isNaN() && "Other class of FP constant");
5193 return isSignaling() ? fcSNan : fcQNan;
5196 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5197 roundingMode RM, bool *losesInfo) {
5198 if (&getSemantics() == &ToSemantics) {
5199 *losesInfo = false;
5200 return opOK;
5202 if (usesLayout<IEEEFloat>(getSemantics()) &&
5203 usesLayout<IEEEFloat>(ToSemantics))
5204 return U.IEEE.convert(ToSemantics, RM, losesInfo);
5205 if (usesLayout<IEEEFloat>(getSemantics()) &&
5206 usesLayout<DoubleAPFloat>(ToSemantics)) {
5207 assert(&ToSemantics == &semPPCDoubleDouble);
5208 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
5209 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5210 return Ret;
5212 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5213 usesLayout<IEEEFloat>(ToSemantics)) {
5214 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5215 *this = APFloat(std::move(getIEEE()), ToSemantics);
5216 return Ret;
5218 llvm_unreachable("Unexpected semantics");
5221 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5222 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
5225 void APFloat::print(raw_ostream &OS) const {
5226 SmallVector<char, 16> Buffer;
5227 toString(Buffer);
5228 OS << Buffer << "\n";
5231 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5232 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
5233 #endif
5235 void APFloat::Profile(FoldingSetNodeID &NID) const {
5236 NID.Add(bitcastToAPInt());
5239 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5240 an APSInt, whose initial bit-width and signed-ness are used to determine the
5241 precision of the conversion.
5243 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5244 roundingMode rounding_mode,
5245 bool *isExact) const {
5246 unsigned bitWidth = result.getBitWidth();
5247 SmallVector<uint64_t, 4> parts(result.getNumWords());
5248 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5249 rounding_mode, isExact);
5250 // Keeps the original signed-ness.
5251 result = APInt(bitWidth, parts);
5252 return status;
5255 double APFloat::convertToDouble() const {
5256 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5257 return getIEEE().convertToDouble();
5258 assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5259 "Float semantics is not representable by IEEEdouble");
5260 APFloat Temp = *this;
5261 bool LosesInfo;
5262 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5263 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5264 (void)St;
5265 return Temp.getIEEE().convertToDouble();
5268 float APFloat::convertToFloat() const {
5269 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5270 return getIEEE().convertToFloat();
5271 assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5272 "Float semantics is not representable by IEEEsingle");
5273 APFloat Temp = *this;
5274 bool LosesInfo;
5275 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5276 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5277 (void)St;
5278 return Temp.getIEEE().convertToFloat();
5281 } // namespace llvm
5283 #undef APFLOAT_DISPATCH_ON_SEMANTICS