[clang] Add test for CWG190 "Layout-compatible POD-struct types" (#121668)
[llvm-project.git] / llvm / lib / Support / APFloat.cpp
blobc9adfca8b3b7685cda60a1123573cce485bb67dd
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FloatingPointMode.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cstring>
29 #include <limits.h>
/// Forward METHOD_CALL to whichever union member matches the current
/// semantics: U.IEEE for the IEEEFloat layout, U.Double for the DoubleAPFloat
/// layout.  The macro expands to `return` statements, so it must be the last
/// thing executed in the function that uses it.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)
40 using namespace llvm;
/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// The key is `_lhs * 4 + _rhs`, so distinct pairs map to distinct keys as
/// long as each operand lies in the range [0, 4).
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
50 /* Assumed in hexadecimal significand parsing, and conversion to
51 hexadecimal strings. */
52 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
54 namespace llvm {
// How the nonfinite values Inf and NaN are represented.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by the
  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,

  // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
  // Float4E2M1FN types, which do not support Inf or NaN values.
  FiniteOnly,
};
// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3FN floating point type where NaN
  // is represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in a IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};
102 /* Represents floating point arithmetic semantics. */
103 struct fltSemantics {
104 /* The largest E such that 2^E is representable; this matches the
105 definition of IEEE 754. */
106 APFloatBase::ExponentType maxExponent;
108 /* The smallest E such that 2^E is a normalized number; this
109 matches the definition of IEEE 754. */
110 APFloatBase::ExponentType minExponent;
112 /* Number of bits in the significand. This includes the integer
113 bit. */
114 unsigned int precision;
116 /* Number of bits actually used in the semantics. */
117 unsigned int sizeInBits;
119 fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
121 fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
123 /* Whether this semantics has an encoding for Zero */
124 bool hasZero = true;
126 /* Whether this semantics can represent signed values */
127 bool hasSignedRepr = true;
129 // Returns true if any number described by this semantics can be precisely
130 // represented by the specified semantics. Does not take into account
131 // the value of fltNonfiniteBehavior.
132 bool isRepresentableBy(const fltSemantics &S) const {
133 return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
134 precision <= S.precision;
138 static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
139 static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
140 static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
141 static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
142 static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
143 static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
144 static constexpr fltSemantics semFloat8E5M2FNUZ = {
145 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
146 static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
147 static constexpr fltSemantics semFloat8E4M3FN = {
148 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
149 static constexpr fltSemantics semFloat8E4M3FNUZ = {
150 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
151 static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
152 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
153 static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8};
154 static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
155 static constexpr fltSemantics semFloat8E8M0FNU = {
156 127, -127, 1, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes,
157 false, false};
159 static constexpr fltSemantics semFloat6E3M2FN = {
160 4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
161 static constexpr fltSemantics semFloat6E2M3FN = {
162 2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
163 static constexpr fltSemantics semFloat4E2M1FN = {
164 2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};
165 static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
166 static constexpr fltSemantics semBogus = {0, 0, 0, 0};
167 static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
168 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
169 53 + 53, 128};
171 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
172 switch (S) {
173 case S_IEEEhalf:
174 return IEEEhalf();
175 case S_BFloat:
176 return BFloat();
177 case S_IEEEsingle:
178 return IEEEsingle();
179 case S_IEEEdouble:
180 return IEEEdouble();
181 case S_IEEEquad:
182 return IEEEquad();
183 case S_PPCDoubleDouble:
184 return PPCDoubleDouble();
185 case S_PPCDoubleDoubleLegacy:
186 return PPCDoubleDoubleLegacy();
187 case S_Float8E5M2:
188 return Float8E5M2();
189 case S_Float8E5M2FNUZ:
190 return Float8E5M2FNUZ();
191 case S_Float8E4M3:
192 return Float8E4M3();
193 case S_Float8E4M3FN:
194 return Float8E4M3FN();
195 case S_Float8E4M3FNUZ:
196 return Float8E4M3FNUZ();
197 case S_Float8E4M3B11FNUZ:
198 return Float8E4M3B11FNUZ();
199 case S_Float8E3M4:
200 return Float8E3M4();
201 case S_FloatTF32:
202 return FloatTF32();
203 case S_Float8E8M0FNU:
204 return Float8E8M0FNU();
205 case S_Float6E3M2FN:
206 return Float6E3M2FN();
207 case S_Float6E2M3FN:
208 return Float6E2M3FN();
209 case S_Float4E2M1FN:
210 return Float4E2M1FN();
211 case S_x87DoubleExtended:
212 return x87DoubleExtended();
214 llvm_unreachable("Unrecognised floating semantics");
217 APFloatBase::Semantics
218 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
219 if (&Sem == &llvm::APFloat::IEEEhalf())
220 return S_IEEEhalf;
221 else if (&Sem == &llvm::APFloat::BFloat())
222 return S_BFloat;
223 else if (&Sem == &llvm::APFloat::IEEEsingle())
224 return S_IEEEsingle;
225 else if (&Sem == &llvm::APFloat::IEEEdouble())
226 return S_IEEEdouble;
227 else if (&Sem == &llvm::APFloat::IEEEquad())
228 return S_IEEEquad;
229 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
230 return S_PPCDoubleDouble;
231 else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
232 return S_PPCDoubleDoubleLegacy;
233 else if (&Sem == &llvm::APFloat::Float8E5M2())
234 return S_Float8E5M2;
235 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
236 return S_Float8E5M2FNUZ;
237 else if (&Sem == &llvm::APFloat::Float8E4M3())
238 return S_Float8E4M3;
239 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
240 return S_Float8E4M3FN;
241 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
242 return S_Float8E4M3FNUZ;
243 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
244 return S_Float8E4M3B11FNUZ;
245 else if (&Sem == &llvm::APFloat::Float8E3M4())
246 return S_Float8E3M4;
247 else if (&Sem == &llvm::APFloat::FloatTF32())
248 return S_FloatTF32;
249 else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
250 return S_Float8E8M0FNU;
251 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
252 return S_Float6E3M2FN;
253 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
254 return S_Float6E2M3FN;
255 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
256 return S_Float4E2M1FN;
257 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
258 return S_x87DoubleExtended;
259 else
260 llvm_unreachable("Unknown floating semantics");
263 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
264 const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
265 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
266 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
267 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
268 const fltSemantics &APFloatBase::PPCDoubleDouble() {
269 return semPPCDoubleDouble;
271 const fltSemantics &APFloatBase::PPCDoubleDoubleLegacy() {
272 return semPPCDoubleDoubleLegacy;
274 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
275 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
276 const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; }
277 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
278 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
279 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
280 return semFloat8E4M3B11FNUZ;
282 const fltSemantics &APFloatBase::Float8E3M4() { return semFloat8E3M4; }
283 const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
284 const fltSemantics &APFloatBase::Float8E8M0FNU() { return semFloat8E8M0FNU; }
285 const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; }
286 const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; }
287 const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; }
288 const fltSemantics &APFloatBase::x87DoubleExtended() {
289 return semX87DoubleExtended;
291 const fltSemantics &APFloatBase::Bogus() { return semBogus; }
293 constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
294 constexpr RoundingMode APFloatBase::rmTowardPositive;
295 constexpr RoundingMode APFloatBase::rmTowardNegative;
296 constexpr RoundingMode APFloatBase::rmTowardZero;
297 constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
299 /* A tight upper bound on number of parts required to hold the value
300 pow(5, power) is
302 power * 815 / (351 * integerPartWidth) + 1
304 However, whilst the result may require only this many parts,
305 because we are multiplying two values to get it, the
306 multiplication may require an extra part with the excess part
307 being zero (consider the trivial case of 1 * 1, tcFullMultiply
308 requires two parts to hold the single-part result). So we add an
309 extra one to guarantee enough space whilst multiplying. */
310 const unsigned int maxExponent = 16383;
311 const unsigned int maxPrecision = 113;
312 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
313 const unsigned int maxPowerOfFiveParts =
315 ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
317 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
318 return semantics.precision;
320 APFloatBase::ExponentType
321 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
322 return semantics.maxExponent;
324 APFloatBase::ExponentType
325 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
326 return semantics.minExponent;
328 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
329 return semantics.sizeInBits;
331 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
332 bool isSigned) {
333 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
334 // at least one more bit than the MaxExponent to hold the max FP value.
335 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
336 // Extra sign bit needed.
337 if (isSigned)
338 ++MinBitWidth;
339 return MinBitWidth;
342 bool APFloatBase::semanticsHasZero(const fltSemantics &semantics) {
343 return semantics.hasZero;
346 bool APFloatBase::semanticsHasSignedRepr(const fltSemantics &semantics) {
347 return semantics.hasSignedRepr;
350 bool APFloatBase::semanticsHasInf(const fltSemantics &semantics) {
351 return semantics.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754;
354 bool APFloatBase::semanticsHasNaN(const fltSemantics &semantics) {
355 return semantics.nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly;
358 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
359 const fltSemantics &Dst) {
360 // Exponent range must be larger.
361 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
362 return false;
364 // If the mantissa is long enough, the result value could still be denormal
365 // with a larger exponent range.
367 // FIXME: This condition is probably not accurate but also shouldn't be a
368 // practical concern with existing types.
369 return Dst.precision >= Src.precision;
372 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
373 return Sem.sizeInBits;
376 static constexpr APFloatBase::ExponentType
377 exponentZero(const fltSemantics &semantics) {
378 return semantics.minExponent - 1;
381 static constexpr APFloatBase::ExponentType
382 exponentInf(const fltSemantics &semantics) {
383 return semantics.maxExponent + 1;
386 static constexpr APFloatBase::ExponentType
387 exponentNaN(const fltSemantics &semantics) {
388 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
389 if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
390 return exponentZero(semantics);
391 if (semantics.hasSignedRepr)
392 return semantics.maxExponent;
394 return semantics.maxExponent + 1;
397 /* A bunch of private, handy routines. */
399 static inline Error createError(const Twine &Err) {
400 return make_error<StringError>(Err, inconvertibleErrorCode());
403 static constexpr inline unsigned int partCountForBits(unsigned int bits) {
404 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) /
405 APFloatBase::integerPartWidth);
/* Returns 0U-9U.  Return values >= 10U are not digits.  The subtraction is
   unsigned, so characters below '0' wrap around to large values.  */
static inline unsigned int
decDigitValue(unsigned int c)
{
  return c - '0';
}
415 /* Return the value of a decimal exponent of the form
416 [+-]ddddddd.
418 If the exponent overflows, returns a large exponent with the
419 appropriate sign. */
420 static Expected<int> readExponent(StringRef::iterator begin,
421 StringRef::iterator end) {
422 bool isNegative;
423 unsigned int absExponent;
424 const unsigned int overlargeExponent = 24000; /* FIXME. */
425 StringRef::iterator p = begin;
427 // Treat no exponent as 0 to match binutils
428 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
429 return 0;
432 isNegative = (*p == '-');
433 if (*p == '-' || *p == '+') {
434 p++;
435 if (p == end)
436 return createError("Exponent has no digits");
439 absExponent = decDigitValue(*p++);
440 if (absExponent >= 10U)
441 return createError("Invalid character in exponent");
443 for (; p != end; ++p) {
444 unsigned int value;
446 value = decDigitValue(*p);
447 if (value >= 10U)
448 return createError("Invalid character in exponent");
450 absExponent = absExponent * 10U + value;
451 if (absExponent >= overlargeExponent) {
452 absExponent = overlargeExponent;
453 break;
457 if (isNegative)
458 return -(int) absExponent;
459 else
460 return (int) absExponent;
463 /* This is ugly and needs cleaning up, but I don't immediately see
464 how whilst remaining safe. */
465 static Expected<int> totalExponent(StringRef::iterator p,
466 StringRef::iterator end,
467 int exponentAdjustment) {
468 int unsignedExponent;
469 bool negative, overflow;
470 int exponent = 0;
472 if (p == end)
473 return createError("Exponent has no digits");
475 negative = *p == '-';
476 if (*p == '-' || *p == '+') {
477 p++;
478 if (p == end)
479 return createError("Exponent has no digits");
482 unsignedExponent = 0;
483 overflow = false;
484 for (; p != end; ++p) {
485 unsigned int value;
487 value = decDigitValue(*p);
488 if (value >= 10U)
489 return createError("Invalid character in exponent");
491 unsignedExponent = unsignedExponent * 10 + value;
492 if (unsignedExponent > 32767) {
493 overflow = true;
494 break;
498 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
499 overflow = true;
501 if (!overflow) {
502 exponent = unsignedExponent;
503 if (negative)
504 exponent = -exponent;
505 exponent += exponentAdjustment;
506 if (exponent > 32767 || exponent < -32768)
507 overflow = true;
510 if (overflow)
511 exponent = negative ? -32768: 32767;
513 return exponent;
516 static Expected<StringRef::iterator>
517 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
518 StringRef::iterator *dot) {
519 StringRef::iterator p = begin;
520 *dot = end;
521 while (p != end && *p == '0')
522 p++;
524 if (p != end && *p == '.') {
525 *dot = p++;
527 if (end - begin == 1)
528 return createError("Significand has no digits");
530 while (p != end && *p == '0')
531 p++;
534 return p;
/* Given a normal decimal floating point number of the form

     dddd.dddd[eE][+-]ddd

   where the decimal point and exponent are optional, fill out the
   structure D.  Exponent is appropriate if the significand is
   treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  const char *firstSigDigit;
  const char *lastSigDigit;
  int exponent;
  int normalizedExponent;
};
557 static Error interpretDecimal(StringRef::iterator begin,
558 StringRef::iterator end, decimalInfo *D) {
559 StringRef::iterator dot = end;
561 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
562 if (!PtrOrErr)
563 return PtrOrErr.takeError();
564 StringRef::iterator p = *PtrOrErr;
566 D->firstSigDigit = p;
567 D->exponent = 0;
568 D->normalizedExponent = 0;
570 for (; p != end; ++p) {
571 if (*p == '.') {
572 if (dot != end)
573 return createError("String contains multiple dots");
574 dot = p++;
575 if (p == end)
576 break;
578 if (decDigitValue(*p) >= 10U)
579 break;
582 if (p != end) {
583 if (*p != 'e' && *p != 'E')
584 return createError("Invalid character in significand");
585 if (p == begin)
586 return createError("Significand has no digits");
587 if (dot != end && p - begin == 1)
588 return createError("Significand has no digits");
590 /* p points to the first non-digit in the string */
591 auto ExpOrErr = readExponent(p + 1, end);
592 if (!ExpOrErr)
593 return ExpOrErr.takeError();
594 D->exponent = *ExpOrErr;
596 /* Implied decimal point? */
597 if (dot == end)
598 dot = p;
601 /* If number is all zeroes accept any exponent. */
602 if (p != D->firstSigDigit) {
603 /* Drop insignificant trailing zeroes. */
604 if (p != begin) {
607 p--;
608 while (p != begin && *p == '0');
609 while (p != begin && *p == '.');
612 /* Adjust the exponents for any decimal point. */
613 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
614 D->normalizedExponent = (D->exponent +
615 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
616 - (dot > D->firstSigDigit && dot < p)));
619 D->lastSigDigit = p;
620 return Error::success();
623 /* Return the trailing fraction of a hexadecimal number.
624 DIGITVALUE is the first hex digit of the fraction, P points to
625 the next digit. */
626 static Expected<lostFraction>
627 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
628 unsigned int digitValue) {
629 unsigned int hexDigit;
631 /* If the first trailing digit isn't 0 or 8 we can work out the
632 fraction immediately. */
633 if (digitValue > 8)
634 return lfMoreThanHalf;
635 else if (digitValue < 8 && digitValue > 0)
636 return lfLessThanHalf;
638 // Otherwise we need to find the first non-zero digit.
639 while (p != end && (*p == '0' || *p == '.'))
640 p++;
642 if (p == end)
643 return createError("Invalid trailing hexadecimal fraction!");
645 hexDigit = hexDigitValue(*p);
647 /* If we ran off the end it is exactly zero or one-half, otherwise
648 a little more. */
649 if (hexDigit == UINT_MAX)
650 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
651 else
652 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
655 /* Return the fraction lost were a bignum truncated losing the least
656 significant BITS bits. */
657 static lostFraction
658 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
659 unsigned int partCount,
660 unsigned int bits)
662 unsigned int lsb;
664 lsb = APInt::tcLSB(parts, partCount);
666 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
667 if (bits <= lsb)
668 return lfExactlyZero;
669 if (bits == lsb + 1)
670 return lfExactlyHalf;
671 if (bits <= partCount * APFloatBase::integerPartWidth &&
672 APInt::tcExtractBit(parts, bits - 1))
673 return lfMoreThanHalf;
675 return lfLessThanHalf;
678 /* Shift DST right BITS bits noting lost fraction. */
679 static lostFraction
680 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
682 lostFraction lost_fraction;
684 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
686 APInt::tcShiftRight(dst, parts, bits);
688 return lost_fraction;
691 /* Combine the effect of two lost fractions. */
692 static lostFraction
693 combineLostFractions(lostFraction moreSignificant,
694 lostFraction lessSignificant)
696 if (lessSignificant != lfExactlyZero) {
697 if (moreSignificant == lfExactlyZero)
698 moreSignificant = lfLessThanHalf;
699 else if (moreSignificant == lfExactlyHalf)
700 moreSignificant = lfMoreThanHalf;
703 return moreSignificant;
/* The error from the true value, in half-ulps, on multiplying two
   floating point numbers, which differ from the value they
   approximate by at most HUE1 and HUE2 half-ulps, is strictly less
   than the returned value.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  if (HUerr1 + HUerr2 == 0)
    return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
  else
    return inexactMultiply + 2 * (HUerr1 + HUerr2);
}
724 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
725 when the least significant BITS are truncated. BITS cannot be
726 zero. */
727 static APFloatBase::integerPart
728 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
729 bool isNearest) {
730 unsigned int count, partBits;
731 APFloatBase::integerPart part, boundary;
733 assert(bits != 0);
735 bits--;
736 count = bits / APFloatBase::integerPartWidth;
737 partBits = bits % APFloatBase::integerPartWidth + 1;
739 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
741 if (isNearest)
742 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
743 else
744 boundary = 0;
746 if (count == 0) {
747 if (part - boundary <= boundary - part)
748 return part - boundary;
749 else
750 return boundary - part;
753 if (part == boundary) {
754 while (--count)
755 if (parts[count])
756 return ~(APFloatBase::integerPart) 0; /* A lot. */
758 return parts[0];
759 } else if (part == boundary - 1) {
760 while (--count)
761 if (~parts[count])
762 return ~(APFloatBase::integerPart) 0; /* A lot. */
764 return -parts[0];
767 return ~(APFloatBase::integerPart) 0; /* A lot. */
770 /* Place pow(5, power) in DST, and return the number of parts used.
771 DST must be at least one part larger than size of the answer. */
772 static unsigned int
773 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
774 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
775 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
776 pow5s[0] = 78125 * 5;
778 unsigned int partsCount = 1;
779 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
780 unsigned int result;
781 assert(power <= maxExponent);
783 p1 = dst;
784 p2 = scratch;
786 *p1 = firstEightPowers[power & 7];
787 power >>= 3;
789 result = 1;
790 pow5 = pow5s;
792 for (unsigned int n = 0; power; power >>= 1, n++) {
793 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
794 if (n != 0) {
795 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
796 partsCount, partsCount);
797 partsCount *= 2;
798 if (pow5[partsCount - 1] == 0)
799 partsCount--;
802 if (power & 1) {
803 APFloatBase::integerPart *tmp;
805 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
806 result += partsCount;
807 if (p2[result - 1] == 0)
808 result--;
810 /* Now result is in p1 with partsCount parts and p2 is scratch
811 space. */
812 tmp = p1;
813 p1 = p2;
814 p2 = tmp;
817 pow5 += partsCount;
820 if (p1 != dst)
821 APInt::tcAssign(dst, p1, result);
823 return result;
/* Zero at the end to avoid modular arithmetic when adding one; used
   when rounding up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
/* Lower- and upper-case spellings of infinity and NaN.  */
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";
835 /* Write out an integerPart in hexadecimal, starting with the most
836 significant nibble. Write out exactly COUNT hexdigits, return
837 COUNT. */
838 static unsigned int
839 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
840 const char *hexDigitChars)
842 unsigned int result = count;
844 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
846 part >>= (APFloatBase::integerPartWidth - 4 * count);
847 while (count--) {
848 dst[count] = hexDigitChars[part & 0xf];
849 part >>= 4;
852 return result;
/* Write out an unsigned decimal integer.  Returns a pointer just past the
   last character written; no terminating NUL is written.  */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char buff[40], *p;

  /* Generate the digits least significant first...  */
  p = buff;
  do
    *p++ = '0' + n % 10;
  while (n /= 10);

  /* ...then copy them out in reverse.  */
  do
    *dst++ = *--p;
  while (p != buff);

  return dst;
}
873 /* Write out a signed decimal integer. */
874 static char *
875 writeSignedDecimal (char *dst, int value)
877 if (value < 0) {
878 *dst++ = '-';
879 dst = writeUnsignedDecimal(dst, -(unsigned) value);
880 } else
881 dst = writeUnsignedDecimal(dst, value);
883 return dst;
886 namespace detail {
887 /* Constructors. */
888 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
889 unsigned int count;
891 semantics = ourSemantics;
892 count = partCount();
893 if (count > 1)
894 significand.parts = new integerPart[count];
897 void IEEEFloat::freeSignificand() {
898 if (needsCleanup())
899 delete [] significand.parts;
902 void IEEEFloat::assign(const IEEEFloat &rhs) {
903 assert(semantics == rhs.semantics);
905 sign = rhs.sign;
906 category = rhs.category;
907 exponent = rhs.exponent;
908 if (isFiniteNonZero() || category == fcNaN)
909 copySignificand(rhs);
912 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
913 assert(isFiniteNonZero() || category == fcNaN);
914 assert(rhs.partCount() >= partCount());
916 APInt::tcAssign(significandParts(), rhs.significandParts(),
917 partCount());
920 /* Make this number a NaN, with an arbitrary but deterministic value
921 for the significand. If double or longer, this is a signalling NaN,
922 which may not be ideal. If float, this is QNaN(0). */
923 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
924 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
925 llvm_unreachable("This floating point format does not support NaN");
927 if (Negative && !semantics->hasSignedRepr)
928 llvm_unreachable(
929 "This floating point format does not support signed values");
931 category = fcNaN;
932 sign = Negative;
933 exponent = exponentNaN();
935 integerPart *significand = significandParts();
936 unsigned numParts = partCount();
938 APInt fill_storage;
939 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
940 // Finite-only types do not distinguish signalling and quiet NaN, so
941 // make them all signalling.
942 SNaN = false;
943 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
944 sign = true;
945 fill_storage = APInt::getZero(semantics->precision - 1);
946 } else {
947 fill_storage = APInt::getAllOnes(semantics->precision - 1);
949 fill = &fill_storage;
952 // Set the significand bits to the fill.
953 if (!fill || fill->getNumWords() < numParts)
954 APInt::tcSet(significand, 0, numParts);
955 if (fill) {
956 APInt::tcAssign(significand, fill->getRawData(),
957 std::min(fill->getNumWords(), numParts));
959 // Zero out the excess bits of the significand.
960 unsigned bitsToPreserve = semantics->precision - 1;
961 unsigned part = bitsToPreserve / 64;
962 bitsToPreserve %= 64;
963 significand[part] &= ((1ULL << bitsToPreserve) - 1);
964 for (part++; part != numParts; ++part)
965 significand[part] = 0;
968 unsigned QNaNBit =
969 (semantics->precision >= 2) ? (semantics->precision - 2) : 0;
971 if (SNaN) {
972 // We always have to clear the QNaN bit to make it an SNaN.
973 APInt::tcClearBit(significand, QNaNBit);
975 // If there are no bits set in the payload, we have to set
976 // *something* to make it a NaN instead of an infinity;
977 // conventionally, this is the next bit down from the QNaN bit.
978 if (APInt::tcIsZero(significand, numParts))
979 APInt::tcSetBit(significand, QNaNBit - 1);
980 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
981 // The only NaN is a quiet NaN, and it has no bits sets in the significand.
982 // Do nothing.
983 } else {
984 // We always have to set the QNaN bit to make it a QNaN.
985 APInt::tcSetBit(significand, QNaNBit);
988 // For x87 extended precision, we want to make a NaN, not a
989 // pseudo-NaN. Maybe we should expose the ability to make
990 // pseudo-NaNs?
991 if (semantics == &semX87DoubleExtended)
992 APInt::tcSetBit(significand, QNaNBit + 1);
995 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
996 if (this != &rhs) {
997 if (semantics != rhs.semantics) {
998 freeSignificand();
999 initialize(rhs.semantics);
1001 assign(rhs);
1004 return *this;
1007 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
1008 freeSignificand();
1010 semantics = rhs.semantics;
1011 significand = rhs.significand;
1012 exponent = rhs.exponent;
1013 category = rhs.category;
1014 sign = rhs.sign;
1016 rhs.semantics = &semBogus;
1017 return *this;
1020 bool IEEEFloat::isDenormal() const {
1021 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
1022 (APInt::tcExtractBit(significandParts(),
1023 semantics->precision - 1) == 0);
1026 bool IEEEFloat::isSmallest() const {
1027 // The smallest number by magnitude in our format will be the smallest
1028 // denormal, i.e. the floating point number with exponent being minimum
1029 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
1030 return isFiniteNonZero() && exponent == semantics->minExponent &&
1031 significandMSB() == 0;
1034 bool IEEEFloat::isSmallestNormalized() const {
1035 return getCategory() == fcNormal && exponent == semantics->minExponent &&
1036 isSignificandAllZerosExceptMSB();
1039 unsigned int IEEEFloat::getNumHighBits() const {
1040 const unsigned int PartCount = partCountForBits(semantics->precision);
1041 const unsigned int Bits = PartCount * integerPartWidth;
1043 // Compute how many bits are used in the final word.
1044 // When precision is just 1, it represents the 'Pth'
1045 // Precision bit and not the actual significand bit.
1046 const unsigned int NumHighBits = (semantics->precision > 1)
1047 ? (Bits - semantics->precision + 1)
1048 : (Bits - semantics->precision);
1049 return NumHighBits;
1052 bool IEEEFloat::isSignificandAllOnes() const {
1053 // Test if the significand excluding the integral bit is all ones. This allows
1054 // us to test for binade boundaries.
1055 const integerPart *Parts = significandParts();
1056 const unsigned PartCount = partCountForBits(semantics->precision);
1057 for (unsigned i = 0; i < PartCount - 1; i++)
1058 if (~Parts[i])
1059 return false;
1061 // Set the unused high bits to all ones when we compare.
1062 const unsigned NumHighBits = getNumHighBits();
1063 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1064 "Can not have more high bits to fill than integerPartWidth");
1065 const integerPart HighBitFill =
1066 ~integerPart(0) << (integerPartWidth - NumHighBits);
1067 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
1068 return false;
1070 return true;
1073 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1074 // Test if the significand excluding the integral bit is all ones except for
1075 // the least significant bit.
1076 const integerPart *Parts = significandParts();
1078 if (Parts[0] & 1)
1079 return false;
1081 const unsigned PartCount = partCountForBits(semantics->precision);
1082 for (unsigned i = 0; i < PartCount - 1; i++) {
1083 if (~Parts[i] & ~unsigned{!i})
1084 return false;
1087 // Set the unused high bits to all ones when we compare.
1088 const unsigned NumHighBits = getNumHighBits();
1089 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1090 "Can not have more high bits to fill than integerPartWidth");
1091 const integerPart HighBitFill = ~integerPart(0)
1092 << (integerPartWidth - NumHighBits);
1093 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1094 return false;
1096 return true;
1099 bool IEEEFloat::isSignificandAllZeros() const {
1100 // Test if the significand excluding the integral bit is all zeros. This
1101 // allows us to test for binade boundaries.
1102 const integerPart *Parts = significandParts();
1103 const unsigned PartCount = partCountForBits(semantics->precision);
1105 for (unsigned i = 0; i < PartCount - 1; i++)
1106 if (Parts[i])
1107 return false;
1109 // Compute how many bits are used in the final word.
1110 const unsigned NumHighBits = getNumHighBits();
1111 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1112 "clear than integerPartWidth");
1113 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1115 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1116 return false;
1118 return true;
1121 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1122 const integerPart *Parts = significandParts();
1123 const unsigned PartCount = partCountForBits(semantics->precision);
1125 for (unsigned i = 0; i < PartCount - 1; i++) {
1126 if (Parts[i])
1127 return false;
1130 const unsigned NumHighBits = getNumHighBits();
1131 const integerPart MSBMask = integerPart(1)
1132 << (integerPartWidth - NumHighBits);
1133 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1136 bool IEEEFloat::isLargest() const {
1137 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1138 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1139 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1140 // The largest number by magnitude in our format will be the floating point
1141 // number with maximum exponent and with significand that is all ones except
1142 // the LSB.
1143 return (IsMaxExp && APFloat::hasSignificand(*semantics))
1144 ? isSignificandAllOnesExceptLSB()
1145 : IsMaxExp;
1146 } else {
1147 // The largest number by magnitude in our format will be the floating point
1148 // number with maximum exponent and with significand that is all ones.
1149 return IsMaxExp && isSignificandAllOnes();
1153 bool IEEEFloat::isInteger() const {
1154 // This could be made more efficient; I'm going for obviously correct.
1155 if (!isFinite()) return false;
1156 IEEEFloat truncated = *this;
1157 truncated.roundToIntegral(rmTowardZero);
1158 return compare(truncated) == cmpEqual;
1161 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1162 if (this == &rhs)
1163 return true;
1164 if (semantics != rhs.semantics ||
1165 category != rhs.category ||
1166 sign != rhs.sign)
1167 return false;
1168 if (category==fcZero || category==fcInfinity)
1169 return true;
1171 if (isFiniteNonZero() && exponent != rhs.exponent)
1172 return false;
1174 return std::equal(significandParts(), significandParts() + partCount(),
1175 rhs.significandParts());
1178 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1179 initialize(&ourSemantics);
1180 sign = 0;
1181 category = fcNormal;
1182 zeroSignificand();
1183 exponent = ourSemantics.precision - 1;
1184 significandParts()[0] = value;
1185 normalize(rmNearestTiesToEven, lfExactlyZero);
1188 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
1189 initialize(&ourSemantics);
1190 // The Float8E8MOFNU format does not have a representation
1191 // for zero. So, use the closest representation instead.
1192 // Moreover, the all-zero encoding represents a valid
1193 // normal value (which is the smallestNormalized here).
1194 // Hence, we call makeSmallestNormalized (where category is
1195 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1196 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false);
1199 // Delegate to the previous constructor, because later copy constructor may
1200 // actually inspects category, which can't be garbage.
1201 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
1202 : IEEEFloat(ourSemantics) {}
1204 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
1205 initialize(rhs.semantics);
1206 assign(rhs);
1209 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
1210 *this = std::move(rhs);
1213 IEEEFloat::~IEEEFloat() { freeSignificand(); }
1215 unsigned int IEEEFloat::partCount() const {
1216 return partCountForBits(semantics->precision + 1);
1219 const APFloat::integerPart *IEEEFloat::significandParts() const {
1220 return const_cast<IEEEFloat *>(this)->significandParts();
1223 APFloat::integerPart *IEEEFloat::significandParts() {
1224 if (partCount() > 1)
1225 return significand.parts;
1226 else
1227 return &significand.part;
1230 void IEEEFloat::zeroSignificand() {
1231 APInt::tcSet(significandParts(), 0, partCount());
1234 /* Increment an fcNormal floating point number's significand. */
1235 void IEEEFloat::incrementSignificand() {
1236 integerPart carry;
1238 carry = APInt::tcIncrement(significandParts(), partCount());
1240 /* Our callers should never cause us to overflow. */
1241 assert(carry == 0);
1242 (void)carry;
1245 /* Add the significand of the RHS. Returns the carry flag. */
1246 APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1247 integerPart *parts;
1249 parts = significandParts();
1251 assert(semantics == rhs.semantics);
1252 assert(exponent == rhs.exponent);
1254 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1257 /* Subtract the significand of the RHS with a borrow flag. Returns
1258 the borrow flag. */
1259 APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1260 integerPart borrow) {
1261 integerPart *parts;
1263 parts = significandParts();
1265 assert(semantics == rhs.semantics);
1266 assert(exponent == rhs.exponent);
1268 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1269 partCount());
1272 /* Multiply our significand by the significand of the RHS. Unless
1273 IGNOREADDEND is set, a non-zero ADDEND is added on to the full-precision
1274 result of the multiplication. Returns the lost fraction. */
1275 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1276 IEEEFloat addend,
1277 bool ignoreAddend) {
1278 unsigned int omsb; // One, not zero, based MSB.
1279 unsigned int partsCount, newPartsCount, precision;
1280 integerPart *lhsSignificand;
1281 integerPart scratch[4];
1282 integerPart *fullSignificand;
1283 lostFraction lost_fraction;
1284 bool ignored;
1286 assert(semantics == rhs.semantics);
1288 precision = semantics->precision;
1290 // Allocate space for twice as many bits as the original significand, plus one
1291 // extra bit for the addition to overflow into.
1292 newPartsCount = partCountForBits(precision * 2 + 1);
// Use the on-stack scratch buffer when four words suffice; otherwise take a
// heap buffer, which is released with delete[] at the end of this function.
1294 if (newPartsCount > 4)
1295 fullSignificand = new integerPart[newPartsCount];
1296 else
1297 fullSignificand = scratch;
1299 lhsSignificand = significandParts();
1300 partsCount = partCount();
1302 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1303 rhs.significandParts(), partsCount, partsCount);
1305 lost_fraction = lfExactlyZero;
1306 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1307 exponent += rhs.exponent;
1309 // Assume the operands involved in the multiplication are single-precision
1310 // FP, and the two multiplicands are:
1311 // *this = a23 . a22 ... a0 * 2^e1
1312 // rhs = b23 . b22 ... b0 * 2^e2
1313 // the result of multiplication is:
1314 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1315 // Note that there are three significant bits at the left-hand side of the
1316 // radix point: two for the multiplication, and an overflow bit for the
1317 // addition (that will always be zero at this point). Move the radix point
1318 // toward left by two bits, and adjust exponent accordingly.
1319 exponent += 2;
1321 if (!ignoreAddend && addend.isNonZero()) {
1322 // The intermediate result of the multiplication has "2 * precision"
1323 // significant bits; adjust the addend to be consistent with mul result.
// The current significand and semantics are saved here and restored once the
// addition has been carried out in the extended format.
1325 Significand savedSignificand = significand;
1326 const fltSemantics *savedSemantics = semantics;
1327 fltSemantics extendedSemantics;
1328 opStatus status;
1329 unsigned int extendedPrecision;
1331 // Normalize our MSB to one below the top bit to allow for overflow.
1332 extendedPrecision = 2 * precision + 1;
1333 if (omsb != extendedPrecision - 1) {
1334 assert(extendedPrecision > omsb);
1335 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1336 (extendedPrecision - 1) - omsb);
1337 exponent -= (extendedPrecision - 1) - omsb;
1340 /* Create new semantics. */
1341 extendedSemantics = *semantics;
1342 extendedSemantics.precision = extendedPrecision;
// Temporarily make *this refer to the full-width product under the extended
// semantics so the member addition routine can operate on it.
1344 if (newPartsCount == 1)
1345 significand.part = fullSignificand[0];
1346 else
1347 significand.parts = fullSignificand;
1348 semantics = &extendedSemantics;
1350 // Make a copy so we can convert it to the extended semantics.
1351 // Note that we cannot convert the addend directly, as the extendedSemantics
1352 // is a local variable (which we take a reference to).
1353 IEEEFloat extendedAddend(addend);
1354 status = extendedAddend.convert(extendedSemantics, APFloat::rmTowardZero,
1355 &ignored);
1356 assert(status == APFloat::opOK);
1357 (void)status;
1359 // Shift the significand of the addend right by one bit. This guarantees
1360 // that the high bit of the significand is zero (same as fullSignificand),
1361 // so the addition will overflow (if it does overflow at all) into the top bit.
1362 lost_fraction = extendedAddend.shiftSignificandRight(1);
1363 assert(lost_fraction == lfExactlyZero &&
1364 "Lost precision while shifting addend for fused-multiply-add.");
1366 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1368 /* Restore our state. */
// In the single-word case the sum lives in significand.part, so copy it back
// into fullSignificand before the saved significand is reinstated.
1369 if (newPartsCount == 1)
1370 fullSignificand[0] = significand.part;
1371 significand = savedSignificand;
1372 semantics = savedSemantics;
1374 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1377 // Convert the result having "2 * precision" significant-bits back to the one
1378 // having "precision" significant-bits. First, move the radix point from
1379 // position "2*precision - 1" to "precision - 1". The exponent needs to be
1380 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
// Together with the "exponent += 2" applied earlier, the statement below
// gives a net exponent adjustment of -(precision - 1).
1381 exponent -= precision + 1;
1383 // In case MSB resides at the left-hand side of radix point, shift the
1384 // mantissa right by some amount to make sure the MSB reside right before
1385 // the radix point (i.e. "MSB . rest-significant-bits").
1387 // Note that the result is not normalized when "omsb < precision". So, the
1388 // caller needs to call IEEEFloat::normalize() if normalized value is
1389 // expected.
1390 if (omsb > precision) {
1391 unsigned int bits, significantParts;
1392 lostFraction lf;
1394 bits = omsb - precision;
1395 significantParts = partCountForBits(omsb);
1396 lf = shiftRight(fullSignificand, significantParts, bits);
1397 lost_fraction = combineLostFractions(lf, lost_fraction);
1398 exponent += bits;
// Copy the low partsCount words of the (possibly shifted) product back into
// our own significand storage.
1401 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1403 if (newPartsCount > 4)
1404 delete [] fullSignificand;
1406 return lost_fraction;
1409 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1410 // When the given semantics has zero, the addend here is a zero.
1411 // i.e . it belongs to the 'fcZero' category.
1412 // But when the semantics does not support zero, we need to
1413 // explicitly convey that this addend should be ignored
1414 // for multiplication.
1415 return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
1418 /* Divide our significand by the significand of the RHS, leaving the
quotient in our significand. Returns the lost fraction. */
1419 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1420 unsigned int bit, i, partsCount;
1421 const integerPart *rhsSignificand;
1422 integerPart *lhsSignificand, *dividend, *divisor;
1423 integerPart scratch[4];
1424 lostFraction lost_fraction;
1426 assert(semantics == rhs.semantics);
1428 lhsSignificand = significandParts();
1429 rhsSignificand = rhs.significandParts();
1430 partsCount = partCount();
// One buffer holds both working copies: the dividend in the first partsCount
// words and the divisor in the next partsCount words. The four-word scratch
// array is enough for up to two parts each; larger sizes are heap-allocated
// and released with delete[] before returning.
1432 if (partsCount > 2)
1433 dividend = new integerPart[partsCount * 2];
1434 else
1435 dividend = scratch;
1437 divisor = dividend + partsCount;
1439 /* Copy the dividend and divisor as they will be modified in-place. */
// Our own significand is cleared at the same time; the quotient bits are set
// into it by the long-division loop below.
1440 for (i = 0; i < partsCount; i++) {
1441 dividend[i] = lhsSignificand[i];
1442 divisor[i] = rhsSignificand[i];
1443 lhsSignificand[i] = 0;
1446 exponent -= rhs.exponent;
1448 unsigned int precision = semantics->precision;
1450 /* Normalize the divisor. */
1451 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1452 if (bit) {
1453 exponent += bit;
1454 APInt::tcShiftLeft(divisor, partsCount, bit);
1457 /* Normalize the dividend. */
1458 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1459 if (bit) {
1460 exponent -= bit;
1461 APInt::tcShiftLeft(dividend, partsCount, bit);
1464 /* Ensure the dividend >= divisor initially for the loop below.
1465 Incidentally, this means that the division loop below is
1466 guaranteed to set the integer bit to one. */
1467 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1468 exponent--;
1469 APInt::tcShiftLeft(dividend, partsCount, 1);
1470 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1473 /* Long division. */
// Produces one quotient bit per iteration, from bit precision - 1 down to
// bit 0, shifting the running remainder left by one each time.
1474 for (bit = precision; bit; bit -= 1) {
1475 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1476 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1477 APInt::tcSetBit(lhsSignificand, bit - 1);
1480 APInt::tcShiftLeft(dividend, partsCount, 1);
1483 /* Figure out the lost fraction. */
// The remainder has already been doubled by the final shift above, so
// comparing it with the divisor tells whether the discarded part is above,
// equal to, or below one half.
1484 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1486 if (cmp > 0)
1487 lost_fraction = lfMoreThanHalf;
1488 else if (cmp == 0)
1489 lost_fraction = lfExactlyHalf;
1490 else if (APInt::tcIsZero(dividend, partsCount))
1491 lost_fraction = lfExactlyZero;
1492 else
1493 lost_fraction = lfLessThanHalf;
1495 if (partsCount > 2)
1496 delete [] dividend;
1498 return lost_fraction;
1501 unsigned int IEEEFloat::significandMSB() const {
1502 return APInt::tcMSB(significandParts(), partCount());
1505 unsigned int IEEEFloat::significandLSB() const {
1506 return APInt::tcLSB(significandParts(), partCount());
1509 /* Note that a zero result is NOT normalized to fcZero. */
1510 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1511 /* Our exponent should not overflow. */
1512 assert((ExponentType) (exponent + bits) >= exponent);
1514 exponent += bits;
1516 return shiftRight(significandParts(), partCount(), bits);
1519 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1520 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1521 assert(bits < semantics->precision ||
1522 (semantics->precision == 1 && bits <= 1));
1524 if (bits) {
1525 unsigned int partsCount = partCount();
1527 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1528 exponent -= bits;
1530 assert(!APInt::tcIsZero(significandParts(), partsCount));
1534 APFloat::cmpResult IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1535 int compare;
1537 assert(semantics == rhs.semantics);
1538 assert(isFiniteNonZero());
1539 assert(rhs.isFiniteNonZero());
1541 compare = exponent - rhs.exponent;
1543 /* If exponents are equal, do an unsigned bignum comparison of the
1544 significands. */
1545 if (compare == 0)
1546 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1547 partCount());
1549 if (compare > 0)
1550 return cmpGreaterThan;
1551 else if (compare < 0)
1552 return cmpLessThan;
1553 else
1554 return cmpEqual;
1557 /* Set the least significant BITS bits of a bignum, clear the
1558 rest. */
1559 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1560 unsigned bits) {
1561 unsigned i = 0;
1562 while (bits > APInt::APINT_BITS_PER_WORD) {
1563 dst[i++] = ~(APInt::WordType)0;
1564 bits -= APInt::APINT_BITS_PER_WORD;
1567 if (bits)
1568 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1570 while (i < parts)
1571 dst[i++] = 0;
1574 /* Handle overflow. Sign is preserved. We either become infinity or
1575 the largest finite number. */
1576 APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1577 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {
1578 /* Infinity? */
1579 if (rounding_mode == rmNearestTiesToEven ||
1580 rounding_mode == rmNearestTiesToAway ||
1581 (rounding_mode == rmTowardPositive && !sign) ||
1582 (rounding_mode == rmTowardNegative && sign)) {
1583 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1584 makeNaN(false, sign);
1585 else
1586 category = fcInfinity;
1587 return static_cast<opStatus>(opOverflow | opInexact);
1591 /* Otherwise we become the largest finite number. */
1592 category = fcNormal;
1593 exponent = semantics->maxExponent;
1594 tcSetLeastSignificantBits(significandParts(), partCount(),
1595 semantics->precision);
1596 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1597 semantics->nanEncoding == fltNanEncoding::AllOnes)
1598 APInt::tcClearBit(significandParts(), 0);
1600 return opInexact;
1603 /* Returns TRUE if, when truncating the current number, with BIT the
1604 new LSB, with the given lost fraction and rounding mode, the result
1605 would need to be rounded away from zero (i.e., by increasing the
1606 signficand). This routine must work for fcZero of both signs, and
1607 fcNormal numbers. */
1608 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1609 lostFraction lost_fraction,
1610 unsigned int bit) const {
1611 /* NaNs and infinities should not have lost fractions. */
1612 assert(isFiniteNonZero() || category == fcZero);
1614 /* Current callers never pass this so we don't handle it. */
1615 assert(lost_fraction != lfExactlyZero);
1617 switch (rounding_mode) {
1618 case rmNearestTiesToAway:
1619 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1621 case rmNearestTiesToEven:
1622 if (lost_fraction == lfMoreThanHalf)
1623 return true;
1625 /* Our zeroes don't have a significand to test. */
1626 if (lost_fraction == lfExactlyHalf && category != fcZero)
1627 return APInt::tcExtractBit(significandParts(), bit);
1629 return false;
1631 case rmTowardZero:
1632 return false;
1634 case rmTowardPositive:
1635 return !sign;
1637 case rmTowardNegative:
1638 return sign;
1640 default:
1641 break;
1643 llvm_unreachable("Invalid rounding mode found");
// Bring a finite non-zero value into canonical form and round it.
// The significand's MSB is moved to bit PRECISION - 1 where the exponent
// range permits (values that would fall below minExponent stay denormal),
// then LOST_FRACTION is applied according to ROUNDING_MODE. Values that are
// not finite non-zero are returned untouched with opOK. The returned status
// is opOK for exact results, opInexact when rounding took place,
// opUnderflow | opInexact for inexact denormal/zero results, or whatever
// handleOverflow() reports when the exponent range is exceeded.
1646 APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1647 lostFraction lost_fraction) {
1648 unsigned int omsb; /* One, not zero, based MSB. */
1649 int exponentChange;
1651 if (!isFiniteNonZero())
1652 return opOK;
1654 /* Before rounding normalize the exponent of fcNormal numbers. */
1655 omsb = significandMSB() + 1;
1657 if (omsb) {
1658 /* OMSB is numbered from 1. We want to place it in the integer
1659 bit numbered PRECISION if possible, with a compensating change in
1660 the exponent. */
1661 exponentChange = omsb - semantics->precision;
1663 /* If the resulting exponent is too high, overflow according to
1664 the rounding mode. */
1665 if (exponent + exponentChange > semantics->maxExponent)
1666 return handleOverflow(rounding_mode);
1668 /* Subnormal numbers have exponent minExponent, and their MSB
1669 is forced based on that. */
1670 if (exponent + exponentChange < semantics->minExponent)
1671 exponentChange = semantics->minExponent - exponent;
1673 /* Shifting left is easy as we don't lose precision. */
1674 if (exponentChange < 0) {
1675 assert(lost_fraction == lfExactlyZero);
1677 shiftSignificandLeft(-exponentChange);
1679 return opOK;
1682 if (exponentChange > 0) {
1683 lostFraction lf;
1685 /* Shift right and capture any new lost fraction. */
1686 lf = shiftSignificandRight(exponentChange);
1688 lost_fraction = combineLostFractions(lf, lost_fraction);
1690 /* Keep OMSB up-to-date. */
1691 if (omsb > (unsigned) exponentChange)
1692 omsb -= exponentChange;
1693 else
1694 omsb = 0;
1698 // The all-ones value is an overflow if NaN is all ones. If NaN is
1699 // represented by negative zero, then it is a valid finite value.
1700 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1701 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1702 exponent == semantics->maxExponent && isSignificandAllOnes())
1703 return handleOverflow(rounding_mode);
1705 /* Now round the number according to rounding_mode given the lost
1706 fraction. */
1708 /* As specified in IEEE 754, since we do not trap we do not report
1709 underflow for exact results. */
1710 if (lost_fraction == lfExactlyZero) {
1711 /* Canonicalize zeroes. */
1712 if (omsb == 0) {
1713 category = fcZero;
1714 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1715 sign = false;
// Formats without a zero fall back to the smallest normalized value.
1716 if (!semantics->hasZero)
1717 makeSmallestNormalized(false);
1720 return opOK;
1723 /* Increment the significand if we're rounding away from zero. */
1724 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
// A significand that was shifted out entirely (omsb == 0) rounds up to the
// smallest denormal, which lives at the minimum exponent.
1725 if (omsb == 0)
1726 exponent = semantics->minExponent;
1728 incrementSignificand();
1729 omsb = significandMSB() + 1;
1731 /* Did the significand increment overflow? */
1732 if (omsb == (unsigned) semantics->precision + 1) {
1733 /* Renormalize by incrementing the exponent and shifting our
1734 significand right one. However if we already have the
1735 maximum exponent we overflow to infinity. */
1736 if (exponent == semantics->maxExponent)
1737 // Invoke overflow handling with a rounding mode that will guarantee
1738 // that the result gets turned into the correct infinity representation.
1739 // This is needed instead of just setting the category to infinity to
1740 // account for 8-bit floating point types that have no inf, only NaN.
1741 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1743 shiftSignificandRight(1);
1745 return opInexact;
1748 // The all-ones value is an overflow if NaN is all ones. If NaN is
1749 // represented by negative zero, then it is a valid finite value.
1750 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1751 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1752 exponent == semantics->maxExponent && isSignificandAllOnes())
1753 return handleOverflow(rounding_mode);
1756 /* The normal case - we were and are not denormal, and any
1757 significand increment above didn't overflow. */
1758 if (omsb == semantics->precision)
1759 return opInexact;
1761 /* We have a non-zero denormal. */
1762 assert(omsb < semantics->precision);
1764 /* Canonicalize zeroes. */
1765 if (omsb == 0) {
1766 category = fcZero;
1767 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1768 sign = false;
1769 // This condition handles the case where the semantics
1770 // does not have zero but uses the all-zero encoding
1771 // to represent the smallest normal value.
1772 if (!semantics->hasZero)
1773 makeSmallestNormalized(false);
1776 /* The fcZero case is a denormal that underflowed to zero. */
1777 return (opStatus) (opUnderflow | opInexact);
1780 APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1781 bool subtract) {
1782 switch (PackCategoriesIntoKey(category, rhs.category)) {
1783 default:
1784 llvm_unreachable(nullptr);
1786 case PackCategoriesIntoKey(fcZero, fcNaN):
1787 case PackCategoriesIntoKey(fcNormal, fcNaN):
1788 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1789 assign(rhs);
1790 [[fallthrough]];
1791 case PackCategoriesIntoKey(fcNaN, fcZero):
1792 case PackCategoriesIntoKey(fcNaN, fcNormal):
1793 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1794 case PackCategoriesIntoKey(fcNaN, fcNaN):
1795 if (isSignaling()) {
1796 makeQuiet();
1797 return opInvalidOp;
1799 return rhs.isSignaling() ? opInvalidOp : opOK;
1801 case PackCategoriesIntoKey(fcNormal, fcZero):
1802 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1803 case PackCategoriesIntoKey(fcInfinity, fcZero):
1804 return opOK;
1806 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1807 case PackCategoriesIntoKey(fcZero, fcInfinity):
1808 category = fcInfinity;
1809 sign = rhs.sign ^ subtract;
1810 return opOK;
1812 case PackCategoriesIntoKey(fcZero, fcNormal):
1813 assign(rhs);
1814 sign = rhs.sign ^ subtract;
1815 return opOK;
1817 case PackCategoriesIntoKey(fcZero, fcZero):
1818 /* Sign depends on rounding mode; handled by caller. */
1819 return opOK;
1821 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1822 /* Differently signed infinities can only be validly
1823 subtracted. */
1824 if (((sign ^ rhs.sign)!=0) != subtract) {
1825 makeNaN();
1826 return opInvalidOp;
1829 return opOK;
1831 case PackCategoriesIntoKey(fcNormal, fcNormal):
1832 return opDivByZero;
1836 /* Add or subtract two normal numbers. The result is left in our
significand (and sign, if the subtraction had to be reversed). Returns the
fraction lost while aligning the exponents. */
1837 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1838 bool subtract) {
1839 integerPart carry;
1840 lostFraction lost_fraction;
1841 int bits;
1843 /* Determine if the operation on the absolute values is effectively
1844 an addition or subtraction. */
1845 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1847 /* Are we bigger exponent-wise than the RHS? */
1848 bits = exponent - rhs.exponent;
1850 /* Subtraction is more subtle than one might naively expect. */
1851 if (subtract) {
1852 if ((bits < 0) && !semantics->hasSignedRepr)
1853 llvm_unreachable(
1854 "This floating point format does not support signed values");
1856 IEEEFloat temp_rhs(rhs);
// Align the exponents. The operand with the smaller exponent is shifted
// right by one bit less than the difference and the other operand is
// shifted left by one.
1858 if (bits == 0)
1859 lost_fraction = lfExactlyZero;
1860 else if (bits > 0) {
1861 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1862 shiftSignificandLeft(1);
1863 } else {
1864 lost_fraction = shiftSignificandRight(-bits - 1);
1865 temp_rhs.shiftSignificandLeft(1);
1868 // Should we reverse the subtraction.
// A non-zero lost fraction is passed to subtractSignificand as the incoming
// borrow.
1869 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1870 carry = temp_rhs.subtractSignificand
1871 (*this, lost_fraction != lfExactlyZero);
1872 copySignificand(temp_rhs);
1873 sign = !sign;
1874 } else {
1875 carry = subtractSignificand
1876 (temp_rhs, lost_fraction != lfExactlyZero);
1879 /* Invert the lost fraction - it was on the RHS and
1880 subtracted. */
1881 if (lost_fraction == lfLessThanHalf)
1882 lost_fraction = lfMoreThanHalf;
1883 else if (lost_fraction == lfMoreThanHalf)
1884 lost_fraction = lfLessThanHalf;
1886 /* The code above is intended to ensure that no borrow is
1887 necessary. */
1888 assert(!carry);
1889 (void)carry;
1890 } else {
// Effective addition: shift whichever operand has the smaller exponent right
// so both exponents agree, then add the significands.
1891 if (bits > 0) {
1892 IEEEFloat temp_rhs(rhs);
1894 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1895 carry = addSignificand(temp_rhs);
1896 } else {
1897 lost_fraction = shiftSignificandRight(-bits);
1898 carry = addSignificand(rhs);
1901 /* We have a guard bit; generating a carry cannot happen. */
1902 assert(!carry);
1903 (void)carry;
1906 return lost_fraction;
1909 APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1910 switch (PackCategoriesIntoKey(category, rhs.category)) {
1911 default:
1912 llvm_unreachable(nullptr);
1914 case PackCategoriesIntoKey(fcZero, fcNaN):
1915 case PackCategoriesIntoKey(fcNormal, fcNaN):
1916 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1917 assign(rhs);
1918 sign = false;
1919 [[fallthrough]];
1920 case PackCategoriesIntoKey(fcNaN, fcZero):
1921 case PackCategoriesIntoKey(fcNaN, fcNormal):
1922 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1923 case PackCategoriesIntoKey(fcNaN, fcNaN):
1924 sign ^= rhs.sign; // restore the original sign
1925 if (isSignaling()) {
1926 makeQuiet();
1927 return opInvalidOp;
1929 return rhs.isSignaling() ? opInvalidOp : opOK;
1931 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1932 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1933 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1934 category = fcInfinity;
1935 return opOK;
1937 case PackCategoriesIntoKey(fcZero, fcNormal):
1938 case PackCategoriesIntoKey(fcNormal, fcZero):
1939 case PackCategoriesIntoKey(fcZero, fcZero):
1940 category = fcZero;
1941 return opOK;
1943 case PackCategoriesIntoKey(fcZero, fcInfinity):
1944 case PackCategoriesIntoKey(fcInfinity, fcZero):
1945 makeNaN();
1946 return opInvalidOp;
1948 case PackCategoriesIntoKey(fcNormal, fcNormal):
1949 return opOK;
1953 APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1954 switch (PackCategoriesIntoKey(category, rhs.category)) {
1955 default:
1956 llvm_unreachable(nullptr);
1958 case PackCategoriesIntoKey(fcZero, fcNaN):
1959 case PackCategoriesIntoKey(fcNormal, fcNaN):
1960 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1961 assign(rhs);
1962 sign = false;
1963 [[fallthrough]];
1964 case PackCategoriesIntoKey(fcNaN, fcZero):
1965 case PackCategoriesIntoKey(fcNaN, fcNormal):
1966 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1967 case PackCategoriesIntoKey(fcNaN, fcNaN):
1968 sign ^= rhs.sign; // restore the original sign
1969 if (isSignaling()) {
1970 makeQuiet();
1971 return opInvalidOp;
1973 return rhs.isSignaling() ? opInvalidOp : opOK;
1975 case PackCategoriesIntoKey(fcInfinity, fcZero):
1976 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1977 case PackCategoriesIntoKey(fcZero, fcInfinity):
1978 case PackCategoriesIntoKey(fcZero, fcNormal):
1979 return opOK;
1981 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1982 category = fcZero;
1983 return opOK;
1985 case PackCategoriesIntoKey(fcNormal, fcZero):
1986 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1987 makeNaN(false, sign);
1988 else
1989 category = fcInfinity;
1990 return opDivByZero;
1992 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1993 case PackCategoriesIntoKey(fcZero, fcZero):
1994 makeNaN();
1995 return opInvalidOp;
1997 case PackCategoriesIntoKey(fcNormal, fcNormal):
1998 return opOK;
2002 APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
2003 switch (PackCategoriesIntoKey(category, rhs.category)) {
2004 default:
2005 llvm_unreachable(nullptr);
2007 case PackCategoriesIntoKey(fcZero, fcNaN):
2008 case PackCategoriesIntoKey(fcNormal, fcNaN):
2009 case PackCategoriesIntoKey(fcInfinity, fcNaN):
2010 assign(rhs);
2011 [[fallthrough]];
2012 case PackCategoriesIntoKey(fcNaN, fcZero):
2013 case PackCategoriesIntoKey(fcNaN, fcNormal):
2014 case PackCategoriesIntoKey(fcNaN, fcInfinity):
2015 case PackCategoriesIntoKey(fcNaN, fcNaN):
2016 if (isSignaling()) {
2017 makeQuiet();
2018 return opInvalidOp;
2020 return rhs.isSignaling() ? opInvalidOp : opOK;
2022 case PackCategoriesIntoKey(fcZero, fcInfinity):
2023 case PackCategoriesIntoKey(fcZero, fcNormal):
2024 case PackCategoriesIntoKey(fcNormal, fcInfinity):
2025 return opOK;
2027 case PackCategoriesIntoKey(fcNormal, fcZero):
2028 case PackCategoriesIntoKey(fcInfinity, fcZero):
2029 case PackCategoriesIntoKey(fcInfinity, fcNormal):
2030 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2031 case PackCategoriesIntoKey(fcZero, fcZero):
2032 makeNaN();
2033 return opInvalidOp;
2035 case PackCategoriesIntoKey(fcNormal, fcNormal):
2036 return opOK;
2040 APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
2041 switch (PackCategoriesIntoKey(category, rhs.category)) {
2042 default:
2043 llvm_unreachable(nullptr);
2045 case PackCategoriesIntoKey(fcZero, fcNaN):
2046 case PackCategoriesIntoKey(fcNormal, fcNaN):
2047 case PackCategoriesIntoKey(fcInfinity, fcNaN):
2048 assign(rhs);
2049 [[fallthrough]];
2050 case PackCategoriesIntoKey(fcNaN, fcZero):
2051 case PackCategoriesIntoKey(fcNaN, fcNormal):
2052 case PackCategoriesIntoKey(fcNaN, fcInfinity):
2053 case PackCategoriesIntoKey(fcNaN, fcNaN):
2054 if (isSignaling()) {
2055 makeQuiet();
2056 return opInvalidOp;
2058 return rhs.isSignaling() ? opInvalidOp : opOK;
2060 case PackCategoriesIntoKey(fcZero, fcInfinity):
2061 case PackCategoriesIntoKey(fcZero, fcNormal):
2062 case PackCategoriesIntoKey(fcNormal, fcInfinity):
2063 return opOK;
2065 case PackCategoriesIntoKey(fcNormal, fcZero):
2066 case PackCategoriesIntoKey(fcInfinity, fcZero):
2067 case PackCategoriesIntoKey(fcInfinity, fcNormal):
2068 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2069 case PackCategoriesIntoKey(fcZero, fcZero):
2070 makeNaN();
2071 return opInvalidOp;
2073 case PackCategoriesIntoKey(fcNormal, fcNormal):
2074 return opDivByZero; // fake status, indicating this is not a special case
2078 /* Change sign. */
2079 void IEEEFloat::changeSign() {
2080 // With NaN-as-negative-zero, neither NaN or negative zero can change
2081 // their signs.
2082 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2083 (isZero() || isNaN()))
2084 return;
2085 /* Look mummy, this one's easy. */
2086 sign = !sign;
2089 /* Normalized addition or subtraction. */
2090 APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2091 roundingMode rounding_mode,
2092 bool subtract) {
2093 opStatus fs;
2095 fs = addOrSubtractSpecials(rhs, subtract);
2097 /* This return code means it was not a simple case. */
2098 if (fs == opDivByZero) {
2099 lostFraction lost_fraction;
2101 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2102 fs = normalize(rounding_mode, lost_fraction);
2104 /* Can only be zero if we lost no fraction. */
2105 assert(category != fcZero || lost_fraction == lfExactlyZero);
2108 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2109 positive zero unless rounding to minus infinity, except that
2110 adding two like-signed zeroes gives that zero. */
2111 if (category == fcZero) {
2112 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2113 sign = (rounding_mode == rmTowardNegative);
2114 // NaN-in-negative-zero means zeros need to be normalized to +0.
2115 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2116 sign = false;
2119 return fs;
2122 /* Normalized addition. */
2123 APFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
2124 roundingMode rounding_mode) {
2125 return addOrSubtract(rhs, rounding_mode, false);
2128 /* Normalized subtraction. */
2129 APFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
2130 roundingMode rounding_mode) {
2131 return addOrSubtract(rhs, rounding_mode, true);
2134 /* Normalized multiply. */
2135 APFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
2136 roundingMode rounding_mode) {
2137 opStatus fs;
2139 sign ^= rhs.sign;
2140 fs = multiplySpecials(rhs);
2142 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2143 sign = false;
2144 if (isFiniteNonZero()) {
2145 lostFraction lost_fraction = multiplySignificand(rhs);
2146 fs = normalize(rounding_mode, lost_fraction);
2147 if (lost_fraction != lfExactlyZero)
2148 fs = (opStatus) (fs | opInexact);
2151 return fs;
2154 /* Normalized divide. */
2155 APFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
2156 roundingMode rounding_mode) {
2157 opStatus fs;
2159 sign ^= rhs.sign;
2160 fs = divideSpecials(rhs);
2162 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2163 sign = false;
2164 if (isFiniteNonZero()) {
2165 lostFraction lost_fraction = divideSignificand(rhs);
2166 fs = normalize(rounding_mode, lost_fraction);
2167 if (lost_fraction != lfExactlyZero)
2168 fs = (opStatus) (fs | opInexact);
2171 return fs;
2174 /* Normalized remainder. */
2175 APFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
2176 opStatus fs;
2177 unsigned int origSign = sign;
2179 // First handle the special cases.
2180 fs = remainderSpecials(rhs);
2181 if (fs != opDivByZero)
2182 return fs;
2184 fs = opOK;
2186 // Make sure the current value is less than twice the denom. If the addition
2187 // did not succeed (an overflow has happened), which means that the finite
2188 // value we currently posses must be less than twice the denom (as we are
2189 // using the same semantics).
2190 IEEEFloat P2 = rhs;
2191 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2192 fs = mod(P2);
2193 assert(fs == opOK);
2196 // Lets work with absolute numbers.
2197 IEEEFloat P = rhs;
2198 P.sign = false;
2199 sign = false;
2202 // To calculate the remainder we use the following scheme.
2204 // The remainder is defained as follows:
2206 // remainder = numer - rquot * denom = x - r * p
2208 // Where r is the result of: x/p, rounded toward the nearest integral value
2209 // (with halfway cases rounded toward the even number).
2211 // Currently, (after x mod 2p):
2212 // r is the number of 2p's present inside x, which is inherently, an even
2213 // number of p's.
2215 // We may split the remaining calculation into 4 options:
2216 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2217 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2218 // are done as well.
2219 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2220 // to subtract 1p at least once.
2221 // - if x >= p then we must subtract p at least once, as x must be a
2222 // remainder.
2224 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2226 // We can now split the remaining calculation to the following 3 options:
2227 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2228 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2229 // must round up to the next even number. so we must subtract p once more.
2230 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2231 // integral, and subtract p once more.
2234 // Extend the semantics to prevent an overflow/underflow or inexact result.
2235 bool losesInfo;
2236 fltSemantics extendedSemantics = *semantics;
2237 extendedSemantics.maxExponent++;
2238 extendedSemantics.minExponent--;
2239 extendedSemantics.precision += 2;
2241 IEEEFloat VEx = *this;
2242 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2243 assert(fs == opOK && !losesInfo);
2244 IEEEFloat PEx = P;
2245 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2246 assert(fs == opOK && !losesInfo);
2248 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2249 // any fraction.
2250 fs = VEx.add(VEx, rmNearestTiesToEven);
2251 assert(fs == opOK);
2253 if (VEx.compare(PEx) == cmpGreaterThan) {
2254 fs = subtract(P, rmNearestTiesToEven);
2255 assert(fs == opOK);
2257 // Make VEx = this.add(this), but because we have different semantics, we do
2258 // not want to `convert` again, so we just subtract PEx twice (which equals
2259 // to the desired value).
2260 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2261 assert(fs == opOK);
2262 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2263 assert(fs == opOK);
2265 cmpResult result = VEx.compare(PEx);
2266 if (result == cmpGreaterThan || result == cmpEqual) {
2267 fs = subtract(P, rmNearestTiesToEven);
2268 assert(fs == opOK);
2272 if (isZero()) {
2273 sign = origSign; // IEEE754 requires this
2274 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2275 // But some 8-bit floats only have positive 0.
2276 sign = false;
2279 else
2280 sign ^= origSign;
2281 return fs;
2284 /* Normalized llvm frem (C fmod). */
2285 APFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2286 opStatus fs;
2287 fs = modSpecials(rhs);
2288 unsigned int origSign = sign;
2290 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2291 compareAbsoluteValue(rhs) != cmpLessThan) {
2292 int Exp = ilogb(*this) - ilogb(rhs);
2293 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2294 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2295 // check for it.
2296 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2297 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2298 V.sign = sign;
2300 fs = subtract(V, rmNearestTiesToEven);
2302 // When the semantics supports zero, this loop's
2303 // exit-condition is handled by the 'isFiniteNonZero'
2304 // category check above. However, when the semantics
2305 // does not have 'fcZero' and we have reached the
2306 // minimum possible value, (and any further subtract
2307 // will underflow to the same value) explicitly
2308 // provide an exit-path here.
2309 if (!semantics->hasZero && this->isSmallest())
2310 break;
2312 assert(fs==opOK);
2314 if (isZero()) {
2315 sign = origSign; // fmod requires this
2316 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2317 sign = false;
2319 return fs;
2322 /* Normalized fused-multiply-add. */
2323 APFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2324 const IEEEFloat &addend,
2325 roundingMode rounding_mode) {
2326 opStatus fs;
2328 /* Post-multiplication sign, before addition. */
2329 sign ^= multiplicand.sign;
2331 /* If and only if all arguments are normal do we need to do an
2332 extended-precision calculation. */
2333 if (isFiniteNonZero() &&
2334 multiplicand.isFiniteNonZero() &&
2335 addend.isFinite()) {
2336 lostFraction lost_fraction;
2338 lost_fraction = multiplySignificand(multiplicand, addend);
2339 fs = normalize(rounding_mode, lost_fraction);
2340 if (lost_fraction != lfExactlyZero)
2341 fs = (opStatus) (fs | opInexact);
2343 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2344 positive zero unless rounding to minus infinity, except that
2345 adding two like-signed zeroes gives that zero. */
2346 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2347 sign = (rounding_mode == rmTowardNegative);
2348 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2349 sign = false;
2351 } else {
2352 fs = multiplySpecials(multiplicand);
2354 /* FS can only be opOK or opInvalidOp. There is no more work
2355 to do in the latter case. The IEEE-754R standard says it is
2356 implementation-defined in this case whether, if ADDEND is a
2357 quiet NaN, we raise invalid op; this implementation does so.
2359 If we need to do the addition we can do so with normal
2360 precision. */
2361 if (fs == opOK)
2362 fs = addOrSubtract(addend, rounding_mode, false);
2365 return fs;
2368 /* Rounding-mode correct round to integral value. */
2369 APFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2370 opStatus fs;
2372 if (isInfinity())
2373 // [IEEE Std 754-2008 6.1]:
2374 // The behavior of infinity in floating-point arithmetic is derived from the
2375 // limiting cases of real arithmetic with operands of arbitrarily
2376 // large magnitude, when such a limit exists.
2377 // ...
2378 // Operations on infinite operands are usually exact and therefore signal no
2379 // exceptions ...
2380 return opOK;
2382 if (isNaN()) {
2383 if (isSignaling()) {
2384 // [IEEE Std 754-2008 6.2]:
2385 // Under default exception handling, any operation signaling an invalid
2386 // operation exception and for which a floating-point result is to be
2387 // delivered shall deliver a quiet NaN.
2388 makeQuiet();
2389 // [IEEE Std 754-2008 6.2]:
2390 // Signaling NaNs shall be reserved operands that, under default exception
2391 // handling, signal the invalid operation exception(see 7.2) for every
2392 // general-computational and signaling-computational operation except for
2393 // the conversions described in 5.12.
2394 return opInvalidOp;
2395 } else {
2396 // [IEEE Std 754-2008 6.2]:
2397 // For an operation with quiet NaN inputs, other than maximum and minimum
2398 // operations, if a floating-point result is to be delivered the result
2399 // shall be a quiet NaN which should be one of the input NaNs.
2400 // ...
2401 // Every general-computational and quiet-computational operation involving
2402 // one or more input NaNs, none of them signaling, shall signal no
2403 // exception, except fusedMultiplyAdd might signal the invalid operation
2404 // exception(see 7.2).
2405 return opOK;
2409 if (isZero()) {
2410 // [IEEE Std 754-2008 6.3]:
2411 // ... the sign of the result of conversions, the quantize operation, the
2412 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2413 // the sign of the first or only operand.
2414 return opOK;
2417 // If the exponent is large enough, we know that this value is already
2418 // integral, and the arithmetic below would potentially cause it to saturate
2419 // to +/-Inf. Bail out early instead.
2420 if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics))
2421 return opOK;
2423 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2424 // precision of our format, and then subtract it back off again. The choice
2425 // of rounding modes for the addition/subtraction determines the rounding mode
2426 // for our integral rounding as well.
2427 // NOTE: When the input value is negative, we do subtraction followed by
2428 // addition instead.
2429 APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)),
2431 IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1;
2432 IEEEFloat MagicConstant(*semantics);
2433 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2434 rmNearestTiesToEven);
2435 assert(fs == opOK);
2436 MagicConstant.sign = sign;
2438 // Preserve the input sign so that we can handle the case of zero result
2439 // correctly.
2440 bool inputSign = isNegative();
2442 fs = add(MagicConstant, rounding_mode);
2444 // Current value and 'MagicConstant' are both integers, so the result of the
2445 // subtraction is always exact according to Sterbenz' lemma.
2446 subtract(MagicConstant, rounding_mode);
2448 // Restore the input sign.
2449 if (inputSign != isNegative())
2450 changeSign();
2452 return fs;
2455 /* Comparison requires normalized numbers. */
2456 APFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2457 cmpResult result;
2459 assert(semantics == rhs.semantics);
2461 switch (PackCategoriesIntoKey(category, rhs.category)) {
2462 default:
2463 llvm_unreachable(nullptr);
2465 case PackCategoriesIntoKey(fcNaN, fcZero):
2466 case PackCategoriesIntoKey(fcNaN, fcNormal):
2467 case PackCategoriesIntoKey(fcNaN, fcInfinity):
2468 case PackCategoriesIntoKey(fcNaN, fcNaN):
2469 case PackCategoriesIntoKey(fcZero, fcNaN):
2470 case PackCategoriesIntoKey(fcNormal, fcNaN):
2471 case PackCategoriesIntoKey(fcInfinity, fcNaN):
2472 return cmpUnordered;
2474 case PackCategoriesIntoKey(fcInfinity, fcNormal):
2475 case PackCategoriesIntoKey(fcInfinity, fcZero):
2476 case PackCategoriesIntoKey(fcNormal, fcZero):
2477 if (sign)
2478 return cmpLessThan;
2479 else
2480 return cmpGreaterThan;
2482 case PackCategoriesIntoKey(fcNormal, fcInfinity):
2483 case PackCategoriesIntoKey(fcZero, fcInfinity):
2484 case PackCategoriesIntoKey(fcZero, fcNormal):
2485 if (rhs.sign)
2486 return cmpGreaterThan;
2487 else
2488 return cmpLessThan;
2490 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2491 if (sign == rhs.sign)
2492 return cmpEqual;
2493 else if (sign)
2494 return cmpLessThan;
2495 else
2496 return cmpGreaterThan;
2498 case PackCategoriesIntoKey(fcZero, fcZero):
2499 return cmpEqual;
2501 case PackCategoriesIntoKey(fcNormal, fcNormal):
2502 break;
2505 /* Two normal numbers. Do they have the same sign? */
2506 if (sign != rhs.sign) {
2507 if (sign)
2508 result = cmpLessThan;
2509 else
2510 result = cmpGreaterThan;
2511 } else {
2512 /* Compare absolute values; invert result if negative. */
2513 result = compareAbsoluteValue(rhs);
2515 if (sign) {
2516 if (result == cmpLessThan)
2517 result = cmpGreaterThan;
2518 else if (result == cmpGreaterThan)
2519 result = cmpLessThan;
2523 return result;
2526 /// IEEEFloat::convert - convert a value of one floating point type to another.
2527 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
2528 /// records whether the transformation lost information, i.e. whether
2529 /// converting the result back to the original type will produce the
2530 /// original value (this is almost the same as return value==fsOK, but there
2531 /// are edge cases where this is not so).
2533 APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2534 roundingMode rounding_mode,
2535 bool *losesInfo) {
2536 lostFraction lostFraction;
2537 unsigned int newPartCount, oldPartCount;
2538 opStatus fs;
2539 int shift;
2540 const fltSemantics &fromSemantics = *semantics;
2541 bool is_signaling = isSignaling();
2543 lostFraction = lfExactlyZero;
2544 newPartCount = partCountForBits(toSemantics.precision + 1);
2545 oldPartCount = partCount();
2546 shift = toSemantics.precision - fromSemantics.precision;
2548 bool X86SpecialNan = false;
2549 if (&fromSemantics == &semX87DoubleExtended &&
2550 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2551 (!(*significandParts() & 0x8000000000000000ULL) ||
2552 !(*significandParts() & 0x4000000000000000ULL))) {
2553 // x86 has some unusual NaNs which cannot be represented in any other
2554 // format; note them here.
2555 X86SpecialNan = true;
2558 // If this is a truncation of a denormal number, and the target semantics
2559 // has larger exponent range than the source semantics (this can happen
2560 // when truncating from PowerPC double-double to double format), the
2561 // right shift could lose result mantissa bits. Adjust exponent instead
2562 // of performing excessive shift.
2563 // Also do a similar trick in case shifting denormal would produce zero
2564 // significand as this case isn't handled correctly by normalize.
2565 if (shift < 0 && isFiniteNonZero()) {
2566 int omsb = significandMSB() + 1;
2567 int exponentChange = omsb - fromSemantics.precision;
2568 if (exponent + exponentChange < toSemantics.minExponent)
2569 exponentChange = toSemantics.minExponent - exponent;
2570 if (exponentChange < shift)
2571 exponentChange = shift;
2572 if (exponentChange < 0) {
2573 shift -= exponentChange;
2574 exponent += exponentChange;
2575 } else if (omsb <= -shift) {
2576 exponentChange = omsb + shift - 1; // leave at least one bit set
2577 shift -= exponentChange;
2578 exponent += exponentChange;
2582 // If this is a truncation, perform the shift before we narrow the storage.
2583 if (shift < 0 && (isFiniteNonZero() ||
2584 (category == fcNaN && semantics->nonFiniteBehavior !=
2585 fltNonfiniteBehavior::NanOnly)))
2586 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2588 // Fix the storage so it can hold to new value.
2589 if (newPartCount > oldPartCount) {
2590 // The new type requires more storage; make it available.
2591 integerPart *newParts;
2592 newParts = new integerPart[newPartCount];
2593 APInt::tcSet(newParts, 0, newPartCount);
2594 if (isFiniteNonZero() || category==fcNaN)
2595 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2596 freeSignificand();
2597 significand.parts = newParts;
2598 } else if (newPartCount == 1 && oldPartCount != 1) {
2599 // Switch to built-in storage for a single part.
2600 integerPart newPart = 0;
2601 if (isFiniteNonZero() || category==fcNaN)
2602 newPart = significandParts()[0];
2603 freeSignificand();
2604 significand.part = newPart;
2607 // Now that we have the right storage, switch the semantics.
2608 semantics = &toSemantics;
2610 // If this is an extension, perform the shift now that the storage is
2611 // available.
2612 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2613 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2615 if (isFiniteNonZero()) {
2616 fs = normalize(rounding_mode, lostFraction);
2617 *losesInfo = (fs != opOK);
2618 } else if (category == fcNaN) {
2619 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2620 *losesInfo =
2621 fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2622 makeNaN(false, sign);
2623 return is_signaling ? opInvalidOp : opOK;
2626 // If NaN is negative zero, we need to create a new NaN to avoid converting
2627 // NaN to -Inf.
2628 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2629 semantics->nanEncoding != fltNanEncoding::NegativeZero)
2630 makeNaN(false, false);
2632 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2634 // For x87 extended precision, we want to make a NaN, not a special NaN if
2635 // the input wasn't special either.
2636 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2637 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2639 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2640 // This also guarantees that a sNaN does not become Inf on a truncation
2641 // that loses all payload bits.
2642 if (is_signaling) {
2643 makeQuiet();
2644 fs = opInvalidOp;
2645 } else {
2646 fs = opOK;
2648 } else if (category == fcInfinity &&
2649 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2650 makeNaN(false, sign);
2651 *losesInfo = true;
2652 fs = opInexact;
2653 } else if (category == fcZero &&
2654 semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2655 // Negative zero loses info, but positive zero doesn't.
2656 *losesInfo =
2657 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2658 fs = *losesInfo ? opInexact : opOK;
2659 // NaN is negative zero means -0 -> +0, which can lose information
2660 sign = false;
2661 } else {
2662 *losesInfo = false;
2663 fs = opOK;
2666 if (category == fcZero && !semantics->hasZero)
2667 makeSmallestNormalized(false);
2668 return fs;
2671 /* Convert a floating point number to an integer according to the
2672 rounding mode. If the rounded integer value is out of range this
2673 returns an invalid operation exception and the contents of the
2674 destination parts are unspecified. If the rounded value is in
2675 range but the floating point number is not the exact integer, the C
2676 standard doesn't require an inexact exception to be raised. IEEE
2677 854 does require it so we do that.
2679 Note that for conversions to integer type the C standard requires
2680 round-to-zero to always be used. */
2681 APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2682 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2683 roundingMode rounding_mode, bool *isExact) const {
2684 lostFraction lost_fraction;
2685 const integerPart *src;
2686 unsigned int dstPartsCount, truncatedBits;
2688 *isExact = false;
2690 /* Handle the three special cases first. */
2691 if (category == fcInfinity || category == fcNaN)
2692 return opInvalidOp;
2694 dstPartsCount = partCountForBits(width);
2695 assert(dstPartsCount <= parts.size() && "Integer too big");
2697 if (category == fcZero) {
2698 APInt::tcSet(parts.data(), 0, dstPartsCount);
2699 // Negative zero can't be represented as an int.
2700 *isExact = !sign;
2701 return opOK;
2704 src = significandParts();
2706 /* Step 1: place our absolute value, with any fraction truncated, in
2707 the destination. */
2708 if (exponent < 0) {
2709 /* Our absolute value is less than one; truncate everything. */
2710 APInt::tcSet(parts.data(), 0, dstPartsCount);
2711 /* For exponent -1 the integer bit represents .5, look at that.
2712 For smaller exponents leftmost truncated bit is 0. */
2713 truncatedBits = semantics->precision -1U - exponent;
2714 } else {
2715 /* We want the most significant (exponent + 1) bits; the rest are
2716 truncated. */
2717 unsigned int bits = exponent + 1U;
2719 /* Hopelessly large in magnitude? */
2720 if (bits > width)
2721 return opInvalidOp;
2723 if (bits < semantics->precision) {
2724 /* We truncate (semantics->precision - bits) bits. */
2725 truncatedBits = semantics->precision - bits;
2726 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2727 } else {
2728 /* We want at least as many bits as are available. */
2729 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2731 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2732 bits - semantics->precision);
2733 truncatedBits = 0;
2737 /* Step 2: work out any lost fraction, and increment the absolute
2738 value if we would round away from zero. */
2739 if (truncatedBits) {
2740 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2741 truncatedBits);
2742 if (lost_fraction != lfExactlyZero &&
2743 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2744 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2745 return opInvalidOp; /* Overflow. */
2747 } else {
2748 lost_fraction = lfExactlyZero;
2751 /* Step 3: check if we fit in the destination. */
2752 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2754 if (sign) {
2755 if (!isSigned) {
2756 /* Negative numbers cannot be represented as unsigned. */
2757 if (omsb != 0)
2758 return opInvalidOp;
2759 } else {
2760 /* It takes omsb bits to represent the unsigned integer value.
2761 We lose a bit for the sign, but care is needed as the
2762 maximally negative integer is a special case. */
2763 if (omsb == width &&
2764 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2765 return opInvalidOp;
2767 /* This case can happen because of rounding. */
2768 if (omsb > width)
2769 return opInvalidOp;
2772 APInt::tcNegate (parts.data(), dstPartsCount);
2773 } else {
2774 if (omsb >= width + !isSigned)
2775 return opInvalidOp;
2778 if (lost_fraction == lfExactlyZero) {
2779 *isExact = true;
2780 return opOK;
2781 } else
2782 return opInexact;
2785 /* Same as convertToSignExtendedInteger, except we provide
2786 deterministic values in case of an invalid operation exception,
2787 namely zero for NaNs and the minimal or maximal value respectively
2788 for underflow or overflow.
2789 The *isExact output tells whether the result is exact, in the sense
2790 that converting it back to the original floating point type produces
2791 the original value. This is almost equivalent to result==opOK,
2792 except for negative zeroes.
2794 APFloat::opStatus
2795 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2796 unsigned int width, bool isSigned,
2797 roundingMode rounding_mode, bool *isExact) const {
2798 opStatus fs;
2800 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2801 isExact);
2803 if (fs == opInvalidOp) {
2804 unsigned int bits, dstPartsCount;
2806 dstPartsCount = partCountForBits(width);
2807 assert(dstPartsCount <= parts.size() && "Integer too big");
2809 if (category == fcNaN)
2810 bits = 0;
2811 else if (sign)
2812 bits = isSigned;
2813 else
2814 bits = width - isSigned;
2816 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2817 if (sign && isSigned)
2818 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2821 return fs;
2824 /* Convert an unsigned integer SRC to a floating point number,
2825 rounding according to ROUNDING_MODE. The sign of the floating
2826 point number is not modified. */
2827 APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2828 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2829 unsigned int omsb, precision, dstCount;
2830 integerPart *dst;
2831 lostFraction lost_fraction;
2833 category = fcNormal;
2834 omsb = APInt::tcMSB(src, srcCount) + 1;
2835 dst = significandParts();
2836 dstCount = partCount();
2837 precision = semantics->precision;
2839 /* We want the most significant PRECISION bits of SRC. There may not
2840 be that many; extract what we can. */
2841 if (precision <= omsb) {
2842 exponent = omsb - 1;
2843 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2844 omsb - precision);
2845 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2846 } else {
2847 exponent = precision - 1;
2848 lost_fraction = lfExactlyZero;
2849 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2852 return normalize(rounding_mode, lost_fraction);
2855 APFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2856 roundingMode rounding_mode) {
2857 unsigned int partCount = Val.getNumWords();
2858 APInt api = Val;
2860 sign = false;
2861 if (isSigned && api.isNegative()) {
2862 sign = true;
2863 api = -api;
2866 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2869 /* Convert a two's complement integer SRC to a floating point number,
2870 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2871 integer is signed, in which case it must be sign-extended. */
2872 APFloat::opStatus
2873 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2874 unsigned int srcCount, bool isSigned,
2875 roundingMode rounding_mode) {
2876 opStatus status;
2878 if (isSigned &&
2879 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2880 integerPart *copy;
2882 /* If we're signed and negative negate a copy. */
2883 sign = true;
2884 copy = new integerPart[srcCount];
2885 APInt::tcAssign(copy, src, srcCount);
2886 APInt::tcNegate(copy, srcCount);
2887 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2888 delete [] copy;
2889 } else {
2890 sign = false;
2891 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2894 return status;
2897 /* FIXME: should this just take a const APInt reference? */
2898 APFloat::opStatus
2899 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2900 unsigned int width, bool isSigned,
2901 roundingMode rounding_mode) {
2902 unsigned int partCount = partCountForBits(width);
2903 APInt api = APInt(width, ArrayRef(parts, partCount));
2905 sign = false;
2906 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2907 sign = true;
2908 api = -api;
2911 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2914 Expected<APFloat::opStatus>
2915 IEEEFloat::convertFromHexadecimalString(StringRef s,
2916 roundingMode rounding_mode) {
2917 lostFraction lost_fraction = lfExactlyZero;
2919 category = fcNormal;
2920 zeroSignificand();
2921 exponent = 0;
2923 integerPart *significand = significandParts();
2924 unsigned partsCount = partCount();
2925 unsigned bitPos = partsCount * integerPartWidth;
2926 bool computedTrailingFraction = false;
2928 // Skip leading zeroes and any (hexa)decimal point.
2929 StringRef::iterator begin = s.begin();
2930 StringRef::iterator end = s.end();
2931 StringRef::iterator dot;
2932 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2933 if (!PtrOrErr)
2934 return PtrOrErr.takeError();
2935 StringRef::iterator p = *PtrOrErr;
2936 StringRef::iterator firstSignificantDigit = p;
2938 while (p != end) {
2939 integerPart hex_value;
2941 if (*p == '.') {
2942 if (dot != end)
2943 return createError("String contains multiple dots");
2944 dot = p++;
2945 continue;
2948 hex_value = hexDigitValue(*p);
2949 if (hex_value == UINT_MAX)
2950 break;
2952 p++;
2954 // Store the number while we have space.
2955 if (bitPos) {
2956 bitPos -= 4;
2957 hex_value <<= bitPos % integerPartWidth;
2958 significand[bitPos / integerPartWidth] |= hex_value;
2959 } else if (!computedTrailingFraction) {
2960 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2961 if (!FractOrErr)
2962 return FractOrErr.takeError();
2963 lost_fraction = *FractOrErr;
2964 computedTrailingFraction = true;
2968 /* Hex floats require an exponent but not a hexadecimal point. */
2969 if (p == end)
2970 return createError("Hex strings require an exponent");
2971 if (*p != 'p' && *p != 'P')
2972 return createError("Invalid character in significand");
2973 if (p == begin)
2974 return createError("Significand has no digits");
2975 if (dot != end && p - begin == 1)
2976 return createError("Significand has no digits");
2978 /* Ignore the exponent if we are zero. */
2979 if (p != firstSignificantDigit) {
2980 int expAdjustment;
2982 /* Implicit hexadecimal point? */
2983 if (dot == end)
2984 dot = p;
2986 /* Calculate the exponent adjustment implicit in the number of
2987 significant digits. */
2988 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2989 if (expAdjustment < 0)
2990 expAdjustment++;
2991 expAdjustment = expAdjustment * 4 - 1;
2993 /* Adjust for writing the significand starting at the most
2994 significant nibble. */
2995 expAdjustment += semantics->precision;
2996 expAdjustment -= partsCount * integerPartWidth;
2998 /* Adjust for the given exponent. */
2999 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
3000 if (!ExpOrErr)
3001 return ExpOrErr.takeError();
3002 exponent = *ExpOrErr;
3005 return normalize(rounding_mode, lost_fraction);
3008 APFloat::opStatus
3009 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
3010 unsigned sigPartCount, int exp,
3011 roundingMode rounding_mode) {
3012 unsigned int parts, pow5PartCount;
3013 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
3014 integerPart pow5Parts[maxPowerOfFiveParts];
3015 bool isNearest;
3017 isNearest = (rounding_mode == rmNearestTiesToEven ||
3018 rounding_mode == rmNearestTiesToAway);
3020 parts = partCountForBits(semantics->precision + 11);
3022 /* Calculate pow(5, abs(exp)). */
3023 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
3025 for (;; parts *= 2) {
3026 opStatus sigStatus, powStatus;
3027 unsigned int excessPrecision, truncatedBits;
3029 calcSemantics.precision = parts * integerPartWidth - 1;
3030 excessPrecision = calcSemantics.precision - semantics->precision;
3031 truncatedBits = excessPrecision;
3033 IEEEFloat decSig(calcSemantics, uninitialized);
3034 decSig.makeZero(sign);
3035 IEEEFloat pow5(calcSemantics);
3037 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
3038 rmNearestTiesToEven);
3039 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
3040 rmNearestTiesToEven);
3041 /* Add exp, as 10^n = 5^n * 2^n. */
3042 decSig.exponent += exp;
3044 lostFraction calcLostFraction;
3045 integerPart HUerr, HUdistance;
3046 unsigned int powHUerr;
3048 if (exp >= 0) {
3049 /* multiplySignificand leaves the precision-th bit set to 1. */
3050 calcLostFraction = decSig.multiplySignificand(pow5);
3051 powHUerr = powStatus != opOK;
3052 } else {
3053 calcLostFraction = decSig.divideSignificand(pow5);
3054 /* Denormal numbers have less precision. */
3055 if (decSig.exponent < semantics->minExponent) {
3056 excessPrecision += (semantics->minExponent - decSig.exponent);
3057 truncatedBits = excessPrecision;
3058 if (excessPrecision > calcSemantics.precision)
3059 excessPrecision = calcSemantics.precision;
3061 /* Extra half-ulp lost in reciprocal of exponent. */
3062 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
3065 /* Both multiplySignificand and divideSignificand return the
3066 result with the integer bit set. */
3067 assert(APInt::tcExtractBit
3068 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
3070 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
3071 powHUerr);
3072 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
3073 excessPrecision, isNearest);
3075 /* Are we guaranteed to round correctly if we truncate? */
3076 if (HUdistance >= HUerr) {
3077 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
3078 calcSemantics.precision - excessPrecision,
3079 excessPrecision);
3080 /* Take the exponent of decSig. If we tcExtract-ed less bits
3081 above we must adjust our exponent to compensate for the
3082 implicit right shift. */
3083 exponent = (decSig.exponent + semantics->precision
3084 - (calcSemantics.precision - excessPrecision));
3085 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
3086 decSig.partCount(),
3087 truncatedBits);
3088 return normalize(rounding_mode, calcLostFraction);
3093 Expected<APFloat::opStatus>
3094 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
3095 decimalInfo D;
3096 opStatus fs;
3098 /* Scan the text. */
3099 StringRef::iterator p = str.begin();
3100 if (Error Err = interpretDecimal(p, str.end(), &D))
3101 return std::move(Err);
3103 /* Handle the quick cases. First the case of no significant digits,
3104 i.e. zero, and then exponents that are obviously too large or too
3105 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
3106 definitely overflows if
3108 (exp - 1) * L >= maxExponent
3110 and definitely underflows to zero where
3112 (exp + 1) * L <= minExponent - precision
3114 With integer arithmetic the tightest bounds for L are
3116 93/28 < L < 196/59 [ numerator <= 256 ]
3117 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
3120 // Test if we have a zero number allowing for strings with no null terminators
3121 // and zero decimals with non-zero exponents.
3123 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3124 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3125 // be at most one dot. On the other hand, if we have a zero with a non-zero
3126 // exponent, then we know that D.firstSigDigit will be non-numeric.
3127 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3128 category = fcZero;
3129 fs = opOK;
3130 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3131 sign = false;
3132 if (!semantics->hasZero)
3133 makeSmallestNormalized(false);
3135 /* Check whether the normalized exponent is high enough to overflow
3136 max during the log-rebasing in the max-exponent check below. */
3137 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3138 fs = handleOverflow(rounding_mode);
3140 /* If it wasn't, then it also wasn't high enough to overflow max
3141 during the log-rebasing in the min-exponent check. Check that it
3142 won't overflow min in either check, then perform the min-exponent
3143 check. */
3144 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3145 (D.normalizedExponent + 1) * 28738 <=
3146 8651 * (semantics->minExponent - (int) semantics->precision)) {
3147 /* Underflow to zero and round. */
3148 category = fcNormal;
3149 zeroSignificand();
3150 fs = normalize(rounding_mode, lfLessThanHalf);
3152 /* We can finally safely perform the max-exponent check. */
3153 } else if ((D.normalizedExponent - 1) * 42039
3154 >= 12655 * semantics->maxExponent) {
3155 /* Overflow and round. */
3156 fs = handleOverflow(rounding_mode);
3157 } else {
3158 integerPart *decSignificand;
3159 unsigned int partCount;
3161 /* A tight upper bound on number of bits required to hold an
3162 N-digit decimal integer is N * 196 / 59. Allocate enough space
3163 to hold the full significand, and an extra part required by
3164 tcMultiplyPart. */
3165 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3166 partCount = partCountForBits(1 + 196 * partCount / 59);
3167 decSignificand = new integerPart[partCount + 1];
3168 partCount = 0;
3170 /* Convert to binary efficiently - we do almost all multiplication
3171 in an integerPart. When this would overflow do we do a single
3172 bignum multiplication, and then revert again to multiplication
3173 in an integerPart. */
3174 do {
3175 integerPart decValue, val, multiplier;
3177 val = 0;
3178 multiplier = 1;
3180 do {
3181 if (*p == '.') {
3182 p++;
3183 if (p == str.end()) {
3184 break;
3187 decValue = decDigitValue(*p++);
3188 if (decValue >= 10U) {
3189 delete[] decSignificand;
3190 return createError("Invalid character in significand");
3192 multiplier *= 10;
3193 val = val * 10 + decValue;
3194 /* The maximum number that can be multiplied by ten with any
3195 digit added without overflowing an integerPart. */
3196 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3198 /* Multiply out the current part. */
3199 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3200 partCount, partCount + 1, false);
3202 /* If we used another part (likely but not guaranteed), increase
3203 the count. */
3204 if (decSignificand[partCount])
3205 partCount++;
3206 } while (p <= D.lastSigDigit);
3208 category = fcNormal;
3209 fs = roundSignificandWithExponent(decSignificand, partCount,
3210 D.exponent, rounding_mode);
3212 delete [] decSignificand;
3215 return fs;
3218 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3219 const size_t MIN_NAME_SIZE = 3;
3221 if (str.size() < MIN_NAME_SIZE)
3222 return false;
3224 if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3225 makeInf(false);
3226 return true;
3229 bool IsNegative = str.front() == '-';
3230 if (IsNegative) {
3231 str = str.drop_front();
3232 if (str.size() < MIN_NAME_SIZE)
3233 return false;
3235 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3236 makeInf(true);
3237 return true;
3241 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3242 bool IsSignaling = str.front() == 's' || str.front() == 'S';
3243 if (IsSignaling) {
3244 str = str.drop_front();
3245 if (str.size() < MIN_NAME_SIZE)
3246 return false;
3249 if (str.starts_with("nan") || str.starts_with("NaN")) {
3250 str = str.drop_front(3);
3252 // A NaN without payload.
3253 if (str.empty()) {
3254 makeNaN(IsSignaling, IsNegative);
3255 return true;
3258 // Allow the payload to be inside parentheses.
3259 if (str.front() == '(') {
3260 // Parentheses should be balanced (and not empty).
3261 if (str.size() <= 2 || str.back() != ')')
3262 return false;
3264 str = str.slice(1, str.size() - 1);
3267 // Determine the payload number's radix.
3268 unsigned Radix = 10;
3269 if (str[0] == '0') {
3270 if (str.size() > 1 && tolower(str[1]) == 'x') {
3271 str = str.drop_front(2);
3272 Radix = 16;
3273 } else
3274 Radix = 8;
3277 // Parse the payload and make the NaN.
3278 APInt Payload;
3279 if (!str.getAsInteger(Radix, Payload)) {
3280 makeNaN(IsSignaling, IsNegative, &Payload);
3281 return true;
3285 return false;
3288 Expected<APFloat::opStatus>
3289 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3290 if (str.empty())
3291 return createError("Invalid string length");
3293 // Handle special cases.
3294 if (convertFromStringSpecials(str))
3295 return opOK;
3297 /* Handle a leading minus sign. */
3298 StringRef::iterator p = str.begin();
3299 size_t slen = str.size();
3300 sign = *p == '-' ? 1 : 0;
3301 if (sign && !semantics->hasSignedRepr)
3302 llvm_unreachable(
3303 "This floating point format does not support signed values");
3305 if (*p == '-' || *p == '+') {
3306 p++;
3307 slen--;
3308 if (!slen)
3309 return createError("String has no digits");
3312 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3313 if (slen == 2)
3314 return createError("Invalid string");
3315 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3316 rounding_mode);
3319 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3322 /* Write out a hexadecimal representation of the floating point value
3323 to DST, which must be of sufficient size, in the C99 form
3324 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3325 excluding the terminating NUL.
3327 If UPPERCASE, the output is in upper case, otherwise in lower case.
3329 HEXDIGITS digits appear altogether, rounding the value if
3330 necessary. If HEXDIGITS is 0, the minimal precision to display the
3331 number precisely is used instead. If nothing would appear after
3332 the decimal point it is suppressed.
3334 The decimal exponent is always printed and has at least one digit.
3335 Zero values display an exponent of zero. Infinities and NaNs
3336 appear as "infinity" or "nan" respectively.
3338 The above rules are as specified by C99. There is ambiguity about
3339 what the leading hexadecimal digit should be. This implementation
3340 uses whatever is necessary so that the exponent is displayed as
3341 stored. This implies the exponent will fall within the IEEE format
3342 range, and the leading hexadecimal digit will be 0 (for denormals),
3343 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3344 any other digits zero).
3346 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3347 bool upperCase,
3348 roundingMode rounding_mode) const {
3349 char *p;
3351 p = dst;
3352 if (sign)
3353 *dst++ = '-';
3355 switch (category) {
3356 case fcInfinity:
3357 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3358 dst += sizeof infinityL - 1;
3359 break;
3361 case fcNaN:
3362 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3363 dst += sizeof NaNU - 1;
3364 break;
3366 case fcZero:
3367 *dst++ = '0';
3368 *dst++ = upperCase ? 'X': 'x';
3369 *dst++ = '0';
3370 if (hexDigits > 1) {
3371 *dst++ = '.';
3372 memset (dst, '0', hexDigits - 1);
3373 dst += hexDigits - 1;
3375 *dst++ = upperCase ? 'P': 'p';
3376 *dst++ = '0';
3377 break;
3379 case fcNormal:
3380 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3381 break;
3384 *dst = 0;
3386 return static_cast<unsigned int>(dst - p);
3389 /* Does the hard work of outputting the correctly rounded hexadecimal
3390 form of a normal floating point number with the specified number of
3391 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3392 digits necessary to print the value precisely is output. */
3393 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3394 bool upperCase,
3395 roundingMode rounding_mode) const {
3396 unsigned int count, valueBits, shift, partsCount, outputDigits;
3397 const char *hexDigitChars;
3398 const integerPart *significand;
3399 char *p;
3400 bool roundUp;
3402 *dst++ = '0';
3403 *dst++ = upperCase ? 'X': 'x';
3405 roundUp = false;
3406 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3408 significand = significandParts();
3409 partsCount = partCount();
3411 /* +3 because the first digit only uses the single integer bit, so
3412 we have 3 virtual zero most-significant-bits. */
3413 valueBits = semantics->precision + 3;
3414 shift = integerPartWidth - valueBits % integerPartWidth;
3416 /* The natural number of digits required ignoring trailing
3417 insignificant zeroes. */
3418 outputDigits = (valueBits - significandLSB () + 3) / 4;
3420 /* hexDigits of zero means use the required number for the
3421 precision. Otherwise, see if we are truncating. If we are,
3422 find out if we need to round away from zero. */
3423 if (hexDigits) {
3424 if (hexDigits < outputDigits) {
3425 /* We are dropping non-zero bits, so need to check how to round.
3426 "bits" is the number of dropped bits. */
3427 unsigned int bits;
3428 lostFraction fraction;
3430 bits = valueBits - hexDigits * 4;
3431 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3432 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3434 outputDigits = hexDigits;
3437 /* Write the digits consecutively, and start writing in the location
3438 of the hexadecimal point. We move the most significant digit
3439 left and add the hexadecimal point later. */
3440 p = ++dst;
3442 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3444 while (outputDigits && count) {
3445 integerPart part;
3447 /* Put the most significant integerPartWidth bits in "part". */
3448 if (--count == partsCount)
3449 part = 0; /* An imaginary higher zero part. */
3450 else
3451 part = significand[count] << shift;
3453 if (count && shift)
3454 part |= significand[count - 1] >> (integerPartWidth - shift);
3456 /* Convert as much of "part" to hexdigits as we can. */
3457 unsigned int curDigits = integerPartWidth / 4;
3459 if (curDigits > outputDigits)
3460 curDigits = outputDigits;
3461 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3462 outputDigits -= curDigits;
3465 if (roundUp) {
3466 char *q = dst;
3468 /* Note that hexDigitChars has a trailing '0'. */
3469 do {
3470 q--;
3471 *q = hexDigitChars[hexDigitValue (*q) + 1];
3472 } while (*q == '0');
3473 assert(q >= p);
3474 } else {
3475 /* Add trailing zeroes. */
3476 memset (dst, '0', outputDigits);
3477 dst += outputDigits;
3480 /* Move the most significant digit to before the point, and if there
3481 is something after the decimal point add it. This must come
3482 after rounding above. */
3483 p[-1] = p[0];
3484 if (dst -1 == p)
3485 dst--;
3486 else
3487 p[0] = '.';
3489 /* Finally output the exponent. */
3490 *dst++ = upperCase ? 'P': 'p';
3492 return writeSignedDecimal (dst, exponent);
3495 hash_code hash_value(const IEEEFloat &Arg) {
3496 if (!Arg.isFiniteNonZero())
3497 return hash_combine((uint8_t)Arg.category,
3498 // NaN has no sign, fix it at zero.
3499 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3500 Arg.semantics->precision);
3502 // Normal floats need their exponent and significand hashed.
3503 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3504 Arg.semantics->precision, Arg.exponent,
3505 hash_combine_range(
3506 Arg.significandParts(),
3507 Arg.significandParts() + Arg.partCount()));
3510 // Conversion from APFloat to/from host float/double. It may eventually be
3511 // possible to eliminate these and have everybody deal with APFloats, but that
3512 // will take a while. This approach will not easily extend to long double.
3513 // Current implementation requires integerPartWidth==64, which is correct at
3514 // the moment but could be made more general.
3516 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
3517 // the actual IEEE respresentations. We compensate for that here.
3519 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3520 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3521 assert(partCount()==2);
3523 uint64_t myexponent, mysignificand;
3525 if (isFiniteNonZero()) {
3526 myexponent = exponent+16383; //bias
3527 mysignificand = significandParts()[0];
3528 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3529 myexponent = 0; // denormal
3530 } else if (category==fcZero) {
3531 myexponent = 0;
3532 mysignificand = 0;
3533 } else if (category==fcInfinity) {
3534 myexponent = 0x7fff;
3535 mysignificand = 0x8000000000000000ULL;
3536 } else {
3537 assert(category == fcNaN && "Unknown category");
3538 myexponent = 0x7fff;
3539 mysignificand = significandParts()[0];
3542 uint64_t words[2];
3543 words[0] = mysignificand;
3544 words[1] = ((uint64_t)(sign & 1) << 15) |
3545 (myexponent & 0x7fffLL);
3546 return APInt(80, words);
3549 APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3550 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3551 assert(partCount()==2);
3553 uint64_t words[2];
3554 opStatus fs;
3555 bool losesInfo;
3557 // Convert number to double. To avoid spurious underflows, we re-
3558 // normalize against the "double" minExponent first, and only *then*
3559 // truncate the mantissa. The result of that second conversion
3560 // may be inexact, but should never underflow.
3561 // Declare fltSemantics before APFloat that uses it (and
3562 // saves pointer to it) to ensure correct destruction order.
3563 fltSemantics extendedSemantics = *semantics;
3564 extendedSemantics.minExponent = semIEEEdouble.minExponent;
3565 IEEEFloat extended(*this);
3566 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3567 assert(fs == opOK && !losesInfo);
3568 (void)fs;
3570 IEEEFloat u(extended);
3571 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3572 assert(fs == opOK || fs == opInexact);
3573 (void)fs;
3574 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3576 // If conversion was exact or resulted in a special case, we're done;
3577 // just set the second double to zero. Otherwise, re-convert back to
3578 // the extended format and compute the difference. This now should
3579 // convert exactly to double.
3580 if (u.isFiniteNonZero() && losesInfo) {
3581 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3582 assert(fs == opOK && !losesInfo);
3583 (void)fs;
3585 IEEEFloat v(extended);
3586 v.subtract(u, rmNearestTiesToEven);
3587 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3588 assert(fs == opOK && !losesInfo);
3589 (void)fs;
3590 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3591 } else {
3592 words[1] = 0;
3595 return APInt(128, words);
3598 template <const fltSemantics &S>
3599 APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3600 assert(semantics == &S);
3601 const int bias =
3602 (semantics == &semFloat8E8M0FNU) ? -S.minExponent : -(S.minExponent - 1);
3603 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3604 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3605 constexpr integerPart integer_bit =
3606 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3607 constexpr uint64_t significand_mask = integer_bit - 1;
3608 constexpr unsigned int exponent_bits =
3609 trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
3610 : S.sizeInBits;
3611 static_assert(exponent_bits < 64);
3612 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3614 uint64_t myexponent;
3615 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3616 mysignificand;
3618 if (isFiniteNonZero()) {
3619 myexponent = exponent + bias;
3620 std::copy_n(significandParts(), mysignificand.size(),
3621 mysignificand.begin());
3622 if (myexponent == 1 &&
3623 !(significandParts()[integer_bit_part] & integer_bit))
3624 myexponent = 0; // denormal
3625 } else if (category == fcZero) {
3626 if (!S.hasZero)
3627 llvm_unreachable("semantics does not support zero!");
3628 myexponent = ::exponentZero(S) + bias;
3629 mysignificand.fill(0);
3630 } else if (category == fcInfinity) {
3631 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3632 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3633 llvm_unreachable("semantics don't support inf!");
3634 myexponent = ::exponentInf(S) + bias;
3635 mysignificand.fill(0);
3636 } else {
3637 assert(category == fcNaN && "Unknown category!");
3638 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3639 llvm_unreachable("semantics don't support NaN!");
3640 myexponent = ::exponentNaN(S) + bias;
3641 std::copy_n(significandParts(), mysignificand.size(),
3642 mysignificand.begin());
3644 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3645 auto words_iter =
3646 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3647 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3648 // Clear the integer bit.
3649 words[mysignificand.size() - 1] &= significand_mask;
3651 std::fill(words_iter, words.end(), uint64_t{0});
3652 constexpr size_t last_word = words.size() - 1;
3653 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3654 << ((S.sizeInBits - 1) % 64);
3655 words[last_word] |= shifted_sign;
3656 uint64_t shifted_exponent = (myexponent & exponent_mask)
3657 << (trailing_significand_bits % 64);
3658 words[last_word] |= shifted_exponent;
3659 if constexpr (last_word == 0) {
3660 return APInt(S.sizeInBits, words[0]);
3662 return APInt(S.sizeInBits, words);
3665 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3666 assert(partCount() == 2);
3667 return convertIEEEFloatToAPInt<semIEEEquad>();
3670 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3671 assert(partCount()==1);
3672 return convertIEEEFloatToAPInt<semIEEEdouble>();
3675 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3676 assert(partCount()==1);
3677 return convertIEEEFloatToAPInt<semIEEEsingle>();
3680 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3681 assert(partCount() == 1);
3682 return convertIEEEFloatToAPInt<semBFloat>();
3685 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3686 assert(partCount()==1);
3687 return convertIEEEFloatToAPInt<semIEEEhalf>();
3690 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3691 assert(partCount() == 1);
3692 return convertIEEEFloatToAPInt<semFloat8E5M2>();
3695 APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3696 assert(partCount() == 1);
3697 return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
3700 APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3701 assert(partCount() == 1);
3702 return convertIEEEFloatToAPInt<semFloat8E4M3>();
3705 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3706 assert(partCount() == 1);
3707 return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
3710 APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3711 assert(partCount() == 1);
3712 return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
3715 APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3716 assert(partCount() == 1);
3717 return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
3720 APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3721 assert(partCount() == 1);
3722 return convertIEEEFloatToAPInt<semFloat8E3M4>();
3725 APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3726 assert(partCount() == 1);
3727 return convertIEEEFloatToAPInt<semFloatTF32>();
3730 APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3731 assert(partCount() == 1);
3732 return convertIEEEFloatToAPInt<semFloat8E8M0FNU>();
3735 APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3736 assert(partCount() == 1);
3737 return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
3740 APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3741 assert(partCount() == 1);
3742 return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
3745 APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3746 assert(partCount() == 1);
3747 return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
3750 // This function creates an APInt that is just a bit map of the floating
3751 // point constant as it would appear in memory. It is not a conversion,
3752 // and treating the result as a normal integer is unlikely to be useful.
3754 APInt IEEEFloat::bitcastToAPInt() const {
3755 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3756 return convertHalfAPFloatToAPInt();
3758 if (semantics == (const llvm::fltSemantics *)&semBFloat)
3759 return convertBFloatAPFloatToAPInt();
3761 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3762 return convertFloatAPFloatToAPInt();
3764 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3765 return convertDoubleAPFloatToAPInt();
3767 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3768 return convertQuadrupleAPFloatToAPInt();
3770 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3771 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3773 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3774 return convertFloat8E5M2APFloatToAPInt();
3776 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3777 return convertFloat8E5M2FNUZAPFloatToAPInt();
3779 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3)
3780 return convertFloat8E4M3APFloatToAPInt();
3782 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3783 return convertFloat8E4M3FNAPFloatToAPInt();
3785 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3786 return convertFloat8E4M3FNUZAPFloatToAPInt();
3788 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3789 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3791 if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4)
3792 return convertFloat8E3M4APFloatToAPInt();
3794 if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3795 return convertFloatTF32APFloatToAPInt();
3797 if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FNU)
3798 return convertFloat8E8M0FNUAPFloatToAPInt();
3800 if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
3801 return convertFloat6E3M2FNAPFloatToAPInt();
3803 if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
3804 return convertFloat6E2M3FNAPFloatToAPInt();
3806 if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN)
3807 return convertFloat4E2M1FNAPFloatToAPInt();
3809 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3810 "unknown format!");
3811 return convertF80LongDoubleAPFloatToAPInt();
3814 float IEEEFloat::convertToFloat() const {
3815 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3816 "Float semantics are not IEEEsingle");
3817 APInt api = bitcastToAPInt();
3818 return api.bitsToFloat();
3821 double IEEEFloat::convertToDouble() const {
3822 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3823 "Float semantics are not IEEEdouble");
3824 APInt api = bitcastToAPInt();
3825 return api.bitsToDouble();
#ifdef HAS_IEE754_FLOAT128
/// Return this value as a host float128 by reinterpreting its bit pattern.
/// Only valid when the semantics are semIEEEquad.
float128 IEEEFloat::convertToQuad() const {
  assert(semantics == &semIEEEquad &&
         "Float semantics are not IEEEquads");
  return bitcastToAPInt().bitsToQuad();
}
#endif
3837 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3838 /// does not support these bit patterns:
3839 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3840 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3841 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3842 /// exponent = 0, integer bit 1 ("pseudodenormal")
3843 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3844 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3845 uint64_t i1 = api.getRawData()[0];
3846 uint64_t i2 = api.getRawData()[1];
3847 uint64_t myexponent = (i2 & 0x7fff);
3848 uint64_t mysignificand = i1;
3849 uint8_t myintegerbit = mysignificand >> 63;
3851 initialize(&semX87DoubleExtended);
3852 assert(partCount()==2);
3854 sign = static_cast<unsigned int>(i2>>15);
3855 if (myexponent == 0 && mysignificand == 0) {
3856 makeZero(sign);
3857 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3858 makeInf(sign);
3859 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3860 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3861 category = fcNaN;
3862 exponent = exponentNaN();
3863 significandParts()[0] = mysignificand;
3864 significandParts()[1] = 0;
3865 } else {
3866 category = fcNormal;
3867 exponent = myexponent - 16383;
3868 significandParts()[0] = mysignificand;
3869 significandParts()[1] = 0;
3870 if (myexponent==0) // denormal
3871 exponent = -16382;
3875 void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3876 uint64_t i1 = api.getRawData()[0];
3877 uint64_t i2 = api.getRawData()[1];
3878 opStatus fs;
3879 bool losesInfo;
3881 // Get the first double and convert to our format.
3882 initFromDoubleAPInt(APInt(64, i1));
3883 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3884 assert(fs == opOK && !losesInfo);
3885 (void)fs;
3887 // Unless we have a special case, add in second double.
3888 if (isFiniteNonZero()) {
3889 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3890 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3891 assert(fs == opOK && !losesInfo);
3892 (void)fs;
3894 add(v, rmNearestTiesToEven);
3898 // The E8M0 format has the following characteristics:
3899 // It is an 8-bit unsigned format with only exponents (no actual significand).
3900 // No encodings for {zero, infinities or denorms}.
3901 // NaN is represented by all 1's.
3902 // Bias is 127.
3903 void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3904 const uint64_t exponent_mask = 0xff;
3905 uint64_t val = api.getRawData()[0];
3906 uint64_t myexponent = (val & exponent_mask);
3908 initialize(&semFloat8E8M0FNU);
3909 assert(partCount() == 1);
3911 // This format has unsigned representation only
3912 sign = 0;
3914 // Set the significand
3915 // This format does not have any significand but the 'Pth' precision bit is
3916 // always set to 1 for consistency in APFloat's internal representation.
3917 uint64_t mysignificand = 1;
3918 significandParts()[0] = mysignificand;
3920 // This format can either have a NaN or fcNormal
3921 // All 1's i.e. 255 is a NaN
3922 if (val == exponent_mask) {
3923 category = fcNaN;
3924 exponent = exponentNaN();
3925 return;
3927 // Handle fcNormal...
3928 category = fcNormal;
3929 exponent = myexponent - 127; // 127 is bias
3931 template <const fltSemantics &S>
3932 void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3933 assert(api.getBitWidth() == S.sizeInBits);
3934 constexpr integerPart integer_bit = integerPart{1}
3935 << ((S.precision - 1) % integerPartWidth);
3936 constexpr uint64_t significand_mask = integer_bit - 1;
3937 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3938 constexpr unsigned int stored_significand_parts =
3939 partCountForBits(trailing_significand_bits);
3940 constexpr unsigned int exponent_bits =
3941 S.sizeInBits - 1 - trailing_significand_bits;
3942 static_assert(exponent_bits < 64);
3943 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3944 constexpr int bias = -(S.minExponent - 1);
3946 // Copy the bits of the significand. We need to clear out the exponent and
3947 // sign bit in the last word.
3948 std::array<integerPart, stored_significand_parts> mysignificand;
3949 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3950 if constexpr (significand_mask != 0) {
3951 mysignificand[mysignificand.size() - 1] &= significand_mask;
3954 // We assume the last word holds the sign bit, the exponent, and potentially
3955 // some of the trailing significand field.
3956 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3957 uint64_t myexponent =
3958 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3960 initialize(&S);
3961 assert(partCount() == mysignificand.size());
3963 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3965 bool all_zero_significand =
3966 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3968 bool is_zero = myexponent == 0 && all_zero_significand;
3970 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3971 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3972 makeInf(sign);
3973 return;
3977 bool is_nan = false;
3979 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3980 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3981 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3982 bool all_ones_significand =
3983 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3984 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3985 (!significand_mask ||
3986 mysignificand[mysignificand.size() - 1] == significand_mask);
3987 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3988 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3989 is_nan = is_zero && sign;
3992 if (is_nan) {
3993 category = fcNaN;
3994 exponent = ::exponentNaN(S);
3995 std::copy_n(mysignificand.begin(), mysignificand.size(),
3996 significandParts());
3997 return;
4000 if (is_zero) {
4001 makeZero(sign);
4002 return;
4005 category = fcNormal;
4006 exponent = myexponent - bias;
4007 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
4008 if (myexponent == 0) // denormal
4009 exponent = S.minExponent;
4010 else
4011 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
4014 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
4015 initFromIEEEAPInt<semIEEEquad>(api);
4018 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
4019 initFromIEEEAPInt<semIEEEdouble>(api);
4022 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
4023 initFromIEEEAPInt<semIEEEsingle>(api);
4026 void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
4027 initFromIEEEAPInt<semBFloat>(api);
4030 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
4031 initFromIEEEAPInt<semIEEEhalf>(api);
4034 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
4035 initFromIEEEAPInt<semFloat8E5M2>(api);
4038 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
4039 initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
4042 void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
4043 initFromIEEEAPInt<semFloat8E4M3>(api);
4046 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
4047 initFromIEEEAPInt<semFloat8E4M3FN>(api);
4050 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
4051 initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
4054 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
4055 initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
4058 void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
4059 initFromIEEEAPInt<semFloat8E3M4>(api);
4062 void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
4063 initFromIEEEAPInt<semFloatTF32>(api);
4066 void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
4067 initFromIEEEAPInt<semFloat6E3M2FN>(api);
4070 void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
4071 initFromIEEEAPInt<semFloat6E2M3FN>(api);
4074 void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
4075 initFromIEEEAPInt<semFloat4E2M1FN>(api);
4078 /// Treat api as containing the bits of a floating point number.
4079 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
4080 assert(api.getBitWidth() == Sem->sizeInBits);
4081 if (Sem == &semIEEEhalf)
4082 return initFromHalfAPInt(api);
4083 if (Sem == &semBFloat)
4084 return initFromBFloatAPInt(api);
4085 if (Sem == &semIEEEsingle)
4086 return initFromFloatAPInt(api);
4087 if (Sem == &semIEEEdouble)
4088 return initFromDoubleAPInt(api);
4089 if (Sem == &semX87DoubleExtended)
4090 return initFromF80LongDoubleAPInt(api);
4091 if (Sem == &semIEEEquad)
4092 return initFromQuadrupleAPInt(api);
4093 if (Sem == &semPPCDoubleDoubleLegacy)
4094 return initFromPPCDoubleDoubleLegacyAPInt(api);
4095 if (Sem == &semFloat8E5M2)
4096 return initFromFloat8E5M2APInt(api);
4097 if (Sem == &semFloat8E5M2FNUZ)
4098 return initFromFloat8E5M2FNUZAPInt(api);
4099 if (Sem == &semFloat8E4M3)
4100 return initFromFloat8E4M3APInt(api);
4101 if (Sem == &semFloat8E4M3FN)
4102 return initFromFloat8E4M3FNAPInt(api);
4103 if (Sem == &semFloat8E4M3FNUZ)
4104 return initFromFloat8E4M3FNUZAPInt(api);
4105 if (Sem == &semFloat8E4M3B11FNUZ)
4106 return initFromFloat8E4M3B11FNUZAPInt(api);
4107 if (Sem == &semFloat8E3M4)
4108 return initFromFloat8E3M4APInt(api);
4109 if (Sem == &semFloatTF32)
4110 return initFromFloatTF32APInt(api);
4111 if (Sem == &semFloat8E8M0FNU)
4112 return initFromFloat8E8M0FNUAPInt(api);
4113 if (Sem == &semFloat6E3M2FN)
4114 return initFromFloat6E3M2FNAPInt(api);
4115 if (Sem == &semFloat6E2M3FN)
4116 return initFromFloat6E2M3FNAPInt(api);
4117 if (Sem == &semFloat4E2M1FN)
4118 return initFromFloat4E2M1FNAPInt(api);
4120 llvm_unreachable("unsupported semantics");
4123 /// Make this number the largest magnitude normal number in the given
4124 /// semantics.
4125 void IEEEFloat::makeLargest(bool Negative) {
4126 if (Negative && !semantics->hasSignedRepr)
4127 llvm_unreachable(
4128 "This floating point format does not support signed values");
4129 // We want (in interchange format):
4130 // sign = {Negative}
4131 // exponent = 1..10
4132 // significand = 1..1
4133 category = fcNormal;
4134 sign = Negative;
4135 exponent = semantics->maxExponent;
4137 // Use memset to set all but the highest integerPart to all ones.
4138 integerPart *significand = significandParts();
4139 unsigned PartCount = partCount();
4140 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
4142 // Set the high integerPart especially setting all unused top bits for
4143 // internal consistency.
4144 const unsigned NumUnusedHighBits =
4145 PartCount*integerPartWidth - semantics->precision;
4146 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
4147 ? (~integerPart(0) >> NumUnusedHighBits)
4148 : 0;
4149 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
4150 semantics->nanEncoding == fltNanEncoding::AllOnes &&
4151 (semantics->precision > 1))
4152 significand[0] &= ~integerPart(1);
4155 /// Make this number the smallest magnitude denormal number in the given
4156 /// semantics.
4157 void IEEEFloat::makeSmallest(bool Negative) {
4158 if (Negative && !semantics->hasSignedRepr)
4159 llvm_unreachable(
4160 "This floating point format does not support signed values");
4161 // We want (in interchange format):
4162 // sign = {Negative}
4163 // exponent = 0..0
4164 // significand = 0..01
4165 category = fcNormal;
4166 sign = Negative;
4167 exponent = semantics->minExponent;
4168 APInt::tcSet(significandParts(), 1, partCount());
4171 void IEEEFloat::makeSmallestNormalized(bool Negative) {
4172 if (Negative && !semantics->hasSignedRepr)
4173 llvm_unreachable(
4174 "This floating point format does not support signed values");
4175 // We want (in interchange format):
4176 // sign = {Negative}
4177 // exponent = 0..0
4178 // significand = 10..0
4180 category = fcNormal;
4181 zeroSignificand();
4182 sign = Negative;
4183 exponent = semantics->minExponent;
4184 APInt::tcSetBit(significandParts(), semantics->precision - 1);
4187 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4188 initFromAPInt(&Sem, API);
4191 IEEEFloat::IEEEFloat(float f) {
4192 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
4195 IEEEFloat::IEEEFloat(double d) {
4196 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
4199 namespace {
4200 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4201 Buffer.append(Str.begin(), Str.end());
4204 /// Removes data from the given significand until it is no more
4205 /// precise than is required for the desired precision.
4206 void AdjustToPrecision(APInt &significand,
4207 int &exp, unsigned FormatPrecision) {
4208 unsigned bits = significand.getActiveBits();
4210 // 196/59 is a very slight overestimate of lg_2(10).
4211 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4213 if (bits <= bitsRequired) return;
4215 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4216 if (!tensRemovable) return;
4218 exp += tensRemovable;
4220 APInt divisor(significand.getBitWidth(), 1);
4221 APInt powten(significand.getBitWidth(), 10);
4222 while (true) {
4223 if (tensRemovable & 1)
4224 divisor *= powten;
4225 tensRemovable >>= 1;
4226 if (!tensRemovable) break;
4227 powten *= powten;
4230 significand = significand.udiv(divisor);
4232 // Truncate the significand down to its active bit count.
4233 significand = significand.trunc(significand.getActiveBits());
4237 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4238 int &exp, unsigned FormatPrecision) {
4239 unsigned N = buffer.size();
4240 if (N <= FormatPrecision) return;
4242 // The most significant figures are the last ones in the buffer.
4243 unsigned FirstSignificant = N - FormatPrecision;
4245 // Round.
4246 // FIXME: this probably shouldn't use 'round half up'.
4248 // Rounding down is just a truncation, except we also want to drop
4249 // trailing zeros from the new result.
4250 if (buffer[FirstSignificant - 1] < '5') {
4251 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4252 FirstSignificant++;
4254 exp += FirstSignificant;
4255 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4256 return;
4259 // Rounding up requires a decimal add-with-carry. If we continue
4260 // the carry, the newly-introduced zeros will just be truncated.
4261 for (unsigned I = FirstSignificant; I != N; ++I) {
4262 if (buffer[I] == '9') {
4263 FirstSignificant++;
4264 } else {
4265 buffer[I]++;
4266 break;
4270 // If we carried through, we have exactly one digit of precision.
4271 if (FirstSignificant == N) {
4272 exp += FirstSignificant;
4273 buffer.clear();
4274 buffer.push_back('1');
4275 return;
4278 exp += FirstSignificant;
4279 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4282 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4283 APInt significand, unsigned FormatPrecision,
4284 unsigned FormatMaxPadding, bool TruncateZero) {
4285 const int semanticsPrecision = significand.getBitWidth();
4287 if (isNeg)
4288 Str.push_back('-');
4290 // Set FormatPrecision if zero. We want to do this before we
4291 // truncate trailing zeros, as those are part of the precision.
4292 if (!FormatPrecision) {
4293 // We use enough digits so the number can be round-tripped back to an
4294 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4295 // Accurately" by Steele and White.
4296 // FIXME: Using a formula based purely on the precision is conservative;
4297 // we can print fewer digits depending on the actual value being printed.
4299 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4300 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4303 // Ignore trailing binary zeros.
4304 int trailingZeros = significand.countr_zero();
4305 exp += trailingZeros;
4306 significand.lshrInPlace(trailingZeros);
4308 // Change the exponent from 2^e to 10^e.
4309 if (exp == 0) {
4310 // Nothing to do.
4311 } else if (exp > 0) {
4312 // Just shift left.
4313 significand = significand.zext(semanticsPrecision + exp);
4314 significand <<= exp;
4315 exp = 0;
4316 } else { /* exp < 0 */
4317 int texp = -exp;
4319 // We transform this using the identity:
4320 // (N)(2^-e) == (N)(5^e)(10^-e)
4321 // This means we have to multiply N (the significand) by 5^e.
4322 // To avoid overflow, we have to operate on numbers large
4323 // enough to store N * 5^e:
4324 // log2(N * 5^e) == log2(N) + e * log2(5)
4325 // <= semantics->precision + e * 137 / 59
4326 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4328 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4330 // Multiply significand by 5^e.
4331 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4332 significand = significand.zext(precision);
4333 APInt five_to_the_i(precision, 5);
4334 while (true) {
4335 if (texp & 1)
4336 significand *= five_to_the_i;
4338 texp >>= 1;
4339 if (!texp)
4340 break;
4341 five_to_the_i *= five_to_the_i;
4345 AdjustToPrecision(significand, exp, FormatPrecision);
4347 SmallVector<char, 256> buffer;
4349 // Fill the buffer.
4350 unsigned precision = significand.getBitWidth();
4351 if (precision < 4) {
4352 // We need enough precision to store the value 10.
4353 precision = 4;
4354 significand = significand.zext(precision);
4356 APInt ten(precision, 10);
4357 APInt digit(precision, 0);
4359 bool inTrail = true;
4360 while (significand != 0) {
4361 // digit <- significand % 10
4362 // significand <- significand / 10
4363 APInt::udivrem(significand, ten, significand, digit);
4365 unsigned d = digit.getZExtValue();
4367 // Drop trailing zeros.
4368 if (inTrail && !d)
4369 exp++;
4370 else {
4371 buffer.push_back((char) ('0' + d));
4372 inTrail = false;
4376 assert(!buffer.empty() && "no characters in buffer!");
4378 // Drop down to FormatPrecision.
4379 // TODO: don't do more precise calculations above than are required.
4380 AdjustToPrecision(buffer, exp, FormatPrecision);
4382 unsigned NDigits = buffer.size();
4384 // Check whether we should use scientific notation.
4385 bool FormatScientific;
4386 if (!FormatMaxPadding)
4387 FormatScientific = true;
4388 else {
4389 if (exp >= 0) {
4390 // 765e3 --> 765000
4391 // ^^^
4392 // But we shouldn't make the number look more precise than it is.
4393 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4394 NDigits + (unsigned) exp > FormatPrecision);
4395 } else {
4396 // Power of the most significant digit.
4397 int MSD = exp + (int) (NDigits - 1);
4398 if (MSD >= 0) {
4399 // 765e-2 == 7.65
4400 FormatScientific = false;
4401 } else {
4402 // 765e-5 == 0.00765
4403 // ^ ^^
4404 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4409 // Scientific formatting is pretty straightforward.
4410 if (FormatScientific) {
4411 exp += (NDigits - 1);
4413 Str.push_back(buffer[NDigits-1]);
4414 Str.push_back('.');
4415 if (NDigits == 1 && TruncateZero)
4416 Str.push_back('0');
4417 else
4418 for (unsigned I = 1; I != NDigits; ++I)
4419 Str.push_back(buffer[NDigits-1-I]);
4420 // Fill with zeros up to FormatPrecision.
4421 if (!TruncateZero && FormatPrecision > NDigits - 1)
4422 Str.append(FormatPrecision - NDigits + 1, '0');
4423 // For !TruncateZero we use lower 'e'.
4424 Str.push_back(TruncateZero ? 'E' : 'e');
4426 Str.push_back(exp >= 0 ? '+' : '-');
4427 if (exp < 0)
4428 exp = -exp;
4429 SmallVector<char, 6> expbuf;
4430 do {
4431 expbuf.push_back((char) ('0' + (exp % 10)));
4432 exp /= 10;
4433 } while (exp);
4434 // Exponent always at least two digits if we do not truncate zeros.
4435 if (!TruncateZero && expbuf.size() < 2)
4436 expbuf.push_back('0');
4437 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4438 Str.push_back(expbuf[E-1-I]);
4439 return;
4442 // Non-scientific, positive exponents.
4443 if (exp >= 0) {
4444 for (unsigned I = 0; I != NDigits; ++I)
4445 Str.push_back(buffer[NDigits-1-I]);
4446 for (unsigned I = 0; I != (unsigned) exp; ++I)
4447 Str.push_back('0');
4448 return;
4451 // Non-scientific, negative exponents.
4453 // The number of digits to the left of the decimal point.
4454 int NWholeDigits = exp + (int) NDigits;
4456 unsigned I = 0;
4457 if (NWholeDigits > 0) {
4458 for (; I != (unsigned) NWholeDigits; ++I)
4459 Str.push_back(buffer[NDigits-I-1]);
4460 Str.push_back('.');
4461 } else {
4462 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4464 Str.push_back('0');
4465 Str.push_back('.');
4466 for (unsigned Z = 1; Z != NZeros; ++Z)
4467 Str.push_back('0');
4470 for (; I != NDigits; ++I)
4471 Str.push_back(buffer[NDigits-I-1]);
4474 } // namespace
4476 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4477 unsigned FormatMaxPadding, bool TruncateZero) const {
4478 switch (category) {
4479 case fcInfinity:
4480 if (isNegative())
4481 return append(Str, "-Inf");
4482 else
4483 return append(Str, "+Inf");
4485 case fcNaN: return append(Str, "NaN");
4487 case fcZero:
4488 if (isNegative())
4489 Str.push_back('-');
4491 if (!FormatMaxPadding) {
4492 if (TruncateZero)
4493 append(Str, "0.0E+0");
4494 else {
4495 append(Str, "0.0");
4496 if (FormatPrecision > 1)
4497 Str.append(FormatPrecision - 1, '0');
4498 append(Str, "e+00");
4500 } else
4501 Str.push_back('0');
4502 return;
4504 case fcNormal:
4505 break;
4508 // Decompose the number into an APInt and an exponent.
4509 int exp = exponent - ((int) semantics->precision - 1);
4510 APInt significand(
4511 semantics->precision,
4512 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4514 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4515 FormatMaxPadding, TruncateZero);
4519 bool IEEEFloat::getExactInverse(APFloat *inv) const {
4520 // Special floats and denormals have no exact inverse.
4521 if (!isFiniteNonZero())
4522 return false;
4524 // Check that the number is a power of two by making sure that only the
4525 // integer bit is set in the significand.
4526 if (significandLSB() != semantics->precision - 1)
4527 return false;
4529 // Get the inverse.
4530 IEEEFloat reciprocal(*semantics, 1ULL);
4531 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4532 return false;
4534 // Avoid multiplication with a denormal, it is not safe on all platforms and
4535 // may be slower than a normal division.
4536 if (reciprocal.isDenormal())
4537 return false;
4539 assert(reciprocal.isFiniteNonZero() &&
4540 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4542 if (inv)
4543 *inv = APFloat(reciprocal, *semantics);
4545 return true;
4548 int IEEEFloat::getExactLog2Abs() const {
4549 if (!isFinite() || isZero())
4550 return INT_MIN;
4552 const integerPart *Parts = significandParts();
4553 const int PartCount = partCountForBits(semantics->precision);
4555 int PopCount = 0;
4556 for (int i = 0; i < PartCount; ++i) {
4557 PopCount += llvm::popcount(Parts[i]);
4558 if (PopCount > 1)
4559 return INT_MIN;
4562 if (exponent != semantics->minExponent)
4563 return exponent;
4565 int CountrParts = 0;
4566 for (int i = 0; i < PartCount;
4567 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4568 if (Parts[i] != 0) {
4569 return exponent - semantics->precision + CountrParts +
4570 llvm::countr_zero(Parts[i]) + 1;
4574 llvm_unreachable("didn't find the set bit");
4577 bool IEEEFloat::isSignaling() const {
4578 if (!isNaN())
4579 return false;
4580 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4581 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4582 return false;
4584 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4585 // first bit of the trailing significand being 0.
4586 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4589 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4591 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4592 /// appropriate sign switching before/after the computation.
4593 APFloat::opStatus IEEEFloat::next(bool nextDown) {
4594 // If we are performing nextDown, swap sign so we have -x.
4595 if (nextDown)
4596 changeSign();
4598 // Compute nextUp(x)
4599 opStatus result = opOK;
4601 // Handle each float category separately.
4602 switch (category) {
4603 case fcInfinity:
4604 // nextUp(+inf) = +inf
4605 if (!isNegative())
4606 break;
4607 // nextUp(-inf) = -getLargest()
4608 makeLargest(true);
4609 break;
4610 case fcNaN:
4611 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4612 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4613 // change the payload.
4614 if (isSignaling()) {
4615 result = opInvalidOp;
4616 // For consistency, propagate the sign of the sNaN to the qNaN.
4617 makeNaN(false, isNegative(), nullptr);
4619 break;
4620 case fcZero:
4621 // nextUp(pm 0) = +getSmallest()
4622 makeSmallest(false);
4623 break;
4624 case fcNormal:
4625 // nextUp(-getSmallest()) = -0
4626 if (isSmallest() && isNegative()) {
4627 APInt::tcSet(significandParts(), 0, partCount());
4628 category = fcZero;
4629 exponent = 0;
4630 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4631 sign = false;
4632 if (!semantics->hasZero)
4633 makeSmallestNormalized(false);
4634 break;
4637 if (isLargest() && !isNegative()) {
4638 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4639 // nextUp(getLargest()) == NAN
4640 makeNaN();
4641 break;
4642 } else if (semantics->nonFiniteBehavior ==
4643 fltNonfiniteBehavior::FiniteOnly) {
4644 // nextUp(getLargest()) == getLargest()
4645 break;
4646 } else {
4647 // nextUp(getLargest()) == INFINITY
4648 APInt::tcSet(significandParts(), 0, partCount());
4649 category = fcInfinity;
4650 exponent = semantics->maxExponent + 1;
4651 break;
4655 // nextUp(normal) == normal + inc.
4656 if (isNegative()) {
4657 // If we are negative, we need to decrement the significand.
4659 // We only cross a binade boundary that requires adjusting the exponent
4660 // if:
4661 // 1. exponent != semantics->minExponent. This implies we are not in the
4662 // smallest binade or are dealing with denormals.
4663 // 2. Our significand excluding the integral bit is all zeros.
4664 bool WillCrossBinadeBoundary =
4665 exponent != semantics->minExponent && isSignificandAllZeros();
4667 // Decrement the significand.
4669 // We always do this since:
4670 // 1. If we are dealing with a non-binade decrement, by definition we
4671 // just decrement the significand.
4672 // 2. If we are dealing with a normal -> normal binade decrement, since
4673 // we have an explicit integral bit the fact that all bits but the
4674 // integral bit are zero implies that subtracting one will yield a
4675 // significand with 0 integral bit and 1 in all other spots. Thus we
4676 // must just adjust the exponent and set the integral bit to 1.
4677 // 3. If we are dealing with a normal -> denormal binade decrement,
4678 // since we set the integral bit to 0 when we represent denormals, we
4679 // just decrement the significand.
4680 integerPart *Parts = significandParts();
4681 APInt::tcDecrement(Parts, partCount());
4683 if (WillCrossBinadeBoundary) {
4684 // Our result is a normal number. Do the following:
4685 // 1. Set the integral bit to 1.
4686 // 2. Decrement the exponent.
4687 APInt::tcSetBit(Parts, semantics->precision - 1);
4688 exponent--;
4690 } else {
4691 // If we are positive, we need to increment the significand.
4693 // We only cross a binade boundary that requires adjusting the exponent if
4694 // the input is not a denormal and all of said input's significand bits
4695 // are set. If all of said conditions are true: clear the significand, set
4696 // the integral bit to 1, and increment the exponent. If we have a
4697 // denormal always increment since moving denormals and the numbers in the
4698 // smallest normal binade have the same exponent in our representation.
4699 // If there are only exponents, any increment always crosses the
4700 // BinadeBoundary.
4701 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
4702 (!isDenormal() && isSignificandAllOnes());
4704 if (WillCrossBinadeBoundary) {
4705 integerPart *Parts = significandParts();
4706 APInt::tcSet(Parts, 0, partCount());
4707 APInt::tcSetBit(Parts, semantics->precision - 1);
4708 assert(exponent != semantics->maxExponent &&
4709 "We can not increment an exponent beyond the maxExponent allowed"
4710 " by the given floating point semantics.");
4711 exponent++;
4712 } else {
4713 incrementSignificand();
4716 break;
4719 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4720 if (nextDown)
4721 changeSign();
4723 return result;
4726 APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4727 return ::exponentNaN(*semantics);
4730 APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4731 return ::exponentInf(*semantics);
4734 APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4735 return ::exponentZero(*semantics);
4738 void IEEEFloat::makeInf(bool Negative) {
4739 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4740 llvm_unreachable("This floating point format does not support Inf");
4742 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4743 // There is no Inf, so make NaN instead.
4744 makeNaN(false, Negative);
4745 return;
4747 category = fcInfinity;
4748 sign = Negative;
4749 exponent = exponentInf();
4750 APInt::tcSet(significandParts(), 0, partCount());
4753 void IEEEFloat::makeZero(bool Negative) {
4754 if (!semantics->hasZero)
4755 llvm_unreachable("This floating point format does not support Zero");
4757 category = fcZero;
4758 sign = Negative;
4759 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4760 // Merge negative zero to positive because 0b10000...000 is used for NaN
4761 sign = false;
4763 exponent = exponentZero();
4764 APInt::tcSet(significandParts(), 0, partCount());
4767 void IEEEFloat::makeQuiet() {
4768 assert(isNaN());
4769 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4770 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4773 int ilogb(const IEEEFloat &Arg) {
4774 if (Arg.isNaN())
4775 return APFloat::IEK_NaN;
4776 if (Arg.isZero())
4777 return APFloat::IEK_Zero;
4778 if (Arg.isInfinity())
4779 return APFloat::IEK_Inf;
4780 if (!Arg.isDenormal())
4781 return Arg.exponent;
4783 IEEEFloat Normalized(Arg);
4784 int SignificandBits = Arg.getSemantics().precision - 1;
4786 Normalized.exponent += SignificandBits;
4787 Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
4788 return Normalized.exponent - SignificandBits;
4791 IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode RoundingMode) {
4792 auto MaxExp = X.getSemantics().maxExponent;
4793 auto MinExp = X.getSemantics().minExponent;
4795 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4796 // overflow; clamp it to a safe range before adding, but ensure that the range
4797 // is large enough that the clamp does not change the result. The range we
4798 // need to support is the difference between the largest possible exponent and
4799 // the normalized exponent of half the smallest denormal.
4801 int SignificandBits = X.getSemantics().precision - 1;
4802 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4804 // Clamp to one past the range ends to let normalize handle overlflow.
4805 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4806 X.normalize(RoundingMode, lfExactlyZero);
4807 if (X.isNaN())
4808 X.makeQuiet();
4809 return X;
4812 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4813 Exp = ilogb(Val);
4815 // Quiet signalling nans.
4816 if (Exp == APFloat::IEK_NaN) {
4817 IEEEFloat Quiet(Val);
4818 Quiet.makeQuiet();
4819 return Quiet;
4822 if (Exp == APFloat::IEK_Inf)
4823 return Val;
4825 // 1 is added because frexp is defined to return a normalized fraction in
4826 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4827 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4828 return scalbn(Val, -Exp, RM);
4831 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4832 : Semantics(&S),
4833 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4834 assert(Semantics == &semPPCDoubleDouble);
4837 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4838 : Semantics(&S),
4839 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4840 APFloat(semIEEEdouble, uninitialized)}) {
4841 assert(Semantics == &semPPCDoubleDouble);
4844 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4845 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4846 APFloat(semIEEEdouble)}) {
4847 assert(Semantics == &semPPCDoubleDouble);
4850 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4851 : Semantics(&S),
4852 Floats(new APFloat[2]{
4853 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4854 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4855 assert(Semantics == &semPPCDoubleDouble);
4858 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4859 APFloat &&Second)
4860 : Semantics(&S),
4861 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4862 assert(Semantics == &semPPCDoubleDouble);
4863 assert(&Floats[0].getSemantics() == &semIEEEdouble);
4864 assert(&Floats[1].getSemantics() == &semIEEEdouble);
4867 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4868 : Semantics(RHS.Semantics),
4869 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4870 APFloat(RHS.Floats[1])}
4871 : nullptr) {
4872 assert(Semantics == &semPPCDoubleDouble);
4875 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4876 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4877 RHS.Semantics = &semBogus;
4878 assert(Semantics == &semPPCDoubleDouble);
4881 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4882 if (Semantics == RHS.Semantics && RHS.Floats) {
4883 Floats[0] = RHS.Floats[0];
4884 Floats[1] = RHS.Floats[1];
4885 } else if (this != &RHS) {
4886 this->~DoubleAPFloat();
4887 new (this) DoubleAPFloat(RHS);
4889 return *this;
4892 // Implement addition, subtraction, multiplication and division based on:
4893 // "Software for Doubled-Precision Floating-Point Computations",
4894 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4895 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4896 const APFloat &c, const APFloat &cc,
4897 roundingMode RM) {
4898 int Status = opOK;
4899 APFloat z = a;
4900 Status |= z.add(c, RM);
4901 if (!z.isFinite()) {
4902 if (!z.isInfinity()) {
4903 Floats[0] = std::move(z);
4904 Floats[1].makeZero(/* Neg = */ false);
4905 return (opStatus)Status;
4907 Status = opOK;
4908 auto AComparedToC = a.compareAbsoluteValue(c);
4909 z = cc;
4910 Status |= z.add(aa, RM);
4911 if (AComparedToC == APFloat::cmpGreaterThan) {
4912 // z = cc + aa + c + a;
4913 Status |= z.add(c, RM);
4914 Status |= z.add(a, RM);
4915 } else {
4916 // z = cc + aa + a + c;
4917 Status |= z.add(a, RM);
4918 Status |= z.add(c, RM);
4920 if (!z.isFinite()) {
4921 Floats[0] = std::move(z);
4922 Floats[1].makeZero(/* Neg = */ false);
4923 return (opStatus)Status;
4925 Floats[0] = z;
4926 APFloat zz = aa;
4927 Status |= zz.add(cc, RM);
4928 if (AComparedToC == APFloat::cmpGreaterThan) {
4929 // Floats[1] = a - z + c + zz;
4930 Floats[1] = a;
4931 Status |= Floats[1].subtract(z, RM);
4932 Status |= Floats[1].add(c, RM);
4933 Status |= Floats[1].add(zz, RM);
4934 } else {
4935 // Floats[1] = c - z + a + zz;
4936 Floats[1] = c;
4937 Status |= Floats[1].subtract(z, RM);
4938 Status |= Floats[1].add(a, RM);
4939 Status |= Floats[1].add(zz, RM);
4941 } else {
4942 // q = a - z;
4943 APFloat q = a;
4944 Status |= q.subtract(z, RM);
4946 // zz = q + c + (a - (q + z)) + aa + cc;
4947 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4948 auto zz = q;
4949 Status |= zz.add(c, RM);
4950 Status |= q.add(z, RM);
4951 Status |= q.subtract(a, RM);
4952 q.changeSign();
4953 Status |= zz.add(q, RM);
4954 Status |= zz.add(aa, RM);
4955 Status |= zz.add(cc, RM);
4956 if (zz.isZero() && !zz.isNegative()) {
4957 Floats[0] = std::move(z);
4958 Floats[1].makeZero(/* Neg = */ false);
4959 return opOK;
4961 Floats[0] = z;
4962 Status |= Floats[0].add(zz, RM);
4963 if (!Floats[0].isFinite()) {
4964 Floats[1].makeZero(/* Neg = */ false);
4965 return (opStatus)Status;
4967 Floats[1] = std::move(z);
4968 Status |= Floats[1].subtract(Floats[0], RM);
4969 Status |= Floats[1].add(zz, RM);
4971 return (opStatus)Status;
4974 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4975 const DoubleAPFloat &RHS,
4976 DoubleAPFloat &Out,
4977 roundingMode RM) {
4978 if (LHS.getCategory() == fcNaN) {
4979 Out = LHS;
4980 return opOK;
4982 if (RHS.getCategory() == fcNaN) {
4983 Out = RHS;
4984 return opOK;
4986 if (LHS.getCategory() == fcZero) {
4987 Out = RHS;
4988 return opOK;
4990 if (RHS.getCategory() == fcZero) {
4991 Out = LHS;
4992 return opOK;
4994 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4995 LHS.isNegative() != RHS.isNegative()) {
4996 Out.makeNaN(false, Out.isNegative(), nullptr);
4997 return opInvalidOp;
4999 if (LHS.getCategory() == fcInfinity) {
5000 Out = LHS;
5001 return opOK;
5003 if (RHS.getCategory() == fcInfinity) {
5004 Out = RHS;
5005 return opOK;
5007 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
5009 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
5010 CC(RHS.Floats[1]);
5011 assert(&A.getSemantics() == &semIEEEdouble);
5012 assert(&AA.getSemantics() == &semIEEEdouble);
5013 assert(&C.getSemantics() == &semIEEEdouble);
5014 assert(&CC.getSemantics() == &semIEEEdouble);
5015 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
5016 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
5017 return Out.addImpl(A, AA, C, CC, RM);
5020 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
5021 roundingMode RM) {
5022 return addWithSpecial(*this, RHS, *this, RM);
5025 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
5026 roundingMode RM) {
5027 changeSign();
5028 auto Ret = add(RHS, RM);
5029 changeSign();
5030 return Ret;
5033 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
5034 APFloat::roundingMode RM) {
5035 const auto &LHS = *this;
5036 auto &Out = *this;
5037 /* Interesting observation: For special categories, finding the lowest
5038 common ancestor of the following layered graph gives the correct
5039 return category:
5043 Zero Inf
5045 Normal
5047 e.g. NaN * NaN = NaN
5048 Zero * Inf = NaN
5049 Normal * Zero = Zero
5050 Normal * Inf = Inf
5052 if (LHS.getCategory() == fcNaN) {
5053 Out = LHS;
5054 return opOK;
5056 if (RHS.getCategory() == fcNaN) {
5057 Out = RHS;
5058 return opOK;
5060 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
5061 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
5062 Out.makeNaN(false, false, nullptr);
5063 return opOK;
5065 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
5066 Out = LHS;
5067 return opOK;
5069 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
5070 Out = RHS;
5071 return opOK;
5073 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
5074 "Special cases not handled exhaustively");
5076 int Status = opOK;
5077 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
5078 // t = a * c
5079 APFloat T = A;
5080 Status |= T.multiply(C, RM);
5081 if (!T.isFiniteNonZero()) {
5082 Floats[0] = T;
5083 Floats[1].makeZero(/* Neg = */ false);
5084 return (opStatus)Status;
5087 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
5088 APFloat Tau = A;
5089 T.changeSign();
5090 Status |= Tau.fusedMultiplyAdd(C, T, RM);
5091 T.changeSign();
5093 // v = a * d
5094 APFloat V = A;
5095 Status |= V.multiply(D, RM);
5096 // w = b * c
5097 APFloat W = B;
5098 Status |= W.multiply(C, RM);
5099 Status |= V.add(W, RM);
5100 // tau += v + w
5101 Status |= Tau.add(V, RM);
5103 // u = t + tau
5104 APFloat U = T;
5105 Status |= U.add(Tau, RM);
5107 Floats[0] = U;
5108 if (!U.isFinite()) {
5109 Floats[1].makeZero(/* Neg = */ false);
5110 } else {
5111 // Floats[1] = (t - u) + tau
5112 Status |= T.subtract(U, RM);
5113 Status |= T.add(Tau, RM);
5114 Floats[1] = T;
5116 return (opStatus)Status;
5119 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
5120 APFloat::roundingMode RM) {
5121 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5122 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5123 auto Ret =
5124 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
5125 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5126 return Ret;
5129 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
5130 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5131 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5132 auto Ret =
5133 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5134 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5135 return Ret;
5138 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
5139 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5140 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5141 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5142 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5143 return Ret;
5146 APFloat::opStatus
5147 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
5148 const DoubleAPFloat &Addend,
5149 APFloat::roundingMode RM) {
5150 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5151 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5152 auto Ret = Tmp.fusedMultiplyAdd(
5153 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
5154 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
5155 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5156 return Ret;
5159 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
5160 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5161 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5162 auto Ret = Tmp.roundToIntegral(RM);
5163 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5164 return Ret;
5167 void DoubleAPFloat::changeSign() {
5168 Floats[0].changeSign();
5169 Floats[1].changeSign();
5172 APFloat::cmpResult
5173 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
5174 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5175 if (Result != cmpEqual)
5176 return Result;
5177 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5178 if (Result == cmpLessThan || Result == cmpGreaterThan) {
5179 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
5180 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
5181 if (Against && !RHSAgainst)
5182 return cmpLessThan;
5183 if (!Against && RHSAgainst)
5184 return cmpGreaterThan;
5185 if (!Against && !RHSAgainst)
5186 return Result;
5187 if (Against && RHSAgainst)
5188 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
5190 return Result;
5193 APFloat::fltCategory DoubleAPFloat::getCategory() const {
5194 return Floats[0].getCategory();
5197 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5199 void DoubleAPFloat::makeInf(bool Neg) {
5200 Floats[0].makeInf(Neg);
5201 Floats[1].makeZero(/* Neg = */ false);
5204 void DoubleAPFloat::makeZero(bool Neg) {
5205 Floats[0].makeZero(Neg);
5206 Floats[1].makeZero(/* Neg = */ false);
5209 void DoubleAPFloat::makeLargest(bool Neg) {
5210 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5211 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5212 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5213 if (Neg)
5214 changeSign();
5217 void DoubleAPFloat::makeSmallest(bool Neg) {
5218 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5219 Floats[0].makeSmallest(Neg);
5220 Floats[1].makeZero(/* Neg = */ false);
5223 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
5224 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5225 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
5226 if (Neg)
5227 Floats[0].changeSign();
5228 Floats[1].makeZero(/* Neg = */ false);
5231 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5232 Floats[0].makeNaN(SNaN, Neg, fill);
5233 Floats[1].makeZero(/* Neg = */ false);
5236 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
5237 auto Result = Floats[0].compare(RHS.Floats[0]);
5238 // |Float[0]| > |Float[1]|
5239 if (Result == APFloat::cmpEqual)
5240 return Floats[1].compare(RHS.Floats[1]);
5241 return Result;
5244 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
5245 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5246 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5249 hash_code hash_value(const DoubleAPFloat &Arg) {
5250 if (Arg.Floats)
5251 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5252 return hash_combine(Arg.Semantics);
5255 APInt DoubleAPFloat::bitcastToAPInt() const {
5256 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5257 uint64_t Data[] = {
5258 Floats[0].bitcastToAPInt().getRawData()[0],
5259 Floats[1].bitcastToAPInt().getRawData()[0],
5261 return APInt(128, 2, Data);
5264 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
5265 roundingMode RM) {
5266 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5267 APFloat Tmp(semPPCDoubleDoubleLegacy);
5268 auto Ret = Tmp.convertFromString(S, RM);
5269 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5270 return Ret;
5273 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
5274 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5275 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5276 auto Ret = Tmp.next(nextDown);
5277 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5278 return Ret;
5281 APFloat::opStatus
5282 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
5283 unsigned int Width, bool IsSigned,
5284 roundingMode RM, bool *IsExact) const {
5285 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5286 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5287 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
5290 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
5291 bool IsSigned,
5292 roundingMode RM) {
5293 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5294 APFloat Tmp(semPPCDoubleDoubleLegacy);
5295 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5296 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5297 return Ret;
5300 APFloat::opStatus
5301 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
5302 unsigned int InputSize,
5303 bool IsSigned, roundingMode RM) {
5304 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5305 APFloat Tmp(semPPCDoubleDoubleLegacy);
5306 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5307 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5308 return Ret;
5311 APFloat::opStatus
5312 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
5313 unsigned int InputSize,
5314 bool IsSigned, roundingMode RM) {
5315 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5316 APFloat Tmp(semPPCDoubleDoubleLegacy);
5317 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5318 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5319 return Ret;
5322 unsigned int DoubleAPFloat::convertToHexString(char *DST,
5323 unsigned int HexDigits,
5324 bool UpperCase,
5325 roundingMode RM) const {
5326 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5327 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5328 .convertToHexString(DST, HexDigits, UpperCase, RM);
5331 bool DoubleAPFloat::isDenormal() const {
5332 return getCategory() == fcNormal &&
5333 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5334 // (double)(Hi + Lo) == Hi defines a normal number.
5335 Floats[0] != Floats[0] + Floats[1]);
5338 bool DoubleAPFloat::isSmallest() const {
5339 if (getCategory() != fcNormal)
5340 return false;
5341 DoubleAPFloat Tmp(*this);
5342 Tmp.makeSmallest(this->isNegative());
5343 return Tmp.compare(*this) == cmpEqual;
5346 bool DoubleAPFloat::isSmallestNormalized() const {
5347 if (getCategory() != fcNormal)
5348 return false;
5350 DoubleAPFloat Tmp(*this);
5351 Tmp.makeSmallestNormalized(this->isNegative());
5352 return Tmp.compare(*this) == cmpEqual;
5355 bool DoubleAPFloat::isLargest() const {
5356 if (getCategory() != fcNormal)
5357 return false;
5358 DoubleAPFloat Tmp(*this);
5359 Tmp.makeLargest(this->isNegative());
5360 return Tmp.compare(*this) == cmpEqual;
5363 bool DoubleAPFloat::isInteger() const {
5364 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5365 return Floats[0].isInteger() && Floats[1].isInteger();
5368 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5369 unsigned FormatPrecision,
5370 unsigned FormatMaxPadding,
5371 bool TruncateZero) const {
5372 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5373 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5374 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5377 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5378 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5379 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5380 if (!inv)
5381 return Tmp.getExactInverse(nullptr);
5382 APFloat Inv(semPPCDoubleDoubleLegacy);
5383 auto Ret = Tmp.getExactInverse(&Inv);
5384 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5385 return Ret;
5388 int DoubleAPFloat::getExactLog2() const {
5389 // TODO: Implement me
5390 return INT_MIN;
5393 int DoubleAPFloat::getExactLog2Abs() const {
5394 // TODO: Implement me
5395 return INT_MIN;
5398 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
5399 APFloat::roundingMode RM) {
5400 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5401 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
5402 scalbn(Arg.Floats[1], Exp, RM));
5405 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5406 APFloat::roundingMode RM) {
5407 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5408 APFloat First = frexp(Arg.Floats[0], Exp, RM);
5409 APFloat Second = Arg.Floats[1];
5410 if (Arg.getCategory() == APFloat::fcNormal)
5411 Second = scalbn(Second, -Exp, RM);
5412 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5415 } // namespace detail
5417 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5418 if (usesLayout<IEEEFloat>(Semantics)) {
5419 new (&IEEE) IEEEFloat(std::move(F));
5420 return;
5422 if (usesLayout<DoubleAPFloat>(Semantics)) {
5423 const fltSemantics& S = F.getSemantics();
5424 new (&Double)
5425 DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5426 APFloat(semIEEEdouble));
5427 return;
5429 llvm_unreachable("Unexpected semantics");
5432 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
5433 roundingMode RM) {
5434 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
5437 hash_code hash_value(const APFloat &Arg) {
5438 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5439 return hash_value(Arg.U.IEEE);
5440 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5441 return hash_value(Arg.U.Double);
5442 llvm_unreachable("Unexpected semantics");
5445 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5446 : APFloat(Semantics) {
5447 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5448 assert(StatusOrErr && "Invalid floating point representation");
5449 consumeError(StatusOrErr.takeError());
5452 FPClassTest APFloat::classify() const {
5453 if (isZero())
5454 return isNegative() ? fcNegZero : fcPosZero;
5455 if (isNormal())
5456 return isNegative() ? fcNegNormal : fcPosNormal;
5457 if (isDenormal())
5458 return isNegative() ? fcNegSubnormal : fcPosSubnormal;
5459 if (isInfinity())
5460 return isNegative() ? fcNegInf : fcPosInf;
5461 assert(isNaN() && "Other class of FP constant");
5462 return isSignaling() ? fcSNan : fcQNan;
5465 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5466 roundingMode RM, bool *losesInfo) {
5467 if (&getSemantics() == &ToSemantics) {
5468 *losesInfo = false;
5469 return opOK;
5471 if (usesLayout<IEEEFloat>(getSemantics()) &&
5472 usesLayout<IEEEFloat>(ToSemantics))
5473 return U.IEEE.convert(ToSemantics, RM, losesInfo);
5474 if (usesLayout<IEEEFloat>(getSemantics()) &&
5475 usesLayout<DoubleAPFloat>(ToSemantics)) {
5476 assert(&ToSemantics == &semPPCDoubleDouble);
5477 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
5478 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5479 return Ret;
5481 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5482 usesLayout<IEEEFloat>(ToSemantics)) {
5483 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5484 *this = APFloat(std::move(getIEEE()), ToSemantics);
5485 return Ret;
5487 llvm_unreachable("Unexpected semantics");
5490 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5491 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
5494 void APFloat::print(raw_ostream &OS) const {
5495 SmallVector<char, 16> Buffer;
5496 toString(Buffer);
5497 OS << Buffer;
5500 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5501 LLVM_DUMP_METHOD void APFloat::dump() const {
5502 print(dbgs());
5503 dbgs() << '\n';
5505 #endif
5507 void APFloat::Profile(FoldingSetNodeID &NID) const {
5508 NID.Add(bitcastToAPInt());
5511 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5512 an APSInt, whose initial bit-width and signed-ness are used to determine the
5513 precision of the conversion.
5515 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5516 roundingMode rounding_mode,
5517 bool *isExact) const {
5518 unsigned bitWidth = result.getBitWidth();
5519 SmallVector<uint64_t, 4> parts(result.getNumWords());
5520 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5521 rounding_mode, isExact);
5522 // Keeps the original signed-ness.
5523 result = APInt(bitWidth, parts);
5524 return status;
5527 double APFloat::convertToDouble() const {
5528 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5529 return getIEEE().convertToDouble();
5530 assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5531 "Float semantics is not representable by IEEEdouble");
5532 APFloat Temp = *this;
5533 bool LosesInfo;
5534 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5535 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5536 (void)St;
5537 return Temp.getIEEE().convertToDouble();
#ifdef HAS_IEE754_FLOAT128
/// Return this value as a host float128. IEEEquad values convert directly;
/// other semantics must be representable by IEEEquad and are converted
/// through a temporary, which is asserted to be exact.
float128 APFloat::convertToQuad() const {
  const bool IsQuad = &getSemantics() == &semIEEEquad;
  if (IsQuad)
    return getIEEE().convertToQuad();

  assert(getSemantics().isRepresentableBy(semIEEEquad) &&
         "Float semantics is not representable by IEEEquad");
  APFloat AsQuad = *this;
  bool LosesInfo;
  opStatus St = AsQuad.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo);
  assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
  (void)St; // Only read by the assertion above.
  return AsQuad.getIEEE().convertToQuad();
}
#endif
5555 float APFloat::convertToFloat() const {
5556 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5557 return getIEEE().convertToFloat();
5558 assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5559 "Float semantics is not representable by IEEEsingle");
5560 APFloat Temp = *this;
5561 bool LosesInfo;
5562 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5563 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5564 (void)St;
5565 return Temp.getIEEE().convertToFloat();
5568 } // namespace llvm
5570 #undef APFLOAT_DISPATCH_ON_SEMANTICS