Use BranchProbability instead of floating points in IfConverter.
[llvm/stm8.git] / lib / Support / APFloat.cpp
blobc3169acabbc7b115375c4548fadd24b4e18d00af
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/Support/ErrorHandling.h"
19 #include "llvm/Support/MathExtras.h"
20 #include <limits.h>
21 #include <cstring>
23 using namespace llvm;
25 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
27 /* Assumed in hexadecimal significand parsing, and conversion to
28 hexadecimal strings. */
29 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
30 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
32 namespace llvm {
34 /* Represents floating point arithmetic semantics. */
35 struct fltSemantics {
36 /* The largest E such that 2^E is representable; this matches the
37 definition of IEEE 754. */
38 exponent_t maxExponent;
40 /* The smallest E such that 2^E is a normalized number; this
41 matches the definition of IEEE 754. */
42 exponent_t minExponent;
44 /* Number of bits in the significand. This includes the integer
45 bit. */
46 unsigned int precision;
48 /* True if arithmetic is supported. */
49 unsigned int arithmeticOK;
52 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
53 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
54 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
55 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
56 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
57 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
59 // The PowerPC format consists of two doubles. It does not map cleanly
60 // onto the usual format above. For now only storage of constants of
61 // this type is supported, no arithmetic.
62 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
64 /* A tight upper bound on number of parts required to hold the value
65 pow(5, power) is
67 power * 815 / (351 * integerPartWidth) + 1
69 However, whilst the result may require only this many parts,
70 because we are multiplying two values to get it, the
71 multiplication may require an extra part with the excess part
72 being zero (consider the trivial case of 1 * 1, tcFullMultiply
73 requires two parts to hold the single-part result). So we add an
74 extra one to guarantee enough space whilst multiplying. */
75 const unsigned int maxExponent = 16383;
76 const unsigned int maxPrecision = 113;
77 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
78 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
79 / (351 * integerPartWidth));
82 /* A bunch of private, handy routines. */
84 static inline unsigned int
85 partCountForBits(unsigned int bits)
87 return ((bits) + integerPartWidth - 1) / integerPartWidth;
90 /* Returns 0U-9U. Return values >= 10U are not digits. */
91 static inline unsigned int
92 decDigitValue(unsigned int c)
94 return c - '0';
97 static unsigned int
98 hexDigitValue(unsigned int c)
100 unsigned int r;
102 r = c - '0';
103 if (r <= 9)
104 return r;
106 r = c - 'A';
107 if (r <= 5)
108 return r + 10;
110 r = c - 'a';
111 if (r <= 5)
112 return r + 10;
114 return -1U;
117 static inline void
118 assertArithmeticOK(const llvm::fltSemantics &semantics) {
119 assert(semantics.arithmeticOK &&
120 "Compile-time arithmetic does not support these semantics");
123 /* Return the value of a decimal exponent of the form
124 [+-]ddddddd.
126 If the exponent overflows, returns a large exponent with the
127 appropriate sign. */
128 static int
129 readExponent(StringRef::iterator begin, StringRef::iterator end)
131 bool isNegative;
132 unsigned int absExponent;
133 const unsigned int overlargeExponent = 24000; /* FIXME. */
134 StringRef::iterator p = begin;
136 assert(p != end && "Exponent has no digits");
138 isNegative = (*p == '-');
139 if (*p == '-' || *p == '+') {
140 p++;
141 assert(p != end && "Exponent has no digits");
144 absExponent = decDigitValue(*p++);
145 assert(absExponent < 10U && "Invalid character in exponent");
147 for (; p != end; ++p) {
148 unsigned int value;
150 value = decDigitValue(*p);
151 assert(value < 10U && "Invalid character in exponent");
153 value += absExponent * 10;
154 if (absExponent >= overlargeExponent) {
155 absExponent = overlargeExponent;
156 p = end; /* outwit assert below */
157 break;
159 absExponent = value;
162 assert(p == end && "Invalid exponent in exponent");
164 if (isNegative)
165 return -(int) absExponent;
166 else
167 return (int) absExponent;
170 /* This is ugly and needs cleaning up, but I don't immediately see
171 how whilst remaining safe. */
172 static int
173 totalExponent(StringRef::iterator p, StringRef::iterator end,
174 int exponentAdjustment)
176 int unsignedExponent;
177 bool negative, overflow;
178 int exponent = 0;
180 assert(p != end && "Exponent has no digits");
182 negative = *p == '-';
183 if (*p == '-' || *p == '+') {
184 p++;
185 assert(p != end && "Exponent has no digits");
188 unsignedExponent = 0;
189 overflow = false;
190 for (; p != end; ++p) {
191 unsigned int value;
193 value = decDigitValue(*p);
194 assert(value < 10U && "Invalid character in exponent");
196 unsignedExponent = unsignedExponent * 10 + value;
197 if (unsignedExponent > 32767)
198 overflow = true;
201 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
202 overflow = true;
204 if (!overflow) {
205 exponent = unsignedExponent;
206 if (negative)
207 exponent = -exponent;
208 exponent += exponentAdjustment;
209 if (exponent > 32767 || exponent < -32768)
210 overflow = true;
213 if (overflow)
214 exponent = negative ? -32768: 32767;
216 return exponent;
219 static StringRef::iterator
220 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
221 StringRef::iterator *dot)
223 StringRef::iterator p = begin;
224 *dot = end;
225 while (*p == '0' && p != end)
226 p++;
228 if (*p == '.') {
229 *dot = p++;
231 assert(end - begin != 1 && "Significand has no digits");
233 while (*p == '0' && p != end)
234 p++;
237 return p;
240 /* Given a normal decimal floating point number of the form
242 dddd.dddd[eE][+-]ddd
244 where the decimal point and exponent are optional, fill out the
245 structure D. Exponent is appropriate if the significand is
246 treated as an integer, and normalizedExponent if the significand
247 is taken to have the decimal point after a single leading
248 non-zero digit.
250 If the value is zero, V->firstSigDigit points to a non-digit, and
251 the return exponent is zero.
253 struct decimalInfo {
254 const char *firstSigDigit;
255 const char *lastSigDigit;
256 int exponent;
257 int normalizedExponent;
260 static void
261 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
262 decimalInfo *D)
264 StringRef::iterator dot = end;
265 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
267 D->firstSigDigit = p;
268 D->exponent = 0;
269 D->normalizedExponent = 0;
271 for (; p != end; ++p) {
272 if (*p == '.') {
273 assert(dot == end && "String contains multiple dots");
274 dot = p++;
275 if (p == end)
276 break;
278 if (decDigitValue(*p) >= 10U)
279 break;
282 if (p != end) {
283 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
284 assert(p != begin && "Significand has no digits");
285 assert((dot == end || p - begin != 1) && "Significand has no digits");
287 /* p points to the first non-digit in the string */
288 D->exponent = readExponent(p + 1, end);
290 /* Implied decimal point? */
291 if (dot == end)
292 dot = p;
295 /* If number is all zeroes accept any exponent. */
296 if (p != D->firstSigDigit) {
297 /* Drop insignificant trailing zeroes. */
298 if (p != begin) {
301 p--;
302 while (p != begin && *p == '0');
303 while (p != begin && *p == '.');
306 /* Adjust the exponents for any decimal point. */
307 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
308 D->normalizedExponent = (D->exponent +
309 static_cast<exponent_t>((p - D->firstSigDigit)
310 - (dot > D->firstSigDigit && dot < p)));
313 D->lastSigDigit = p;
316 /* Return the trailing fraction of a hexadecimal number.
317 DIGITVALUE is the first hex digit of the fraction, P points to
318 the next digit. */
319 static lostFraction
320 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
321 unsigned int digitValue)
323 unsigned int hexDigit;
325 /* If the first trailing digit isn't 0 or 8 we can work out the
326 fraction immediately. */
327 if (digitValue > 8)
328 return lfMoreThanHalf;
329 else if (digitValue < 8 && digitValue > 0)
330 return lfLessThanHalf;
332 /* Otherwise we need to find the first non-zero digit. */
333 while (*p == '0')
334 p++;
336 assert(p != end && "Invalid trailing hexadecimal fraction!");
338 hexDigit = hexDigitValue(*p);
340 /* If we ran off the end it is exactly zero or one-half, otherwise
341 a little more. */
342 if (hexDigit == -1U)
343 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
344 else
345 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
348 /* Return the fraction lost were a bignum truncated losing the least
349 significant BITS bits. */
350 static lostFraction
351 lostFractionThroughTruncation(const integerPart *parts,
352 unsigned int partCount,
353 unsigned int bits)
355 unsigned int lsb;
357 lsb = APInt::tcLSB(parts, partCount);
359 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
360 if (bits <= lsb)
361 return lfExactlyZero;
362 if (bits == lsb + 1)
363 return lfExactlyHalf;
364 if (bits <= partCount * integerPartWidth &&
365 APInt::tcExtractBit(parts, bits - 1))
366 return lfMoreThanHalf;
368 return lfLessThanHalf;
371 /* Shift DST right BITS bits noting lost fraction. */
372 static lostFraction
373 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
375 lostFraction lost_fraction;
377 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
379 APInt::tcShiftRight(dst, parts, bits);
381 return lost_fraction;
384 /* Combine the effect of two lost fractions. */
385 static lostFraction
386 combineLostFractions(lostFraction moreSignificant,
387 lostFraction lessSignificant)
389 if (lessSignificant != lfExactlyZero) {
390 if (moreSignificant == lfExactlyZero)
391 moreSignificant = lfLessThanHalf;
392 else if (moreSignificant == lfExactlyHalf)
393 moreSignificant = lfMoreThanHalf;
396 return moreSignificant;
399 /* The error from the true value, in half-ulps, on multiplying two
400 floating point numbers, which differ from the value they
401 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
402 than the returned value.
404 See "How to Read Floating Point Numbers Accurately" by William D
405 Clinger. */
406 static unsigned int
407 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
409 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
411 if (HUerr1 + HUerr2 == 0)
412 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
413 else
414 return inexactMultiply + 2 * (HUerr1 + HUerr2);
417 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
418 when the least significant BITS are truncated. BITS cannot be
419 zero. */
420 static integerPart
421 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
423 unsigned int count, partBits;
424 integerPart part, boundary;
426 assert(bits != 0);
428 bits--;
429 count = bits / integerPartWidth;
430 partBits = bits % integerPartWidth + 1;
432 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
434 if (isNearest)
435 boundary = (integerPart) 1 << (partBits - 1);
436 else
437 boundary = 0;
439 if (count == 0) {
440 if (part - boundary <= boundary - part)
441 return part - boundary;
442 else
443 return boundary - part;
446 if (part == boundary) {
447 while (--count)
448 if (parts[count])
449 return ~(integerPart) 0; /* A lot. */
451 return parts[0];
452 } else if (part == boundary - 1) {
453 while (--count)
454 if (~parts[count])
455 return ~(integerPart) 0; /* A lot. */
457 return -parts[0];
460 return ~(integerPart) 0; /* A lot. */
463 /* Place pow(5, power) in DST, and return the number of parts used.
464 DST must be at least one part larger than size of the answer. */
465 static unsigned int
466 powerOf5(integerPart *dst, unsigned int power)
468 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
469 15625, 78125 };
470 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
471 pow5s[0] = 78125 * 5;
473 unsigned int partsCount[16] = { 1 };
474 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
475 unsigned int result;
476 assert(power <= maxExponent);
478 p1 = dst;
479 p2 = scratch;
481 *p1 = firstEightPowers[power & 7];
482 power >>= 3;
484 result = 1;
485 pow5 = pow5s;
487 for (unsigned int n = 0; power; power >>= 1, n++) {
488 unsigned int pc;
490 pc = partsCount[n];
492 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
493 if (pc == 0) {
494 pc = partsCount[n - 1];
495 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
496 pc *= 2;
497 if (pow5[pc - 1] == 0)
498 pc--;
499 partsCount[n] = pc;
502 if (power & 1) {
503 integerPart *tmp;
505 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
506 result += pc;
507 if (p2[result - 1] == 0)
508 result--;
510 /* Now result is in p1 with partsCount parts and p2 is scratch
511 space. */
512 tmp = p1, p1 = p2, p2 = tmp;
515 pow5 += pc;
518 if (p1 != dst)
519 APInt::tcAssign(dst, p1, result);
521 return result;
524 /* Zero at the end to avoid modular arithmetic when adding one; used
525 when rounding up during hexadecimal output. */
526 static const char hexDigitsLower[] = "0123456789abcdef0";
527 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
528 static const char infinityL[] = "infinity";
529 static const char infinityU[] = "INFINITY";
530 static const char NaNL[] = "nan";
531 static const char NaNU[] = "NAN";
533 /* Write out an integerPart in hexadecimal, starting with the most
534 significant nibble. Write out exactly COUNT hexdigits, return
535 COUNT. */
536 static unsigned int
537 partAsHex (char *dst, integerPart part, unsigned int count,
538 const char *hexDigitChars)
540 unsigned int result = count;
542 assert(count != 0 && count <= integerPartWidth / 4);
544 part >>= (integerPartWidth - 4 * count);
545 while (count--) {
546 dst[count] = hexDigitChars[part & 0xf];
547 part >>= 4;
550 return result;
553 /* Write out an unsigned decimal integer. */
554 static char *
555 writeUnsignedDecimal (char *dst, unsigned int n)
557 char buff[40], *p;
559 p = buff;
561 *p++ = '0' + n % 10;
562 while (n /= 10);
565 *dst++ = *--p;
566 while (p != buff);
568 return dst;
571 /* Write out a signed decimal integer. */
572 static char *
573 writeSignedDecimal (char *dst, int value)
575 if (value < 0) {
576 *dst++ = '-';
577 dst = writeUnsignedDecimal(dst, -(unsigned) value);
578 } else
579 dst = writeUnsignedDecimal(dst, value);
581 return dst;
584 /* Constructors. */
585 void
586 APFloat::initialize(const fltSemantics *ourSemantics)
588 unsigned int count;
590 semantics = ourSemantics;
591 count = partCount();
592 if (count > 1)
593 significand.parts = new integerPart[count];
596 void
597 APFloat::freeSignificand()
599 if (partCount() > 1)
600 delete [] significand.parts;
603 void
604 APFloat::assign(const APFloat &rhs)
606 assert(semantics == rhs.semantics);
608 sign = rhs.sign;
609 category = rhs.category;
610 exponent = rhs.exponent;
611 sign2 = rhs.sign2;
612 exponent2 = rhs.exponent2;
613 if (category == fcNormal || category == fcNaN)
614 copySignificand(rhs);
617 void
618 APFloat::copySignificand(const APFloat &rhs)
620 assert(category == fcNormal || category == fcNaN);
621 assert(rhs.partCount() >= partCount());
623 APInt::tcAssign(significandParts(), rhs.significandParts(),
624 partCount());
627 /* Make this number a NaN, with an arbitrary but deterministic value
628 for the significand. If double or longer, this is a signalling NaN,
629 which may not be ideal. If float, this is QNaN(0). */
630 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
632 category = fcNaN;
633 sign = Negative;
635 integerPart *significand = significandParts();
636 unsigned numParts = partCount();
638 // Set the significand bits to the fill.
639 if (!fill || fill->getNumWords() < numParts)
640 APInt::tcSet(significand, 0, numParts);
641 if (fill) {
642 APInt::tcAssign(significand, fill->getRawData(),
643 std::min(fill->getNumWords(), numParts));
645 // Zero out the excess bits of the significand.
646 unsigned bitsToPreserve = semantics->precision - 1;
647 unsigned part = bitsToPreserve / 64;
648 bitsToPreserve %= 64;
649 significand[part] &= ((1ULL << bitsToPreserve) - 1);
650 for (part++; part != numParts; ++part)
651 significand[part] = 0;
654 unsigned QNaNBit = semantics->precision - 2;
656 if (SNaN) {
657 // We always have to clear the QNaN bit to make it an SNaN.
658 APInt::tcClearBit(significand, QNaNBit);
660 // If there are no bits set in the payload, we have to set
661 // *something* to make it a NaN instead of an infinity;
662 // conventionally, this is the next bit down from the QNaN bit.
663 if (APInt::tcIsZero(significand, numParts))
664 APInt::tcSetBit(significand, QNaNBit - 1);
665 } else {
666 // We always have to set the QNaN bit to make it a QNaN.
667 APInt::tcSetBit(significand, QNaNBit);
670 // For x87 extended precision, we want to make a NaN, not a
671 // pseudo-NaN. Maybe we should expose the ability to make
672 // pseudo-NaNs?
673 if (semantics == &APFloat::x87DoubleExtended)
674 APInt::tcSetBit(significand, QNaNBit + 1);
677 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
678 const APInt *fill) {
679 APFloat value(Sem, uninitialized);
680 value.makeNaN(SNaN, Negative, fill);
681 return value;
684 APFloat &
685 APFloat::operator=(const APFloat &rhs)
687 if (this != &rhs) {
688 if (semantics != rhs.semantics) {
689 freeSignificand();
690 initialize(rhs.semantics);
692 assign(rhs);
695 return *this;
698 bool
699 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
700 if (this == &rhs)
701 return true;
702 if (semantics != rhs.semantics ||
703 category != rhs.category ||
704 sign != rhs.sign)
705 return false;
706 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
707 sign2 != rhs.sign2)
708 return false;
709 if (category==fcZero || category==fcInfinity)
710 return true;
711 else if (category==fcNormal && exponent!=rhs.exponent)
712 return false;
713 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
714 exponent2!=rhs.exponent2)
715 return false;
716 else {
717 int i= partCount();
718 const integerPart* p=significandParts();
719 const integerPart* q=rhs.significandParts();
720 for (; i>0; i--, p++, q++) {
721 if (*p != *q)
722 return false;
724 return true;
728 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
729 : exponent2(0), sign2(0) {
730 assertArithmeticOK(ourSemantics);
731 initialize(&ourSemantics);
732 sign = 0;
733 zeroSignificand();
734 exponent = ourSemantics.precision - 1;
735 significandParts()[0] = value;
736 normalize(rmNearestTiesToEven, lfExactlyZero);
739 APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) {
740 assertArithmeticOK(ourSemantics);
741 initialize(&ourSemantics);
742 category = fcZero;
743 sign = false;
746 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
747 : exponent2(0), sign2(0) {
748 assertArithmeticOK(ourSemantics);
749 // Allocates storage if necessary but does not initialize it.
750 initialize(&ourSemantics);
753 APFloat::APFloat(const fltSemantics &ourSemantics,
754 fltCategory ourCategory, bool negative)
755 : exponent2(0), sign2(0) {
756 assertArithmeticOK(ourSemantics);
757 initialize(&ourSemantics);
758 category = ourCategory;
759 sign = negative;
760 if (category == fcNormal)
761 category = fcZero;
762 else if (ourCategory == fcNaN)
763 makeNaN();
766 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
767 : exponent2(0), sign2(0) {
768 assertArithmeticOK(ourSemantics);
769 initialize(&ourSemantics);
770 convertFromString(text, rmNearestTiesToEven);
773 APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) {
774 initialize(rhs.semantics);
775 assign(rhs);
778 APFloat::~APFloat()
780 freeSignificand();
783 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
784 void APFloat::Profile(FoldingSetNodeID& ID) const {
785 ID.Add(bitcastToAPInt());
788 unsigned int
789 APFloat::partCount() const
791 return partCountForBits(semantics->precision + 1);
794 unsigned int
795 APFloat::semanticsPrecision(const fltSemantics &semantics)
797 return semantics.precision;
800 const integerPart *
801 APFloat::significandParts() const
803 return const_cast<APFloat *>(this)->significandParts();
806 integerPart *
807 APFloat::significandParts()
809 assert(category == fcNormal || category == fcNaN);
811 if (partCount() > 1)
812 return significand.parts;
813 else
814 return &significand.part;
817 void
818 APFloat::zeroSignificand()
820 category = fcNormal;
821 APInt::tcSet(significandParts(), 0, partCount());
824 /* Increment an fcNormal floating point number's significand. */
825 void
826 APFloat::incrementSignificand()
828 integerPart carry;
830 carry = APInt::tcIncrement(significandParts(), partCount());
832 /* Our callers should never cause us to overflow. */
833 assert(carry == 0);
836 /* Add the significand of the RHS. Returns the carry flag. */
837 integerPart
838 APFloat::addSignificand(const APFloat &rhs)
840 integerPart *parts;
842 parts = significandParts();
844 assert(semantics == rhs.semantics);
845 assert(exponent == rhs.exponent);
847 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
850 /* Subtract the significand of the RHS with a borrow flag. Returns
851 the borrow flag. */
852 integerPart
853 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
855 integerPart *parts;
857 parts = significandParts();
859 assert(semantics == rhs.semantics);
860 assert(exponent == rhs.exponent);
862 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
863 partCount());
866 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
867 on to the full-precision result of the multiplication. Returns the
868 lost fraction. */
869 lostFraction
870 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
872 unsigned int omsb; // One, not zero, based MSB.
873 unsigned int partsCount, newPartsCount, precision;
874 integerPart *lhsSignificand;
875 integerPart scratch[4];
876 integerPart *fullSignificand;
877 lostFraction lost_fraction;
878 bool ignored;
880 assert(semantics == rhs.semantics);
882 precision = semantics->precision;
883 newPartsCount = partCountForBits(precision * 2);
885 if (newPartsCount > 4)
886 fullSignificand = new integerPart[newPartsCount];
887 else
888 fullSignificand = scratch;
890 lhsSignificand = significandParts();
891 partsCount = partCount();
893 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
894 rhs.significandParts(), partsCount, partsCount);
896 lost_fraction = lfExactlyZero;
897 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
898 exponent += rhs.exponent;
900 if (addend) {
901 Significand savedSignificand = significand;
902 const fltSemantics *savedSemantics = semantics;
903 fltSemantics extendedSemantics;
904 opStatus status;
905 unsigned int extendedPrecision;
907 /* Normalize our MSB. */
908 extendedPrecision = precision + precision - 1;
909 if (omsb != extendedPrecision) {
910 APInt::tcShiftLeft(fullSignificand, newPartsCount,
911 extendedPrecision - omsb);
912 exponent -= extendedPrecision - omsb;
915 /* Create new semantics. */
916 extendedSemantics = *semantics;
917 extendedSemantics.precision = extendedPrecision;
919 if (newPartsCount == 1)
920 significand.part = fullSignificand[0];
921 else
922 significand.parts = fullSignificand;
923 semantics = &extendedSemantics;
925 APFloat extendedAddend(*addend);
926 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
927 assert(status == opOK);
928 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
930 /* Restore our state. */
931 if (newPartsCount == 1)
932 fullSignificand[0] = significand.part;
933 significand = savedSignificand;
934 semantics = savedSemantics;
936 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
939 exponent -= (precision - 1);
941 if (omsb > precision) {
942 unsigned int bits, significantParts;
943 lostFraction lf;
945 bits = omsb - precision;
946 significantParts = partCountForBits(omsb);
947 lf = shiftRight(fullSignificand, significantParts, bits);
948 lost_fraction = combineLostFractions(lf, lost_fraction);
949 exponent += bits;
952 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
954 if (newPartsCount > 4)
955 delete [] fullSignificand;
957 return lost_fraction;
960 /* Multiply the significands of LHS and RHS to DST. */
961 lostFraction
962 APFloat::divideSignificand(const APFloat &rhs)
964 unsigned int bit, i, partsCount;
965 const integerPart *rhsSignificand;
966 integerPart *lhsSignificand, *dividend, *divisor;
967 integerPart scratch[4];
968 lostFraction lost_fraction;
970 assert(semantics == rhs.semantics);
972 lhsSignificand = significandParts();
973 rhsSignificand = rhs.significandParts();
974 partsCount = partCount();
976 if (partsCount > 2)
977 dividend = new integerPart[partsCount * 2];
978 else
979 dividend = scratch;
981 divisor = dividend + partsCount;
983 /* Copy the dividend and divisor as they will be modified in-place. */
984 for (i = 0; i < partsCount; i++) {
985 dividend[i] = lhsSignificand[i];
986 divisor[i] = rhsSignificand[i];
987 lhsSignificand[i] = 0;
990 exponent -= rhs.exponent;
992 unsigned int precision = semantics->precision;
994 /* Normalize the divisor. */
995 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
996 if (bit) {
997 exponent += bit;
998 APInt::tcShiftLeft(divisor, partsCount, bit);
1001 /* Normalize the dividend. */
1002 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1003 if (bit) {
1004 exponent -= bit;
1005 APInt::tcShiftLeft(dividend, partsCount, bit);
1008 /* Ensure the dividend >= divisor initially for the loop below.
1009 Incidentally, this means that the division loop below is
1010 guaranteed to set the integer bit to one. */
1011 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1012 exponent--;
1013 APInt::tcShiftLeft(dividend, partsCount, 1);
1014 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1017 /* Long division. */
1018 for (bit = precision; bit; bit -= 1) {
1019 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1020 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1021 APInt::tcSetBit(lhsSignificand, bit - 1);
1024 APInt::tcShiftLeft(dividend, partsCount, 1);
1027 /* Figure out the lost fraction. */
1028 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1030 if (cmp > 0)
1031 lost_fraction = lfMoreThanHalf;
1032 else if (cmp == 0)
1033 lost_fraction = lfExactlyHalf;
1034 else if (APInt::tcIsZero(dividend, partsCount))
1035 lost_fraction = lfExactlyZero;
1036 else
1037 lost_fraction = lfLessThanHalf;
1039 if (partsCount > 2)
1040 delete [] dividend;
1042 return lost_fraction;
1045 unsigned int
1046 APFloat::significandMSB() const
1048 return APInt::tcMSB(significandParts(), partCount());
1051 unsigned int
1052 APFloat::significandLSB() const
1054 return APInt::tcLSB(significandParts(), partCount());
1057 /* Note that a zero result is NOT normalized to fcZero. */
1058 lostFraction
1059 APFloat::shiftSignificandRight(unsigned int bits)
1061 /* Our exponent should not overflow. */
1062 assert((exponent_t) (exponent + bits) >= exponent);
1064 exponent += bits;
1066 return shiftRight(significandParts(), partCount(), bits);
1069 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1070 void
1071 APFloat::shiftSignificandLeft(unsigned int bits)
1073 assert(bits < semantics->precision);
1075 if (bits) {
1076 unsigned int partsCount = partCount();
1078 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1079 exponent -= bits;
1081 assert(!APInt::tcIsZero(significandParts(), partsCount));
1085 APFloat::cmpResult
1086 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1088 int compare;
1090 assert(semantics == rhs.semantics);
1091 assert(category == fcNormal);
1092 assert(rhs.category == fcNormal);
1094 compare = exponent - rhs.exponent;
1096 /* If exponents are equal, do an unsigned bignum comparison of the
1097 significands. */
1098 if (compare == 0)
1099 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1100 partCount());
1102 if (compare > 0)
1103 return cmpGreaterThan;
1104 else if (compare < 0)
1105 return cmpLessThan;
1106 else
1107 return cmpEqual;
1110 /* Handle overflow. Sign is preserved. We either become infinity or
1111 the largest finite number. */
1112 APFloat::opStatus
1113 APFloat::handleOverflow(roundingMode rounding_mode)
1115 /* Infinity? */
1116 if (rounding_mode == rmNearestTiesToEven ||
1117 rounding_mode == rmNearestTiesToAway ||
1118 (rounding_mode == rmTowardPositive && !sign) ||
1119 (rounding_mode == rmTowardNegative && sign)) {
1120 category = fcInfinity;
1121 return (opStatus) (opOverflow | opInexact);
1124 /* Otherwise we become the largest finite number. */
1125 category = fcNormal;
1126 exponent = semantics->maxExponent;
1127 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1128 semantics->precision);
1130 return opInexact;
1133 /* Returns TRUE if, when truncating the current number, with BIT the
1134 new LSB, with the given lost fraction and rounding mode, the result
1135 would need to be rounded away from zero (i.e., by increasing the
1136 signficand). This routine must work for fcZero of both signs, and
1137 fcNormal numbers. */
1138 bool
1139 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1140 lostFraction lost_fraction,
1141 unsigned int bit) const
1143 /* NaNs and infinities should not have lost fractions. */
1144 assert(category == fcNormal || category == fcZero);
1146 /* Current callers never pass this so we don't handle it. */
1147 assert(lost_fraction != lfExactlyZero);
1149 switch (rounding_mode) {
1150 default:
1151 llvm_unreachable(0);
1153 case rmNearestTiesToAway:
1154 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1156 case rmNearestTiesToEven:
1157 if (lost_fraction == lfMoreThanHalf)
1158 return true;
1160 /* Our zeroes don't have a significand to test. */
1161 if (lost_fraction == lfExactlyHalf && category != fcZero)
1162 return APInt::tcExtractBit(significandParts(), bit);
1164 return false;
1166 case rmTowardZero:
1167 return false;
1169 case rmTowardPositive:
1170 return sign == false;
1172 case rmTowardNegative:
1173 return sign == true;
1177 APFloat::opStatus
1178 APFloat::normalize(roundingMode rounding_mode,
1179 lostFraction lost_fraction)
1181 unsigned int omsb; /* One, not zero, based MSB. */
1182 int exponentChange;
1184 if (category != fcNormal)
1185 return opOK;
1187 /* Before rounding normalize the exponent of fcNormal numbers. */
1188 omsb = significandMSB() + 1;
1190 if (omsb) {
1191 /* OMSB is numbered from 1. We want to place it in the integer
1192 bit numbered PRECISON if possible, with a compensating change in
1193 the exponent. */
1194 exponentChange = omsb - semantics->precision;
1196 /* If the resulting exponent is too high, overflow according to
1197 the rounding mode. */
1198 if (exponent + exponentChange > semantics->maxExponent)
1199 return handleOverflow(rounding_mode);
1201 /* Subnormal numbers have exponent minExponent, and their MSB
1202 is forced based on that. */
1203 if (exponent + exponentChange < semantics->minExponent)
1204 exponentChange = semantics->minExponent - exponent;
1206 /* Shifting left is easy as we don't lose precision. */
1207 if (exponentChange < 0) {
1208 assert(lost_fraction == lfExactlyZero);
1210 shiftSignificandLeft(-exponentChange);
1212 return opOK;
1215 if (exponentChange > 0) {
1216 lostFraction lf;
1218 /* Shift right and capture any new lost fraction. */
1219 lf = shiftSignificandRight(exponentChange);
1221 lost_fraction = combineLostFractions(lf, lost_fraction);
1223 /* Keep OMSB up-to-date. */
1224 if (omsb > (unsigned) exponentChange)
1225 omsb -= exponentChange;
1226 else
1227 omsb = 0;
1231 /* Now round the number according to rounding_mode given the lost
1232 fraction. */
1234 /* As specified in IEEE 754, since we do not trap we do not report
1235 underflow for exact results. */
1236 if (lost_fraction == lfExactlyZero) {
1237 /* Canonicalize zeroes. */
1238 if (omsb == 0)
1239 category = fcZero;
1241 return opOK;
1244 /* Increment the significand if we're rounding away from zero. */
1245 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1246 if (omsb == 0)
1247 exponent = semantics->minExponent;
1249 incrementSignificand();
1250 omsb = significandMSB() + 1;
1252 /* Did the significand increment overflow? */
1253 if (omsb == (unsigned) semantics->precision + 1) {
1254 /* Renormalize by incrementing the exponent and shifting our
1255 significand right one. However if we already have the
1256 maximum exponent we overflow to infinity. */
1257 if (exponent == semantics->maxExponent) {
1258 category = fcInfinity;
1260 return (opStatus) (opOverflow | opInexact);
1263 shiftSignificandRight(1);
1265 return opInexact;
1269 /* The normal case - we were and are not denormal, and any
1270 significand increment above didn't overflow. */
1271 if (omsb == semantics->precision)
1272 return opInexact;
1274 /* We have a non-zero denormal. */
1275 assert(omsb < semantics->precision);
1277 /* Canonicalize zeroes. */
1278 if (omsb == 0)
1279 category = fcZero;
1281 /* The fcZero case is a denormal that underflowed to zero. */
1282 return (opStatus) (opUnderflow | opInexact);
1285 APFloat::opStatus
1286 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1288 switch (convolve(category, rhs.category)) {
1289 default:
1290 llvm_unreachable(0);
1292 case convolve(fcNaN, fcZero):
1293 case convolve(fcNaN, fcNormal):
1294 case convolve(fcNaN, fcInfinity):
1295 case convolve(fcNaN, fcNaN):
1296 case convolve(fcNormal, fcZero):
1297 case convolve(fcInfinity, fcNormal):
1298 case convolve(fcInfinity, fcZero):
1299 return opOK;
1301 case convolve(fcZero, fcNaN):
1302 case convolve(fcNormal, fcNaN):
1303 case convolve(fcInfinity, fcNaN):
1304 category = fcNaN;
1305 copySignificand(rhs);
1306 return opOK;
1308 case convolve(fcNormal, fcInfinity):
1309 case convolve(fcZero, fcInfinity):
1310 category = fcInfinity;
1311 sign = rhs.sign ^ subtract;
1312 return opOK;
1314 case convolve(fcZero, fcNormal):
1315 assign(rhs);
1316 sign = rhs.sign ^ subtract;
1317 return opOK;
1319 case convolve(fcZero, fcZero):
1320 /* Sign depends on rounding mode; handled by caller. */
1321 return opOK;
1323 case convolve(fcInfinity, fcInfinity):
1324 /* Differently signed infinities can only be validly
1325 subtracted. */
1326 if (((sign ^ rhs.sign)!=0) != subtract) {
1327 makeNaN();
1328 return opInvalidOp;
1331 return opOK;
1333 case convolve(fcNormal, fcNormal):
1334 return opDivByZero;
1338 /* Add or subtract two normal numbers. */
1339 lostFraction
1340 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1342 integerPart carry;
1343 lostFraction lost_fraction;
1344 int bits;
1346 /* Determine if the operation on the absolute values is effectively
1347 an addition or subtraction. */
1348 subtract ^= (sign ^ rhs.sign) ? true : false;
1350 /* Are we bigger exponent-wise than the RHS? */
1351 bits = exponent - rhs.exponent;
1353 /* Subtraction is more subtle than one might naively expect. */
1354 if (subtract) {
1355 APFloat temp_rhs(rhs);
1356 bool reverse;
1358 if (bits == 0) {
1359 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1360 lost_fraction = lfExactlyZero;
1361 } else if (bits > 0) {
1362 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1363 shiftSignificandLeft(1);
1364 reverse = false;
1365 } else {
1366 lost_fraction = shiftSignificandRight(-bits - 1);
1367 temp_rhs.shiftSignificandLeft(1);
1368 reverse = true;
1371 if (reverse) {
1372 carry = temp_rhs.subtractSignificand
1373 (*this, lost_fraction != lfExactlyZero);
1374 copySignificand(temp_rhs);
1375 sign = !sign;
1376 } else {
1377 carry = subtractSignificand
1378 (temp_rhs, lost_fraction != lfExactlyZero);
1381 /* Invert the lost fraction - it was on the RHS and
1382 subtracted. */
1383 if (lost_fraction == lfLessThanHalf)
1384 lost_fraction = lfMoreThanHalf;
1385 else if (lost_fraction == lfMoreThanHalf)
1386 lost_fraction = lfLessThanHalf;
1388 /* The code above is intended to ensure that no borrow is
1389 necessary. */
1390 assert(!carry);
1391 } else {
1392 if (bits > 0) {
1393 APFloat temp_rhs(rhs);
1395 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1396 carry = addSignificand(temp_rhs);
1397 } else {
1398 lost_fraction = shiftSignificandRight(-bits);
1399 carry = addSignificand(rhs);
1402 /* We have a guard bit; generating a carry cannot happen. */
1403 assert(!carry);
1406 return lost_fraction;
1409 APFloat::opStatus
1410 APFloat::multiplySpecials(const APFloat &rhs)
1412 switch (convolve(category, rhs.category)) {
1413 default:
1414 llvm_unreachable(0);
1416 case convolve(fcNaN, fcZero):
1417 case convolve(fcNaN, fcNormal):
1418 case convolve(fcNaN, fcInfinity):
1419 case convolve(fcNaN, fcNaN):
1420 return opOK;
1422 case convolve(fcZero, fcNaN):
1423 case convolve(fcNormal, fcNaN):
1424 case convolve(fcInfinity, fcNaN):
1425 category = fcNaN;
1426 copySignificand(rhs);
1427 return opOK;
1429 case convolve(fcNormal, fcInfinity):
1430 case convolve(fcInfinity, fcNormal):
1431 case convolve(fcInfinity, fcInfinity):
1432 category = fcInfinity;
1433 return opOK;
1435 case convolve(fcZero, fcNormal):
1436 case convolve(fcNormal, fcZero):
1437 case convolve(fcZero, fcZero):
1438 category = fcZero;
1439 return opOK;
1441 case convolve(fcZero, fcInfinity):
1442 case convolve(fcInfinity, fcZero):
1443 makeNaN();
1444 return opInvalidOp;
1446 case convolve(fcNormal, fcNormal):
1447 return opOK;
1451 APFloat::opStatus
1452 APFloat::divideSpecials(const APFloat &rhs)
1454 switch (convolve(category, rhs.category)) {
1455 default:
1456 llvm_unreachable(0);
1458 case convolve(fcNaN, fcZero):
1459 case convolve(fcNaN, fcNormal):
1460 case convolve(fcNaN, fcInfinity):
1461 case convolve(fcNaN, fcNaN):
1462 case convolve(fcInfinity, fcZero):
1463 case convolve(fcInfinity, fcNormal):
1464 case convolve(fcZero, fcInfinity):
1465 case convolve(fcZero, fcNormal):
1466 return opOK;
1468 case convolve(fcZero, fcNaN):
1469 case convolve(fcNormal, fcNaN):
1470 case convolve(fcInfinity, fcNaN):
1471 category = fcNaN;
1472 copySignificand(rhs);
1473 return opOK;
1475 case convolve(fcNormal, fcInfinity):
1476 category = fcZero;
1477 return opOK;
1479 case convolve(fcNormal, fcZero):
1480 category = fcInfinity;
1481 return opDivByZero;
1483 case convolve(fcInfinity, fcInfinity):
1484 case convolve(fcZero, fcZero):
1485 makeNaN();
1486 return opInvalidOp;
1488 case convolve(fcNormal, fcNormal):
1489 return opOK;
1493 APFloat::opStatus
1494 APFloat::modSpecials(const APFloat &rhs)
1496 switch (convolve(category, rhs.category)) {
1497 default:
1498 llvm_unreachable(0);
1500 case convolve(fcNaN, fcZero):
1501 case convolve(fcNaN, fcNormal):
1502 case convolve(fcNaN, fcInfinity):
1503 case convolve(fcNaN, fcNaN):
1504 case convolve(fcZero, fcInfinity):
1505 case convolve(fcZero, fcNormal):
1506 case convolve(fcNormal, fcInfinity):
1507 return opOK;
1509 case convolve(fcZero, fcNaN):
1510 case convolve(fcNormal, fcNaN):
1511 case convolve(fcInfinity, fcNaN):
1512 category = fcNaN;
1513 copySignificand(rhs);
1514 return opOK;
1516 case convolve(fcNormal, fcZero):
1517 case convolve(fcInfinity, fcZero):
1518 case convolve(fcInfinity, fcNormal):
1519 case convolve(fcInfinity, fcInfinity):
1520 case convolve(fcZero, fcZero):
1521 makeNaN();
1522 return opInvalidOp;
1524 case convolve(fcNormal, fcNormal):
1525 return opOK;
1529 /* Change sign. */
1530 void
1531 APFloat::changeSign()
1533 /* Look mummy, this one's easy. */
1534 sign = !sign;
1537 void
1538 APFloat::clearSign()
1540 /* So is this one. */
1541 sign = 0;
1544 void
1545 APFloat::copySign(const APFloat &rhs)
1547 /* And this one. */
1548 sign = rhs.sign;
1551 /* Normalized addition or subtraction. */
1552 APFloat::opStatus
1553 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1554 bool subtract)
1556 opStatus fs;
1558 assertArithmeticOK(*semantics);
1560 fs = addOrSubtractSpecials(rhs, subtract);
1562 /* This return code means it was not a simple case. */
1563 if (fs == opDivByZero) {
1564 lostFraction lost_fraction;
1566 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1567 fs = normalize(rounding_mode, lost_fraction);
1569 /* Can only be zero if we lost no fraction. */
1570 assert(category != fcZero || lost_fraction == lfExactlyZero);
1573 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1574 positive zero unless rounding to minus infinity, except that
1575 adding two like-signed zeroes gives that zero. */
1576 if (category == fcZero) {
1577 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1578 sign = (rounding_mode == rmTowardNegative);
1581 return fs;
1584 /* Normalized addition. */
1585 APFloat::opStatus
1586 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1588 return addOrSubtract(rhs, rounding_mode, false);
1591 /* Normalized subtraction. */
1592 APFloat::opStatus
1593 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1595 return addOrSubtract(rhs, rounding_mode, true);
1598 /* Normalized multiply. */
1599 APFloat::opStatus
1600 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1602 opStatus fs;
1604 assertArithmeticOK(*semantics);
1605 sign ^= rhs.sign;
1606 fs = multiplySpecials(rhs);
1608 if (category == fcNormal) {
1609 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1610 fs = normalize(rounding_mode, lost_fraction);
1611 if (lost_fraction != lfExactlyZero)
1612 fs = (opStatus) (fs | opInexact);
1615 return fs;
1618 /* Normalized divide. */
1619 APFloat::opStatus
1620 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1622 opStatus fs;
1624 assertArithmeticOK(*semantics);
1625 sign ^= rhs.sign;
1626 fs = divideSpecials(rhs);
1628 if (category == fcNormal) {
1629 lostFraction lost_fraction = divideSignificand(rhs);
1630 fs = normalize(rounding_mode, lost_fraction);
1631 if (lost_fraction != lfExactlyZero)
1632 fs = (opStatus) (fs | opInexact);
1635 return fs;
1638 /* Normalized remainder. This is not currently correct in all cases. */
1639 APFloat::opStatus
1640 APFloat::remainder(const APFloat &rhs)
1642 opStatus fs;
1643 APFloat V = *this;
1644 unsigned int origSign = sign;
1646 assertArithmeticOK(*semantics);
1647 fs = V.divide(rhs, rmNearestTiesToEven);
1648 if (fs == opDivByZero)
1649 return fs;
1651 int parts = partCount();
1652 integerPart *x = new integerPart[parts];
1653 bool ignored;
1654 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1655 rmNearestTiesToEven, &ignored);
1656 if (fs==opInvalidOp)
1657 return fs;
1659 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1660 rmNearestTiesToEven);
1661 assert(fs==opOK); // should always work
1663 fs = V.multiply(rhs, rmNearestTiesToEven);
1664 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1666 fs = subtract(V, rmNearestTiesToEven);
1667 assert(fs==opOK || fs==opInexact); // likewise
1669 if (isZero())
1670 sign = origSign; // IEEE754 requires this
1671 delete[] x;
1672 return fs;
1675 /* Normalized llvm frem (C fmod).
1676 This is not currently correct in all cases. */
1677 APFloat::opStatus
1678 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1680 opStatus fs;
1681 assertArithmeticOK(*semantics);
1682 fs = modSpecials(rhs);
1684 if (category == fcNormal && rhs.category == fcNormal) {
1685 APFloat V = *this;
1686 unsigned int origSign = sign;
1688 fs = V.divide(rhs, rmNearestTiesToEven);
1689 if (fs == opDivByZero)
1690 return fs;
1692 int parts = partCount();
1693 integerPart *x = new integerPart[parts];
1694 bool ignored;
1695 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1696 rmTowardZero, &ignored);
1697 if (fs==opInvalidOp)
1698 return fs;
1700 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1701 rmNearestTiesToEven);
1702 assert(fs==opOK); // should always work
1704 fs = V.multiply(rhs, rounding_mode);
1705 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1707 fs = subtract(V, rounding_mode);
1708 assert(fs==opOK || fs==opInexact); // likewise
1710 if (isZero())
1711 sign = origSign; // IEEE754 requires this
1712 delete[] x;
1714 return fs;
1717 /* Normalized fused-multiply-add. */
1718 APFloat::opStatus
1719 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1720 const APFloat &addend,
1721 roundingMode rounding_mode)
1723 opStatus fs;
1725 assertArithmeticOK(*semantics);
1727 /* Post-multiplication sign, before addition. */
1728 sign ^= multiplicand.sign;
1730 /* If and only if all arguments are normal do we need to do an
1731 extended-precision calculation. */
1732 if (category == fcNormal &&
1733 multiplicand.category == fcNormal &&
1734 addend.category == fcNormal) {
1735 lostFraction lost_fraction;
1737 lost_fraction = multiplySignificand(multiplicand, &addend);
1738 fs = normalize(rounding_mode, lost_fraction);
1739 if (lost_fraction != lfExactlyZero)
1740 fs = (opStatus) (fs | opInexact);
1742 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1743 positive zero unless rounding to minus infinity, except that
1744 adding two like-signed zeroes gives that zero. */
1745 if (category == fcZero && sign != addend.sign)
1746 sign = (rounding_mode == rmTowardNegative);
1747 } else {
1748 fs = multiplySpecials(multiplicand);
1750 /* FS can only be opOK or opInvalidOp. There is no more work
1751 to do in the latter case. The IEEE-754R standard says it is
1752 implementation-defined in this case whether, if ADDEND is a
1753 quiet NaN, we raise invalid op; this implementation does so.
1755 If we need to do the addition we can do so with normal
1756 precision. */
1757 if (fs == opOK)
1758 fs = addOrSubtract(addend, rounding_mode, false);
1761 return fs;
1764 /* Comparison requires normalized numbers. */
1765 APFloat::cmpResult
1766 APFloat::compare(const APFloat &rhs) const
1768 cmpResult result;
1770 assertArithmeticOK(*semantics);
1771 assert(semantics == rhs.semantics);
1773 switch (convolve(category, rhs.category)) {
1774 default:
1775 llvm_unreachable(0);
1777 case convolve(fcNaN, fcZero):
1778 case convolve(fcNaN, fcNormal):
1779 case convolve(fcNaN, fcInfinity):
1780 case convolve(fcNaN, fcNaN):
1781 case convolve(fcZero, fcNaN):
1782 case convolve(fcNormal, fcNaN):
1783 case convolve(fcInfinity, fcNaN):
1784 return cmpUnordered;
1786 case convolve(fcInfinity, fcNormal):
1787 case convolve(fcInfinity, fcZero):
1788 case convolve(fcNormal, fcZero):
1789 if (sign)
1790 return cmpLessThan;
1791 else
1792 return cmpGreaterThan;
1794 case convolve(fcNormal, fcInfinity):
1795 case convolve(fcZero, fcInfinity):
1796 case convolve(fcZero, fcNormal):
1797 if (rhs.sign)
1798 return cmpGreaterThan;
1799 else
1800 return cmpLessThan;
1802 case convolve(fcInfinity, fcInfinity):
1803 if (sign == rhs.sign)
1804 return cmpEqual;
1805 else if (sign)
1806 return cmpLessThan;
1807 else
1808 return cmpGreaterThan;
1810 case convolve(fcZero, fcZero):
1811 return cmpEqual;
1813 case convolve(fcNormal, fcNormal):
1814 break;
1817 /* Two normal numbers. Do they have the same sign? */
1818 if (sign != rhs.sign) {
1819 if (sign)
1820 result = cmpLessThan;
1821 else
1822 result = cmpGreaterThan;
1823 } else {
1824 /* Compare absolute values; invert result if negative. */
1825 result = compareAbsoluteValue(rhs);
1827 if (sign) {
1828 if (result == cmpLessThan)
1829 result = cmpGreaterThan;
1830 else if (result == cmpGreaterThan)
1831 result = cmpLessThan;
1835 return result;
1838 /// APFloat::convert - convert a value of one floating point type to another.
1839 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1840 /// records whether the transformation lost information, i.e. whether
1841 /// converting the result back to the original type will produce the
1842 /// original value (this is almost the same as return value==fsOK, but there
1843 /// are edge cases where this is not so).
1845 APFloat::opStatus
1846 APFloat::convert(const fltSemantics &toSemantics,
1847 roundingMode rounding_mode, bool *losesInfo)
1849 lostFraction lostFraction;
1850 unsigned int newPartCount, oldPartCount;
1851 opStatus fs;
1853 assertArithmeticOK(*semantics);
1854 assertArithmeticOK(toSemantics);
1855 lostFraction = lfExactlyZero;
1856 newPartCount = partCountForBits(toSemantics.precision + 1);
1857 oldPartCount = partCount();
1859 /* Handle storage complications. If our new form is wider,
1860 re-allocate our bit pattern into wider storage. If it is
1861 narrower, we ignore the excess parts, but if narrowing to a
1862 single part we need to free the old storage.
1863 Be careful not to reference significandParts for zeroes
1864 and infinities, since it aborts. */
1865 if (newPartCount > oldPartCount) {
1866 integerPart *newParts;
1867 newParts = new integerPart[newPartCount];
1868 APInt::tcSet(newParts, 0, newPartCount);
1869 if (category==fcNormal || category==fcNaN)
1870 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1871 freeSignificand();
1872 significand.parts = newParts;
1873 } else if (newPartCount < oldPartCount) {
1874 /* Capture any lost fraction through truncation of parts so we get
1875 correct rounding whilst normalizing. */
1876 if (category==fcNormal)
1877 lostFraction = lostFractionThroughTruncation
1878 (significandParts(), oldPartCount, toSemantics.precision);
1879 if (newPartCount == 1) {
1880 integerPart newPart = 0;
1881 if (category==fcNormal || category==fcNaN)
1882 newPart = significandParts()[0];
1883 freeSignificand();
1884 significand.part = newPart;
1888 if (category == fcNormal) {
1889 /* Re-interpret our bit-pattern. */
1890 exponent += toSemantics.precision - semantics->precision;
1891 semantics = &toSemantics;
1892 fs = normalize(rounding_mode, lostFraction);
1893 *losesInfo = (fs != opOK);
1894 } else if (category == fcNaN) {
1895 int shift = toSemantics.precision - semantics->precision;
1896 // Do this now so significandParts gets the right answer
1897 const fltSemantics *oldSemantics = semantics;
1898 semantics = &toSemantics;
1899 *losesInfo = false;
1900 // No normalization here, just truncate
1901 if (shift>0)
1902 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1903 else if (shift < 0) {
1904 unsigned ushift = -shift;
1905 // Figure out if we are losing information. This happens
1906 // if are shifting out something other than 0s, or if the x87 long
1907 // double input did not have its integer bit set (pseudo-NaN), or if the
1908 // x87 long double input did not have its QNan bit set (because the x87
1909 // hardware sets this bit when converting a lower-precision NaN to
1910 // x87 long double).
1911 if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
1912 *losesInfo = true;
1913 if (oldSemantics == &APFloat::x87DoubleExtended &&
1914 (!(*significandParts() & 0x8000000000000000ULL) ||
1915 !(*significandParts() & 0x4000000000000000ULL)))
1916 *losesInfo = true;
1917 APInt::tcShiftRight(significandParts(), newPartCount, ushift);
1919 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1920 // does not give you back the same bits. This is dubious, and we
1921 // don't currently do it. You're really supposed to get
1922 // an invalid operation signal at runtime, but nobody does that.
1923 fs = opOK;
1924 } else {
1925 semantics = &toSemantics;
1926 fs = opOK;
1927 *losesInfo = false;
1930 return fs;
1933 /* Convert a floating point number to an integer according to the
1934 rounding mode. If the rounded integer value is out of range this
1935 returns an invalid operation exception and the contents of the
1936 destination parts are unspecified. If the rounded value is in
1937 range but the floating point number is not the exact integer, the C
1938 standard doesn't require an inexact exception to be raised. IEEE
1939 854 does require it so we do that.
1941 Note that for conversions to integer type the C standard requires
1942 round-to-zero to always be used. */
1943 APFloat::opStatus
1944 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1945 bool isSigned,
1946 roundingMode rounding_mode,
1947 bool *isExact) const
1949 lostFraction lost_fraction;
1950 const integerPart *src;
1951 unsigned int dstPartsCount, truncatedBits;
1953 assertArithmeticOK(*semantics);
1955 *isExact = false;
1957 /* Handle the three special cases first. */
1958 if (category == fcInfinity || category == fcNaN)
1959 return opInvalidOp;
1961 dstPartsCount = partCountForBits(width);
1963 if (category == fcZero) {
1964 APInt::tcSet(parts, 0, dstPartsCount);
1965 // Negative zero can't be represented as an int.
1966 *isExact = !sign;
1967 return opOK;
1970 src = significandParts();
1972 /* Step 1: place our absolute value, with any fraction truncated, in
1973 the destination. */
1974 if (exponent < 0) {
1975 /* Our absolute value is less than one; truncate everything. */
1976 APInt::tcSet(parts, 0, dstPartsCount);
1977 /* For exponent -1 the integer bit represents .5, look at that.
1978 For smaller exponents leftmost truncated bit is 0. */
1979 truncatedBits = semantics->precision -1U - exponent;
1980 } else {
1981 /* We want the most significant (exponent + 1) bits; the rest are
1982 truncated. */
1983 unsigned int bits = exponent + 1U;
1985 /* Hopelessly large in magnitude? */
1986 if (bits > width)
1987 return opInvalidOp;
1989 if (bits < semantics->precision) {
1990 /* We truncate (semantics->precision - bits) bits. */
1991 truncatedBits = semantics->precision - bits;
1992 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1993 } else {
1994 /* We want at least as many bits as are available. */
1995 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1996 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1997 truncatedBits = 0;
2001 /* Step 2: work out any lost fraction, and increment the absolute
2002 value if we would round away from zero. */
2003 if (truncatedBits) {
2004 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2005 truncatedBits);
2006 if (lost_fraction != lfExactlyZero &&
2007 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2008 if (APInt::tcIncrement(parts, dstPartsCount))
2009 return opInvalidOp; /* Overflow. */
2011 } else {
2012 lost_fraction = lfExactlyZero;
2015 /* Step 3: check if we fit in the destination. */
2016 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2018 if (sign) {
2019 if (!isSigned) {
2020 /* Negative numbers cannot be represented as unsigned. */
2021 if (omsb != 0)
2022 return opInvalidOp;
2023 } else {
2024 /* It takes omsb bits to represent the unsigned integer value.
2025 We lose a bit for the sign, but care is needed as the
2026 maximally negative integer is a special case. */
2027 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2028 return opInvalidOp;
2030 /* This case can happen because of rounding. */
2031 if (omsb > width)
2032 return opInvalidOp;
2035 APInt::tcNegate (parts, dstPartsCount);
2036 } else {
2037 if (omsb >= width + !isSigned)
2038 return opInvalidOp;
2041 if (lost_fraction == lfExactlyZero) {
2042 *isExact = true;
2043 return opOK;
2044 } else
2045 return opInexact;
2048 /* Same as convertToSignExtendedInteger, except we provide
2049 deterministic values in case of an invalid operation exception,
2050 namely zero for NaNs and the minimal or maximal value respectively
2051 for underflow or overflow.
2052 The *isExact output tells whether the result is exact, in the sense
2053 that converting it back to the original floating point type produces
2054 the original value. This is almost equivalent to result==opOK,
2055 except for negative zeroes.
2057 APFloat::opStatus
2058 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2059 bool isSigned,
2060 roundingMode rounding_mode, bool *isExact) const
2062 opStatus fs;
2064 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2065 isExact);
2067 if (fs == opInvalidOp) {
2068 unsigned int bits, dstPartsCount;
2070 dstPartsCount = partCountForBits(width);
2072 if (category == fcNaN)
2073 bits = 0;
2074 else if (sign)
2075 bits = isSigned;
2076 else
2077 bits = width - isSigned;
2079 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2080 if (sign && isSigned)
2081 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2084 return fs;
2087 /* Convert an unsigned integer SRC to a floating point number,
2088 rounding according to ROUNDING_MODE. The sign of the floating
2089 point number is not modified. */
2090 APFloat::opStatus
2091 APFloat::convertFromUnsignedParts(const integerPart *src,
2092 unsigned int srcCount,
2093 roundingMode rounding_mode)
2095 unsigned int omsb, precision, dstCount;
2096 integerPart *dst;
2097 lostFraction lost_fraction;
2099 assertArithmeticOK(*semantics);
2100 category = fcNormal;
2101 omsb = APInt::tcMSB(src, srcCount) + 1;
2102 dst = significandParts();
2103 dstCount = partCount();
2104 precision = semantics->precision;
2106 /* We want the most significant PRECISON bits of SRC. There may not
2107 be that many; extract what we can. */
2108 if (precision <= omsb) {
2109 exponent = omsb - 1;
2110 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2111 omsb - precision);
2112 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2113 } else {
2114 exponent = precision - 1;
2115 lost_fraction = lfExactlyZero;
2116 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2119 return normalize(rounding_mode, lost_fraction);
2122 APFloat::opStatus
2123 APFloat::convertFromAPInt(const APInt &Val,
2124 bool isSigned,
2125 roundingMode rounding_mode)
2127 unsigned int partCount = Val.getNumWords();
2128 APInt api = Val;
2130 sign = false;
2131 if (isSigned && api.isNegative()) {
2132 sign = true;
2133 api = -api;
2136 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2139 /* Convert a two's complement integer SRC to a floating point number,
2140 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2141 integer is signed, in which case it must be sign-extended. */
2142 APFloat::opStatus
2143 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2144 unsigned int srcCount,
2145 bool isSigned,
2146 roundingMode rounding_mode)
2148 opStatus status;
2150 assertArithmeticOK(*semantics);
2151 if (isSigned &&
2152 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2153 integerPart *copy;
2155 /* If we're signed and negative negate a copy. */
2156 sign = true;
2157 copy = new integerPart[srcCount];
2158 APInt::tcAssign(copy, src, srcCount);
2159 APInt::tcNegate(copy, srcCount);
2160 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2161 delete [] copy;
2162 } else {
2163 sign = false;
2164 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2167 return status;
2170 /* FIXME: should this just take a const APInt reference? */
2171 APFloat::opStatus
2172 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2173 unsigned int width, bool isSigned,
2174 roundingMode rounding_mode)
2176 unsigned int partCount = partCountForBits(width);
2177 APInt api = APInt(width, partCount, parts);
2179 sign = false;
2180 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2181 sign = true;
2182 api = -api;
2185 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2188 APFloat::opStatus
2189 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2191 lostFraction lost_fraction = lfExactlyZero;
2192 integerPart *significand;
2193 unsigned int bitPos, partsCount;
2194 StringRef::iterator dot, firstSignificantDigit;
2196 zeroSignificand();
2197 exponent = 0;
2198 category = fcNormal;
2200 significand = significandParts();
2201 partsCount = partCount();
2202 bitPos = partsCount * integerPartWidth;
2204 /* Skip leading zeroes and any (hexa)decimal point. */
2205 StringRef::iterator begin = s.begin();
2206 StringRef::iterator end = s.end();
2207 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2208 firstSignificantDigit = p;
2210 for (; p != end;) {
2211 integerPart hex_value;
2213 if (*p == '.') {
2214 assert(dot == end && "String contains multiple dots");
2215 dot = p++;
2216 if (p == end) {
2217 break;
2221 hex_value = hexDigitValue(*p);
2222 if (hex_value == -1U) {
2223 break;
2226 p++;
2228 if (p == end) {
2229 break;
2230 } else {
2231 /* Store the number whilst 4-bit nibbles remain. */
2232 if (bitPos) {
2233 bitPos -= 4;
2234 hex_value <<= bitPos % integerPartWidth;
2235 significand[bitPos / integerPartWidth] |= hex_value;
2236 } else {
2237 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2238 while (p != end && hexDigitValue(*p) != -1U)
2239 p++;
2240 break;
2245 /* Hex floats require an exponent but not a hexadecimal point. */
2246 assert(p != end && "Hex strings require an exponent");
2247 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2248 assert(p != begin && "Significand has no digits");
2249 assert((dot == end || p - begin != 1) && "Significand has no digits");
2251 /* Ignore the exponent if we are zero. */
2252 if (p != firstSignificantDigit) {
2253 int expAdjustment;
2255 /* Implicit hexadecimal point? */
2256 if (dot == end)
2257 dot = p;
2259 /* Calculate the exponent adjustment implicit in the number of
2260 significant digits. */
2261 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2262 if (expAdjustment < 0)
2263 expAdjustment++;
2264 expAdjustment = expAdjustment * 4 - 1;
2266 /* Adjust for writing the significand starting at the most
2267 significant nibble. */
2268 expAdjustment += semantics->precision;
2269 expAdjustment -= partsCount * integerPartWidth;
2271 /* Adjust for the given exponent. */
2272 exponent = totalExponent(p + 1, end, expAdjustment);
2275 return normalize(rounding_mode, lost_fraction);
2278 APFloat::opStatus
2279 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2280 unsigned sigPartCount, int exp,
2281 roundingMode rounding_mode)
2283 unsigned int parts, pow5PartCount;
2284 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2285 integerPart pow5Parts[maxPowerOfFiveParts];
2286 bool isNearest;
2288 isNearest = (rounding_mode == rmNearestTiesToEven ||
2289 rounding_mode == rmNearestTiesToAway);
2291 parts = partCountForBits(semantics->precision + 11);
2293 /* Calculate pow(5, abs(exp)). */
2294 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2296 for (;; parts *= 2) {
2297 opStatus sigStatus, powStatus;
2298 unsigned int excessPrecision, truncatedBits;
2300 calcSemantics.precision = parts * integerPartWidth - 1;
2301 excessPrecision = calcSemantics.precision - semantics->precision;
2302 truncatedBits = excessPrecision;
2304 APFloat decSig(calcSemantics, fcZero, sign);
2305 APFloat pow5(calcSemantics, fcZero, false);
2307 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2308 rmNearestTiesToEven);
2309 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2310 rmNearestTiesToEven);
2311 /* Add exp, as 10^n = 5^n * 2^n. */
2312 decSig.exponent += exp;
2314 lostFraction calcLostFraction;
2315 integerPart HUerr, HUdistance;
2316 unsigned int powHUerr;
2318 if (exp >= 0) {
2319 /* multiplySignificand leaves the precision-th bit set to 1. */
2320 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2321 powHUerr = powStatus != opOK;
2322 } else {
2323 calcLostFraction = decSig.divideSignificand(pow5);
2324 /* Denormal numbers have less precision. */
2325 if (decSig.exponent < semantics->minExponent) {
2326 excessPrecision += (semantics->minExponent - decSig.exponent);
2327 truncatedBits = excessPrecision;
2328 if (excessPrecision > calcSemantics.precision)
2329 excessPrecision = calcSemantics.precision;
2331 /* Extra half-ulp lost in reciprocal of exponent. */
2332 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2335 /* Both multiplySignificand and divideSignificand return the
2336 result with the integer bit set. */
2337 assert(APInt::tcExtractBit
2338 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2340 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2341 powHUerr);
2342 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2343 excessPrecision, isNearest);
2345 /* Are we guaranteed to round correctly if we truncate? */
2346 if (HUdistance >= HUerr) {
2347 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2348 calcSemantics.precision - excessPrecision,
2349 excessPrecision);
2350 /* Take the exponent of decSig. If we tcExtract-ed less bits
2351 above we must adjust our exponent to compensate for the
2352 implicit right shift. */
2353 exponent = (decSig.exponent + semantics->precision
2354 - (calcSemantics.precision - excessPrecision));
2355 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2356 decSig.partCount(),
2357 truncatedBits);
2358 return normalize(rounding_mode, calcLostFraction);
2363 APFloat::opStatus
2364 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2366 decimalInfo D;
2367 opStatus fs;
2369 /* Scan the text. */
2370 StringRef::iterator p = str.begin();
2371 interpretDecimal(p, str.end(), &D);
2373 /* Handle the quick cases. First the case of no significant digits,
2374 i.e. zero, and then exponents that are obviously too large or too
2375 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2376 definitely overflows if
2378 (exp - 1) * L >= maxExponent
2380 and definitely underflows to zero where
2382 (exp + 1) * L <= minExponent - precision
2384 With integer arithmetic the tightest bounds for L are
2386 93/28 < L < 196/59 [ numerator <= 256 ]
2387 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2390 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2391 category = fcZero;
2392 fs = opOK;
2394 /* Check whether the normalized exponent is high enough to overflow
2395 max during the log-rebasing in the max-exponent check below. */
2396 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2397 fs = handleOverflow(rounding_mode);
2399 /* If it wasn't, then it also wasn't high enough to overflow max
2400 during the log-rebasing in the min-exponent check. Check that it
2401 won't overflow min in either check, then perform the min-exponent
2402 check. */
2403 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2404 (D.normalizedExponent + 1) * 28738 <=
2405 8651 * (semantics->minExponent - (int) semantics->precision)) {
2406 /* Underflow to zero and round. */
2407 zeroSignificand();
2408 fs = normalize(rounding_mode, lfLessThanHalf);
2410 /* We can finally safely perform the max-exponent check. */
2411 } else if ((D.normalizedExponent - 1) * 42039
2412 >= 12655 * semantics->maxExponent) {
2413 /* Overflow and round. */
2414 fs = handleOverflow(rounding_mode);
2415 } else {
2416 integerPart *decSignificand;
2417 unsigned int partCount;
2419 /* A tight upper bound on number of bits required to hold an
2420 N-digit decimal integer is N * 196 / 59. Allocate enough space
2421 to hold the full significand, and an extra part required by
2422 tcMultiplyPart. */
2423 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2424 partCount = partCountForBits(1 + 196 * partCount / 59);
2425 decSignificand = new integerPart[partCount + 1];
2426 partCount = 0;
2428 /* Convert to binary efficiently - we do almost all multiplication
2429 in an integerPart. When this would overflow do we do a single
2430 bignum multiplication, and then revert again to multiplication
2431 in an integerPart. */
2432 do {
2433 integerPart decValue, val, multiplier;
2435 val = 0;
2436 multiplier = 1;
2438 do {
2439 if (*p == '.') {
2440 p++;
2441 if (p == str.end()) {
2442 break;
2445 decValue = decDigitValue(*p++);
2446 assert(decValue < 10U && "Invalid character in significand");
2447 multiplier *= 10;
2448 val = val * 10 + decValue;
2449 /* The maximum number that can be multiplied by ten with any
2450 digit added without overflowing an integerPart. */
2451 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2453 /* Multiply out the current part. */
2454 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2455 partCount, partCount + 1, false);
2457 /* If we used another part (likely but not guaranteed), increase
2458 the count. */
2459 if (decSignificand[partCount])
2460 partCount++;
2461 } while (p <= D.lastSigDigit);
2463 category = fcNormal;
2464 fs = roundSignificandWithExponent(decSignificand, partCount,
2465 D.exponent, rounding_mode);
2467 delete [] decSignificand;
2470 return fs;
2473 APFloat::opStatus
2474 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2476 assertArithmeticOK(*semantics);
2477 assert(!str.empty() && "Invalid string length");
2479 /* Handle a leading minus sign. */
2480 StringRef::iterator p = str.begin();
2481 size_t slen = str.size();
2482 sign = *p == '-' ? 1 : 0;
2483 if (*p == '-' || *p == '+') {
2484 p++;
2485 slen--;
2486 assert(slen && "String has no digits");
2489 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2490 assert(slen - 2 && "Invalid string");
2491 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2492 rounding_mode);
2495 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2498 /* Write out a hexadecimal representation of the floating point value
2499 to DST, which must be of sufficient size, in the C99 form
2500 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2501 excluding the terminating NUL.
2503 If UPPERCASE, the output is in upper case, otherwise in lower case.
2505 HEXDIGITS digits appear altogether, rounding the value if
2506 necessary. If HEXDIGITS is 0, the minimal precision to display the
2507 number precisely is used instead. If nothing would appear after
2508 the decimal point it is suppressed.
2510 The decimal exponent is always printed and has at least one digit.
2511 Zero values display an exponent of zero. Infinities and NaNs
2512 appear as "infinity" or "nan" respectively.
2514 The above rules are as specified by C99. There is ambiguity about
2515 what the leading hexadecimal digit should be. This implementation
2516 uses whatever is necessary so that the exponent is displayed as
2517 stored. This implies the exponent will fall within the IEEE format
2518 range, and the leading hexadecimal digit will be 0 (for denormals),
2519 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2520 any other digits zero).
2522 unsigned int
2523 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2524 bool upperCase, roundingMode rounding_mode) const
2526 char *p;
2528 assertArithmeticOK(*semantics);
2530 p = dst;
2531 if (sign)
2532 *dst++ = '-';
2534 switch (category) {
2535 case fcInfinity:
2536 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2537 dst += sizeof infinityL - 1;
2538 break;
2540 case fcNaN:
2541 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2542 dst += sizeof NaNU - 1;
2543 break;
2545 case fcZero:
2546 *dst++ = '0';
2547 *dst++ = upperCase ? 'X': 'x';
2548 *dst++ = '0';
2549 if (hexDigits > 1) {
2550 *dst++ = '.';
2551 memset (dst, '0', hexDigits - 1);
2552 dst += hexDigits - 1;
2554 *dst++ = upperCase ? 'P': 'p';
2555 *dst++ = '0';
2556 break;
2558 case fcNormal:
2559 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2560 break;
2563 *dst = 0;
2565 return static_cast<unsigned int>(dst - p);
2568 /* Does the hard work of outputting the correctly rounded hexadecimal
2569 form of a normal floating point number with the specified number of
2570 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2571 digits necessary to print the value precisely is output. */
2572 char *
2573 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2574 bool upperCase,
2575 roundingMode rounding_mode) const
2577 unsigned int count, valueBits, shift, partsCount, outputDigits;
2578 const char *hexDigitChars;
2579 const integerPart *significand;
2580 char *p;
2581 bool roundUp;
2583 *dst++ = '0';
2584 *dst++ = upperCase ? 'X': 'x';
2586 roundUp = false;
2587 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2589 significand = significandParts();
2590 partsCount = partCount();
2592 /* +3 because the first digit only uses the single integer bit, so
2593 we have 3 virtual zero most-significant-bits. */
2594 valueBits = semantics->precision + 3;
2595 shift = integerPartWidth - valueBits % integerPartWidth;
2597 /* The natural number of digits required ignoring trailing
2598 insignificant zeroes. */
2599 outputDigits = (valueBits - significandLSB () + 3) / 4;
2601 /* hexDigits of zero means use the required number for the
2602 precision. Otherwise, see if we are truncating. If we are,
2603 find out if we need to round away from zero. */
2604 if (hexDigits) {
2605 if (hexDigits < outputDigits) {
2606 /* We are dropping non-zero bits, so need to check how to round.
2607 "bits" is the number of dropped bits. */
2608 unsigned int bits;
2609 lostFraction fraction;
2611 bits = valueBits - hexDigits * 4;
2612 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2613 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2615 outputDigits = hexDigits;
2618 /* Write the digits consecutively, and start writing in the location
2619 of the hexadecimal point. We move the most significant digit
2620 left and add the hexadecimal point later. */
2621 p = ++dst;
2623 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2625 while (outputDigits && count) {
2626 integerPart part;
2628 /* Put the most significant integerPartWidth bits in "part". */
2629 if (--count == partsCount)
2630 part = 0; /* An imaginary higher zero part. */
2631 else
2632 part = significand[count] << shift;
2634 if (count && shift)
2635 part |= significand[count - 1] >> (integerPartWidth - shift);
2637 /* Convert as much of "part" to hexdigits as we can. */
2638 unsigned int curDigits = integerPartWidth / 4;
2640 if (curDigits > outputDigits)
2641 curDigits = outputDigits;
2642 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2643 outputDigits -= curDigits;
2646 if (roundUp) {
2647 char *q = dst;
2649 /* Note that hexDigitChars has a trailing '0'. */
2650 do {
2651 q--;
2652 *q = hexDigitChars[hexDigitValue (*q) + 1];
2653 } while (*q == '0');
2654 assert(q >= p);
2655 } else {
2656 /* Add trailing zeroes. */
2657 memset (dst, '0', outputDigits);
2658 dst += outputDigits;
2661 /* Move the most significant digit to before the point, and if there
2662 is something after the decimal point add it. This must come
2663 after rounding above. */
2664 p[-1] = p[0];
2665 if (dst -1 == p)
2666 dst--;
2667 else
2668 p[0] = '.';
2670 /* Finally output the exponent. */
2671 *dst++ = upperCase ? 'P': 'p';
2673 return writeSignedDecimal (dst, exponent);
2676 // For good performance it is desirable for different APFloats
2677 // to produce different integers.
2678 uint32_t
2679 APFloat::getHashValue() const
2681 if (category==fcZero) return sign<<8 | semantics->precision ;
2682 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2683 else if (category==fcNaN) return 1<<10 | semantics->precision;
2684 else {
2685 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2686 const integerPart* p = significandParts();
2687 for (int i=partCount(); i>0; i--, p++)
2688 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2689 return hash;
2693 // Conversion from APFloat to/from host float/double. It may eventually be
2694 // possible to eliminate these and have everybody deal with APFloats, but that
2695 // will take a while. This approach will not easily extend to long double.
2696 // Current implementation requires integerPartWidth==64, which is correct at
2697 // the moment but could be made more general.
2699 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2700 // the actual IEEE respresentations. We compensate for that here.
2702 APInt
2703 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2705 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2706 assert(partCount()==2);
2708 uint64_t myexponent, mysignificand;
2710 if (category==fcNormal) {
2711 myexponent = exponent+16383; //bias
2712 mysignificand = significandParts()[0];
2713 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2714 myexponent = 0; // denormal
2715 } else if (category==fcZero) {
2716 myexponent = 0;
2717 mysignificand = 0;
2718 } else if (category==fcInfinity) {
2719 myexponent = 0x7fff;
2720 mysignificand = 0x8000000000000000ULL;
2721 } else {
2722 assert(category == fcNaN && "Unknown category");
2723 myexponent = 0x7fff;
2724 mysignificand = significandParts()[0];
2727 uint64_t words[2];
2728 words[0] = mysignificand;
2729 words[1] = ((uint64_t)(sign & 1) << 15) |
2730 (myexponent & 0x7fffLL);
2731 return APInt(80, 2, words);
2734 APInt
2735 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2737 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2738 assert(partCount()==2);
2740 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2742 if (category==fcNormal) {
2743 myexponent = exponent + 1023; //bias
2744 myexponent2 = exponent2 + 1023;
2745 mysignificand = significandParts()[0];
2746 mysignificand2 = significandParts()[1];
2747 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2748 myexponent = 0; // denormal
2749 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2750 myexponent2 = 0; // denormal
2751 } else if (category==fcZero) {
2752 myexponent = 0;
2753 mysignificand = 0;
2754 myexponent2 = 0;
2755 mysignificand2 = 0;
2756 } else if (category==fcInfinity) {
2757 myexponent = 0x7ff;
2758 myexponent2 = 0;
2759 mysignificand = 0;
2760 mysignificand2 = 0;
2761 } else {
2762 assert(category == fcNaN && "Unknown category");
2763 myexponent = 0x7ff;
2764 mysignificand = significandParts()[0];
2765 myexponent2 = exponent2;
2766 mysignificand2 = significandParts()[1];
2769 uint64_t words[2];
2770 words[0] = ((uint64_t)(sign & 1) << 63) |
2771 ((myexponent & 0x7ff) << 52) |
2772 (mysignificand & 0xfffffffffffffLL);
2773 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2774 ((myexponent2 & 0x7ff) << 52) |
2775 (mysignificand2 & 0xfffffffffffffLL);
2776 return APInt(128, 2, words);
2779 APInt
2780 APFloat::convertQuadrupleAPFloatToAPInt() const
2782 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2783 assert(partCount()==2);
2785 uint64_t myexponent, mysignificand, mysignificand2;
2787 if (category==fcNormal) {
2788 myexponent = exponent+16383; //bias
2789 mysignificand = significandParts()[0];
2790 mysignificand2 = significandParts()[1];
2791 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2792 myexponent = 0; // denormal
2793 } else if (category==fcZero) {
2794 myexponent = 0;
2795 mysignificand = mysignificand2 = 0;
2796 } else if (category==fcInfinity) {
2797 myexponent = 0x7fff;
2798 mysignificand = mysignificand2 = 0;
2799 } else {
2800 assert(category == fcNaN && "Unknown category!");
2801 myexponent = 0x7fff;
2802 mysignificand = significandParts()[0];
2803 mysignificand2 = significandParts()[1];
2806 uint64_t words[2];
2807 words[0] = mysignificand;
2808 words[1] = ((uint64_t)(sign & 1) << 63) |
2809 ((myexponent & 0x7fff) << 48) |
2810 (mysignificand2 & 0xffffffffffffLL);
2812 return APInt(128, 2, words);
2815 APInt
2816 APFloat::convertDoubleAPFloatToAPInt() const
2818 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2819 assert(partCount()==1);
2821 uint64_t myexponent, mysignificand;
2823 if (category==fcNormal) {
2824 myexponent = exponent+1023; //bias
2825 mysignificand = *significandParts();
2826 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2827 myexponent = 0; // denormal
2828 } else if (category==fcZero) {
2829 myexponent = 0;
2830 mysignificand = 0;
2831 } else if (category==fcInfinity) {
2832 myexponent = 0x7ff;
2833 mysignificand = 0;
2834 } else {
2835 assert(category == fcNaN && "Unknown category!");
2836 myexponent = 0x7ff;
2837 mysignificand = *significandParts();
2840 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2841 ((myexponent & 0x7ff) << 52) |
2842 (mysignificand & 0xfffffffffffffLL))));
2845 APInt
2846 APFloat::convertFloatAPFloatToAPInt() const
2848 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2849 assert(partCount()==1);
2851 uint32_t myexponent, mysignificand;
2853 if (category==fcNormal) {
2854 myexponent = exponent+127; //bias
2855 mysignificand = (uint32_t)*significandParts();
2856 if (myexponent == 1 && !(mysignificand & 0x800000))
2857 myexponent = 0; // denormal
2858 } else if (category==fcZero) {
2859 myexponent = 0;
2860 mysignificand = 0;
2861 } else if (category==fcInfinity) {
2862 myexponent = 0xff;
2863 mysignificand = 0;
2864 } else {
2865 assert(category == fcNaN && "Unknown category!");
2866 myexponent = 0xff;
2867 mysignificand = (uint32_t)*significandParts();
2870 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2871 (mysignificand & 0x7fffff)));
2874 APInt
2875 APFloat::convertHalfAPFloatToAPInt() const
2877 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2878 assert(partCount()==1);
2880 uint32_t myexponent, mysignificand;
2882 if (category==fcNormal) {
2883 myexponent = exponent+15; //bias
2884 mysignificand = (uint32_t)*significandParts();
2885 if (myexponent == 1 && !(mysignificand & 0x400))
2886 myexponent = 0; // denormal
2887 } else if (category==fcZero) {
2888 myexponent = 0;
2889 mysignificand = 0;
2890 } else if (category==fcInfinity) {
2891 myexponent = 0x1f;
2892 mysignificand = 0;
2893 } else {
2894 assert(category == fcNaN && "Unknown category!");
2895 myexponent = 0x1f;
2896 mysignificand = (uint32_t)*significandParts();
2899 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2900 (mysignificand & 0x3ff)));
2903 // This function creates an APInt that is just a bit map of the floating
2904 // point constant as it would appear in memory. It is not a conversion,
2905 // and treating the result as a normal integer is unlikely to be useful.
2907 APInt
2908 APFloat::bitcastToAPInt() const
2910 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2911 return convertHalfAPFloatToAPInt();
2913 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2914 return convertFloatAPFloatToAPInt();
2916 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2917 return convertDoubleAPFloatToAPInt();
2919 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2920 return convertQuadrupleAPFloatToAPInt();
2922 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2923 return convertPPCDoubleDoubleAPFloatToAPInt();
2925 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2926 "unknown format!");
2927 return convertF80LongDoubleAPFloatToAPInt();
2930 float
2931 APFloat::convertToFloat() const
2933 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2934 "Float semantics are not IEEEsingle");
2935 APInt api = bitcastToAPInt();
2936 return api.bitsToFloat();
2939 double
2940 APFloat::convertToDouble() const
2942 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2943 "Float semantics are not IEEEdouble");
2944 APInt api = bitcastToAPInt();
2945 return api.bitsToDouble();
2948 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2949 /// does not support these bit patterns:
2950 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2951 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2952 /// exponent = 0, integer bit 1 ("pseudodenormal")
2953 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2954 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2955 void
2956 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2958 assert(api.getBitWidth()==80);
2959 uint64_t i1 = api.getRawData()[0];
2960 uint64_t i2 = api.getRawData()[1];
2961 uint64_t myexponent = (i2 & 0x7fff);
2962 uint64_t mysignificand = i1;
2964 initialize(&APFloat::x87DoubleExtended);
2965 assert(partCount()==2);
2967 sign = static_cast<unsigned int>(i2>>15);
2968 if (myexponent==0 && mysignificand==0) {
2969 // exponent, significand meaningless
2970 category = fcZero;
2971 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2972 // exponent, significand meaningless
2973 category = fcInfinity;
2974 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2975 // exponent meaningless
2976 category = fcNaN;
2977 significandParts()[0] = mysignificand;
2978 significandParts()[1] = 0;
2979 } else {
2980 category = fcNormal;
2981 exponent = myexponent - 16383;
2982 significandParts()[0] = mysignificand;
2983 significandParts()[1] = 0;
2984 if (myexponent==0) // denormal
2985 exponent = -16382;
2989 void
2990 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2992 assert(api.getBitWidth()==128);
2993 uint64_t i1 = api.getRawData()[0];
2994 uint64_t i2 = api.getRawData()[1];
2995 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2996 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2997 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2998 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
3000 initialize(&APFloat::PPCDoubleDouble);
3001 assert(partCount()==2);
3003 sign = static_cast<unsigned int>(i1>>63);
3004 sign2 = static_cast<unsigned int>(i2>>63);
3005 if (myexponent==0 && mysignificand==0) {
3006 // exponent, significand meaningless
3007 // exponent2 and significand2 are required to be 0; we don't check
3008 category = fcZero;
3009 } else if (myexponent==0x7ff && mysignificand==0) {
3010 // exponent, significand meaningless
3011 // exponent2 and significand2 are required to be 0; we don't check
3012 category = fcInfinity;
3013 } else if (myexponent==0x7ff && mysignificand!=0) {
3014 // exponent meaningless. So is the whole second word, but keep it
3015 // for determinism.
3016 category = fcNaN;
3017 exponent2 = myexponent2;
3018 significandParts()[0] = mysignificand;
3019 significandParts()[1] = mysignificand2;
3020 } else {
3021 category = fcNormal;
3022 // Note there is no category2; the second word is treated as if it is
3023 // fcNormal, although it might be something else considered by itself.
3024 exponent = myexponent - 1023;
3025 exponent2 = myexponent2 - 1023;
3026 significandParts()[0] = mysignificand;
3027 significandParts()[1] = mysignificand2;
3028 if (myexponent==0) // denormal
3029 exponent = -1022;
3030 else
3031 significandParts()[0] |= 0x10000000000000LL; // integer bit
3032 if (myexponent2==0)
3033 exponent2 = -1022;
3034 else
3035 significandParts()[1] |= 0x10000000000000LL; // integer bit
3039 void
3040 APFloat::initFromQuadrupleAPInt(const APInt &api)
3042 assert(api.getBitWidth()==128);
3043 uint64_t i1 = api.getRawData()[0];
3044 uint64_t i2 = api.getRawData()[1];
3045 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3046 uint64_t mysignificand = i1;
3047 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3049 initialize(&APFloat::IEEEquad);
3050 assert(partCount()==2);
3052 sign = static_cast<unsigned int>(i2>>63);
3053 if (myexponent==0 &&
3054 (mysignificand==0 && mysignificand2==0)) {
3055 // exponent, significand meaningless
3056 category = fcZero;
3057 } else if (myexponent==0x7fff &&
3058 (mysignificand==0 && mysignificand2==0)) {
3059 // exponent, significand meaningless
3060 category = fcInfinity;
3061 } else if (myexponent==0x7fff &&
3062 (mysignificand!=0 || mysignificand2 !=0)) {
3063 // exponent meaningless
3064 category = fcNaN;
3065 significandParts()[0] = mysignificand;
3066 significandParts()[1] = mysignificand2;
3067 } else {
3068 category = fcNormal;
3069 exponent = myexponent - 16383;
3070 significandParts()[0] = mysignificand;
3071 significandParts()[1] = mysignificand2;
3072 if (myexponent==0) // denormal
3073 exponent = -16382;
3074 else
3075 significandParts()[1] |= 0x1000000000000LL; // integer bit
3079 void
3080 APFloat::initFromDoubleAPInt(const APInt &api)
3082 assert(api.getBitWidth()==64);
3083 uint64_t i = *api.getRawData();
3084 uint64_t myexponent = (i >> 52) & 0x7ff;
3085 uint64_t mysignificand = i & 0xfffffffffffffLL;
3087 initialize(&APFloat::IEEEdouble);
3088 assert(partCount()==1);
3090 sign = static_cast<unsigned int>(i>>63);
3091 if (myexponent==0 && mysignificand==0) {
3092 // exponent, significand meaningless
3093 category = fcZero;
3094 } else if (myexponent==0x7ff && mysignificand==0) {
3095 // exponent, significand meaningless
3096 category = fcInfinity;
3097 } else if (myexponent==0x7ff && mysignificand!=0) {
3098 // exponent meaningless
3099 category = fcNaN;
3100 *significandParts() = mysignificand;
3101 } else {
3102 category = fcNormal;
3103 exponent = myexponent - 1023;
3104 *significandParts() = mysignificand;
3105 if (myexponent==0) // denormal
3106 exponent = -1022;
3107 else
3108 *significandParts() |= 0x10000000000000LL; // integer bit
3112 void
3113 APFloat::initFromFloatAPInt(const APInt & api)
3115 assert(api.getBitWidth()==32);
3116 uint32_t i = (uint32_t)*api.getRawData();
3117 uint32_t myexponent = (i >> 23) & 0xff;
3118 uint32_t mysignificand = i & 0x7fffff;
3120 initialize(&APFloat::IEEEsingle);
3121 assert(partCount()==1);
3123 sign = i >> 31;
3124 if (myexponent==0 && mysignificand==0) {
3125 // exponent, significand meaningless
3126 category = fcZero;
3127 } else if (myexponent==0xff && mysignificand==0) {
3128 // exponent, significand meaningless
3129 category = fcInfinity;
3130 } else if (myexponent==0xff && mysignificand!=0) {
3131 // sign, exponent, significand meaningless
3132 category = fcNaN;
3133 *significandParts() = mysignificand;
3134 } else {
3135 category = fcNormal;
3136 exponent = myexponent - 127; //bias
3137 *significandParts() = mysignificand;
3138 if (myexponent==0) // denormal
3139 exponent = -126;
3140 else
3141 *significandParts() |= 0x800000; // integer bit
3145 void
3146 APFloat::initFromHalfAPInt(const APInt & api)
3148 assert(api.getBitWidth()==16);
3149 uint32_t i = (uint32_t)*api.getRawData();
3150 uint32_t myexponent = (i >> 10) & 0x1f;
3151 uint32_t mysignificand = i & 0x3ff;
3153 initialize(&APFloat::IEEEhalf);
3154 assert(partCount()==1);
3156 sign = i >> 15;
3157 if (myexponent==0 && mysignificand==0) {
3158 // exponent, significand meaningless
3159 category = fcZero;
3160 } else if (myexponent==0x1f && mysignificand==0) {
3161 // exponent, significand meaningless
3162 category = fcInfinity;
3163 } else if (myexponent==0x1f && mysignificand!=0) {
3164 // sign, exponent, significand meaningless
3165 category = fcNaN;
3166 *significandParts() = mysignificand;
3167 } else {
3168 category = fcNormal;
3169 exponent = myexponent - 15; //bias
3170 *significandParts() = mysignificand;
3171 if (myexponent==0) // denormal
3172 exponent = -14;
3173 else
3174 *significandParts() |= 0x400; // integer bit
3178 /// Treat api as containing the bits of a floating point number. Currently
3179 /// we infer the floating point type from the size of the APInt. The
3180 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3181 /// when the size is anything else).
3182 void
3183 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
3185 if (api.getBitWidth() == 16)
3186 return initFromHalfAPInt(api);
3187 else if (api.getBitWidth() == 32)
3188 return initFromFloatAPInt(api);
3189 else if (api.getBitWidth()==64)
3190 return initFromDoubleAPInt(api);
3191 else if (api.getBitWidth()==80)
3192 return initFromF80LongDoubleAPInt(api);
3193 else if (api.getBitWidth()==128)
3194 return (isIEEE ?
3195 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3196 else
3197 llvm_unreachable(0);
3200 APFloat
3201 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3203 return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
3206 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3207 APFloat Val(Sem, fcNormal, Negative);
3209 // We want (in interchange format):
3210 // sign = {Negative}
3211 // exponent = 1..10
3212 // significand = 1..1
3214 Val.exponent = Sem.maxExponent; // unbiased
3216 // 1-initialize all bits....
3217 Val.zeroSignificand();
3218 integerPart *significand = Val.significandParts();
3219 unsigned N = partCountForBits(Sem.precision);
3220 for (unsigned i = 0; i != N; ++i)
3221 significand[i] = ~((integerPart) 0);
3223 // ...and then clear the top bits for internal consistency.
3224 significand[N-1] &=
3225 (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1)) - 1;
3227 return Val;
3230 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3231 APFloat Val(Sem, fcNormal, Negative);
3233 // We want (in interchange format):
3234 // sign = {Negative}
3235 // exponent = 0..0
3236 // significand = 0..01
3238 Val.exponent = Sem.minExponent; // unbiased
3239 Val.zeroSignificand();
3240 Val.significandParts()[0] = 1;
3241 return Val;
3244 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3245 APFloat Val(Sem, fcNormal, Negative);
3247 // We want (in interchange format):
3248 // sign = {Negative}
3249 // exponent = 0..0
3250 // significand = 10..0
3252 Val.exponent = Sem.minExponent;
3253 Val.zeroSignificand();
3254 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3255 (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1));
3257 return Val;
3260 APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) {
3261 initFromAPInt(api, isIEEE);
3264 APFloat::APFloat(float f) : exponent2(0), sign2(0) {
3265 initFromAPInt(APInt::floatToBits(f));
3268 APFloat::APFloat(double d) : exponent2(0), sign2(0) {
3269 initFromAPInt(APInt::doubleToBits(d));
3272 namespace {
3273 static void append(SmallVectorImpl<char> &Buffer,
3274 unsigned N, const char *Str) {
3275 unsigned Start = Buffer.size();
3276 Buffer.set_size(Start + N);
3277 memcpy(&Buffer[Start], Str, N);
3280 template <unsigned N>
3281 void append(SmallVectorImpl<char> &Buffer, const char (&Str)[N]) {
3282 append(Buffer, N, Str);
3285 /// Removes data from the given significand until it is no more
3286 /// precise than is required for the desired precision.
3287 void AdjustToPrecision(APInt &significand,
3288 int &exp, unsigned FormatPrecision) {
3289 unsigned bits = significand.getActiveBits();
3291 // 196/59 is a very slight overestimate of lg_2(10).
3292 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3294 if (bits <= bitsRequired) return;
3296 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3297 if (!tensRemovable) return;
3299 exp += tensRemovable;
3301 APInt divisor(significand.getBitWidth(), 1);
3302 APInt powten(significand.getBitWidth(), 10);
3303 while (true) {
3304 if (tensRemovable & 1)
3305 divisor *= powten;
3306 tensRemovable >>= 1;
3307 if (!tensRemovable) break;
3308 powten *= powten;
3311 significand = significand.udiv(divisor);
3313 // Truncate the significand down to its active bit count, but
3314 // don't try to drop below 32.
3315 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3316 significand = significand.trunc(newPrecision);
3320 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3321 int &exp, unsigned FormatPrecision) {
3322 unsigned N = buffer.size();
3323 if (N <= FormatPrecision) return;
3325 // The most significant figures are the last ones in the buffer.
3326 unsigned FirstSignificant = N - FormatPrecision;
3328 // Round.
3329 // FIXME: this probably shouldn't use 'round half up'.
3331 // Rounding down is just a truncation, except we also want to drop
3332 // trailing zeros from the new result.
3333 if (buffer[FirstSignificant - 1] < '5') {
3334 while (buffer[FirstSignificant] == '0')
3335 FirstSignificant++;
3337 exp += FirstSignificant;
3338 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3339 return;
3342 // Rounding up requires a decimal add-with-carry. If we continue
3343 // the carry, the newly-introduced zeros will just be truncated.
3344 for (unsigned I = FirstSignificant; I != N; ++I) {
3345 if (buffer[I] == '9') {
3346 FirstSignificant++;
3347 } else {
3348 buffer[I]++;
3349 break;
3353 // If we carried through, we have exactly one digit of precision.
3354 if (FirstSignificant == N) {
3355 exp += FirstSignificant;
3356 buffer.clear();
3357 buffer.push_back('1');
3358 return;
3361 exp += FirstSignificant;
3362 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3366 void APFloat::toString(SmallVectorImpl<char> &Str,
3367 unsigned FormatPrecision,
3368 unsigned FormatMaxPadding) const {
3369 switch (category) {
3370 case fcInfinity:
3371 if (isNegative())
3372 return append(Str, "-Inf");
3373 else
3374 return append(Str, "+Inf");
3376 case fcNaN: return append(Str, "NaN");
3378 case fcZero:
3379 if (isNegative())
3380 Str.push_back('-');
3382 if (!FormatMaxPadding)
3383 append(Str, "0.0E+0");
3384 else
3385 Str.push_back('0');
3386 return;
3388 case fcNormal:
3389 break;
3392 if (isNegative())
3393 Str.push_back('-');
3395 // Decompose the number into an APInt and an exponent.
3396 int exp = exponent - ((int) semantics->precision - 1);
3397 APInt significand(semantics->precision,
3398 partCountForBits(semantics->precision),
3399 significandParts());
3401 // Set FormatPrecision if zero. We want to do this before we
3402 // truncate trailing zeros, as those are part of the precision.
3403 if (!FormatPrecision) {
3404 // It's an interesting question whether to use the nominal
3405 // precision or the active precision here for denormals.
3407 // FormatPrecision = ceil(significandBits / lg_2(10))
3408 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3411 // Ignore trailing binary zeros.
3412 int trailingZeros = significand.countTrailingZeros();
3413 exp += trailingZeros;
3414 significand = significand.lshr(trailingZeros);
3416 // Change the exponent from 2^e to 10^e.
3417 if (exp == 0) {
3418 // Nothing to do.
3419 } else if (exp > 0) {
3420 // Just shift left.
3421 significand = significand.zext(semantics->precision + exp);
3422 significand <<= exp;
3423 exp = 0;
3424 } else { /* exp < 0 */
3425 int texp = -exp;
3427 // We transform this using the identity:
3428 // (N)(2^-e) == (N)(5^e)(10^-e)
3429 // This means we have to multiply N (the significand) by 5^e.
3430 // To avoid overflow, we have to operate on numbers large
3431 // enough to store N * 5^e:
3432 // log2(N * 5^e) == log2(N) + e * log2(5)
3433 // <= semantics->precision + e * 137 / 59
3434 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3436 unsigned precision = semantics->precision + 137 * texp / 59;
3438 // Multiply significand by 5^e.
3439 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3440 significand = significand.zext(precision);
3441 APInt five_to_the_i(precision, 5);
3442 while (true) {
3443 if (texp & 1) significand *= five_to_the_i;
3445 texp >>= 1;
3446 if (!texp) break;
3447 five_to_the_i *= five_to_the_i;
3451 AdjustToPrecision(significand, exp, FormatPrecision);
3453 llvm::SmallVector<char, 256> buffer;
3455 // Fill the buffer.
3456 unsigned precision = significand.getBitWidth();
3457 APInt ten(precision, 10);
3458 APInt digit(precision, 0);
3460 bool inTrail = true;
3461 while (significand != 0) {
3462 // digit <- significand % 10
3463 // significand <- significand / 10
3464 APInt::udivrem(significand, ten, significand, digit);
3466 unsigned d = digit.getZExtValue();
3468 // Drop trailing zeros.
3469 if (inTrail && !d) exp++;
3470 else {
3471 buffer.push_back((char) ('0' + d));
3472 inTrail = false;
3476 assert(!buffer.empty() && "no characters in buffer!");
3478 // Drop down to FormatPrecision.
3479 // TODO: don't do more precise calculations above than are required.
3480 AdjustToPrecision(buffer, exp, FormatPrecision);
3482 unsigned NDigits = buffer.size();
3484 // Check whether we should use scientific notation.
3485 bool FormatScientific;
3486 if (!FormatMaxPadding)
3487 FormatScientific = true;
3488 else {
3489 if (exp >= 0) {
3490 // 765e3 --> 765000
3491 // ^^^
3492 // But we shouldn't make the number look more precise than it is.
3493 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3494 NDigits + (unsigned) exp > FormatPrecision);
3495 } else {
3496 // Power of the most significant digit.
3497 int MSD = exp + (int) (NDigits - 1);
3498 if (MSD >= 0) {
3499 // 765e-2 == 7.65
3500 FormatScientific = false;
3501 } else {
3502 // 765e-5 == 0.00765
3503 // ^ ^^
3504 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3509 // Scientific formatting is pretty straightforward.
3510 if (FormatScientific) {
3511 exp += (NDigits - 1);
3513 Str.push_back(buffer[NDigits-1]);
3514 Str.push_back('.');
3515 if (NDigits == 1)
3516 Str.push_back('0');
3517 else
3518 for (unsigned I = 1; I != NDigits; ++I)
3519 Str.push_back(buffer[NDigits-1-I]);
3520 Str.push_back('E');
3522 Str.push_back(exp >= 0 ? '+' : '-');
3523 if (exp < 0) exp = -exp;
3524 SmallVector<char, 6> expbuf;
3525 do {
3526 expbuf.push_back((char) ('0' + (exp % 10)));
3527 exp /= 10;
3528 } while (exp);
3529 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3530 Str.push_back(expbuf[E-1-I]);
3531 return;
3534 // Non-scientific, positive exponents.
3535 if (exp >= 0) {
3536 for (unsigned I = 0; I != NDigits; ++I)
3537 Str.push_back(buffer[NDigits-1-I]);
3538 for (unsigned I = 0; I != (unsigned) exp; ++I)
3539 Str.push_back('0');
3540 return;
3543 // Non-scientific, negative exponents.
3545 // The number of digits to the left of the decimal point.
3546 int NWholeDigits = exp + (int) NDigits;
3548 unsigned I = 0;
3549 if (NWholeDigits > 0) {
3550 for (; I != (unsigned) NWholeDigits; ++I)
3551 Str.push_back(buffer[NDigits-I-1]);
3552 Str.push_back('.');
3553 } else {
3554 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3556 Str.push_back('0');
3557 Str.push_back('.');
3558 for (unsigned Z = 1; Z != NZeros; ++Z)
3559 Str.push_back('0');
3562 for (; I != NDigits; ++I)
3563 Str.push_back(buffer[NDigits-I-1]);
3566 bool APFloat::getExactInverse(APFloat *inv) const {
3567 // We can only guarantee the existence of an exact inverse for IEEE floats.
3568 if (semantics != &IEEEhalf && semantics != &IEEEsingle &&
3569 semantics != &IEEEdouble && semantics != &IEEEquad)
3570 return false;
3572 // Special floats and denormals have no exact inverse.
3573 if (category != fcNormal)
3574 return false;
3576 // Check that the number is a power of two by making sure that only the
3577 // integer bit is set in the significand.
3578 if (significandLSB() != semantics->precision - 1)
3579 return false;
3581 // Get the inverse.
3582 APFloat reciprocal(*semantics, 1ULL);
3583 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3584 return false;
3586 // Avoid multiplication with a denormal, it is not safe on all platforms and
3587 // may be slower than a normal division.
3588 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3589 return false;
3591 assert(reciprocal.category == fcNormal &&
3592 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3594 if (inv)
3595 *inv = reciprocal;
3597 return true;