Change allowsUnalignedMemoryAccesses to take type argument since some targets
[llvm/avr.git] / lib / Support / APFloat.cpp
blob214abecf35a32fd8fc100796e246959827131042
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/Support/ErrorHandling.h"
18 #include "llvm/Support/MathExtras.h"
19 #include <cstring>
21 using namespace llvm;
23 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
25 /* Assumed in hexadecimal significand parsing, and conversion to
26 hexadecimal strings. */
27 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
28 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
30 namespace llvm {
32 /* Represents floating point arithmetic semantics. */
33 struct fltSemantics {
34 /* The largest E such that 2^E is representable; this matches the
35 definition of IEEE 754. */
36 exponent_t maxExponent;
38 /* The smallest E such that 2^E is a normalized number; this
39 matches the definition of IEEE 754. */
40 exponent_t minExponent;
42 /* Number of bits in the significand. This includes the integer
43 bit. */
44 unsigned int precision;
46 /* True if arithmetic is supported. */
47 unsigned int arithmeticOK;
50 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
51 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
52 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
53 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
54 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
56 // The PowerPC format consists of two doubles. It does not map cleanly
57 // onto the usual format above. For now only storage of constants of
58 // this type is supported, no arithmetic.
59 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
61 /* A tight upper bound on number of parts required to hold the value
62 pow(5, power) is
64 power * 815 / (351 * integerPartWidth) + 1
66 However, whilst the result may require only this many parts,
67 because we are multiplying two values to get it, the
68 multiplication may require an extra part with the excess part
69 being zero (consider the trivial case of 1 * 1, tcFullMultiply
70 requires two parts to hold the single-part result). So we add an
71 extra one to guarantee enough space whilst multiplying. */
72 const unsigned int maxExponent = 16383;
73 const unsigned int maxPrecision = 113;
74 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
75 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
76 / (351 * integerPartWidth));
79 /* A bunch of private, handy routines. */
81 static inline unsigned int
82 partCountForBits(unsigned int bits)
84 return ((bits) + integerPartWidth - 1) / integerPartWidth;
87 /* Returns 0U-9U. Return values >= 10U are not digits. */
88 static inline unsigned int
89 decDigitValue(unsigned int c)
91 return c - '0';
94 static unsigned int
95 hexDigitValue(unsigned int c)
97 unsigned int r;
99 r = c - '0';
100 if(r <= 9)
101 return r;
103 r = c - 'A';
104 if(r <= 5)
105 return r + 10;
107 r = c - 'a';
108 if(r <= 5)
109 return r + 10;
111 return -1U;
114 static inline void
115 assertArithmeticOK(const llvm::fltSemantics &semantics) {
116 assert(semantics.arithmeticOK
117 && "Compile-time arithmetic does not support these semantics");
120 /* Return the value of a decimal exponent of the form
121 [+-]ddddddd.
123 If the exponent overflows, returns a large exponent with the
124 appropriate sign. */
125 static int
126 readExponent(const char *p)
128 bool isNegative;
129 unsigned int absExponent;
130 const unsigned int overlargeExponent = 24000; /* FIXME. */
132 isNegative = (*p == '-');
133 if (*p == '-' || *p == '+')
134 p++;
136 absExponent = decDigitValue(*p++);
137 assert (absExponent < 10U);
139 for (;;) {
140 unsigned int value;
142 value = decDigitValue(*p);
143 if (value >= 10U)
144 break;
146 p++;
147 value += absExponent * 10;
148 if (absExponent >= overlargeExponent) {
149 absExponent = overlargeExponent;
150 break;
152 absExponent = value;
155 if (isNegative)
156 return -(int) absExponent;
157 else
158 return (int) absExponent;
161 /* This is ugly and needs cleaning up, but I don't immediately see
162 how whilst remaining safe. */
163 static int
164 totalExponent(const char *p, int exponentAdjustment)
166 int unsignedExponent;
167 bool negative, overflow;
168 int exponent;
170 /* Move past the exponent letter and sign to the digits. */
171 p++;
172 negative = *p == '-';
173 if(*p == '-' || *p == '+')
174 p++;
176 unsignedExponent = 0;
177 overflow = false;
178 for(;;) {
179 unsigned int value;
181 value = decDigitValue(*p);
182 if(value >= 10U)
183 break;
185 p++;
186 unsignedExponent = unsignedExponent * 10 + value;
187 if(unsignedExponent > 65535)
188 overflow = true;
191 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
192 overflow = true;
194 if(!overflow) {
195 exponent = unsignedExponent;
196 if(negative)
197 exponent = -exponent;
198 exponent += exponentAdjustment;
199 if(exponent > 65535 || exponent < -65536)
200 overflow = true;
203 if(overflow)
204 exponent = negative ? -65536: 65535;
206 return exponent;
209 static const char *
210 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
212 *dot = 0;
213 while(*p == '0')
214 p++;
216 if(*p == '.') {
217 *dot = p++;
218 while(*p == '0')
219 p++;
222 return p;
225 /* Given a normal decimal floating point number of the form
227 dddd.dddd[eE][+-]ddd
229 where the decimal point and exponent are optional, fill out the
230 structure D. Exponent is appropriate if the significand is
231 treated as an integer, and normalizedExponent if the significand
232 is taken to have the decimal point after a single leading
233 non-zero digit.
235 If the value is zero, V->firstSigDigit points to a non-digit, and
236 the return exponent is zero.
238 struct decimalInfo {
239 const char *firstSigDigit;
240 const char *lastSigDigit;
241 int exponent;
242 int normalizedExponent;
245 static void
246 interpretDecimal(const char *p, decimalInfo *D)
248 const char *dot;
250 p = skipLeadingZeroesAndAnyDot (p, &dot);
252 D->firstSigDigit = p;
253 D->exponent = 0;
254 D->normalizedExponent = 0;
256 for (;;) {
257 if (*p == '.') {
258 assert(dot == 0);
259 dot = p++;
261 if (decDigitValue(*p) >= 10U)
262 break;
263 p++;
266 /* If number is all zerooes accept any exponent. */
267 if (p != D->firstSigDigit) {
268 if (*p == 'e' || *p == 'E')
269 D->exponent = readExponent(p + 1);
271 /* Implied decimal point? */
272 if (!dot)
273 dot = p;
275 /* Drop insignificant trailing zeroes. */
278 p--;
279 while (*p == '0');
280 while (*p == '.');
282 /* Adjust the exponents for any decimal point. */
283 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
284 D->normalizedExponent = (D->exponent +
285 static_cast<exponent_t>((p - D->firstSigDigit)
286 - (dot > D->firstSigDigit && dot < p)));
289 D->lastSigDigit = p;
292 /* Return the trailing fraction of a hexadecimal number.
293 DIGITVALUE is the first hex digit of the fraction, P points to
294 the next digit. */
295 static lostFraction
296 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
298 unsigned int hexDigit;
300 /* If the first trailing digit isn't 0 or 8 we can work out the
301 fraction immediately. */
302 if(digitValue > 8)
303 return lfMoreThanHalf;
304 else if(digitValue < 8 && digitValue > 0)
305 return lfLessThanHalf;
307 /* Otherwise we need to find the first non-zero digit. */
308 while(*p == '0')
309 p++;
311 hexDigit = hexDigitValue(*p);
313 /* If we ran off the end it is exactly zero or one-half, otherwise
314 a little more. */
315 if(hexDigit == -1U)
316 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
317 else
318 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
321 /* Return the fraction lost were a bignum truncated losing the least
322 significant BITS bits. */
323 static lostFraction
324 lostFractionThroughTruncation(const integerPart *parts,
325 unsigned int partCount,
326 unsigned int bits)
328 unsigned int lsb;
330 lsb = APInt::tcLSB(parts, partCount);
332 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
333 if(bits <= lsb)
334 return lfExactlyZero;
335 if(bits == lsb + 1)
336 return lfExactlyHalf;
337 if(bits <= partCount * integerPartWidth
338 && APInt::tcExtractBit(parts, bits - 1))
339 return lfMoreThanHalf;
341 return lfLessThanHalf;
344 /* Shift DST right BITS bits noting lost fraction. */
345 static lostFraction
346 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
348 lostFraction lost_fraction;
350 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
352 APInt::tcShiftRight(dst, parts, bits);
354 return lost_fraction;
357 /* Combine the effect of two lost fractions. */
358 static lostFraction
359 combineLostFractions(lostFraction moreSignificant,
360 lostFraction lessSignificant)
362 if(lessSignificant != lfExactlyZero) {
363 if(moreSignificant == lfExactlyZero)
364 moreSignificant = lfLessThanHalf;
365 else if(moreSignificant == lfExactlyHalf)
366 moreSignificant = lfMoreThanHalf;
369 return moreSignificant;
372 /* The error from the true value, in half-ulps, on multiplying two
373 floating point numbers, which differ from the value they
374 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
375 than the returned value.
377 See "How to Read Floating Point Numbers Accurately" by William D
378 Clinger. */
379 static unsigned int
380 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
382 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
384 if (HUerr1 + HUerr2 == 0)
385 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
386 else
387 return inexactMultiply + 2 * (HUerr1 + HUerr2);
390 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
391 when the least significant BITS are truncated. BITS cannot be
392 zero. */
393 static integerPart
394 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
396 unsigned int count, partBits;
397 integerPart part, boundary;
399 assert (bits != 0);
401 bits--;
402 count = bits / integerPartWidth;
403 partBits = bits % integerPartWidth + 1;
405 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
407 if (isNearest)
408 boundary = (integerPart) 1 << (partBits - 1);
409 else
410 boundary = 0;
412 if (count == 0) {
413 if (part - boundary <= boundary - part)
414 return part - boundary;
415 else
416 return boundary - part;
419 if (part == boundary) {
420 while (--count)
421 if (parts[count])
422 return ~(integerPart) 0; /* A lot. */
424 return parts[0];
425 } else if (part == boundary - 1) {
426 while (--count)
427 if (~parts[count])
428 return ~(integerPart) 0; /* A lot. */
430 return -parts[0];
433 return ~(integerPart) 0; /* A lot. */
436 /* Place pow(5, power) in DST, and return the number of parts used.
437 DST must be at least one part larger than size of the answer. */
438 static unsigned int
439 powerOf5(integerPart *dst, unsigned int power)
441 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
442 15625, 78125 };
443 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
444 pow5s[0] = 78125 * 5;
446 unsigned int partsCount[16] = { 1 };
447 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
448 unsigned int result;
449 assert(power <= maxExponent);
451 p1 = dst;
452 p2 = scratch;
454 *p1 = firstEightPowers[power & 7];
455 power >>= 3;
457 result = 1;
458 pow5 = pow5s;
460 for (unsigned int n = 0; power; power >>= 1, n++) {
461 unsigned int pc;
463 pc = partsCount[n];
465 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
466 if (pc == 0) {
467 pc = partsCount[n - 1];
468 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
469 pc *= 2;
470 if (pow5[pc - 1] == 0)
471 pc--;
472 partsCount[n] = pc;
475 if (power & 1) {
476 integerPart *tmp;
478 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
479 result += pc;
480 if (p2[result - 1] == 0)
481 result--;
483 /* Now result is in p1 with partsCount parts and p2 is scratch
484 space. */
485 tmp = p1, p1 = p2, p2 = tmp;
488 pow5 += pc;
491 if (p1 != dst)
492 APInt::tcAssign(dst, p1, result);
494 return result;
497 /* Zero at the end to avoid modular arithmetic when adding one; used
498 when rounding up during hexadecimal output. */
499 static const char hexDigitsLower[] = "0123456789abcdef0";
500 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
501 static const char infinityL[] = "infinity";
502 static const char infinityU[] = "INFINITY";
503 static const char NaNL[] = "nan";
504 static const char NaNU[] = "NAN";
506 /* Write out an integerPart in hexadecimal, starting with the most
507 significant nibble. Write out exactly COUNT hexdigits, return
508 COUNT. */
509 static unsigned int
510 partAsHex (char *dst, integerPart part, unsigned int count,
511 const char *hexDigitChars)
513 unsigned int result = count;
515 assert (count != 0 && count <= integerPartWidth / 4);
517 part >>= (integerPartWidth - 4 * count);
518 while (count--) {
519 dst[count] = hexDigitChars[part & 0xf];
520 part >>= 4;
523 return result;
526 /* Write out an unsigned decimal integer. */
527 static char *
528 writeUnsignedDecimal (char *dst, unsigned int n)
530 char buff[40], *p;
532 p = buff;
534 *p++ = '0' + n % 10;
535 while (n /= 10);
538 *dst++ = *--p;
539 while (p != buff);
541 return dst;
544 /* Write out a signed decimal integer. */
545 static char *
546 writeSignedDecimal (char *dst, int value)
548 if (value < 0) {
549 *dst++ = '-';
550 dst = writeUnsignedDecimal(dst, -(unsigned) value);
551 } else
552 dst = writeUnsignedDecimal(dst, value);
554 return dst;
557 /* Constructors. */
558 void
559 APFloat::initialize(const fltSemantics *ourSemantics)
561 unsigned int count;
563 semantics = ourSemantics;
564 count = partCount();
565 if(count > 1)
566 significand.parts = new integerPart[count];
569 void
570 APFloat::freeSignificand()
572 if(partCount() > 1)
573 delete [] significand.parts;
576 void
577 APFloat::assign(const APFloat &rhs)
579 assert(semantics == rhs.semantics);
581 sign = rhs.sign;
582 category = rhs.category;
583 exponent = rhs.exponent;
584 sign2 = rhs.sign2;
585 exponent2 = rhs.exponent2;
586 if(category == fcNormal || category == fcNaN)
587 copySignificand(rhs);
590 void
591 APFloat::copySignificand(const APFloat &rhs)
593 assert(category == fcNormal || category == fcNaN);
594 assert(rhs.partCount() >= partCount());
596 APInt::tcAssign(significandParts(), rhs.significandParts(),
597 partCount());
600 /* Make this number a NaN, with an arbitrary but deterministic value
601 for the significand. If double or longer, this is a signalling NaN,
602 which may not be ideal. If float, this is QNaN(0). */
603 void
604 APFloat::makeNaN(unsigned type)
606 category = fcNaN;
607 // FIXME: Add double and long double support for QNaN(0).
608 if (semantics->precision == 24 && semantics->maxExponent == 127) {
609 type |= 0x7fc00000U;
610 type &= ~0x80000000U;
611 } else
612 type = ~0U;
613 APInt::tcSet(significandParts(), type, partCount());
616 APFloat &
617 APFloat::operator=(const APFloat &rhs)
619 if(this != &rhs) {
620 if(semantics != rhs.semantics) {
621 freeSignificand();
622 initialize(rhs.semantics);
624 assign(rhs);
627 return *this;
630 bool
631 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
632 if (this == &rhs)
633 return true;
634 if (semantics != rhs.semantics ||
635 category != rhs.category ||
636 sign != rhs.sign)
637 return false;
638 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
639 sign2 != rhs.sign2)
640 return false;
641 if (category==fcZero || category==fcInfinity)
642 return true;
643 else if (category==fcNormal && exponent!=rhs.exponent)
644 return false;
645 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
646 exponent2!=rhs.exponent2)
647 return false;
648 else {
649 int i= partCount();
650 const integerPart* p=significandParts();
651 const integerPart* q=rhs.significandParts();
652 for (; i>0; i--, p++, q++) {
653 if (*p != *q)
654 return false;
656 return true;
660 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
662 assertArithmeticOK(ourSemantics);
663 initialize(&ourSemantics);
664 sign = 0;
665 zeroSignificand();
666 exponent = ourSemantics.precision - 1;
667 significandParts()[0] = value;
668 normalize(rmNearestTiesToEven, lfExactlyZero);
671 APFloat::APFloat(const fltSemantics &ourSemantics,
672 fltCategory ourCategory, bool negative, unsigned type)
674 assertArithmeticOK(ourSemantics);
675 initialize(&ourSemantics);
676 category = ourCategory;
677 sign = negative;
678 if (category == fcNormal)
679 category = fcZero;
680 else if (ourCategory == fcNaN)
681 makeNaN(type);
684 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
686 assertArithmeticOK(ourSemantics);
687 initialize(&ourSemantics);
688 convertFromString(text, rmNearestTiesToEven);
691 APFloat::APFloat(const APFloat &rhs)
693 initialize(rhs.semantics);
694 assign(rhs);
697 APFloat::~APFloat()
699 freeSignificand();
702 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
703 void APFloat::Profile(FoldingSetNodeID& ID) const {
704 ID.Add(bitcastToAPInt());
707 unsigned int
708 APFloat::partCount() const
710 return partCountForBits(semantics->precision + 1);
713 unsigned int
714 APFloat::semanticsPrecision(const fltSemantics &semantics)
716 return semantics.precision;
719 const integerPart *
720 APFloat::significandParts() const
722 return const_cast<APFloat *>(this)->significandParts();
725 integerPart *
726 APFloat::significandParts()
728 assert(category == fcNormal || category == fcNaN);
730 if(partCount() > 1)
731 return significand.parts;
732 else
733 return &significand.part;
736 void
737 APFloat::zeroSignificand()
739 category = fcNormal;
740 APInt::tcSet(significandParts(), 0, partCount());
743 /* Increment an fcNormal floating point number's significand. */
744 void
745 APFloat::incrementSignificand()
747 integerPart carry;
749 carry = APInt::tcIncrement(significandParts(), partCount());
751 /* Our callers should never cause us to overflow. */
752 assert(carry == 0);
755 /* Add the significand of the RHS. Returns the carry flag. */
756 integerPart
757 APFloat::addSignificand(const APFloat &rhs)
759 integerPart *parts;
761 parts = significandParts();
763 assert(semantics == rhs.semantics);
764 assert(exponent == rhs.exponent);
766 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
769 /* Subtract the significand of the RHS with a borrow flag. Returns
770 the borrow flag. */
771 integerPart
772 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
774 integerPart *parts;
776 parts = significandParts();
778 assert(semantics == rhs.semantics);
779 assert(exponent == rhs.exponent);
781 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
782 partCount());
785 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
786 on to the full-precision result of the multiplication. Returns the
787 lost fraction. */
788 lostFraction
789 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
791 unsigned int omsb; // One, not zero, based MSB.
792 unsigned int partsCount, newPartsCount, precision;
793 integerPart *lhsSignificand;
794 integerPart scratch[4];
795 integerPart *fullSignificand;
796 lostFraction lost_fraction;
797 bool ignored;
799 assert(semantics == rhs.semantics);
801 precision = semantics->precision;
802 newPartsCount = partCountForBits(precision * 2);
804 if(newPartsCount > 4)
805 fullSignificand = new integerPart[newPartsCount];
806 else
807 fullSignificand = scratch;
809 lhsSignificand = significandParts();
810 partsCount = partCount();
812 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
813 rhs.significandParts(), partsCount, partsCount);
815 lost_fraction = lfExactlyZero;
816 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
817 exponent += rhs.exponent;
819 if(addend) {
820 Significand savedSignificand = significand;
821 const fltSemantics *savedSemantics = semantics;
822 fltSemantics extendedSemantics;
823 opStatus status;
824 unsigned int extendedPrecision;
826 /* Normalize our MSB. */
827 extendedPrecision = precision + precision - 1;
828 if(omsb != extendedPrecision)
830 APInt::tcShiftLeft(fullSignificand, newPartsCount,
831 extendedPrecision - omsb);
832 exponent -= extendedPrecision - omsb;
835 /* Create new semantics. */
836 extendedSemantics = *semantics;
837 extendedSemantics.precision = extendedPrecision;
839 if(newPartsCount == 1)
840 significand.part = fullSignificand[0];
841 else
842 significand.parts = fullSignificand;
843 semantics = &extendedSemantics;
845 APFloat extendedAddend(*addend);
846 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
847 assert(status == opOK);
848 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
850 /* Restore our state. */
851 if(newPartsCount == 1)
852 fullSignificand[0] = significand.part;
853 significand = savedSignificand;
854 semantics = savedSemantics;
856 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
859 exponent -= (precision - 1);
861 if(omsb > precision) {
862 unsigned int bits, significantParts;
863 lostFraction lf;
865 bits = omsb - precision;
866 significantParts = partCountForBits(omsb);
867 lf = shiftRight(fullSignificand, significantParts, bits);
868 lost_fraction = combineLostFractions(lf, lost_fraction);
869 exponent += bits;
872 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
874 if(newPartsCount > 4)
875 delete [] fullSignificand;
877 return lost_fraction;
880 /* Multiply the significands of LHS and RHS to DST. */
881 lostFraction
882 APFloat::divideSignificand(const APFloat &rhs)
884 unsigned int bit, i, partsCount;
885 const integerPart *rhsSignificand;
886 integerPart *lhsSignificand, *dividend, *divisor;
887 integerPart scratch[4];
888 lostFraction lost_fraction;
890 assert(semantics == rhs.semantics);
892 lhsSignificand = significandParts();
893 rhsSignificand = rhs.significandParts();
894 partsCount = partCount();
896 if(partsCount > 2)
897 dividend = new integerPart[partsCount * 2];
898 else
899 dividend = scratch;
901 divisor = dividend + partsCount;
903 /* Copy the dividend and divisor as they will be modified in-place. */
904 for(i = 0; i < partsCount; i++) {
905 dividend[i] = lhsSignificand[i];
906 divisor[i] = rhsSignificand[i];
907 lhsSignificand[i] = 0;
910 exponent -= rhs.exponent;
912 unsigned int precision = semantics->precision;
914 /* Normalize the divisor. */
915 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
916 if(bit) {
917 exponent += bit;
918 APInt::tcShiftLeft(divisor, partsCount, bit);
921 /* Normalize the dividend. */
922 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
923 if(bit) {
924 exponent -= bit;
925 APInt::tcShiftLeft(dividend, partsCount, bit);
928 /* Ensure the dividend >= divisor initially for the loop below.
929 Incidentally, this means that the division loop below is
930 guaranteed to set the integer bit to one. */
931 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
932 exponent--;
933 APInt::tcShiftLeft(dividend, partsCount, 1);
934 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
937 /* Long division. */
938 for(bit = precision; bit; bit -= 1) {
939 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
940 APInt::tcSubtract(dividend, divisor, 0, partsCount);
941 APInt::tcSetBit(lhsSignificand, bit - 1);
944 APInt::tcShiftLeft(dividend, partsCount, 1);
947 /* Figure out the lost fraction. */
948 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
950 if(cmp > 0)
951 lost_fraction = lfMoreThanHalf;
952 else if(cmp == 0)
953 lost_fraction = lfExactlyHalf;
954 else if(APInt::tcIsZero(dividend, partsCount))
955 lost_fraction = lfExactlyZero;
956 else
957 lost_fraction = lfLessThanHalf;
959 if(partsCount > 2)
960 delete [] dividend;
962 return lost_fraction;
965 unsigned int
966 APFloat::significandMSB() const
968 return APInt::tcMSB(significandParts(), partCount());
971 unsigned int
972 APFloat::significandLSB() const
974 return APInt::tcLSB(significandParts(), partCount());
977 /* Note that a zero result is NOT normalized to fcZero. */
978 lostFraction
979 APFloat::shiftSignificandRight(unsigned int bits)
981 /* Our exponent should not overflow. */
982 assert((exponent_t) (exponent + bits) >= exponent);
984 exponent += bits;
986 return shiftRight(significandParts(), partCount(), bits);
989 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
990 void
991 APFloat::shiftSignificandLeft(unsigned int bits)
993 assert(bits < semantics->precision);
995 if(bits) {
996 unsigned int partsCount = partCount();
998 APInt::tcShiftLeft(significandParts(), partsCount, bits);
999 exponent -= bits;
1001 assert(!APInt::tcIsZero(significandParts(), partsCount));
1005 APFloat::cmpResult
1006 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1008 int compare;
1010 assert(semantics == rhs.semantics);
1011 assert(category == fcNormal);
1012 assert(rhs.category == fcNormal);
1014 compare = exponent - rhs.exponent;
1016 /* If exponents are equal, do an unsigned bignum comparison of the
1017 significands. */
1018 if(compare == 0)
1019 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1020 partCount());
1022 if(compare > 0)
1023 return cmpGreaterThan;
1024 else if(compare < 0)
1025 return cmpLessThan;
1026 else
1027 return cmpEqual;
1030 /* Handle overflow. Sign is preserved. We either become infinity or
1031 the largest finite number. */
1032 APFloat::opStatus
1033 APFloat::handleOverflow(roundingMode rounding_mode)
1035 /* Infinity? */
1036 if(rounding_mode == rmNearestTiesToEven
1037 || rounding_mode == rmNearestTiesToAway
1038 || (rounding_mode == rmTowardPositive && !sign)
1039 || (rounding_mode == rmTowardNegative && sign))
1041 category = fcInfinity;
1042 return (opStatus) (opOverflow | opInexact);
1045 /* Otherwise we become the largest finite number. */
1046 category = fcNormal;
1047 exponent = semantics->maxExponent;
1048 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1049 semantics->precision);
1051 return opInexact;
1054 /* Returns TRUE if, when truncating the current number, with BIT the
1055 new LSB, with the given lost fraction and rounding mode, the result
1056 would need to be rounded away from zero (i.e., by increasing the
1057 signficand). This routine must work for fcZero of both signs, and
1058 fcNormal numbers. */
1059 bool
1060 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1061 lostFraction lost_fraction,
1062 unsigned int bit) const
1064 /* NaNs and infinities should not have lost fractions. */
1065 assert(category == fcNormal || category == fcZero);
1067 /* Current callers never pass this so we don't handle it. */
1068 assert(lost_fraction != lfExactlyZero);
1070 switch (rounding_mode) {
1071 default:
1072 llvm_unreachable(0);
1074 case rmNearestTiesToAway:
1075 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1077 case rmNearestTiesToEven:
1078 if(lost_fraction == lfMoreThanHalf)
1079 return true;
1081 /* Our zeroes don't have a significand to test. */
1082 if(lost_fraction == lfExactlyHalf && category != fcZero)
1083 return APInt::tcExtractBit(significandParts(), bit);
1085 return false;
1087 case rmTowardZero:
1088 return false;
1090 case rmTowardPositive:
1091 return sign == false;
1093 case rmTowardNegative:
1094 return sign == true;
1098 APFloat::opStatus
1099 APFloat::normalize(roundingMode rounding_mode,
1100 lostFraction lost_fraction)
1102 unsigned int omsb; /* One, not zero, based MSB. */
1103 int exponentChange;
1105 if(category != fcNormal)
1106 return opOK;
1108 /* Before rounding normalize the exponent of fcNormal numbers. */
1109 omsb = significandMSB() + 1;
1111 if(omsb) {
1112 /* OMSB is numbered from 1. We want to place it in the integer
1113 bit numbered PRECISON if possible, with a compensating change in
1114 the exponent. */
1115 exponentChange = omsb - semantics->precision;
1117 /* If the resulting exponent is too high, overflow according to
1118 the rounding mode. */
1119 if(exponent + exponentChange > semantics->maxExponent)
1120 return handleOverflow(rounding_mode);
1122 /* Subnormal numbers have exponent minExponent, and their MSB
1123 is forced based on that. */
1124 if(exponent + exponentChange < semantics->minExponent)
1125 exponentChange = semantics->minExponent - exponent;
1127 /* Shifting left is easy as we don't lose precision. */
1128 if(exponentChange < 0) {
1129 assert(lost_fraction == lfExactlyZero);
1131 shiftSignificandLeft(-exponentChange);
1133 return opOK;
1136 if(exponentChange > 0) {
1137 lostFraction lf;
1139 /* Shift right and capture any new lost fraction. */
1140 lf = shiftSignificandRight(exponentChange);
1142 lost_fraction = combineLostFractions(lf, lost_fraction);
1144 /* Keep OMSB up-to-date. */
1145 if(omsb > (unsigned) exponentChange)
1146 omsb -= exponentChange;
1147 else
1148 omsb = 0;
1152 /* Now round the number according to rounding_mode given the lost
1153 fraction. */
1155 /* As specified in IEEE 754, since we do not trap we do not report
1156 underflow for exact results. */
1157 if(lost_fraction == lfExactlyZero) {
1158 /* Canonicalize zeroes. */
1159 if(omsb == 0)
1160 category = fcZero;
1162 return opOK;
1165 /* Increment the significand if we're rounding away from zero. */
1166 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1167 if(omsb == 0)
1168 exponent = semantics->minExponent;
1170 incrementSignificand();
1171 omsb = significandMSB() + 1;
1173 /* Did the significand increment overflow? */
1174 if(omsb == (unsigned) semantics->precision + 1) {
1175 /* Renormalize by incrementing the exponent and shifting our
1176 significand right one. However if we already have the
1177 maximum exponent we overflow to infinity. */
1178 if(exponent == semantics->maxExponent) {
1179 category = fcInfinity;
1181 return (opStatus) (opOverflow | opInexact);
1184 shiftSignificandRight(1);
1186 return opInexact;
1190 /* The normal case - we were and are not denormal, and any
1191 significand increment above didn't overflow. */
1192 if(omsb == semantics->precision)
1193 return opInexact;
1195 /* We have a non-zero denormal. */
1196 assert(omsb < semantics->precision);
1198 /* Canonicalize zeroes. */
1199 if(omsb == 0)
1200 category = fcZero;
1202 /* The fcZero case is a denormal that underflowed to zero. */
1203 return (opStatus) (opUnderflow | opInexact);
1206 APFloat::opStatus
1207 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1209 switch (convolve(category, rhs.category)) {
1210 default:
1211 llvm_unreachable(0);
1213 case convolve(fcNaN, fcZero):
1214 case convolve(fcNaN, fcNormal):
1215 case convolve(fcNaN, fcInfinity):
1216 case convolve(fcNaN, fcNaN):
1217 case convolve(fcNormal, fcZero):
1218 case convolve(fcInfinity, fcNormal):
1219 case convolve(fcInfinity, fcZero):
1220 return opOK;
1222 case convolve(fcZero, fcNaN):
1223 case convolve(fcNormal, fcNaN):
1224 case convolve(fcInfinity, fcNaN):
1225 category = fcNaN;
1226 copySignificand(rhs);
1227 return opOK;
1229 case convolve(fcNormal, fcInfinity):
1230 case convolve(fcZero, fcInfinity):
1231 category = fcInfinity;
1232 sign = rhs.sign ^ subtract;
1233 return opOK;
1235 case convolve(fcZero, fcNormal):
1236 assign(rhs);
1237 sign = rhs.sign ^ subtract;
1238 return opOK;
1240 case convolve(fcZero, fcZero):
1241 /* Sign depends on rounding mode; handled by caller. */
1242 return opOK;
1244 case convolve(fcInfinity, fcInfinity):
1245 /* Differently signed infinities can only be validly
1246 subtracted. */
1247 if(((sign ^ rhs.sign)!=0) != subtract) {
1248 makeNaN();
1249 return opInvalidOp;
1252 return opOK;
1254 case convolve(fcNormal, fcNormal):
1255 return opDivByZero;
1259 /* Add or subtract two normal numbers. */
1260 lostFraction
1261 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1263 integerPart carry;
1264 lostFraction lost_fraction;
1265 int bits;
1267 /* Determine if the operation on the absolute values is effectively
1268 an addition or subtraction. */
1269 subtract ^= (sign ^ rhs.sign) ? true : false;
1271 /* Are we bigger exponent-wise than the RHS? */
1272 bits = exponent - rhs.exponent;
1274 /* Subtraction is more subtle than one might naively expect. */
1275 if(subtract) {
1276 APFloat temp_rhs(rhs);
1277 bool reverse;
1279 if (bits == 0) {
1280 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1281 lost_fraction = lfExactlyZero;
1282 } else if (bits > 0) {
1283 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1284 shiftSignificandLeft(1);
1285 reverse = false;
1286 } else {
1287 lost_fraction = shiftSignificandRight(-bits - 1);
1288 temp_rhs.shiftSignificandLeft(1);
1289 reverse = true;
1292 if (reverse) {
1293 carry = temp_rhs.subtractSignificand
1294 (*this, lost_fraction != lfExactlyZero);
1295 copySignificand(temp_rhs);
1296 sign = !sign;
1297 } else {
1298 carry = subtractSignificand
1299 (temp_rhs, lost_fraction != lfExactlyZero);
1302 /* Invert the lost fraction - it was on the RHS and
1303 subtracted. */
1304 if(lost_fraction == lfLessThanHalf)
1305 lost_fraction = lfMoreThanHalf;
1306 else if(lost_fraction == lfMoreThanHalf)
1307 lost_fraction = lfLessThanHalf;
1309 /* The code above is intended to ensure that no borrow is
1310 necessary. */
1311 assert(!carry);
1312 } else {
1313 if(bits > 0) {
1314 APFloat temp_rhs(rhs);
1316 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1317 carry = addSignificand(temp_rhs);
1318 } else {
1319 lost_fraction = shiftSignificandRight(-bits);
1320 carry = addSignificand(rhs);
1323 /* We have a guard bit; generating a carry cannot happen. */
1324 assert(!carry);
1327 return lost_fraction;
1330 APFloat::opStatus
1331 APFloat::multiplySpecials(const APFloat &rhs)
1333 switch (convolve(category, rhs.category)) {
1334 default:
1335 llvm_unreachable(0);
1337 case convolve(fcNaN, fcZero):
1338 case convolve(fcNaN, fcNormal):
1339 case convolve(fcNaN, fcInfinity):
1340 case convolve(fcNaN, fcNaN):
1341 return opOK;
1343 case convolve(fcZero, fcNaN):
1344 case convolve(fcNormal, fcNaN):
1345 case convolve(fcInfinity, fcNaN):
1346 category = fcNaN;
1347 copySignificand(rhs);
1348 return opOK;
1350 case convolve(fcNormal, fcInfinity):
1351 case convolve(fcInfinity, fcNormal):
1352 case convolve(fcInfinity, fcInfinity):
1353 category = fcInfinity;
1354 return opOK;
1356 case convolve(fcZero, fcNormal):
1357 case convolve(fcNormal, fcZero):
1358 case convolve(fcZero, fcZero):
1359 category = fcZero;
1360 return opOK;
1362 case convolve(fcZero, fcInfinity):
1363 case convolve(fcInfinity, fcZero):
1364 makeNaN();
1365 return opInvalidOp;
1367 case convolve(fcNormal, fcNormal):
1368 return opOK;
1372 APFloat::opStatus
1373 APFloat::divideSpecials(const APFloat &rhs)
1375 switch (convolve(category, rhs.category)) {
1376 default:
1377 llvm_unreachable(0);
1379 case convolve(fcNaN, fcZero):
1380 case convolve(fcNaN, fcNormal):
1381 case convolve(fcNaN, fcInfinity):
1382 case convolve(fcNaN, fcNaN):
1383 case convolve(fcInfinity, fcZero):
1384 case convolve(fcInfinity, fcNormal):
1385 case convolve(fcZero, fcInfinity):
1386 case convolve(fcZero, fcNormal):
1387 return opOK;
1389 case convolve(fcZero, fcNaN):
1390 case convolve(fcNormal, fcNaN):
1391 case convolve(fcInfinity, fcNaN):
1392 category = fcNaN;
1393 copySignificand(rhs);
1394 return opOK;
1396 case convolve(fcNormal, fcInfinity):
1397 category = fcZero;
1398 return opOK;
1400 case convolve(fcNormal, fcZero):
1401 category = fcInfinity;
1402 return opDivByZero;
1404 case convolve(fcInfinity, fcInfinity):
1405 case convolve(fcZero, fcZero):
1406 makeNaN();
1407 return opInvalidOp;
1409 case convolve(fcNormal, fcNormal):
1410 return opOK;
1414 APFloat::opStatus
1415 APFloat::modSpecials(const APFloat &rhs)
1417 switch (convolve(category, rhs.category)) {
1418 default:
1419 llvm_unreachable(0);
1421 case convolve(fcNaN, fcZero):
1422 case convolve(fcNaN, fcNormal):
1423 case convolve(fcNaN, fcInfinity):
1424 case convolve(fcNaN, fcNaN):
1425 case convolve(fcZero, fcInfinity):
1426 case convolve(fcZero, fcNormal):
1427 case convolve(fcNormal, fcInfinity):
1428 return opOK;
1430 case convolve(fcZero, fcNaN):
1431 case convolve(fcNormal, fcNaN):
1432 case convolve(fcInfinity, fcNaN):
1433 category = fcNaN;
1434 copySignificand(rhs);
1435 return opOK;
1437 case convolve(fcNormal, fcZero):
1438 case convolve(fcInfinity, fcZero):
1439 case convolve(fcInfinity, fcNormal):
1440 case convolve(fcInfinity, fcInfinity):
1441 case convolve(fcZero, fcZero):
1442 makeNaN();
1443 return opInvalidOp;
1445 case convolve(fcNormal, fcNormal):
1446 return opOK;
1450 /* Change sign. */
1451 void
1452 APFloat::changeSign()
1454 /* Look mummy, this one's easy. */
1455 sign = !sign;
1458 void
1459 APFloat::clearSign()
1461 /* So is this one. */
1462 sign = 0;
1465 void
1466 APFloat::copySign(const APFloat &rhs)
1468 /* And this one. */
1469 sign = rhs.sign;
1472 /* Normalized addition or subtraction. */
1473 APFloat::opStatus
1474 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1475 bool subtract)
1477 opStatus fs;
1479 assertArithmeticOK(*semantics);
1481 fs = addOrSubtractSpecials(rhs, subtract);
1483 /* This return code means it was not a simple case. */
1484 if(fs == opDivByZero) {
1485 lostFraction lost_fraction;
1487 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1488 fs = normalize(rounding_mode, lost_fraction);
1490 /* Can only be zero if we lost no fraction. */
1491 assert(category != fcZero || lost_fraction == lfExactlyZero);
1494 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1495 positive zero unless rounding to minus infinity, except that
1496 adding two like-signed zeroes gives that zero. */
1497 if(category == fcZero) {
1498 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1499 sign = (rounding_mode == rmTowardNegative);
1502 return fs;
1505 /* Normalized addition. */
1506 APFloat::opStatus
1507 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1509 return addOrSubtract(rhs, rounding_mode, false);
1512 /* Normalized subtraction. */
1513 APFloat::opStatus
1514 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1516 return addOrSubtract(rhs, rounding_mode, true);
1519 /* Normalized multiply. */
1520 APFloat::opStatus
1521 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1523 opStatus fs;
1525 assertArithmeticOK(*semantics);
1526 sign ^= rhs.sign;
1527 fs = multiplySpecials(rhs);
1529 if(category == fcNormal) {
1530 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1531 fs = normalize(rounding_mode, lost_fraction);
1532 if(lost_fraction != lfExactlyZero)
1533 fs = (opStatus) (fs | opInexact);
1536 return fs;
1539 /* Normalized divide. */
1540 APFloat::opStatus
1541 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1543 opStatus fs;
1545 assertArithmeticOK(*semantics);
1546 sign ^= rhs.sign;
1547 fs = divideSpecials(rhs);
1549 if(category == fcNormal) {
1550 lostFraction lost_fraction = divideSignificand(rhs);
1551 fs = normalize(rounding_mode, lost_fraction);
1552 if(lost_fraction != lfExactlyZero)
1553 fs = (opStatus) (fs | opInexact);
1556 return fs;
1559 /* Normalized remainder. This is not currently correct in all cases. */
1560 APFloat::opStatus
1561 APFloat::remainder(const APFloat &rhs)
1563 opStatus fs;
1564 APFloat V = *this;
1565 unsigned int origSign = sign;
1567 assertArithmeticOK(*semantics);
1568 fs = V.divide(rhs, rmNearestTiesToEven);
1569 if (fs == opDivByZero)
1570 return fs;
1572 int parts = partCount();
1573 integerPart *x = new integerPart[parts];
1574 bool ignored;
1575 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1576 rmNearestTiesToEven, &ignored);
1577 if (fs==opInvalidOp)
1578 return fs;
1580 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1581 rmNearestTiesToEven);
1582 assert(fs==opOK); // should always work
1584 fs = V.multiply(rhs, rmNearestTiesToEven);
1585 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1587 fs = subtract(V, rmNearestTiesToEven);
1588 assert(fs==opOK || fs==opInexact); // likewise
1590 if (isZero())
1591 sign = origSign; // IEEE754 requires this
1592 delete[] x;
1593 return fs;
1596 /* Normalized llvm frem (C fmod).
1597 This is not currently correct in all cases. */
1598 APFloat::opStatus
1599 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1601 opStatus fs;
1602 assertArithmeticOK(*semantics);
1603 fs = modSpecials(rhs);
1605 if (category == fcNormal && rhs.category == fcNormal) {
1606 APFloat V = *this;
1607 unsigned int origSign = sign;
1609 fs = V.divide(rhs, rmNearestTiesToEven);
1610 if (fs == opDivByZero)
1611 return fs;
1613 int parts = partCount();
1614 integerPart *x = new integerPart[parts];
1615 bool ignored;
1616 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1617 rmTowardZero, &ignored);
1618 if (fs==opInvalidOp)
1619 return fs;
1621 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1622 rmNearestTiesToEven);
1623 assert(fs==opOK); // should always work
1625 fs = V.multiply(rhs, rounding_mode);
1626 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1628 fs = subtract(V, rounding_mode);
1629 assert(fs==opOK || fs==opInexact); // likewise
1631 if (isZero())
1632 sign = origSign; // IEEE754 requires this
1633 delete[] x;
1635 return fs;
1638 /* Normalized fused-multiply-add. */
1639 APFloat::opStatus
1640 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1641 const APFloat &addend,
1642 roundingMode rounding_mode)
1644 opStatus fs;
1646 assertArithmeticOK(*semantics);
1648 /* Post-multiplication sign, before addition. */
1649 sign ^= multiplicand.sign;
1651 /* If and only if all arguments are normal do we need to do an
1652 extended-precision calculation. */
1653 if(category == fcNormal
1654 && multiplicand.category == fcNormal
1655 && addend.category == fcNormal) {
1656 lostFraction lost_fraction;
1658 lost_fraction = multiplySignificand(multiplicand, &addend);
1659 fs = normalize(rounding_mode, lost_fraction);
1660 if(lost_fraction != lfExactlyZero)
1661 fs = (opStatus) (fs | opInexact);
1663 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1664 positive zero unless rounding to minus infinity, except that
1665 adding two like-signed zeroes gives that zero. */
1666 if(category == fcZero && sign != addend.sign)
1667 sign = (rounding_mode == rmTowardNegative);
1668 } else {
1669 fs = multiplySpecials(multiplicand);
1671 /* FS can only be opOK or opInvalidOp. There is no more work
1672 to do in the latter case. The IEEE-754R standard says it is
1673 implementation-defined in this case whether, if ADDEND is a
1674 quiet NaN, we raise invalid op; this implementation does so.
1676 If we need to do the addition we can do so with normal
1677 precision. */
1678 if(fs == opOK)
1679 fs = addOrSubtract(addend, rounding_mode, false);
1682 return fs;
1685 /* Comparison requires normalized numbers. */
1686 APFloat::cmpResult
1687 APFloat::compare(const APFloat &rhs) const
1689 cmpResult result;
1691 assertArithmeticOK(*semantics);
1692 assert(semantics == rhs.semantics);
1694 switch (convolve(category, rhs.category)) {
1695 default:
1696 llvm_unreachable(0);
1698 case convolve(fcNaN, fcZero):
1699 case convolve(fcNaN, fcNormal):
1700 case convolve(fcNaN, fcInfinity):
1701 case convolve(fcNaN, fcNaN):
1702 case convolve(fcZero, fcNaN):
1703 case convolve(fcNormal, fcNaN):
1704 case convolve(fcInfinity, fcNaN):
1705 return cmpUnordered;
1707 case convolve(fcInfinity, fcNormal):
1708 case convolve(fcInfinity, fcZero):
1709 case convolve(fcNormal, fcZero):
1710 if(sign)
1711 return cmpLessThan;
1712 else
1713 return cmpGreaterThan;
1715 case convolve(fcNormal, fcInfinity):
1716 case convolve(fcZero, fcInfinity):
1717 case convolve(fcZero, fcNormal):
1718 if(rhs.sign)
1719 return cmpGreaterThan;
1720 else
1721 return cmpLessThan;
1723 case convolve(fcInfinity, fcInfinity):
1724 if(sign == rhs.sign)
1725 return cmpEqual;
1726 else if(sign)
1727 return cmpLessThan;
1728 else
1729 return cmpGreaterThan;
1731 case convolve(fcZero, fcZero):
1732 return cmpEqual;
1734 case convolve(fcNormal, fcNormal):
1735 break;
1738 /* Two normal numbers. Do they have the same sign? */
1739 if(sign != rhs.sign) {
1740 if(sign)
1741 result = cmpLessThan;
1742 else
1743 result = cmpGreaterThan;
1744 } else {
1745 /* Compare absolute values; invert result if negative. */
1746 result = compareAbsoluteValue(rhs);
1748 if(sign) {
1749 if(result == cmpLessThan)
1750 result = cmpGreaterThan;
1751 else if(result == cmpGreaterThan)
1752 result = cmpLessThan;
1756 return result;
1759 /// APFloat::convert - convert a value of one floating point type to another.
1760 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1761 /// records whether the transformation lost information, i.e. whether
1762 /// converting the result back to the original type will produce the
1763 /// original value (this is almost the same as return value==fsOK, but there
1764 /// are edge cases where this is not so).
1766 APFloat::opStatus
1767 APFloat::convert(const fltSemantics &toSemantics,
1768 roundingMode rounding_mode, bool *losesInfo)
1770 lostFraction lostFraction;
1771 unsigned int newPartCount, oldPartCount;
1772 opStatus fs;
1774 assertArithmeticOK(*semantics);
1775 assertArithmeticOK(toSemantics);
1776 lostFraction = lfExactlyZero;
1777 newPartCount = partCountForBits(toSemantics.precision + 1);
1778 oldPartCount = partCount();
1780 /* Handle storage complications. If our new form is wider,
1781 re-allocate our bit pattern into wider storage. If it is
1782 narrower, we ignore the excess parts, but if narrowing to a
1783 single part we need to free the old storage.
1784 Be careful not to reference significandParts for zeroes
1785 and infinities, since it aborts. */
1786 if (newPartCount > oldPartCount) {
1787 integerPart *newParts;
1788 newParts = new integerPart[newPartCount];
1789 APInt::tcSet(newParts, 0, newPartCount);
1790 if (category==fcNormal || category==fcNaN)
1791 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1792 freeSignificand();
1793 significand.parts = newParts;
1794 } else if (newPartCount < oldPartCount) {
1795 /* Capture any lost fraction through truncation of parts so we get
1796 correct rounding whilst normalizing. */
1797 if (category==fcNormal)
1798 lostFraction = lostFractionThroughTruncation
1799 (significandParts(), oldPartCount, toSemantics.precision);
1800 if (newPartCount == 1) {
1801 integerPart newPart = 0;
1802 if (category==fcNormal || category==fcNaN)
1803 newPart = significandParts()[0];
1804 freeSignificand();
1805 significand.part = newPart;
1809 if(category == fcNormal) {
1810 /* Re-interpret our bit-pattern. */
1811 exponent += toSemantics.precision - semantics->precision;
1812 semantics = &toSemantics;
1813 fs = normalize(rounding_mode, lostFraction);
1814 *losesInfo = (fs != opOK);
1815 } else if (category == fcNaN) {
1816 int shift = toSemantics.precision - semantics->precision;
1817 // Do this now so significandParts gets the right answer
1818 const fltSemantics *oldSemantics = semantics;
1819 semantics = &toSemantics;
1820 *losesInfo = false;
1821 // No normalization here, just truncate
1822 if (shift>0)
1823 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1824 else if (shift < 0) {
1825 unsigned ushift = -shift;
1826 // Figure out if we are losing information. This happens
1827 // if are shifting out something other than 0s, or if the x87 long
1828 // double input did not have its integer bit set (pseudo-NaN), or if the
1829 // x87 long double input did not have its QNan bit set (because the x87
1830 // hardware sets this bit when converting a lower-precision NaN to
1831 // x87 long double).
1832 if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
1833 *losesInfo = true;
1834 if (oldSemantics == &APFloat::x87DoubleExtended &&
1835 (!(*significandParts() & 0x8000000000000000ULL) ||
1836 !(*significandParts() & 0x4000000000000000ULL)))
1837 *losesInfo = true;
1838 APInt::tcShiftRight(significandParts(), newPartCount, ushift);
1840 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1841 // does not give you back the same bits. This is dubious, and we
1842 // don't currently do it. You're really supposed to get
1843 // an invalid operation signal at runtime, but nobody does that.
1844 fs = opOK;
1845 } else {
1846 semantics = &toSemantics;
1847 fs = opOK;
1848 *losesInfo = false;
1851 return fs;
1854 /* Convert a floating point number to an integer according to the
1855 rounding mode. If the rounded integer value is out of range this
1856 returns an invalid operation exception and the contents of the
1857 destination parts are unspecified. If the rounded value is in
1858 range but the floating point number is not the exact integer, the C
1859 standard doesn't require an inexact exception to be raised. IEEE
1860 854 does require it so we do that.
1862 Note that for conversions to integer type the C standard requires
1863 round-to-zero to always be used. */
1864 APFloat::opStatus
1865 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1866 bool isSigned,
1867 roundingMode rounding_mode,
1868 bool *isExact) const
1870 lostFraction lost_fraction;
1871 const integerPart *src;
1872 unsigned int dstPartsCount, truncatedBits;
1874 assertArithmeticOK(*semantics);
1876 *isExact = false;
1878 /* Handle the three special cases first. */
1879 if(category == fcInfinity || category == fcNaN)
1880 return opInvalidOp;
1882 dstPartsCount = partCountForBits(width);
1884 if(category == fcZero) {
1885 APInt::tcSet(parts, 0, dstPartsCount);
1886 // Negative zero can't be represented as an int.
1887 *isExact = !sign;
1888 return opOK;
1891 src = significandParts();
1893 /* Step 1: place our absolute value, with any fraction truncated, in
1894 the destination. */
1895 if (exponent < 0) {
1896 /* Our absolute value is less than one; truncate everything. */
1897 APInt::tcSet(parts, 0, dstPartsCount);
1898 /* For exponent -1 the integer bit represents .5, look at that.
1899 For smaller exponents leftmost truncated bit is 0. */
1900 truncatedBits = semantics->precision -1U - exponent;
1901 } else {
1902 /* We want the most significant (exponent + 1) bits; the rest are
1903 truncated. */
1904 unsigned int bits = exponent + 1U;
1906 /* Hopelessly large in magnitude? */
1907 if (bits > width)
1908 return opInvalidOp;
1910 if (bits < semantics->precision) {
1911 /* We truncate (semantics->precision - bits) bits. */
1912 truncatedBits = semantics->precision - bits;
1913 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1914 } else {
1915 /* We want at least as many bits as are available. */
1916 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1917 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1918 truncatedBits = 0;
1922 /* Step 2: work out any lost fraction, and increment the absolute
1923 value if we would round away from zero. */
1924 if (truncatedBits) {
1925 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1926 truncatedBits);
1927 if (lost_fraction != lfExactlyZero
1928 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
1929 if (APInt::tcIncrement(parts, dstPartsCount))
1930 return opInvalidOp; /* Overflow. */
1932 } else {
1933 lost_fraction = lfExactlyZero;
1936 /* Step 3: check if we fit in the destination. */
1937 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
1939 if (sign) {
1940 if (!isSigned) {
1941 /* Negative numbers cannot be represented as unsigned. */
1942 if (omsb != 0)
1943 return opInvalidOp;
1944 } else {
1945 /* It takes omsb bits to represent the unsigned integer value.
1946 We lose a bit for the sign, but care is needed as the
1947 maximally negative integer is a special case. */
1948 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
1949 return opInvalidOp;
1951 /* This case can happen because of rounding. */
1952 if (omsb > width)
1953 return opInvalidOp;
1956 APInt::tcNegate (parts, dstPartsCount);
1957 } else {
1958 if (omsb >= width + !isSigned)
1959 return opInvalidOp;
1962 if (lost_fraction == lfExactlyZero) {
1963 *isExact = true;
1964 return opOK;
1965 } else
1966 return opInexact;
1969 /* Same as convertToSignExtendedInteger, except we provide
1970 deterministic values in case of an invalid operation exception,
1971 namely zero for NaNs and the minimal or maximal value respectively
1972 for underflow or overflow.
1973 The *isExact output tells whether the result is exact, in the sense
1974 that converting it back to the original floating point type produces
1975 the original value. This is almost equivalent to result==opOK,
1976 except for negative zeroes.
1978 APFloat::opStatus
1979 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1980 bool isSigned,
1981 roundingMode rounding_mode, bool *isExact) const
1983 opStatus fs;
1985 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
1986 isExact);
1988 if (fs == opInvalidOp) {
1989 unsigned int bits, dstPartsCount;
1991 dstPartsCount = partCountForBits(width);
1993 if (category == fcNaN)
1994 bits = 0;
1995 else if (sign)
1996 bits = isSigned;
1997 else
1998 bits = width - isSigned;
2000 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2001 if (sign && isSigned)
2002 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2005 return fs;
2008 /* Convert an unsigned integer SRC to a floating point number,
2009 rounding according to ROUNDING_MODE. The sign of the floating
2010 point number is not modified. */
2011 APFloat::opStatus
2012 APFloat::convertFromUnsignedParts(const integerPart *src,
2013 unsigned int srcCount,
2014 roundingMode rounding_mode)
2016 unsigned int omsb, precision, dstCount;
2017 integerPart *dst;
2018 lostFraction lost_fraction;
2020 assertArithmeticOK(*semantics);
2021 category = fcNormal;
2022 omsb = APInt::tcMSB(src, srcCount) + 1;
2023 dst = significandParts();
2024 dstCount = partCount();
2025 precision = semantics->precision;
2027 /* We want the most significant PRECISON bits of SRC. There may not
2028 be that many; extract what we can. */
2029 if (precision <= omsb) {
2030 exponent = omsb - 1;
2031 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2032 omsb - precision);
2033 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2034 } else {
2035 exponent = precision - 1;
2036 lost_fraction = lfExactlyZero;
2037 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2040 return normalize(rounding_mode, lost_fraction);
2043 APFloat::opStatus
2044 APFloat::convertFromAPInt(const APInt &Val,
2045 bool isSigned,
2046 roundingMode rounding_mode)
2048 unsigned int partCount = Val.getNumWords();
2049 APInt api = Val;
2051 sign = false;
2052 if (isSigned && api.isNegative()) {
2053 sign = true;
2054 api = -api;
2057 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2060 /* Convert a two's complement integer SRC to a floating point number,
2061 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2062 integer is signed, in which case it must be sign-extended. */
2063 APFloat::opStatus
2064 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2065 unsigned int srcCount,
2066 bool isSigned,
2067 roundingMode rounding_mode)
2069 opStatus status;
2071 assertArithmeticOK(*semantics);
2072 if (isSigned
2073 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2074 integerPart *copy;
2076 /* If we're signed and negative negate a copy. */
2077 sign = true;
2078 copy = new integerPart[srcCount];
2079 APInt::tcAssign(copy, src, srcCount);
2080 APInt::tcNegate(copy, srcCount);
2081 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2082 delete [] copy;
2083 } else {
2084 sign = false;
2085 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2088 return status;
2091 /* FIXME: should this just take a const APInt reference? */
2092 APFloat::opStatus
2093 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2094 unsigned int width, bool isSigned,
2095 roundingMode rounding_mode)
2097 unsigned int partCount = partCountForBits(width);
2098 APInt api = APInt(width, partCount, parts);
2100 sign = false;
2101 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
2102 sign = true;
2103 api = -api;
2106 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2109 APFloat::opStatus
2110 APFloat::convertFromHexadecimalString(const char *p,
2111 roundingMode rounding_mode)
2113 lostFraction lost_fraction;
2114 integerPart *significand;
2115 unsigned int bitPos, partsCount;
2116 const char *dot, *firstSignificantDigit;
2118 zeroSignificand();
2119 exponent = 0;
2120 category = fcNormal;
2122 significand = significandParts();
2123 partsCount = partCount();
2124 bitPos = partsCount * integerPartWidth;
2126 /* Skip leading zeroes and any (hexa)decimal point. */
2127 p = skipLeadingZeroesAndAnyDot(p, &dot);
2128 firstSignificantDigit = p;
2130 for(;;) {
2131 integerPart hex_value;
2133 if(*p == '.') {
2134 assert(dot == 0);
2135 dot = p++;
2138 hex_value = hexDigitValue(*p);
2139 if(hex_value == -1U) {
2140 lost_fraction = lfExactlyZero;
2141 break;
2144 p++;
2146 /* Store the number whilst 4-bit nibbles remain. */
2147 if(bitPos) {
2148 bitPos -= 4;
2149 hex_value <<= bitPos % integerPartWidth;
2150 significand[bitPos / integerPartWidth] |= hex_value;
2151 } else {
2152 lost_fraction = trailingHexadecimalFraction(p, hex_value);
2153 while(hexDigitValue(*p) != -1U)
2154 p++;
2155 break;
2159 /* Hex floats require an exponent but not a hexadecimal point. */
2160 assert(*p == 'p' || *p == 'P');
2162 /* Ignore the exponent if we are zero. */
2163 if(p != firstSignificantDigit) {
2164 int expAdjustment;
2166 /* Implicit hexadecimal point? */
2167 if(!dot)
2168 dot = p;
2170 /* Calculate the exponent adjustment implicit in the number of
2171 significant digits. */
2172 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2173 if(expAdjustment < 0)
2174 expAdjustment++;
2175 expAdjustment = expAdjustment * 4 - 1;
2177 /* Adjust for writing the significand starting at the most
2178 significant nibble. */
2179 expAdjustment += semantics->precision;
2180 expAdjustment -= partsCount * integerPartWidth;
2182 /* Adjust for the given exponent. */
2183 exponent = totalExponent(p, expAdjustment);
2186 return normalize(rounding_mode, lost_fraction);
2189 APFloat::opStatus
2190 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2191 unsigned sigPartCount, int exp,
2192 roundingMode rounding_mode)
2194 unsigned int parts, pow5PartCount;
2195 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2196 integerPart pow5Parts[maxPowerOfFiveParts];
2197 bool isNearest;
2199 isNearest = (rounding_mode == rmNearestTiesToEven
2200 || rounding_mode == rmNearestTiesToAway);
2202 parts = partCountForBits(semantics->precision + 11);
2204 /* Calculate pow(5, abs(exp)). */
2205 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2207 for (;; parts *= 2) {
2208 opStatus sigStatus, powStatus;
2209 unsigned int excessPrecision, truncatedBits;
2211 calcSemantics.precision = parts * integerPartWidth - 1;
2212 excessPrecision = calcSemantics.precision - semantics->precision;
2213 truncatedBits = excessPrecision;
2215 APFloat decSig(calcSemantics, fcZero, sign);
2216 APFloat pow5(calcSemantics, fcZero, false);
2218 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2219 rmNearestTiesToEven);
2220 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2221 rmNearestTiesToEven);
2222 /* Add exp, as 10^n = 5^n * 2^n. */
2223 decSig.exponent += exp;
2225 lostFraction calcLostFraction;
2226 integerPart HUerr, HUdistance;
2227 unsigned int powHUerr;
2229 if (exp >= 0) {
2230 /* multiplySignificand leaves the precision-th bit set to 1. */
2231 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2232 powHUerr = powStatus != opOK;
2233 } else {
2234 calcLostFraction = decSig.divideSignificand(pow5);
2235 /* Denormal numbers have less precision. */
2236 if (decSig.exponent < semantics->minExponent) {
2237 excessPrecision += (semantics->minExponent - decSig.exponent);
2238 truncatedBits = excessPrecision;
2239 if (excessPrecision > calcSemantics.precision)
2240 excessPrecision = calcSemantics.precision;
2242 /* Extra half-ulp lost in reciprocal of exponent. */
2243 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2246 /* Both multiplySignificand and divideSignificand return the
2247 result with the integer bit set. */
2248 assert (APInt::tcExtractBit
2249 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2251 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2252 powHUerr);
2253 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2254 excessPrecision, isNearest);
2256 /* Are we guaranteed to round correctly if we truncate? */
2257 if (HUdistance >= HUerr) {
2258 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2259 calcSemantics.precision - excessPrecision,
2260 excessPrecision);
2261 /* Take the exponent of decSig. If we tcExtract-ed less bits
2262 above we must adjust our exponent to compensate for the
2263 implicit right shift. */
2264 exponent = (decSig.exponent + semantics->precision
2265 - (calcSemantics.precision - excessPrecision));
2266 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2267 decSig.partCount(),
2268 truncatedBits);
2269 return normalize(rounding_mode, calcLostFraction);
2274 APFloat::opStatus
2275 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
2277 decimalInfo D;
2278 opStatus fs;
2280 /* Scan the text. */
2281 interpretDecimal(p, &D);
2283 /* Handle the quick cases. First the case of no significant digits,
2284 i.e. zero, and then exponents that are obviously too large or too
2285 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2286 definitely overflows if
2288 (exp - 1) * L >= maxExponent
2290 and definitely underflows to zero where
2292 (exp + 1) * L <= minExponent - precision
2294 With integer arithmetic the tightest bounds for L are
2296 93/28 < L < 196/59 [ numerator <= 256 ]
2297 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2300 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2301 category = fcZero;
2302 fs = opOK;
2303 } else if ((D.normalizedExponent + 1) * 28738
2304 <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
2305 /* Underflow to zero and round. */
2306 zeroSignificand();
2307 fs = normalize(rounding_mode, lfLessThanHalf);
2308 } else if ((D.normalizedExponent - 1) * 42039
2309 >= 12655 * semantics->maxExponent) {
2310 /* Overflow and round. */
2311 fs = handleOverflow(rounding_mode);
2312 } else {
2313 integerPart *decSignificand;
2314 unsigned int partCount;
2316 /* A tight upper bound on number of bits required to hold an
2317 N-digit decimal integer is N * 196 / 59. Allocate enough space
2318 to hold the full significand, and an extra part required by
2319 tcMultiplyPart. */
2320 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2321 partCount = partCountForBits(1 + 196 * partCount / 59);
2322 decSignificand = new integerPart[partCount + 1];
2323 partCount = 0;
2325 /* Convert to binary efficiently - we do almost all multiplication
2326 in an integerPart. When this would overflow do we do a single
2327 bignum multiplication, and then revert again to multiplication
2328 in an integerPart. */
2329 do {
2330 integerPart decValue, val, multiplier;
2332 val = 0;
2333 multiplier = 1;
2335 do {
2336 if (*p == '.')
2337 p++;
2339 decValue = decDigitValue(*p++);
2340 multiplier *= 10;
2341 val = val * 10 + decValue;
2342 /* The maximum number that can be multiplied by ten with any
2343 digit added without overflowing an integerPart. */
2344 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2346 /* Multiply out the current part. */
2347 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2348 partCount, partCount + 1, false);
2350 /* If we used another part (likely but not guaranteed), increase
2351 the count. */
2352 if (decSignificand[partCount])
2353 partCount++;
2354 } while (p <= D.lastSigDigit);
2356 category = fcNormal;
2357 fs = roundSignificandWithExponent(decSignificand, partCount,
2358 D.exponent, rounding_mode);
2360 delete [] decSignificand;
2363 return fs;
2366 APFloat::opStatus
2367 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
2369 assertArithmeticOK(*semantics);
2371 /* Handle a leading minus sign. */
2372 if(*p == '-')
2373 sign = 1, p++;
2374 else
2375 sign = 0;
2377 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
2378 return convertFromHexadecimalString(p + 2, rounding_mode);
2380 return convertFromDecimalString(p, rounding_mode);
2383 /* Write out a hexadecimal representation of the floating point value
2384 to DST, which must be of sufficient size, in the C99 form
2385 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2386 excluding the terminating NUL.
2388 If UPPERCASE, the output is in upper case, otherwise in lower case.
2390 HEXDIGITS digits appear altogether, rounding the value if
2391 necessary. If HEXDIGITS is 0, the minimal precision to display the
2392 number precisely is used instead. If nothing would appear after
2393 the decimal point it is suppressed.
2395 The decimal exponent is always printed and has at least one digit.
2396 Zero values display an exponent of zero. Infinities and NaNs
2397 appear as "infinity" or "nan" respectively.
2399 The above rules are as specified by C99. There is ambiguity about
2400 what the leading hexadecimal digit should be. This implementation
2401 uses whatever is necessary so that the exponent is displayed as
2402 stored. This implies the exponent will fall within the IEEE format
2403 range, and the leading hexadecimal digit will be 0 (for denormals),
2404 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2405 any other digits zero).
2407 unsigned int
2408 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2409 bool upperCase, roundingMode rounding_mode) const
2411 char *p;
2413 assertArithmeticOK(*semantics);
2415 p = dst;
2416 if (sign)
2417 *dst++ = '-';
2419 switch (category) {
2420 case fcInfinity:
2421 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2422 dst += sizeof infinityL - 1;
2423 break;
2425 case fcNaN:
2426 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2427 dst += sizeof NaNU - 1;
2428 break;
2430 case fcZero:
2431 *dst++ = '0';
2432 *dst++ = upperCase ? 'X': 'x';
2433 *dst++ = '0';
2434 if (hexDigits > 1) {
2435 *dst++ = '.';
2436 memset (dst, '0', hexDigits - 1);
2437 dst += hexDigits - 1;
2439 *dst++ = upperCase ? 'P': 'p';
2440 *dst++ = '0';
2441 break;
2443 case fcNormal:
2444 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2445 break;
2448 *dst = 0;
2450 return static_cast<unsigned int>(dst - p);
2453 /* Does the hard work of outputting the correctly rounded hexadecimal
2454 form of a normal floating point number with the specified number of
2455 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2456 digits necessary to print the value precisely is output. */
2457 char *
2458 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2459 bool upperCase,
2460 roundingMode rounding_mode) const
2462 unsigned int count, valueBits, shift, partsCount, outputDigits;
2463 const char *hexDigitChars;
2464 const integerPart *significand;
2465 char *p;
2466 bool roundUp;
2468 *dst++ = '0';
2469 *dst++ = upperCase ? 'X': 'x';
2471 roundUp = false;
2472 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2474 significand = significandParts();
2475 partsCount = partCount();
2477 /* +3 because the first digit only uses the single integer bit, so
2478 we have 3 virtual zero most-significant-bits. */
2479 valueBits = semantics->precision + 3;
2480 shift = integerPartWidth - valueBits % integerPartWidth;
2482 /* The natural number of digits required ignoring trailing
2483 insignificant zeroes. */
2484 outputDigits = (valueBits - significandLSB () + 3) / 4;
2486 /* hexDigits of zero means use the required number for the
2487 precision. Otherwise, see if we are truncating. If we are,
2488 find out if we need to round away from zero. */
2489 if (hexDigits) {
2490 if (hexDigits < outputDigits) {
2491 /* We are dropping non-zero bits, so need to check how to round.
2492 "bits" is the number of dropped bits. */
2493 unsigned int bits;
2494 lostFraction fraction;
2496 bits = valueBits - hexDigits * 4;
2497 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2498 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2500 outputDigits = hexDigits;
2503 /* Write the digits consecutively, and start writing in the location
2504 of the hexadecimal point. We move the most significant digit
2505 left and add the hexadecimal point later. */
2506 p = ++dst;
2508 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2510 while (outputDigits && count) {
2511 integerPart part;
2513 /* Put the most significant integerPartWidth bits in "part". */
2514 if (--count == partsCount)
2515 part = 0; /* An imaginary higher zero part. */
2516 else
2517 part = significand[count] << shift;
2519 if (count && shift)
2520 part |= significand[count - 1] >> (integerPartWidth - shift);
2522 /* Convert as much of "part" to hexdigits as we can. */
2523 unsigned int curDigits = integerPartWidth / 4;
2525 if (curDigits > outputDigits)
2526 curDigits = outputDigits;
2527 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2528 outputDigits -= curDigits;
2531 if (roundUp) {
2532 char *q = dst;
2534 /* Note that hexDigitChars has a trailing '0'. */
2535 do {
2536 q--;
2537 *q = hexDigitChars[hexDigitValue (*q) + 1];
2538 } while (*q == '0');
2539 assert (q >= p);
2540 } else {
2541 /* Add trailing zeroes. */
2542 memset (dst, '0', outputDigits);
2543 dst += outputDigits;
2546 /* Move the most significant digit to before the point, and if there
2547 is something after the decimal point add it. This must come
2548 after rounding above. */
2549 p[-1] = p[0];
2550 if (dst -1 == p)
2551 dst--;
2552 else
2553 p[0] = '.';
2555 /* Finally output the exponent. */
2556 *dst++ = upperCase ? 'P': 'p';
2558 return writeSignedDecimal (dst, exponent);
2561 // For good performance it is desirable for different APFloats
2562 // to produce different integers.
2563 uint32_t
2564 APFloat::getHashValue() const
2566 if (category==fcZero) return sign<<8 | semantics->precision ;
2567 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2568 else if (category==fcNaN) return 1<<10 | semantics->precision;
2569 else {
2570 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2571 const integerPart* p = significandParts();
2572 for (int i=partCount(); i>0; i--, p++)
2573 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2574 return hash;
2578 // Conversion from APFloat to/from host float/double. It may eventually be
2579 // possible to eliminate these and have everybody deal with APFloats, but that
2580 // will take a while. This approach will not easily extend to long double.
2581 // Current implementation requires integerPartWidth==64, which is correct at
2582 // the moment but could be made more general.
2584 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2585 // the actual IEEE respresentations. We compensate for that here.
2587 APInt
2588 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2590 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2591 assert (partCount()==2);
2593 uint64_t myexponent, mysignificand;
2595 if (category==fcNormal) {
2596 myexponent = exponent+16383; //bias
2597 mysignificand = significandParts()[0];
2598 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2599 myexponent = 0; // denormal
2600 } else if (category==fcZero) {
2601 myexponent = 0;
2602 mysignificand = 0;
2603 } else if (category==fcInfinity) {
2604 myexponent = 0x7fff;
2605 mysignificand = 0x8000000000000000ULL;
2606 } else {
2607 assert(category == fcNaN && "Unknown category");
2608 myexponent = 0x7fff;
2609 mysignificand = significandParts()[0];
2612 uint64_t words[2];
2613 words[0] = mysignificand;
2614 words[1] = ((uint64_t)(sign & 1) << 15) |
2615 (myexponent & 0x7fffLL);
2616 return APInt(80, 2, words);
2619 APInt
2620 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2622 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2623 assert (partCount()==2);
2625 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2627 if (category==fcNormal) {
2628 myexponent = exponent + 1023; //bias
2629 myexponent2 = exponent2 + 1023;
2630 mysignificand = significandParts()[0];
2631 mysignificand2 = significandParts()[1];
2632 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2633 myexponent = 0; // denormal
2634 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2635 myexponent2 = 0; // denormal
2636 } else if (category==fcZero) {
2637 myexponent = 0;
2638 mysignificand = 0;
2639 myexponent2 = 0;
2640 mysignificand2 = 0;
2641 } else if (category==fcInfinity) {
2642 myexponent = 0x7ff;
2643 myexponent2 = 0;
2644 mysignificand = 0;
2645 mysignificand2 = 0;
2646 } else {
2647 assert(category == fcNaN && "Unknown category");
2648 myexponent = 0x7ff;
2649 mysignificand = significandParts()[0];
2650 myexponent2 = exponent2;
2651 mysignificand2 = significandParts()[1];
2654 uint64_t words[2];
2655 words[0] = ((uint64_t)(sign & 1) << 63) |
2656 ((myexponent & 0x7ff) << 52) |
2657 (mysignificand & 0xfffffffffffffLL);
2658 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2659 ((myexponent2 & 0x7ff) << 52) |
2660 (mysignificand2 & 0xfffffffffffffLL);
2661 return APInt(128, 2, words);
2664 APInt
2665 APFloat::convertDoubleAPFloatToAPInt() const
2667 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2668 assert (partCount()==1);
2670 uint64_t myexponent, mysignificand;
2672 if (category==fcNormal) {
2673 myexponent = exponent+1023; //bias
2674 mysignificand = *significandParts();
2675 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2676 myexponent = 0; // denormal
2677 } else if (category==fcZero) {
2678 myexponent = 0;
2679 mysignificand = 0;
2680 } else if (category==fcInfinity) {
2681 myexponent = 0x7ff;
2682 mysignificand = 0;
2683 } else {
2684 assert(category == fcNaN && "Unknown category!");
2685 myexponent = 0x7ff;
2686 mysignificand = *significandParts();
2689 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2690 ((myexponent & 0x7ff) << 52) |
2691 (mysignificand & 0xfffffffffffffLL))));
2694 APInt
2695 APFloat::convertFloatAPFloatToAPInt() const
2697 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2698 assert (partCount()==1);
2700 uint32_t myexponent, mysignificand;
2702 if (category==fcNormal) {
2703 myexponent = exponent+127; //bias
2704 mysignificand = (uint32_t)*significandParts();
2705 if (myexponent == 1 && !(mysignificand & 0x800000))
2706 myexponent = 0; // denormal
2707 } else if (category==fcZero) {
2708 myexponent = 0;
2709 mysignificand = 0;
2710 } else if (category==fcInfinity) {
2711 myexponent = 0xff;
2712 mysignificand = 0;
2713 } else {
2714 assert(category == fcNaN && "Unknown category!");
2715 myexponent = 0xff;
2716 mysignificand = (uint32_t)*significandParts();
2719 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2720 (mysignificand & 0x7fffff)));
2723 // This function creates an APInt that is just a bit map of the floating
2724 // point constant as it would appear in memory. It is not a conversion,
2725 // and treating the result as a normal integer is unlikely to be useful.
2727 APInt
2728 APFloat::bitcastToAPInt() const
2730 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2731 return convertFloatAPFloatToAPInt();
2733 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2734 return convertDoubleAPFloatToAPInt();
2736 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2737 return convertPPCDoubleDoubleAPFloatToAPInt();
2739 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2740 "unknown format!");
2741 return convertF80LongDoubleAPFloatToAPInt();
2744 float
2745 APFloat::convertToFloat() const
2747 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2748 APInt api = bitcastToAPInt();
2749 return api.bitsToFloat();
2752 double
2753 APFloat::convertToDouble() const
2755 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2756 APInt api = bitcastToAPInt();
2757 return api.bitsToDouble();
2760 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2761 /// does not support these bit patterns:
2762 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2763 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2764 /// exponent = 0, integer bit 1 ("pseudodenormal")
2765 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2766 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2767 void
2768 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2770 assert(api.getBitWidth()==80);
2771 uint64_t i1 = api.getRawData()[0];
2772 uint64_t i2 = api.getRawData()[1];
2773 uint64_t myexponent = (i2 & 0x7fff);
2774 uint64_t mysignificand = i1;
2776 initialize(&APFloat::x87DoubleExtended);
2777 assert(partCount()==2);
2779 sign = static_cast<unsigned int>(i2>>15);
2780 if (myexponent==0 && mysignificand==0) {
2781 // exponent, significand meaningless
2782 category = fcZero;
2783 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2784 // exponent, significand meaningless
2785 category = fcInfinity;
2786 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2787 // exponent meaningless
2788 category = fcNaN;
2789 significandParts()[0] = mysignificand;
2790 significandParts()[1] = 0;
2791 } else {
2792 category = fcNormal;
2793 exponent = myexponent - 16383;
2794 significandParts()[0] = mysignificand;
2795 significandParts()[1] = 0;
2796 if (myexponent==0) // denormal
2797 exponent = -16382;
2801 void
2802 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2804 assert(api.getBitWidth()==128);
2805 uint64_t i1 = api.getRawData()[0];
2806 uint64_t i2 = api.getRawData()[1];
2807 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2808 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2809 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2810 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2812 initialize(&APFloat::PPCDoubleDouble);
2813 assert(partCount()==2);
2815 sign = static_cast<unsigned int>(i1>>63);
2816 sign2 = static_cast<unsigned int>(i2>>63);
2817 if (myexponent==0 && mysignificand==0) {
2818 // exponent, significand meaningless
2819 // exponent2 and significand2 are required to be 0; we don't check
2820 category = fcZero;
2821 } else if (myexponent==0x7ff && mysignificand==0) {
2822 // exponent, significand meaningless
2823 // exponent2 and significand2 are required to be 0; we don't check
2824 category = fcInfinity;
2825 } else if (myexponent==0x7ff && mysignificand!=0) {
2826 // exponent meaningless. So is the whole second word, but keep it
2827 // for determinism.
2828 category = fcNaN;
2829 exponent2 = myexponent2;
2830 significandParts()[0] = mysignificand;
2831 significandParts()[1] = mysignificand2;
2832 } else {
2833 category = fcNormal;
2834 // Note there is no category2; the second word is treated as if it is
2835 // fcNormal, although it might be something else considered by itself.
2836 exponent = myexponent - 1023;
2837 exponent2 = myexponent2 - 1023;
2838 significandParts()[0] = mysignificand;
2839 significandParts()[1] = mysignificand2;
2840 if (myexponent==0) // denormal
2841 exponent = -1022;
2842 else
2843 significandParts()[0] |= 0x10000000000000LL; // integer bit
2844 if (myexponent2==0)
2845 exponent2 = -1022;
2846 else
2847 significandParts()[1] |= 0x10000000000000LL; // integer bit
2851 void
2852 APFloat::initFromDoubleAPInt(const APInt &api)
2854 assert(api.getBitWidth()==64);
2855 uint64_t i = *api.getRawData();
2856 uint64_t myexponent = (i >> 52) & 0x7ff;
2857 uint64_t mysignificand = i & 0xfffffffffffffLL;
2859 initialize(&APFloat::IEEEdouble);
2860 assert(partCount()==1);
2862 sign = static_cast<unsigned int>(i>>63);
2863 if (myexponent==0 && mysignificand==0) {
2864 // exponent, significand meaningless
2865 category = fcZero;
2866 } else if (myexponent==0x7ff && mysignificand==0) {
2867 // exponent, significand meaningless
2868 category = fcInfinity;
2869 } else if (myexponent==0x7ff && mysignificand!=0) {
2870 // exponent meaningless
2871 category = fcNaN;
2872 *significandParts() = mysignificand;
2873 } else {
2874 category = fcNormal;
2875 exponent = myexponent - 1023;
2876 *significandParts() = mysignificand;
2877 if (myexponent==0) // denormal
2878 exponent = -1022;
2879 else
2880 *significandParts() |= 0x10000000000000LL; // integer bit
2884 void
2885 APFloat::initFromFloatAPInt(const APInt & api)
2887 assert(api.getBitWidth()==32);
2888 uint32_t i = (uint32_t)*api.getRawData();
2889 uint32_t myexponent = (i >> 23) & 0xff;
2890 uint32_t mysignificand = i & 0x7fffff;
2892 initialize(&APFloat::IEEEsingle);
2893 assert(partCount()==1);
2895 sign = i >> 31;
2896 if (myexponent==0 && mysignificand==0) {
2897 // exponent, significand meaningless
2898 category = fcZero;
2899 } else if (myexponent==0xff && mysignificand==0) {
2900 // exponent, significand meaningless
2901 category = fcInfinity;
2902 } else if (myexponent==0xff && mysignificand!=0) {
2903 // sign, exponent, significand meaningless
2904 category = fcNaN;
2905 *significandParts() = mysignificand;
2906 } else {
2907 category = fcNormal;
2908 exponent = myexponent - 127; //bias
2909 *significandParts() = mysignificand;
2910 if (myexponent==0) // denormal
2911 exponent = -126;
2912 else
2913 *significandParts() |= 0x800000; // integer bit
2917 /// Treat api as containing the bits of a floating point number. Currently
2918 /// we infer the floating point type from the size of the APInt. The
2919 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2920 /// when the size is anything else).
2921 void
2922 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2924 if (api.getBitWidth() == 32)
2925 return initFromFloatAPInt(api);
2926 else if (api.getBitWidth()==64)
2927 return initFromDoubleAPInt(api);
2928 else if (api.getBitWidth()==80)
2929 return initFromF80LongDoubleAPInt(api);
2930 else if (api.getBitWidth()==128 && !isIEEE)
2931 return initFromPPCDoubleDoubleAPInt(api);
2932 else
2933 llvm_unreachable(0);
2936 APFloat::APFloat(const APInt& api, bool isIEEE)
2938 initFromAPInt(api, isIEEE);
2941 APFloat::APFloat(float f)
2943 APInt api = APInt(32, 0);
2944 initFromAPInt(api.floatToBits(f));
2947 APFloat::APFloat(double d)
2949 APInt api = APInt(64, 0);
2950 initFromAPInt(api.doubleToBits(d));