Update comments.
[llvm/msp430.git] / lib / Support / APFloat.cpp
blob2ee6a276fdf8b3ae29d6cc51200d81cd5b2aa12b
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/Support/MathExtras.h"
18 #include <cstring>
20 using namespace llvm;
22 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
24 /* Assumed in hexadecimal significand parsing, and conversion to
25 hexadecimal strings. */
26 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
27 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
29 namespace llvm {
31 /* Represents floating point arithmetic semantics. */
32 struct fltSemantics {
33 /* The largest E such that 2^E is representable; this matches the
34 definition of IEEE 754. */
35 exponent_t maxExponent;
37 /* The smallest E such that 2^E is a normalized number; this
38 matches the definition of IEEE 754. */
39 exponent_t minExponent;
41 /* Number of bits in the significand. This includes the integer
42 bit. */
43 unsigned int precision;
45 /* True if arithmetic is supported. */
46 unsigned int arithmeticOK;
49 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
50 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
51 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
52 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
53 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
55 // The PowerPC format consists of two doubles. It does not map cleanly
56 // onto the usual format above. For now only storage of constants of
57 // this type is supported, no arithmetic.
58 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
60 /* A tight upper bound on number of parts required to hold the value
61 pow(5, power) is
63 power * 815 / (351 * integerPartWidth) + 1
65 However, whilst the result may require only this many parts,
66 because we are multiplying two values to get it, the
67 multiplication may require an extra part with the excess part
68 being zero (consider the trivial case of 1 * 1, tcFullMultiply
69 requires two parts to hold the single-part result). So we add an
70 extra one to guarantee enough space whilst multiplying. */
71 const unsigned int maxExponent = 16383;
72 const unsigned int maxPrecision = 113;
73 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
74 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
75 / (351 * integerPartWidth));
78 /* A bunch of private, handy routines. */
80 static inline unsigned int
81 partCountForBits(unsigned int bits)
83 return ((bits) + integerPartWidth - 1) / integerPartWidth;
86 /* Returns 0U-9U. Return values >= 10U are not digits. */
87 static inline unsigned int
88 decDigitValue(unsigned int c)
90 return c - '0';
93 static unsigned int
94 hexDigitValue(unsigned int c)
96 unsigned int r;
98 r = c - '0';
99 if(r <= 9)
100 return r;
102 r = c - 'A';
103 if(r <= 5)
104 return r + 10;
106 r = c - 'a';
107 if(r <= 5)
108 return r + 10;
110 return -1U;
113 static inline void
114 assertArithmeticOK(const llvm::fltSemantics &semantics) {
115 assert(semantics.arithmeticOK
116 && "Compile-time arithmetic does not support these semantics");
119 /* Return the value of a decimal exponent of the form
120 [+-]ddddddd.
122 If the exponent overflows, returns a large exponent with the
123 appropriate sign. */
124 static int
125 readExponent(const char *p)
127 bool isNegative;
128 unsigned int absExponent;
129 const unsigned int overlargeExponent = 24000; /* FIXME. */
131 isNegative = (*p == '-');
132 if (*p == '-' || *p == '+')
133 p++;
135 absExponent = decDigitValue(*p++);
136 assert (absExponent < 10U);
138 for (;;) {
139 unsigned int value;
141 value = decDigitValue(*p);
142 if (value >= 10U)
143 break;
145 p++;
146 value += absExponent * 10;
147 if (absExponent >= overlargeExponent) {
148 absExponent = overlargeExponent;
149 break;
151 absExponent = value;
154 if (isNegative)
155 return -(int) absExponent;
156 else
157 return (int) absExponent;
160 /* This is ugly and needs cleaning up, but I don't immediately see
161 how whilst remaining safe. */
162 static int
163 totalExponent(const char *p, int exponentAdjustment)
165 int unsignedExponent;
166 bool negative, overflow;
167 int exponent;
169 /* Move past the exponent letter and sign to the digits. */
170 p++;
171 negative = *p == '-';
172 if(*p == '-' || *p == '+')
173 p++;
175 unsignedExponent = 0;
176 overflow = false;
177 for(;;) {
178 unsigned int value;
180 value = decDigitValue(*p);
181 if(value >= 10U)
182 break;
184 p++;
185 unsignedExponent = unsignedExponent * 10 + value;
186 if(unsignedExponent > 65535)
187 overflow = true;
190 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
191 overflow = true;
193 if(!overflow) {
194 exponent = unsignedExponent;
195 if(negative)
196 exponent = -exponent;
197 exponent += exponentAdjustment;
198 if(exponent > 65535 || exponent < -65536)
199 overflow = true;
202 if(overflow)
203 exponent = negative ? -65536: 65535;
205 return exponent;
208 static const char *
209 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
211 *dot = 0;
212 while(*p == '0')
213 p++;
215 if(*p == '.') {
216 *dot = p++;
217 while(*p == '0')
218 p++;
221 return p;
224 /* Given a normal decimal floating point number of the form
226 dddd.dddd[eE][+-]ddd
228 where the decimal point and exponent are optional, fill out the
229 structure D. Exponent is appropriate if the significand is
230 treated as an integer, and normalizedExponent if the significand
231 is taken to have the decimal point after a single leading
232 non-zero digit.
234 If the value is zero, V->firstSigDigit points to a non-digit, and
235 the return exponent is zero.
237 struct decimalInfo {
238 const char *firstSigDigit;
239 const char *lastSigDigit;
240 int exponent;
241 int normalizedExponent;
244 static void
245 interpretDecimal(const char *p, decimalInfo *D)
247 const char *dot;
249 p = skipLeadingZeroesAndAnyDot (p, &dot);
251 D->firstSigDigit = p;
252 D->exponent = 0;
253 D->normalizedExponent = 0;
255 for (;;) {
256 if (*p == '.') {
257 assert(dot == 0);
258 dot = p++;
260 if (decDigitValue(*p) >= 10U)
261 break;
262 p++;
265 /* If number is all zerooes accept any exponent. */
266 if (p != D->firstSigDigit) {
267 if (*p == 'e' || *p == 'E')
268 D->exponent = readExponent(p + 1);
270 /* Implied decimal point? */
271 if (!dot)
272 dot = p;
274 /* Drop insignificant trailing zeroes. */
277 p--;
278 while (*p == '0');
279 while (*p == '.');
281 /* Adjust the exponents for any decimal point. */
282 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
283 D->normalizedExponent = (D->exponent +
284 static_cast<exponent_t>((p - D->firstSigDigit)
285 - (dot > D->firstSigDigit && dot < p)));
288 D->lastSigDigit = p;
291 /* Return the trailing fraction of a hexadecimal number.
292 DIGITVALUE is the first hex digit of the fraction, P points to
293 the next digit. */
294 static lostFraction
295 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
297 unsigned int hexDigit;
299 /* If the first trailing digit isn't 0 or 8 we can work out the
300 fraction immediately. */
301 if(digitValue > 8)
302 return lfMoreThanHalf;
303 else if(digitValue < 8 && digitValue > 0)
304 return lfLessThanHalf;
306 /* Otherwise we need to find the first non-zero digit. */
307 while(*p == '0')
308 p++;
310 hexDigit = hexDigitValue(*p);
312 /* If we ran off the end it is exactly zero or one-half, otherwise
313 a little more. */
314 if(hexDigit == -1U)
315 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
316 else
317 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
320 /* Return the fraction lost were a bignum truncated losing the least
321 significant BITS bits. */
322 static lostFraction
323 lostFractionThroughTruncation(const integerPart *parts,
324 unsigned int partCount,
325 unsigned int bits)
327 unsigned int lsb;
329 lsb = APInt::tcLSB(parts, partCount);
331 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
332 if(bits <= lsb)
333 return lfExactlyZero;
334 if(bits == lsb + 1)
335 return lfExactlyHalf;
336 if(bits <= partCount * integerPartWidth
337 && APInt::tcExtractBit(parts, bits - 1))
338 return lfMoreThanHalf;
340 return lfLessThanHalf;
343 /* Shift DST right BITS bits noting lost fraction. */
344 static lostFraction
345 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
347 lostFraction lost_fraction;
349 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
351 APInt::tcShiftRight(dst, parts, bits);
353 return lost_fraction;
356 /* Combine the effect of two lost fractions. */
357 static lostFraction
358 combineLostFractions(lostFraction moreSignificant,
359 lostFraction lessSignificant)
361 if(lessSignificant != lfExactlyZero) {
362 if(moreSignificant == lfExactlyZero)
363 moreSignificant = lfLessThanHalf;
364 else if(moreSignificant == lfExactlyHalf)
365 moreSignificant = lfMoreThanHalf;
368 return moreSignificant;
371 /* The error from the true value, in half-ulps, on multiplying two
372 floating point numbers, which differ from the value they
373 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
374 than the returned value.
376 See "How to Read Floating Point Numbers Accurately" by William D
377 Clinger. */
378 static unsigned int
379 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
381 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
383 if (HUerr1 + HUerr2 == 0)
384 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
385 else
386 return inexactMultiply + 2 * (HUerr1 + HUerr2);
389 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
390 when the least significant BITS are truncated. BITS cannot be
391 zero. */
392 static integerPart
393 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
395 unsigned int count, partBits;
396 integerPart part, boundary;
398 assert (bits != 0);
400 bits--;
401 count = bits / integerPartWidth;
402 partBits = bits % integerPartWidth + 1;
404 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
406 if (isNearest)
407 boundary = (integerPart) 1 << (partBits - 1);
408 else
409 boundary = 0;
411 if (count == 0) {
412 if (part - boundary <= boundary - part)
413 return part - boundary;
414 else
415 return boundary - part;
418 if (part == boundary) {
419 while (--count)
420 if (parts[count])
421 return ~(integerPart) 0; /* A lot. */
423 return parts[0];
424 } else if (part == boundary - 1) {
425 while (--count)
426 if (~parts[count])
427 return ~(integerPart) 0; /* A lot. */
429 return -parts[0];
432 return ~(integerPart) 0; /* A lot. */
435 /* Place pow(5, power) in DST, and return the number of parts used.
436 DST must be at least one part larger than size of the answer. */
437 static unsigned int
438 powerOf5(integerPart *dst, unsigned int power)
440 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
441 15625, 78125 };
442 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
443 pow5s[0] = 78125 * 5;
445 unsigned int partsCount[16] = { 1 };
446 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
447 unsigned int result;
448 assert(power <= maxExponent);
450 p1 = dst;
451 p2 = scratch;
453 *p1 = firstEightPowers[power & 7];
454 power >>= 3;
456 result = 1;
457 pow5 = pow5s;
459 for (unsigned int n = 0; power; power >>= 1, n++) {
460 unsigned int pc;
462 pc = partsCount[n];
464 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
465 if (pc == 0) {
466 pc = partsCount[n - 1];
467 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
468 pc *= 2;
469 if (pow5[pc - 1] == 0)
470 pc--;
471 partsCount[n] = pc;
474 if (power & 1) {
475 integerPart *tmp;
477 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
478 result += pc;
479 if (p2[result - 1] == 0)
480 result--;
482 /* Now result is in p1 with partsCount parts and p2 is scratch
483 space. */
484 tmp = p1, p1 = p2, p2 = tmp;
487 pow5 += pc;
490 if (p1 != dst)
491 APInt::tcAssign(dst, p1, result);
493 return result;
496 /* Zero at the end to avoid modular arithmetic when adding one; used
497 when rounding up during hexadecimal output. */
498 static const char hexDigitsLower[] = "0123456789abcdef0";
499 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
500 static const char infinityL[] = "infinity";
501 static const char infinityU[] = "INFINITY";
502 static const char NaNL[] = "nan";
503 static const char NaNU[] = "NAN";
505 /* Write out an integerPart in hexadecimal, starting with the most
506 significant nibble. Write out exactly COUNT hexdigits, return
507 COUNT. */
508 static unsigned int
509 partAsHex (char *dst, integerPart part, unsigned int count,
510 const char *hexDigitChars)
512 unsigned int result = count;
514 assert (count != 0 && count <= integerPartWidth / 4);
516 part >>= (integerPartWidth - 4 * count);
517 while (count--) {
518 dst[count] = hexDigitChars[part & 0xf];
519 part >>= 4;
522 return result;
525 /* Write out an unsigned decimal integer. */
526 static char *
527 writeUnsignedDecimal (char *dst, unsigned int n)
529 char buff[40], *p;
531 p = buff;
533 *p++ = '0' + n % 10;
534 while (n /= 10);
537 *dst++ = *--p;
538 while (p != buff);
540 return dst;
543 /* Write out a signed decimal integer. */
544 static char *
545 writeSignedDecimal (char *dst, int value)
547 if (value < 0) {
548 *dst++ = '-';
549 dst = writeUnsignedDecimal(dst, -(unsigned) value);
550 } else
551 dst = writeUnsignedDecimal(dst, value);
553 return dst;
556 /* Constructors. */
557 void
558 APFloat::initialize(const fltSemantics *ourSemantics)
560 unsigned int count;
562 semantics = ourSemantics;
563 count = partCount();
564 if(count > 1)
565 significand.parts = new integerPart[count];
568 void
569 APFloat::freeSignificand()
571 if(partCount() > 1)
572 delete [] significand.parts;
575 void
576 APFloat::assign(const APFloat &rhs)
578 assert(semantics == rhs.semantics);
580 sign = rhs.sign;
581 category = rhs.category;
582 exponent = rhs.exponent;
583 sign2 = rhs.sign2;
584 exponent2 = rhs.exponent2;
585 if(category == fcNormal || category == fcNaN)
586 copySignificand(rhs);
589 void
590 APFloat::copySignificand(const APFloat &rhs)
592 assert(category == fcNormal || category == fcNaN);
593 assert(rhs.partCount() >= partCount());
595 APInt::tcAssign(significandParts(), rhs.significandParts(),
596 partCount());
599 /* Make this number a NaN, with an arbitrary but deterministic value
600 for the significand. If double or longer, this is a signalling NaN,
601 which may not be ideal. */
602 void
603 APFloat::makeNaN(void)
605 category = fcNaN;
606 APInt::tcSet(significandParts(), ~0U, partCount());
609 APFloat &
610 APFloat::operator=(const APFloat &rhs)
612 if(this != &rhs) {
613 if(semantics != rhs.semantics) {
614 freeSignificand();
615 initialize(rhs.semantics);
617 assign(rhs);
620 return *this;
623 bool
624 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
625 if (this == &rhs)
626 return true;
627 if (semantics != rhs.semantics ||
628 category != rhs.category ||
629 sign != rhs.sign)
630 return false;
631 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
632 sign2 != rhs.sign2)
633 return false;
634 if (category==fcZero || category==fcInfinity)
635 return true;
636 else if (category==fcNormal && exponent!=rhs.exponent)
637 return false;
638 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
639 exponent2!=rhs.exponent2)
640 return false;
641 else {
642 int i= partCount();
643 const integerPart* p=significandParts();
644 const integerPart* q=rhs.significandParts();
645 for (; i>0; i--, p++, q++) {
646 if (*p != *q)
647 return false;
649 return true;
653 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
655 assertArithmeticOK(ourSemantics);
656 initialize(&ourSemantics);
657 sign = 0;
658 zeroSignificand();
659 exponent = ourSemantics.precision - 1;
660 significandParts()[0] = value;
661 normalize(rmNearestTiesToEven, lfExactlyZero);
664 APFloat::APFloat(const fltSemantics &ourSemantics,
665 fltCategory ourCategory, bool negative)
667 assertArithmeticOK(ourSemantics);
668 initialize(&ourSemantics);
669 category = ourCategory;
670 sign = negative;
671 if(category == fcNormal)
672 category = fcZero;
673 else if (ourCategory == fcNaN)
674 makeNaN();
677 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
679 assertArithmeticOK(ourSemantics);
680 initialize(&ourSemantics);
681 convertFromString(text, rmNearestTiesToEven);
684 APFloat::APFloat(const APFloat &rhs)
686 initialize(rhs.semantics);
687 assign(rhs);
690 APFloat::~APFloat()
692 freeSignificand();
695 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
696 void APFloat::Profile(FoldingSetNodeID& ID) const {
697 ID.Add(bitcastToAPInt());
700 unsigned int
701 APFloat::partCount() const
703 return partCountForBits(semantics->precision + 1);
706 unsigned int
707 APFloat::semanticsPrecision(const fltSemantics &semantics)
709 return semantics.precision;
712 const integerPart *
713 APFloat::significandParts() const
715 return const_cast<APFloat *>(this)->significandParts();
718 integerPart *
719 APFloat::significandParts()
721 assert(category == fcNormal || category == fcNaN);
723 if(partCount() > 1)
724 return significand.parts;
725 else
726 return &significand.part;
729 void
730 APFloat::zeroSignificand()
732 category = fcNormal;
733 APInt::tcSet(significandParts(), 0, partCount());
736 /* Increment an fcNormal floating point number's significand. */
737 void
738 APFloat::incrementSignificand()
740 integerPart carry;
742 carry = APInt::tcIncrement(significandParts(), partCount());
744 /* Our callers should never cause us to overflow. */
745 assert(carry == 0);
748 /* Add the significand of the RHS. Returns the carry flag. */
749 integerPart
750 APFloat::addSignificand(const APFloat &rhs)
752 integerPart *parts;
754 parts = significandParts();
756 assert(semantics == rhs.semantics);
757 assert(exponent == rhs.exponent);
759 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
762 /* Subtract the significand of the RHS with a borrow flag. Returns
763 the borrow flag. */
764 integerPart
765 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
767 integerPart *parts;
769 parts = significandParts();
771 assert(semantics == rhs.semantics);
772 assert(exponent == rhs.exponent);
774 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
775 partCount());
778 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
779 on to the full-precision result of the multiplication. Returns the
780 lost fraction. */
781 lostFraction
782 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
784 unsigned int omsb; // One, not zero, based MSB.
785 unsigned int partsCount, newPartsCount, precision;
786 integerPart *lhsSignificand;
787 integerPart scratch[4];
788 integerPart *fullSignificand;
789 lostFraction lost_fraction;
790 bool ignored;
792 assert(semantics == rhs.semantics);
794 precision = semantics->precision;
795 newPartsCount = partCountForBits(precision * 2);
797 if(newPartsCount > 4)
798 fullSignificand = new integerPart[newPartsCount];
799 else
800 fullSignificand = scratch;
802 lhsSignificand = significandParts();
803 partsCount = partCount();
805 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
806 rhs.significandParts(), partsCount, partsCount);
808 lost_fraction = lfExactlyZero;
809 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
810 exponent += rhs.exponent;
812 if(addend) {
813 Significand savedSignificand = significand;
814 const fltSemantics *savedSemantics = semantics;
815 fltSemantics extendedSemantics;
816 opStatus status;
817 unsigned int extendedPrecision;
819 /* Normalize our MSB. */
820 extendedPrecision = precision + precision - 1;
821 if(omsb != extendedPrecision)
823 APInt::tcShiftLeft(fullSignificand, newPartsCount,
824 extendedPrecision - omsb);
825 exponent -= extendedPrecision - omsb;
828 /* Create new semantics. */
829 extendedSemantics = *semantics;
830 extendedSemantics.precision = extendedPrecision;
832 if(newPartsCount == 1)
833 significand.part = fullSignificand[0];
834 else
835 significand.parts = fullSignificand;
836 semantics = &extendedSemantics;
838 APFloat extendedAddend(*addend);
839 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
840 assert(status == opOK);
841 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
843 /* Restore our state. */
844 if(newPartsCount == 1)
845 fullSignificand[0] = significand.part;
846 significand = savedSignificand;
847 semantics = savedSemantics;
849 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
852 exponent -= (precision - 1);
854 if(omsb > precision) {
855 unsigned int bits, significantParts;
856 lostFraction lf;
858 bits = omsb - precision;
859 significantParts = partCountForBits(omsb);
860 lf = shiftRight(fullSignificand, significantParts, bits);
861 lost_fraction = combineLostFractions(lf, lost_fraction);
862 exponent += bits;
865 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
867 if(newPartsCount > 4)
868 delete [] fullSignificand;
870 return lost_fraction;
873 /* Multiply the significands of LHS and RHS to DST. */
874 lostFraction
875 APFloat::divideSignificand(const APFloat &rhs)
877 unsigned int bit, i, partsCount;
878 const integerPart *rhsSignificand;
879 integerPart *lhsSignificand, *dividend, *divisor;
880 integerPart scratch[4];
881 lostFraction lost_fraction;
883 assert(semantics == rhs.semantics);
885 lhsSignificand = significandParts();
886 rhsSignificand = rhs.significandParts();
887 partsCount = partCount();
889 if(partsCount > 2)
890 dividend = new integerPart[partsCount * 2];
891 else
892 dividend = scratch;
894 divisor = dividend + partsCount;
896 /* Copy the dividend and divisor as they will be modified in-place. */
897 for(i = 0; i < partsCount; i++) {
898 dividend[i] = lhsSignificand[i];
899 divisor[i] = rhsSignificand[i];
900 lhsSignificand[i] = 0;
903 exponent -= rhs.exponent;
905 unsigned int precision = semantics->precision;
907 /* Normalize the divisor. */
908 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
909 if(bit) {
910 exponent += bit;
911 APInt::tcShiftLeft(divisor, partsCount, bit);
914 /* Normalize the dividend. */
915 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
916 if(bit) {
917 exponent -= bit;
918 APInt::tcShiftLeft(dividend, partsCount, bit);
921 /* Ensure the dividend >= divisor initially for the loop below.
922 Incidentally, this means that the division loop below is
923 guaranteed to set the integer bit to one. */
924 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
925 exponent--;
926 APInt::tcShiftLeft(dividend, partsCount, 1);
927 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
930 /* Long division. */
931 for(bit = precision; bit; bit -= 1) {
932 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
933 APInt::tcSubtract(dividend, divisor, 0, partsCount);
934 APInt::tcSetBit(lhsSignificand, bit - 1);
937 APInt::tcShiftLeft(dividend, partsCount, 1);
940 /* Figure out the lost fraction. */
941 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
943 if(cmp > 0)
944 lost_fraction = lfMoreThanHalf;
945 else if(cmp == 0)
946 lost_fraction = lfExactlyHalf;
947 else if(APInt::tcIsZero(dividend, partsCount))
948 lost_fraction = lfExactlyZero;
949 else
950 lost_fraction = lfLessThanHalf;
952 if(partsCount > 2)
953 delete [] dividend;
955 return lost_fraction;
958 unsigned int
959 APFloat::significandMSB() const
961 return APInt::tcMSB(significandParts(), partCount());
964 unsigned int
965 APFloat::significandLSB() const
967 return APInt::tcLSB(significandParts(), partCount());
970 /* Note that a zero result is NOT normalized to fcZero. */
971 lostFraction
972 APFloat::shiftSignificandRight(unsigned int bits)
974 /* Our exponent should not overflow. */
975 assert((exponent_t) (exponent + bits) >= exponent);
977 exponent += bits;
979 return shiftRight(significandParts(), partCount(), bits);
982 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
983 void
984 APFloat::shiftSignificandLeft(unsigned int bits)
986 assert(bits < semantics->precision);
988 if(bits) {
989 unsigned int partsCount = partCount();
991 APInt::tcShiftLeft(significandParts(), partsCount, bits);
992 exponent -= bits;
994 assert(!APInt::tcIsZero(significandParts(), partsCount));
998 APFloat::cmpResult
999 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1001 int compare;
1003 assert(semantics == rhs.semantics);
1004 assert(category == fcNormal);
1005 assert(rhs.category == fcNormal);
1007 compare = exponent - rhs.exponent;
1009 /* If exponents are equal, do an unsigned bignum comparison of the
1010 significands. */
1011 if(compare == 0)
1012 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1013 partCount());
1015 if(compare > 0)
1016 return cmpGreaterThan;
1017 else if(compare < 0)
1018 return cmpLessThan;
1019 else
1020 return cmpEqual;
1023 /* Handle overflow. Sign is preserved. We either become infinity or
1024 the largest finite number. */
1025 APFloat::opStatus
1026 APFloat::handleOverflow(roundingMode rounding_mode)
1028 /* Infinity? */
1029 if(rounding_mode == rmNearestTiesToEven
1030 || rounding_mode == rmNearestTiesToAway
1031 || (rounding_mode == rmTowardPositive && !sign)
1032 || (rounding_mode == rmTowardNegative && sign))
1034 category = fcInfinity;
1035 return (opStatus) (opOverflow | opInexact);
1038 /* Otherwise we become the largest finite number. */
1039 category = fcNormal;
1040 exponent = semantics->maxExponent;
1041 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1042 semantics->precision);
1044 return opInexact;
1047 /* Returns TRUE if, when truncating the current number, with BIT the
1048 new LSB, with the given lost fraction and rounding mode, the result
1049 would need to be rounded away from zero (i.e., by increasing the
1050 signficand). This routine must work for fcZero of both signs, and
1051 fcNormal numbers. */
1052 bool
1053 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1054 lostFraction lost_fraction,
1055 unsigned int bit) const
1057 /* NaNs and infinities should not have lost fractions. */
1058 assert(category == fcNormal || category == fcZero);
1060 /* Current callers never pass this so we don't handle it. */
1061 assert(lost_fraction != lfExactlyZero);
1063 switch(rounding_mode) {
1064 default:
1065 assert(0);
1067 case rmNearestTiesToAway:
1068 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1070 case rmNearestTiesToEven:
1071 if(lost_fraction == lfMoreThanHalf)
1072 return true;
1074 /* Our zeroes don't have a significand to test. */
1075 if(lost_fraction == lfExactlyHalf && category != fcZero)
1076 return APInt::tcExtractBit(significandParts(), bit);
1078 return false;
1080 case rmTowardZero:
1081 return false;
1083 case rmTowardPositive:
1084 return sign == false;
1086 case rmTowardNegative:
1087 return sign == true;
1091 APFloat::opStatus
1092 APFloat::normalize(roundingMode rounding_mode,
1093 lostFraction lost_fraction)
1095 unsigned int omsb; /* One, not zero, based MSB. */
1096 int exponentChange;
1098 if(category != fcNormal)
1099 return opOK;
1101 /* Before rounding normalize the exponent of fcNormal numbers. */
1102 omsb = significandMSB() + 1;
1104 if(omsb) {
1105 /* OMSB is numbered from 1. We want to place it in the integer
1106 bit numbered PRECISON if possible, with a compensating change in
1107 the exponent. */
1108 exponentChange = omsb - semantics->precision;
1110 /* If the resulting exponent is too high, overflow according to
1111 the rounding mode. */
1112 if(exponent + exponentChange > semantics->maxExponent)
1113 return handleOverflow(rounding_mode);
1115 /* Subnormal numbers have exponent minExponent, and their MSB
1116 is forced based on that. */
1117 if(exponent + exponentChange < semantics->minExponent)
1118 exponentChange = semantics->minExponent - exponent;
1120 /* Shifting left is easy as we don't lose precision. */
1121 if(exponentChange < 0) {
1122 assert(lost_fraction == lfExactlyZero);
1124 shiftSignificandLeft(-exponentChange);
1126 return opOK;
1129 if(exponentChange > 0) {
1130 lostFraction lf;
1132 /* Shift right and capture any new lost fraction. */
1133 lf = shiftSignificandRight(exponentChange);
1135 lost_fraction = combineLostFractions(lf, lost_fraction);
1137 /* Keep OMSB up-to-date. */
1138 if(omsb > (unsigned) exponentChange)
1139 omsb -= exponentChange;
1140 else
1141 omsb = 0;
1145 /* Now round the number according to rounding_mode given the lost
1146 fraction. */
1148 /* As specified in IEEE 754, since we do not trap we do not report
1149 underflow for exact results. */
1150 if(lost_fraction == lfExactlyZero) {
1151 /* Canonicalize zeroes. */
1152 if(omsb == 0)
1153 category = fcZero;
1155 return opOK;
1158 /* Increment the significand if we're rounding away from zero. */
1159 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1160 if(omsb == 0)
1161 exponent = semantics->minExponent;
1163 incrementSignificand();
1164 omsb = significandMSB() + 1;
1166 /* Did the significand increment overflow? */
1167 if(omsb == (unsigned) semantics->precision + 1) {
1168 /* Renormalize by incrementing the exponent and shifting our
1169 significand right one. However if we already have the
1170 maximum exponent we overflow to infinity. */
1171 if(exponent == semantics->maxExponent) {
1172 category = fcInfinity;
1174 return (opStatus) (opOverflow | opInexact);
1177 shiftSignificandRight(1);
1179 return opInexact;
1183 /* The normal case - we were and are not denormal, and any
1184 significand increment above didn't overflow. */
1185 if(omsb == semantics->precision)
1186 return opInexact;
1188 /* We have a non-zero denormal. */
1189 assert(omsb < semantics->precision);
1191 /* Canonicalize zeroes. */
1192 if(omsb == 0)
1193 category = fcZero;
1195 /* The fcZero case is a denormal that underflowed to zero. */
1196 return (opStatus) (opUnderflow | opInexact);
1199 APFloat::opStatus
1200 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1202 switch(convolve(category, rhs.category)) {
1203 default:
1204 assert(0);
1206 case convolve(fcNaN, fcZero):
1207 case convolve(fcNaN, fcNormal):
1208 case convolve(fcNaN, fcInfinity):
1209 case convolve(fcNaN, fcNaN):
1210 case convolve(fcNormal, fcZero):
1211 case convolve(fcInfinity, fcNormal):
1212 case convolve(fcInfinity, fcZero):
1213 return opOK;
1215 case convolve(fcZero, fcNaN):
1216 case convolve(fcNormal, fcNaN):
1217 case convolve(fcInfinity, fcNaN):
1218 category = fcNaN;
1219 copySignificand(rhs);
1220 return opOK;
1222 case convolve(fcNormal, fcInfinity):
1223 case convolve(fcZero, fcInfinity):
1224 category = fcInfinity;
1225 sign = rhs.sign ^ subtract;
1226 return opOK;
1228 case convolve(fcZero, fcNormal):
1229 assign(rhs);
1230 sign = rhs.sign ^ subtract;
1231 return opOK;
1233 case convolve(fcZero, fcZero):
1234 /* Sign depends on rounding mode; handled by caller. */
1235 return opOK;
1237 case convolve(fcInfinity, fcInfinity):
1238 /* Differently signed infinities can only be validly
1239 subtracted. */
1240 if(((sign ^ rhs.sign)!=0) != subtract) {
1241 makeNaN();
1242 return opInvalidOp;
1245 return opOK;
1247 case convolve(fcNormal, fcNormal):
1248 return opDivByZero;
1252 /* Add or subtract two normal numbers. */
1253 lostFraction
1254 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1256 integerPart carry;
1257 lostFraction lost_fraction;
1258 int bits;
1260 /* Determine if the operation on the absolute values is effectively
1261 an addition or subtraction. */
1262 subtract ^= (sign ^ rhs.sign) ? true : false;
1264 /* Are we bigger exponent-wise than the RHS? */
1265 bits = exponent - rhs.exponent;
1267 /* Subtraction is more subtle than one might naively expect. */
1268 if(subtract) {
1269 APFloat temp_rhs(rhs);
1270 bool reverse;
1272 if (bits == 0) {
1273 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1274 lost_fraction = lfExactlyZero;
1275 } else if (bits > 0) {
1276 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1277 shiftSignificandLeft(1);
1278 reverse = false;
1279 } else {
1280 lost_fraction = shiftSignificandRight(-bits - 1);
1281 temp_rhs.shiftSignificandLeft(1);
1282 reverse = true;
1285 if (reverse) {
1286 carry = temp_rhs.subtractSignificand
1287 (*this, lost_fraction != lfExactlyZero);
1288 copySignificand(temp_rhs);
1289 sign = !sign;
1290 } else {
1291 carry = subtractSignificand
1292 (temp_rhs, lost_fraction != lfExactlyZero);
1295 /* Invert the lost fraction - it was on the RHS and
1296 subtracted. */
1297 if(lost_fraction == lfLessThanHalf)
1298 lost_fraction = lfMoreThanHalf;
1299 else if(lost_fraction == lfMoreThanHalf)
1300 lost_fraction = lfLessThanHalf;
1302 /* The code above is intended to ensure that no borrow is
1303 necessary. */
1304 assert(!carry);
1305 } else {
1306 if(bits > 0) {
1307 APFloat temp_rhs(rhs);
1309 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1310 carry = addSignificand(temp_rhs);
1311 } else {
1312 lost_fraction = shiftSignificandRight(-bits);
1313 carry = addSignificand(rhs);
1316 /* We have a guard bit; generating a carry cannot happen. */
1317 assert(!carry);
1320 return lost_fraction;
1323 APFloat::opStatus
1324 APFloat::multiplySpecials(const APFloat &rhs)
1326 switch(convolve(category, rhs.category)) {
1327 default:
1328 assert(0);
1330 case convolve(fcNaN, fcZero):
1331 case convolve(fcNaN, fcNormal):
1332 case convolve(fcNaN, fcInfinity):
1333 case convolve(fcNaN, fcNaN):
1334 return opOK;
1336 case convolve(fcZero, fcNaN):
1337 case convolve(fcNormal, fcNaN):
1338 case convolve(fcInfinity, fcNaN):
1339 category = fcNaN;
1340 copySignificand(rhs);
1341 return opOK;
1343 case convolve(fcNormal, fcInfinity):
1344 case convolve(fcInfinity, fcNormal):
1345 case convolve(fcInfinity, fcInfinity):
1346 category = fcInfinity;
1347 return opOK;
1349 case convolve(fcZero, fcNormal):
1350 case convolve(fcNormal, fcZero):
1351 case convolve(fcZero, fcZero):
1352 category = fcZero;
1353 return opOK;
1355 case convolve(fcZero, fcInfinity):
1356 case convolve(fcInfinity, fcZero):
1357 makeNaN();
1358 return opInvalidOp;
1360 case convolve(fcNormal, fcNormal):
1361 return opOK;
1365 APFloat::opStatus
1366 APFloat::divideSpecials(const APFloat &rhs)
1368 switch(convolve(category, rhs.category)) {
1369 default:
1370 assert(0);
1372 case convolve(fcNaN, fcZero):
1373 case convolve(fcNaN, fcNormal):
1374 case convolve(fcNaN, fcInfinity):
1375 case convolve(fcNaN, fcNaN):
1376 case convolve(fcInfinity, fcZero):
1377 case convolve(fcInfinity, fcNormal):
1378 case convolve(fcZero, fcInfinity):
1379 case convolve(fcZero, fcNormal):
1380 return opOK;
1382 case convolve(fcZero, fcNaN):
1383 case convolve(fcNormal, fcNaN):
1384 case convolve(fcInfinity, fcNaN):
1385 category = fcNaN;
1386 copySignificand(rhs);
1387 return opOK;
1389 case convolve(fcNormal, fcInfinity):
1390 category = fcZero;
1391 return opOK;
1393 case convolve(fcNormal, fcZero):
1394 category = fcInfinity;
1395 return opDivByZero;
1397 case convolve(fcInfinity, fcInfinity):
1398 case convolve(fcZero, fcZero):
1399 makeNaN();
1400 return opInvalidOp;
1402 case convolve(fcNormal, fcNormal):
1403 return opOK;
1407 APFloat::opStatus
1408 APFloat::modSpecials(const APFloat &rhs)
1410 switch(convolve(category, rhs.category)) {
1411 default:
1412 assert(0);
1414 case convolve(fcNaN, fcZero):
1415 case convolve(fcNaN, fcNormal):
1416 case convolve(fcNaN, fcInfinity):
1417 case convolve(fcNaN, fcNaN):
1418 case convolve(fcZero, fcInfinity):
1419 case convolve(fcZero, fcNormal):
1420 case convolve(fcNormal, fcInfinity):
1421 return opOK;
1423 case convolve(fcZero, fcNaN):
1424 case convolve(fcNormal, fcNaN):
1425 case convolve(fcInfinity, fcNaN):
1426 category = fcNaN;
1427 copySignificand(rhs);
1428 return opOK;
1430 case convolve(fcNormal, fcZero):
1431 case convolve(fcInfinity, fcZero):
1432 case convolve(fcInfinity, fcNormal):
1433 case convolve(fcInfinity, fcInfinity):
1434 case convolve(fcZero, fcZero):
1435 makeNaN();
1436 return opInvalidOp;
1438 case convolve(fcNormal, fcNormal):
1439 return opOK;
1443 /* Change sign. */
1444 void
1445 APFloat::changeSign()
1447 /* Look mummy, this one's easy. */
1448 sign = !sign;
1451 void
1452 APFloat::clearSign()
1454 /* So is this one. */
1455 sign = 0;
1458 void
1459 APFloat::copySign(const APFloat &rhs)
1461 /* And this one. */
1462 sign = rhs.sign;
1465 /* Normalized addition or subtraction. */
1466 APFloat::opStatus
1467 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1468 bool subtract)
1470 opStatus fs;
1472 assertArithmeticOK(*semantics);
1474 fs = addOrSubtractSpecials(rhs, subtract);
1476 /* This return code means it was not a simple case. */
1477 if(fs == opDivByZero) {
1478 lostFraction lost_fraction;
1480 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1481 fs = normalize(rounding_mode, lost_fraction);
1483 /* Can only be zero if we lost no fraction. */
1484 assert(category != fcZero || lost_fraction == lfExactlyZero);
1487 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1488 positive zero unless rounding to minus infinity, except that
1489 adding two like-signed zeroes gives that zero. */
1490 if(category == fcZero) {
1491 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1492 sign = (rounding_mode == rmTowardNegative);
1495 return fs;
1498 /* Normalized addition. */
1499 APFloat::opStatus
1500 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1502 return addOrSubtract(rhs, rounding_mode, false);
1505 /* Normalized subtraction. */
1506 APFloat::opStatus
1507 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1509 return addOrSubtract(rhs, rounding_mode, true);
1512 /* Normalized multiply. */
1513 APFloat::opStatus
1514 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1516 opStatus fs;
1518 assertArithmeticOK(*semantics);
1519 sign ^= rhs.sign;
1520 fs = multiplySpecials(rhs);
1522 if(category == fcNormal) {
1523 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1524 fs = normalize(rounding_mode, lost_fraction);
1525 if(lost_fraction != lfExactlyZero)
1526 fs = (opStatus) (fs | opInexact);
1529 return fs;
1532 /* Normalized divide. */
1533 APFloat::opStatus
1534 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1536 opStatus fs;
1538 assertArithmeticOK(*semantics);
1539 sign ^= rhs.sign;
1540 fs = divideSpecials(rhs);
1542 if(category == fcNormal) {
1543 lostFraction lost_fraction = divideSignificand(rhs);
1544 fs = normalize(rounding_mode, lost_fraction);
1545 if(lost_fraction != lfExactlyZero)
1546 fs = (opStatus) (fs | opInexact);
1549 return fs;
1552 /* Normalized remainder. This is not currently correct in all cases. */
1553 APFloat::opStatus
1554 APFloat::remainder(const APFloat &rhs)
1556 opStatus fs;
1557 APFloat V = *this;
1558 unsigned int origSign = sign;
1560 assertArithmeticOK(*semantics);
1561 fs = V.divide(rhs, rmNearestTiesToEven);
1562 if (fs == opDivByZero)
1563 return fs;
1565 int parts = partCount();
1566 integerPart *x = new integerPart[parts];
1567 bool ignored;
1568 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1569 rmNearestTiesToEven, &ignored);
1570 if (fs==opInvalidOp)
1571 return fs;
1573 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1574 rmNearestTiesToEven);
1575 assert(fs==opOK); // should always work
1577 fs = V.multiply(rhs, rmNearestTiesToEven);
1578 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1580 fs = subtract(V, rmNearestTiesToEven);
1581 assert(fs==opOK || fs==opInexact); // likewise
1583 if (isZero())
1584 sign = origSign; // IEEE754 requires this
1585 delete[] x;
1586 return fs;
1589 /* Normalized llvm frem (C fmod).
1590 This is not currently correct in all cases. */
1591 APFloat::opStatus
1592 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1594 opStatus fs;
1595 assertArithmeticOK(*semantics);
1596 fs = modSpecials(rhs);
1598 if (category == fcNormal && rhs.category == fcNormal) {
1599 APFloat V = *this;
1600 unsigned int origSign = sign;
1602 fs = V.divide(rhs, rmNearestTiesToEven);
1603 if (fs == opDivByZero)
1604 return fs;
1606 int parts = partCount();
1607 integerPart *x = new integerPart[parts];
1608 bool ignored;
1609 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1610 rmTowardZero, &ignored);
1611 if (fs==opInvalidOp)
1612 return fs;
1614 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1615 rmNearestTiesToEven);
1616 assert(fs==opOK); // should always work
1618 fs = V.multiply(rhs, rounding_mode);
1619 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1621 fs = subtract(V, rounding_mode);
1622 assert(fs==opOK || fs==opInexact); // likewise
1624 if (isZero())
1625 sign = origSign; // IEEE754 requires this
1626 delete[] x;
1628 return fs;
1631 /* Normalized fused-multiply-add. */
1632 APFloat::opStatus
1633 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1634 const APFloat &addend,
1635 roundingMode rounding_mode)
1637 opStatus fs;
1639 assertArithmeticOK(*semantics);
1641 /* Post-multiplication sign, before addition. */
1642 sign ^= multiplicand.sign;
1644 /* If and only if all arguments are normal do we need to do an
1645 extended-precision calculation. */
1646 if(category == fcNormal
1647 && multiplicand.category == fcNormal
1648 && addend.category == fcNormal) {
1649 lostFraction lost_fraction;
1651 lost_fraction = multiplySignificand(multiplicand, &addend);
1652 fs = normalize(rounding_mode, lost_fraction);
1653 if(lost_fraction != lfExactlyZero)
1654 fs = (opStatus) (fs | opInexact);
1656 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1657 positive zero unless rounding to minus infinity, except that
1658 adding two like-signed zeroes gives that zero. */
1659 if(category == fcZero && sign != addend.sign)
1660 sign = (rounding_mode == rmTowardNegative);
1661 } else {
1662 fs = multiplySpecials(multiplicand);
1664 /* FS can only be opOK or opInvalidOp. There is no more work
1665 to do in the latter case. The IEEE-754R standard says it is
1666 implementation-defined in this case whether, if ADDEND is a
1667 quiet NaN, we raise invalid op; this implementation does so.
1669 If we need to do the addition we can do so with normal
1670 precision. */
1671 if(fs == opOK)
1672 fs = addOrSubtract(addend, rounding_mode, false);
1675 return fs;
1678 /* Comparison requires normalized numbers. */
1679 APFloat::cmpResult
1680 APFloat::compare(const APFloat &rhs) const
1682 cmpResult result;
1684 assertArithmeticOK(*semantics);
1685 assert(semantics == rhs.semantics);
1687 switch(convolve(category, rhs.category)) {
1688 default:
1689 assert(0);
1691 case convolve(fcNaN, fcZero):
1692 case convolve(fcNaN, fcNormal):
1693 case convolve(fcNaN, fcInfinity):
1694 case convolve(fcNaN, fcNaN):
1695 case convolve(fcZero, fcNaN):
1696 case convolve(fcNormal, fcNaN):
1697 case convolve(fcInfinity, fcNaN):
1698 return cmpUnordered;
1700 case convolve(fcInfinity, fcNormal):
1701 case convolve(fcInfinity, fcZero):
1702 case convolve(fcNormal, fcZero):
1703 if(sign)
1704 return cmpLessThan;
1705 else
1706 return cmpGreaterThan;
1708 case convolve(fcNormal, fcInfinity):
1709 case convolve(fcZero, fcInfinity):
1710 case convolve(fcZero, fcNormal):
1711 if(rhs.sign)
1712 return cmpGreaterThan;
1713 else
1714 return cmpLessThan;
1716 case convolve(fcInfinity, fcInfinity):
1717 if(sign == rhs.sign)
1718 return cmpEqual;
1719 else if(sign)
1720 return cmpLessThan;
1721 else
1722 return cmpGreaterThan;
1724 case convolve(fcZero, fcZero):
1725 return cmpEqual;
1727 case convolve(fcNormal, fcNormal):
1728 break;
1731 /* Two normal numbers. Do they have the same sign? */
1732 if(sign != rhs.sign) {
1733 if(sign)
1734 result = cmpLessThan;
1735 else
1736 result = cmpGreaterThan;
1737 } else {
1738 /* Compare absolute values; invert result if negative. */
1739 result = compareAbsoluteValue(rhs);
1741 if(sign) {
1742 if(result == cmpLessThan)
1743 result = cmpGreaterThan;
1744 else if(result == cmpGreaterThan)
1745 result = cmpLessThan;
1749 return result;
1752 /// APFloat::convert - convert a value of one floating point type to another.
1753 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1754 /// records whether the transformation lost information, i.e. whether
1755 /// converting the result back to the original type will produce the
1756 /// original value (this is almost the same as return value==fsOK, but there
1757 /// are edge cases where this is not so).
1759 APFloat::opStatus
1760 APFloat::convert(const fltSemantics &toSemantics,
1761 roundingMode rounding_mode, bool *losesInfo)
1763 lostFraction lostFraction;
1764 unsigned int newPartCount, oldPartCount;
1765 opStatus fs;
1767 assertArithmeticOK(*semantics);
1768 assertArithmeticOK(toSemantics);
1769 lostFraction = lfExactlyZero;
1770 newPartCount = partCountForBits(toSemantics.precision + 1);
1771 oldPartCount = partCount();
1773 /* Handle storage complications. If our new form is wider,
1774 re-allocate our bit pattern into wider storage. If it is
1775 narrower, we ignore the excess parts, but if narrowing to a
1776 single part we need to free the old storage.
1777 Be careful not to reference significandParts for zeroes
1778 and infinities, since it aborts. */
1779 if (newPartCount > oldPartCount) {
1780 integerPart *newParts;
1781 newParts = new integerPart[newPartCount];
1782 APInt::tcSet(newParts, 0, newPartCount);
1783 if (category==fcNormal || category==fcNaN)
1784 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1785 freeSignificand();
1786 significand.parts = newParts;
1787 } else if (newPartCount < oldPartCount) {
1788 /* Capture any lost fraction through truncation of parts so we get
1789 correct rounding whilst normalizing. */
1790 if (category==fcNormal)
1791 lostFraction = lostFractionThroughTruncation
1792 (significandParts(), oldPartCount, toSemantics.precision);
1793 if (newPartCount == 1) {
1794 integerPart newPart = 0;
1795 if (category==fcNormal || category==fcNaN)
1796 newPart = significandParts()[0];
1797 freeSignificand();
1798 significand.part = newPart;
1802 if(category == fcNormal) {
1803 /* Re-interpret our bit-pattern. */
1804 exponent += toSemantics.precision - semantics->precision;
1805 semantics = &toSemantics;
1806 fs = normalize(rounding_mode, lostFraction);
1807 *losesInfo = (fs != opOK);
1808 } else if (category == fcNaN) {
1809 int shift = toSemantics.precision - semantics->precision;
1810 // Do this now so significandParts gets the right answer
1811 const fltSemantics *oldSemantics = semantics;
1812 semantics = &toSemantics;
1813 *losesInfo = false;
1814 // No normalization here, just truncate
1815 if (shift>0)
1816 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1817 else if (shift < 0) {
1818 unsigned ushift = -shift;
1819 // Figure out if we are losing information. This happens
1820 // if are shifting out something other than 0s, or if the x87 long
1821 // double input did not have its integer bit set (pseudo-NaN), or if the
1822 // x87 long double input did not have its QNan bit set (because the x87
1823 // hardware sets this bit when converting a lower-precision NaN to
1824 // x87 long double).
1825 if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
1826 *losesInfo = true;
1827 if (oldSemantics == &APFloat::x87DoubleExtended &&
1828 (!(*significandParts() & 0x8000000000000000ULL) ||
1829 !(*significandParts() & 0x4000000000000000ULL)))
1830 *losesInfo = true;
1831 APInt::tcShiftRight(significandParts(), newPartCount, ushift);
1833 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1834 // does not give you back the same bits. This is dubious, and we
1835 // don't currently do it. You're really supposed to get
1836 // an invalid operation signal at runtime, but nobody does that.
1837 fs = opOK;
1838 } else {
1839 semantics = &toSemantics;
1840 fs = opOK;
1841 *losesInfo = false;
1844 return fs;
1847 /* Convert a floating point number to an integer according to the
1848 rounding mode. If the rounded integer value is out of range this
1849 returns an invalid operation exception and the contents of the
1850 destination parts are unspecified. If the rounded value is in
1851 range but the floating point number is not the exact integer, the C
1852 standard doesn't require an inexact exception to be raised. IEEE
1853 854 does require it so we do that.
1855 Note that for conversions to integer type the C standard requires
1856 round-to-zero to always be used. */
1857 APFloat::opStatus
1858 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1859 bool isSigned,
1860 roundingMode rounding_mode,
1861 bool *isExact) const
1863 lostFraction lost_fraction;
1864 const integerPart *src;
1865 unsigned int dstPartsCount, truncatedBits;
1867 assertArithmeticOK(*semantics);
1869 *isExact = false;
1871 /* Handle the three special cases first. */
1872 if(category == fcInfinity || category == fcNaN)
1873 return opInvalidOp;
1875 dstPartsCount = partCountForBits(width);
1877 if(category == fcZero) {
1878 APInt::tcSet(parts, 0, dstPartsCount);
1879 // Negative zero can't be represented as an int.
1880 *isExact = !sign;
1881 return opOK;
1884 src = significandParts();
1886 /* Step 1: place our absolute value, with any fraction truncated, in
1887 the destination. */
1888 if (exponent < 0) {
1889 /* Our absolute value is less than one; truncate everything. */
1890 APInt::tcSet(parts, 0, dstPartsCount);
1891 /* For exponent -1 the integer bit represents .5, look at that.
1892 For smaller exponents leftmost truncated bit is 0. */
1893 truncatedBits = semantics->precision -1U - exponent;
1894 } else {
1895 /* We want the most significant (exponent + 1) bits; the rest are
1896 truncated. */
1897 unsigned int bits = exponent + 1U;
1899 /* Hopelessly large in magnitude? */
1900 if (bits > width)
1901 return opInvalidOp;
1903 if (bits < semantics->precision) {
1904 /* We truncate (semantics->precision - bits) bits. */
1905 truncatedBits = semantics->precision - bits;
1906 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1907 } else {
1908 /* We want at least as many bits as are available. */
1909 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1910 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1911 truncatedBits = 0;
1915 /* Step 2: work out any lost fraction, and increment the absolute
1916 value if we would round away from zero. */
1917 if (truncatedBits) {
1918 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1919 truncatedBits);
1920 if (lost_fraction != lfExactlyZero
1921 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
1922 if (APInt::tcIncrement(parts, dstPartsCount))
1923 return opInvalidOp; /* Overflow. */
1925 } else {
1926 lost_fraction = lfExactlyZero;
1929 /* Step 3: check if we fit in the destination. */
1930 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
1932 if (sign) {
1933 if (!isSigned) {
1934 /* Negative numbers cannot be represented as unsigned. */
1935 if (omsb != 0)
1936 return opInvalidOp;
1937 } else {
1938 /* It takes omsb bits to represent the unsigned integer value.
1939 We lose a bit for the sign, but care is needed as the
1940 maximally negative integer is a special case. */
1941 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
1942 return opInvalidOp;
1944 /* This case can happen because of rounding. */
1945 if (omsb > width)
1946 return opInvalidOp;
1949 APInt::tcNegate (parts, dstPartsCount);
1950 } else {
1951 if (omsb >= width + !isSigned)
1952 return opInvalidOp;
1955 if (lost_fraction == lfExactlyZero) {
1956 *isExact = true;
1957 return opOK;
1958 } else
1959 return opInexact;
1962 /* Same as convertToSignExtendedInteger, except we provide
1963 deterministic values in case of an invalid operation exception,
1964 namely zero for NaNs and the minimal or maximal value respectively
1965 for underflow or overflow.
1966 The *isExact output tells whether the result is exact, in the sense
1967 that converting it back to the original floating point type produces
1968 the original value. This is almost equivalent to result==opOK,
1969 except for negative zeroes.
1971 APFloat::opStatus
1972 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1973 bool isSigned,
1974 roundingMode rounding_mode, bool *isExact) const
1976 opStatus fs;
1978 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
1979 isExact);
1981 if (fs == opInvalidOp) {
1982 unsigned int bits, dstPartsCount;
1984 dstPartsCount = partCountForBits(width);
1986 if (category == fcNaN)
1987 bits = 0;
1988 else if (sign)
1989 bits = isSigned;
1990 else
1991 bits = width - isSigned;
1993 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
1994 if (sign && isSigned)
1995 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
1998 return fs;
2001 /* Convert an unsigned integer SRC to a floating point number,
2002 rounding according to ROUNDING_MODE. The sign of the floating
2003 point number is not modified. */
2004 APFloat::opStatus
2005 APFloat::convertFromUnsignedParts(const integerPart *src,
2006 unsigned int srcCount,
2007 roundingMode rounding_mode)
2009 unsigned int omsb, precision, dstCount;
2010 integerPart *dst;
2011 lostFraction lost_fraction;
2013 assertArithmeticOK(*semantics);
2014 category = fcNormal;
2015 omsb = APInt::tcMSB(src, srcCount) + 1;
2016 dst = significandParts();
2017 dstCount = partCount();
2018 precision = semantics->precision;
2020 /* We want the most significant PRECISON bits of SRC. There may not
2021 be that many; extract what we can. */
2022 if (precision <= omsb) {
2023 exponent = omsb - 1;
2024 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2025 omsb - precision);
2026 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2027 } else {
2028 exponent = precision - 1;
2029 lost_fraction = lfExactlyZero;
2030 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2033 return normalize(rounding_mode, lost_fraction);
2036 APFloat::opStatus
2037 APFloat::convertFromAPInt(const APInt &Val,
2038 bool isSigned,
2039 roundingMode rounding_mode)
2041 unsigned int partCount = Val.getNumWords();
2042 APInt api = Val;
2044 sign = false;
2045 if (isSigned && api.isNegative()) {
2046 sign = true;
2047 api = -api;
2050 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2053 /* Convert a two's complement integer SRC to a floating point number,
2054 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2055 integer is signed, in which case it must be sign-extended. */
2056 APFloat::opStatus
2057 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2058 unsigned int srcCount,
2059 bool isSigned,
2060 roundingMode rounding_mode)
2062 opStatus status;
2064 assertArithmeticOK(*semantics);
2065 if (isSigned
2066 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2067 integerPart *copy;
2069 /* If we're signed and negative negate a copy. */
2070 sign = true;
2071 copy = new integerPart[srcCount];
2072 APInt::tcAssign(copy, src, srcCount);
2073 APInt::tcNegate(copy, srcCount);
2074 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2075 delete [] copy;
2076 } else {
2077 sign = false;
2078 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2081 return status;
2084 /* FIXME: should this just take a const APInt reference? */
2085 APFloat::opStatus
2086 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2087 unsigned int width, bool isSigned,
2088 roundingMode rounding_mode)
2090 unsigned int partCount = partCountForBits(width);
2091 APInt api = APInt(width, partCount, parts);
2093 sign = false;
2094 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
2095 sign = true;
2096 api = -api;
2099 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2102 APFloat::opStatus
2103 APFloat::convertFromHexadecimalString(const char *p,
2104 roundingMode rounding_mode)
2106 lostFraction lost_fraction;
2107 integerPart *significand;
2108 unsigned int bitPos, partsCount;
2109 const char *dot, *firstSignificantDigit;
2111 zeroSignificand();
2112 exponent = 0;
2113 category = fcNormal;
2115 significand = significandParts();
2116 partsCount = partCount();
2117 bitPos = partsCount * integerPartWidth;
2119 /* Skip leading zeroes and any (hexa)decimal point. */
2120 p = skipLeadingZeroesAndAnyDot(p, &dot);
2121 firstSignificantDigit = p;
2123 for(;;) {
2124 integerPart hex_value;
2126 if(*p == '.') {
2127 assert(dot == 0);
2128 dot = p++;
2131 hex_value = hexDigitValue(*p);
2132 if(hex_value == -1U) {
2133 lost_fraction = lfExactlyZero;
2134 break;
2137 p++;
2139 /* Store the number whilst 4-bit nibbles remain. */
2140 if(bitPos) {
2141 bitPos -= 4;
2142 hex_value <<= bitPos % integerPartWidth;
2143 significand[bitPos / integerPartWidth] |= hex_value;
2144 } else {
2145 lost_fraction = trailingHexadecimalFraction(p, hex_value);
2146 while(hexDigitValue(*p) != -1U)
2147 p++;
2148 break;
2152 /* Hex floats require an exponent but not a hexadecimal point. */
2153 assert(*p == 'p' || *p == 'P');
2155 /* Ignore the exponent if we are zero. */
2156 if(p != firstSignificantDigit) {
2157 int expAdjustment;
2159 /* Implicit hexadecimal point? */
2160 if(!dot)
2161 dot = p;
2163 /* Calculate the exponent adjustment implicit in the number of
2164 significant digits. */
2165 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2166 if(expAdjustment < 0)
2167 expAdjustment++;
2168 expAdjustment = expAdjustment * 4 - 1;
2170 /* Adjust for writing the significand starting at the most
2171 significant nibble. */
2172 expAdjustment += semantics->precision;
2173 expAdjustment -= partsCount * integerPartWidth;
2175 /* Adjust for the given exponent. */
2176 exponent = totalExponent(p, expAdjustment);
2179 return normalize(rounding_mode, lost_fraction);
2182 APFloat::opStatus
2183 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2184 unsigned sigPartCount, int exp,
2185 roundingMode rounding_mode)
2187 unsigned int parts, pow5PartCount;
2188 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2189 integerPart pow5Parts[maxPowerOfFiveParts];
2190 bool isNearest;
2192 isNearest = (rounding_mode == rmNearestTiesToEven
2193 || rounding_mode == rmNearestTiesToAway);
2195 parts = partCountForBits(semantics->precision + 11);
2197 /* Calculate pow(5, abs(exp)). */
2198 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2200 for (;; parts *= 2) {
2201 opStatus sigStatus, powStatus;
2202 unsigned int excessPrecision, truncatedBits;
2204 calcSemantics.precision = parts * integerPartWidth - 1;
2205 excessPrecision = calcSemantics.precision - semantics->precision;
2206 truncatedBits = excessPrecision;
2208 APFloat decSig(calcSemantics, fcZero, sign);
2209 APFloat pow5(calcSemantics, fcZero, false);
2211 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2212 rmNearestTiesToEven);
2213 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2214 rmNearestTiesToEven);
2215 /* Add exp, as 10^n = 5^n * 2^n. */
2216 decSig.exponent += exp;
2218 lostFraction calcLostFraction;
2219 integerPart HUerr, HUdistance;
2220 unsigned int powHUerr;
2222 if (exp >= 0) {
2223 /* multiplySignificand leaves the precision-th bit set to 1. */
2224 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2225 powHUerr = powStatus != opOK;
2226 } else {
2227 calcLostFraction = decSig.divideSignificand(pow5);
2228 /* Denormal numbers have less precision. */
2229 if (decSig.exponent < semantics->minExponent) {
2230 excessPrecision += (semantics->minExponent - decSig.exponent);
2231 truncatedBits = excessPrecision;
2232 if (excessPrecision > calcSemantics.precision)
2233 excessPrecision = calcSemantics.precision;
2235 /* Extra half-ulp lost in reciprocal of exponent. */
2236 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2239 /* Both multiplySignificand and divideSignificand return the
2240 result with the integer bit set. */
2241 assert (APInt::tcExtractBit
2242 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2244 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2245 powHUerr);
2246 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2247 excessPrecision, isNearest);
2249 /* Are we guaranteed to round correctly if we truncate? */
2250 if (HUdistance >= HUerr) {
2251 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2252 calcSemantics.precision - excessPrecision,
2253 excessPrecision);
2254 /* Take the exponent of decSig. If we tcExtract-ed less bits
2255 above we must adjust our exponent to compensate for the
2256 implicit right shift. */
2257 exponent = (decSig.exponent + semantics->precision
2258 - (calcSemantics.precision - excessPrecision));
2259 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2260 decSig.partCount(),
2261 truncatedBits);
2262 return normalize(rounding_mode, calcLostFraction);
2267 APFloat::opStatus
2268 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
2270 decimalInfo D;
2271 opStatus fs;
2273 /* Scan the text. */
2274 interpretDecimal(p, &D);
2276 /* Handle the quick cases. First the case of no significant digits,
2277 i.e. zero, and then exponents that are obviously too large or too
2278 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2279 definitely overflows if
2281 (exp - 1) * L >= maxExponent
2283 and definitely underflows to zero where
2285 (exp + 1) * L <= minExponent - precision
2287 With integer arithmetic the tightest bounds for L are
2289 93/28 < L < 196/59 [ numerator <= 256 ]
2290 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2293 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2294 category = fcZero;
2295 fs = opOK;
2296 } else if ((D.normalizedExponent + 1) * 28738
2297 <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
2298 /* Underflow to zero and round. */
2299 zeroSignificand();
2300 fs = normalize(rounding_mode, lfLessThanHalf);
2301 } else if ((D.normalizedExponent - 1) * 42039
2302 >= 12655 * semantics->maxExponent) {
2303 /* Overflow and round. */
2304 fs = handleOverflow(rounding_mode);
2305 } else {
2306 integerPart *decSignificand;
2307 unsigned int partCount;
2309 /* A tight upper bound on number of bits required to hold an
2310 N-digit decimal integer is N * 196 / 59. Allocate enough space
2311 to hold the full significand, and an extra part required by
2312 tcMultiplyPart. */
2313 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2314 partCount = partCountForBits(1 + 196 * partCount / 59);
2315 decSignificand = new integerPart[partCount + 1];
2316 partCount = 0;
2318 /* Convert to binary efficiently - we do almost all multiplication
2319 in an integerPart. When this would overflow do we do a single
2320 bignum multiplication, and then revert again to multiplication
2321 in an integerPart. */
2322 do {
2323 integerPart decValue, val, multiplier;
2325 val = 0;
2326 multiplier = 1;
2328 do {
2329 if (*p == '.')
2330 p++;
2332 decValue = decDigitValue(*p++);
2333 multiplier *= 10;
2334 val = val * 10 + decValue;
2335 /* The maximum number that can be multiplied by ten with any
2336 digit added without overflowing an integerPart. */
2337 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2339 /* Multiply out the current part. */
2340 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2341 partCount, partCount + 1, false);
2343 /* If we used another part (likely but not guaranteed), increase
2344 the count. */
2345 if (decSignificand[partCount])
2346 partCount++;
2347 } while (p <= D.lastSigDigit);
2349 category = fcNormal;
2350 fs = roundSignificandWithExponent(decSignificand, partCount,
2351 D.exponent, rounding_mode);
2353 delete [] decSignificand;
2356 return fs;
2359 APFloat::opStatus
2360 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
2362 assertArithmeticOK(*semantics);
2364 /* Handle a leading minus sign. */
2365 if(*p == '-')
2366 sign = 1, p++;
2367 else
2368 sign = 0;
2370 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
2371 return convertFromHexadecimalString(p + 2, rounding_mode);
2373 return convertFromDecimalString(p, rounding_mode);
2376 /* Write out a hexadecimal representation of the floating point value
2377 to DST, which must be of sufficient size, in the C99 form
2378 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2379 excluding the terminating NUL.
2381 If UPPERCASE, the output is in upper case, otherwise in lower case.
2383 HEXDIGITS digits appear altogether, rounding the value if
2384 necessary. If HEXDIGITS is 0, the minimal precision to display the
2385 number precisely is used instead. If nothing would appear after
2386 the decimal point it is suppressed.
2388 The decimal exponent is always printed and has at least one digit.
2389 Zero values display an exponent of zero. Infinities and NaNs
2390 appear as "infinity" or "nan" respectively.
2392 The above rules are as specified by C99. There is ambiguity about
2393 what the leading hexadecimal digit should be. This implementation
2394 uses whatever is necessary so that the exponent is displayed as
2395 stored. This implies the exponent will fall within the IEEE format
2396 range, and the leading hexadecimal digit will be 0 (for denormals),
2397 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2398 any other digits zero).
2400 unsigned int
2401 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2402 bool upperCase, roundingMode rounding_mode) const
2404 char *p;
2406 assertArithmeticOK(*semantics);
2408 p = dst;
2409 if (sign)
2410 *dst++ = '-';
2412 switch (category) {
2413 case fcInfinity:
2414 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2415 dst += sizeof infinityL - 1;
2416 break;
2418 case fcNaN:
2419 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2420 dst += sizeof NaNU - 1;
2421 break;
2423 case fcZero:
2424 *dst++ = '0';
2425 *dst++ = upperCase ? 'X': 'x';
2426 *dst++ = '0';
2427 if (hexDigits > 1) {
2428 *dst++ = '.';
2429 memset (dst, '0', hexDigits - 1);
2430 dst += hexDigits - 1;
2432 *dst++ = upperCase ? 'P': 'p';
2433 *dst++ = '0';
2434 break;
2436 case fcNormal:
2437 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2438 break;
2441 *dst = 0;
2443 return static_cast<unsigned int>(dst - p);
2446 /* Does the hard work of outputting the correctly rounded hexadecimal
2447 form of a normal floating point number with the specified number of
2448 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2449 digits necessary to print the value precisely is output. */
2450 char *
2451 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2452 bool upperCase,
2453 roundingMode rounding_mode) const
2455 unsigned int count, valueBits, shift, partsCount, outputDigits;
2456 const char *hexDigitChars;
2457 const integerPart *significand;
2458 char *p;
2459 bool roundUp;
2461 *dst++ = '0';
2462 *dst++ = upperCase ? 'X': 'x';
2464 roundUp = false;
2465 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2467 significand = significandParts();
2468 partsCount = partCount();
2470 /* +3 because the first digit only uses the single integer bit, so
2471 we have 3 virtual zero most-significant-bits. */
2472 valueBits = semantics->precision + 3;
2473 shift = integerPartWidth - valueBits % integerPartWidth;
2475 /* The natural number of digits required ignoring trailing
2476 insignificant zeroes. */
2477 outputDigits = (valueBits - significandLSB () + 3) / 4;
2479 /* hexDigits of zero means use the required number for the
2480 precision. Otherwise, see if we are truncating. If we are,
2481 find out if we need to round away from zero. */
2482 if (hexDigits) {
2483 if (hexDigits < outputDigits) {
2484 /* We are dropping non-zero bits, so need to check how to round.
2485 "bits" is the number of dropped bits. */
2486 unsigned int bits;
2487 lostFraction fraction;
2489 bits = valueBits - hexDigits * 4;
2490 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2491 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2493 outputDigits = hexDigits;
2496 /* Write the digits consecutively, and start writing in the location
2497 of the hexadecimal point. We move the most significant digit
2498 left and add the hexadecimal point later. */
2499 p = ++dst;
2501 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2503 while (outputDigits && count) {
2504 integerPart part;
2506 /* Put the most significant integerPartWidth bits in "part". */
2507 if (--count == partsCount)
2508 part = 0; /* An imaginary higher zero part. */
2509 else
2510 part = significand[count] << shift;
2512 if (count && shift)
2513 part |= significand[count - 1] >> (integerPartWidth - shift);
2515 /* Convert as much of "part" to hexdigits as we can. */
2516 unsigned int curDigits = integerPartWidth / 4;
2518 if (curDigits > outputDigits)
2519 curDigits = outputDigits;
2520 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2521 outputDigits -= curDigits;
2524 if (roundUp) {
2525 char *q = dst;
2527 /* Note that hexDigitChars has a trailing '0'. */
2528 do {
2529 q--;
2530 *q = hexDigitChars[hexDigitValue (*q) + 1];
2531 } while (*q == '0');
2532 assert (q >= p);
2533 } else {
2534 /* Add trailing zeroes. */
2535 memset (dst, '0', outputDigits);
2536 dst += outputDigits;
2539 /* Move the most significant digit to before the point, and if there
2540 is something after the decimal point add it. This must come
2541 after rounding above. */
2542 p[-1] = p[0];
2543 if (dst -1 == p)
2544 dst--;
2545 else
2546 p[0] = '.';
2548 /* Finally output the exponent. */
2549 *dst++ = upperCase ? 'P': 'p';
2551 return writeSignedDecimal (dst, exponent);
2554 // For good performance it is desirable for different APFloats
2555 // to produce different integers.
2556 uint32_t
2557 APFloat::getHashValue() const
2559 if (category==fcZero) return sign<<8 | semantics->precision ;
2560 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2561 else if (category==fcNaN) return 1<<10 | semantics->precision;
2562 else {
2563 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2564 const integerPart* p = significandParts();
2565 for (int i=partCount(); i>0; i--, p++)
2566 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2567 return hash;
2571 // Conversion from APFloat to/from host float/double. It may eventually be
2572 // possible to eliminate these and have everybody deal with APFloats, but that
2573 // will take a while. This approach will not easily extend to long double.
2574 // Current implementation requires integerPartWidth==64, which is correct at
2575 // the moment but could be made more general.
2577 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2578 // the actual IEEE respresentations. We compensate for that here.
2580 APInt
2581 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2583 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2584 assert (partCount()==2);
2586 uint64_t myexponent, mysignificand;
2588 if (category==fcNormal) {
2589 myexponent = exponent+16383; //bias
2590 mysignificand = significandParts()[0];
2591 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2592 myexponent = 0; // denormal
2593 } else if (category==fcZero) {
2594 myexponent = 0;
2595 mysignificand = 0;
2596 } else if (category==fcInfinity) {
2597 myexponent = 0x7fff;
2598 mysignificand = 0x8000000000000000ULL;
2599 } else {
2600 assert(category == fcNaN && "Unknown category");
2601 myexponent = 0x7fff;
2602 mysignificand = significandParts()[0];
2605 uint64_t words[2];
2606 words[0] = mysignificand;
2607 words[1] = ((uint64_t)(sign & 1) << 15) |
2608 (myexponent & 0x7fffLL);
2609 return APInt(80, 2, words);
2612 APInt
2613 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2615 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2616 assert (partCount()==2);
2618 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2620 if (category==fcNormal) {
2621 myexponent = exponent + 1023; //bias
2622 myexponent2 = exponent2 + 1023;
2623 mysignificand = significandParts()[0];
2624 mysignificand2 = significandParts()[1];
2625 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2626 myexponent = 0; // denormal
2627 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2628 myexponent2 = 0; // denormal
2629 } else if (category==fcZero) {
2630 myexponent = 0;
2631 mysignificand = 0;
2632 myexponent2 = 0;
2633 mysignificand2 = 0;
2634 } else if (category==fcInfinity) {
2635 myexponent = 0x7ff;
2636 myexponent2 = 0;
2637 mysignificand = 0;
2638 mysignificand2 = 0;
2639 } else {
2640 assert(category == fcNaN && "Unknown category");
2641 myexponent = 0x7ff;
2642 mysignificand = significandParts()[0];
2643 myexponent2 = exponent2;
2644 mysignificand2 = significandParts()[1];
2647 uint64_t words[2];
2648 words[0] = ((uint64_t)(sign & 1) << 63) |
2649 ((myexponent & 0x7ff) << 52) |
2650 (mysignificand & 0xfffffffffffffLL);
2651 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2652 ((myexponent2 & 0x7ff) << 52) |
2653 (mysignificand2 & 0xfffffffffffffLL);
2654 return APInt(128, 2, words);
2657 APInt
2658 APFloat::convertDoubleAPFloatToAPInt() const
2660 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2661 assert (partCount()==1);
2663 uint64_t myexponent, mysignificand;
2665 if (category==fcNormal) {
2666 myexponent = exponent+1023; //bias
2667 mysignificand = *significandParts();
2668 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2669 myexponent = 0; // denormal
2670 } else if (category==fcZero) {
2671 myexponent = 0;
2672 mysignificand = 0;
2673 } else if (category==fcInfinity) {
2674 myexponent = 0x7ff;
2675 mysignificand = 0;
2676 } else {
2677 assert(category == fcNaN && "Unknown category!");
2678 myexponent = 0x7ff;
2679 mysignificand = *significandParts();
2682 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2683 ((myexponent & 0x7ff) << 52) |
2684 (mysignificand & 0xfffffffffffffLL))));
2687 APInt
2688 APFloat::convertFloatAPFloatToAPInt() const
2690 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2691 assert (partCount()==1);
2693 uint32_t myexponent, mysignificand;
2695 if (category==fcNormal) {
2696 myexponent = exponent+127; //bias
2697 mysignificand = (uint32_t)*significandParts();
2698 if (myexponent == 1 && !(mysignificand & 0x800000))
2699 myexponent = 0; // denormal
2700 } else if (category==fcZero) {
2701 myexponent = 0;
2702 mysignificand = 0;
2703 } else if (category==fcInfinity) {
2704 myexponent = 0xff;
2705 mysignificand = 0;
2706 } else {
2707 assert(category == fcNaN && "Unknown category!");
2708 myexponent = 0xff;
2709 mysignificand = (uint32_t)*significandParts();
2712 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2713 (mysignificand & 0x7fffff)));
2716 // This function creates an APInt that is just a bit map of the floating
2717 // point constant as it would appear in memory. It is not a conversion,
2718 // and treating the result as a normal integer is unlikely to be useful.
2720 APInt
2721 APFloat::bitcastToAPInt() const
2723 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2724 return convertFloatAPFloatToAPInt();
2726 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2727 return convertDoubleAPFloatToAPInt();
2729 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2730 return convertPPCDoubleDoubleAPFloatToAPInt();
2732 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2733 "unknown format!");
2734 return convertF80LongDoubleAPFloatToAPInt();
2737 float
2738 APFloat::convertToFloat() const
2740 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2741 APInt api = bitcastToAPInt();
2742 return api.bitsToFloat();
2745 double
2746 APFloat::convertToDouble() const
2748 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2749 APInt api = bitcastToAPInt();
2750 return api.bitsToDouble();
2753 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2754 /// does not support these bit patterns:
2755 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2756 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2757 /// exponent = 0, integer bit 1 ("pseudodenormal")
2758 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2759 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2760 void
2761 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2763 assert(api.getBitWidth()==80);
2764 uint64_t i1 = api.getRawData()[0];
2765 uint64_t i2 = api.getRawData()[1];
2766 uint64_t myexponent = (i2 & 0x7fff);
2767 uint64_t mysignificand = i1;
2769 initialize(&APFloat::x87DoubleExtended);
2770 assert(partCount()==2);
2772 sign = static_cast<unsigned int>(i2>>15);
2773 if (myexponent==0 && mysignificand==0) {
2774 // exponent, significand meaningless
2775 category = fcZero;
2776 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2777 // exponent, significand meaningless
2778 category = fcInfinity;
2779 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2780 // exponent meaningless
2781 category = fcNaN;
2782 significandParts()[0] = mysignificand;
2783 significandParts()[1] = 0;
2784 } else {
2785 category = fcNormal;
2786 exponent = myexponent - 16383;
2787 significandParts()[0] = mysignificand;
2788 significandParts()[1] = 0;
2789 if (myexponent==0) // denormal
2790 exponent = -16382;
2794 void
2795 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2797 assert(api.getBitWidth()==128);
2798 uint64_t i1 = api.getRawData()[0];
2799 uint64_t i2 = api.getRawData()[1];
2800 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2801 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2802 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2803 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2805 initialize(&APFloat::PPCDoubleDouble);
2806 assert(partCount()==2);
2808 sign = static_cast<unsigned int>(i1>>63);
2809 sign2 = static_cast<unsigned int>(i2>>63);
2810 if (myexponent==0 && mysignificand==0) {
2811 // exponent, significand meaningless
2812 // exponent2 and significand2 are required to be 0; we don't check
2813 category = fcZero;
2814 } else if (myexponent==0x7ff && mysignificand==0) {
2815 // exponent, significand meaningless
2816 // exponent2 and significand2 are required to be 0; we don't check
2817 category = fcInfinity;
2818 } else if (myexponent==0x7ff && mysignificand!=0) {
2819 // exponent meaningless. So is the whole second word, but keep it
2820 // for determinism.
2821 category = fcNaN;
2822 exponent2 = myexponent2;
2823 significandParts()[0] = mysignificand;
2824 significandParts()[1] = mysignificand2;
2825 } else {
2826 category = fcNormal;
2827 // Note there is no category2; the second word is treated as if it is
2828 // fcNormal, although it might be something else considered by itself.
2829 exponent = myexponent - 1023;
2830 exponent2 = myexponent2 - 1023;
2831 significandParts()[0] = mysignificand;
2832 significandParts()[1] = mysignificand2;
2833 if (myexponent==0) // denormal
2834 exponent = -1022;
2835 else
2836 significandParts()[0] |= 0x10000000000000LL; // integer bit
2837 if (myexponent2==0)
2838 exponent2 = -1022;
2839 else
2840 significandParts()[1] |= 0x10000000000000LL; // integer bit
2844 void
2845 APFloat::initFromDoubleAPInt(const APInt &api)
2847 assert(api.getBitWidth()==64);
2848 uint64_t i = *api.getRawData();
2849 uint64_t myexponent = (i >> 52) & 0x7ff;
2850 uint64_t mysignificand = i & 0xfffffffffffffLL;
2852 initialize(&APFloat::IEEEdouble);
2853 assert(partCount()==1);
2855 sign = static_cast<unsigned int>(i>>63);
2856 if (myexponent==0 && mysignificand==0) {
2857 // exponent, significand meaningless
2858 category = fcZero;
2859 } else if (myexponent==0x7ff && mysignificand==0) {
2860 // exponent, significand meaningless
2861 category = fcInfinity;
2862 } else if (myexponent==0x7ff && mysignificand!=0) {
2863 // exponent meaningless
2864 category = fcNaN;
2865 *significandParts() = mysignificand;
2866 } else {
2867 category = fcNormal;
2868 exponent = myexponent - 1023;
2869 *significandParts() = mysignificand;
2870 if (myexponent==0) // denormal
2871 exponent = -1022;
2872 else
2873 *significandParts() |= 0x10000000000000LL; // integer bit
2877 void
2878 APFloat::initFromFloatAPInt(const APInt & api)
2880 assert(api.getBitWidth()==32);
2881 uint32_t i = (uint32_t)*api.getRawData();
2882 uint32_t myexponent = (i >> 23) & 0xff;
2883 uint32_t mysignificand = i & 0x7fffff;
2885 initialize(&APFloat::IEEEsingle);
2886 assert(partCount()==1);
2888 sign = i >> 31;
2889 if (myexponent==0 && mysignificand==0) {
2890 // exponent, significand meaningless
2891 category = fcZero;
2892 } else if (myexponent==0xff && mysignificand==0) {
2893 // exponent, significand meaningless
2894 category = fcInfinity;
2895 } else if (myexponent==0xff && mysignificand!=0) {
2896 // sign, exponent, significand meaningless
2897 category = fcNaN;
2898 *significandParts() = mysignificand;
2899 } else {
2900 category = fcNormal;
2901 exponent = myexponent - 127; //bias
2902 *significandParts() = mysignificand;
2903 if (myexponent==0) // denormal
2904 exponent = -126;
2905 else
2906 *significandParts() |= 0x800000; // integer bit
2910 /// Treat api as containing the bits of a floating point number. Currently
2911 /// we infer the floating point type from the size of the APInt. The
2912 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2913 /// when the size is anything else).
2914 void
2915 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2917 if (api.getBitWidth() == 32)
2918 return initFromFloatAPInt(api);
2919 else if (api.getBitWidth()==64)
2920 return initFromDoubleAPInt(api);
2921 else if (api.getBitWidth()==80)
2922 return initFromF80LongDoubleAPInt(api);
2923 else if (api.getBitWidth()==128 && !isIEEE)
2924 return initFromPPCDoubleDoubleAPInt(api);
2925 else
2926 assert(0);
2929 APFloat::APFloat(const APInt& api, bool isIEEE)
2931 initFromAPInt(api, isIEEE);
2934 APFloat::APFloat(float f)
2936 APInt api = APInt(32, 0);
2937 initFromAPInt(api.floatToBits(f));
2940 APFloat::APFloat(double d)
2942 APInt api = APInt(64, 0);
2943 initFromAPInt(api.doubleToBits(d));