1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/Support/MathExtras.h"
22 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
24 /* Assumed in hexadecimal significand parsing, and conversion to
25 hexadecimal strings. */
26 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
27 COMPILE_TIME_ASSERT(integerPartWidth
% 4 == 0);
31 /* Represents floating point arithmetic semantics. */
33 /* The largest E such that 2^E is representable; this matches the
34 definition of IEEE 754. */
35 exponent_t maxExponent
;
37 /* The smallest E such that 2^E is a normalized number; this
38 matches the definition of IEEE 754. */
39 exponent_t minExponent
;
41 /* Number of bits in the significand. This includes the integer
43 unsigned int precision
;
45 /* True if arithmetic is supported. */
46 unsigned int arithmeticOK
;
49 const fltSemantics
APFloat::IEEEsingle
= { 127, -126, 24, true };
50 const fltSemantics
APFloat::IEEEdouble
= { 1023, -1022, 53, true };
51 const fltSemantics
APFloat::IEEEquad
= { 16383, -16382, 113, true };
52 const fltSemantics
APFloat::x87DoubleExtended
= { 16383, -16382, 64, true };
53 const fltSemantics
APFloat::Bogus
= { 0, 0, 0, true };
55 // The PowerPC format consists of two doubles. It does not map cleanly
56 // onto the usual format above. For now only storage of constants of
57 // this type is supported, no arithmetic.
58 const fltSemantics
APFloat::PPCDoubleDouble
= { 1023, -1022, 106, false };
60 /* A tight upper bound on number of parts required to hold the value
63 power * 815 / (351 * integerPartWidth) + 1
65 However, whilst the result may require only this many parts,
66 because we are multiplying two values to get it, the
67 multiplication may require an extra part with the excess part
68 being zero (consider the trivial case of 1 * 1, tcFullMultiply
69 requires two parts to hold the single-part result). So we add an
70 extra one to guarantee enough space whilst multiplying. */
71 const unsigned int maxExponent
= 16383;
72 const unsigned int maxPrecision
= 113;
73 const unsigned int maxPowerOfFiveExponent
= maxExponent
+ maxPrecision
- 1;
74 const unsigned int maxPowerOfFiveParts
= 2 + ((maxPowerOfFiveExponent
* 815)
75 / (351 * integerPartWidth
));
78 /* A bunch of private, handy routines. */
80 static inline unsigned int
81 partCountForBits(unsigned int bits
)
83 return ((bits
) + integerPartWidth
- 1) / integerPartWidth
;
86 /* Returns 0U-9U. Return values >= 10U are not digits. */
87 static inline unsigned int
88 decDigitValue(unsigned int c
)
94 hexDigitValue(unsigned int c
)
114 assertArithmeticOK(const llvm::fltSemantics
&semantics
) {
115 assert(semantics
.arithmeticOK
116 && "Compile-time arithmetic does not support these semantics");
119 /* Return the value of a decimal exponent of the form
122 If the exponent overflows, returns a large exponent with the
125 readExponent(const char *p
)
128 unsigned int absExponent
;
129 const unsigned int overlargeExponent
= 24000; /* FIXME. */
131 isNegative
= (*p
== '-');
132 if (*p
== '-' || *p
== '+')
135 absExponent
= decDigitValue(*p
++);
136 assert (absExponent
< 10U);
141 value
= decDigitValue(*p
);
146 value
+= absExponent
* 10;
147 if (absExponent
>= overlargeExponent
) {
148 absExponent
= overlargeExponent
;
155 return -(int) absExponent
;
157 return (int) absExponent
;
160 /* This is ugly and needs cleaning up, but I don't immediately see
161 how whilst remaining safe. */
163 totalExponent(const char *p
, int exponentAdjustment
)
165 int unsignedExponent
;
166 bool negative
, overflow
;
169 /* Move past the exponent letter and sign to the digits. */
171 negative
= *p
== '-';
172 if(*p
== '-' || *p
== '+')
175 unsignedExponent
= 0;
180 value
= decDigitValue(*p
);
185 unsignedExponent
= unsignedExponent
* 10 + value
;
186 if(unsignedExponent
> 65535)
190 if(exponentAdjustment
> 65535 || exponentAdjustment
< -65536)
194 exponent
= unsignedExponent
;
196 exponent
= -exponent
;
197 exponent
+= exponentAdjustment
;
198 if(exponent
> 65535 || exponent
< -65536)
203 exponent
= negative
? -65536: 65535;
209 skipLeadingZeroesAndAnyDot(const char *p
, const char **dot
)
224 /* Given a normal decimal floating point number of the form
228 where the decimal point and exponent are optional, fill out the
229 structure D. Exponent is appropriate if the significand is
230 treated as an integer, and normalizedExponent if the significand
231 is taken to have the decimal point after a single leading
234 If the value is zero, V->firstSigDigit points to a non-digit, and
235 the return exponent is zero.
238 const char *firstSigDigit
;
239 const char *lastSigDigit
;
241 int normalizedExponent
;
245 interpretDecimal(const char *p
, decimalInfo
*D
)
249 p
= skipLeadingZeroesAndAnyDot (p
, &dot
);
251 D
->firstSigDigit
= p
;
253 D
->normalizedExponent
= 0;
260 if (decDigitValue(*p
) >= 10U)
265 /* If number is all zerooes accept any exponent. */
266 if (p
!= D
->firstSigDigit
) {
267 if (*p
== 'e' || *p
== 'E')
268 D
->exponent
= readExponent(p
+ 1);
270 /* Implied decimal point? */
274 /* Drop insignificant trailing zeroes. */
281 /* Adjust the exponents for any decimal point. */
282 D
->exponent
+= static_cast<exponent_t
>((dot
- p
) - (dot
> p
));
283 D
->normalizedExponent
= (D
->exponent
+
284 static_cast<exponent_t
>((p
- D
->firstSigDigit
)
285 - (dot
> D
->firstSigDigit
&& dot
< p
)));
291 /* Return the trailing fraction of a hexadecimal number.
292 DIGITVALUE is the first hex digit of the fraction, P points to
295 trailingHexadecimalFraction(const char *p
, unsigned int digitValue
)
297 unsigned int hexDigit
;
299 /* If the first trailing digit isn't 0 or 8 we can work out the
300 fraction immediately. */
302 return lfMoreThanHalf
;
303 else if(digitValue
< 8 && digitValue
> 0)
304 return lfLessThanHalf
;
306 /* Otherwise we need to find the first non-zero digit. */
310 hexDigit
= hexDigitValue(*p
);
312 /* If we ran off the end it is exactly zero or one-half, otherwise
315 return digitValue
== 0 ? lfExactlyZero
: lfExactlyHalf
;
317 return digitValue
== 0 ? lfLessThanHalf
: lfMoreThanHalf
;
320 /* Return the fraction lost were a bignum truncated losing the least
321 significant BITS bits. */
323 lostFractionThroughTruncation(const integerPart
*parts
,
324 unsigned int partCount
,
329 lsb
= APInt::tcLSB(parts
, partCount
);
331 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
333 return lfExactlyZero
;
335 return lfExactlyHalf
;
336 if(bits
<= partCount
* integerPartWidth
337 && APInt::tcExtractBit(parts
, bits
- 1))
338 return lfMoreThanHalf
;
340 return lfLessThanHalf
;
343 /* Shift DST right BITS bits noting lost fraction. */
345 shiftRight(integerPart
*dst
, unsigned int parts
, unsigned int bits
)
347 lostFraction lost_fraction
;
349 lost_fraction
= lostFractionThroughTruncation(dst
, parts
, bits
);
351 APInt::tcShiftRight(dst
, parts
, bits
);
353 return lost_fraction
;
356 /* Combine the effect of two lost fractions. */
358 combineLostFractions(lostFraction moreSignificant
,
359 lostFraction lessSignificant
)
361 if(lessSignificant
!= lfExactlyZero
) {
362 if(moreSignificant
== lfExactlyZero
)
363 moreSignificant
= lfLessThanHalf
;
364 else if(moreSignificant
== lfExactlyHalf
)
365 moreSignificant
= lfMoreThanHalf
;
368 return moreSignificant
;
371 /* The error from the true value, in half-ulps, on multiplying two
372 floating point numbers, which differ from the value they
373 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
374 than the returned value.
376 See "How to Read Floating Point Numbers Accurately" by William D
379 HUerrBound(bool inexactMultiply
, unsigned int HUerr1
, unsigned int HUerr2
)
381 assert(HUerr1
< 2 || HUerr2
< 2 || (HUerr1
+ HUerr2
< 8));
383 if (HUerr1
+ HUerr2
== 0)
384 return inexactMultiply
* 2; /* <= inexactMultiply half-ulps. */
386 return inexactMultiply
+ 2 * (HUerr1
+ HUerr2
);
389 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
390 when the least significant BITS are truncated. BITS cannot be
393 ulpsFromBoundary(const integerPart
*parts
, unsigned int bits
, bool isNearest
)
395 unsigned int count
, partBits
;
396 integerPart part
, boundary
;
401 count
= bits
/ integerPartWidth
;
402 partBits
= bits
% integerPartWidth
+ 1;
404 part
= parts
[count
] & (~(integerPart
) 0 >> (integerPartWidth
- partBits
));
407 boundary
= (integerPart
) 1 << (partBits
- 1);
412 if (part
- boundary
<= boundary
- part
)
413 return part
- boundary
;
415 return boundary
- part
;
418 if (part
== boundary
) {
421 return ~(integerPart
) 0; /* A lot. */
424 } else if (part
== boundary
- 1) {
427 return ~(integerPart
) 0; /* A lot. */
432 return ~(integerPart
) 0; /* A lot. */
435 /* Place pow(5, power) in DST, and return the number of parts used.
436 DST must be at least one part larger than size of the answer. */
438 powerOf5(integerPart
*dst
, unsigned int power
)
440 static const integerPart firstEightPowers
[] = { 1, 5, 25, 125, 625, 3125,
442 integerPart pow5s
[maxPowerOfFiveParts
* 2 + 5];
443 pow5s
[0] = 78125 * 5;
445 unsigned int partsCount
[16] = { 1 };
446 integerPart scratch
[maxPowerOfFiveParts
], *p1
, *p2
, *pow5
;
448 assert(power
<= maxExponent
);
453 *p1
= firstEightPowers
[power
& 7];
459 for (unsigned int n
= 0; power
; power
>>= 1, n
++) {
464 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
466 pc
= partsCount
[n
- 1];
467 APInt::tcFullMultiply(pow5
, pow5
- pc
, pow5
- pc
, pc
, pc
);
469 if (pow5
[pc
- 1] == 0)
477 APInt::tcFullMultiply(p2
, p1
, pow5
, result
, pc
);
479 if (p2
[result
- 1] == 0)
482 /* Now result is in p1 with partsCount parts and p2 is scratch
484 tmp
= p1
, p1
= p2
, p2
= tmp
;
491 APInt::tcAssign(dst
, p1
, result
);
496 /* Zero at the end to avoid modular arithmetic when adding one; used
497 when rounding up during hexadecimal output. */
498 static const char hexDigitsLower
[] = "0123456789abcdef0";
499 static const char hexDigitsUpper
[] = "0123456789ABCDEF0";
500 static const char infinityL
[] = "infinity";
501 static const char infinityU
[] = "INFINITY";
502 static const char NaNL
[] = "nan";
503 static const char NaNU
[] = "NAN";
505 /* Write out an integerPart in hexadecimal, starting with the most
506 significant nibble. Write out exactly COUNT hexdigits, return
509 partAsHex (char *dst
, integerPart part
, unsigned int count
,
510 const char *hexDigitChars
)
512 unsigned int result
= count
;
514 assert (count
!= 0 && count
<= integerPartWidth
/ 4);
516 part
>>= (integerPartWidth
- 4 * count
);
518 dst
[count
] = hexDigitChars
[part
& 0xf];
525 /* Write out an unsigned decimal integer. */
527 writeUnsignedDecimal (char *dst
, unsigned int n
)
543 /* Write out a signed decimal integer. */
545 writeSignedDecimal (char *dst
, int value
)
549 dst
= writeUnsignedDecimal(dst
, -(unsigned) value
);
551 dst
= writeUnsignedDecimal(dst
, value
);
558 APFloat::initialize(const fltSemantics
*ourSemantics
)
562 semantics
= ourSemantics
;
565 significand
.parts
= new integerPart
[count
];
569 APFloat::freeSignificand()
572 delete [] significand
.parts
;
576 APFloat::assign(const APFloat
&rhs
)
578 assert(semantics
== rhs
.semantics
);
581 category
= rhs
.category
;
582 exponent
= rhs
.exponent
;
584 exponent2
= rhs
.exponent2
;
585 if(category
== fcNormal
|| category
== fcNaN
)
586 copySignificand(rhs
);
590 APFloat::copySignificand(const APFloat
&rhs
)
592 assert(category
== fcNormal
|| category
== fcNaN
);
593 assert(rhs
.partCount() >= partCount());
595 APInt::tcAssign(significandParts(), rhs
.significandParts(),
599 /* Make this number a NaN, with an arbitrary but deterministic value
600 for the significand. If double or longer, this is a signalling NaN,
601 which may not be ideal. */
603 APFloat::makeNaN(void)
606 APInt::tcSet(significandParts(), ~0U, partCount());
610 APFloat::operator=(const APFloat
&rhs
)
613 if(semantics
!= rhs
.semantics
) {
615 initialize(rhs
.semantics
);
624 APFloat::bitwiseIsEqual(const APFloat
&rhs
) const {
627 if (semantics
!= rhs
.semantics
||
628 category
!= rhs
.category
||
631 if (semantics
==(const llvm::fltSemantics
*)&PPCDoubleDouble
&&
634 if (category
==fcZero
|| category
==fcInfinity
)
636 else if (category
==fcNormal
&& exponent
!=rhs
.exponent
)
638 else if (semantics
==(const llvm::fltSemantics
*)&PPCDoubleDouble
&&
639 exponent2
!=rhs
.exponent2
)
643 const integerPart
* p
=significandParts();
644 const integerPart
* q
=rhs
.significandParts();
645 for (; i
>0; i
--, p
++, q
++) {
653 APFloat::APFloat(const fltSemantics
&ourSemantics
, integerPart value
)
655 assertArithmeticOK(ourSemantics
);
656 initialize(&ourSemantics
);
659 exponent
= ourSemantics
.precision
- 1;
660 significandParts()[0] = value
;
661 normalize(rmNearestTiesToEven
, lfExactlyZero
);
664 APFloat::APFloat(const fltSemantics
&ourSemantics
,
665 fltCategory ourCategory
, bool negative
)
667 assertArithmeticOK(ourSemantics
);
668 initialize(&ourSemantics
);
669 category
= ourCategory
;
671 if(category
== fcNormal
)
673 else if (ourCategory
== fcNaN
)
677 APFloat::APFloat(const fltSemantics
&ourSemantics
, const char *text
)
679 assertArithmeticOK(ourSemantics
);
680 initialize(&ourSemantics
);
681 convertFromString(text
, rmNearestTiesToEven
);
684 APFloat::APFloat(const APFloat
&rhs
)
686 initialize(rhs
.semantics
);
695 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
696 void APFloat::Profile(FoldingSetNodeID
& ID
) const {
697 ID
.Add(bitcastToAPInt());
701 APFloat::partCount() const
703 return partCountForBits(semantics
->precision
+ 1);
707 APFloat::semanticsPrecision(const fltSemantics
&semantics
)
709 return semantics
.precision
;
713 APFloat::significandParts() const
715 return const_cast<APFloat
*>(this)->significandParts();
719 APFloat::significandParts()
721 assert(category
== fcNormal
|| category
== fcNaN
);
724 return significand
.parts
;
726 return &significand
.part
;
730 APFloat::zeroSignificand()
733 APInt::tcSet(significandParts(), 0, partCount());
736 /* Increment an fcNormal floating point number's significand. */
738 APFloat::incrementSignificand()
742 carry
= APInt::tcIncrement(significandParts(), partCount());
744 /* Our callers should never cause us to overflow. */
748 /* Add the significand of the RHS. Returns the carry flag. */
750 APFloat::addSignificand(const APFloat
&rhs
)
754 parts
= significandParts();
756 assert(semantics
== rhs
.semantics
);
757 assert(exponent
== rhs
.exponent
);
759 return APInt::tcAdd(parts
, rhs
.significandParts(), 0, partCount());
762 /* Subtract the significand of the RHS with a borrow flag. Returns
765 APFloat::subtractSignificand(const APFloat
&rhs
, integerPart borrow
)
769 parts
= significandParts();
771 assert(semantics
== rhs
.semantics
);
772 assert(exponent
== rhs
.exponent
);
774 return APInt::tcSubtract(parts
, rhs
.significandParts(), borrow
,
778 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
779 on to the full-precision result of the multiplication. Returns the
782 APFloat::multiplySignificand(const APFloat
&rhs
, const APFloat
*addend
)
784 unsigned int omsb
; // One, not zero, based MSB.
785 unsigned int partsCount
, newPartsCount
, precision
;
786 integerPart
*lhsSignificand
;
787 integerPart scratch
[4];
788 integerPart
*fullSignificand
;
789 lostFraction lost_fraction
;
792 assert(semantics
== rhs
.semantics
);
794 precision
= semantics
->precision
;
795 newPartsCount
= partCountForBits(precision
* 2);
797 if(newPartsCount
> 4)
798 fullSignificand
= new integerPart
[newPartsCount
];
800 fullSignificand
= scratch
;
802 lhsSignificand
= significandParts();
803 partsCount
= partCount();
805 APInt::tcFullMultiply(fullSignificand
, lhsSignificand
,
806 rhs
.significandParts(), partsCount
, partsCount
);
808 lost_fraction
= lfExactlyZero
;
809 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
810 exponent
+= rhs
.exponent
;
813 Significand savedSignificand
= significand
;
814 const fltSemantics
*savedSemantics
= semantics
;
815 fltSemantics extendedSemantics
;
817 unsigned int extendedPrecision
;
819 /* Normalize our MSB. */
820 extendedPrecision
= precision
+ precision
- 1;
821 if(omsb
!= extendedPrecision
)
823 APInt::tcShiftLeft(fullSignificand
, newPartsCount
,
824 extendedPrecision
- omsb
);
825 exponent
-= extendedPrecision
- omsb
;
828 /* Create new semantics. */
829 extendedSemantics
= *semantics
;
830 extendedSemantics
.precision
= extendedPrecision
;
832 if(newPartsCount
== 1)
833 significand
.part
= fullSignificand
[0];
835 significand
.parts
= fullSignificand
;
836 semantics
= &extendedSemantics
;
838 APFloat
extendedAddend(*addend
);
839 status
= extendedAddend
.convert(extendedSemantics
, rmTowardZero
, &ignored
);
840 assert(status
== opOK
);
841 lost_fraction
= addOrSubtractSignificand(extendedAddend
, false);
843 /* Restore our state. */
844 if(newPartsCount
== 1)
845 fullSignificand
[0] = significand
.part
;
846 significand
= savedSignificand
;
847 semantics
= savedSemantics
;
849 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
852 exponent
-= (precision
- 1);
854 if(omsb
> precision
) {
855 unsigned int bits
, significantParts
;
858 bits
= omsb
- precision
;
859 significantParts
= partCountForBits(omsb
);
860 lf
= shiftRight(fullSignificand
, significantParts
, bits
);
861 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
865 APInt::tcAssign(lhsSignificand
, fullSignificand
, partsCount
);
867 if(newPartsCount
> 4)
868 delete [] fullSignificand
;
870 return lost_fraction
;
873 /* Multiply the significands of LHS and RHS to DST. */
875 APFloat::divideSignificand(const APFloat
&rhs
)
877 unsigned int bit
, i
, partsCount
;
878 const integerPart
*rhsSignificand
;
879 integerPart
*lhsSignificand
, *dividend
, *divisor
;
880 integerPart scratch
[4];
881 lostFraction lost_fraction
;
883 assert(semantics
== rhs
.semantics
);
885 lhsSignificand
= significandParts();
886 rhsSignificand
= rhs
.significandParts();
887 partsCount
= partCount();
890 dividend
= new integerPart
[partsCount
* 2];
894 divisor
= dividend
+ partsCount
;
896 /* Copy the dividend and divisor as they will be modified in-place. */
897 for(i
= 0; i
< partsCount
; i
++) {
898 dividend
[i
] = lhsSignificand
[i
];
899 divisor
[i
] = rhsSignificand
[i
];
900 lhsSignificand
[i
] = 0;
903 exponent
-= rhs
.exponent
;
905 unsigned int precision
= semantics
->precision
;
907 /* Normalize the divisor. */
908 bit
= precision
- APInt::tcMSB(divisor
, partsCount
) - 1;
911 APInt::tcShiftLeft(divisor
, partsCount
, bit
);
914 /* Normalize the dividend. */
915 bit
= precision
- APInt::tcMSB(dividend
, partsCount
) - 1;
918 APInt::tcShiftLeft(dividend
, partsCount
, bit
);
921 /* Ensure the dividend >= divisor initially for the loop below.
922 Incidentally, this means that the division loop below is
923 guaranteed to set the integer bit to one. */
924 if(APInt::tcCompare(dividend
, divisor
, partsCount
) < 0) {
926 APInt::tcShiftLeft(dividend
, partsCount
, 1);
927 assert(APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0);
931 for(bit
= precision
; bit
; bit
-= 1) {
932 if(APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0) {
933 APInt::tcSubtract(dividend
, divisor
, 0, partsCount
);
934 APInt::tcSetBit(lhsSignificand
, bit
- 1);
937 APInt::tcShiftLeft(dividend
, partsCount
, 1);
940 /* Figure out the lost fraction. */
941 int cmp
= APInt::tcCompare(dividend
, divisor
, partsCount
);
944 lost_fraction
= lfMoreThanHalf
;
946 lost_fraction
= lfExactlyHalf
;
947 else if(APInt::tcIsZero(dividend
, partsCount
))
948 lost_fraction
= lfExactlyZero
;
950 lost_fraction
= lfLessThanHalf
;
955 return lost_fraction
;
959 APFloat::significandMSB() const
961 return APInt::tcMSB(significandParts(), partCount());
965 APFloat::significandLSB() const
967 return APInt::tcLSB(significandParts(), partCount());
970 /* Note that a zero result is NOT normalized to fcZero. */
972 APFloat::shiftSignificandRight(unsigned int bits
)
974 /* Our exponent should not overflow. */
975 assert((exponent_t
) (exponent
+ bits
) >= exponent
);
979 return shiftRight(significandParts(), partCount(), bits
);
982 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
984 APFloat::shiftSignificandLeft(unsigned int bits
)
986 assert(bits
< semantics
->precision
);
989 unsigned int partsCount
= partCount();
991 APInt::tcShiftLeft(significandParts(), partsCount
, bits
);
994 assert(!APInt::tcIsZero(significandParts(), partsCount
));
999 APFloat::compareAbsoluteValue(const APFloat
&rhs
) const
1003 assert(semantics
== rhs
.semantics
);
1004 assert(category
== fcNormal
);
1005 assert(rhs
.category
== fcNormal
);
1007 compare
= exponent
- rhs
.exponent
;
1009 /* If exponents are equal, do an unsigned bignum comparison of the
1012 compare
= APInt::tcCompare(significandParts(), rhs
.significandParts(),
1016 return cmpGreaterThan
;
1017 else if(compare
< 0)
1023 /* Handle overflow. Sign is preserved. We either become infinity or
1024 the largest finite number. */
1026 APFloat::handleOverflow(roundingMode rounding_mode
)
1029 if(rounding_mode
== rmNearestTiesToEven
1030 || rounding_mode
== rmNearestTiesToAway
1031 || (rounding_mode
== rmTowardPositive
&& !sign
)
1032 || (rounding_mode
== rmTowardNegative
&& sign
))
1034 category
= fcInfinity
;
1035 return (opStatus
) (opOverflow
| opInexact
);
1038 /* Otherwise we become the largest finite number. */
1039 category
= fcNormal
;
1040 exponent
= semantics
->maxExponent
;
1041 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1042 semantics
->precision
);
1047 /* Returns TRUE if, when truncating the current number, with BIT the
1048 new LSB, with the given lost fraction and rounding mode, the result
1049 would need to be rounded away from zero (i.e., by increasing the
1050 signficand). This routine must work for fcZero of both signs, and
1051 fcNormal numbers. */
1053 APFloat::roundAwayFromZero(roundingMode rounding_mode
,
1054 lostFraction lost_fraction
,
1055 unsigned int bit
) const
1057 /* NaNs and infinities should not have lost fractions. */
1058 assert(category
== fcNormal
|| category
== fcZero
);
1060 /* Current callers never pass this so we don't handle it. */
1061 assert(lost_fraction
!= lfExactlyZero
);
1063 switch(rounding_mode
) {
1067 case rmNearestTiesToAway
:
1068 return lost_fraction
== lfExactlyHalf
|| lost_fraction
== lfMoreThanHalf
;
1070 case rmNearestTiesToEven
:
1071 if(lost_fraction
== lfMoreThanHalf
)
1074 /* Our zeroes don't have a significand to test. */
1075 if(lost_fraction
== lfExactlyHalf
&& category
!= fcZero
)
1076 return APInt::tcExtractBit(significandParts(), bit
);
1083 case rmTowardPositive
:
1084 return sign
== false;
1086 case rmTowardNegative
:
1087 return sign
== true;
1092 APFloat::normalize(roundingMode rounding_mode
,
1093 lostFraction lost_fraction
)
1095 unsigned int omsb
; /* One, not zero, based MSB. */
1098 if(category
!= fcNormal
)
1101 /* Before rounding normalize the exponent of fcNormal numbers. */
1102 omsb
= significandMSB() + 1;
1105 /* OMSB is numbered from 1. We want to place it in the integer
1106 bit numbered PRECISON if possible, with a compensating change in
1108 exponentChange
= omsb
- semantics
->precision
;
1110 /* If the resulting exponent is too high, overflow according to
1111 the rounding mode. */
1112 if(exponent
+ exponentChange
> semantics
->maxExponent
)
1113 return handleOverflow(rounding_mode
);
1115 /* Subnormal numbers have exponent minExponent, and their MSB
1116 is forced based on that. */
1117 if(exponent
+ exponentChange
< semantics
->minExponent
)
1118 exponentChange
= semantics
->minExponent
- exponent
;
1120 /* Shifting left is easy as we don't lose precision. */
1121 if(exponentChange
< 0) {
1122 assert(lost_fraction
== lfExactlyZero
);
1124 shiftSignificandLeft(-exponentChange
);
1129 if(exponentChange
> 0) {
1132 /* Shift right and capture any new lost fraction. */
1133 lf
= shiftSignificandRight(exponentChange
);
1135 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1137 /* Keep OMSB up-to-date. */
1138 if(omsb
> (unsigned) exponentChange
)
1139 omsb
-= exponentChange
;
1145 /* Now round the number according to rounding_mode given the lost
1148 /* As specified in IEEE 754, since we do not trap we do not report
1149 underflow for exact results. */
1150 if(lost_fraction
== lfExactlyZero
) {
1151 /* Canonicalize zeroes. */
1158 /* Increment the significand if we're rounding away from zero. */
1159 if(roundAwayFromZero(rounding_mode
, lost_fraction
, 0)) {
1161 exponent
= semantics
->minExponent
;
1163 incrementSignificand();
1164 omsb
= significandMSB() + 1;
1166 /* Did the significand increment overflow? */
1167 if(omsb
== (unsigned) semantics
->precision
+ 1) {
1168 /* Renormalize by incrementing the exponent and shifting our
1169 significand right one. However if we already have the
1170 maximum exponent we overflow to infinity. */
1171 if(exponent
== semantics
->maxExponent
) {
1172 category
= fcInfinity
;
1174 return (opStatus
) (opOverflow
| opInexact
);
1177 shiftSignificandRight(1);
1183 /* The normal case - we were and are not denormal, and any
1184 significand increment above didn't overflow. */
1185 if(omsb
== semantics
->precision
)
1188 /* We have a non-zero denormal. */
1189 assert(omsb
< semantics
->precision
);
1191 /* Canonicalize zeroes. */
1195 /* The fcZero case is a denormal that underflowed to zero. */
1196 return (opStatus
) (opUnderflow
| opInexact
);
1200 APFloat::addOrSubtractSpecials(const APFloat
&rhs
, bool subtract
)
1202 switch(convolve(category
, rhs
.category
)) {
1206 case convolve(fcNaN
, fcZero
):
1207 case convolve(fcNaN
, fcNormal
):
1208 case convolve(fcNaN
, fcInfinity
):
1209 case convolve(fcNaN
, fcNaN
):
1210 case convolve(fcNormal
, fcZero
):
1211 case convolve(fcInfinity
, fcNormal
):
1212 case convolve(fcInfinity
, fcZero
):
1215 case convolve(fcZero
, fcNaN
):
1216 case convolve(fcNormal
, fcNaN
):
1217 case convolve(fcInfinity
, fcNaN
):
1219 copySignificand(rhs
);
1222 case convolve(fcNormal
, fcInfinity
):
1223 case convolve(fcZero
, fcInfinity
):
1224 category
= fcInfinity
;
1225 sign
= rhs
.sign
^ subtract
;
1228 case convolve(fcZero
, fcNormal
):
1230 sign
= rhs
.sign
^ subtract
;
1233 case convolve(fcZero
, fcZero
):
1234 /* Sign depends on rounding mode; handled by caller. */
1237 case convolve(fcInfinity
, fcInfinity
):
1238 /* Differently signed infinities can only be validly
1240 if(((sign
^ rhs
.sign
)!=0) != subtract
) {
1247 case convolve(fcNormal
, fcNormal
):
1252 /* Add or subtract two normal numbers. */
1254 APFloat::addOrSubtractSignificand(const APFloat
&rhs
, bool subtract
)
1257 lostFraction lost_fraction
;
1260 /* Determine if the operation on the absolute values is effectively
1261 an addition or subtraction. */
1262 subtract
^= (sign
^ rhs
.sign
) ? true : false;
1264 /* Are we bigger exponent-wise than the RHS? */
1265 bits
= exponent
- rhs
.exponent
;
1267 /* Subtraction is more subtle than one might naively expect. */
1269 APFloat
temp_rhs(rhs
);
1273 reverse
= compareAbsoluteValue(temp_rhs
) == cmpLessThan
;
1274 lost_fraction
= lfExactlyZero
;
1275 } else if (bits
> 0) {
1276 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
- 1);
1277 shiftSignificandLeft(1);
1280 lost_fraction
= shiftSignificandRight(-bits
- 1);
1281 temp_rhs
.shiftSignificandLeft(1);
1286 carry
= temp_rhs
.subtractSignificand
1287 (*this, lost_fraction
!= lfExactlyZero
);
1288 copySignificand(temp_rhs
);
1291 carry
= subtractSignificand
1292 (temp_rhs
, lost_fraction
!= lfExactlyZero
);
1295 /* Invert the lost fraction - it was on the RHS and
1297 if(lost_fraction
== lfLessThanHalf
)
1298 lost_fraction
= lfMoreThanHalf
;
1299 else if(lost_fraction
== lfMoreThanHalf
)
1300 lost_fraction
= lfLessThanHalf
;
1302 /* The code above is intended to ensure that no borrow is
1307 APFloat
temp_rhs(rhs
);
1309 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
);
1310 carry
= addSignificand(temp_rhs
);
1312 lost_fraction
= shiftSignificandRight(-bits
);
1313 carry
= addSignificand(rhs
);
1316 /* We have a guard bit; generating a carry cannot happen. */
1320 return lost_fraction
;
1324 APFloat::multiplySpecials(const APFloat
&rhs
)
1326 switch(convolve(category
, rhs
.category
)) {
1330 case convolve(fcNaN
, fcZero
):
1331 case convolve(fcNaN
, fcNormal
):
1332 case convolve(fcNaN
, fcInfinity
):
1333 case convolve(fcNaN
, fcNaN
):
1336 case convolve(fcZero
, fcNaN
):
1337 case convolve(fcNormal
, fcNaN
):
1338 case convolve(fcInfinity
, fcNaN
):
1340 copySignificand(rhs
);
1343 case convolve(fcNormal
, fcInfinity
):
1344 case convolve(fcInfinity
, fcNormal
):
1345 case convolve(fcInfinity
, fcInfinity
):
1346 category
= fcInfinity
;
1349 case convolve(fcZero
, fcNormal
):
1350 case convolve(fcNormal
, fcZero
):
1351 case convolve(fcZero
, fcZero
):
1355 case convolve(fcZero
, fcInfinity
):
1356 case convolve(fcInfinity
, fcZero
):
1360 case convolve(fcNormal
, fcNormal
):
1366 APFloat::divideSpecials(const APFloat
&rhs
)
1368 switch(convolve(category
, rhs
.category
)) {
1372 case convolve(fcNaN
, fcZero
):
1373 case convolve(fcNaN
, fcNormal
):
1374 case convolve(fcNaN
, fcInfinity
):
1375 case convolve(fcNaN
, fcNaN
):
1376 case convolve(fcInfinity
, fcZero
):
1377 case convolve(fcInfinity
, fcNormal
):
1378 case convolve(fcZero
, fcInfinity
):
1379 case convolve(fcZero
, fcNormal
):
1382 case convolve(fcZero
, fcNaN
):
1383 case convolve(fcNormal
, fcNaN
):
1384 case convolve(fcInfinity
, fcNaN
):
1386 copySignificand(rhs
);
1389 case convolve(fcNormal
, fcInfinity
):
1393 case convolve(fcNormal
, fcZero
):
1394 category
= fcInfinity
;
1397 case convolve(fcInfinity
, fcInfinity
):
1398 case convolve(fcZero
, fcZero
):
1402 case convolve(fcNormal
, fcNormal
):
1408 APFloat::modSpecials(const APFloat
&rhs
)
1410 switch(convolve(category
, rhs
.category
)) {
1414 case convolve(fcNaN
, fcZero
):
1415 case convolve(fcNaN
, fcNormal
):
1416 case convolve(fcNaN
, fcInfinity
):
1417 case convolve(fcNaN
, fcNaN
):
1418 case convolve(fcZero
, fcInfinity
):
1419 case convolve(fcZero
, fcNormal
):
1420 case convolve(fcNormal
, fcInfinity
):
1423 case convolve(fcZero
, fcNaN
):
1424 case convolve(fcNormal
, fcNaN
):
1425 case convolve(fcInfinity
, fcNaN
):
1427 copySignificand(rhs
);
1430 case convolve(fcNormal
, fcZero
):
1431 case convolve(fcInfinity
, fcZero
):
1432 case convolve(fcInfinity
, fcNormal
):
1433 case convolve(fcInfinity
, fcInfinity
):
1434 case convolve(fcZero
, fcZero
):
1438 case convolve(fcNormal
, fcNormal
):
1445 APFloat::changeSign()
1447 /* Look mummy, this one's easy. */
1452 APFloat::clearSign()
1454 /* So is this one. */
1459 APFloat::copySign(const APFloat
&rhs
)
1465 /* Normalized addition or subtraction. */
1467 APFloat::addOrSubtract(const APFloat
&rhs
, roundingMode rounding_mode
,
1472 assertArithmeticOK(*semantics
);
1474 fs
= addOrSubtractSpecials(rhs
, subtract
);
1476 /* This return code means it was not a simple case. */
1477 if(fs
== opDivByZero
) {
1478 lostFraction lost_fraction
;
1480 lost_fraction
= addOrSubtractSignificand(rhs
, subtract
);
1481 fs
= normalize(rounding_mode
, lost_fraction
);
1483 /* Can only be zero if we lost no fraction. */
1484 assert(category
!= fcZero
|| lost_fraction
== lfExactlyZero
);
1487 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1488 positive zero unless rounding to minus infinity, except that
1489 adding two like-signed zeroes gives that zero. */
1490 if(category
== fcZero
) {
1491 if(rhs
.category
!= fcZero
|| (sign
== rhs
.sign
) == subtract
)
1492 sign
= (rounding_mode
== rmTowardNegative
);
1498 /* Normalized addition. */
1500 APFloat::add(const APFloat
&rhs
, roundingMode rounding_mode
)
1502 return addOrSubtract(rhs
, rounding_mode
, false);
1505 /* Normalized subtraction. */
1507 APFloat::subtract(const APFloat
&rhs
, roundingMode rounding_mode
)
1509 return addOrSubtract(rhs
, rounding_mode
, true);
1512 /* Normalized multiply. */
1514 APFloat::multiply(const APFloat
&rhs
, roundingMode rounding_mode
)
1518 assertArithmeticOK(*semantics
);
1520 fs
= multiplySpecials(rhs
);
1522 if(category
== fcNormal
) {
1523 lostFraction lost_fraction
= multiplySignificand(rhs
, 0);
1524 fs
= normalize(rounding_mode
, lost_fraction
);
1525 if(lost_fraction
!= lfExactlyZero
)
1526 fs
= (opStatus
) (fs
| opInexact
);
1532 /* Normalized divide. */
1534 APFloat::divide(const APFloat
&rhs
, roundingMode rounding_mode
)
1538 assertArithmeticOK(*semantics
);
1540 fs
= divideSpecials(rhs
);
1542 if(category
== fcNormal
) {
1543 lostFraction lost_fraction
= divideSignificand(rhs
);
1544 fs
= normalize(rounding_mode
, lost_fraction
);
1545 if(lost_fraction
!= lfExactlyZero
)
1546 fs
= (opStatus
) (fs
| opInexact
);
1552 /* Normalized remainder. This is not currently correct in all cases. */
1554 APFloat::remainder(const APFloat
&rhs
)
1558 unsigned int origSign
= sign
;
1560 assertArithmeticOK(*semantics
);
1561 fs
= V
.divide(rhs
, rmNearestTiesToEven
);
1562 if (fs
== opDivByZero
)
1565 int parts
= partCount();
1566 integerPart
*x
= new integerPart
[parts
];
1568 fs
= V
.convertToInteger(x
, parts
* integerPartWidth
, true,
1569 rmNearestTiesToEven
, &ignored
);
1570 if (fs
==opInvalidOp
)
1573 fs
= V
.convertFromZeroExtendedInteger(x
, parts
* integerPartWidth
, true,
1574 rmNearestTiesToEven
);
1575 assert(fs
==opOK
); // should always work
1577 fs
= V
.multiply(rhs
, rmNearestTiesToEven
);
1578 assert(fs
==opOK
|| fs
==opInexact
); // should not overflow or underflow
1580 fs
= subtract(V
, rmNearestTiesToEven
);
1581 assert(fs
==opOK
|| fs
==opInexact
); // likewise
1584 sign
= origSign
; // IEEE754 requires this
1589 /* Normalized llvm frem (C fmod).
1590 This is not currently correct in all cases. */
1592 APFloat::mod(const APFloat
&rhs
, roundingMode rounding_mode
)
1595 assertArithmeticOK(*semantics
);
1596 fs
= modSpecials(rhs
);
1598 if (category
== fcNormal
&& rhs
.category
== fcNormal
) {
1600 unsigned int origSign
= sign
;
1602 fs
= V
.divide(rhs
, rmNearestTiesToEven
);
1603 if (fs
== opDivByZero
)
1606 int parts
= partCount();
1607 integerPart
*x
= new integerPart
[parts
];
1609 fs
= V
.convertToInteger(x
, parts
* integerPartWidth
, true,
1610 rmTowardZero
, &ignored
);
1611 if (fs
==opInvalidOp
)
1614 fs
= V
.convertFromZeroExtendedInteger(x
, parts
* integerPartWidth
, true,
1615 rmNearestTiesToEven
);
1616 assert(fs
==opOK
); // should always work
1618 fs
= V
.multiply(rhs
, rounding_mode
);
1619 assert(fs
==opOK
|| fs
==opInexact
); // should not overflow or underflow
1621 fs
= subtract(V
, rounding_mode
);
1622 assert(fs
==opOK
|| fs
==opInexact
); // likewise
1625 sign
= origSign
; // IEEE754 requires this
1631 /* Normalized fused-multiply-add. */
1633 APFloat::fusedMultiplyAdd(const APFloat
&multiplicand
,
1634 const APFloat
&addend
,
1635 roundingMode rounding_mode
)
1639 assertArithmeticOK(*semantics
);
1641 /* Post-multiplication sign, before addition. */
1642 sign
^= multiplicand
.sign
;
1644 /* If and only if all arguments are normal do we need to do an
1645 extended-precision calculation. */
1646 if(category
== fcNormal
1647 && multiplicand
.category
== fcNormal
1648 && addend
.category
== fcNormal
) {
1649 lostFraction lost_fraction
;
1651 lost_fraction
= multiplySignificand(multiplicand
, &addend
);
1652 fs
= normalize(rounding_mode
, lost_fraction
);
1653 if(lost_fraction
!= lfExactlyZero
)
1654 fs
= (opStatus
) (fs
| opInexact
);
1656 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1657 positive zero unless rounding to minus infinity, except that
1658 adding two like-signed zeroes gives that zero. */
1659 if(category
== fcZero
&& sign
!= addend
.sign
)
1660 sign
= (rounding_mode
== rmTowardNegative
);
1662 fs
= multiplySpecials(multiplicand
);
1664 /* FS can only be opOK or opInvalidOp. There is no more work
1665 to do in the latter case. The IEEE-754R standard says it is
1666 implementation-defined in this case whether, if ADDEND is a
1667 quiet NaN, we raise invalid op; this implementation does so.
1669 If we need to do the addition we can do so with normal
1672 fs
= addOrSubtract(addend
, rounding_mode
, false);
1678 /* Comparison requires normalized numbers. */
1680 APFloat::compare(const APFloat
&rhs
) const
1684 assertArithmeticOK(*semantics
);
1685 assert(semantics
== rhs
.semantics
);
1687 switch(convolve(category
, rhs
.category
)) {
1691 case convolve(fcNaN
, fcZero
):
1692 case convolve(fcNaN
, fcNormal
):
1693 case convolve(fcNaN
, fcInfinity
):
1694 case convolve(fcNaN
, fcNaN
):
1695 case convolve(fcZero
, fcNaN
):
1696 case convolve(fcNormal
, fcNaN
):
1697 case convolve(fcInfinity
, fcNaN
):
1698 return cmpUnordered
;
1700 case convolve(fcInfinity
, fcNormal
):
1701 case convolve(fcInfinity
, fcZero
):
1702 case convolve(fcNormal
, fcZero
):
1706 return cmpGreaterThan
;
1708 case convolve(fcNormal
, fcInfinity
):
1709 case convolve(fcZero
, fcInfinity
):
1710 case convolve(fcZero
, fcNormal
):
1712 return cmpGreaterThan
;
1716 case convolve(fcInfinity
, fcInfinity
):
1717 if(sign
== rhs
.sign
)
1722 return cmpGreaterThan
;
1724 case convolve(fcZero
, fcZero
):
1727 case convolve(fcNormal
, fcNormal
):
1731 /* Two normal numbers. Do they have the same sign? */
1732 if(sign
!= rhs
.sign
) {
1734 result
= cmpLessThan
;
1736 result
= cmpGreaterThan
;
1738 /* Compare absolute values; invert result if negative. */
1739 result
= compareAbsoluteValue(rhs
);
1742 if(result
== cmpLessThan
)
1743 result
= cmpGreaterThan
;
1744 else if(result
== cmpGreaterThan
)
1745 result
= cmpLessThan
;
1752 /// APFloat::convert - convert a value of one floating point type to another.
1753 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1754 /// records whether the transformation lost information, i.e. whether
1755 /// converting the result back to the original type will produce the
1756 /// original value (this is almost the same as return value==fsOK, but there
1757 /// are edge cases where this is not so).
1760 APFloat::convert(const fltSemantics
&toSemantics
,
1761 roundingMode rounding_mode
, bool *losesInfo
)
1763 lostFraction lostFraction
;
1764 unsigned int newPartCount
, oldPartCount
;
1767 assertArithmeticOK(*semantics
);
1768 assertArithmeticOK(toSemantics
);
1769 lostFraction
= lfExactlyZero
;
1770 newPartCount
= partCountForBits(toSemantics
.precision
+ 1);
1771 oldPartCount
= partCount();
1773 /* Handle storage complications. If our new form is wider,
1774 re-allocate our bit pattern into wider storage. If it is
1775 narrower, we ignore the excess parts, but if narrowing to a
1776 single part we need to free the old storage.
1777 Be careful not to reference significandParts for zeroes
1778 and infinities, since it aborts. */
1779 if (newPartCount
> oldPartCount
) {
1780 integerPart
*newParts
;
1781 newParts
= new integerPart
[newPartCount
];
1782 APInt::tcSet(newParts
, 0, newPartCount
);
1783 if (category
==fcNormal
|| category
==fcNaN
)
1784 APInt::tcAssign(newParts
, significandParts(), oldPartCount
);
1786 significand
.parts
= newParts
;
1787 } else if (newPartCount
< oldPartCount
) {
1788 /* Capture any lost fraction through truncation of parts so we get
1789 correct rounding whilst normalizing. */
1790 if (category
==fcNormal
)
1791 lostFraction
= lostFractionThroughTruncation
1792 (significandParts(), oldPartCount
, toSemantics
.precision
);
1793 if (newPartCount
== 1) {
1794 integerPart newPart
= 0;
1795 if (category
==fcNormal
|| category
==fcNaN
)
1796 newPart
= significandParts()[0];
1798 significand
.part
= newPart
;
1802 if(category
== fcNormal
) {
1803 /* Re-interpret our bit-pattern. */
1804 exponent
+= toSemantics
.precision
- semantics
->precision
;
1805 semantics
= &toSemantics
;
1806 fs
= normalize(rounding_mode
, lostFraction
);
1807 *losesInfo
= (fs
!= opOK
);
1808 } else if (category
== fcNaN
) {
1809 int shift
= toSemantics
.precision
- semantics
->precision
;
1810 // Do this now so significandParts gets the right answer
1811 const fltSemantics
*oldSemantics
= semantics
;
1812 semantics
= &toSemantics
;
1814 // No normalization here, just truncate
1816 APInt::tcShiftLeft(significandParts(), newPartCount
, shift
);
1817 else if (shift
< 0) {
1818 unsigned ushift
= -shift
;
1819 // Figure out if we are losing information. This happens
1820 // if are shifting out something other than 0s, or if the x87 long
1821 // double input did not have its integer bit set (pseudo-NaN), or if the
1822 // x87 long double input did not have its QNan bit set (because the x87
1823 // hardware sets this bit when converting a lower-precision NaN to
1824 // x87 long double).
1825 if (APInt::tcLSB(significandParts(), newPartCount
) < ushift
)
1827 if (oldSemantics
== &APFloat::x87DoubleExtended
&&
1828 (!(*significandParts() & 0x8000000000000000ULL
) ||
1829 !(*significandParts() & 0x4000000000000000ULL
)))
1831 APInt::tcShiftRight(significandParts(), newPartCount
, ushift
);
1833 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1834 // does not give you back the same bits. This is dubious, and we
1835 // don't currently do it. You're really supposed to get
1836 // an invalid operation signal at runtime, but nobody does that.
1839 semantics
= &toSemantics
;
1847 /* Convert a floating point number to an integer according to the
1848 rounding mode. If the rounded integer value is out of range this
1849 returns an invalid operation exception and the contents of the
1850 destination parts are unspecified. If the rounded value is in
1851 range but the floating point number is not the exact integer, the C
1852 standard doesn't require an inexact exception to be raised. IEEE
1853 854 does require it so we do that.
1855 Note that for conversions to integer type the C standard requires
1856 round-to-zero to always be used. */
1858 APFloat::convertToSignExtendedInteger(integerPart
*parts
, unsigned int width
,
1860 roundingMode rounding_mode
,
1861 bool *isExact
) const
1863 lostFraction lost_fraction
;
1864 const integerPart
*src
;
1865 unsigned int dstPartsCount
, truncatedBits
;
1867 assertArithmeticOK(*semantics
);
1871 /* Handle the three special cases first. */
1872 if(category
== fcInfinity
|| category
== fcNaN
)
1875 dstPartsCount
= partCountForBits(width
);
1877 if(category
== fcZero
) {
1878 APInt::tcSet(parts
, 0, dstPartsCount
);
1879 // Negative zero can't be represented as an int.
1884 src
= significandParts();
1886 /* Step 1: place our absolute value, with any fraction truncated, in
1889 /* Our absolute value is less than one; truncate everything. */
1890 APInt::tcSet(parts
, 0, dstPartsCount
);
1891 /* For exponent -1 the integer bit represents .5, look at that.
1892 For smaller exponents leftmost truncated bit is 0. */
1893 truncatedBits
= semantics
->precision
-1U - exponent
;
1895 /* We want the most significant (exponent + 1) bits; the rest are
1897 unsigned int bits
= exponent
+ 1U;
1899 /* Hopelessly large in magnitude? */
1903 if (bits
< semantics
->precision
) {
1904 /* We truncate (semantics->precision - bits) bits. */
1905 truncatedBits
= semantics
->precision
- bits
;
1906 APInt::tcExtract(parts
, dstPartsCount
, src
, bits
, truncatedBits
);
1908 /* We want at least as many bits as are available. */
1909 APInt::tcExtract(parts
, dstPartsCount
, src
, semantics
->precision
, 0);
1910 APInt::tcShiftLeft(parts
, dstPartsCount
, bits
- semantics
->precision
);
1915 /* Step 2: work out any lost fraction, and increment the absolute
1916 value if we would round away from zero. */
1917 if (truncatedBits
) {
1918 lost_fraction
= lostFractionThroughTruncation(src
, partCount(),
1920 if (lost_fraction
!= lfExactlyZero
1921 && roundAwayFromZero(rounding_mode
, lost_fraction
, truncatedBits
)) {
1922 if (APInt::tcIncrement(parts
, dstPartsCount
))
1923 return opInvalidOp
; /* Overflow. */
1926 lost_fraction
= lfExactlyZero
;
1929 /* Step 3: check if we fit in the destination. */
1930 unsigned int omsb
= APInt::tcMSB(parts
, dstPartsCount
) + 1;
1934 /* Negative numbers cannot be represented as unsigned. */
1938 /* It takes omsb bits to represent the unsigned integer value.
1939 We lose a bit for the sign, but care is needed as the
1940 maximally negative integer is a special case. */
1941 if (omsb
== width
&& APInt::tcLSB(parts
, dstPartsCount
) + 1 != omsb
)
1944 /* This case can happen because of rounding. */
1949 APInt::tcNegate (parts
, dstPartsCount
);
1951 if (omsb
>= width
+ !isSigned
)
1955 if (lost_fraction
== lfExactlyZero
) {
1962 /* Same as convertToSignExtendedInteger, except we provide
1963 deterministic values in case of an invalid operation exception,
1964 namely zero for NaNs and the minimal or maximal value respectively
1965 for underflow or overflow.
1966 The *isExact output tells whether the result is exact, in the sense
1967 that converting it back to the original floating point type produces
1968 the original value. This is almost equivalent to result==opOK,
1969 except for negative zeroes.
1972 APFloat::convertToInteger(integerPart
*parts
, unsigned int width
,
1974 roundingMode rounding_mode
, bool *isExact
) const
1978 fs
= convertToSignExtendedInteger(parts
, width
, isSigned
, rounding_mode
,
1981 if (fs
== opInvalidOp
) {
1982 unsigned int bits
, dstPartsCount
;
1984 dstPartsCount
= partCountForBits(width
);
1986 if (category
== fcNaN
)
1991 bits
= width
- isSigned
;
1993 APInt::tcSetLeastSignificantBits(parts
, dstPartsCount
, bits
);
1994 if (sign
&& isSigned
)
1995 APInt::tcShiftLeft(parts
, dstPartsCount
, width
- 1);
2001 /* Convert an unsigned integer SRC to a floating point number,
2002 rounding according to ROUNDING_MODE. The sign of the floating
2003 point number is not modified. */
2005 APFloat::convertFromUnsignedParts(const integerPart
*src
,
2006 unsigned int srcCount
,
2007 roundingMode rounding_mode
)
2009 unsigned int omsb
, precision
, dstCount
;
2011 lostFraction lost_fraction
;
2013 assertArithmeticOK(*semantics
);
2014 category
= fcNormal
;
2015 omsb
= APInt::tcMSB(src
, srcCount
) + 1;
2016 dst
= significandParts();
2017 dstCount
= partCount();
2018 precision
= semantics
->precision
;
2020 /* We want the most significant PRECISON bits of SRC. There may not
2021 be that many; extract what we can. */
2022 if (precision
<= omsb
) {
2023 exponent
= omsb
- 1;
2024 lost_fraction
= lostFractionThroughTruncation(src
, srcCount
,
2026 APInt::tcExtract(dst
, dstCount
, src
, precision
, omsb
- precision
);
2028 exponent
= precision
- 1;
2029 lost_fraction
= lfExactlyZero
;
2030 APInt::tcExtract(dst
, dstCount
, src
, omsb
, 0);
2033 return normalize(rounding_mode
, lost_fraction
);
2037 APFloat::convertFromAPInt(const APInt
&Val
,
2039 roundingMode rounding_mode
)
2041 unsigned int partCount
= Val
.getNumWords();
2045 if (isSigned
&& api
.isNegative()) {
2050 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2053 /* Convert a two's complement integer SRC to a floating point number,
2054 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2055 integer is signed, in which case it must be sign-extended. */
2057 APFloat::convertFromSignExtendedInteger(const integerPart
*src
,
2058 unsigned int srcCount
,
2060 roundingMode rounding_mode
)
2064 assertArithmeticOK(*semantics
);
2066 && APInt::tcExtractBit(src
, srcCount
* integerPartWidth
- 1)) {
2069 /* If we're signed and negative negate a copy. */
2071 copy
= new integerPart
[srcCount
];
2072 APInt::tcAssign(copy
, src
, srcCount
);
2073 APInt::tcNegate(copy
, srcCount
);
2074 status
= convertFromUnsignedParts(copy
, srcCount
, rounding_mode
);
2078 status
= convertFromUnsignedParts(src
, srcCount
, rounding_mode
);
2084 /* FIXME: should this just take a const APInt reference? */
2086 APFloat::convertFromZeroExtendedInteger(const integerPart
*parts
,
2087 unsigned int width
, bool isSigned
,
2088 roundingMode rounding_mode
)
2090 unsigned int partCount
= partCountForBits(width
);
2091 APInt api
= APInt(width
, partCount
, parts
);
2094 if(isSigned
&& APInt::tcExtractBit(parts
, width
- 1)) {
2099 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2103 APFloat::convertFromHexadecimalString(const char *p
,
2104 roundingMode rounding_mode
)
2106 lostFraction lost_fraction
;
2107 integerPart
*significand
;
2108 unsigned int bitPos
, partsCount
;
2109 const char *dot
, *firstSignificantDigit
;
2113 category
= fcNormal
;
2115 significand
= significandParts();
2116 partsCount
= partCount();
2117 bitPos
= partsCount
* integerPartWidth
;
2119 /* Skip leading zeroes and any (hexa)decimal point. */
2120 p
= skipLeadingZeroesAndAnyDot(p
, &dot
);
2121 firstSignificantDigit
= p
;
2124 integerPart hex_value
;
2131 hex_value
= hexDigitValue(*p
);
2132 if(hex_value
== -1U) {
2133 lost_fraction
= lfExactlyZero
;
2139 /* Store the number whilst 4-bit nibbles remain. */
2142 hex_value
<<= bitPos
% integerPartWidth
;
2143 significand
[bitPos
/ integerPartWidth
] |= hex_value
;
2145 lost_fraction
= trailingHexadecimalFraction(p
, hex_value
);
2146 while(hexDigitValue(*p
) != -1U)
2152 /* Hex floats require an exponent but not a hexadecimal point. */
2153 assert(*p
== 'p' || *p
== 'P');
2155 /* Ignore the exponent if we are zero. */
2156 if(p
!= firstSignificantDigit
) {
2159 /* Implicit hexadecimal point? */
2163 /* Calculate the exponent adjustment implicit in the number of
2164 significant digits. */
2165 expAdjustment
= static_cast<int>(dot
- firstSignificantDigit
);
2166 if(expAdjustment
< 0)
2168 expAdjustment
= expAdjustment
* 4 - 1;
2170 /* Adjust for writing the significand starting at the most
2171 significant nibble. */
2172 expAdjustment
+= semantics
->precision
;
2173 expAdjustment
-= partsCount
* integerPartWidth
;
2175 /* Adjust for the given exponent. */
2176 exponent
= totalExponent(p
, expAdjustment
);
2179 return normalize(rounding_mode
, lost_fraction
);
2183 APFloat::roundSignificandWithExponent(const integerPart
*decSigParts
,
2184 unsigned sigPartCount
, int exp
,
2185 roundingMode rounding_mode
)
2187 unsigned int parts
, pow5PartCount
;
2188 fltSemantics calcSemantics
= { 32767, -32767, 0, true };
2189 integerPart pow5Parts
[maxPowerOfFiveParts
];
2192 isNearest
= (rounding_mode
== rmNearestTiesToEven
2193 || rounding_mode
== rmNearestTiesToAway
);
2195 parts
= partCountForBits(semantics
->precision
+ 11);
2197 /* Calculate pow(5, abs(exp)). */
2198 pow5PartCount
= powerOf5(pow5Parts
, exp
>= 0 ? exp
: -exp
);
2200 for (;; parts
*= 2) {
2201 opStatus sigStatus
, powStatus
;
2202 unsigned int excessPrecision
, truncatedBits
;
2204 calcSemantics
.precision
= parts
* integerPartWidth
- 1;
2205 excessPrecision
= calcSemantics
.precision
- semantics
->precision
;
2206 truncatedBits
= excessPrecision
;
2208 APFloat
decSig(calcSemantics
, fcZero
, sign
);
2209 APFloat
pow5(calcSemantics
, fcZero
, false);
2211 sigStatus
= decSig
.convertFromUnsignedParts(decSigParts
, sigPartCount
,
2212 rmNearestTiesToEven
);
2213 powStatus
= pow5
.convertFromUnsignedParts(pow5Parts
, pow5PartCount
,
2214 rmNearestTiesToEven
);
2215 /* Add exp, as 10^n = 5^n * 2^n. */
2216 decSig
.exponent
+= exp
;
2218 lostFraction calcLostFraction
;
2219 integerPart HUerr
, HUdistance
;
2220 unsigned int powHUerr
;
2223 /* multiplySignificand leaves the precision-th bit set to 1. */
2224 calcLostFraction
= decSig
.multiplySignificand(pow5
, NULL
);
2225 powHUerr
= powStatus
!= opOK
;
2227 calcLostFraction
= decSig
.divideSignificand(pow5
);
2228 /* Denormal numbers have less precision. */
2229 if (decSig
.exponent
< semantics
->minExponent
) {
2230 excessPrecision
+= (semantics
->minExponent
- decSig
.exponent
);
2231 truncatedBits
= excessPrecision
;
2232 if (excessPrecision
> calcSemantics
.precision
)
2233 excessPrecision
= calcSemantics
.precision
;
2235 /* Extra half-ulp lost in reciprocal of exponent. */
2236 powHUerr
= (powStatus
== opOK
&& calcLostFraction
== lfExactlyZero
) ? 0:2;
2239 /* Both multiplySignificand and divideSignificand return the
2240 result with the integer bit set. */
2241 assert (APInt::tcExtractBit
2242 (decSig
.significandParts(), calcSemantics
.precision
- 1) == 1);
2244 HUerr
= HUerrBound(calcLostFraction
!= lfExactlyZero
, sigStatus
!= opOK
,
2246 HUdistance
= 2 * ulpsFromBoundary(decSig
.significandParts(),
2247 excessPrecision
, isNearest
);
2249 /* Are we guaranteed to round correctly if we truncate? */
2250 if (HUdistance
>= HUerr
) {
2251 APInt::tcExtract(significandParts(), partCount(), decSig
.significandParts(),
2252 calcSemantics
.precision
- excessPrecision
,
2254 /* Take the exponent of decSig. If we tcExtract-ed less bits
2255 above we must adjust our exponent to compensate for the
2256 implicit right shift. */
2257 exponent
= (decSig
.exponent
+ semantics
->precision
2258 - (calcSemantics
.precision
- excessPrecision
));
2259 calcLostFraction
= lostFractionThroughTruncation(decSig
.significandParts(),
2262 return normalize(rounding_mode
, calcLostFraction
);
2268 APFloat::convertFromDecimalString(const char *p
, roundingMode rounding_mode
)
2273 /* Scan the text. */
2274 interpretDecimal(p
, &D
);
2276 /* Handle the quick cases. First the case of no significant digits,
2277 i.e. zero, and then exponents that are obviously too large or too
2278 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2279 definitely overflows if
2281 (exp - 1) * L >= maxExponent
2283 and definitely underflows to zero where
2285 (exp + 1) * L <= minExponent - precision
2287 With integer arithmetic the tightest bounds for L are
2289 93/28 < L < 196/59 [ numerator <= 256 ]
2290 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2293 if (decDigitValue(*D
.firstSigDigit
) >= 10U) {
2296 } else if ((D
.normalizedExponent
+ 1) * 28738
2297 <= 8651 * (semantics
->minExponent
- (int) semantics
->precision
)) {
2298 /* Underflow to zero and round. */
2300 fs
= normalize(rounding_mode
, lfLessThanHalf
);
2301 } else if ((D
.normalizedExponent
- 1) * 42039
2302 >= 12655 * semantics
->maxExponent
) {
2303 /* Overflow and round. */
2304 fs
= handleOverflow(rounding_mode
);
2306 integerPart
*decSignificand
;
2307 unsigned int partCount
;
2309 /* A tight upper bound on number of bits required to hold an
2310 N-digit decimal integer is N * 196 / 59. Allocate enough space
2311 to hold the full significand, and an extra part required by
2313 partCount
= static_cast<unsigned int>(D
.lastSigDigit
- D
.firstSigDigit
) + 1;
2314 partCount
= partCountForBits(1 + 196 * partCount
/ 59);
2315 decSignificand
= new integerPart
[partCount
+ 1];
2318 /* Convert to binary efficiently - we do almost all multiplication
2319 in an integerPart. When this would overflow do we do a single
2320 bignum multiplication, and then revert again to multiplication
2321 in an integerPart. */
2323 integerPart decValue
, val
, multiplier
;
2332 decValue
= decDigitValue(*p
++);
2334 val
= val
* 10 + decValue
;
2335 /* The maximum number that can be multiplied by ten with any
2336 digit added without overflowing an integerPart. */
2337 } while (p
<= D
.lastSigDigit
&& multiplier
<= (~ (integerPart
) 0 - 9) / 10);
2339 /* Multiply out the current part. */
2340 APInt::tcMultiplyPart(decSignificand
, decSignificand
, multiplier
, val
,
2341 partCount
, partCount
+ 1, false);
2343 /* If we used another part (likely but not guaranteed), increase
2345 if (decSignificand
[partCount
])
2347 } while (p
<= D
.lastSigDigit
);
2349 category
= fcNormal
;
2350 fs
= roundSignificandWithExponent(decSignificand
, partCount
,
2351 D
.exponent
, rounding_mode
);
2353 delete [] decSignificand
;
2360 APFloat::convertFromString(const char *p
, roundingMode rounding_mode
)
2362 assertArithmeticOK(*semantics
);
2364 /* Handle a leading minus sign. */
2370 if(p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X'))
2371 return convertFromHexadecimalString(p
+ 2, rounding_mode
);
2373 return convertFromDecimalString(p
, rounding_mode
);
2376 /* Write out a hexadecimal representation of the floating point value
2377 to DST, which must be of sufficient size, in the C99 form
2378 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2379 excluding the terminating NUL.
2381 If UPPERCASE, the output is in upper case, otherwise in lower case.
2383 HEXDIGITS digits appear altogether, rounding the value if
2384 necessary. If HEXDIGITS is 0, the minimal precision to display the
2385 number precisely is used instead. If nothing would appear after
2386 the decimal point it is suppressed.
2388 The decimal exponent is always printed and has at least one digit.
2389 Zero values display an exponent of zero. Infinities and NaNs
2390 appear as "infinity" or "nan" respectively.
2392 The above rules are as specified by C99. There is ambiguity about
2393 what the leading hexadecimal digit should be. This implementation
2394 uses whatever is necessary so that the exponent is displayed as
2395 stored. This implies the exponent will fall within the IEEE format
2396 range, and the leading hexadecimal digit will be 0 (for denormals),
2397 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2398 any other digits zero).
2401 APFloat::convertToHexString(char *dst
, unsigned int hexDigits
,
2402 bool upperCase
, roundingMode rounding_mode
) const
2406 assertArithmeticOK(*semantics
);
2414 memcpy (dst
, upperCase
? infinityU
: infinityL
, sizeof infinityU
- 1);
2415 dst
+= sizeof infinityL
- 1;
2419 memcpy (dst
, upperCase
? NaNU
: NaNL
, sizeof NaNU
- 1);
2420 dst
+= sizeof NaNU
- 1;
2425 *dst
++ = upperCase
? 'X': 'x';
2427 if (hexDigits
> 1) {
2429 memset (dst
, '0', hexDigits
- 1);
2430 dst
+= hexDigits
- 1;
2432 *dst
++ = upperCase
? 'P': 'p';
2437 dst
= convertNormalToHexString (dst
, hexDigits
, upperCase
, rounding_mode
);
2443 return static_cast<unsigned int>(dst
- p
);
2446 /* Does the hard work of outputting the correctly rounded hexadecimal
2447 form of a normal floating point number with the specified number of
2448 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2449 digits necessary to print the value precisely is output. */
2451 APFloat::convertNormalToHexString(char *dst
, unsigned int hexDigits
,
2453 roundingMode rounding_mode
) const
2455 unsigned int count
, valueBits
, shift
, partsCount
, outputDigits
;
2456 const char *hexDigitChars
;
2457 const integerPart
*significand
;
2462 *dst
++ = upperCase
? 'X': 'x';
2465 hexDigitChars
= upperCase
? hexDigitsUpper
: hexDigitsLower
;
2467 significand
= significandParts();
2468 partsCount
= partCount();
2470 /* +3 because the first digit only uses the single integer bit, so
2471 we have 3 virtual zero most-significant-bits. */
2472 valueBits
= semantics
->precision
+ 3;
2473 shift
= integerPartWidth
- valueBits
% integerPartWidth
;
2475 /* The natural number of digits required ignoring trailing
2476 insignificant zeroes. */
2477 outputDigits
= (valueBits
- significandLSB () + 3) / 4;
2479 /* hexDigits of zero means use the required number for the
2480 precision. Otherwise, see if we are truncating. If we are,
2481 find out if we need to round away from zero. */
2483 if (hexDigits
< outputDigits
) {
2484 /* We are dropping non-zero bits, so need to check how to round.
2485 "bits" is the number of dropped bits. */
2487 lostFraction fraction
;
2489 bits
= valueBits
- hexDigits
* 4;
2490 fraction
= lostFractionThroughTruncation (significand
, partsCount
, bits
);
2491 roundUp
= roundAwayFromZero(rounding_mode
, fraction
, bits
);
2493 outputDigits
= hexDigits
;
2496 /* Write the digits consecutively, and start writing in the location
2497 of the hexadecimal point. We move the most significant digit
2498 left and add the hexadecimal point later. */
2501 count
= (valueBits
+ integerPartWidth
- 1) / integerPartWidth
;
2503 while (outputDigits
&& count
) {
2506 /* Put the most significant integerPartWidth bits in "part". */
2507 if (--count
== partsCount
)
2508 part
= 0; /* An imaginary higher zero part. */
2510 part
= significand
[count
] << shift
;
2513 part
|= significand
[count
- 1] >> (integerPartWidth
- shift
);
2515 /* Convert as much of "part" to hexdigits as we can. */
2516 unsigned int curDigits
= integerPartWidth
/ 4;
2518 if (curDigits
> outputDigits
)
2519 curDigits
= outputDigits
;
2520 dst
+= partAsHex (dst
, part
, curDigits
, hexDigitChars
);
2521 outputDigits
-= curDigits
;
2527 /* Note that hexDigitChars has a trailing '0'. */
2530 *q
= hexDigitChars
[hexDigitValue (*q
) + 1];
2531 } while (*q
== '0');
2534 /* Add trailing zeroes. */
2535 memset (dst
, '0', outputDigits
);
2536 dst
+= outputDigits
;
2539 /* Move the most significant digit to before the point, and if there
2540 is something after the decimal point add it. This must come
2541 after rounding above. */
2548 /* Finally output the exponent. */
2549 *dst
++ = upperCase
? 'P': 'p';
2551 return writeSignedDecimal (dst
, exponent
);
2554 // For good performance it is desirable for different APFloats
2555 // to produce different integers.
2557 APFloat::getHashValue() const
2559 if (category
==fcZero
) return sign
<<8 | semantics
->precision
;
2560 else if (category
==fcInfinity
) return sign
<<9 | semantics
->precision
;
2561 else if (category
==fcNaN
) return 1<<10 | semantics
->precision
;
2563 uint32_t hash
= sign
<<11 | semantics
->precision
| exponent
<<12;
2564 const integerPart
* p
= significandParts();
2565 for (int i
=partCount(); i
>0; i
--, p
++)
2566 hash
^= ((uint32_t)*p
) ^ (uint32_t)((*p
)>>32);
2571 // Conversion from APFloat to/from host float/double. It may eventually be
2572 // possible to eliminate these and have everybody deal with APFloats, but that
2573 // will take a while. This approach will not easily extend to long double.
2574 // Current implementation requires integerPartWidth==64, which is correct at
2575 // the moment but could be made more general.
2577 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2578 // the actual IEEE respresentations. We compensate for that here.
2581 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2583 assert(semantics
== (const llvm::fltSemantics
*)&x87DoubleExtended
);
2584 assert (partCount()==2);
2586 uint64_t myexponent
, mysignificand
;
2588 if (category
==fcNormal
) {
2589 myexponent
= exponent
+16383; //bias
2590 mysignificand
= significandParts()[0];
2591 if (myexponent
==1 && !(mysignificand
& 0x8000000000000000ULL
))
2592 myexponent
= 0; // denormal
2593 } else if (category
==fcZero
) {
2596 } else if (category
==fcInfinity
) {
2597 myexponent
= 0x7fff;
2598 mysignificand
= 0x8000000000000000ULL
;
2600 assert(category
== fcNaN
&& "Unknown category");
2601 myexponent
= 0x7fff;
2602 mysignificand
= significandParts()[0];
2606 words
[0] = mysignificand
;
2607 words
[1] = ((uint64_t)(sign
& 1) << 15) |
2608 (myexponent
& 0x7fffLL
);
2609 return APInt(80, 2, words
);
2613 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2615 assert(semantics
== (const llvm::fltSemantics
*)&PPCDoubleDouble
);
2616 assert (partCount()==2);
2618 uint64_t myexponent
, mysignificand
, myexponent2
, mysignificand2
;
2620 if (category
==fcNormal
) {
2621 myexponent
= exponent
+ 1023; //bias
2622 myexponent2
= exponent2
+ 1023;
2623 mysignificand
= significandParts()[0];
2624 mysignificand2
= significandParts()[1];
2625 if (myexponent
==1 && !(mysignificand
& 0x10000000000000LL
))
2626 myexponent
= 0; // denormal
2627 if (myexponent2
==1 && !(mysignificand2
& 0x10000000000000LL
))
2628 myexponent2
= 0; // denormal
2629 } else if (category
==fcZero
) {
2634 } else if (category
==fcInfinity
) {
2640 assert(category
== fcNaN
&& "Unknown category");
2642 mysignificand
= significandParts()[0];
2643 myexponent2
= exponent2
;
2644 mysignificand2
= significandParts()[1];
2648 words
[0] = ((uint64_t)(sign
& 1) << 63) |
2649 ((myexponent
& 0x7ff) << 52) |
2650 (mysignificand
& 0xfffffffffffffLL
);
2651 words
[1] = ((uint64_t)(sign2
& 1) << 63) |
2652 ((myexponent2
& 0x7ff) << 52) |
2653 (mysignificand2
& 0xfffffffffffffLL
);
2654 return APInt(128, 2, words
);
2658 APFloat::convertDoubleAPFloatToAPInt() const
2660 assert(semantics
== (const llvm::fltSemantics
*)&IEEEdouble
);
2661 assert (partCount()==1);
2663 uint64_t myexponent
, mysignificand
;
2665 if (category
==fcNormal
) {
2666 myexponent
= exponent
+1023; //bias
2667 mysignificand
= *significandParts();
2668 if (myexponent
==1 && !(mysignificand
& 0x10000000000000LL
))
2669 myexponent
= 0; // denormal
2670 } else if (category
==fcZero
) {
2673 } else if (category
==fcInfinity
) {
2677 assert(category
== fcNaN
&& "Unknown category!");
2679 mysignificand
= *significandParts();
2682 return APInt(64, ((((uint64_t)(sign
& 1) << 63) |
2683 ((myexponent
& 0x7ff) << 52) |
2684 (mysignificand
& 0xfffffffffffffLL
))));
2688 APFloat::convertFloatAPFloatToAPInt() const
2690 assert(semantics
== (const llvm::fltSemantics
*)&IEEEsingle
);
2691 assert (partCount()==1);
2693 uint32_t myexponent
, mysignificand
;
2695 if (category
==fcNormal
) {
2696 myexponent
= exponent
+127; //bias
2697 mysignificand
= (uint32_t)*significandParts();
2698 if (myexponent
== 1 && !(mysignificand
& 0x800000))
2699 myexponent
= 0; // denormal
2700 } else if (category
==fcZero
) {
2703 } else if (category
==fcInfinity
) {
2707 assert(category
== fcNaN
&& "Unknown category!");
2709 mysignificand
= (uint32_t)*significandParts();
2712 return APInt(32, (((sign
&1) << 31) | ((myexponent
&0xff) << 23) |
2713 (mysignificand
& 0x7fffff)));
2716 // This function creates an APInt that is just a bit map of the floating
2717 // point constant as it would appear in memory. It is not a conversion,
2718 // and treating the result as a normal integer is unlikely to be useful.
2721 APFloat::bitcastToAPInt() const
2723 if (semantics
== (const llvm::fltSemantics
*)&IEEEsingle
)
2724 return convertFloatAPFloatToAPInt();
2726 if (semantics
== (const llvm::fltSemantics
*)&IEEEdouble
)
2727 return convertDoubleAPFloatToAPInt();
2729 if (semantics
== (const llvm::fltSemantics
*)&PPCDoubleDouble
)
2730 return convertPPCDoubleDoubleAPFloatToAPInt();
2732 assert(semantics
== (const llvm::fltSemantics
*)&x87DoubleExtended
&&
2734 return convertF80LongDoubleAPFloatToAPInt();
2738 APFloat::convertToFloat() const
2740 assert(semantics
== (const llvm::fltSemantics
*)&IEEEsingle
);
2741 APInt api
= bitcastToAPInt();
2742 return api
.bitsToFloat();
2746 APFloat::convertToDouble() const
2748 assert(semantics
== (const llvm::fltSemantics
*)&IEEEdouble
);
2749 APInt api
= bitcastToAPInt();
2750 return api
.bitsToDouble();
2753 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2754 /// does not support these bit patterns:
2755 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2756 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2757 /// exponent = 0, integer bit 1 ("pseudodenormal")
2758 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2759 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2761 APFloat::initFromF80LongDoubleAPInt(const APInt
&api
)
2763 assert(api
.getBitWidth()==80);
2764 uint64_t i1
= api
.getRawData()[0];
2765 uint64_t i2
= api
.getRawData()[1];
2766 uint64_t myexponent
= (i2
& 0x7fff);
2767 uint64_t mysignificand
= i1
;
2769 initialize(&APFloat::x87DoubleExtended
);
2770 assert(partCount()==2);
2772 sign
= static_cast<unsigned int>(i2
>>15);
2773 if (myexponent
==0 && mysignificand
==0) {
2774 // exponent, significand meaningless
2776 } else if (myexponent
==0x7fff && mysignificand
==0x8000000000000000ULL
) {
2777 // exponent, significand meaningless
2778 category
= fcInfinity
;
2779 } else if (myexponent
==0x7fff && mysignificand
!=0x8000000000000000ULL
) {
2780 // exponent meaningless
2782 significandParts()[0] = mysignificand
;
2783 significandParts()[1] = 0;
2785 category
= fcNormal
;
2786 exponent
= myexponent
- 16383;
2787 significandParts()[0] = mysignificand
;
2788 significandParts()[1] = 0;
2789 if (myexponent
==0) // denormal
2795 APFloat::initFromPPCDoubleDoubleAPInt(const APInt
&api
)
2797 assert(api
.getBitWidth()==128);
2798 uint64_t i1
= api
.getRawData()[0];
2799 uint64_t i2
= api
.getRawData()[1];
2800 uint64_t myexponent
= (i1
>> 52) & 0x7ff;
2801 uint64_t mysignificand
= i1
& 0xfffffffffffffLL
;
2802 uint64_t myexponent2
= (i2
>> 52) & 0x7ff;
2803 uint64_t mysignificand2
= i2
& 0xfffffffffffffLL
;
2805 initialize(&APFloat::PPCDoubleDouble
);
2806 assert(partCount()==2);
2808 sign
= static_cast<unsigned int>(i1
>>63);
2809 sign2
= static_cast<unsigned int>(i2
>>63);
2810 if (myexponent
==0 && mysignificand
==0) {
2811 // exponent, significand meaningless
2812 // exponent2 and significand2 are required to be 0; we don't check
2814 } else if (myexponent
==0x7ff && mysignificand
==0) {
2815 // exponent, significand meaningless
2816 // exponent2 and significand2 are required to be 0; we don't check
2817 category
= fcInfinity
;
2818 } else if (myexponent
==0x7ff && mysignificand
!=0) {
2819 // exponent meaningless. So is the whole second word, but keep it
2822 exponent2
= myexponent2
;
2823 significandParts()[0] = mysignificand
;
2824 significandParts()[1] = mysignificand2
;
2826 category
= fcNormal
;
2827 // Note there is no category2; the second word is treated as if it is
2828 // fcNormal, although it might be something else considered by itself.
2829 exponent
= myexponent
- 1023;
2830 exponent2
= myexponent2
- 1023;
2831 significandParts()[0] = mysignificand
;
2832 significandParts()[1] = mysignificand2
;
2833 if (myexponent
==0) // denormal
2836 significandParts()[0] |= 0x10000000000000LL
; // integer bit
2840 significandParts()[1] |= 0x10000000000000LL
; // integer bit
2845 APFloat::initFromDoubleAPInt(const APInt
&api
)
2847 assert(api
.getBitWidth()==64);
2848 uint64_t i
= *api
.getRawData();
2849 uint64_t myexponent
= (i
>> 52) & 0x7ff;
2850 uint64_t mysignificand
= i
& 0xfffffffffffffLL
;
2852 initialize(&APFloat::IEEEdouble
);
2853 assert(partCount()==1);
2855 sign
= static_cast<unsigned int>(i
>>63);
2856 if (myexponent
==0 && mysignificand
==0) {
2857 // exponent, significand meaningless
2859 } else if (myexponent
==0x7ff && mysignificand
==0) {
2860 // exponent, significand meaningless
2861 category
= fcInfinity
;
2862 } else if (myexponent
==0x7ff && mysignificand
!=0) {
2863 // exponent meaningless
2865 *significandParts() = mysignificand
;
2867 category
= fcNormal
;
2868 exponent
= myexponent
- 1023;
2869 *significandParts() = mysignificand
;
2870 if (myexponent
==0) // denormal
2873 *significandParts() |= 0x10000000000000LL
; // integer bit
2878 APFloat::initFromFloatAPInt(const APInt
& api
)
2880 assert(api
.getBitWidth()==32);
2881 uint32_t i
= (uint32_t)*api
.getRawData();
2882 uint32_t myexponent
= (i
>> 23) & 0xff;
2883 uint32_t mysignificand
= i
& 0x7fffff;
2885 initialize(&APFloat::IEEEsingle
);
2886 assert(partCount()==1);
2889 if (myexponent
==0 && mysignificand
==0) {
2890 // exponent, significand meaningless
2892 } else if (myexponent
==0xff && mysignificand
==0) {
2893 // exponent, significand meaningless
2894 category
= fcInfinity
;
2895 } else if (myexponent
==0xff && mysignificand
!=0) {
2896 // sign, exponent, significand meaningless
2898 *significandParts() = mysignificand
;
2900 category
= fcNormal
;
2901 exponent
= myexponent
- 127; //bias
2902 *significandParts() = mysignificand
;
2903 if (myexponent
==0) // denormal
2906 *significandParts() |= 0x800000; // integer bit
2910 /// Treat api as containing the bits of a floating point number. Currently
2911 /// we infer the floating point type from the size of the APInt. The
2912 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2913 /// when the size is anything else).
2915 APFloat::initFromAPInt(const APInt
& api
, bool isIEEE
)
2917 if (api
.getBitWidth() == 32)
2918 return initFromFloatAPInt(api
);
2919 else if (api
.getBitWidth()==64)
2920 return initFromDoubleAPInt(api
);
2921 else if (api
.getBitWidth()==80)
2922 return initFromF80LongDoubleAPInt(api
);
2923 else if (api
.getBitWidth()==128 && !isIEEE
)
2924 return initFromPPCDoubleDoubleAPInt(api
);
2929 APFloat::APFloat(const APInt
& api
, bool isIEEE
)
2931 initFromAPInt(api
, isIEEE
);
2934 APFloat::APFloat(float f
)
2936 APInt api
= APInt(32, 0);
2937 initFromAPInt(api
.floatToBits(f
));
2940 APFloat::APFloat(double d
)
2942 APInt api
= APInt(64, 0);
2943 initFromAPInt(api
.doubleToBits(d
));