1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/Support/ErrorHandling.h"
19 #include "llvm/Support/MathExtras.h"
24 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
26 /* Assumed in hexadecimal significand parsing, and conversion to
27 hexadecimal strings. */
28 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
29 COMPILE_TIME_ASSERT(integerPartWidth
% 4 == 0);
33 /* Represents floating point arithmetic semantics. */
35 /* The largest E such that 2^E is representable; this matches the
36 definition of IEEE 754. */
37 exponent_t maxExponent
;
39 /* The smallest E such that 2^E is a normalized number; this
40 matches the definition of IEEE 754. */
41 exponent_t minExponent
;
43 /* Number of bits in the significand. This includes the integer
45 unsigned int precision
;
47 /* True if arithmetic is supported. */
48 unsigned int arithmeticOK
;
51 const fltSemantics
APFloat::IEEEsingle
= { 127, -126, 24, true };
52 const fltSemantics
APFloat::IEEEdouble
= { 1023, -1022, 53, true };
53 const fltSemantics
APFloat::IEEEquad
= { 16383, -16382, 113, true };
54 const fltSemantics
APFloat::x87DoubleExtended
= { 16383, -16382, 64, true };
55 const fltSemantics
APFloat::Bogus
= { 0, 0, 0, true };
57 // The PowerPC format consists of two doubles. It does not map cleanly
58 // onto the usual format above. For now only storage of constants of
59 // this type is supported, no arithmetic.
60 const fltSemantics
APFloat::PPCDoubleDouble
= { 1023, -1022, 106, false };
62 /* A tight upper bound on number of parts required to hold the value
65 power * 815 / (351 * integerPartWidth) + 1
67 However, whilst the result may require only this many parts,
68 because we are multiplying two values to get it, the
69 multiplication may require an extra part with the excess part
70 being zero (consider the trivial case of 1 * 1, tcFullMultiply
71 requires two parts to hold the single-part result). So we add an
72 extra one to guarantee enough space whilst multiplying. */
73 const unsigned int maxExponent
= 16383;
74 const unsigned int maxPrecision
= 113;
75 const unsigned int maxPowerOfFiveExponent
= maxExponent
+ maxPrecision
- 1;
76 const unsigned int maxPowerOfFiveParts
= 2 + ((maxPowerOfFiveExponent
* 815)
77 / (351 * integerPartWidth
));
80 /* A bunch of private, handy routines. */
82 static inline unsigned int
83 partCountForBits(unsigned int bits
)
85 return ((bits
) + integerPartWidth
- 1) / integerPartWidth
;
88 /* Returns 0U-9U. Return values >= 10U are not digits. */
89 static inline unsigned int
90 decDigitValue(unsigned int c
)
96 hexDigitValue(unsigned int c
)
116 assertArithmeticOK(const llvm::fltSemantics
&semantics
) {
117 assert(semantics
.arithmeticOK
118 && "Compile-time arithmetic does not support these semantics");
121 /* Return the value of a decimal exponent of the form
124 If the exponent overflows, returns a large exponent with the
127 readExponent(StringRef::iterator begin
, StringRef::iterator end
)
130 unsigned int absExponent
;
131 const unsigned int overlargeExponent
= 24000; /* FIXME. */
132 StringRef::iterator p
= begin
;
134 assert(p
!= end
&& "Exponent has no digits");
136 isNegative
= (*p
== '-');
137 if (*p
== '-' || *p
== '+') {
139 assert(p
!= end
&& "Exponent has no digits");
142 absExponent
= decDigitValue(*p
++);
143 assert(absExponent
< 10U && "Invalid character in exponent");
145 for (; p
!= end
; ++p
) {
148 value
= decDigitValue(*p
);
149 assert(value
< 10U && "Invalid character in exponent");
151 value
+= absExponent
* 10;
152 if (absExponent
>= overlargeExponent
) {
153 absExponent
= overlargeExponent
;
159 assert(p
== end
&& "Invalid exponent in exponent");
162 return -(int) absExponent
;
164 return (int) absExponent
;
167 /* This is ugly and needs cleaning up, but I don't immediately see
168 how whilst remaining safe. */
170 totalExponent(StringRef::iterator p
, StringRef::iterator end
,
171 int exponentAdjustment
)
173 int unsignedExponent
;
174 bool negative
, overflow
;
177 assert(p
!= end
&& "Exponent has no digits");
179 negative
= *p
== '-';
180 if(*p
== '-' || *p
== '+') {
182 assert(p
!= end
&& "Exponent has no digits");
185 unsignedExponent
= 0;
187 for(; p
!= end
; ++p
) {
190 value
= decDigitValue(*p
);
191 assert(value
< 10U && "Invalid character in exponent");
193 unsignedExponent
= unsignedExponent
* 10 + value
;
194 if(unsignedExponent
> 65535)
198 if(exponentAdjustment
> 65535 || exponentAdjustment
< -65536)
202 exponent
= unsignedExponent
;
204 exponent
= -exponent
;
205 exponent
+= exponentAdjustment
;
206 if(exponent
> 65535 || exponent
< -65536)
211 exponent
= negative
? -65536: 65535;
216 static StringRef::iterator
217 skipLeadingZeroesAndAnyDot(StringRef::iterator begin
, StringRef::iterator end
,
218 StringRef::iterator
*dot
)
220 StringRef::iterator p
= begin
;
222 while(*p
== '0' && p
!= end
)
228 assert(end
- begin
!= 1 && "Significand has no digits");
230 while(*p
== '0' && p
!= end
)
237 /* Given a normal decimal floating point number of the form
241 where the decimal point and exponent are optional, fill out the
242 structure D. Exponent is appropriate if the significand is
243 treated as an integer, and normalizedExponent if the significand
244 is taken to have the decimal point after a single leading
247 If the value is zero, V->firstSigDigit points to a non-digit, and
248 the return exponent is zero.
251 const char *firstSigDigit
;
252 const char *lastSigDigit
;
254 int normalizedExponent
;
258 interpretDecimal(StringRef::iterator begin
, StringRef::iterator end
,
261 StringRef::iterator dot
= end
;
262 StringRef::iterator p
= skipLeadingZeroesAndAnyDot (begin
, end
, &dot
);
264 D
->firstSigDigit
= p
;
266 D
->normalizedExponent
= 0;
268 for (; p
!= end
; ++p
) {
270 assert(dot
== end
&& "String contains multiple dots");
275 if (decDigitValue(*p
) >= 10U)
280 assert((*p
== 'e' || *p
== 'E') && "Invalid character in significand");
281 assert(p
!= begin
&& "Significand has no digits");
282 assert((dot
== end
|| p
- begin
!= 1) && "Significand has no digits");
284 /* p points to the first non-digit in the string */
285 D
->exponent
= readExponent(p
+ 1, end
);
287 /* Implied decimal point? */
292 /* If number is all zeroes accept any exponent. */
293 if (p
!= D
->firstSigDigit
) {
294 /* Drop insignificant trailing zeroes. */
299 while (p
!= begin
&& *p
== '0');
300 while (p
!= begin
&& *p
== '.');
303 /* Adjust the exponents for any decimal point. */
304 D
->exponent
+= static_cast<exponent_t
>((dot
- p
) - (dot
> p
));
305 D
->normalizedExponent
= (D
->exponent
+
306 static_cast<exponent_t
>((p
- D
->firstSigDigit
)
307 - (dot
> D
->firstSigDigit
&& dot
< p
)));
313 /* Return the trailing fraction of a hexadecimal number.
314 DIGITVALUE is the first hex digit of the fraction, P points to
317 trailingHexadecimalFraction(StringRef::iterator p
, StringRef::iterator end
,
318 unsigned int digitValue
)
320 unsigned int hexDigit
;
322 /* If the first trailing digit isn't 0 or 8 we can work out the
323 fraction immediately. */
325 return lfMoreThanHalf
;
326 else if(digitValue
< 8 && digitValue
> 0)
327 return lfLessThanHalf
;
329 /* Otherwise we need to find the first non-zero digit. */
333 assert(p
!= end
&& "Invalid trailing hexadecimal fraction!");
335 hexDigit
= hexDigitValue(*p
);
337 /* If we ran off the end it is exactly zero or one-half, otherwise
340 return digitValue
== 0 ? lfExactlyZero
: lfExactlyHalf
;
342 return digitValue
== 0 ? lfLessThanHalf
: lfMoreThanHalf
;
345 /* Return the fraction lost were a bignum truncated losing the least
346 significant BITS bits. */
348 lostFractionThroughTruncation(const integerPart
*parts
,
349 unsigned int partCount
,
354 lsb
= APInt::tcLSB(parts
, partCount
);
356 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
358 return lfExactlyZero
;
360 return lfExactlyHalf
;
361 if(bits
<= partCount
* integerPartWidth
362 && APInt::tcExtractBit(parts
, bits
- 1))
363 return lfMoreThanHalf
;
365 return lfLessThanHalf
;
368 /* Shift DST right BITS bits noting lost fraction. */
370 shiftRight(integerPart
*dst
, unsigned int parts
, unsigned int bits
)
372 lostFraction lost_fraction
;
374 lost_fraction
= lostFractionThroughTruncation(dst
, parts
, bits
);
376 APInt::tcShiftRight(dst
, parts
, bits
);
378 return lost_fraction
;
381 /* Combine the effect of two lost fractions. */
383 combineLostFractions(lostFraction moreSignificant
,
384 lostFraction lessSignificant
)
386 if(lessSignificant
!= lfExactlyZero
) {
387 if(moreSignificant
== lfExactlyZero
)
388 moreSignificant
= lfLessThanHalf
;
389 else if(moreSignificant
== lfExactlyHalf
)
390 moreSignificant
= lfMoreThanHalf
;
393 return moreSignificant
;
396 /* The error from the true value, in half-ulps, on multiplying two
397 floating point numbers, which differ from the value they
398 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
399 than the returned value.
401 See "How to Read Floating Point Numbers Accurately" by William D
404 HUerrBound(bool inexactMultiply
, unsigned int HUerr1
, unsigned int HUerr2
)
406 assert(HUerr1
< 2 || HUerr2
< 2 || (HUerr1
+ HUerr2
< 8));
408 if (HUerr1
+ HUerr2
== 0)
409 return inexactMultiply
* 2; /* <= inexactMultiply half-ulps. */
411 return inexactMultiply
+ 2 * (HUerr1
+ HUerr2
);
414 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
415 when the least significant BITS are truncated. BITS cannot be
418 ulpsFromBoundary(const integerPart
*parts
, unsigned int bits
, bool isNearest
)
420 unsigned int count
, partBits
;
421 integerPart part
, boundary
;
426 count
= bits
/ integerPartWidth
;
427 partBits
= bits
% integerPartWidth
+ 1;
429 part
= parts
[count
] & (~(integerPart
) 0 >> (integerPartWidth
- partBits
));
432 boundary
= (integerPart
) 1 << (partBits
- 1);
437 if (part
- boundary
<= boundary
- part
)
438 return part
- boundary
;
440 return boundary
- part
;
443 if (part
== boundary
) {
446 return ~(integerPart
) 0; /* A lot. */
449 } else if (part
== boundary
- 1) {
452 return ~(integerPart
) 0; /* A lot. */
457 return ~(integerPart
) 0; /* A lot. */
460 /* Place pow(5, power) in DST, and return the number of parts used.
461 DST must be at least one part larger than size of the answer. */
463 powerOf5(integerPart
*dst
, unsigned int power
)
465 static const integerPart firstEightPowers
[] = { 1, 5, 25, 125, 625, 3125,
467 integerPart pow5s
[maxPowerOfFiveParts
* 2 + 5];
468 pow5s
[0] = 78125 * 5;
470 unsigned int partsCount
[16] = { 1 };
471 integerPart scratch
[maxPowerOfFiveParts
], *p1
, *p2
, *pow5
;
473 assert(power
<= maxExponent
);
478 *p1
= firstEightPowers
[power
& 7];
484 for (unsigned int n
= 0; power
; power
>>= 1, n
++) {
489 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
491 pc
= partsCount
[n
- 1];
492 APInt::tcFullMultiply(pow5
, pow5
- pc
, pow5
- pc
, pc
, pc
);
494 if (pow5
[pc
- 1] == 0)
502 APInt::tcFullMultiply(p2
, p1
, pow5
, result
, pc
);
504 if (p2
[result
- 1] == 0)
507 /* Now result is in p1 with partsCount parts and p2 is scratch
509 tmp
= p1
, p1
= p2
, p2
= tmp
;
516 APInt::tcAssign(dst
, p1
, result
);
521 /* Zero at the end to avoid modular arithmetic when adding one; used
522 when rounding up during hexadecimal output. */
523 static const char hexDigitsLower
[] = "0123456789abcdef0";
524 static const char hexDigitsUpper
[] = "0123456789ABCDEF0";
525 static const char infinityL
[] = "infinity";
526 static const char infinityU
[] = "INFINITY";
527 static const char NaNL
[] = "nan";
528 static const char NaNU
[] = "NAN";
530 /* Write out an integerPart in hexadecimal, starting with the most
531 significant nibble. Write out exactly COUNT hexdigits, return
534 partAsHex (char *dst
, integerPart part
, unsigned int count
,
535 const char *hexDigitChars
)
537 unsigned int result
= count
;
539 assert (count
!= 0 && count
<= integerPartWidth
/ 4);
541 part
>>= (integerPartWidth
- 4 * count
);
543 dst
[count
] = hexDigitChars
[part
& 0xf];
550 /* Write out an unsigned decimal integer. */
552 writeUnsignedDecimal (char *dst
, unsigned int n
)
568 /* Write out a signed decimal integer. */
570 writeSignedDecimal (char *dst
, int value
)
574 dst
= writeUnsignedDecimal(dst
, -(unsigned) value
);
576 dst
= writeUnsignedDecimal(dst
, value
);
583 APFloat::initialize(const fltSemantics
*ourSemantics
)
587 semantics
= ourSemantics
;
590 significand
.parts
= new integerPart
[count
];
594 APFloat::freeSignificand()
597 delete [] significand
.parts
;
601 APFloat::assign(const APFloat
&rhs
)
603 assert(semantics
== rhs
.semantics
);
606 category
= rhs
.category
;
607 exponent
= rhs
.exponent
;
609 exponent2
= rhs
.exponent2
;
610 if(category
== fcNormal
|| category
== fcNaN
)
611 copySignificand(rhs
);
615 APFloat::copySignificand(const APFloat
&rhs
)
617 assert(category
== fcNormal
|| category
== fcNaN
);
618 assert(rhs
.partCount() >= partCount());
620 APInt::tcAssign(significandParts(), rhs
.significandParts(),
624 /* Make this number a NaN, with an arbitrary but deterministic value
625 for the significand. If double or longer, this is a signalling NaN,
626 which may not be ideal. If float, this is QNaN(0). */
628 APFloat::makeNaN(unsigned type
)
631 // FIXME: Add double and long double support for QNaN(0).
632 if (semantics
->precision
== 24 && semantics
->maxExponent
== 127) {
634 type
&= ~0x80000000U
;
637 APInt::tcSet(significandParts(), type
, partCount());
641 APFloat::operator=(const APFloat
&rhs
)
644 if(semantics
!= rhs
.semantics
) {
646 initialize(rhs
.semantics
);
655 APFloat::bitwiseIsEqual(const APFloat
&rhs
) const {
658 if (semantics
!= rhs
.semantics
||
659 category
!= rhs
.category
||
662 if (semantics
==(const llvm::fltSemantics
*)&PPCDoubleDouble
&&
665 if (category
==fcZero
|| category
==fcInfinity
)
667 else if (category
==fcNormal
&& exponent
!=rhs
.exponent
)
669 else if (semantics
==(const llvm::fltSemantics
*)&PPCDoubleDouble
&&
670 exponent2
!=rhs
.exponent2
)
674 const integerPart
* p
=significandParts();
675 const integerPart
* q
=rhs
.significandParts();
676 for (; i
>0; i
--, p
++, q
++) {
684 APFloat::APFloat(const fltSemantics
&ourSemantics
, integerPart value
)
686 assertArithmeticOK(ourSemantics
);
687 initialize(&ourSemantics
);
690 exponent
= ourSemantics
.precision
- 1;
691 significandParts()[0] = value
;
692 normalize(rmNearestTiesToEven
, lfExactlyZero
);
695 APFloat::APFloat(const fltSemantics
&ourSemantics
,
696 fltCategory ourCategory
, bool negative
, unsigned type
)
698 assertArithmeticOK(ourSemantics
);
699 initialize(&ourSemantics
);
700 category
= ourCategory
;
702 if (category
== fcNormal
)
704 else if (ourCategory
== fcNaN
)
708 APFloat::APFloat(const fltSemantics
&ourSemantics
, const StringRef
& text
)
710 assertArithmeticOK(ourSemantics
);
711 initialize(&ourSemantics
);
712 convertFromString(text
, rmNearestTiesToEven
);
715 APFloat::APFloat(const APFloat
&rhs
)
717 initialize(rhs
.semantics
);
726 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
727 void APFloat::Profile(FoldingSetNodeID
& ID
) const {
728 ID
.Add(bitcastToAPInt());
732 APFloat::partCount() const
734 return partCountForBits(semantics
->precision
+ 1);
738 APFloat::semanticsPrecision(const fltSemantics
&semantics
)
740 return semantics
.precision
;
744 APFloat::significandParts() const
746 return const_cast<APFloat
*>(this)->significandParts();
750 APFloat::significandParts()
752 assert(category
== fcNormal
|| category
== fcNaN
);
755 return significand
.parts
;
757 return &significand
.part
;
761 APFloat::zeroSignificand()
764 APInt::tcSet(significandParts(), 0, partCount());
767 /* Increment an fcNormal floating point number's significand. */
769 APFloat::incrementSignificand()
773 carry
= APInt::tcIncrement(significandParts(), partCount());
775 /* Our callers should never cause us to overflow. */
779 /* Add the significand of the RHS. Returns the carry flag. */
781 APFloat::addSignificand(const APFloat
&rhs
)
785 parts
= significandParts();
787 assert(semantics
== rhs
.semantics
);
788 assert(exponent
== rhs
.exponent
);
790 return APInt::tcAdd(parts
, rhs
.significandParts(), 0, partCount());
793 /* Subtract the significand of the RHS with a borrow flag. Returns
796 APFloat::subtractSignificand(const APFloat
&rhs
, integerPart borrow
)
800 parts
= significandParts();
802 assert(semantics
== rhs
.semantics
);
803 assert(exponent
== rhs
.exponent
);
805 return APInt::tcSubtract(parts
, rhs
.significandParts(), borrow
,
809 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
810 on to the full-precision result of the multiplication. Returns the
813 APFloat::multiplySignificand(const APFloat
&rhs
, const APFloat
*addend
)
815 unsigned int omsb
; // One, not zero, based MSB.
816 unsigned int partsCount
, newPartsCount
, precision
;
817 integerPart
*lhsSignificand
;
818 integerPart scratch
[4];
819 integerPart
*fullSignificand
;
820 lostFraction lost_fraction
;
823 assert(semantics
== rhs
.semantics
);
825 precision
= semantics
->precision
;
826 newPartsCount
= partCountForBits(precision
* 2);
828 if(newPartsCount
> 4)
829 fullSignificand
= new integerPart
[newPartsCount
];
831 fullSignificand
= scratch
;
833 lhsSignificand
= significandParts();
834 partsCount
= partCount();
836 APInt::tcFullMultiply(fullSignificand
, lhsSignificand
,
837 rhs
.significandParts(), partsCount
, partsCount
);
839 lost_fraction
= lfExactlyZero
;
840 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
841 exponent
+= rhs
.exponent
;
844 Significand savedSignificand
= significand
;
845 const fltSemantics
*savedSemantics
= semantics
;
846 fltSemantics extendedSemantics
;
848 unsigned int extendedPrecision
;
850 /* Normalize our MSB. */
851 extendedPrecision
= precision
+ precision
- 1;
852 if(omsb
!= extendedPrecision
)
854 APInt::tcShiftLeft(fullSignificand
, newPartsCount
,
855 extendedPrecision
- omsb
);
856 exponent
-= extendedPrecision
- omsb
;
859 /* Create new semantics. */
860 extendedSemantics
= *semantics
;
861 extendedSemantics
.precision
= extendedPrecision
;
863 if(newPartsCount
== 1)
864 significand
.part
= fullSignificand
[0];
866 significand
.parts
= fullSignificand
;
867 semantics
= &extendedSemantics
;
869 APFloat
extendedAddend(*addend
);
870 status
= extendedAddend
.convert(extendedSemantics
, rmTowardZero
, &ignored
);
871 assert(status
== opOK
);
872 lost_fraction
= addOrSubtractSignificand(extendedAddend
, false);
874 /* Restore our state. */
875 if(newPartsCount
== 1)
876 fullSignificand
[0] = significand
.part
;
877 significand
= savedSignificand
;
878 semantics
= savedSemantics
;
880 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
883 exponent
-= (precision
- 1);
885 if(omsb
> precision
) {
886 unsigned int bits
, significantParts
;
889 bits
= omsb
- precision
;
890 significantParts
= partCountForBits(omsb
);
891 lf
= shiftRight(fullSignificand
, significantParts
, bits
);
892 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
896 APInt::tcAssign(lhsSignificand
, fullSignificand
, partsCount
);
898 if(newPartsCount
> 4)
899 delete [] fullSignificand
;
901 return lost_fraction
;
904 /* Multiply the significands of LHS and RHS to DST. */
906 APFloat::divideSignificand(const APFloat
&rhs
)
908 unsigned int bit
, i
, partsCount
;
909 const integerPart
*rhsSignificand
;
910 integerPart
*lhsSignificand
, *dividend
, *divisor
;
911 integerPart scratch
[4];
912 lostFraction lost_fraction
;
914 assert(semantics
== rhs
.semantics
);
916 lhsSignificand
= significandParts();
917 rhsSignificand
= rhs
.significandParts();
918 partsCount
= partCount();
921 dividend
= new integerPart
[partsCount
* 2];
925 divisor
= dividend
+ partsCount
;
927 /* Copy the dividend and divisor as they will be modified in-place. */
928 for(i
= 0; i
< partsCount
; i
++) {
929 dividend
[i
] = lhsSignificand
[i
];
930 divisor
[i
] = rhsSignificand
[i
];
931 lhsSignificand
[i
] = 0;
934 exponent
-= rhs
.exponent
;
936 unsigned int precision
= semantics
->precision
;
938 /* Normalize the divisor. */
939 bit
= precision
- APInt::tcMSB(divisor
, partsCount
) - 1;
942 APInt::tcShiftLeft(divisor
, partsCount
, bit
);
945 /* Normalize the dividend. */
946 bit
= precision
- APInt::tcMSB(dividend
, partsCount
) - 1;
949 APInt::tcShiftLeft(dividend
, partsCount
, bit
);
952 /* Ensure the dividend >= divisor initially for the loop below.
953 Incidentally, this means that the division loop below is
954 guaranteed to set the integer bit to one. */
955 if(APInt::tcCompare(dividend
, divisor
, partsCount
) < 0) {
957 APInt::tcShiftLeft(dividend
, partsCount
, 1);
958 assert(APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0);
962 for(bit
= precision
; bit
; bit
-= 1) {
963 if(APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0) {
964 APInt::tcSubtract(dividend
, divisor
, 0, partsCount
);
965 APInt::tcSetBit(lhsSignificand
, bit
- 1);
968 APInt::tcShiftLeft(dividend
, partsCount
, 1);
971 /* Figure out the lost fraction. */
972 int cmp
= APInt::tcCompare(dividend
, divisor
, partsCount
);
975 lost_fraction
= lfMoreThanHalf
;
977 lost_fraction
= lfExactlyHalf
;
978 else if(APInt::tcIsZero(dividend
, partsCount
))
979 lost_fraction
= lfExactlyZero
;
981 lost_fraction
= lfLessThanHalf
;
986 return lost_fraction
;
990 APFloat::significandMSB() const
992 return APInt::tcMSB(significandParts(), partCount());
996 APFloat::significandLSB() const
998 return APInt::tcLSB(significandParts(), partCount());
1001 /* Note that a zero result is NOT normalized to fcZero. */
1003 APFloat::shiftSignificandRight(unsigned int bits
)
1005 /* Our exponent should not overflow. */
1006 assert((exponent_t
) (exponent
+ bits
) >= exponent
);
1010 return shiftRight(significandParts(), partCount(), bits
);
1013 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1015 APFloat::shiftSignificandLeft(unsigned int bits
)
1017 assert(bits
< semantics
->precision
);
1020 unsigned int partsCount
= partCount();
1022 APInt::tcShiftLeft(significandParts(), partsCount
, bits
);
1025 assert(!APInt::tcIsZero(significandParts(), partsCount
));
1030 APFloat::compareAbsoluteValue(const APFloat
&rhs
) const
1034 assert(semantics
== rhs
.semantics
);
1035 assert(category
== fcNormal
);
1036 assert(rhs
.category
== fcNormal
);
1038 compare
= exponent
- rhs
.exponent
;
1040 /* If exponents are equal, do an unsigned bignum comparison of the
1043 compare
= APInt::tcCompare(significandParts(), rhs
.significandParts(),
1047 return cmpGreaterThan
;
1048 else if(compare
< 0)
1054 /* Handle overflow. Sign is preserved. We either become infinity or
1055 the largest finite number. */
1057 APFloat::handleOverflow(roundingMode rounding_mode
)
1060 if(rounding_mode
== rmNearestTiesToEven
1061 || rounding_mode
== rmNearestTiesToAway
1062 || (rounding_mode
== rmTowardPositive
&& !sign
)
1063 || (rounding_mode
== rmTowardNegative
&& sign
))
1065 category
= fcInfinity
;
1066 return (opStatus
) (opOverflow
| opInexact
);
1069 /* Otherwise we become the largest finite number. */
1070 category
= fcNormal
;
1071 exponent
= semantics
->maxExponent
;
1072 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1073 semantics
->precision
);
1078 /* Returns TRUE if, when truncating the current number, with BIT the
1079 new LSB, with the given lost fraction and rounding mode, the result
1080 would need to be rounded away from zero (i.e., by increasing the
1081 signficand). This routine must work for fcZero of both signs, and
1082 fcNormal numbers. */
1084 APFloat::roundAwayFromZero(roundingMode rounding_mode
,
1085 lostFraction lost_fraction
,
1086 unsigned int bit
) const
1088 /* NaNs and infinities should not have lost fractions. */
1089 assert(category
== fcNormal
|| category
== fcZero
);
1091 /* Current callers never pass this so we don't handle it. */
1092 assert(lost_fraction
!= lfExactlyZero
);
1094 switch (rounding_mode
) {
1096 llvm_unreachable(0);
1098 case rmNearestTiesToAway
:
1099 return lost_fraction
== lfExactlyHalf
|| lost_fraction
== lfMoreThanHalf
;
1101 case rmNearestTiesToEven
:
1102 if(lost_fraction
== lfMoreThanHalf
)
1105 /* Our zeroes don't have a significand to test. */
1106 if(lost_fraction
== lfExactlyHalf
&& category
!= fcZero
)
1107 return APInt::tcExtractBit(significandParts(), bit
);
1114 case rmTowardPositive
:
1115 return sign
== false;
1117 case rmTowardNegative
:
1118 return sign
== true;
1123 APFloat::normalize(roundingMode rounding_mode
,
1124 lostFraction lost_fraction
)
1126 unsigned int omsb
; /* One, not zero, based MSB. */
1129 if(category
!= fcNormal
)
1132 /* Before rounding normalize the exponent of fcNormal numbers. */
1133 omsb
= significandMSB() + 1;
1136 /* OMSB is numbered from 1. We want to place it in the integer
1137 bit numbered PRECISON if possible, with a compensating change in
1139 exponentChange
= omsb
- semantics
->precision
;
1141 /* If the resulting exponent is too high, overflow according to
1142 the rounding mode. */
1143 if(exponent
+ exponentChange
> semantics
->maxExponent
)
1144 return handleOverflow(rounding_mode
);
1146 /* Subnormal numbers have exponent minExponent, and their MSB
1147 is forced based on that. */
1148 if(exponent
+ exponentChange
< semantics
->minExponent
)
1149 exponentChange
= semantics
->minExponent
- exponent
;
1151 /* Shifting left is easy as we don't lose precision. */
1152 if(exponentChange
< 0) {
1153 assert(lost_fraction
== lfExactlyZero
);
1155 shiftSignificandLeft(-exponentChange
);
1160 if(exponentChange
> 0) {
1163 /* Shift right and capture any new lost fraction. */
1164 lf
= shiftSignificandRight(exponentChange
);
1166 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1168 /* Keep OMSB up-to-date. */
1169 if(omsb
> (unsigned) exponentChange
)
1170 omsb
-= exponentChange
;
1176 /* Now round the number according to rounding_mode given the lost
1179 /* As specified in IEEE 754, since we do not trap we do not report
1180 underflow for exact results. */
1181 if(lost_fraction
== lfExactlyZero
) {
1182 /* Canonicalize zeroes. */
1189 /* Increment the significand if we're rounding away from zero. */
1190 if(roundAwayFromZero(rounding_mode
, lost_fraction
, 0)) {
1192 exponent
= semantics
->minExponent
;
1194 incrementSignificand();
1195 omsb
= significandMSB() + 1;
1197 /* Did the significand increment overflow? */
1198 if(omsb
== (unsigned) semantics
->precision
+ 1) {
1199 /* Renormalize by incrementing the exponent and shifting our
1200 significand right one. However if we already have the
1201 maximum exponent we overflow to infinity. */
1202 if(exponent
== semantics
->maxExponent
) {
1203 category
= fcInfinity
;
1205 return (opStatus
) (opOverflow
| opInexact
);
1208 shiftSignificandRight(1);
1214 /* The normal case - we were and are not denormal, and any
1215 significand increment above didn't overflow. */
1216 if(omsb
== semantics
->precision
)
1219 /* We have a non-zero denormal. */
1220 assert(omsb
< semantics
->precision
);
1222 /* Canonicalize zeroes. */
1226 /* The fcZero case is a denormal that underflowed to zero. */
1227 return (opStatus
) (opUnderflow
| opInexact
);
1231 APFloat::addOrSubtractSpecials(const APFloat
&rhs
, bool subtract
)
1233 switch (convolve(category
, rhs
.category
)) {
1235 llvm_unreachable(0);
1237 case convolve(fcNaN
, fcZero
):
1238 case convolve(fcNaN
, fcNormal
):
1239 case convolve(fcNaN
, fcInfinity
):
1240 case convolve(fcNaN
, fcNaN
):
1241 case convolve(fcNormal
, fcZero
):
1242 case convolve(fcInfinity
, fcNormal
):
1243 case convolve(fcInfinity
, fcZero
):
1246 case convolve(fcZero
, fcNaN
):
1247 case convolve(fcNormal
, fcNaN
):
1248 case convolve(fcInfinity
, fcNaN
):
1250 copySignificand(rhs
);
1253 case convolve(fcNormal
, fcInfinity
):
1254 case convolve(fcZero
, fcInfinity
):
1255 category
= fcInfinity
;
1256 sign
= rhs
.sign
^ subtract
;
1259 case convolve(fcZero
, fcNormal
):
1261 sign
= rhs
.sign
^ subtract
;
1264 case convolve(fcZero
, fcZero
):
1265 /* Sign depends on rounding mode; handled by caller. */
1268 case convolve(fcInfinity
, fcInfinity
):
1269 /* Differently signed infinities can only be validly
1271 if(((sign
^ rhs
.sign
)!=0) != subtract
) {
1278 case convolve(fcNormal
, fcNormal
):
1283 /* Add or subtract two normal numbers. */
1285 APFloat::addOrSubtractSignificand(const APFloat
&rhs
, bool subtract
)
1288 lostFraction lost_fraction
;
1291 /* Determine if the operation on the absolute values is effectively
1292 an addition or subtraction. */
1293 subtract
^= (sign
^ rhs
.sign
) ? true : false;
1295 /* Are we bigger exponent-wise than the RHS? */
1296 bits
= exponent
- rhs
.exponent
;
1298 /* Subtraction is more subtle than one might naively expect. */
1300 APFloat
temp_rhs(rhs
);
1304 reverse
= compareAbsoluteValue(temp_rhs
) == cmpLessThan
;
1305 lost_fraction
= lfExactlyZero
;
1306 } else if (bits
> 0) {
1307 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
- 1);
1308 shiftSignificandLeft(1);
1311 lost_fraction
= shiftSignificandRight(-bits
- 1);
1312 temp_rhs
.shiftSignificandLeft(1);
1317 carry
= temp_rhs
.subtractSignificand
1318 (*this, lost_fraction
!= lfExactlyZero
);
1319 copySignificand(temp_rhs
);
1322 carry
= subtractSignificand
1323 (temp_rhs
, lost_fraction
!= lfExactlyZero
);
1326 /* Invert the lost fraction - it was on the RHS and
1328 if(lost_fraction
== lfLessThanHalf
)
1329 lost_fraction
= lfMoreThanHalf
;
1330 else if(lost_fraction
== lfMoreThanHalf
)
1331 lost_fraction
= lfLessThanHalf
;
1333 /* The code above is intended to ensure that no borrow is
1338 APFloat
temp_rhs(rhs
);
1340 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
);
1341 carry
= addSignificand(temp_rhs
);
1343 lost_fraction
= shiftSignificandRight(-bits
);
1344 carry
= addSignificand(rhs
);
1347 /* We have a guard bit; generating a carry cannot happen. */
1351 return lost_fraction
;
1355 APFloat::multiplySpecials(const APFloat
&rhs
)
1357 switch (convolve(category
, rhs
.category
)) {
1359 llvm_unreachable(0);
1361 case convolve(fcNaN
, fcZero
):
1362 case convolve(fcNaN
, fcNormal
):
1363 case convolve(fcNaN
, fcInfinity
):
1364 case convolve(fcNaN
, fcNaN
):
1367 case convolve(fcZero
, fcNaN
):
1368 case convolve(fcNormal
, fcNaN
):
1369 case convolve(fcInfinity
, fcNaN
):
1371 copySignificand(rhs
);
1374 case convolve(fcNormal
, fcInfinity
):
1375 case convolve(fcInfinity
, fcNormal
):
1376 case convolve(fcInfinity
, fcInfinity
):
1377 category
= fcInfinity
;
1380 case convolve(fcZero
, fcNormal
):
1381 case convolve(fcNormal
, fcZero
):
1382 case convolve(fcZero
, fcZero
):
1386 case convolve(fcZero
, fcInfinity
):
1387 case convolve(fcInfinity
, fcZero
):
1391 case convolve(fcNormal
, fcNormal
):
1397 APFloat::divideSpecials(const APFloat
&rhs
)
1399 switch (convolve(category
, rhs
.category
)) {
1401 llvm_unreachable(0);
1403 case convolve(fcNaN
, fcZero
):
1404 case convolve(fcNaN
, fcNormal
):
1405 case convolve(fcNaN
, fcInfinity
):
1406 case convolve(fcNaN
, fcNaN
):
1407 case convolve(fcInfinity
, fcZero
):
1408 case convolve(fcInfinity
, fcNormal
):
1409 case convolve(fcZero
, fcInfinity
):
1410 case convolve(fcZero
, fcNormal
):
1413 case convolve(fcZero
, fcNaN
):
1414 case convolve(fcNormal
, fcNaN
):
1415 case convolve(fcInfinity
, fcNaN
):
1417 copySignificand(rhs
);
1420 case convolve(fcNormal
, fcInfinity
):
1424 case convolve(fcNormal
, fcZero
):
1425 category
= fcInfinity
;
1428 case convolve(fcInfinity
, fcInfinity
):
1429 case convolve(fcZero
, fcZero
):
1433 case convolve(fcNormal
, fcNormal
):
1439 APFloat::modSpecials(const APFloat
&rhs
)
1441 switch (convolve(category
, rhs
.category
)) {
1443 llvm_unreachable(0);
1445 case convolve(fcNaN
, fcZero
):
1446 case convolve(fcNaN
, fcNormal
):
1447 case convolve(fcNaN
, fcInfinity
):
1448 case convolve(fcNaN
, fcNaN
):
1449 case convolve(fcZero
, fcInfinity
):
1450 case convolve(fcZero
, fcNormal
):
1451 case convolve(fcNormal
, fcInfinity
):
1454 case convolve(fcZero
, fcNaN
):
1455 case convolve(fcNormal
, fcNaN
):
1456 case convolve(fcInfinity
, fcNaN
):
1458 copySignificand(rhs
);
1461 case convolve(fcNormal
, fcZero
):
1462 case convolve(fcInfinity
, fcZero
):
1463 case convolve(fcInfinity
, fcNormal
):
1464 case convolve(fcInfinity
, fcInfinity
):
1465 case convolve(fcZero
, fcZero
):
1469 case convolve(fcNormal
, fcNormal
):
1476 APFloat::changeSign()
1478 /* Look mummy, this one's easy. */
1483 APFloat::clearSign()
1485 /* So is this one. */
1490 APFloat::copySign(const APFloat
&rhs
)
1496 /* Normalized addition or subtraction. */
1498 APFloat::addOrSubtract(const APFloat
&rhs
, roundingMode rounding_mode
,
1503 assertArithmeticOK(*semantics
);
1505 fs
= addOrSubtractSpecials(rhs
, subtract
);
1507 /* This return code means it was not a simple case. */
1508 if(fs
== opDivByZero
) {
1509 lostFraction lost_fraction
;
1511 lost_fraction
= addOrSubtractSignificand(rhs
, subtract
);
1512 fs
= normalize(rounding_mode
, lost_fraction
);
1514 /* Can only be zero if we lost no fraction. */
1515 assert(category
!= fcZero
|| lost_fraction
== lfExactlyZero
);
1518 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1519 positive zero unless rounding to minus infinity, except that
1520 adding two like-signed zeroes gives that zero. */
1521 if(category
== fcZero
) {
1522 if(rhs
.category
!= fcZero
|| (sign
== rhs
.sign
) == subtract
)
1523 sign
= (rounding_mode
== rmTowardNegative
);
1529 /* Normalized addition. */
1531 APFloat::add(const APFloat
&rhs
, roundingMode rounding_mode
)
1533 return addOrSubtract(rhs
, rounding_mode
, false);
1536 /* Normalized subtraction. */
1538 APFloat::subtract(const APFloat
&rhs
, roundingMode rounding_mode
)
1540 return addOrSubtract(rhs
, rounding_mode
, true);
1543 /* Normalized multiply. */
1545 APFloat::multiply(const APFloat
&rhs
, roundingMode rounding_mode
)
1549 assertArithmeticOK(*semantics
);
1551 fs
= multiplySpecials(rhs
);
1553 if(category
== fcNormal
) {
1554 lostFraction lost_fraction
= multiplySignificand(rhs
, 0);
1555 fs
= normalize(rounding_mode
, lost_fraction
);
1556 if(lost_fraction
!= lfExactlyZero
)
1557 fs
= (opStatus
) (fs
| opInexact
);
1563 /* Normalized divide. */
1565 APFloat::divide(const APFloat
&rhs
, roundingMode rounding_mode
)
1569 assertArithmeticOK(*semantics
);
1571 fs
= divideSpecials(rhs
);
1573 if(category
== fcNormal
) {
1574 lostFraction lost_fraction
= divideSignificand(rhs
);
1575 fs
= normalize(rounding_mode
, lost_fraction
);
1576 if(lost_fraction
!= lfExactlyZero
)
1577 fs
= (opStatus
) (fs
| opInexact
);
1583 /* Normalized remainder. This is not currently correct in all cases. */
1585 APFloat::remainder(const APFloat
&rhs
)
1589 unsigned int origSign
= sign
;
1591 assertArithmeticOK(*semantics
);
1592 fs
= V
.divide(rhs
, rmNearestTiesToEven
);
1593 if (fs
== opDivByZero
)
1596 int parts
= partCount();
1597 integerPart
*x
= new integerPart
[parts
];
1599 fs
= V
.convertToInteger(x
, parts
* integerPartWidth
, true,
1600 rmNearestTiesToEven
, &ignored
);
1601 if (fs
==opInvalidOp
)
1604 fs
= V
.convertFromZeroExtendedInteger(x
, parts
* integerPartWidth
, true,
1605 rmNearestTiesToEven
);
1606 assert(fs
==opOK
); // should always work
1608 fs
= V
.multiply(rhs
, rmNearestTiesToEven
);
1609 assert(fs
==opOK
|| fs
==opInexact
); // should not overflow or underflow
1611 fs
= subtract(V
, rmNearestTiesToEven
);
1612 assert(fs
==opOK
|| fs
==opInexact
); // likewise
1615 sign
= origSign
; // IEEE754 requires this
1620 /* Normalized llvm frem (C fmod).
1621 This is not currently correct in all cases. */
1623 APFloat::mod(const APFloat
&rhs
, roundingMode rounding_mode
)
1626 assertArithmeticOK(*semantics
);
1627 fs
= modSpecials(rhs
);
1629 if (category
== fcNormal
&& rhs
.category
== fcNormal
) {
1631 unsigned int origSign
= sign
;
1633 fs
= V
.divide(rhs
, rmNearestTiesToEven
);
1634 if (fs
== opDivByZero
)
1637 int parts
= partCount();
1638 integerPart
*x
= new integerPart
[parts
];
1640 fs
= V
.convertToInteger(x
, parts
* integerPartWidth
, true,
1641 rmTowardZero
, &ignored
);
1642 if (fs
==opInvalidOp
)
1645 fs
= V
.convertFromZeroExtendedInteger(x
, parts
* integerPartWidth
, true,
1646 rmNearestTiesToEven
);
1647 assert(fs
==opOK
); // should always work
1649 fs
= V
.multiply(rhs
, rounding_mode
);
1650 assert(fs
==opOK
|| fs
==opInexact
); // should not overflow or underflow
1652 fs
= subtract(V
, rounding_mode
);
1653 assert(fs
==opOK
|| fs
==opInexact
); // likewise
1656 sign
= origSign
; // IEEE754 requires this
1662 /* Normalized fused-multiply-add. */
1664 APFloat::fusedMultiplyAdd(const APFloat
&multiplicand
,
1665 const APFloat
&addend
,
1666 roundingMode rounding_mode
)
1670 assertArithmeticOK(*semantics
);
1672 /* Post-multiplication sign, before addition. */
1673 sign
^= multiplicand
.sign
;
1675 /* If and only if all arguments are normal do we need to do an
1676 extended-precision calculation. */
1677 if(category
== fcNormal
1678 && multiplicand
.category
== fcNormal
1679 && addend
.category
== fcNormal
) {
1680 lostFraction lost_fraction
;
1682 lost_fraction
= multiplySignificand(multiplicand
, &addend
);
1683 fs
= normalize(rounding_mode
, lost_fraction
);
1684 if(lost_fraction
!= lfExactlyZero
)
1685 fs
= (opStatus
) (fs
| opInexact
);
1687 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1688 positive zero unless rounding to minus infinity, except that
1689 adding two like-signed zeroes gives that zero. */
1690 if(category
== fcZero
&& sign
!= addend
.sign
)
1691 sign
= (rounding_mode
== rmTowardNegative
);
1693 fs
= multiplySpecials(multiplicand
);
1695 /* FS can only be opOK or opInvalidOp. There is no more work
1696 to do in the latter case. The IEEE-754R standard says it is
1697 implementation-defined in this case whether, if ADDEND is a
1698 quiet NaN, we raise invalid op; this implementation does so.
1700 If we need to do the addition we can do so with normal
1703 fs
= addOrSubtract(addend
, rounding_mode
, false);
1709 /* Comparison requires normalized numbers. */
1711 APFloat::compare(const APFloat
&rhs
) const
1715 assertArithmeticOK(*semantics
);
1716 assert(semantics
== rhs
.semantics
);
1718 switch (convolve(category
, rhs
.category
)) {
1720 llvm_unreachable(0);
1722 case convolve(fcNaN
, fcZero
):
1723 case convolve(fcNaN
, fcNormal
):
1724 case convolve(fcNaN
, fcInfinity
):
1725 case convolve(fcNaN
, fcNaN
):
1726 case convolve(fcZero
, fcNaN
):
1727 case convolve(fcNormal
, fcNaN
):
1728 case convolve(fcInfinity
, fcNaN
):
1729 return cmpUnordered
;
1731 case convolve(fcInfinity
, fcNormal
):
1732 case convolve(fcInfinity
, fcZero
):
1733 case convolve(fcNormal
, fcZero
):
1737 return cmpGreaterThan
;
1739 case convolve(fcNormal
, fcInfinity
):
1740 case convolve(fcZero
, fcInfinity
):
1741 case convolve(fcZero
, fcNormal
):
1743 return cmpGreaterThan
;
1747 case convolve(fcInfinity
, fcInfinity
):
1748 if(sign
== rhs
.sign
)
1753 return cmpGreaterThan
;
1755 case convolve(fcZero
, fcZero
):
1758 case convolve(fcNormal
, fcNormal
):
1762 /* Two normal numbers. Do they have the same sign? */
1763 if(sign
!= rhs
.sign
) {
1765 result
= cmpLessThan
;
1767 result
= cmpGreaterThan
;
1769 /* Compare absolute values; invert result if negative. */
1770 result
= compareAbsoluteValue(rhs
);
1773 if(result
== cmpLessThan
)
1774 result
= cmpGreaterThan
;
1775 else if(result
== cmpGreaterThan
)
1776 result
= cmpLessThan
;
1783 /// APFloat::convert - convert a value of one floating point type to another.
1784 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1785 /// records whether the transformation lost information, i.e. whether
1786 /// converting the result back to the original type will produce the
1787 /// original value (this is almost the same as return value==fsOK, but there
1788 /// are edge cases where this is not so).
1791 APFloat::convert(const fltSemantics
&toSemantics
,
1792 roundingMode rounding_mode
, bool *losesInfo
)
1794 lostFraction lostFraction
;
1795 unsigned int newPartCount
, oldPartCount
;
1798 assertArithmeticOK(*semantics
);
1799 assertArithmeticOK(toSemantics
);
1800 lostFraction
= lfExactlyZero
;
1801 newPartCount
= partCountForBits(toSemantics
.precision
+ 1);
1802 oldPartCount
= partCount();
1804 /* Handle storage complications. If our new form is wider,
1805 re-allocate our bit pattern into wider storage. If it is
1806 narrower, we ignore the excess parts, but if narrowing to a
1807 single part we need to free the old storage.
1808 Be careful not to reference significandParts for zeroes
1809 and infinities, since it aborts. */
1810 if (newPartCount
> oldPartCount
) {
1811 integerPart
*newParts
;
1812 newParts
= new integerPart
[newPartCount
];
1813 APInt::tcSet(newParts
, 0, newPartCount
);
1814 if (category
==fcNormal
|| category
==fcNaN
)
1815 APInt::tcAssign(newParts
, significandParts(), oldPartCount
);
1817 significand
.parts
= newParts
;
1818 } else if (newPartCount
< oldPartCount
) {
1819 /* Capture any lost fraction through truncation of parts so we get
1820 correct rounding whilst normalizing. */
1821 if (category
==fcNormal
)
1822 lostFraction
= lostFractionThroughTruncation
1823 (significandParts(), oldPartCount
, toSemantics
.precision
);
1824 if (newPartCount
== 1) {
1825 integerPart newPart
= 0;
1826 if (category
==fcNormal
|| category
==fcNaN
)
1827 newPart
= significandParts()[0];
1829 significand
.part
= newPart
;
1833 if(category
== fcNormal
) {
1834 /* Re-interpret our bit-pattern. */
1835 exponent
+= toSemantics
.precision
- semantics
->precision
;
1836 semantics
= &toSemantics
;
1837 fs
= normalize(rounding_mode
, lostFraction
);
1838 *losesInfo
= (fs
!= opOK
);
1839 } else if (category
== fcNaN
) {
1840 int shift
= toSemantics
.precision
- semantics
->precision
;
1841 // Do this now so significandParts gets the right answer
1842 const fltSemantics
*oldSemantics
= semantics
;
1843 semantics
= &toSemantics
;
1845 // No normalization here, just truncate
1847 APInt::tcShiftLeft(significandParts(), newPartCount
, shift
);
1848 else if (shift
< 0) {
1849 unsigned ushift
= -shift
;
1850 // Figure out if we are losing information. This happens
1851 // if are shifting out something other than 0s, or if the x87 long
1852 // double input did not have its integer bit set (pseudo-NaN), or if the
1853 // x87 long double input did not have its QNan bit set (because the x87
1854 // hardware sets this bit when converting a lower-precision NaN to
1855 // x87 long double).
1856 if (APInt::tcLSB(significandParts(), newPartCount
) < ushift
)
1858 if (oldSemantics
== &APFloat::x87DoubleExtended
&&
1859 (!(*significandParts() & 0x8000000000000000ULL
) ||
1860 !(*significandParts() & 0x4000000000000000ULL
)))
1862 APInt::tcShiftRight(significandParts(), newPartCount
, ushift
);
1864 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1865 // does not give you back the same bits. This is dubious, and we
1866 // don't currently do it. You're really supposed to get
1867 // an invalid operation signal at runtime, but nobody does that.
1870 semantics
= &toSemantics
;
1878 /* Convert a floating point number to an integer according to the
1879 rounding mode. If the rounded integer value is out of range this
1880 returns an invalid operation exception and the contents of the
1881 destination parts are unspecified. If the rounded value is in
1882 range but the floating point number is not the exact integer, the C
1883 standard doesn't require an inexact exception to be raised. IEEE
1884 854 does require it so we do that.
1886 Note that for conversions to integer type the C standard requires
1887 round-to-zero to always be used. */
1889 APFloat::convertToSignExtendedInteger(integerPart
*parts
, unsigned int width
,
1891 roundingMode rounding_mode
,
1892 bool *isExact
) const
1894 lostFraction lost_fraction
;
1895 const integerPart
*src
;
1896 unsigned int dstPartsCount
, truncatedBits
;
1898 assertArithmeticOK(*semantics
);
1902 /* Handle the three special cases first. */
1903 if(category
== fcInfinity
|| category
== fcNaN
)
1906 dstPartsCount
= partCountForBits(width
);
1908 if(category
== fcZero
) {
1909 APInt::tcSet(parts
, 0, dstPartsCount
);
1910 // Negative zero can't be represented as an int.
1915 src
= significandParts();
1917 /* Step 1: place our absolute value, with any fraction truncated, in
1920 /* Our absolute value is less than one; truncate everything. */
1921 APInt::tcSet(parts
, 0, dstPartsCount
);
1922 /* For exponent -1 the integer bit represents .5, look at that.
1923 For smaller exponents leftmost truncated bit is 0. */
1924 truncatedBits
= semantics
->precision
-1U - exponent
;
1926 /* We want the most significant (exponent + 1) bits; the rest are
1928 unsigned int bits
= exponent
+ 1U;
1930 /* Hopelessly large in magnitude? */
1934 if (bits
< semantics
->precision
) {
1935 /* We truncate (semantics->precision - bits) bits. */
1936 truncatedBits
= semantics
->precision
- bits
;
1937 APInt::tcExtract(parts
, dstPartsCount
, src
, bits
, truncatedBits
);
1939 /* We want at least as many bits as are available. */
1940 APInt::tcExtract(parts
, dstPartsCount
, src
, semantics
->precision
, 0);
1941 APInt::tcShiftLeft(parts
, dstPartsCount
, bits
- semantics
->precision
);
1946 /* Step 2: work out any lost fraction, and increment the absolute
1947 value if we would round away from zero. */
1948 if (truncatedBits
) {
1949 lost_fraction
= lostFractionThroughTruncation(src
, partCount(),
1951 if (lost_fraction
!= lfExactlyZero
1952 && roundAwayFromZero(rounding_mode
, lost_fraction
, truncatedBits
)) {
1953 if (APInt::tcIncrement(parts
, dstPartsCount
))
1954 return opInvalidOp
; /* Overflow. */
1957 lost_fraction
= lfExactlyZero
;
1960 /* Step 3: check if we fit in the destination. */
1961 unsigned int omsb
= APInt::tcMSB(parts
, dstPartsCount
) + 1;
1965 /* Negative numbers cannot be represented as unsigned. */
1969 /* It takes omsb bits to represent the unsigned integer value.
1970 We lose a bit for the sign, but care is needed as the
1971 maximally negative integer is a special case. */
1972 if (omsb
== width
&& APInt::tcLSB(parts
, dstPartsCount
) + 1 != omsb
)
1975 /* This case can happen because of rounding. */
1980 APInt::tcNegate (parts
, dstPartsCount
);
1982 if (omsb
>= width
+ !isSigned
)
1986 if (lost_fraction
== lfExactlyZero
) {
1993 /* Same as convertToSignExtendedInteger, except we provide
1994 deterministic values in case of an invalid operation exception,
1995 namely zero for NaNs and the minimal or maximal value respectively
1996 for underflow or overflow.
1997 The *isExact output tells whether the result is exact, in the sense
1998 that converting it back to the original floating point type produces
1999 the original value. This is almost equivalent to result==opOK,
2000 except for negative zeroes.
2003 APFloat::convertToInteger(integerPart
*parts
, unsigned int width
,
2005 roundingMode rounding_mode
, bool *isExact
) const
2009 fs
= convertToSignExtendedInteger(parts
, width
, isSigned
, rounding_mode
,
2012 if (fs
== opInvalidOp
) {
2013 unsigned int bits
, dstPartsCount
;
2015 dstPartsCount
= partCountForBits(width
);
2017 if (category
== fcNaN
)
2022 bits
= width
- isSigned
;
2024 APInt::tcSetLeastSignificantBits(parts
, dstPartsCount
, bits
);
2025 if (sign
&& isSigned
)
2026 APInt::tcShiftLeft(parts
, dstPartsCount
, width
- 1);
2032 /* Convert an unsigned integer SRC to a floating point number,
2033 rounding according to ROUNDING_MODE. The sign of the floating
2034 point number is not modified. */
2036 APFloat::convertFromUnsignedParts(const integerPart
*src
,
2037 unsigned int srcCount
,
2038 roundingMode rounding_mode
)
2040 unsigned int omsb
, precision
, dstCount
;
2042 lostFraction lost_fraction
;
2044 assertArithmeticOK(*semantics
);
2045 category
= fcNormal
;
2046 omsb
= APInt::tcMSB(src
, srcCount
) + 1;
2047 dst
= significandParts();
2048 dstCount
= partCount();
2049 precision
= semantics
->precision
;
2051 /* We want the most significant PRECISON bits of SRC. There may not
2052 be that many; extract what we can. */
2053 if (precision
<= omsb
) {
2054 exponent
= omsb
- 1;
2055 lost_fraction
= lostFractionThroughTruncation(src
, srcCount
,
2057 APInt::tcExtract(dst
, dstCount
, src
, precision
, omsb
- precision
);
2059 exponent
= precision
- 1;
2060 lost_fraction
= lfExactlyZero
;
2061 APInt::tcExtract(dst
, dstCount
, src
, omsb
, 0);
2064 return normalize(rounding_mode
, lost_fraction
);
2068 APFloat::convertFromAPInt(const APInt
&Val
,
2070 roundingMode rounding_mode
)
2072 unsigned int partCount
= Val
.getNumWords();
2076 if (isSigned
&& api
.isNegative()) {
2081 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2084 /* Convert a two's complement integer SRC to a floating point number,
2085 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2086 integer is signed, in which case it must be sign-extended. */
2088 APFloat::convertFromSignExtendedInteger(const integerPart
*src
,
2089 unsigned int srcCount
,
2091 roundingMode rounding_mode
)
2095 assertArithmeticOK(*semantics
);
2097 && APInt::tcExtractBit(src
, srcCount
* integerPartWidth
- 1)) {
2100 /* If we're signed and negative negate a copy. */
2102 copy
= new integerPart
[srcCount
];
2103 APInt::tcAssign(copy
, src
, srcCount
);
2104 APInt::tcNegate(copy
, srcCount
);
2105 status
= convertFromUnsignedParts(copy
, srcCount
, rounding_mode
);
2109 status
= convertFromUnsignedParts(src
, srcCount
, rounding_mode
);
2115 /* FIXME: should this just take a const APInt reference? */
2117 APFloat::convertFromZeroExtendedInteger(const integerPart
*parts
,
2118 unsigned int width
, bool isSigned
,
2119 roundingMode rounding_mode
)
2121 unsigned int partCount
= partCountForBits(width
);
2122 APInt api
= APInt(width
, partCount
, parts
);
2125 if(isSigned
&& APInt::tcExtractBit(parts
, width
- 1)) {
2130 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2134 APFloat::convertFromHexadecimalString(const StringRef
&s
,
2135 roundingMode rounding_mode
)
2137 lostFraction lost_fraction
= lfExactlyZero
;
2138 integerPart
*significand
;
2139 unsigned int bitPos
, partsCount
;
2140 StringRef::iterator dot
, firstSignificantDigit
;
2144 category
= fcNormal
;
2146 significand
= significandParts();
2147 partsCount
= partCount();
2148 bitPos
= partsCount
* integerPartWidth
;
2150 /* Skip leading zeroes and any (hexa)decimal point. */
2151 StringRef::iterator begin
= s
.begin();
2152 StringRef::iterator end
= s
.end();
2153 StringRef::iterator p
= skipLeadingZeroesAndAnyDot(begin
, end
, &dot
);
2154 firstSignificantDigit
= p
;
2157 integerPart hex_value
;
2160 assert(dot
== end
&& "String contains multiple dots");
2167 hex_value
= hexDigitValue(*p
);
2168 if(hex_value
== -1U) {
2177 /* Store the number whilst 4-bit nibbles remain. */
2180 hex_value
<<= bitPos
% integerPartWidth
;
2181 significand
[bitPos
/ integerPartWidth
] |= hex_value
;
2183 lost_fraction
= trailingHexadecimalFraction(p
, end
, hex_value
);
2184 while(p
!= end
&& hexDigitValue(*p
) != -1U)
2191 /* Hex floats require an exponent but not a hexadecimal point. */
2192 assert(p
!= end
&& "Hex strings require an exponent");
2193 assert((*p
== 'p' || *p
== 'P') && "Invalid character in significand");
2194 assert(p
!= begin
&& "Significand has no digits");
2195 assert((dot
== end
|| p
- begin
!= 1) && "Significand has no digits");
2197 /* Ignore the exponent if we are zero. */
2198 if(p
!= firstSignificantDigit
) {
2201 /* Implicit hexadecimal point? */
2205 /* Calculate the exponent adjustment implicit in the number of
2206 significant digits. */
2207 expAdjustment
= static_cast<int>(dot
- firstSignificantDigit
);
2208 if(expAdjustment
< 0)
2210 expAdjustment
= expAdjustment
* 4 - 1;
2212 /* Adjust for writing the significand starting at the most
2213 significant nibble. */
2214 expAdjustment
+= semantics
->precision
;
2215 expAdjustment
-= partsCount
* integerPartWidth
;
2217 /* Adjust for the given exponent. */
2218 exponent
= totalExponent(p
+ 1, end
, expAdjustment
);
2221 return normalize(rounding_mode
, lost_fraction
);
2225 APFloat::roundSignificandWithExponent(const integerPart
*decSigParts
,
2226 unsigned sigPartCount
, int exp
,
2227 roundingMode rounding_mode
)
2229 unsigned int parts
, pow5PartCount
;
2230 fltSemantics calcSemantics
= { 32767, -32767, 0, true };
2231 integerPart pow5Parts
[maxPowerOfFiveParts
];
2234 isNearest
= (rounding_mode
== rmNearestTiesToEven
2235 || rounding_mode
== rmNearestTiesToAway
);
2237 parts
= partCountForBits(semantics
->precision
+ 11);
2239 /* Calculate pow(5, abs(exp)). */
2240 pow5PartCount
= powerOf5(pow5Parts
, exp
>= 0 ? exp
: -exp
);
2242 for (;; parts
*= 2) {
2243 opStatus sigStatus
, powStatus
;
2244 unsigned int excessPrecision
, truncatedBits
;
2246 calcSemantics
.precision
= parts
* integerPartWidth
- 1;
2247 excessPrecision
= calcSemantics
.precision
- semantics
->precision
;
2248 truncatedBits
= excessPrecision
;
2250 APFloat
decSig(calcSemantics
, fcZero
, sign
);
2251 APFloat
pow5(calcSemantics
, fcZero
, false);
2253 sigStatus
= decSig
.convertFromUnsignedParts(decSigParts
, sigPartCount
,
2254 rmNearestTiesToEven
);
2255 powStatus
= pow5
.convertFromUnsignedParts(pow5Parts
, pow5PartCount
,
2256 rmNearestTiesToEven
);
2257 /* Add exp, as 10^n = 5^n * 2^n. */
2258 decSig
.exponent
+= exp
;
2260 lostFraction calcLostFraction
;
2261 integerPart HUerr
, HUdistance
;
2262 unsigned int powHUerr
;
2265 /* multiplySignificand leaves the precision-th bit set to 1. */
2266 calcLostFraction
= decSig
.multiplySignificand(pow5
, NULL
);
2267 powHUerr
= powStatus
!= opOK
;
2269 calcLostFraction
= decSig
.divideSignificand(pow5
);
2270 /* Denormal numbers have less precision. */
2271 if (decSig
.exponent
< semantics
->minExponent
) {
2272 excessPrecision
+= (semantics
->minExponent
- decSig
.exponent
);
2273 truncatedBits
= excessPrecision
;
2274 if (excessPrecision
> calcSemantics
.precision
)
2275 excessPrecision
= calcSemantics
.precision
;
2277 /* Extra half-ulp lost in reciprocal of exponent. */
2278 powHUerr
= (powStatus
== opOK
&& calcLostFraction
== lfExactlyZero
) ? 0:2;
2281 /* Both multiplySignificand and divideSignificand return the
2282 result with the integer bit set. */
2283 assert (APInt::tcExtractBit
2284 (decSig
.significandParts(), calcSemantics
.precision
- 1) == 1);
2286 HUerr
= HUerrBound(calcLostFraction
!= lfExactlyZero
, sigStatus
!= opOK
,
2288 HUdistance
= 2 * ulpsFromBoundary(decSig
.significandParts(),
2289 excessPrecision
, isNearest
);
2291 /* Are we guaranteed to round correctly if we truncate? */
2292 if (HUdistance
>= HUerr
) {
2293 APInt::tcExtract(significandParts(), partCount(), decSig
.significandParts(),
2294 calcSemantics
.precision
- excessPrecision
,
2296 /* Take the exponent of decSig. If we tcExtract-ed less bits
2297 above we must adjust our exponent to compensate for the
2298 implicit right shift. */
2299 exponent
= (decSig
.exponent
+ semantics
->precision
2300 - (calcSemantics
.precision
- excessPrecision
));
2301 calcLostFraction
= lostFractionThroughTruncation(decSig
.significandParts(),
2304 return normalize(rounding_mode
, calcLostFraction
);
2310 APFloat::convertFromDecimalString(const StringRef
&str
, roundingMode rounding_mode
)
2315 /* Scan the text. */
2316 StringRef::iterator p
= str
.begin();
2317 interpretDecimal(p
, str
.end(), &D
);
2319 /* Handle the quick cases. First the case of no significant digits,
2320 i.e. zero, and then exponents that are obviously too large or too
2321 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2322 definitely overflows if
2324 (exp - 1) * L >= maxExponent
2326 and definitely underflows to zero where
2328 (exp + 1) * L <= minExponent - precision
2330 With integer arithmetic the tightest bounds for L are
2332 93/28 < L < 196/59 [ numerator <= 256 ]
2333 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2336 if (decDigitValue(*D
.firstSigDigit
) >= 10U) {
2339 } else if ((D
.normalizedExponent
+ 1) * 28738
2340 <= 8651 * (semantics
->minExponent
- (int) semantics
->precision
)) {
2341 /* Underflow to zero and round. */
2343 fs
= normalize(rounding_mode
, lfLessThanHalf
);
2344 } else if ((D
.normalizedExponent
- 1) * 42039
2345 >= 12655 * semantics
->maxExponent
) {
2346 /* Overflow and round. */
2347 fs
= handleOverflow(rounding_mode
);
2349 integerPart
*decSignificand
;
2350 unsigned int partCount
;
2352 /* A tight upper bound on number of bits required to hold an
2353 N-digit decimal integer is N * 196 / 59. Allocate enough space
2354 to hold the full significand, and an extra part required by
2356 partCount
= static_cast<unsigned int>(D
.lastSigDigit
- D
.firstSigDigit
) + 1;
2357 partCount
= partCountForBits(1 + 196 * partCount
/ 59);
2358 decSignificand
= new integerPart
[partCount
+ 1];
2361 /* Convert to binary efficiently - we do almost all multiplication
2362 in an integerPart. When this would overflow do we do a single
2363 bignum multiplication, and then revert again to multiplication
2364 in an integerPart. */
2366 integerPart decValue
, val
, multiplier
;
2374 if (p
== str
.end()) {
2378 decValue
= decDigitValue(*p
++);
2379 assert(decValue
< 10U && "Invalid character in significand");
2381 val
= val
* 10 + decValue
;
2382 /* The maximum number that can be multiplied by ten with any
2383 digit added without overflowing an integerPart. */
2384 } while (p
<= D
.lastSigDigit
&& multiplier
<= (~ (integerPart
) 0 - 9) / 10);
2386 /* Multiply out the current part. */
2387 APInt::tcMultiplyPart(decSignificand
, decSignificand
, multiplier
, val
,
2388 partCount
, partCount
+ 1, false);
2390 /* If we used another part (likely but not guaranteed), increase
2392 if (decSignificand
[partCount
])
2394 } while (p
<= D
.lastSigDigit
);
2396 category
= fcNormal
;
2397 fs
= roundSignificandWithExponent(decSignificand
, partCount
,
2398 D
.exponent
, rounding_mode
);
2400 delete [] decSignificand
;
2407 APFloat::convertFromString(const StringRef
&str
, roundingMode rounding_mode
)
2409 assertArithmeticOK(*semantics
);
2410 assert(!str
.empty() && "Invalid string length");
2412 /* Handle a leading minus sign. */
2413 StringRef::iterator p
= str
.begin();
2414 size_t slen
= str
.size();
2415 sign
= *p
== '-' ? 1 : 0;
2416 if(*p
== '-' || *p
== '+') {
2419 assert(slen
&& "String has no digits");
2422 if(slen
>= 2 && p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
2423 assert(slen
- 2 && "Invalid string");
2424 return convertFromHexadecimalString(StringRef(p
+ 2, slen
- 2),
2428 return convertFromDecimalString(StringRef(p
, slen
), rounding_mode
);
2431 /* Write out a hexadecimal representation of the floating point value
2432 to DST, which must be of sufficient size, in the C99 form
2433 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2434 excluding the terminating NUL.
2436 If UPPERCASE, the output is in upper case, otherwise in lower case.
2438 HEXDIGITS digits appear altogether, rounding the value if
2439 necessary. If HEXDIGITS is 0, the minimal precision to display the
2440 number precisely is used instead. If nothing would appear after
2441 the decimal point it is suppressed.
2443 The decimal exponent is always printed and has at least one digit.
2444 Zero values display an exponent of zero. Infinities and NaNs
2445 appear as "infinity" or "nan" respectively.
2447 The above rules are as specified by C99. There is ambiguity about
2448 what the leading hexadecimal digit should be. This implementation
2449 uses whatever is necessary so that the exponent is displayed as
2450 stored. This implies the exponent will fall within the IEEE format
2451 range, and the leading hexadecimal digit will be 0 (for denormals),
2452 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2453 any other digits zero).
2456 APFloat::convertToHexString(char *dst
, unsigned int hexDigits
,
2457 bool upperCase
, roundingMode rounding_mode
) const
2461 assertArithmeticOK(*semantics
);
2469 memcpy (dst
, upperCase
? infinityU
: infinityL
, sizeof infinityU
- 1);
2470 dst
+= sizeof infinityL
- 1;
2474 memcpy (dst
, upperCase
? NaNU
: NaNL
, sizeof NaNU
- 1);
2475 dst
+= sizeof NaNU
- 1;
2480 *dst
++ = upperCase
? 'X': 'x';
2482 if (hexDigits
> 1) {
2484 memset (dst
, '0', hexDigits
- 1);
2485 dst
+= hexDigits
- 1;
2487 *dst
++ = upperCase
? 'P': 'p';
2492 dst
= convertNormalToHexString (dst
, hexDigits
, upperCase
, rounding_mode
);
2498 return static_cast<unsigned int>(dst
- p
);
2501 /* Does the hard work of outputting the correctly rounded hexadecimal
2502 form of a normal floating point number with the specified number of
2503 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2504 digits necessary to print the value precisely is output. */
2506 APFloat::convertNormalToHexString(char *dst
, unsigned int hexDigits
,
2508 roundingMode rounding_mode
) const
2510 unsigned int count
, valueBits
, shift
, partsCount
, outputDigits
;
2511 const char *hexDigitChars
;
2512 const integerPart
*significand
;
2517 *dst
++ = upperCase
? 'X': 'x';
2520 hexDigitChars
= upperCase
? hexDigitsUpper
: hexDigitsLower
;
2522 significand
= significandParts();
2523 partsCount
= partCount();
2525 /* +3 because the first digit only uses the single integer bit, so
2526 we have 3 virtual zero most-significant-bits. */
2527 valueBits
= semantics
->precision
+ 3;
2528 shift
= integerPartWidth
- valueBits
% integerPartWidth
;
2530 /* The natural number of digits required ignoring trailing
2531 insignificant zeroes. */
2532 outputDigits
= (valueBits
- significandLSB () + 3) / 4;
2534 /* hexDigits of zero means use the required number for the
2535 precision. Otherwise, see if we are truncating. If we are,
2536 find out if we need to round away from zero. */
2538 if (hexDigits
< outputDigits
) {
2539 /* We are dropping non-zero bits, so need to check how to round.
2540 "bits" is the number of dropped bits. */
2542 lostFraction fraction
;
2544 bits
= valueBits
- hexDigits
* 4;
2545 fraction
= lostFractionThroughTruncation (significand
, partsCount
, bits
);
2546 roundUp
= roundAwayFromZero(rounding_mode
, fraction
, bits
);
2548 outputDigits
= hexDigits
;
2551 /* Write the digits consecutively, and start writing in the location
2552 of the hexadecimal point. We move the most significant digit
2553 left and add the hexadecimal point later. */
2556 count
= (valueBits
+ integerPartWidth
- 1) / integerPartWidth
;
2558 while (outputDigits
&& count
) {
2561 /* Put the most significant integerPartWidth bits in "part". */
2562 if (--count
== partsCount
)
2563 part
= 0; /* An imaginary higher zero part. */
2565 part
= significand
[count
] << shift
;
2568 part
|= significand
[count
- 1] >> (integerPartWidth
- shift
);
2570 /* Convert as much of "part" to hexdigits as we can. */
2571 unsigned int curDigits
= integerPartWidth
/ 4;
2573 if (curDigits
> outputDigits
)
2574 curDigits
= outputDigits
;
2575 dst
+= partAsHex (dst
, part
, curDigits
, hexDigitChars
);
2576 outputDigits
-= curDigits
;
2582 /* Note that hexDigitChars has a trailing '0'. */
2585 *q
= hexDigitChars
[hexDigitValue (*q
) + 1];
2586 } while (*q
== '0');
2589 /* Add trailing zeroes. */
2590 memset (dst
, '0', outputDigits
);
2591 dst
+= outputDigits
;
2594 /* Move the most significant digit to before the point, and if there
2595 is something after the decimal point add it. This must come
2596 after rounding above. */
2603 /* Finally output the exponent. */
2604 *dst
++ = upperCase
? 'P': 'p';
2606 return writeSignedDecimal (dst
, exponent
);
2609 // For good performance it is desirable for different APFloats
2610 // to produce different integers.
2612 APFloat::getHashValue() const
2614 if (category
==fcZero
) return sign
<<8 | semantics
->precision
;
2615 else if (category
==fcInfinity
) return sign
<<9 | semantics
->precision
;
2616 else if (category
==fcNaN
) return 1<<10 | semantics
->precision
;
2618 uint32_t hash
= sign
<<11 | semantics
->precision
| exponent
<<12;
2619 const integerPart
* p
= significandParts();
2620 for (int i
=partCount(); i
>0; i
--, p
++)
2621 hash
^= ((uint32_t)*p
) ^ (uint32_t)((*p
)>>32);
2626 // Conversion from APFloat to/from host float/double. It may eventually be
2627 // possible to eliminate these and have everybody deal with APFloats, but that
2628 // will take a while. This approach will not easily extend to long double.
2629 // Current implementation requires integerPartWidth==64, which is correct at
2630 // the moment but could be made more general.
2632 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2633 // the actual IEEE respresentations. We compensate for that here.
2636 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2638 assert(semantics
== (const llvm::fltSemantics
*)&x87DoubleExtended
);
2639 assert (partCount()==2);
2641 uint64_t myexponent
, mysignificand
;
2643 if (category
==fcNormal
) {
2644 myexponent
= exponent
+16383; //bias
2645 mysignificand
= significandParts()[0];
2646 if (myexponent
==1 && !(mysignificand
& 0x8000000000000000ULL
))
2647 myexponent
= 0; // denormal
2648 } else if (category
==fcZero
) {
2651 } else if (category
==fcInfinity
) {
2652 myexponent
= 0x7fff;
2653 mysignificand
= 0x8000000000000000ULL
;
2655 assert(category
== fcNaN
&& "Unknown category");
2656 myexponent
= 0x7fff;
2657 mysignificand
= significandParts()[0];
2661 words
[0] = mysignificand
;
2662 words
[1] = ((uint64_t)(sign
& 1) << 15) |
2663 (myexponent
& 0x7fffLL
);
2664 return APInt(80, 2, words
);
2668 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2670 assert(semantics
== (const llvm::fltSemantics
*)&PPCDoubleDouble
);
2671 assert (partCount()==2);
2673 uint64_t myexponent
, mysignificand
, myexponent2
, mysignificand2
;
2675 if (category
==fcNormal
) {
2676 myexponent
= exponent
+ 1023; //bias
2677 myexponent2
= exponent2
+ 1023;
2678 mysignificand
= significandParts()[0];
2679 mysignificand2
= significandParts()[1];
2680 if (myexponent
==1 && !(mysignificand
& 0x10000000000000LL
))
2681 myexponent
= 0; // denormal
2682 if (myexponent2
==1 && !(mysignificand2
& 0x10000000000000LL
))
2683 myexponent2
= 0; // denormal
2684 } else if (category
==fcZero
) {
2689 } else if (category
==fcInfinity
) {
2695 assert(category
== fcNaN
&& "Unknown category");
2697 mysignificand
= significandParts()[0];
2698 myexponent2
= exponent2
;
2699 mysignificand2
= significandParts()[1];
2703 words
[0] = ((uint64_t)(sign
& 1) << 63) |
2704 ((myexponent
& 0x7ff) << 52) |
2705 (mysignificand
& 0xfffffffffffffLL
);
2706 words
[1] = ((uint64_t)(sign2
& 1) << 63) |
2707 ((myexponent2
& 0x7ff) << 52) |
2708 (mysignificand2
& 0xfffffffffffffLL
);
2709 return APInt(128, 2, words
);
2713 APFloat::convertQuadrupleAPFloatToAPInt() const
2715 assert(semantics
== (const llvm::fltSemantics
*)&IEEEquad
);
2716 assert (partCount()==2);
2718 uint64_t myexponent
, mysignificand
, mysignificand2
;
2720 if (category
==fcNormal
) {
2721 myexponent
= exponent
+16383; //bias
2722 mysignificand
= significandParts()[0];
2723 mysignificand2
= significandParts()[1];
2724 if (myexponent
==1 && !(mysignificand2
& 0x1000000000000LL
))
2725 myexponent
= 0; // denormal
2726 } else if (category
==fcZero
) {
2728 mysignificand
= mysignificand2
= 0;
2729 } else if (category
==fcInfinity
) {
2730 myexponent
= 0x7fff;
2731 mysignificand
= mysignificand2
= 0;
2733 assert(category
== fcNaN
&& "Unknown category!");
2734 myexponent
= 0x7fff;
2735 mysignificand
= significandParts()[0];
2736 mysignificand2
= significandParts()[1];
2740 words
[0] = mysignificand
;
2741 words
[1] = ((uint64_t)(sign
& 1) << 63) |
2742 ((myexponent
& 0x7fff) << 48) |
2743 (mysignificand2
& 0xffffffffffffLL
);
2745 return APInt(128, 2, words
);
2749 APFloat::convertDoubleAPFloatToAPInt() const
2751 assert(semantics
== (const llvm::fltSemantics
*)&IEEEdouble
);
2752 assert (partCount()==1);
2754 uint64_t myexponent
, mysignificand
;
2756 if (category
==fcNormal
) {
2757 myexponent
= exponent
+1023; //bias
2758 mysignificand
= *significandParts();
2759 if (myexponent
==1 && !(mysignificand
& 0x10000000000000LL
))
2760 myexponent
= 0; // denormal
2761 } else if (category
==fcZero
) {
2764 } else if (category
==fcInfinity
) {
2768 assert(category
== fcNaN
&& "Unknown category!");
2770 mysignificand
= *significandParts();
2773 return APInt(64, ((((uint64_t)(sign
& 1) << 63) |
2774 ((myexponent
& 0x7ff) << 52) |
2775 (mysignificand
& 0xfffffffffffffLL
))));
2779 APFloat::convertFloatAPFloatToAPInt() const
2781 assert(semantics
== (const llvm::fltSemantics
*)&IEEEsingle
);
2782 assert (partCount()==1);
2784 uint32_t myexponent
, mysignificand
;
2786 if (category
==fcNormal
) {
2787 myexponent
= exponent
+127; //bias
2788 mysignificand
= (uint32_t)*significandParts();
2789 if (myexponent
== 1 && !(mysignificand
& 0x800000))
2790 myexponent
= 0; // denormal
2791 } else if (category
==fcZero
) {
2794 } else if (category
==fcInfinity
) {
2798 assert(category
== fcNaN
&& "Unknown category!");
2800 mysignificand
= (uint32_t)*significandParts();
2803 return APInt(32, (((sign
&1) << 31) | ((myexponent
&0xff) << 23) |
2804 (mysignificand
& 0x7fffff)));
2807 // This function creates an APInt that is just a bit map of the floating
2808 // point constant as it would appear in memory. It is not a conversion,
2809 // and treating the result as a normal integer is unlikely to be useful.
2812 APFloat::bitcastToAPInt() const
2814 if (semantics
== (const llvm::fltSemantics
*)&IEEEsingle
)
2815 return convertFloatAPFloatToAPInt();
2817 if (semantics
== (const llvm::fltSemantics
*)&IEEEdouble
)
2818 return convertDoubleAPFloatToAPInt();
2820 if (semantics
== (const llvm::fltSemantics
*)&IEEEquad
)
2821 return convertQuadrupleAPFloatToAPInt();
2823 if (semantics
== (const llvm::fltSemantics
*)&PPCDoubleDouble
)
2824 return convertPPCDoubleDoubleAPFloatToAPInt();
2826 assert(semantics
== (const llvm::fltSemantics
*)&x87DoubleExtended
&&
2828 return convertF80LongDoubleAPFloatToAPInt();
2832 APFloat::convertToFloat() const
2834 assert(semantics
== (const llvm::fltSemantics
*)&IEEEsingle
&& "Float semantics are not IEEEsingle");
2835 APInt api
= bitcastToAPInt();
2836 return api
.bitsToFloat();
2840 APFloat::convertToDouble() const
2842 assert(semantics
== (const llvm::fltSemantics
*)&IEEEdouble
&& "Float semantics are not IEEEdouble");
2843 APInt api
= bitcastToAPInt();
2844 return api
.bitsToDouble();
2847 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2848 /// does not support these bit patterns:
2849 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2850 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2851 /// exponent = 0, integer bit 1 ("pseudodenormal")
2852 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2853 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2855 APFloat::initFromF80LongDoubleAPInt(const APInt
&api
)
2857 assert(api
.getBitWidth()==80);
2858 uint64_t i1
= api
.getRawData()[0];
2859 uint64_t i2
= api
.getRawData()[1];
2860 uint64_t myexponent
= (i2
& 0x7fff);
2861 uint64_t mysignificand
= i1
;
2863 initialize(&APFloat::x87DoubleExtended
);
2864 assert(partCount()==2);
2866 sign
= static_cast<unsigned int>(i2
>>15);
2867 if (myexponent
==0 && mysignificand
==0) {
2868 // exponent, significand meaningless
2870 } else if (myexponent
==0x7fff && mysignificand
==0x8000000000000000ULL
) {
2871 // exponent, significand meaningless
2872 category
= fcInfinity
;
2873 } else if (myexponent
==0x7fff && mysignificand
!=0x8000000000000000ULL
) {
2874 // exponent meaningless
2876 significandParts()[0] = mysignificand
;
2877 significandParts()[1] = 0;
2879 category
= fcNormal
;
2880 exponent
= myexponent
- 16383;
2881 significandParts()[0] = mysignificand
;
2882 significandParts()[1] = 0;
2883 if (myexponent
==0) // denormal
2889 APFloat::initFromPPCDoubleDoubleAPInt(const APInt
&api
)
2891 assert(api
.getBitWidth()==128);
2892 uint64_t i1
= api
.getRawData()[0];
2893 uint64_t i2
= api
.getRawData()[1];
2894 uint64_t myexponent
= (i1
>> 52) & 0x7ff;
2895 uint64_t mysignificand
= i1
& 0xfffffffffffffLL
;
2896 uint64_t myexponent2
= (i2
>> 52) & 0x7ff;
2897 uint64_t mysignificand2
= i2
& 0xfffffffffffffLL
;
2899 initialize(&APFloat::PPCDoubleDouble
);
2900 assert(partCount()==2);
2902 sign
= static_cast<unsigned int>(i1
>>63);
2903 sign2
= static_cast<unsigned int>(i2
>>63);
2904 if (myexponent
==0 && mysignificand
==0) {
2905 // exponent, significand meaningless
2906 // exponent2 and significand2 are required to be 0; we don't check
2908 } else if (myexponent
==0x7ff && mysignificand
==0) {
2909 // exponent, significand meaningless
2910 // exponent2 and significand2 are required to be 0; we don't check
2911 category
= fcInfinity
;
2912 } else if (myexponent
==0x7ff && mysignificand
!=0) {
2913 // exponent meaningless. So is the whole second word, but keep it
2916 exponent2
= myexponent2
;
2917 significandParts()[0] = mysignificand
;
2918 significandParts()[1] = mysignificand2
;
2920 category
= fcNormal
;
2921 // Note there is no category2; the second word is treated as if it is
2922 // fcNormal, although it might be something else considered by itself.
2923 exponent
= myexponent
- 1023;
2924 exponent2
= myexponent2
- 1023;
2925 significandParts()[0] = mysignificand
;
2926 significandParts()[1] = mysignificand2
;
2927 if (myexponent
==0) // denormal
2930 significandParts()[0] |= 0x10000000000000LL
; // integer bit
2934 significandParts()[1] |= 0x10000000000000LL
; // integer bit
2939 APFloat::initFromQuadrupleAPInt(const APInt
&api
)
2941 assert(api
.getBitWidth()==128);
2942 uint64_t i1
= api
.getRawData()[0];
2943 uint64_t i2
= api
.getRawData()[1];
2944 uint64_t myexponent
= (i2
>> 48) & 0x7fff;
2945 uint64_t mysignificand
= i1
;
2946 uint64_t mysignificand2
= i2
& 0xffffffffffffLL
;
2948 initialize(&APFloat::IEEEquad
);
2949 assert(partCount()==2);
2951 sign
= static_cast<unsigned int>(i2
>>63);
2952 if (myexponent
==0 &&
2953 (mysignificand
==0 && mysignificand2
==0)) {
2954 // exponent, significand meaningless
2956 } else if (myexponent
==0x7fff &&
2957 (mysignificand
==0 && mysignificand2
==0)) {
2958 // exponent, significand meaningless
2959 category
= fcInfinity
;
2960 } else if (myexponent
==0x7fff &&
2961 (mysignificand
!=0 || mysignificand2
!=0)) {
2962 // exponent meaningless
2964 significandParts()[0] = mysignificand
;
2965 significandParts()[1] = mysignificand2
;
2967 category
= fcNormal
;
2968 exponent
= myexponent
- 16383;
2969 significandParts()[0] = mysignificand
;
2970 significandParts()[1] = mysignificand2
;
2971 if (myexponent
==0) // denormal
2974 significandParts()[1] |= 0x1000000000000LL
; // integer bit
2979 APFloat::initFromDoubleAPInt(const APInt
&api
)
2981 assert(api
.getBitWidth()==64);
2982 uint64_t i
= *api
.getRawData();
2983 uint64_t myexponent
= (i
>> 52) & 0x7ff;
2984 uint64_t mysignificand
= i
& 0xfffffffffffffLL
;
2986 initialize(&APFloat::IEEEdouble
);
2987 assert(partCount()==1);
2989 sign
= static_cast<unsigned int>(i
>>63);
2990 if (myexponent
==0 && mysignificand
==0) {
2991 // exponent, significand meaningless
2993 } else if (myexponent
==0x7ff && mysignificand
==0) {
2994 // exponent, significand meaningless
2995 category
= fcInfinity
;
2996 } else if (myexponent
==0x7ff && mysignificand
!=0) {
2997 // exponent meaningless
2999 *significandParts() = mysignificand
;
3001 category
= fcNormal
;
3002 exponent
= myexponent
- 1023;
3003 *significandParts() = mysignificand
;
3004 if (myexponent
==0) // denormal
3007 *significandParts() |= 0x10000000000000LL
; // integer bit
3012 APFloat::initFromFloatAPInt(const APInt
& api
)
3014 assert(api
.getBitWidth()==32);
3015 uint32_t i
= (uint32_t)*api
.getRawData();
3016 uint32_t myexponent
= (i
>> 23) & 0xff;
3017 uint32_t mysignificand
= i
& 0x7fffff;
3019 initialize(&APFloat::IEEEsingle
);
3020 assert(partCount()==1);
3023 if (myexponent
==0 && mysignificand
==0) {
3024 // exponent, significand meaningless
3026 } else if (myexponent
==0xff && mysignificand
==0) {
3027 // exponent, significand meaningless
3028 category
= fcInfinity
;
3029 } else if (myexponent
==0xff && mysignificand
!=0) {
3030 // sign, exponent, significand meaningless
3032 *significandParts() = mysignificand
;
3034 category
= fcNormal
;
3035 exponent
= myexponent
- 127; //bias
3036 *significandParts() = mysignificand
;
3037 if (myexponent
==0) // denormal
3040 *significandParts() |= 0x800000; // integer bit
3044 /// Treat api as containing the bits of a floating point number. Currently
3045 /// we infer the floating point type from the size of the APInt. The
3046 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3047 /// when the size is anything else).
3049 APFloat::initFromAPInt(const APInt
& api
, bool isIEEE
)
3051 if (api
.getBitWidth() == 32)
3052 return initFromFloatAPInt(api
);
3053 else if (api
.getBitWidth()==64)
3054 return initFromDoubleAPInt(api
);
3055 else if (api
.getBitWidth()==80)
3056 return initFromF80LongDoubleAPInt(api
);
3057 else if (api
.getBitWidth()==128)
3059 initFromQuadrupleAPInt(api
) : initFromPPCDoubleDoubleAPInt(api
));
3061 llvm_unreachable(0);
3064 APFloat::APFloat(const APInt
& api
, bool isIEEE
)
3066 initFromAPInt(api
, isIEEE
);
3069 APFloat::APFloat(float f
)
3071 APInt api
= APInt(32, 0);
3072 initFromAPInt(api
.floatToBits(f
));
3075 APFloat::APFloat(double d
)
3077 APInt api
= APInt(64, 0);
3078 initFromAPInt(api
.doubleToBits(d
));