1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/Support/ErrorHandling.h"
19 #include "llvm/Support/MathExtras.h"
25 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
27 /* Assumed in hexadecimal significand parsing, and conversion to
28 hexadecimal strings. */
29 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
30 COMPILE_TIME_ASSERT(integerPartWidth
% 4 == 0);
34 /* Represents floating point arithmetic semantics. */
36 /* The largest E such that 2^E is representable; this matches the
37 definition of IEEE 754. */
38 exponent_t maxExponent
;
40 /* The smallest E such that 2^E is a normalized number; this
41 matches the definition of IEEE 754. */
42 exponent_t minExponent
;
44 /* Number of bits in the significand. This includes the integer
46 unsigned int precision
;
48 /* True if arithmetic is supported. */
49 unsigned int arithmeticOK
;
52 const fltSemantics
APFloat::IEEEhalf
= { 15, -14, 11, true };
53 const fltSemantics
APFloat::IEEEsingle
= { 127, -126, 24, true };
54 const fltSemantics
APFloat::IEEEdouble
= { 1023, -1022, 53, true };
55 const fltSemantics
APFloat::IEEEquad
= { 16383, -16382, 113, true };
56 const fltSemantics
APFloat::x87DoubleExtended
= { 16383, -16382, 64, true };
57 const fltSemantics
APFloat::Bogus
= { 0, 0, 0, true };
59 // The PowerPC format consists of two doubles. It does not map cleanly
60 // onto the usual format above. For now only storage of constants of
61 // this type is supported, no arithmetic.
62 const fltSemantics
APFloat::PPCDoubleDouble
= { 1023, -1022, 106, false };
64 /* A tight upper bound on number of parts required to hold the value
67 power * 815 / (351 * integerPartWidth) + 1
69 However, whilst the result may require only this many parts,
70 because we are multiplying two values to get it, the
71 multiplication may require an extra part with the excess part
72 being zero (consider the trivial case of 1 * 1, tcFullMultiply
73 requires two parts to hold the single-part result). So we add an
74 extra one to guarantee enough space whilst multiplying. */
75 const unsigned int maxExponent
= 16383;
76 const unsigned int maxPrecision
= 113;
77 const unsigned int maxPowerOfFiveExponent
= maxExponent
+ maxPrecision
- 1;
78 const unsigned int maxPowerOfFiveParts
= 2 + ((maxPowerOfFiveExponent
* 815)
79 / (351 * integerPartWidth
));
82 /* A bunch of private, handy routines. */
84 static inline unsigned int
85 partCountForBits(unsigned int bits
)
87 return ((bits
) + integerPartWidth
- 1) / integerPartWidth
;
90 /* Returns 0U-9U. Return values >= 10U are not digits. */
91 static inline unsigned int
92 decDigitValue(unsigned int c
)
98 hexDigitValue(unsigned int c
)
118 assertArithmeticOK(const llvm::fltSemantics
&semantics
) {
119 assert(semantics
.arithmeticOK
&&
120 "Compile-time arithmetic does not support these semantics");
123 /* Return the value of a decimal exponent of the form
126 If the exponent overflows, returns a large exponent with the
129 readExponent(StringRef::iterator begin
, StringRef::iterator end
)
132 unsigned int absExponent
;
133 const unsigned int overlargeExponent
= 24000; /* FIXME. */
134 StringRef::iterator p
= begin
;
136 assert(p
!= end
&& "Exponent has no digits");
138 isNegative
= (*p
== '-');
139 if (*p
== '-' || *p
== '+') {
141 assert(p
!= end
&& "Exponent has no digits");
144 absExponent
= decDigitValue(*p
++);
145 assert(absExponent
< 10U && "Invalid character in exponent");
147 for (; p
!= end
; ++p
) {
150 value
= decDigitValue(*p
);
151 assert(value
< 10U && "Invalid character in exponent");
153 value
+= absExponent
* 10;
154 if (absExponent
>= overlargeExponent
) {
155 absExponent
= overlargeExponent
;
156 p
= end
; /* outwit assert below */
162 assert(p
== end
&& "Invalid exponent in exponent");
165 return -(int) absExponent
;
167 return (int) absExponent
;
170 /* This is ugly and needs cleaning up, but I don't immediately see
171 how whilst remaining safe. */
173 totalExponent(StringRef::iterator p
, StringRef::iterator end
,
174 int exponentAdjustment
)
176 int unsignedExponent
;
177 bool negative
, overflow
;
180 assert(p
!= end
&& "Exponent has no digits");
182 negative
= *p
== '-';
183 if (*p
== '-' || *p
== '+') {
185 assert(p
!= end
&& "Exponent has no digits");
188 unsignedExponent
= 0;
190 for (; p
!= end
; ++p
) {
193 value
= decDigitValue(*p
);
194 assert(value
< 10U && "Invalid character in exponent");
196 unsignedExponent
= unsignedExponent
* 10 + value
;
197 if (unsignedExponent
> 32767)
201 if (exponentAdjustment
> 32767 || exponentAdjustment
< -32768)
205 exponent
= unsignedExponent
;
207 exponent
= -exponent
;
208 exponent
+= exponentAdjustment
;
209 if (exponent
> 32767 || exponent
< -32768)
214 exponent
= negative
? -32768: 32767;
219 static StringRef::iterator
220 skipLeadingZeroesAndAnyDot(StringRef::iterator begin
, StringRef::iterator end
,
221 StringRef::iterator
*dot
)
223 StringRef::iterator p
= begin
;
225 while (*p
== '0' && p
!= end
)
231 assert(end
- begin
!= 1 && "Significand has no digits");
233 while (*p
== '0' && p
!= end
)
240 /* Given a normal decimal floating point number of the form
244 where the decimal point and exponent are optional, fill out the
245 structure D. Exponent is appropriate if the significand is
246 treated as an integer, and normalizedExponent if the significand
247 is taken to have the decimal point after a single leading
250 If the value is zero, V->firstSigDigit points to a non-digit, and
251 the return exponent is zero.
254 const char *firstSigDigit
;
255 const char *lastSigDigit
;
257 int normalizedExponent
;
261 interpretDecimal(StringRef::iterator begin
, StringRef::iterator end
,
264 StringRef::iterator dot
= end
;
265 StringRef::iterator p
= skipLeadingZeroesAndAnyDot (begin
, end
, &dot
);
267 D
->firstSigDigit
= p
;
269 D
->normalizedExponent
= 0;
271 for (; p
!= end
; ++p
) {
273 assert(dot
== end
&& "String contains multiple dots");
278 if (decDigitValue(*p
) >= 10U)
283 assert((*p
== 'e' || *p
== 'E') && "Invalid character in significand");
284 assert(p
!= begin
&& "Significand has no digits");
285 assert((dot
== end
|| p
- begin
!= 1) && "Significand has no digits");
287 /* p points to the first non-digit in the string */
288 D
->exponent
= readExponent(p
+ 1, end
);
290 /* Implied decimal point? */
295 /* If number is all zeroes accept any exponent. */
296 if (p
!= D
->firstSigDigit
) {
297 /* Drop insignificant trailing zeroes. */
302 while (p
!= begin
&& *p
== '0');
303 while (p
!= begin
&& *p
== '.');
306 /* Adjust the exponents for any decimal point. */
307 D
->exponent
+= static_cast<exponent_t
>((dot
- p
) - (dot
> p
));
308 D
->normalizedExponent
= (D
->exponent
+
309 static_cast<exponent_t
>((p
- D
->firstSigDigit
)
310 - (dot
> D
->firstSigDigit
&& dot
< p
)));
316 /* Return the trailing fraction of a hexadecimal number.
317 DIGITVALUE is the first hex digit of the fraction, P points to
320 trailingHexadecimalFraction(StringRef::iterator p
, StringRef::iterator end
,
321 unsigned int digitValue
)
323 unsigned int hexDigit
;
325 /* If the first trailing digit isn't 0 or 8 we can work out the
326 fraction immediately. */
328 return lfMoreThanHalf
;
329 else if (digitValue
< 8 && digitValue
> 0)
330 return lfLessThanHalf
;
332 /* Otherwise we need to find the first non-zero digit. */
336 assert(p
!= end
&& "Invalid trailing hexadecimal fraction!");
338 hexDigit
= hexDigitValue(*p
);
340 /* If we ran off the end it is exactly zero or one-half, otherwise
343 return digitValue
== 0 ? lfExactlyZero
: lfExactlyHalf
;
345 return digitValue
== 0 ? lfLessThanHalf
: lfMoreThanHalf
;
348 /* Return the fraction lost were a bignum truncated losing the least
349 significant BITS bits. */
351 lostFractionThroughTruncation(const integerPart
*parts
,
352 unsigned int partCount
,
357 lsb
= APInt::tcLSB(parts
, partCount
);
359 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
361 return lfExactlyZero
;
363 return lfExactlyHalf
;
364 if (bits
<= partCount
* integerPartWidth
&&
365 APInt::tcExtractBit(parts
, bits
- 1))
366 return lfMoreThanHalf
;
368 return lfLessThanHalf
;
371 /* Shift DST right BITS bits noting lost fraction. */
373 shiftRight(integerPart
*dst
, unsigned int parts
, unsigned int bits
)
375 lostFraction lost_fraction
;
377 lost_fraction
= lostFractionThroughTruncation(dst
, parts
, bits
);
379 APInt::tcShiftRight(dst
, parts
, bits
);
381 return lost_fraction
;
384 /* Combine the effect of two lost fractions. */
386 combineLostFractions(lostFraction moreSignificant
,
387 lostFraction lessSignificant
)
389 if (lessSignificant
!= lfExactlyZero
) {
390 if (moreSignificant
== lfExactlyZero
)
391 moreSignificant
= lfLessThanHalf
;
392 else if (moreSignificant
== lfExactlyHalf
)
393 moreSignificant
= lfMoreThanHalf
;
396 return moreSignificant
;
399 /* The error from the true value, in half-ulps, on multiplying two
400 floating point numbers, which differ from the value they
401 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
402 than the returned value.
404 See "How to Read Floating Point Numbers Accurately" by William D
407 HUerrBound(bool inexactMultiply
, unsigned int HUerr1
, unsigned int HUerr2
)
409 assert(HUerr1
< 2 || HUerr2
< 2 || (HUerr1
+ HUerr2
< 8));
411 if (HUerr1
+ HUerr2
== 0)
412 return inexactMultiply
* 2; /* <= inexactMultiply half-ulps. */
414 return inexactMultiply
+ 2 * (HUerr1
+ HUerr2
);
417 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
418 when the least significant BITS are truncated. BITS cannot be
421 ulpsFromBoundary(const integerPart
*parts
, unsigned int bits
, bool isNearest
)
423 unsigned int count
, partBits
;
424 integerPart part
, boundary
;
429 count
= bits
/ integerPartWidth
;
430 partBits
= bits
% integerPartWidth
+ 1;
432 part
= parts
[count
] & (~(integerPart
) 0 >> (integerPartWidth
- partBits
));
435 boundary
= (integerPart
) 1 << (partBits
- 1);
440 if (part
- boundary
<= boundary
- part
)
441 return part
- boundary
;
443 return boundary
- part
;
446 if (part
== boundary
) {
449 return ~(integerPart
) 0; /* A lot. */
452 } else if (part
== boundary
- 1) {
455 return ~(integerPart
) 0; /* A lot. */
460 return ~(integerPart
) 0; /* A lot. */
463 /* Place pow(5, power) in DST, and return the number of parts used.
464 DST must be at least one part larger than size of the answer. */
466 powerOf5(integerPart
*dst
, unsigned int power
)
468 static const integerPart firstEightPowers
[] = { 1, 5, 25, 125, 625, 3125,
470 integerPart pow5s
[maxPowerOfFiveParts
* 2 + 5];
471 pow5s
[0] = 78125 * 5;
473 unsigned int partsCount
[16] = { 1 };
474 integerPart scratch
[maxPowerOfFiveParts
], *p1
, *p2
, *pow5
;
476 assert(power
<= maxExponent
);
481 *p1
= firstEightPowers
[power
& 7];
487 for (unsigned int n
= 0; power
; power
>>= 1, n
++) {
492 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
494 pc
= partsCount
[n
- 1];
495 APInt::tcFullMultiply(pow5
, pow5
- pc
, pow5
- pc
, pc
, pc
);
497 if (pow5
[pc
- 1] == 0)
505 APInt::tcFullMultiply(p2
, p1
, pow5
, result
, pc
);
507 if (p2
[result
- 1] == 0)
510 /* Now result is in p1 with partsCount parts and p2 is scratch
512 tmp
= p1
, p1
= p2
, p2
= tmp
;
519 APInt::tcAssign(dst
, p1
, result
);
524 /* Zero at the end to avoid modular arithmetic when adding one; used
525 when rounding up during hexadecimal output. */
526 static const char hexDigitsLower
[] = "0123456789abcdef0";
527 static const char hexDigitsUpper
[] = "0123456789ABCDEF0";
528 static const char infinityL
[] = "infinity";
529 static const char infinityU
[] = "INFINITY";
530 static const char NaNL
[] = "nan";
531 static const char NaNU
[] = "NAN";
533 /* Write out an integerPart in hexadecimal, starting with the most
534 significant nibble. Write out exactly COUNT hexdigits, return
537 partAsHex (char *dst
, integerPart part
, unsigned int count
,
538 const char *hexDigitChars
)
540 unsigned int result
= count
;
542 assert(count
!= 0 && count
<= integerPartWidth
/ 4);
544 part
>>= (integerPartWidth
- 4 * count
);
546 dst
[count
] = hexDigitChars
[part
& 0xf];
553 /* Write out an unsigned decimal integer. */
555 writeUnsignedDecimal (char *dst
, unsigned int n
)
571 /* Write out a signed decimal integer. */
573 writeSignedDecimal (char *dst
, int value
)
577 dst
= writeUnsignedDecimal(dst
, -(unsigned) value
);
579 dst
= writeUnsignedDecimal(dst
, value
);
586 APFloat::initialize(const fltSemantics
*ourSemantics
)
590 semantics
= ourSemantics
;
593 significand
.parts
= new integerPart
[count
];
597 APFloat::freeSignificand()
600 delete [] significand
.parts
;
604 APFloat::assign(const APFloat
&rhs
)
606 assert(semantics
== rhs
.semantics
);
609 category
= rhs
.category
;
610 exponent
= rhs
.exponent
;
612 exponent2
= rhs
.exponent2
;
613 if (category
== fcNormal
|| category
== fcNaN
)
614 copySignificand(rhs
);
618 APFloat::copySignificand(const APFloat
&rhs
)
620 assert(category
== fcNormal
|| category
== fcNaN
);
621 assert(rhs
.partCount() >= partCount());
623 APInt::tcAssign(significandParts(), rhs
.significandParts(),
627 /* Make this number a NaN, with an arbitrary but deterministic value
628 for the significand. If double or longer, this is a signalling NaN,
629 which may not be ideal. If float, this is QNaN(0). */
630 void APFloat::makeNaN(bool SNaN
, bool Negative
, const APInt
*fill
)
635 integerPart
*significand
= significandParts();
636 unsigned numParts
= partCount();
638 // Set the significand bits to the fill.
639 if (!fill
|| fill
->getNumWords() < numParts
)
640 APInt::tcSet(significand
, 0, numParts
);
642 APInt::tcAssign(significand
, fill
->getRawData(),
643 std::min(fill
->getNumWords(), numParts
));
645 // Zero out the excess bits of the significand.
646 unsigned bitsToPreserve
= semantics
->precision
- 1;
647 unsigned part
= bitsToPreserve
/ 64;
648 bitsToPreserve
%= 64;
649 significand
[part
] &= ((1ULL << bitsToPreserve
) - 1);
650 for (part
++; part
!= numParts
; ++part
)
651 significand
[part
] = 0;
654 unsigned QNaNBit
= semantics
->precision
- 2;
657 // We always have to clear the QNaN bit to make it an SNaN.
658 APInt::tcClearBit(significand
, QNaNBit
);
660 // If there are no bits set in the payload, we have to set
661 // *something* to make it a NaN instead of an infinity;
662 // conventionally, this is the next bit down from the QNaN bit.
663 if (APInt::tcIsZero(significand
, numParts
))
664 APInt::tcSetBit(significand
, QNaNBit
- 1);
666 // We always have to set the QNaN bit to make it a QNaN.
667 APInt::tcSetBit(significand
, QNaNBit
);
670 // For x87 extended precision, we want to make a NaN, not a
671 // pseudo-NaN. Maybe we should expose the ability to make
673 if (semantics
== &APFloat::x87DoubleExtended
)
674 APInt::tcSetBit(significand
, QNaNBit
+ 1);
677 APFloat
APFloat::makeNaN(const fltSemantics
&Sem
, bool SNaN
, bool Negative
,
679 APFloat
value(Sem
, uninitialized
);
680 value
.makeNaN(SNaN
, Negative
, fill
);
685 APFloat::operator=(const APFloat
&rhs
)
688 if (semantics
!= rhs
.semantics
) {
690 initialize(rhs
.semantics
);
699 APFloat::bitwiseIsEqual(const APFloat
&rhs
) const {
702 if (semantics
!= rhs
.semantics
||
703 category
!= rhs
.category
||
706 if (semantics
==(const llvm::fltSemantics
*)&PPCDoubleDouble
&&
709 if (category
==fcZero
|| category
==fcInfinity
)
711 else if (category
==fcNormal
&& exponent
!=rhs
.exponent
)
713 else if (semantics
==(const llvm::fltSemantics
*)&PPCDoubleDouble
&&
714 exponent2
!=rhs
.exponent2
)
718 const integerPart
* p
=significandParts();
719 const integerPart
* q
=rhs
.significandParts();
720 for (; i
>0; i
--, p
++, q
++) {
728 APFloat::APFloat(const fltSemantics
&ourSemantics
, integerPart value
)
729 : exponent2(0), sign2(0) {
730 assertArithmeticOK(ourSemantics
);
731 initialize(&ourSemantics
);
734 exponent
= ourSemantics
.precision
- 1;
735 significandParts()[0] = value
;
736 normalize(rmNearestTiesToEven
, lfExactlyZero
);
739 APFloat::APFloat(const fltSemantics
&ourSemantics
) : exponent2(0), sign2(0) {
740 assertArithmeticOK(ourSemantics
);
741 initialize(&ourSemantics
);
746 APFloat::APFloat(const fltSemantics
&ourSemantics
, uninitializedTag tag
)
747 : exponent2(0), sign2(0) {
748 assertArithmeticOK(ourSemantics
);
749 // Allocates storage if necessary but does not initialize it.
750 initialize(&ourSemantics
);
753 APFloat::APFloat(const fltSemantics
&ourSemantics
,
754 fltCategory ourCategory
, bool negative
)
755 : exponent2(0), sign2(0) {
756 assertArithmeticOK(ourSemantics
);
757 initialize(&ourSemantics
);
758 category
= ourCategory
;
760 if (category
== fcNormal
)
762 else if (ourCategory
== fcNaN
)
766 APFloat::APFloat(const fltSemantics
&ourSemantics
, StringRef text
)
767 : exponent2(0), sign2(0) {
768 assertArithmeticOK(ourSemantics
);
769 initialize(&ourSemantics
);
770 convertFromString(text
, rmNearestTiesToEven
);
773 APFloat::APFloat(const APFloat
&rhs
) : exponent2(0), sign2(0) {
774 initialize(rhs
.semantics
);
783 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
784 void APFloat::Profile(FoldingSetNodeID
& ID
) const {
785 ID
.Add(bitcastToAPInt());
789 APFloat::partCount() const
791 return partCountForBits(semantics
->precision
+ 1);
795 APFloat::semanticsPrecision(const fltSemantics
&semantics
)
797 return semantics
.precision
;
801 APFloat::significandParts() const
803 return const_cast<APFloat
*>(this)->significandParts();
807 APFloat::significandParts()
809 assert(category
== fcNormal
|| category
== fcNaN
);
812 return significand
.parts
;
814 return &significand
.part
;
818 APFloat::zeroSignificand()
821 APInt::tcSet(significandParts(), 0, partCount());
824 /* Increment an fcNormal floating point number's significand. */
826 APFloat::incrementSignificand()
830 carry
= APInt::tcIncrement(significandParts(), partCount());
832 /* Our callers should never cause us to overflow. */
836 /* Add the significand of the RHS. Returns the carry flag. */
838 APFloat::addSignificand(const APFloat
&rhs
)
842 parts
= significandParts();
844 assert(semantics
== rhs
.semantics
);
845 assert(exponent
== rhs
.exponent
);
847 return APInt::tcAdd(parts
, rhs
.significandParts(), 0, partCount());
850 /* Subtract the significand of the RHS with a borrow flag. Returns
853 APFloat::subtractSignificand(const APFloat
&rhs
, integerPart borrow
)
857 parts
= significandParts();
859 assert(semantics
== rhs
.semantics
);
860 assert(exponent
== rhs
.exponent
);
862 return APInt::tcSubtract(parts
, rhs
.significandParts(), borrow
,
866 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
867 on to the full-precision result of the multiplication. Returns the
870 APFloat::multiplySignificand(const APFloat
&rhs
, const APFloat
*addend
)
872 unsigned int omsb
; // One, not zero, based MSB.
873 unsigned int partsCount
, newPartsCount
, precision
;
874 integerPart
*lhsSignificand
;
875 integerPart scratch
[4];
876 integerPart
*fullSignificand
;
877 lostFraction lost_fraction
;
880 assert(semantics
== rhs
.semantics
);
882 precision
= semantics
->precision
;
883 newPartsCount
= partCountForBits(precision
* 2);
885 if (newPartsCount
> 4)
886 fullSignificand
= new integerPart
[newPartsCount
];
888 fullSignificand
= scratch
;
890 lhsSignificand
= significandParts();
891 partsCount
= partCount();
893 APInt::tcFullMultiply(fullSignificand
, lhsSignificand
,
894 rhs
.significandParts(), partsCount
, partsCount
);
896 lost_fraction
= lfExactlyZero
;
897 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
898 exponent
+= rhs
.exponent
;
901 Significand savedSignificand
= significand
;
902 const fltSemantics
*savedSemantics
= semantics
;
903 fltSemantics extendedSemantics
;
905 unsigned int extendedPrecision
;
907 /* Normalize our MSB. */
908 extendedPrecision
= precision
+ precision
- 1;
909 if (omsb
!= extendedPrecision
) {
910 APInt::tcShiftLeft(fullSignificand
, newPartsCount
,
911 extendedPrecision
- omsb
);
912 exponent
-= extendedPrecision
- omsb
;
915 /* Create new semantics. */
916 extendedSemantics
= *semantics
;
917 extendedSemantics
.precision
= extendedPrecision
;
919 if (newPartsCount
== 1)
920 significand
.part
= fullSignificand
[0];
922 significand
.parts
= fullSignificand
;
923 semantics
= &extendedSemantics
;
925 APFloat
extendedAddend(*addend
);
926 status
= extendedAddend
.convert(extendedSemantics
, rmTowardZero
, &ignored
);
927 assert(status
== opOK
);
928 lost_fraction
= addOrSubtractSignificand(extendedAddend
, false);
930 /* Restore our state. */
931 if (newPartsCount
== 1)
932 fullSignificand
[0] = significand
.part
;
933 significand
= savedSignificand
;
934 semantics
= savedSemantics
;
936 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
939 exponent
-= (precision
- 1);
941 if (omsb
> precision
) {
942 unsigned int bits
, significantParts
;
945 bits
= omsb
- precision
;
946 significantParts
= partCountForBits(omsb
);
947 lf
= shiftRight(fullSignificand
, significantParts
, bits
);
948 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
952 APInt::tcAssign(lhsSignificand
, fullSignificand
, partsCount
);
954 if (newPartsCount
> 4)
955 delete [] fullSignificand
;
957 return lost_fraction
;
960 /* Multiply the significands of LHS and RHS to DST. */
962 APFloat::divideSignificand(const APFloat
&rhs
)
964 unsigned int bit
, i
, partsCount
;
965 const integerPart
*rhsSignificand
;
966 integerPart
*lhsSignificand
, *dividend
, *divisor
;
967 integerPart scratch
[4];
968 lostFraction lost_fraction
;
970 assert(semantics
== rhs
.semantics
);
972 lhsSignificand
= significandParts();
973 rhsSignificand
= rhs
.significandParts();
974 partsCount
= partCount();
977 dividend
= new integerPart
[partsCount
* 2];
981 divisor
= dividend
+ partsCount
;
983 /* Copy the dividend and divisor as they will be modified in-place. */
984 for (i
= 0; i
< partsCount
; i
++) {
985 dividend
[i
] = lhsSignificand
[i
];
986 divisor
[i
] = rhsSignificand
[i
];
987 lhsSignificand
[i
] = 0;
990 exponent
-= rhs
.exponent
;
992 unsigned int precision
= semantics
->precision
;
994 /* Normalize the divisor. */
995 bit
= precision
- APInt::tcMSB(divisor
, partsCount
) - 1;
998 APInt::tcShiftLeft(divisor
, partsCount
, bit
);
1001 /* Normalize the dividend. */
1002 bit
= precision
- APInt::tcMSB(dividend
, partsCount
) - 1;
1005 APInt::tcShiftLeft(dividend
, partsCount
, bit
);
1008 /* Ensure the dividend >= divisor initially for the loop below.
1009 Incidentally, this means that the division loop below is
1010 guaranteed to set the integer bit to one. */
1011 if (APInt::tcCompare(dividend
, divisor
, partsCount
) < 0) {
1013 APInt::tcShiftLeft(dividend
, partsCount
, 1);
1014 assert(APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0);
1017 /* Long division. */
1018 for (bit
= precision
; bit
; bit
-= 1) {
1019 if (APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0) {
1020 APInt::tcSubtract(dividend
, divisor
, 0, partsCount
);
1021 APInt::tcSetBit(lhsSignificand
, bit
- 1);
1024 APInt::tcShiftLeft(dividend
, partsCount
, 1);
1027 /* Figure out the lost fraction. */
1028 int cmp
= APInt::tcCompare(dividend
, divisor
, partsCount
);
1031 lost_fraction
= lfMoreThanHalf
;
1033 lost_fraction
= lfExactlyHalf
;
1034 else if (APInt::tcIsZero(dividend
, partsCount
))
1035 lost_fraction
= lfExactlyZero
;
1037 lost_fraction
= lfLessThanHalf
;
1042 return lost_fraction
;
1046 APFloat::significandMSB() const
1048 return APInt::tcMSB(significandParts(), partCount());
1052 APFloat::significandLSB() const
1054 return APInt::tcLSB(significandParts(), partCount());
1057 /* Note that a zero result is NOT normalized to fcZero. */
1059 APFloat::shiftSignificandRight(unsigned int bits
)
1061 /* Our exponent should not overflow. */
1062 assert((exponent_t
) (exponent
+ bits
) >= exponent
);
1066 return shiftRight(significandParts(), partCount(), bits
);
1069 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1071 APFloat::shiftSignificandLeft(unsigned int bits
)
1073 assert(bits
< semantics
->precision
);
1076 unsigned int partsCount
= partCount();
1078 APInt::tcShiftLeft(significandParts(), partsCount
, bits
);
1081 assert(!APInt::tcIsZero(significandParts(), partsCount
));
1086 APFloat::compareAbsoluteValue(const APFloat
&rhs
) const
1090 assert(semantics
== rhs
.semantics
);
1091 assert(category
== fcNormal
);
1092 assert(rhs
.category
== fcNormal
);
1094 compare
= exponent
- rhs
.exponent
;
1096 /* If exponents are equal, do an unsigned bignum comparison of the
1099 compare
= APInt::tcCompare(significandParts(), rhs
.significandParts(),
1103 return cmpGreaterThan
;
1104 else if (compare
< 0)
1110 /* Handle overflow. Sign is preserved. We either become infinity or
1111 the largest finite number. */
1113 APFloat::handleOverflow(roundingMode rounding_mode
)
1116 if (rounding_mode
== rmNearestTiesToEven
||
1117 rounding_mode
== rmNearestTiesToAway
||
1118 (rounding_mode
== rmTowardPositive
&& !sign
) ||
1119 (rounding_mode
== rmTowardNegative
&& sign
)) {
1120 category
= fcInfinity
;
1121 return (opStatus
) (opOverflow
| opInexact
);
1124 /* Otherwise we become the largest finite number. */
1125 category
= fcNormal
;
1126 exponent
= semantics
->maxExponent
;
1127 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1128 semantics
->precision
);
1133 /* Returns TRUE if, when truncating the current number, with BIT the
1134 new LSB, with the given lost fraction and rounding mode, the result
1135 would need to be rounded away from zero (i.e., by increasing the
1136 signficand). This routine must work for fcZero of both signs, and
1137 fcNormal numbers. */
1139 APFloat::roundAwayFromZero(roundingMode rounding_mode
,
1140 lostFraction lost_fraction
,
1141 unsigned int bit
) const
1143 /* NaNs and infinities should not have lost fractions. */
1144 assert(category
== fcNormal
|| category
== fcZero
);
1146 /* Current callers never pass this so we don't handle it. */
1147 assert(lost_fraction
!= lfExactlyZero
);
1149 switch (rounding_mode
) {
1151 llvm_unreachable(0);
1153 case rmNearestTiesToAway
:
1154 return lost_fraction
== lfExactlyHalf
|| lost_fraction
== lfMoreThanHalf
;
1156 case rmNearestTiesToEven
:
1157 if (lost_fraction
== lfMoreThanHalf
)
1160 /* Our zeroes don't have a significand to test. */
1161 if (lost_fraction
== lfExactlyHalf
&& category
!= fcZero
)
1162 return APInt::tcExtractBit(significandParts(), bit
);
1169 case rmTowardPositive
:
1170 return sign
== false;
1172 case rmTowardNegative
:
1173 return sign
== true;
1178 APFloat::normalize(roundingMode rounding_mode
,
1179 lostFraction lost_fraction
)
1181 unsigned int omsb
; /* One, not zero, based MSB. */
1184 if (category
!= fcNormal
)
1187 /* Before rounding normalize the exponent of fcNormal numbers. */
1188 omsb
= significandMSB() + 1;
1191 /* OMSB is numbered from 1. We want to place it in the integer
1192 bit numbered PRECISON if possible, with a compensating change in
1194 exponentChange
= omsb
- semantics
->precision
;
1196 /* If the resulting exponent is too high, overflow according to
1197 the rounding mode. */
1198 if (exponent
+ exponentChange
> semantics
->maxExponent
)
1199 return handleOverflow(rounding_mode
);
1201 /* Subnormal numbers have exponent minExponent, and their MSB
1202 is forced based on that. */
1203 if (exponent
+ exponentChange
< semantics
->minExponent
)
1204 exponentChange
= semantics
->minExponent
- exponent
;
1206 /* Shifting left is easy as we don't lose precision. */
1207 if (exponentChange
< 0) {
1208 assert(lost_fraction
== lfExactlyZero
);
1210 shiftSignificandLeft(-exponentChange
);
1215 if (exponentChange
> 0) {
1218 /* Shift right and capture any new lost fraction. */
1219 lf
= shiftSignificandRight(exponentChange
);
1221 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1223 /* Keep OMSB up-to-date. */
1224 if (omsb
> (unsigned) exponentChange
)
1225 omsb
-= exponentChange
;
1231 /* Now round the number according to rounding_mode given the lost
1234 /* As specified in IEEE 754, since we do not trap we do not report
1235 underflow for exact results. */
1236 if (lost_fraction
== lfExactlyZero
) {
1237 /* Canonicalize zeroes. */
1244 /* Increment the significand if we're rounding away from zero. */
1245 if (roundAwayFromZero(rounding_mode
, lost_fraction
, 0)) {
1247 exponent
= semantics
->minExponent
;
1249 incrementSignificand();
1250 omsb
= significandMSB() + 1;
1252 /* Did the significand increment overflow? */
1253 if (omsb
== (unsigned) semantics
->precision
+ 1) {
1254 /* Renormalize by incrementing the exponent and shifting our
1255 significand right one. However if we already have the
1256 maximum exponent we overflow to infinity. */
1257 if (exponent
== semantics
->maxExponent
) {
1258 category
= fcInfinity
;
1260 return (opStatus
) (opOverflow
| opInexact
);
1263 shiftSignificandRight(1);
1269 /* The normal case - we were and are not denormal, and any
1270 significand increment above didn't overflow. */
1271 if (omsb
== semantics
->precision
)
1274 /* We have a non-zero denormal. */
1275 assert(omsb
< semantics
->precision
);
1277 /* Canonicalize zeroes. */
1281 /* The fcZero case is a denormal that underflowed to zero. */
1282 return (opStatus
) (opUnderflow
| opInexact
);
1286 APFloat::addOrSubtractSpecials(const APFloat
&rhs
, bool subtract
)
1288 switch (convolve(category
, rhs
.category
)) {
1290 llvm_unreachable(0);
1292 case convolve(fcNaN
, fcZero
):
1293 case convolve(fcNaN
, fcNormal
):
1294 case convolve(fcNaN
, fcInfinity
):
1295 case convolve(fcNaN
, fcNaN
):
1296 case convolve(fcNormal
, fcZero
):
1297 case convolve(fcInfinity
, fcNormal
):
1298 case convolve(fcInfinity
, fcZero
):
1301 case convolve(fcZero
, fcNaN
):
1302 case convolve(fcNormal
, fcNaN
):
1303 case convolve(fcInfinity
, fcNaN
):
1305 copySignificand(rhs
);
1308 case convolve(fcNormal
, fcInfinity
):
1309 case convolve(fcZero
, fcInfinity
):
1310 category
= fcInfinity
;
1311 sign
= rhs
.sign
^ subtract
;
1314 case convolve(fcZero
, fcNormal
):
1316 sign
= rhs
.sign
^ subtract
;
1319 case convolve(fcZero
, fcZero
):
1320 /* Sign depends on rounding mode; handled by caller. */
1323 case convolve(fcInfinity
, fcInfinity
):
1324 /* Differently signed infinities can only be validly
1326 if (((sign
^ rhs
.sign
)!=0) != subtract
) {
1333 case convolve(fcNormal
, fcNormal
):
1338 /* Add or subtract two normal numbers. */
1340 APFloat::addOrSubtractSignificand(const APFloat
&rhs
, bool subtract
)
1343 lostFraction lost_fraction
;
1346 /* Determine if the operation on the absolute values is effectively
1347 an addition or subtraction. */
1348 subtract
^= (sign
^ rhs
.sign
) ? true : false;
1350 /* Are we bigger exponent-wise than the RHS? */
1351 bits
= exponent
- rhs
.exponent
;
1353 /* Subtraction is more subtle than one might naively expect. */
1355 APFloat
temp_rhs(rhs
);
1359 reverse
= compareAbsoluteValue(temp_rhs
) == cmpLessThan
;
1360 lost_fraction
= lfExactlyZero
;
1361 } else if (bits
> 0) {
1362 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
- 1);
1363 shiftSignificandLeft(1);
1366 lost_fraction
= shiftSignificandRight(-bits
- 1);
1367 temp_rhs
.shiftSignificandLeft(1);
1372 carry
= temp_rhs
.subtractSignificand
1373 (*this, lost_fraction
!= lfExactlyZero
);
1374 copySignificand(temp_rhs
);
1377 carry
= subtractSignificand
1378 (temp_rhs
, lost_fraction
!= lfExactlyZero
);
1381 /* Invert the lost fraction - it was on the RHS and
1383 if (lost_fraction
== lfLessThanHalf
)
1384 lost_fraction
= lfMoreThanHalf
;
1385 else if (lost_fraction
== lfMoreThanHalf
)
1386 lost_fraction
= lfLessThanHalf
;
1388 /* The code above is intended to ensure that no borrow is
1393 APFloat
temp_rhs(rhs
);
1395 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
);
1396 carry
= addSignificand(temp_rhs
);
1398 lost_fraction
= shiftSignificandRight(-bits
);
1399 carry
= addSignificand(rhs
);
1402 /* We have a guard bit; generating a carry cannot happen. */
1406 return lost_fraction
;
1410 APFloat::multiplySpecials(const APFloat
&rhs
)
1412 switch (convolve(category
, rhs
.category
)) {
1414 llvm_unreachable(0);
1416 case convolve(fcNaN
, fcZero
):
1417 case convolve(fcNaN
, fcNormal
):
1418 case convolve(fcNaN
, fcInfinity
):
1419 case convolve(fcNaN
, fcNaN
):
1422 case convolve(fcZero
, fcNaN
):
1423 case convolve(fcNormal
, fcNaN
):
1424 case convolve(fcInfinity
, fcNaN
):
1426 copySignificand(rhs
);
1429 case convolve(fcNormal
, fcInfinity
):
1430 case convolve(fcInfinity
, fcNormal
):
1431 case convolve(fcInfinity
, fcInfinity
):
1432 category
= fcInfinity
;
1435 case convolve(fcZero
, fcNormal
):
1436 case convolve(fcNormal
, fcZero
):
1437 case convolve(fcZero
, fcZero
):
1441 case convolve(fcZero
, fcInfinity
):
1442 case convolve(fcInfinity
, fcZero
):
1446 case convolve(fcNormal
, fcNormal
):
1452 APFloat::divideSpecials(const APFloat
&rhs
)
1454 switch (convolve(category
, rhs
.category
)) {
1456 llvm_unreachable(0);
1458 case convolve(fcNaN
, fcZero
):
1459 case convolve(fcNaN
, fcNormal
):
1460 case convolve(fcNaN
, fcInfinity
):
1461 case convolve(fcNaN
, fcNaN
):
1462 case convolve(fcInfinity
, fcZero
):
1463 case convolve(fcInfinity
, fcNormal
):
1464 case convolve(fcZero
, fcInfinity
):
1465 case convolve(fcZero
, fcNormal
):
1468 case convolve(fcZero
, fcNaN
):
1469 case convolve(fcNormal
, fcNaN
):
1470 case convolve(fcInfinity
, fcNaN
):
1472 copySignificand(rhs
);
1475 case convolve(fcNormal
, fcInfinity
):
1479 case convolve(fcNormal
, fcZero
):
1480 category
= fcInfinity
;
1483 case convolve(fcInfinity
, fcInfinity
):
1484 case convolve(fcZero
, fcZero
):
1488 case convolve(fcNormal
, fcNormal
):
1494 APFloat::modSpecials(const APFloat
&rhs
)
1496 switch (convolve(category
, rhs
.category
)) {
1498 llvm_unreachable(0);
1500 case convolve(fcNaN
, fcZero
):
1501 case convolve(fcNaN
, fcNormal
):
1502 case convolve(fcNaN
, fcInfinity
):
1503 case convolve(fcNaN
, fcNaN
):
1504 case convolve(fcZero
, fcInfinity
):
1505 case convolve(fcZero
, fcNormal
):
1506 case convolve(fcNormal
, fcInfinity
):
1509 case convolve(fcZero
, fcNaN
):
1510 case convolve(fcNormal
, fcNaN
):
1511 case convolve(fcInfinity
, fcNaN
):
1513 copySignificand(rhs
);
1516 case convolve(fcNormal
, fcZero
):
1517 case convolve(fcInfinity
, fcZero
):
1518 case convolve(fcInfinity
, fcNormal
):
1519 case convolve(fcInfinity
, fcInfinity
):
1520 case convolve(fcZero
, fcZero
):
1524 case convolve(fcNormal
, fcNormal
):
1531 APFloat::changeSign()
1533 /* Look mummy, this one's easy. */
1538 APFloat::clearSign()
1540 /* So is this one. */
1545 APFloat::copySign(const APFloat
&rhs
)
1551 /* Normalized addition or subtraction. */
1553 APFloat::addOrSubtract(const APFloat
&rhs
, roundingMode rounding_mode
,
1558 assertArithmeticOK(*semantics
);
1560 fs
= addOrSubtractSpecials(rhs
, subtract
);
1562 /* This return code means it was not a simple case. */
1563 if (fs
== opDivByZero
) {
1564 lostFraction lost_fraction
;
1566 lost_fraction
= addOrSubtractSignificand(rhs
, subtract
);
1567 fs
= normalize(rounding_mode
, lost_fraction
);
1569 /* Can only be zero if we lost no fraction. */
1570 assert(category
!= fcZero
|| lost_fraction
== lfExactlyZero
);
1573 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1574 positive zero unless rounding to minus infinity, except that
1575 adding two like-signed zeroes gives that zero. */
1576 if (category
== fcZero
) {
1577 if (rhs
.category
!= fcZero
|| (sign
== rhs
.sign
) == subtract
)
1578 sign
= (rounding_mode
== rmTowardNegative
);
1584 /* Normalized addition. */
1586 APFloat::add(const APFloat
&rhs
, roundingMode rounding_mode
)
1588 return addOrSubtract(rhs
, rounding_mode
, false);
1591 /* Normalized subtraction. */
1593 APFloat::subtract(const APFloat
&rhs
, roundingMode rounding_mode
)
1595 return addOrSubtract(rhs
, rounding_mode
, true);
1598 /* Normalized multiply. */
1600 APFloat::multiply(const APFloat
&rhs
, roundingMode rounding_mode
)
1604 assertArithmeticOK(*semantics
);
1606 fs
= multiplySpecials(rhs
);
1608 if (category
== fcNormal
) {
1609 lostFraction lost_fraction
= multiplySignificand(rhs
, 0);
1610 fs
= normalize(rounding_mode
, lost_fraction
);
1611 if (lost_fraction
!= lfExactlyZero
)
1612 fs
= (opStatus
) (fs
| opInexact
);
1618 /* Normalized divide. */
1620 APFloat::divide(const APFloat
&rhs
, roundingMode rounding_mode
)
1624 assertArithmeticOK(*semantics
);
1626 fs
= divideSpecials(rhs
);
1628 if (category
== fcNormal
) {
1629 lostFraction lost_fraction
= divideSignificand(rhs
);
1630 fs
= normalize(rounding_mode
, lost_fraction
);
1631 if (lost_fraction
!= lfExactlyZero
)
1632 fs
= (opStatus
) (fs
| opInexact
);
1638 /* Normalized remainder. This is not currently correct in all cases. */
1640 APFloat::remainder(const APFloat
&rhs
)
1644 unsigned int origSign
= sign
;
1646 assertArithmeticOK(*semantics
);
1647 fs
= V
.divide(rhs
, rmNearestTiesToEven
);
1648 if (fs
== opDivByZero
)
1651 int parts
= partCount();
1652 integerPart
*x
= new integerPart
[parts
];
1654 fs
= V
.convertToInteger(x
, parts
* integerPartWidth
, true,
1655 rmNearestTiesToEven
, &ignored
);
1656 if (fs
==opInvalidOp
)
1659 fs
= V
.convertFromZeroExtendedInteger(x
, parts
* integerPartWidth
, true,
1660 rmNearestTiesToEven
);
1661 assert(fs
==opOK
); // should always work
1663 fs
= V
.multiply(rhs
, rmNearestTiesToEven
);
1664 assert(fs
==opOK
|| fs
==opInexact
); // should not overflow or underflow
1666 fs
= subtract(V
, rmNearestTiesToEven
);
1667 assert(fs
==opOK
|| fs
==opInexact
); // likewise
1670 sign
= origSign
; // IEEE754 requires this
1675 /* Normalized llvm frem (C fmod).
1676 This is not currently correct in all cases. */
1678 APFloat::mod(const APFloat
&rhs
, roundingMode rounding_mode
)
1681 assertArithmeticOK(*semantics
);
1682 fs
= modSpecials(rhs
);
1684 if (category
== fcNormal
&& rhs
.category
== fcNormal
) {
1686 unsigned int origSign
= sign
;
1688 fs
= V
.divide(rhs
, rmNearestTiesToEven
);
1689 if (fs
== opDivByZero
)
1692 int parts
= partCount();
1693 integerPart
*x
= new integerPart
[parts
];
1695 fs
= V
.convertToInteger(x
, parts
* integerPartWidth
, true,
1696 rmTowardZero
, &ignored
);
1697 if (fs
==opInvalidOp
)
1700 fs
= V
.convertFromZeroExtendedInteger(x
, parts
* integerPartWidth
, true,
1701 rmNearestTiesToEven
);
1702 assert(fs
==opOK
); // should always work
1704 fs
= V
.multiply(rhs
, rounding_mode
);
1705 assert(fs
==opOK
|| fs
==opInexact
); // should not overflow or underflow
1707 fs
= subtract(V
, rounding_mode
);
1708 assert(fs
==opOK
|| fs
==opInexact
); // likewise
1711 sign
= origSign
; // IEEE754 requires this
1717 /* Normalized fused-multiply-add. */
1719 APFloat::fusedMultiplyAdd(const APFloat
&multiplicand
,
1720 const APFloat
&addend
,
1721 roundingMode rounding_mode
)
1725 assertArithmeticOK(*semantics
);
1727 /* Post-multiplication sign, before addition. */
1728 sign
^= multiplicand
.sign
;
1730 /* If and only if all arguments are normal do we need to do an
1731 extended-precision calculation. */
1732 if (category
== fcNormal
&&
1733 multiplicand
.category
== fcNormal
&&
1734 addend
.category
== fcNormal
) {
1735 lostFraction lost_fraction
;
1737 lost_fraction
= multiplySignificand(multiplicand
, &addend
);
1738 fs
= normalize(rounding_mode
, lost_fraction
);
1739 if (lost_fraction
!= lfExactlyZero
)
1740 fs
= (opStatus
) (fs
| opInexact
);
1742 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1743 positive zero unless rounding to minus infinity, except that
1744 adding two like-signed zeroes gives that zero. */
1745 if (category
== fcZero
&& sign
!= addend
.sign
)
1746 sign
= (rounding_mode
== rmTowardNegative
);
1748 fs
= multiplySpecials(multiplicand
);
1750 /* FS can only be opOK or opInvalidOp. There is no more work
1751 to do in the latter case. The IEEE-754R standard says it is
1752 implementation-defined in this case whether, if ADDEND is a
1753 quiet NaN, we raise invalid op; this implementation does so.
1755 If we need to do the addition we can do so with normal
1758 fs
= addOrSubtract(addend
, rounding_mode
, false);
1764 /* Comparison requires normalized numbers. */
1766 APFloat::compare(const APFloat
&rhs
) const
1770 assertArithmeticOK(*semantics
);
1771 assert(semantics
== rhs
.semantics
);
1773 switch (convolve(category
, rhs
.category
)) {
1775 llvm_unreachable(0);
1777 case convolve(fcNaN
, fcZero
):
1778 case convolve(fcNaN
, fcNormal
):
1779 case convolve(fcNaN
, fcInfinity
):
1780 case convolve(fcNaN
, fcNaN
):
1781 case convolve(fcZero
, fcNaN
):
1782 case convolve(fcNormal
, fcNaN
):
1783 case convolve(fcInfinity
, fcNaN
):
1784 return cmpUnordered
;
1786 case convolve(fcInfinity
, fcNormal
):
1787 case convolve(fcInfinity
, fcZero
):
1788 case convolve(fcNormal
, fcZero
):
1792 return cmpGreaterThan
;
1794 case convolve(fcNormal
, fcInfinity
):
1795 case convolve(fcZero
, fcInfinity
):
1796 case convolve(fcZero
, fcNormal
):
1798 return cmpGreaterThan
;
1802 case convolve(fcInfinity
, fcInfinity
):
1803 if (sign
== rhs
.sign
)
1808 return cmpGreaterThan
;
1810 case convolve(fcZero
, fcZero
):
1813 case convolve(fcNormal
, fcNormal
):
1817 /* Two normal numbers. Do they have the same sign? */
1818 if (sign
!= rhs
.sign
) {
1820 result
= cmpLessThan
;
1822 result
= cmpGreaterThan
;
1824 /* Compare absolute values; invert result if negative. */
1825 result
= compareAbsoluteValue(rhs
);
1828 if (result
== cmpLessThan
)
1829 result
= cmpGreaterThan
;
1830 else if (result
== cmpGreaterThan
)
1831 result
= cmpLessThan
;
1838 /// APFloat::convert - convert a value of one floating point type to another.
1839 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1840 /// records whether the transformation lost information, i.e. whether
1841 /// converting the result back to the original type will produce the
1842 /// original value (this is almost the same as return value==fsOK, but there
1843 /// are edge cases where this is not so).
1846 APFloat::convert(const fltSemantics
&toSemantics
,
1847 roundingMode rounding_mode
, bool *losesInfo
)
1849 lostFraction lostFraction
;
1850 unsigned int newPartCount
, oldPartCount
;
1853 assertArithmeticOK(*semantics
);
1854 assertArithmeticOK(toSemantics
);
1855 lostFraction
= lfExactlyZero
;
1856 newPartCount
= partCountForBits(toSemantics
.precision
+ 1);
1857 oldPartCount
= partCount();
1859 /* Handle storage complications. If our new form is wider,
1860 re-allocate our bit pattern into wider storage. If it is
1861 narrower, we ignore the excess parts, but if narrowing to a
1862 single part we need to free the old storage.
1863 Be careful not to reference significandParts for zeroes
1864 and infinities, since it aborts. */
1865 if (newPartCount
> oldPartCount
) {
1866 integerPart
*newParts
;
1867 newParts
= new integerPart
[newPartCount
];
1868 APInt::tcSet(newParts
, 0, newPartCount
);
1869 if (category
==fcNormal
|| category
==fcNaN
)
1870 APInt::tcAssign(newParts
, significandParts(), oldPartCount
);
1872 significand
.parts
= newParts
;
1873 } else if (newPartCount
< oldPartCount
) {
1874 /* Capture any lost fraction through truncation of parts so we get
1875 correct rounding whilst normalizing. */
1876 if (category
==fcNormal
)
1877 lostFraction
= lostFractionThroughTruncation
1878 (significandParts(), oldPartCount
, toSemantics
.precision
);
1879 if (newPartCount
== 1) {
1880 integerPart newPart
= 0;
1881 if (category
==fcNormal
|| category
==fcNaN
)
1882 newPart
= significandParts()[0];
1884 significand
.part
= newPart
;
1888 if (category
== fcNormal
) {
1889 /* Re-interpret our bit-pattern. */
1890 exponent
+= toSemantics
.precision
- semantics
->precision
;
1891 semantics
= &toSemantics
;
1892 fs
= normalize(rounding_mode
, lostFraction
);
1893 *losesInfo
= (fs
!= opOK
);
1894 } else if (category
== fcNaN
) {
1895 int shift
= toSemantics
.precision
- semantics
->precision
;
1896 // Do this now so significandParts gets the right answer
1897 const fltSemantics
*oldSemantics
= semantics
;
1898 semantics
= &toSemantics
;
1900 // No normalization here, just truncate
1902 APInt::tcShiftLeft(significandParts(), newPartCount
, shift
);
1903 else if (shift
< 0) {
1904 unsigned ushift
= -shift
;
1905 // Figure out if we are losing information. This happens
1906 // if are shifting out something other than 0s, or if the x87 long
1907 // double input did not have its integer bit set (pseudo-NaN), or if the
1908 // x87 long double input did not have its QNan bit set (because the x87
1909 // hardware sets this bit when converting a lower-precision NaN to
1910 // x87 long double).
1911 if (APInt::tcLSB(significandParts(), newPartCount
) < ushift
)
1913 if (oldSemantics
== &APFloat::x87DoubleExtended
&&
1914 (!(*significandParts() & 0x8000000000000000ULL
) ||
1915 !(*significandParts() & 0x4000000000000000ULL
)))
1917 APInt::tcShiftRight(significandParts(), newPartCount
, ushift
);
1919 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1920 // does not give you back the same bits. This is dubious, and we
1921 // don't currently do it. You're really supposed to get
1922 // an invalid operation signal at runtime, but nobody does that.
1925 semantics
= &toSemantics
;
1933 /* Convert a floating point number to an integer according to the
1934 rounding mode. If the rounded integer value is out of range this
1935 returns an invalid operation exception and the contents of the
1936 destination parts are unspecified. If the rounded value is in
1937 range but the floating point number is not the exact integer, the C
1938 standard doesn't require an inexact exception to be raised. IEEE
1939 854 does require it so we do that.
1941 Note that for conversions to integer type the C standard requires
1942 round-to-zero to always be used. */
1944 APFloat::convertToSignExtendedInteger(integerPart
*parts
, unsigned int width
,
1946 roundingMode rounding_mode
,
1947 bool *isExact
) const
1949 lostFraction lost_fraction
;
1950 const integerPart
*src
;
1951 unsigned int dstPartsCount
, truncatedBits
;
1953 assertArithmeticOK(*semantics
);
1957 /* Handle the three special cases first. */
1958 if (category
== fcInfinity
|| category
== fcNaN
)
1961 dstPartsCount
= partCountForBits(width
);
1963 if (category
== fcZero
) {
1964 APInt::tcSet(parts
, 0, dstPartsCount
);
1965 // Negative zero can't be represented as an int.
1970 src
= significandParts();
1972 /* Step 1: place our absolute value, with any fraction truncated, in
1975 /* Our absolute value is less than one; truncate everything. */
1976 APInt::tcSet(parts
, 0, dstPartsCount
);
1977 /* For exponent -1 the integer bit represents .5, look at that.
1978 For smaller exponents leftmost truncated bit is 0. */
1979 truncatedBits
= semantics
->precision
-1U - exponent
;
1981 /* We want the most significant (exponent + 1) bits; the rest are
1983 unsigned int bits
= exponent
+ 1U;
1985 /* Hopelessly large in magnitude? */
1989 if (bits
< semantics
->precision
) {
1990 /* We truncate (semantics->precision - bits) bits. */
1991 truncatedBits
= semantics
->precision
- bits
;
1992 APInt::tcExtract(parts
, dstPartsCount
, src
, bits
, truncatedBits
);
1994 /* We want at least as many bits as are available. */
1995 APInt::tcExtract(parts
, dstPartsCount
, src
, semantics
->precision
, 0);
1996 APInt::tcShiftLeft(parts
, dstPartsCount
, bits
- semantics
->precision
);
2001 /* Step 2: work out any lost fraction, and increment the absolute
2002 value if we would round away from zero. */
2003 if (truncatedBits
) {
2004 lost_fraction
= lostFractionThroughTruncation(src
, partCount(),
2006 if (lost_fraction
!= lfExactlyZero
&&
2007 roundAwayFromZero(rounding_mode
, lost_fraction
, truncatedBits
)) {
2008 if (APInt::tcIncrement(parts
, dstPartsCount
))
2009 return opInvalidOp
; /* Overflow. */
2012 lost_fraction
= lfExactlyZero
;
2015 /* Step 3: check if we fit in the destination. */
2016 unsigned int omsb
= APInt::tcMSB(parts
, dstPartsCount
) + 1;
2020 /* Negative numbers cannot be represented as unsigned. */
2024 /* It takes omsb bits to represent the unsigned integer value.
2025 We lose a bit for the sign, but care is needed as the
2026 maximally negative integer is a special case. */
2027 if (omsb
== width
&& APInt::tcLSB(parts
, dstPartsCount
) + 1 != omsb
)
2030 /* This case can happen because of rounding. */
2035 APInt::tcNegate (parts
, dstPartsCount
);
2037 if (omsb
>= width
+ !isSigned
)
2041 if (lost_fraction
== lfExactlyZero
) {
2048 /* Same as convertToSignExtendedInteger, except we provide
2049 deterministic values in case of an invalid operation exception,
2050 namely zero for NaNs and the minimal or maximal value respectively
2051 for underflow or overflow.
2052 The *isExact output tells whether the result is exact, in the sense
2053 that converting it back to the original floating point type produces
2054 the original value. This is almost equivalent to result==opOK,
2055 except for negative zeroes.
2058 APFloat::convertToInteger(integerPart
*parts
, unsigned int width
,
2060 roundingMode rounding_mode
, bool *isExact
) const
2064 fs
= convertToSignExtendedInteger(parts
, width
, isSigned
, rounding_mode
,
2067 if (fs
== opInvalidOp
) {
2068 unsigned int bits
, dstPartsCount
;
2070 dstPartsCount
= partCountForBits(width
);
2072 if (category
== fcNaN
)
2077 bits
= width
- isSigned
;
2079 APInt::tcSetLeastSignificantBits(parts
, dstPartsCount
, bits
);
2080 if (sign
&& isSigned
)
2081 APInt::tcShiftLeft(parts
, dstPartsCount
, width
- 1);
2087 /* Convert an unsigned integer SRC to a floating point number,
2088 rounding according to ROUNDING_MODE. The sign of the floating
2089 point number is not modified. */
2091 APFloat::convertFromUnsignedParts(const integerPart
*src
,
2092 unsigned int srcCount
,
2093 roundingMode rounding_mode
)
2095 unsigned int omsb
, precision
, dstCount
;
2097 lostFraction lost_fraction
;
2099 assertArithmeticOK(*semantics
);
2100 category
= fcNormal
;
2101 omsb
= APInt::tcMSB(src
, srcCount
) + 1;
2102 dst
= significandParts();
2103 dstCount
= partCount();
2104 precision
= semantics
->precision
;
2106 /* We want the most significant PRECISON bits of SRC. There may not
2107 be that many; extract what we can. */
2108 if (precision
<= omsb
) {
2109 exponent
= omsb
- 1;
2110 lost_fraction
= lostFractionThroughTruncation(src
, srcCount
,
2112 APInt::tcExtract(dst
, dstCount
, src
, precision
, omsb
- precision
);
2114 exponent
= precision
- 1;
2115 lost_fraction
= lfExactlyZero
;
2116 APInt::tcExtract(dst
, dstCount
, src
, omsb
, 0);
2119 return normalize(rounding_mode
, lost_fraction
);
2123 APFloat::convertFromAPInt(const APInt
&Val
,
2125 roundingMode rounding_mode
)
2127 unsigned int partCount
= Val
.getNumWords();
2131 if (isSigned
&& api
.isNegative()) {
2136 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2139 /* Convert a two's complement integer SRC to a floating point number,
2140 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2141 integer is signed, in which case it must be sign-extended. */
2143 APFloat::convertFromSignExtendedInteger(const integerPart
*src
,
2144 unsigned int srcCount
,
2146 roundingMode rounding_mode
)
2150 assertArithmeticOK(*semantics
);
2152 APInt::tcExtractBit(src
, srcCount
* integerPartWidth
- 1)) {
2155 /* If we're signed and negative negate a copy. */
2157 copy
= new integerPart
[srcCount
];
2158 APInt::tcAssign(copy
, src
, srcCount
);
2159 APInt::tcNegate(copy
, srcCount
);
2160 status
= convertFromUnsignedParts(copy
, srcCount
, rounding_mode
);
2164 status
= convertFromUnsignedParts(src
, srcCount
, rounding_mode
);
2170 /* FIXME: should this just take a const APInt reference? */
2172 APFloat::convertFromZeroExtendedInteger(const integerPart
*parts
,
2173 unsigned int width
, bool isSigned
,
2174 roundingMode rounding_mode
)
2176 unsigned int partCount
= partCountForBits(width
);
2177 APInt api
= APInt(width
, partCount
, parts
);
2180 if (isSigned
&& APInt::tcExtractBit(parts
, width
- 1)) {
2185 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2189 APFloat::convertFromHexadecimalString(StringRef s
, roundingMode rounding_mode
)
2191 lostFraction lost_fraction
= lfExactlyZero
;
2192 integerPart
*significand
;
2193 unsigned int bitPos
, partsCount
;
2194 StringRef::iterator dot
, firstSignificantDigit
;
2198 category
= fcNormal
;
2200 significand
= significandParts();
2201 partsCount
= partCount();
2202 bitPos
= partsCount
* integerPartWidth
;
2204 /* Skip leading zeroes and any (hexa)decimal point. */
2205 StringRef::iterator begin
= s
.begin();
2206 StringRef::iterator end
= s
.end();
2207 StringRef::iterator p
= skipLeadingZeroesAndAnyDot(begin
, end
, &dot
);
2208 firstSignificantDigit
= p
;
2211 integerPart hex_value
;
2214 assert(dot
== end
&& "String contains multiple dots");
2221 hex_value
= hexDigitValue(*p
);
2222 if (hex_value
== -1U) {
2231 /* Store the number whilst 4-bit nibbles remain. */
2234 hex_value
<<= bitPos
% integerPartWidth
;
2235 significand
[bitPos
/ integerPartWidth
] |= hex_value
;
2237 lost_fraction
= trailingHexadecimalFraction(p
, end
, hex_value
);
2238 while (p
!= end
&& hexDigitValue(*p
) != -1U)
2245 /* Hex floats require an exponent but not a hexadecimal point. */
2246 assert(p
!= end
&& "Hex strings require an exponent");
2247 assert((*p
== 'p' || *p
== 'P') && "Invalid character in significand");
2248 assert(p
!= begin
&& "Significand has no digits");
2249 assert((dot
== end
|| p
- begin
!= 1) && "Significand has no digits");
2251 /* Ignore the exponent if we are zero. */
2252 if (p
!= firstSignificantDigit
) {
2255 /* Implicit hexadecimal point? */
2259 /* Calculate the exponent adjustment implicit in the number of
2260 significant digits. */
2261 expAdjustment
= static_cast<int>(dot
- firstSignificantDigit
);
2262 if (expAdjustment
< 0)
2264 expAdjustment
= expAdjustment
* 4 - 1;
2266 /* Adjust for writing the significand starting at the most
2267 significant nibble. */
2268 expAdjustment
+= semantics
->precision
;
2269 expAdjustment
-= partsCount
* integerPartWidth
;
2271 /* Adjust for the given exponent. */
2272 exponent
= totalExponent(p
+ 1, end
, expAdjustment
);
2275 return normalize(rounding_mode
, lost_fraction
);
2279 APFloat::roundSignificandWithExponent(const integerPart
*decSigParts
,
2280 unsigned sigPartCount
, int exp
,
2281 roundingMode rounding_mode
)
2283 unsigned int parts
, pow5PartCount
;
2284 fltSemantics calcSemantics
= { 32767, -32767, 0, true };
2285 integerPart pow5Parts
[maxPowerOfFiveParts
];
2288 isNearest
= (rounding_mode
== rmNearestTiesToEven
||
2289 rounding_mode
== rmNearestTiesToAway
);
2291 parts
= partCountForBits(semantics
->precision
+ 11);
2293 /* Calculate pow(5, abs(exp)). */
2294 pow5PartCount
= powerOf5(pow5Parts
, exp
>= 0 ? exp
: -exp
);
2296 for (;; parts
*= 2) {
2297 opStatus sigStatus
, powStatus
;
2298 unsigned int excessPrecision
, truncatedBits
;
2300 calcSemantics
.precision
= parts
* integerPartWidth
- 1;
2301 excessPrecision
= calcSemantics
.precision
- semantics
->precision
;
2302 truncatedBits
= excessPrecision
;
2304 APFloat
decSig(calcSemantics
, fcZero
, sign
);
2305 APFloat
pow5(calcSemantics
, fcZero
, false);
2307 sigStatus
= decSig
.convertFromUnsignedParts(decSigParts
, sigPartCount
,
2308 rmNearestTiesToEven
);
2309 powStatus
= pow5
.convertFromUnsignedParts(pow5Parts
, pow5PartCount
,
2310 rmNearestTiesToEven
);
2311 /* Add exp, as 10^n = 5^n * 2^n. */
2312 decSig
.exponent
+= exp
;
2314 lostFraction calcLostFraction
;
2315 integerPart HUerr
, HUdistance
;
2316 unsigned int powHUerr
;
2319 /* multiplySignificand leaves the precision-th bit set to 1. */
2320 calcLostFraction
= decSig
.multiplySignificand(pow5
, NULL
);
2321 powHUerr
= powStatus
!= opOK
;
2323 calcLostFraction
= decSig
.divideSignificand(pow5
);
2324 /* Denormal numbers have less precision. */
2325 if (decSig
.exponent
< semantics
->minExponent
) {
2326 excessPrecision
+= (semantics
->minExponent
- decSig
.exponent
);
2327 truncatedBits
= excessPrecision
;
2328 if (excessPrecision
> calcSemantics
.precision
)
2329 excessPrecision
= calcSemantics
.precision
;
2331 /* Extra half-ulp lost in reciprocal of exponent. */
2332 powHUerr
= (powStatus
== opOK
&& calcLostFraction
== lfExactlyZero
) ? 0:2;
2335 /* Both multiplySignificand and divideSignificand return the
2336 result with the integer bit set. */
2337 assert(APInt::tcExtractBit
2338 (decSig
.significandParts(), calcSemantics
.precision
- 1) == 1);
2340 HUerr
= HUerrBound(calcLostFraction
!= lfExactlyZero
, sigStatus
!= opOK
,
2342 HUdistance
= 2 * ulpsFromBoundary(decSig
.significandParts(),
2343 excessPrecision
, isNearest
);
2345 /* Are we guaranteed to round correctly if we truncate? */
2346 if (HUdistance
>= HUerr
) {
2347 APInt::tcExtract(significandParts(), partCount(), decSig
.significandParts(),
2348 calcSemantics
.precision
- excessPrecision
,
2350 /* Take the exponent of decSig. If we tcExtract-ed less bits
2351 above we must adjust our exponent to compensate for the
2352 implicit right shift. */
2353 exponent
= (decSig
.exponent
+ semantics
->precision
2354 - (calcSemantics
.precision
- excessPrecision
));
2355 calcLostFraction
= lostFractionThroughTruncation(decSig
.significandParts(),
2358 return normalize(rounding_mode
, calcLostFraction
);
2364 APFloat::convertFromDecimalString(StringRef str
, roundingMode rounding_mode
)
2369 /* Scan the text. */
2370 StringRef::iterator p
= str
.begin();
2371 interpretDecimal(p
, str
.end(), &D
);
2373 /* Handle the quick cases. First the case of no significant digits,
2374 i.e. zero, and then exponents that are obviously too large or too
2375 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2376 definitely overflows if
2378 (exp - 1) * L >= maxExponent
2380 and definitely underflows to zero where
2382 (exp + 1) * L <= minExponent - precision
2384 With integer arithmetic the tightest bounds for L are
2386 93/28 < L < 196/59 [ numerator <= 256 ]
2387 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2390 if (decDigitValue(*D
.firstSigDigit
) >= 10U) {
2394 /* Check whether the normalized exponent is high enough to overflow
2395 max during the log-rebasing in the max-exponent check below. */
2396 } else if (D
.normalizedExponent
- 1 > INT_MAX
/ 42039) {
2397 fs
= handleOverflow(rounding_mode
);
2399 /* If it wasn't, then it also wasn't high enough to overflow max
2400 during the log-rebasing in the min-exponent check. Check that it
2401 won't overflow min in either check, then perform the min-exponent
2403 } else if (D
.normalizedExponent
- 1 < INT_MIN
/ 42039 ||
2404 (D
.normalizedExponent
+ 1) * 28738 <=
2405 8651 * (semantics
->minExponent
- (int) semantics
->precision
)) {
2406 /* Underflow to zero and round. */
2408 fs
= normalize(rounding_mode
, lfLessThanHalf
);
2410 /* We can finally safely perform the max-exponent check. */
2411 } else if ((D
.normalizedExponent
- 1) * 42039
2412 >= 12655 * semantics
->maxExponent
) {
2413 /* Overflow and round. */
2414 fs
= handleOverflow(rounding_mode
);
2416 integerPart
*decSignificand
;
2417 unsigned int partCount
;
2419 /* A tight upper bound on number of bits required to hold an
2420 N-digit decimal integer is N * 196 / 59. Allocate enough space
2421 to hold the full significand, and an extra part required by
2423 partCount
= static_cast<unsigned int>(D
.lastSigDigit
- D
.firstSigDigit
) + 1;
2424 partCount
= partCountForBits(1 + 196 * partCount
/ 59);
2425 decSignificand
= new integerPart
[partCount
+ 1];
2428 /* Convert to binary efficiently - we do almost all multiplication
2429 in an integerPart. When this would overflow do we do a single
2430 bignum multiplication, and then revert again to multiplication
2431 in an integerPart. */
2433 integerPart decValue
, val
, multiplier
;
2441 if (p
== str
.end()) {
2445 decValue
= decDigitValue(*p
++);
2446 assert(decValue
< 10U && "Invalid character in significand");
2448 val
= val
* 10 + decValue
;
2449 /* The maximum number that can be multiplied by ten with any
2450 digit added without overflowing an integerPart. */
2451 } while (p
<= D
.lastSigDigit
&& multiplier
<= (~ (integerPart
) 0 - 9) / 10);
2453 /* Multiply out the current part. */
2454 APInt::tcMultiplyPart(decSignificand
, decSignificand
, multiplier
, val
,
2455 partCount
, partCount
+ 1, false);
2457 /* If we used another part (likely but not guaranteed), increase
2459 if (decSignificand
[partCount
])
2461 } while (p
<= D
.lastSigDigit
);
2463 category
= fcNormal
;
2464 fs
= roundSignificandWithExponent(decSignificand
, partCount
,
2465 D
.exponent
, rounding_mode
);
2467 delete [] decSignificand
;
2474 APFloat::convertFromString(StringRef str
, roundingMode rounding_mode
)
2476 assertArithmeticOK(*semantics
);
2477 assert(!str
.empty() && "Invalid string length");
2479 /* Handle a leading minus sign. */
2480 StringRef::iterator p
= str
.begin();
2481 size_t slen
= str
.size();
2482 sign
= *p
== '-' ? 1 : 0;
2483 if (*p
== '-' || *p
== '+') {
2486 assert(slen
&& "String has no digits");
2489 if (slen
>= 2 && p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
2490 assert(slen
- 2 && "Invalid string");
2491 return convertFromHexadecimalString(StringRef(p
+ 2, slen
- 2),
2495 return convertFromDecimalString(StringRef(p
, slen
), rounding_mode
);
2498 /* Write out a hexadecimal representation of the floating point value
2499 to DST, which must be of sufficient size, in the C99 form
2500 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2501 excluding the terminating NUL.
2503 If UPPERCASE, the output is in upper case, otherwise in lower case.
2505 HEXDIGITS digits appear altogether, rounding the value if
2506 necessary. If HEXDIGITS is 0, the minimal precision to display the
2507 number precisely is used instead. If nothing would appear after
2508 the decimal point it is suppressed.
2510 The decimal exponent is always printed and has at least one digit.
2511 Zero values display an exponent of zero. Infinities and NaNs
2512 appear as "infinity" or "nan" respectively.
2514 The above rules are as specified by C99. There is ambiguity about
2515 what the leading hexadecimal digit should be. This implementation
2516 uses whatever is necessary so that the exponent is displayed as
2517 stored. This implies the exponent will fall within the IEEE format
2518 range, and the leading hexadecimal digit will be 0 (for denormals),
2519 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2520 any other digits zero).
2523 APFloat::convertToHexString(char *dst
, unsigned int hexDigits
,
2524 bool upperCase
, roundingMode rounding_mode
) const
2528 assertArithmeticOK(*semantics
);
2536 memcpy (dst
, upperCase
? infinityU
: infinityL
, sizeof infinityU
- 1);
2537 dst
+= sizeof infinityL
- 1;
2541 memcpy (dst
, upperCase
? NaNU
: NaNL
, sizeof NaNU
- 1);
2542 dst
+= sizeof NaNU
- 1;
2547 *dst
++ = upperCase
? 'X': 'x';
2549 if (hexDigits
> 1) {
2551 memset (dst
, '0', hexDigits
- 1);
2552 dst
+= hexDigits
- 1;
2554 *dst
++ = upperCase
? 'P': 'p';
2559 dst
= convertNormalToHexString (dst
, hexDigits
, upperCase
, rounding_mode
);
2565 return static_cast<unsigned int>(dst
- p
);
2568 /* Does the hard work of outputting the correctly rounded hexadecimal
2569 form of a normal floating point number with the specified number of
2570 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2571 digits necessary to print the value precisely is output. */
2573 APFloat::convertNormalToHexString(char *dst
, unsigned int hexDigits
,
2575 roundingMode rounding_mode
) const
2577 unsigned int count
, valueBits
, shift
, partsCount
, outputDigits
;
2578 const char *hexDigitChars
;
2579 const integerPart
*significand
;
2584 *dst
++ = upperCase
? 'X': 'x';
2587 hexDigitChars
= upperCase
? hexDigitsUpper
: hexDigitsLower
;
2589 significand
= significandParts();
2590 partsCount
= partCount();
2592 /* +3 because the first digit only uses the single integer bit, so
2593 we have 3 virtual zero most-significant-bits. */
2594 valueBits
= semantics
->precision
+ 3;
2595 shift
= integerPartWidth
- valueBits
% integerPartWidth
;
2597 /* The natural number of digits required ignoring trailing
2598 insignificant zeroes. */
2599 outputDigits
= (valueBits
- significandLSB () + 3) / 4;
2601 /* hexDigits of zero means use the required number for the
2602 precision. Otherwise, see if we are truncating. If we are,
2603 find out if we need to round away from zero. */
2605 if (hexDigits
< outputDigits
) {
2606 /* We are dropping non-zero bits, so need to check how to round.
2607 "bits" is the number of dropped bits. */
2609 lostFraction fraction
;
2611 bits
= valueBits
- hexDigits
* 4;
2612 fraction
= lostFractionThroughTruncation (significand
, partsCount
, bits
);
2613 roundUp
= roundAwayFromZero(rounding_mode
, fraction
, bits
);
2615 outputDigits
= hexDigits
;
2618 /* Write the digits consecutively, and start writing in the location
2619 of the hexadecimal point. We move the most significant digit
2620 left and add the hexadecimal point later. */
2623 count
= (valueBits
+ integerPartWidth
- 1) / integerPartWidth
;
2625 while (outputDigits
&& count
) {
2628 /* Put the most significant integerPartWidth bits in "part". */
2629 if (--count
== partsCount
)
2630 part
= 0; /* An imaginary higher zero part. */
2632 part
= significand
[count
] << shift
;
2635 part
|= significand
[count
- 1] >> (integerPartWidth
- shift
);
2637 /* Convert as much of "part" to hexdigits as we can. */
2638 unsigned int curDigits
= integerPartWidth
/ 4;
2640 if (curDigits
> outputDigits
)
2641 curDigits
= outputDigits
;
2642 dst
+= partAsHex (dst
, part
, curDigits
, hexDigitChars
);
2643 outputDigits
-= curDigits
;
2649 /* Note that hexDigitChars has a trailing '0'. */
2652 *q
= hexDigitChars
[hexDigitValue (*q
) + 1];
2653 } while (*q
== '0');
2656 /* Add trailing zeroes. */
2657 memset (dst
, '0', outputDigits
);
2658 dst
+= outputDigits
;
2661 /* Move the most significant digit to before the point, and if there
2662 is something after the decimal point add it. This must come
2663 after rounding above. */
2670 /* Finally output the exponent. */
2671 *dst
++ = upperCase
? 'P': 'p';
2673 return writeSignedDecimal (dst
, exponent
);
2676 // For good performance it is desirable for different APFloats
2677 // to produce different integers.
2679 APFloat::getHashValue() const
2681 if (category
==fcZero
) return sign
<<8 | semantics
->precision
;
2682 else if (category
==fcInfinity
) return sign
<<9 | semantics
->precision
;
2683 else if (category
==fcNaN
) return 1<<10 | semantics
->precision
;
2685 uint32_t hash
= sign
<<11 | semantics
->precision
| exponent
<<12;
2686 const integerPart
* p
= significandParts();
2687 for (int i
=partCount(); i
>0; i
--, p
++)
2688 hash
^= ((uint32_t)*p
) ^ (uint32_t)((*p
)>>32);
2693 // Conversion from APFloat to/from host float/double. It may eventually be
2694 // possible to eliminate these and have everybody deal with APFloats, but that
2695 // will take a while. This approach will not easily extend to long double.
2696 // Current implementation requires integerPartWidth==64, which is correct at
2697 // the moment but could be made more general.
2699 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2700 // the actual IEEE respresentations. We compensate for that here.
2703 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2705 assert(semantics
== (const llvm::fltSemantics
*)&x87DoubleExtended
);
2706 assert(partCount()==2);
2708 uint64_t myexponent
, mysignificand
;
2710 if (category
==fcNormal
) {
2711 myexponent
= exponent
+16383; //bias
2712 mysignificand
= significandParts()[0];
2713 if (myexponent
==1 && !(mysignificand
& 0x8000000000000000ULL
))
2714 myexponent
= 0; // denormal
2715 } else if (category
==fcZero
) {
2718 } else if (category
==fcInfinity
) {
2719 myexponent
= 0x7fff;
2720 mysignificand
= 0x8000000000000000ULL
;
2722 assert(category
== fcNaN
&& "Unknown category");
2723 myexponent
= 0x7fff;
2724 mysignificand
= significandParts()[0];
2728 words
[0] = mysignificand
;
2729 words
[1] = ((uint64_t)(sign
& 1) << 15) |
2730 (myexponent
& 0x7fffLL
);
2731 return APInt(80, 2, words
);
2735 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2737 assert(semantics
== (const llvm::fltSemantics
*)&PPCDoubleDouble
);
2738 assert(partCount()==2);
2740 uint64_t myexponent
, mysignificand
, myexponent2
, mysignificand2
;
2742 if (category
==fcNormal
) {
2743 myexponent
= exponent
+ 1023; //bias
2744 myexponent2
= exponent2
+ 1023;
2745 mysignificand
= significandParts()[0];
2746 mysignificand2
= significandParts()[1];
2747 if (myexponent
==1 && !(mysignificand
& 0x10000000000000LL
))
2748 myexponent
= 0; // denormal
2749 if (myexponent2
==1 && !(mysignificand2
& 0x10000000000000LL
))
2750 myexponent2
= 0; // denormal
2751 } else if (category
==fcZero
) {
2756 } else if (category
==fcInfinity
) {
2762 assert(category
== fcNaN
&& "Unknown category");
2764 mysignificand
= significandParts()[0];
2765 myexponent2
= exponent2
;
2766 mysignificand2
= significandParts()[1];
2770 words
[0] = ((uint64_t)(sign
& 1) << 63) |
2771 ((myexponent
& 0x7ff) << 52) |
2772 (mysignificand
& 0xfffffffffffffLL
);
2773 words
[1] = ((uint64_t)(sign2
& 1) << 63) |
2774 ((myexponent2
& 0x7ff) << 52) |
2775 (mysignificand2
& 0xfffffffffffffLL
);
2776 return APInt(128, 2, words
);
2780 APFloat::convertQuadrupleAPFloatToAPInt() const
2782 assert(semantics
== (const llvm::fltSemantics
*)&IEEEquad
);
2783 assert(partCount()==2);
2785 uint64_t myexponent
, mysignificand
, mysignificand2
;
2787 if (category
==fcNormal
) {
2788 myexponent
= exponent
+16383; //bias
2789 mysignificand
= significandParts()[0];
2790 mysignificand2
= significandParts()[1];
2791 if (myexponent
==1 && !(mysignificand2
& 0x1000000000000LL
))
2792 myexponent
= 0; // denormal
2793 } else if (category
==fcZero
) {
2795 mysignificand
= mysignificand2
= 0;
2796 } else if (category
==fcInfinity
) {
2797 myexponent
= 0x7fff;
2798 mysignificand
= mysignificand2
= 0;
2800 assert(category
== fcNaN
&& "Unknown category!");
2801 myexponent
= 0x7fff;
2802 mysignificand
= significandParts()[0];
2803 mysignificand2
= significandParts()[1];
2807 words
[0] = mysignificand
;
2808 words
[1] = ((uint64_t)(sign
& 1) << 63) |
2809 ((myexponent
& 0x7fff) << 48) |
2810 (mysignificand2
& 0xffffffffffffLL
);
2812 return APInt(128, 2, words
);
2816 APFloat::convertDoubleAPFloatToAPInt() const
2818 assert(semantics
== (const llvm::fltSemantics
*)&IEEEdouble
);
2819 assert(partCount()==1);
2821 uint64_t myexponent
, mysignificand
;
2823 if (category
==fcNormal
) {
2824 myexponent
= exponent
+1023; //bias
2825 mysignificand
= *significandParts();
2826 if (myexponent
==1 && !(mysignificand
& 0x10000000000000LL
))
2827 myexponent
= 0; // denormal
2828 } else if (category
==fcZero
) {
2831 } else if (category
==fcInfinity
) {
2835 assert(category
== fcNaN
&& "Unknown category!");
2837 mysignificand
= *significandParts();
2840 return APInt(64, ((((uint64_t)(sign
& 1) << 63) |
2841 ((myexponent
& 0x7ff) << 52) |
2842 (mysignificand
& 0xfffffffffffffLL
))));
2846 APFloat::convertFloatAPFloatToAPInt() const
2848 assert(semantics
== (const llvm::fltSemantics
*)&IEEEsingle
);
2849 assert(partCount()==1);
2851 uint32_t myexponent
, mysignificand
;
2853 if (category
==fcNormal
) {
2854 myexponent
= exponent
+127; //bias
2855 mysignificand
= (uint32_t)*significandParts();
2856 if (myexponent
== 1 && !(mysignificand
& 0x800000))
2857 myexponent
= 0; // denormal
2858 } else if (category
==fcZero
) {
2861 } else if (category
==fcInfinity
) {
2865 assert(category
== fcNaN
&& "Unknown category!");
2867 mysignificand
= (uint32_t)*significandParts();
2870 return APInt(32, (((sign
&1) << 31) | ((myexponent
&0xff) << 23) |
2871 (mysignificand
& 0x7fffff)));
2875 APFloat::convertHalfAPFloatToAPInt() const
2877 assert(semantics
== (const llvm::fltSemantics
*)&IEEEhalf
);
2878 assert(partCount()==1);
2880 uint32_t myexponent
, mysignificand
;
2882 if (category
==fcNormal
) {
2883 myexponent
= exponent
+15; //bias
2884 mysignificand
= (uint32_t)*significandParts();
2885 if (myexponent
== 1 && !(mysignificand
& 0x400))
2886 myexponent
= 0; // denormal
2887 } else if (category
==fcZero
) {
2890 } else if (category
==fcInfinity
) {
2894 assert(category
== fcNaN
&& "Unknown category!");
2896 mysignificand
= (uint32_t)*significandParts();
2899 return APInt(16, (((sign
&1) << 15) | ((myexponent
&0x1f) << 10) |
2900 (mysignificand
& 0x3ff)));
2903 // This function creates an APInt that is just a bit map of the floating
2904 // point constant as it would appear in memory. It is not a conversion,
2905 // and treating the result as a normal integer is unlikely to be useful.
2908 APFloat::bitcastToAPInt() const
2910 if (semantics
== (const llvm::fltSemantics
*)&IEEEhalf
)
2911 return convertHalfAPFloatToAPInt();
2913 if (semantics
== (const llvm::fltSemantics
*)&IEEEsingle
)
2914 return convertFloatAPFloatToAPInt();
2916 if (semantics
== (const llvm::fltSemantics
*)&IEEEdouble
)
2917 return convertDoubleAPFloatToAPInt();
2919 if (semantics
== (const llvm::fltSemantics
*)&IEEEquad
)
2920 return convertQuadrupleAPFloatToAPInt();
2922 if (semantics
== (const llvm::fltSemantics
*)&PPCDoubleDouble
)
2923 return convertPPCDoubleDoubleAPFloatToAPInt();
2925 assert(semantics
== (const llvm::fltSemantics
*)&x87DoubleExtended
&&
2927 return convertF80LongDoubleAPFloatToAPInt();
2931 APFloat::convertToFloat() const
2933 assert(semantics
== (const llvm::fltSemantics
*)&IEEEsingle
&&
2934 "Float semantics are not IEEEsingle");
2935 APInt api
= bitcastToAPInt();
2936 return api
.bitsToFloat();
2940 APFloat::convertToDouble() const
2942 assert(semantics
== (const llvm::fltSemantics
*)&IEEEdouble
&&
2943 "Float semantics are not IEEEdouble");
2944 APInt api
= bitcastToAPInt();
2945 return api
.bitsToDouble();
2948 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2949 /// does not support these bit patterns:
2950 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2951 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2952 /// exponent = 0, integer bit 1 ("pseudodenormal")
2953 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2954 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2956 APFloat::initFromF80LongDoubleAPInt(const APInt
&api
)
2958 assert(api
.getBitWidth()==80);
2959 uint64_t i1
= api
.getRawData()[0];
2960 uint64_t i2
= api
.getRawData()[1];
2961 uint64_t myexponent
= (i2
& 0x7fff);
2962 uint64_t mysignificand
= i1
;
2964 initialize(&APFloat::x87DoubleExtended
);
2965 assert(partCount()==2);
2967 sign
= static_cast<unsigned int>(i2
>>15);
2968 if (myexponent
==0 && mysignificand
==0) {
2969 // exponent, significand meaningless
2971 } else if (myexponent
==0x7fff && mysignificand
==0x8000000000000000ULL
) {
2972 // exponent, significand meaningless
2973 category
= fcInfinity
;
2974 } else if (myexponent
==0x7fff && mysignificand
!=0x8000000000000000ULL
) {
2975 // exponent meaningless
2977 significandParts()[0] = mysignificand
;
2978 significandParts()[1] = 0;
2980 category
= fcNormal
;
2981 exponent
= myexponent
- 16383;
2982 significandParts()[0] = mysignificand
;
2983 significandParts()[1] = 0;
2984 if (myexponent
==0) // denormal
2990 APFloat::initFromPPCDoubleDoubleAPInt(const APInt
&api
)
2992 assert(api
.getBitWidth()==128);
2993 uint64_t i1
= api
.getRawData()[0];
2994 uint64_t i2
= api
.getRawData()[1];
2995 uint64_t myexponent
= (i1
>> 52) & 0x7ff;
2996 uint64_t mysignificand
= i1
& 0xfffffffffffffLL
;
2997 uint64_t myexponent2
= (i2
>> 52) & 0x7ff;
2998 uint64_t mysignificand2
= i2
& 0xfffffffffffffLL
;
3000 initialize(&APFloat::PPCDoubleDouble
);
3001 assert(partCount()==2);
3003 sign
= static_cast<unsigned int>(i1
>>63);
3004 sign2
= static_cast<unsigned int>(i2
>>63);
3005 if (myexponent
==0 && mysignificand
==0) {
3006 // exponent, significand meaningless
3007 // exponent2 and significand2 are required to be 0; we don't check
3009 } else if (myexponent
==0x7ff && mysignificand
==0) {
3010 // exponent, significand meaningless
3011 // exponent2 and significand2 are required to be 0; we don't check
3012 category
= fcInfinity
;
3013 } else if (myexponent
==0x7ff && mysignificand
!=0) {
3014 // exponent meaningless. So is the whole second word, but keep it
3017 exponent2
= myexponent2
;
3018 significandParts()[0] = mysignificand
;
3019 significandParts()[1] = mysignificand2
;
3021 category
= fcNormal
;
3022 // Note there is no category2; the second word is treated as if it is
3023 // fcNormal, although it might be something else considered by itself.
3024 exponent
= myexponent
- 1023;
3025 exponent2
= myexponent2
- 1023;
3026 significandParts()[0] = mysignificand
;
3027 significandParts()[1] = mysignificand2
;
3028 if (myexponent
==0) // denormal
3031 significandParts()[0] |= 0x10000000000000LL
; // integer bit
3035 significandParts()[1] |= 0x10000000000000LL
; // integer bit
3040 APFloat::initFromQuadrupleAPInt(const APInt
&api
)
3042 assert(api
.getBitWidth()==128);
3043 uint64_t i1
= api
.getRawData()[0];
3044 uint64_t i2
= api
.getRawData()[1];
3045 uint64_t myexponent
= (i2
>> 48) & 0x7fff;
3046 uint64_t mysignificand
= i1
;
3047 uint64_t mysignificand2
= i2
& 0xffffffffffffLL
;
3049 initialize(&APFloat::IEEEquad
);
3050 assert(partCount()==2);
3052 sign
= static_cast<unsigned int>(i2
>>63);
3053 if (myexponent
==0 &&
3054 (mysignificand
==0 && mysignificand2
==0)) {
3055 // exponent, significand meaningless
3057 } else if (myexponent
==0x7fff &&
3058 (mysignificand
==0 && mysignificand2
==0)) {
3059 // exponent, significand meaningless
3060 category
= fcInfinity
;
3061 } else if (myexponent
==0x7fff &&
3062 (mysignificand
!=0 || mysignificand2
!=0)) {
3063 // exponent meaningless
3065 significandParts()[0] = mysignificand
;
3066 significandParts()[1] = mysignificand2
;
3068 category
= fcNormal
;
3069 exponent
= myexponent
- 16383;
3070 significandParts()[0] = mysignificand
;
3071 significandParts()[1] = mysignificand2
;
3072 if (myexponent
==0) // denormal
3075 significandParts()[1] |= 0x1000000000000LL
; // integer bit
3080 APFloat::initFromDoubleAPInt(const APInt
&api
)
3082 assert(api
.getBitWidth()==64);
3083 uint64_t i
= *api
.getRawData();
3084 uint64_t myexponent
= (i
>> 52) & 0x7ff;
3085 uint64_t mysignificand
= i
& 0xfffffffffffffLL
;
3087 initialize(&APFloat::IEEEdouble
);
3088 assert(partCount()==1);
3090 sign
= static_cast<unsigned int>(i
>>63);
3091 if (myexponent
==0 && mysignificand
==0) {
3092 // exponent, significand meaningless
3094 } else if (myexponent
==0x7ff && mysignificand
==0) {
3095 // exponent, significand meaningless
3096 category
= fcInfinity
;
3097 } else if (myexponent
==0x7ff && mysignificand
!=0) {
3098 // exponent meaningless
3100 *significandParts() = mysignificand
;
3102 category
= fcNormal
;
3103 exponent
= myexponent
- 1023;
3104 *significandParts() = mysignificand
;
3105 if (myexponent
==0) // denormal
3108 *significandParts() |= 0x10000000000000LL
; // integer bit
3113 APFloat::initFromFloatAPInt(const APInt
& api
)
3115 assert(api
.getBitWidth()==32);
3116 uint32_t i
= (uint32_t)*api
.getRawData();
3117 uint32_t myexponent
= (i
>> 23) & 0xff;
3118 uint32_t mysignificand
= i
& 0x7fffff;
3120 initialize(&APFloat::IEEEsingle
);
3121 assert(partCount()==1);
3124 if (myexponent
==0 && mysignificand
==0) {
3125 // exponent, significand meaningless
3127 } else if (myexponent
==0xff && mysignificand
==0) {
3128 // exponent, significand meaningless
3129 category
= fcInfinity
;
3130 } else if (myexponent
==0xff && mysignificand
!=0) {
3131 // sign, exponent, significand meaningless
3133 *significandParts() = mysignificand
;
3135 category
= fcNormal
;
3136 exponent
= myexponent
- 127; //bias
3137 *significandParts() = mysignificand
;
3138 if (myexponent
==0) // denormal
3141 *significandParts() |= 0x800000; // integer bit
3146 APFloat::initFromHalfAPInt(const APInt
& api
)
3148 assert(api
.getBitWidth()==16);
3149 uint32_t i
= (uint32_t)*api
.getRawData();
3150 uint32_t myexponent
= (i
>> 10) & 0x1f;
3151 uint32_t mysignificand
= i
& 0x3ff;
3153 initialize(&APFloat::IEEEhalf
);
3154 assert(partCount()==1);
3157 if (myexponent
==0 && mysignificand
==0) {
3158 // exponent, significand meaningless
3160 } else if (myexponent
==0x1f && mysignificand
==0) {
3161 // exponent, significand meaningless
3162 category
= fcInfinity
;
3163 } else if (myexponent
==0x1f && mysignificand
!=0) {
3164 // sign, exponent, significand meaningless
3166 *significandParts() = mysignificand
;
3168 category
= fcNormal
;
3169 exponent
= myexponent
- 15; //bias
3170 *significandParts() = mysignificand
;
3171 if (myexponent
==0) // denormal
3174 *significandParts() |= 0x400; // integer bit
3178 /// Treat api as containing the bits of a floating point number. Currently
3179 /// we infer the floating point type from the size of the APInt. The
3180 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3181 /// when the size is anything else).
3183 APFloat::initFromAPInt(const APInt
& api
, bool isIEEE
)
3185 if (api
.getBitWidth() == 16)
3186 return initFromHalfAPInt(api
);
3187 else if (api
.getBitWidth() == 32)
3188 return initFromFloatAPInt(api
);
3189 else if (api
.getBitWidth()==64)
3190 return initFromDoubleAPInt(api
);
3191 else if (api
.getBitWidth()==80)
3192 return initFromF80LongDoubleAPInt(api
);
3193 else if (api
.getBitWidth()==128)
3195 initFromQuadrupleAPInt(api
) : initFromPPCDoubleDoubleAPInt(api
));
3197 llvm_unreachable(0);
3201 APFloat::getAllOnesValue(unsigned BitWidth
, bool isIEEE
)
3203 return APFloat(APInt::getAllOnesValue(BitWidth
), isIEEE
);
3206 APFloat
APFloat::getLargest(const fltSemantics
&Sem
, bool Negative
) {
3207 APFloat
Val(Sem
, fcNormal
, Negative
);
3209 // We want (in interchange format):
3210 // sign = {Negative}
3212 // significand = 1..1
3214 Val
.exponent
= Sem
.maxExponent
; // unbiased
3216 // 1-initialize all bits....
3217 Val
.zeroSignificand();
3218 integerPart
*significand
= Val
.significandParts();
3219 unsigned N
= partCountForBits(Sem
.precision
);
3220 for (unsigned i
= 0; i
!= N
; ++i
)
3221 significand
[i
] = ~((integerPart
) 0);
3223 // ...and then clear the top bits for internal consistency.
3225 (((integerPart
) 1) << ((Sem
.precision
% integerPartWidth
) - 1)) - 1;
3230 APFloat
APFloat::getSmallest(const fltSemantics
&Sem
, bool Negative
) {
3231 APFloat
Val(Sem
, fcNormal
, Negative
);
3233 // We want (in interchange format):
3234 // sign = {Negative}
3236 // significand = 0..01
3238 Val
.exponent
= Sem
.minExponent
; // unbiased
3239 Val
.zeroSignificand();
3240 Val
.significandParts()[0] = 1;
3244 APFloat
APFloat::getSmallestNormalized(const fltSemantics
&Sem
, bool Negative
) {
3245 APFloat
Val(Sem
, fcNormal
, Negative
);
3247 // We want (in interchange format):
3248 // sign = {Negative}
3250 // significand = 10..0
3252 Val
.exponent
= Sem
.minExponent
;
3253 Val
.zeroSignificand();
3254 Val
.significandParts()[partCountForBits(Sem
.precision
)-1] |=
3255 (((integerPart
) 1) << ((Sem
.precision
% integerPartWidth
) - 1));
3260 APFloat::APFloat(const APInt
& api
, bool isIEEE
) : exponent2(0), sign2(0) {
3261 initFromAPInt(api
, isIEEE
);
3264 APFloat::APFloat(float f
) : exponent2(0), sign2(0) {
3265 initFromAPInt(APInt::floatToBits(f
));
3268 APFloat::APFloat(double d
) : exponent2(0), sign2(0) {
3269 initFromAPInt(APInt::doubleToBits(d
));
3273 static void append(SmallVectorImpl
<char> &Buffer
,
3274 unsigned N
, const char *Str
) {
3275 unsigned Start
= Buffer
.size();
3276 Buffer
.set_size(Start
+ N
);
3277 memcpy(&Buffer
[Start
], Str
, N
);
3280 template <unsigned N
>
3281 void append(SmallVectorImpl
<char> &Buffer
, const char (&Str
)[N
]) {
3282 append(Buffer
, N
, Str
);
3285 /// Removes data from the given significand until it is no more
3286 /// precise than is required for the desired precision.
3287 void AdjustToPrecision(APInt
&significand
,
3288 int &exp
, unsigned FormatPrecision
) {
3289 unsigned bits
= significand
.getActiveBits();
3291 // 196/59 is a very slight overestimate of lg_2(10).
3292 unsigned bitsRequired
= (FormatPrecision
* 196 + 58) / 59;
3294 if (bits
<= bitsRequired
) return;
3296 unsigned tensRemovable
= (bits
- bitsRequired
) * 59 / 196;
3297 if (!tensRemovable
) return;
3299 exp
+= tensRemovable
;
3301 APInt
divisor(significand
.getBitWidth(), 1);
3302 APInt
powten(significand
.getBitWidth(), 10);
3304 if (tensRemovable
& 1)
3306 tensRemovable
>>= 1;
3307 if (!tensRemovable
) break;
3311 significand
= significand
.udiv(divisor
);
3313 // Truncate the significand down to its active bit count, but
3314 // don't try to drop below 32.
3315 unsigned newPrecision
= std::max(32U, significand
.getActiveBits());
3316 significand
= significand
.trunc(newPrecision
);
3320 void AdjustToPrecision(SmallVectorImpl
<char> &buffer
,
3321 int &exp
, unsigned FormatPrecision
) {
3322 unsigned N
= buffer
.size();
3323 if (N
<= FormatPrecision
) return;
3325 // The most significant figures are the last ones in the buffer.
3326 unsigned FirstSignificant
= N
- FormatPrecision
;
3329 // FIXME: this probably shouldn't use 'round half up'.
3331 // Rounding down is just a truncation, except we also want to drop
3332 // trailing zeros from the new result.
3333 if (buffer
[FirstSignificant
- 1] < '5') {
3334 while (buffer
[FirstSignificant
] == '0')
3337 exp
+= FirstSignificant
;
3338 buffer
.erase(&buffer
[0], &buffer
[FirstSignificant
]);
3342 // Rounding up requires a decimal add-with-carry. If we continue
3343 // the carry, the newly-introduced zeros will just be truncated.
3344 for (unsigned I
= FirstSignificant
; I
!= N
; ++I
) {
3345 if (buffer
[I
] == '9') {
3353 // If we carried through, we have exactly one digit of precision.
3354 if (FirstSignificant
== N
) {
3355 exp
+= FirstSignificant
;
3357 buffer
.push_back('1');
3361 exp
+= FirstSignificant
;
3362 buffer
.erase(&buffer
[0], &buffer
[FirstSignificant
]);
3366 void APFloat::toString(SmallVectorImpl
<char> &Str
,
3367 unsigned FormatPrecision
,
3368 unsigned FormatMaxPadding
) const {
3372 return append(Str
, "-Inf");
3374 return append(Str
, "+Inf");
3376 case fcNaN
: return append(Str
, "NaN");
3382 if (!FormatMaxPadding
)
3383 append(Str
, "0.0E+0");
3395 // Decompose the number into an APInt and an exponent.
3396 int exp
= exponent
- ((int) semantics
->precision
- 1);
3397 APInt
significand(semantics
->precision
,
3398 partCountForBits(semantics
->precision
),
3399 significandParts());
3401 // Set FormatPrecision if zero. We want to do this before we
3402 // truncate trailing zeros, as those are part of the precision.
3403 if (!FormatPrecision
) {
3404 // It's an interesting question whether to use the nominal
3405 // precision or the active precision here for denormals.
3407 // FormatPrecision = ceil(significandBits / lg_2(10))
3408 FormatPrecision
= (semantics
->precision
* 59 + 195) / 196;
3411 // Ignore trailing binary zeros.
3412 int trailingZeros
= significand
.countTrailingZeros();
3413 exp
+= trailingZeros
;
3414 significand
= significand
.lshr(trailingZeros
);
3416 // Change the exponent from 2^e to 10^e.
3419 } else if (exp
> 0) {
3421 significand
= significand
.zext(semantics
->precision
+ exp
);
3422 significand
<<= exp
;
3424 } else { /* exp < 0 */
3427 // We transform this using the identity:
3428 // (N)(2^-e) == (N)(5^e)(10^-e)
3429 // This means we have to multiply N (the significand) by 5^e.
3430 // To avoid overflow, we have to operate on numbers large
3431 // enough to store N * 5^e:
3432 // log2(N * 5^e) == log2(N) + e * log2(5)
3433 // <= semantics->precision + e * 137 / 59
3434 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3436 unsigned precision
= semantics
->precision
+ 137 * texp
/ 59;
3438 // Multiply significand by 5^e.
3439 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3440 significand
= significand
.zext(precision
);
3441 APInt
five_to_the_i(precision
, 5);
3443 if (texp
& 1) significand
*= five_to_the_i
;
3447 five_to_the_i
*= five_to_the_i
;
3451 AdjustToPrecision(significand
, exp
, FormatPrecision
);
3453 llvm::SmallVector
<char, 256> buffer
;
3456 unsigned precision
= significand
.getBitWidth();
3457 APInt
ten(precision
, 10);
3458 APInt
digit(precision
, 0);
3460 bool inTrail
= true;
3461 while (significand
!= 0) {
3462 // digit <- significand % 10
3463 // significand <- significand / 10
3464 APInt::udivrem(significand
, ten
, significand
, digit
);
3466 unsigned d
= digit
.getZExtValue();
3468 // Drop trailing zeros.
3469 if (inTrail
&& !d
) exp
++;
3471 buffer
.push_back((char) ('0' + d
));
3476 assert(!buffer
.empty() && "no characters in buffer!");
3478 // Drop down to FormatPrecision.
3479 // TODO: don't do more precise calculations above than are required.
3480 AdjustToPrecision(buffer
, exp
, FormatPrecision
);
3482 unsigned NDigits
= buffer
.size();
3484 // Check whether we should use scientific notation.
3485 bool FormatScientific
;
3486 if (!FormatMaxPadding
)
3487 FormatScientific
= true;
3492 // But we shouldn't make the number look more precise than it is.
3493 FormatScientific
= ((unsigned) exp
> FormatMaxPadding
||
3494 NDigits
+ (unsigned) exp
> FormatPrecision
);
3496 // Power of the most significant digit.
3497 int MSD
= exp
+ (int) (NDigits
- 1);
3500 FormatScientific
= false;
3502 // 765e-5 == 0.00765
3504 FormatScientific
= ((unsigned) -MSD
) > FormatMaxPadding
;
3509 // Scientific formatting is pretty straightforward.
3510 if (FormatScientific
) {
3511 exp
+= (NDigits
- 1);
3513 Str
.push_back(buffer
[NDigits
-1]);
3518 for (unsigned I
= 1; I
!= NDigits
; ++I
)
3519 Str
.push_back(buffer
[NDigits
-1-I
]);
3522 Str
.push_back(exp
>= 0 ? '+' : '-');
3523 if (exp
< 0) exp
= -exp
;
3524 SmallVector
<char, 6> expbuf
;
3526 expbuf
.push_back((char) ('0' + (exp
% 10)));
3529 for (unsigned I
= 0, E
= expbuf
.size(); I
!= E
; ++I
)
3530 Str
.push_back(expbuf
[E
-1-I
]);
3534 // Non-scientific, positive exponents.
3536 for (unsigned I
= 0; I
!= NDigits
; ++I
)
3537 Str
.push_back(buffer
[NDigits
-1-I
]);
3538 for (unsigned I
= 0; I
!= (unsigned) exp
; ++I
)
3543 // Non-scientific, negative exponents.
3545 // The number of digits to the left of the decimal point.
3546 int NWholeDigits
= exp
+ (int) NDigits
;
3549 if (NWholeDigits
> 0) {
3550 for (; I
!= (unsigned) NWholeDigits
; ++I
)
3551 Str
.push_back(buffer
[NDigits
-I
-1]);
3554 unsigned NZeros
= 1 + (unsigned) -NWholeDigits
;
3558 for (unsigned Z
= 1; Z
!= NZeros
; ++Z
)
3562 for (; I
!= NDigits
; ++I
)
3563 Str
.push_back(buffer
[NDigits
-I
-1]);
3566 bool APFloat::getExactInverse(APFloat
*inv
) const {
3567 // We can only guarantee the existence of an exact inverse for IEEE floats.
3568 if (semantics
!= &IEEEhalf
&& semantics
!= &IEEEsingle
&&
3569 semantics
!= &IEEEdouble
&& semantics
!= &IEEEquad
)
3572 // Special floats and denormals have no exact inverse.
3573 if (category
!= fcNormal
)
3576 // Check that the number is a power of two by making sure that only the
3577 // integer bit is set in the significand.
3578 if (significandLSB() != semantics
->precision
- 1)
3582 APFloat
reciprocal(*semantics
, 1ULL);
3583 if (reciprocal
.divide(*this, rmNearestTiesToEven
) != opOK
)
3586 // Avoid multiplication with a denormal, it is not safe on all platforms and
3587 // may be slower than a normal division.
3588 if (reciprocal
.significandMSB() + 1 < reciprocal
.semantics
->precision
)
3591 assert(reciprocal
.category
== fcNormal
&&
3592 reciprocal
.significandLSB() == reciprocal
.semantics
->precision
- 1);