1 //===-- lib/Decimal/big-radix-floating-point.h ------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_
10 #define FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_
12 // This is a helper class for use in floating-point conversions between
13 // binary and decimal representations. It holds a multiple-precision
14 // integer value using digits of a radix that is a large even power of ten
15 // (10,000,000,000,000,000 by default, 10**16). These digits are accompanied
16 // by a signed exponent that denotes multiplication by a power of ten.
17 // The effective radix point is to the right of the digits (i.e., they do
18 // not represent a fraction).
20 // The operations supported by this class are limited to those required
21 // for conversions between binary and decimal representations; it is not
22 // a general-purpose facility.
24 #include "flang/Common/bit-population-count.h"
25 #include "flang/Common/leading-zero-bit-count.h"
26 #include "flang/Common/uint128.h"
27 #include "flang/Decimal/binary-floating-point.h"
28 #include "flang/Decimal/decimal.h"
31 #include <type_traits>
33 namespace Fortran::decimal
{
35 static constexpr std::uint64_t TenToThe(int power
) {
36 return power
<= 0 ? 1 : 10 * TenToThe(power
- 1);
39 // 10**(LOG10RADIX + 3) must be < 2**wordbits, and LOG10RADIX must be
40 // even, so that pairs of decimal digits do not straddle Digits.
41 // So LOG10RADIX must be 16 or 6.
42 template <int PREC
, int LOG10RADIX
= 16> class BigRadixFloatingPointNumber
{
44 using Real
= BinaryFloatingPointNumber
<PREC
>;
45 static constexpr int log10Radix
{LOG10RADIX
};
48 static constexpr std::uint64_t uint64Radix
{TenToThe(log10Radix
)};
49 static constexpr int minDigitBits
{
50 64 - common::LeadingZeroBitCount(uint64Radix
)};
51 using Digit
= common::HostUnsignedIntType
<minDigitBits
>;
52 static constexpr Digit radix
{uint64Radix
};
53 static_assert(radix
< std::numeric_limits
<Digit
>::max() / 1000,
54 "radix is somehow too big");
55 static_assert(radix
> std::numeric_limits
<Digit
>::max() / 10000,
56 "radix is somehow too small");
58 // The base-2 logarithm of the least significant bit that can arise
59 // in a subnormal IEEE floating-point number.
60 static constexpr int minLog2AnyBit
{
61 -Real::exponentBias
- Real::binaryPrecision
};
63 // The number of Digits needed to represent the smallest subnormal.
64 static constexpr int maxDigits
{3 - minLog2AnyBit
/ log10Radix
};
67 explicit BigRadixFloatingPointNumber(
68 enum FortranRounding rounding
= RoundNearest
)
69 : rounding_
{rounding
} {}
71 // Converts a binary floating point value.
72 explicit BigRadixFloatingPointNumber(
73 Real
, enum FortranRounding
= RoundNearest
);
75 BigRadixFloatingPointNumber
&SetToZero() {
82 // Converts decimal floating-point to binary.
83 ConversionToBinaryResult
<PREC
> ConvertToBinary();
85 // Parses and converts to binary. Handles leading spaces,
86 // "NaN", & optionally-signed "Inf". Does not skip internal
88 // The argument is a reference to a pointer that is left
89 // pointing to the first character that wasn't parsed.
90 ConversionToBinaryResult
<PREC
> ConvertToBinary(
91 const char *&, const char *end
= nullptr);
93 // Formats a decimal floating-point number to a user buffer.
94 // May emit "NaN" or "Inf", or an possibly-signed integer.
95 // No decimal point is written, but if it were, it would be
96 // after the last digit; the effective decimal exponent is
97 // returned as part of the result structure so that it can be
98 // formatted by the client.
99 ConversionToDecimalResult
ConvertToDecimal(
100 char *, std::size_t, enum DecimalConversionFlags
, int digits
) const;
102 // Discard decimal digits not needed to distinguish this value
103 // from the decimal encodings of two others (viz., the nearest binary
104 // floating-point numbers immediately below and above this one).
105 // The last decimal digit may not be uniquely determined in all
106 // cases, and will be the mean value when that is so (e.g., if
107 // last decimal digit values 6-8 would all work, it'll be a 7).
108 // This minimization necessarily assumes that the value will be
109 // emitted and read back into the same (or less precise) format
110 // with default rounding to the nearest value.
112 BigRadixFloatingPointNumber
&&less
, BigRadixFloatingPointNumber
&&more
);
114 template <typename STREAM
> STREAM
&Dump(STREAM
&) const;
117 BigRadixFloatingPointNumber(const BigRadixFloatingPointNumber
&that
)
118 : digits_
{that
.digits_
}, exponent_
{that
.exponent_
},
119 isNegative_
{that
.isNegative_
}, rounding_
{that
.rounding_
} {
120 for (int j
{0}; j
< digits_
; ++j
) {
121 digit_
[j
] = that
.digit_
[j
];
125 bool IsZero() const {
126 // Don't assume normalization.
127 for (int j
{0}; j
< digits_
; ++j
) {
128 if (digit_
[j
] != 0) {
135 // Predicate: true when 10*value would cause a carry.
136 // (When this happens during decimal-to-binary conversion,
137 // there are more digits in the input string than can be
138 // represented precisely.)
139 bool IsFull() const {
140 return digits_
== digitLimit_
&& digit_
[digits_
- 1] >= radix
/ 10;
143 // Sets *this to an unsigned integer value.
144 // Returns any remainder.
145 template <typename UINT
> UINT
SetTo(UINT n
) {
147 std::is_same_v
<UINT
, common::uint128_t
> || std::is_unsigned_v
<UINT
>);
157 if constexpr (sizeof n
< sizeof(Digit
)) {
159 digit_
[digits_
++] = n
;
163 while (n
!= 0 && digits_
< digitLimit_
) {
165 digit_
[digits_
++] = static_cast<Digit
>(n
- q
* radix
);
172 int RemoveLeastOrderZeroDigits() {
174 if (digits_
> 0 && digit_
[0] == 0) {
175 while (remove
< digits_
&& digit_
[remove
] == 0) {
178 if (remove
>= digits_
) {
180 } else if (remove
> 0) {
181 #if defined __GNUC__ && __GNUC__ < 8
182 // (&& j + remove < maxDigits) was added to avoid GCC < 8 build failure
183 // on -Werror=array-bounds. This can be removed if -Werror is disable.
184 for (int j
{0}; j
+ remove
< digits_
&& (j
+ remove
< maxDigits
); ++j
) {
186 for (int j
{0}; j
+ remove
< digits_
; ++j
) {
188 digit_
[j
] = digit_
[j
+ remove
];
196 void RemoveLeadingZeroDigits() {
197 while (digits_
> 0 && digit_
[digits_
- 1] == 0) {
203 RemoveLeadingZeroDigits();
204 exponent_
+= RemoveLeastOrderZeroDigits() * log10Radix
;
207 // This limited divisibility test only works for even divisors of the radix,
208 // which is fine since it's only ever used with 2 and 5.
209 template <int N
> bool IsDivisibleBy() const {
210 static_assert(N
> 1 && radix
% N
== 0, "bad modulus");
211 return digits_
== 0 || (digit_
[0] % N
) == 0;
214 template <unsigned DIVISOR
> int DivideBy() {
216 for (int j
{digits_
- 1}; j
>= 0; --j
) {
217 Digit q
{digit_
[j
] / DIVISOR
};
218 Digit nrem
{digit_
[j
] - DIVISOR
* q
};
219 digit_
[j
] = q
+ (radix
/ DIVISOR
) * remainder
;
225 void DivideByPowerOfTwo(int twoPow
) { // twoPow <= log10Radix
227 auto mask
{(Digit
{1} << twoPow
) - 1};
228 auto coeff
{radix
>> twoPow
};
229 for (int j
{digits_
- 1}; j
>= 0; --j
) {
230 auto nrem
{digit_
[j
] & mask
};
231 digit_
[j
] = (digit_
[j
] >> twoPow
) + coeff
* remainder
;
236 // Returns true on overflow
237 bool DivideByPowerOfTwoInPlace(int twoPow
) {
240 int chunk
{twoPow
> log10Radix
? log10Radix
: twoPow
};
241 if ((digit_
[0] & ((Digit
{1} << chunk
) - 1)) == 0) {
242 DivideByPowerOfTwo(chunk
);
247 if (digit_
[digits_
- 1] >> chunk
!= 0) {
248 if (digits_
== digitLimit_
) {
249 return true; // overflow
251 digit_
[digits_
++] = 0;
253 auto remainder
{digit_
[digits_
- 1]};
254 exponent_
-= log10Radix
;
255 auto coeff
{radix
>> chunk
}; // precise; radix is (5*2)**log10Radix
256 auto mask
{(Digit
{1} << chunk
) - 1};
257 for (int j
{digits_
- 1}; j
>= 1; --j
) {
258 digit_
[j
] = (digit_
[j
- 1] >> chunk
) + coeff
* remainder
;
259 remainder
= digit_
[j
- 1] & mask
;
261 digit_
[0] = coeff
* remainder
;
264 return false; // no overflow
267 int AddCarry(int position
= 0, int carry
= 1) {
268 for (; position
< digits_
; ++position
) {
269 Digit v
{digit_
[position
] + carry
};
271 digit_
[position
] = v
;
274 digit_
[position
] = v
- radix
;
277 if (digits_
< digitLimit_
) {
278 digit_
[digits_
++] = carry
;
282 if (digits_
< digitLimit_
) {
283 digit_
[digits_
++] = carry
;
290 for (int j
{0}; digit_
[j
]-- == 0; ++j
) {
291 digit_
[j
] = radix
- 1;
295 template <int N
> int MultiplyByHelper(int carry
= 0) {
296 for (int j
{0}; j
< digits_
; ++j
) {
297 auto v
{N
* digit_
[j
] + carry
};
299 digit_
[j
] = v
- carry
* radix
; // i.e., v % radix
304 template <int N
> int MultiplyBy(int carry
= 0) {
305 if (int newCarry
{MultiplyByHelper
<N
>(carry
)}) {
306 return AddCarry(digits_
, newCarry
);
312 template <int N
> int MultiplyWithoutNormalization() {
313 if (int carry
{MultiplyByHelper
<N
>(0)}) {
314 if (digits_
< digitLimit_
) {
315 digit_
[digits_
++] = carry
;
325 void LoseLeastSignificantDigit(); // with rounding
327 void PushCarry(int carry
) {
328 if (digits_
== maxDigits
&& RemoveLeastOrderZeroDigits() == 0) {
329 LoseLeastSignificantDigit();
330 digit_
[digits_
- 1] += carry
;
332 digit_
[digits_
++] = carry
;
336 // Adds another number and then divides by two.
337 // Assumes same exponent and sign.
338 // Returns true when the the result has effectively been rounded down.
339 bool Mean(const BigRadixFloatingPointNumber
&);
341 // Parses a floating-point number; leaves the pointer reference
342 // argument pointing at the next character after what was recognized.
343 // The "end" argument can be left null if the caller is sure that the
344 // string is properly terminated with an addressable character that
345 // can't be in a valid floating-point character.
346 bool ParseNumber(const char *&, bool &inexact
, const char *end
);
348 using Raw
= typename
Real::RawType
;
349 constexpr Raw
SignBit() const { return Raw
{isNegative_
} << (Real::bits
- 1); }
350 constexpr Raw
Infinity() const {
351 return (Raw
{Real::maxExponent
} << Real::significandBits
) | SignBit();
353 static constexpr Raw
NaN() {
354 return (Raw
{Real::maxExponent
} << Real::significandBits
) |
355 (Raw
{1} << (Real::significandBits
- 2));
358 Digit digit_
[maxDigits
]; // in little-endian order: digit_[0] is LSD
359 int digits_
{0}; // # of elements in digit_[] array; zero when zero
360 int digitLimit_
{maxDigits
}; // precision clamp
361 int exponent_
{0}; // signed power of ten
362 bool isNegative_
{false};
363 enum FortranRounding rounding_
{ RoundNearest
};
365 } // namespace Fortran::decimal