1 //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "big-radix-floating-point.h"
10 #include "flang/Common/bit-population-count.h"
11 #include "flang/Common/leading-zero-bit-count.h"
12 #include "flang/Decimal/binary-floating-point.h"
13 #include "flang/Decimal/decimal.h"
18 namespace Fortran::decimal
{
20 template <int PREC
, int LOG10RADIX
>
21 bool BigRadixFloatingPointNumber
<PREC
, LOG10RADIX
>::ParseNumber(
22 const char *&p
, bool &inexact
, const char *end
) {
24 if (end
&& p
>= end
) {
27 // Skip leading spaces
28 for (; p
!= end
&& *p
== ' '; ++p
) {
34 isNegative_
= *q
== '-';
35 if (*q
== '-' || *q
== '+') {
39 for (; q
!= end
&& *q
== '0'; ++q
) {
41 const char *firstDigit
{q
};
42 for (; q
!= end
&& *q
>= '0' && *q
<= '9'; ++q
) {
44 const char *point
{nullptr};
45 if (q
!= end
&& *q
== '.') {
47 for (++q
; q
!= end
&& *q
>= '0' && *q
<= '9'; ++q
) {
50 if (q
== start
|| (q
== start
+ 1 && start
== point
)) {
51 return false; // require at least one digit
53 // There's a valid number here; set the reference argument to point to
54 // the first character afterward, which might be an exponent part.
56 // Strip off trailing zeroes
58 while (q
[-1] == '0') {
67 while (q
> firstDigit
&& q
[-1] == '0') {
72 // Trim any excess digits
73 const char *limit
{firstDigit
+ maxDigits
* log10Radix
+ (point
!= nullptr)};
81 exponent_
+= q
- limit
;
86 exponent_
-= static_cast<int>(q
- point
- 1);
88 if (q
== firstDigit
) {
89 exponent_
= 0; // all zeros
91 // Rack the decimal digits up into big Digits.
92 for (auto times
{radix
}; q
-- > firstDigit
;) {
95 digit_
[digits_
++] = *q
- '0';
98 digit_
[digits_
- 1] += times
* (*q
- '0');
103 // Look for an optional exponent field.
118 bool negExpo
{*q
== '-'};
119 if (*q
== '-' || *q
== '+') {
122 if (q
!= end
&& *q
>= '0' && *q
<= '9') {
124 for (; q
!= end
&& *q
== '0'; ++q
) {
126 const char *expDig
{q
};
127 for (; q
!= end
&& *q
>= '0' && *q
<= '9'; ++q
) {
128 expo
= 10 * expo
+ *q
- '0';
130 if (q
>= expDig
+ 8) {
131 // There's a ridiculous number of nonzero exponent digits.
132 // The decimal->binary conversion routine will cope with
133 // returning 0 or Inf, but we must ensure that "expo" didn't
134 // overflow back around to something legal.
135 expo
= 10 * Real::decimalRange
;
138 p
= q
; // exponent is valid; advance the termination pointer
152 template <int PREC
, int LOG10RADIX
>
153 void BigRadixFloatingPointNumber
<PREC
,
154 LOG10RADIX
>::LoseLeastSignificantDigit() {
155 Digit LSD
{digit_
[0]};
156 for (int j
{0}; j
< digits_
- 1; ++j
) {
157 digit_
[j
] = digit_
[j
+ 1];
159 digit_
[digits_
- 1] = 0;
163 incr
= LSD
> radix
/ 2 || (LSD
== radix
/ 2 && digit_
[0] % 2 != 0);
166 incr
= LSD
> 0 && !isNegative_
;
169 incr
= LSD
> 0 && isNegative_
;
173 case RoundCompatible
:
174 incr
= LSD
>= radix
/ 2;
177 for (int j
{0}; (digit_
[j
] += incr
) == radix
; ++j
) {
182 // This local utility class represents an unrounded nonnegative
183 // binary floating-point value with an unbiased (i.e., signed)
184 // binary exponent, an integer value (not a fraction) with an implied
185 // binary point to its *right*, and some guard bits for rounding.
186 template <int PREC
> class IntermediateFloat
{
188 static constexpr int precision
{PREC
};
189 using IntType
= common::HostUnsignedIntType
<precision
>;
190 static constexpr IntType topBit
{IntType
{1} << (precision
- 1)};
191 static constexpr IntType mask
{topBit
+ (topBit
- 1)};
193 IntermediateFloat() {}
194 IntermediateFloat(const IntermediateFloat
&) = default;
196 // Assumes that exponent_ is valid on entry, and may increment it.
197 // Returns the number of guard_ bits that have been determined.
198 template <typename UINT
> bool SetTo(UINT n
) {
199 static constexpr int nBits
{CHAR_BIT
* sizeof n
};
200 if constexpr (precision
>= nBits
) {
205 int shift
{common::BitsNeededFor(n
) - precision
};
214 guard_
= (n
>> (nBits
- guardBits
)) | ((n
<< guardBits
) != 0);
220 void ShiftIn(int bit
= 0) { value_
= value_
+ value_
+ bit
; }
221 bool IsFull() const { return value_
>= topBit
; }
222 void AdjustExponent(int by
) { exponent_
+= by
; }
223 void SetGuard(int g
) {
224 guard_
|= (static_cast<GuardType
>(g
& 6) << (guardBits
- 3)) | (g
& 1);
227 ConversionToBinaryResult
<PREC
> ToBinary(
228 bool isNegative
, FortranRounding
) const;
231 static constexpr int guardBits
{3}; // guard, round, sticky
232 using GuardType
= int;
233 static constexpr GuardType oneHalf
{GuardType
{1} << (guardBits
- 1)};
241 ConversionToBinaryResult
<PREC
> IntermediateFloat
<PREC
>::ToBinary(
242 bool isNegative
, FortranRounding rounding
) const {
243 using Binary
= BinaryFloatingPointNumber
<PREC
>;
244 // Create a fraction with a binary point to the left of the integer
245 // value_, and bias the exponent.
246 IntType fraction
{value_
};
247 GuardType guard
{guard_
};
248 int expo
{exponent_
+ Binary::exponentBias
+ (precision
- 1)};
249 while (expo
< 1 && (fraction
> 0 || guard
> oneHalf
)) {
250 guard
= (guard
& 1) | (guard
>> 1) |
251 ((static_cast<GuardType
>(fraction
) & 1) << (guardBits
- 1));
259 if (fraction
== 0 && guard
<= oneHalf
) {
260 return {Binary
{}, static_cast<enum ConversionResultFlags
>(flags
)};
262 // The value is nonzero; normalize it.
263 while (fraction
< topBit
&& expo
> 1) {
265 fraction
= fraction
* 2 + (guard
>> (guardBits
- 2));
266 guard
= (((guard
>> (guardBits
- 2)) & 1) << (guardBits
- 1)) | (guard
& 1);
272 incr
= guard
> oneHalf
|| (guard
== oneHalf
&& (fraction
& 1));
275 incr
= guard
!= 0 && !isNegative
;
278 incr
= guard
!= 0 && isNegative
;
282 case RoundCompatible
:
283 incr
= guard
>= oneHalf
;
287 if (fraction
== mask
) {
288 // rounding causes a carry
295 if (expo
== 1 && fraction
< topBit
) {
296 expo
= 0; // subnormal
298 if (expo
>= Binary::maxExponent
) {
299 expo
= Binary::maxExponent
; // Inf
303 using Raw
= typename
Binary::RawType
;
304 Raw raw
= static_cast<Raw
>(isNegative
) << (Binary::bits
- 1);
305 raw
|= static_cast<Raw
>(expo
) << Binary::significandBits
;
306 if constexpr (Binary::isImplicitMSB
) {
310 return {Binary(raw
), static_cast<enum ConversionResultFlags
>(flags
)};
313 template <int PREC
, int LOG10RADIX
>
314 ConversionToBinaryResult
<PREC
>
315 BigRadixFloatingPointNumber
<PREC
, LOG10RADIX
>::ConvertToBinary() {
316 // On entry, *this holds a multi-precision integer value in a radix of a
317 // large power of ten. Its radix point is defined to be to the right of its
318 // digits, and "exponent_" is the power of ten by which it is to be scaled.
320 if (digits_
== 0) { // zero value
321 return {Real
{SignBit()}};
323 // The value is not zero: x = D. * 10.**E
324 // Shift our perspective on the radix (& decimal) point so that
325 // it sits to the *left* of the digits: i.e., x = .D * 10.**E
326 exponent_
+= digits_
* log10Radix
;
327 // Sanity checks for ridiculous exponents
328 static constexpr int crazy
{2 * Real::decimalRange
+ log10Radix
};
329 if (exponent_
< -crazy
) { // underflow to +/-0.
330 return {Real
{SignBit()}, Inexact
};
331 } else if (exponent_
> crazy
) { // overflow to +/-Inf.
332 return {Real
{Infinity()}, Overflow
};
334 // Apply any negative decimal exponent by multiplication
335 // by a power of two, adjusting the binary exponent to compensate.
336 IntermediateFloat
<PREC
> f
;
337 while (exponent_
< log10Radix
) {
338 // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9)
339 f
.AdjustExponent(-9);
340 digitLimit_
= digits_
;
341 if (int carry
{MultiplyWithoutNormalization
<512>()}) {
342 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
344 exponent_
+= log10Radix
;
347 // Apply any positive decimal exponent greater than
348 // is needed to treat the topmost digit as an integer
349 // part by multiplying by 10 or 10000 repeatedly.
350 while (exponent_
> log10Radix
) {
351 digitLimit_
= digits_
;
353 if (exponent_
>= log10Radix
+ 4) {
354 // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4)
356 carry
= MultiplyWithoutNormalization
<(5 * 5 * 5 * 5)>();
359 // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1)
361 carry
= MultiplyWithoutNormalization
<5>();
365 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
367 exponent_
+= log10Radix
;
370 // So exponent_ is now log10Radix, meaning that the
371 // MSD can be taken as an integer part and transferred
372 // to the binary result.
373 // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex)
374 int guardShift
{f
.SetTo(digit_
[--digits_
])};
375 // Transfer additional bits until the result is normal.
376 digitLimit_
= digits_
;
377 while (!f
.IsFull()) {
378 // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1)
379 f
.AdjustExponent(-1);
380 std::uint32_t carry
= MultiplyWithoutNormalization
<2>();
383 // Get the next few bits for rounding. Allow for some guard bits
384 // that may have already been set in f.SetTo() above.
386 if (guardShift
== 0) {
387 guard
= MultiplyWithoutNormalization
<4>();
388 } else if (guardShift
== 1) {
389 guard
= MultiplyWithoutNormalization
<2>();
391 guard
= guard
+ guard
+ !IsZero();
393 return f
.ToBinary(isNegative_
, rounding_
);
396 template <int PREC
, int LOG10RADIX
>
397 ConversionToBinaryResult
<PREC
>
398 BigRadixFloatingPointNumber
<PREC
, LOG10RADIX
>::ConvertToBinary(
399 const char *&p
, const char *limit
) {
401 if (ParseNumber(p
, inexact
, limit
)) {
402 auto result
{ConvertToBinary()};
405 static_cast<enum ConversionResultFlags
>(result
.flags
| Inexact
);
409 // Could not parse a decimal floating-point number. p has been
410 // advanced over any leading spaces.
411 if ((!limit
|| limit
>= p
+ 3) && toupper(p
[0]) == 'N' &&
412 toupper(p
[1]) == 'A' && toupper(p
[2]) == 'N') {
415 if ((!limit
|| p
< limit
) && *p
== '(') {
419 if (limit
&& p
>= limit
) {
421 return {Real
{NaN()}, Invalid
};
422 } else if (*p
== '(') {
424 } else if (*p
== ')') {
430 return {Real
{NaN()}};
432 // Try to parse Inf, maybe with a sign
434 if (!limit
|| q
< limit
) {
435 isNegative_
= *q
== '-';
436 if (isNegative_
|| *q
== '+') {
440 if ((!limit
|| limit
>= q
+ 3) && toupper(q
[0]) == 'I' &&
441 toupper(q
[1]) == 'N' && toupper(q
[2]) == 'F') {
442 if ((!limit
|| limit
>= q
+ 8) && toupper(q
[3]) == 'I' &&
443 toupper(q
[4]) == 'N' && toupper(q
[5]) == 'I' &&
444 toupper(q
[6]) == 'T' && toupper(q
[7]) == 'Y') {
449 return {Real
{Infinity()}};
452 return {Real
{NaN()}, Invalid
};
459 ConversionToBinaryResult
<PREC
> ConvertToBinary(
460 const char *&p
, enum FortranRounding rounding
, const char *end
) {
461 return BigRadixFloatingPointNumber
<PREC
>{rounding
}.ConvertToBinary(p
, end
);
464 template ConversionToBinaryResult
<8> ConvertToBinary
<8>(
465 const char *&, enum FortranRounding
, const char *end
);
466 template ConversionToBinaryResult
<11> ConvertToBinary
<11>(
467 const char *&, enum FortranRounding
, const char *end
);
468 template ConversionToBinaryResult
<24> ConvertToBinary
<24>(
469 const char *&, enum FortranRounding
, const char *end
);
470 template ConversionToBinaryResult
<53> ConvertToBinary
<53>(
471 const char *&, enum FortranRounding
, const char *end
);
472 template ConversionToBinaryResult
<64> ConvertToBinary
<64>(
473 const char *&, enum FortranRounding
, const char *end
);
474 template ConversionToBinaryResult
<113> ConvertToBinary
<113>(
475 const char *&, enum FortranRounding
, const char *end
);
478 enum ConversionResultFlags
ConvertDecimalToFloat(
479 const char **p
, float *f
, enum FortranRounding rounding
) {
480 auto result
{Fortran::decimal::ConvertToBinary
<24>(*p
, rounding
)};
481 std::memcpy(reinterpret_cast<void *>(f
),
482 reinterpret_cast<const void *>(&result
.binary
), sizeof *f
);
485 enum ConversionResultFlags
ConvertDecimalToDouble(
486 const char **p
, double *d
, enum FortranRounding rounding
) {
487 auto result
{Fortran::decimal::ConvertToBinary
<53>(*p
, rounding
)};
488 std::memcpy(reinterpret_cast<void *>(d
),
489 reinterpret_cast<const void *>(&result
.binary
), sizeof *d
);
492 enum ConversionResultFlags
ConvertDecimalToLongDouble(
493 const char **p
, long double *ld
, enum FortranRounding rounding
) {
494 auto result
{Fortran::decimal::ConvertToBinary
<64>(*p
, rounding
)};
495 std::memcpy(reinterpret_cast<void *>(ld
),
496 reinterpret_cast<const void *>(&result
.binary
), sizeof *ld
);
500 } // namespace Fortran::decimal