1 //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "big-radix-floating-point.h"
10 #include "flang/Common/bit-population-count.h"
11 #include "flang/Common/leading-zero-bit-count.h"
12 #include "flang/Decimal/binary-floating-point.h"
13 #include "flang/Decimal/decimal.h"
14 #include "flang/Runtime/freestanding-tools.h"
19 // Some environments, viz. glibc 2.17 and *BSD, allow the macro HUGE
20 // to leak out of <math.h>.
23 namespace Fortran::decimal
{
25 template <int PREC
, int LOG10RADIX
>
26 bool BigRadixFloatingPointNumber
<PREC
, LOG10RADIX
>::ParseNumber(
27 const char *&p
, bool &inexact
, const char *end
) {
29 if (end
&& p
>= end
) {
32 // Skip leading spaces
33 for (; p
!= end
&& *p
== ' '; ++p
) {
39 isNegative_
= *q
== '-';
40 if (*q
== '-' || *q
== '+') {
44 for (; q
!= end
&& *q
== '0'; ++q
) {
46 const char *firstDigit
{q
};
47 for (; q
!= end
&& *q
>= '0' && *q
<= '9'; ++q
) {
49 const char *point
{nullptr};
50 if (q
!= end
&& *q
== '.') {
52 for (++q
; q
!= end
&& *q
>= '0' && *q
<= '9'; ++q
) {
55 if (q
== start
|| (q
== start
+ 1 && start
== point
)) {
56 return false; // require at least one digit
58 // There's a valid number here; set the reference argument to point to
59 // the first character afterward, which might be an exponent part.
61 // Strip off trailing zeroes
63 while (q
[-1] == '0') {
72 while (q
> firstDigit
&& q
[-1] == '0') {
77 // Trim any excess digits
78 const char *limit
{firstDigit
+ maxDigits
* log10Radix
+ (point
!= nullptr)};
86 exponent_
+= q
- limit
;
91 exponent_
-= static_cast<int>(q
- point
- 1);
93 if (q
== firstDigit
) {
94 exponent_
= 0; // all zeros
96 // Rack the decimal digits up into big Digits.
97 for (auto times
{radix
}; q
-- > firstDigit
;) {
100 digit_
[digits_
++] = *q
- '0';
103 digit_
[digits_
- 1] += times
* (*q
- '0');
108 // Look for an optional exponent field.
123 bool negExpo
{*q
== '-'};
124 if (*q
== '-' || *q
== '+') {
127 if (q
!= end
&& *q
>= '0' && *q
<= '9') {
129 for (; q
!= end
&& *q
== '0'; ++q
) {
131 const char *expDig
{q
};
132 for (; q
!= end
&& *q
>= '0' && *q
<= '9'; ++q
) {
133 expo
= 10 * expo
+ *q
- '0';
135 if (q
>= expDig
+ 8) {
136 // There's a ridiculous number of nonzero exponent digits.
137 // The decimal->binary conversion routine will cope with
138 // returning 0 or Inf, but we must ensure that "expo" didn't
139 // overflow back around to something legal.
140 expo
= 10 * Real::decimalRange
;
143 p
= q
; // exponent is valid; advance the termination pointer
157 template <int PREC
, int LOG10RADIX
>
158 void BigRadixFloatingPointNumber
<PREC
,
159 LOG10RADIX
>::LoseLeastSignificantDigit() {
160 Digit LSD
{digit_
[0]};
161 for (int j
{0}; j
< digits_
- 1; ++j
) {
162 digit_
[j
] = digit_
[j
+ 1];
164 digit_
[digits_
- 1] = 0;
168 incr
= LSD
> radix
/ 2 || (LSD
== radix
/ 2 && digit_
[0] % 2 != 0);
171 incr
= LSD
> 0 && !isNegative_
;
174 incr
= LSD
> 0 && isNegative_
;
178 case RoundCompatible
:
179 incr
= LSD
>= radix
/ 2;
182 for (int j
{0}; (digit_
[j
] += incr
) == radix
; ++j
) {
187 // This local utility class represents an unrounded nonnegative
188 // binary floating-point value with an unbiased (i.e., signed)
189 // binary exponent, an integer value (not a fraction) with an implied
190 // binary point to its *right*, and some guard bits for rounding.
191 template <int PREC
> class IntermediateFloat
{
193 static constexpr int precision
{PREC
};
194 using IntType
= common::HostUnsignedIntType
<precision
>;
195 static constexpr IntType topBit
{IntType
{1} << (precision
- 1)};
196 static constexpr IntType mask
{topBit
+ (topBit
- 1)};
198 RT_API_ATTRS
IntermediateFloat() {}
199 IntermediateFloat(const IntermediateFloat
&) = default;
201 // Assumes that exponent_ is valid on entry, and may increment it.
202 // Returns the number of guard_ bits that have been determined.
203 template <typename UINT
> RT_API_ATTRS
bool SetTo(UINT n
) {
204 static constexpr int nBits
{CHAR_BIT
* sizeof n
};
205 if constexpr (precision
>= nBits
) {
210 int shift
{common::BitsNeededFor(n
) - precision
};
219 guard_
= (n
>> (nBits
- guardBits
)) | ((n
<< guardBits
) != 0);
225 RT_API_ATTRS
void ShiftIn(int bit
= 0) { value_
= value_
+ value_
+ bit
; }
226 RT_API_ATTRS
bool IsFull() const { return value_
>= topBit
; }
227 RT_API_ATTRS
void AdjustExponent(int by
) { exponent_
+= by
; }
228 RT_API_ATTRS
void SetGuard(int g
) {
229 guard_
|= (static_cast<GuardType
>(g
& 6) << (guardBits
- 3)) | (g
& 1);
232 RT_API_ATTRS ConversionToBinaryResult
<PREC
> ToBinary(
233 bool isNegative
, FortranRounding
) const;
236 static constexpr int guardBits
{3}; // guard, round, sticky
237 using GuardType
= int;
238 static constexpr GuardType oneHalf
{GuardType
{1} << (guardBits
- 1)};
245 // The standard says that these overflow cases round to "representable"
246 // numbers, and some popular compilers interpret that to mean +/-HUGE()
247 // rather than +/-Inf.
248 static inline RT_API_ATTRS
constexpr bool RoundOverflowToHuge(
249 enum FortranRounding rounding
, bool isNegative
) {
250 return rounding
== RoundToZero
|| (!isNegative
&& rounding
== RoundDown
) ||
251 (isNegative
&& rounding
== RoundUp
);
255 ConversionToBinaryResult
<PREC
> IntermediateFloat
<PREC
>::ToBinary(
256 bool isNegative
, FortranRounding rounding
) const {
257 using Binary
= BinaryFloatingPointNumber
<PREC
>;
258 // Create a fraction with a binary point to the left of the integer
259 // value_, and bias the exponent.
260 IntType fraction
{value_
};
261 GuardType guard
{guard_
};
262 int expo
{exponent_
+ Binary::exponentBias
+ (precision
- 1)};
263 while (expo
< 1 && (fraction
> 0 || guard
> oneHalf
)) {
264 guard
= (guard
& 1) | (guard
>> 1) |
265 ((static_cast<GuardType
>(fraction
) & 1) << (guardBits
- 1));
274 if (guard
<= oneHalf
) {
275 if ((!isNegative
&& rounding
== RoundUp
) ||
276 (isNegative
&& rounding
== RoundDown
)) {
277 // round to least nonzero value
279 } else { // round to zero
288 std::move(zero
), static_cast<enum ConversionResultFlags
>(flags
)};
292 // The value is nonzero; normalize it.
293 while (fraction
< topBit
&& expo
> 1) {
295 fraction
= fraction
* 2 + (guard
>> (guardBits
- 2));
297 (((guard
>> (guardBits
- 2)) & 1) << (guardBits
- 1)) | (guard
& 1);
304 incr
= guard
> oneHalf
|| (guard
== oneHalf
&& (fraction
& 1));
307 incr
= guard
!= 0 && !isNegative
;
310 incr
= guard
!= 0 && isNegative
;
314 case RoundCompatible
:
315 incr
= guard
>= oneHalf
;
319 if (fraction
== mask
) {
320 // rounding causes a carry
327 if (expo
== 1 && fraction
< topBit
) {
328 expo
= 0; // subnormal
330 } else if (expo
== 0) {
332 } else if (expo
>= Binary::maxExponent
) {
333 if (RoundOverflowToHuge(rounding
, isNegative
)) {
334 expo
= Binary::maxExponent
- 1;
337 expo
= Binary::maxExponent
;
339 if constexpr (Binary::bits
== 80) { // x87
340 fraction
= IntType
{1} << 63;
346 using Raw
= typename
Binary::RawType
;
347 Raw raw
= static_cast<Raw
>(isNegative
) << (Binary::bits
- 1);
348 raw
|= static_cast<Raw
>(expo
) << Binary::significandBits
;
349 if constexpr (Binary::isImplicitMSB
) {
353 return {Binary(raw
), static_cast<enum ConversionResultFlags
>(flags
)};
356 template <int PREC
, int LOG10RADIX
>
357 ConversionToBinaryResult
<PREC
>
358 BigRadixFloatingPointNumber
<PREC
, LOG10RADIX
>::ConvertToBinary() {
359 // On entry, *this holds a multi-precision integer value in a radix of a
360 // large power of ten. Its radix point is defined to be to the right of its
361 // digits, and "exponent_" is the power of ten by which it is to be scaled.
363 if (digits_
== 0) { // zero value
364 return {Real
{SignBit()}};
366 // The value is not zero: x = D. * 10.**E
367 // Shift our perspective on the radix (& decimal) point so that
368 // it sits to the *left* of the digits: i.e., x = .D * 10.**E
369 exponent_
+= digits_
* log10Radix
;
370 // Sanity checks for ridiculous exponents
371 static constexpr int crazy
{2 * Real::decimalRange
+ log10Radix
};
372 if (exponent_
< -crazy
) {
373 enum ConversionResultFlags flags
{
374 static_cast<enum ConversionResultFlags
>(Inexact
| Underflow
)
376 if ((!isNegative_
&& rounding_
== RoundUp
) ||
377 (isNegative_
&& rounding_
== RoundDown
)) {
378 // return least nonzero value
379 return {Real
{Raw
{1} | SignBit()}, flags
};
380 } else { // underflow to +/-0.
381 return {Real
{SignBit()}, flags
};
383 } else if (exponent_
> crazy
) { // overflow to +/-HUGE() or +/-Inf
384 if (RoundOverflowToHuge(rounding_
, isNegative_
)) {
385 return {Real
{HUGE()}};
387 return {Real
{Infinity()}, Overflow
};
390 // Apply any negative decimal exponent by multiplication
391 // by a power of two, adjusting the binary exponent to compensate.
392 IntermediateFloat
<PREC
> f
;
393 while (exponent_
< log10Radix
) {
394 // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9)
395 f
.AdjustExponent(-9);
396 digitLimit_
= digits_
;
397 if (int carry
{MultiplyWithoutNormalization
<512>()}) {
398 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
400 exponent_
+= log10Radix
;
403 // Apply any positive decimal exponent greater than
404 // is needed to treat the topmost digit as an integer
405 // part by multiplying by 10 or 10000 repeatedly.
406 while (exponent_
> log10Radix
) {
407 digitLimit_
= digits_
;
409 if (exponent_
>= log10Radix
+ 4) {
410 // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4)
412 carry
= MultiplyWithoutNormalization
<(5 * 5 * 5 * 5)>();
415 // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1)
417 carry
= MultiplyWithoutNormalization
<5>();
421 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
423 exponent_
+= log10Radix
;
426 // So exponent_ is now log10Radix, meaning that the
427 // MSD can be taken as an integer part and transferred
428 // to the binary result.
429 // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex)
430 int guardShift
{f
.SetTo(digit_
[--digits_
])};
431 // Transfer additional bits until the result is normal.
432 digitLimit_
= digits_
;
433 while (!f
.IsFull()) {
434 // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1)
435 f
.AdjustExponent(-1);
436 std::uint32_t carry
= MultiplyWithoutNormalization
<2>();
439 // Get the next few bits for rounding. Allow for some guard bits
440 // that may have already been set in f.SetTo() above.
442 if (guardShift
== 0) {
443 guard
= MultiplyWithoutNormalization
<4>();
444 } else if (guardShift
== 1) {
445 guard
= MultiplyWithoutNormalization
<2>();
447 guard
= guard
+ guard
+ !IsZero();
449 return f
.ToBinary(isNegative_
, rounding_
);
452 template <int PREC
, int LOG10RADIX
>
453 ConversionToBinaryResult
<PREC
>
454 BigRadixFloatingPointNumber
<PREC
, LOG10RADIX
>::ConvertToBinary(
455 const char *&p
, const char *limit
) {
457 if (ParseNumber(p
, inexact
, limit
)) {
458 auto result
{ConvertToBinary()};
461 static_cast<enum ConversionResultFlags
>(result
.flags
| Inexact
);
465 // Could not parse a decimal floating-point number. p has been
466 // advanced over any leading spaces. Most Fortran compilers set
467 // the sign bit for -NaN.
469 if (!limit
|| q
< limit
) {
470 isNegative_
= *q
== '-';
471 if (isNegative_
|| *q
== '+') {
475 if ((!limit
|| limit
>= q
+ 3) && runtime::toupper(q
[0]) == 'N' &&
476 runtime::toupper(q
[1]) == 'A' && runtime::toupper(q
[2]) == 'N') {
480 if ((!limit
|| p
< limit
) && *p
== '(') {
484 if (limit
&& p
>= limit
) {
486 return {Real
{NaN(false)}, Invalid
};
487 } else if (*p
== '(') {
489 } else if (*p
== ')') {
491 } else if (*p
!= ' ') {
492 // Implementation dependent, but other compilers
493 // all return quiet NaNs.
498 return {Real
{NaN(isQuiet
)}};
500 if ((!limit
|| limit
>= q
+ 3) && runtime::toupper(q
[0]) == 'I' &&
501 runtime::toupper(q
[1]) == 'N' && runtime::toupper(q
[2]) == 'F') {
502 if ((!limit
|| limit
>= q
+ 8) && runtime::toupper(q
[3]) == 'I' &&
503 runtime::toupper(q
[4]) == 'N' && runtime::toupper(q
[5]) == 'I' &&
504 runtime::toupper(q
[6]) == 'T' && runtime::toupper(q
[7]) == 'Y') {
509 return {Real
{Infinity()}};
512 return {Real
{NaN()}, Invalid
};
519 ConversionToBinaryResult
<PREC
> ConvertToBinary(
520 const char *&p
, enum FortranRounding rounding
, const char *end
) {
521 return BigRadixFloatingPointNumber
<PREC
>{rounding
}.ConvertToBinary(p
, end
);
524 template ConversionToBinaryResult
<8> ConvertToBinary
<8>(
525 const char *&, enum FortranRounding
, const char *end
);
526 template ConversionToBinaryResult
<11> ConvertToBinary
<11>(
527 const char *&, enum FortranRounding
, const char *end
);
528 template ConversionToBinaryResult
<24> ConvertToBinary
<24>(
529 const char *&, enum FortranRounding
, const char *end
);
530 template ConversionToBinaryResult
<53> ConvertToBinary
<53>(
531 const char *&, enum FortranRounding
, const char *end
);
532 template ConversionToBinaryResult
<64> ConvertToBinary
<64>(
533 const char *&, enum FortranRounding
, const char *end
);
534 template ConversionToBinaryResult
<113> ConvertToBinary
<113>(
535 const char *&, enum FortranRounding
, const char *end
);
538 RT_EXT_API_GROUP_BEGIN
540 enum ConversionResultFlags
ConvertDecimalToFloat(
541 const char **p
, float *f
, enum FortranRounding rounding
) {
542 auto result
{Fortran::decimal::ConvertToBinary
<24>(*p
, rounding
)};
543 std::memcpy(reinterpret_cast<void *>(f
),
544 reinterpret_cast<const void *>(&result
.binary
), sizeof *f
);
547 enum ConversionResultFlags
ConvertDecimalToDouble(
548 const char **p
, double *d
, enum FortranRounding rounding
) {
549 auto result
{Fortran::decimal::ConvertToBinary
<53>(*p
, rounding
)};
550 std::memcpy(reinterpret_cast<void *>(d
),
551 reinterpret_cast<const void *>(&result
.binary
), sizeof *d
);
554 enum ConversionResultFlags
ConvertDecimalToLongDouble(
555 const char **p
, long double *ld
, enum FortranRounding rounding
) {
556 auto result
{Fortran::decimal::ConvertToBinary
<64>(*p
, rounding
)};
557 std::memcpy(reinterpret_cast<void *>(ld
),
558 reinterpret_cast<const void *>(&result
.binary
), sizeof *ld
);
564 } // namespace Fortran::decimal