flang/lib/Decimal/decimal-to-binary.cpp

   1 //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "big-radix-floating-point.h"
  10 #include "flang/Common/bit-population-count.h"
  11 #include "flang/Common/leading-zero-bit-count.h"
  12 #include "flang/Decimal/binary-floating-point.h"
  13 #include "flang/Decimal/decimal.h"
  14 #include <cinttypes>
  15 #include <cstring>
  16 #include <ctype.h>
  17
  18 namespace Fortran::decimal {
  19
  20 template <int PREC, int LOG10RADIX>
  21 bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
  22     const char *&p, bool &inexact, const char *end) {
  23   SetToZero();
  24   if (end && p >= end) {
  25     return false;
  26   }
  27   // Skip leading spaces
  28   for (; p != end && *p == ' '; ++p) {
  29   }
  30   if (p == end) {
  31     return false;
  32   }
  33   const char *q{p};
  34   isNegative_ = *q == '-';
  35   if (*q == '-' || *q == '+') {
  36     ++q;
  37   }
  38   const char *start{q};
  39   for (; q != end && *q == '0'; ++q) {
  40   }
  41   const char *firstDigit{q};
  42   for (; q != end && *q >= '0' && *q <= '9'; ++q) {
  43   }
  44   const char *point{nullptr};
  45   if (q != end && *q == '.') {
  46     point = q;
  47     for (++q; q != end && *q >= '0' && *q <= '9'; ++q) {
  48     }
  49   }
  50   if (q == start || (q == start + 1 && start == point)) {
  51     return false; // require at least one digit
  52   }
  53   // There's a valid number here; set the reference argument to point to
  54   // the first character afterward, which might be an exponent part.
  55   p = q;
  56   // Strip off trailing zeroes
  57   if (point) {
  58     while (q[-1] == '0') {
  59       --q;
  60     }
  61     if (q[-1] == '.') {
  62       point = nullptr;
  63       --q;
  64     }
  65   }
  66   if (!point) {
  67     while (q > firstDigit && q[-1] == '0') {
  68       --q;
  69       ++exponent_;
  70     }
  71   }
  72   // Trim any excess digits
  73   const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)};
  74   if (q > limit) {
  75     inexact = true;
  76     if (point >= limit) {
  77       q = point;
  78       point = nullptr;
  79     }
  80     if (!point) {
  81       exponent_ += q - limit;
  82     }
  83     q = limit;
  84   }
  85   if (point) {
  86     exponent_ -= static_cast<int>(q - point - 1);
  87   }
  88   if (q == firstDigit) {
  89     exponent_ = 0; // all zeros
  90   }
  91   // Rack the decimal digits up into big Digits.
  92   for (auto times{radix}; q-- > firstDigit;) {
  93     if (*q != '.') {
  94       if (times == radix) {
  95         digit_[digits_++] = *q - '0';
  96         times = 10;
  97       } else {
  98         digit_[digits_ - 1] += times * (*q - '0');
  99         times *= 10;
 100       }
 101     }
 102   }
 103   // Look for an optional exponent field.
 104   if (p == end) {
 105     return true;
 106   }
 107   q = p;
 108   switch (*q) {
 109   case 'e':
 110   case 'E':
 111   case 'd':
 112   case 'D':
 113   case 'q':
 114   case 'Q': {
 115     if (++q == end) {
 116       break;
 117     }
 118     bool negExpo{*q == '-'};
 119     if (*q == '-' || *q == '+') {
 120       ++q;
 121     }
 122     if (q != end && *q >= '0' && *q <= '9') {
 123       int expo{0};
 124       for (; q != end && *q == '0'; ++q) {
 125       }
 126       const char *expDig{q};
 127       for (; q != end && *q >= '0' && *q <= '9'; ++q) {
 128         expo = 10 * expo + *q - '0';
 129       }
 130       if (q >= expDig + 8) {
 131         // There's a ridiculous number of nonzero exponent digits.
 132         // The decimal->binary conversion routine will cope with
 133         // returning 0 or Inf, but we must ensure that "expo" didn't
 134         // overflow back around to something legal.
 135         expo = 10 * Real::decimalRange;
 136         exponent_ = 0;
 137       }
 138       p = q; // exponent is valid; advance the termination pointer
 139       if (negExpo) {
 140         exponent_ -= expo;
 141       } else {
 142         exponent_ += expo;
 143       }
 144     }
 145   } break;
 146   default:
 147     break;
 148   }
 149   return true;
 150 }
 151
 152 template <int PREC, int LOG10RADIX>
 153 void BigRadixFloatingPointNumber<PREC,
 154     LOG10RADIX>::LoseLeastSignificantDigit() {
 155   Digit LSD{digit_[0]};
 156   for (int j{0}; j < digits_ - 1; ++j) {
 157     digit_[j] = digit_[j + 1];
 158   }
 159   digit_[digits_ - 1] = 0;
 160   bool incr{false};
 161   switch (rounding_) {
 162   case RoundNearest:
 163     incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
 164     break;
 165   case RoundUp:
 166     incr = LSD > 0 && !isNegative_;
 167     break;
 168   case RoundDown:
 169     incr = LSD > 0 && isNegative_;
 170     break;
 171   case RoundToZero:
 172     break;
 173   case RoundCompatible:
 174     incr = LSD >= radix / 2;
 175     break;
 176   }
 177   for (int j{0}; (digit_[j] += incr) == radix; ++j) {
 178     digit_[j] = 0;
 179   }
 180 }
 181
 182 // This local utility class represents an unrounded nonnegative
 183 // binary floating-point value with an unbiased (i.e., signed)
 184 // binary exponent, an integer value (not a fraction) with an implied
 185 // binary point to its *right*, and some guard bits for rounding.
 186 template <int PREC> class IntermediateFloat {
 187 public:
 188   static constexpr int precision{PREC};
 189   using IntType = common::HostUnsignedIntType<precision>;
 190   static constexpr IntType topBit{IntType{1} << (precision - 1)};
 191   static constexpr IntType mask{topBit + (topBit - 1)};
 192
 193   IntermediateFloat() {}
 194   IntermediateFloat(const IntermediateFloat &) = default;
 195
 196   // Assumes that exponent_ is valid on entry, and may increment it.
 197   // Returns the number of guard_ bits that have been determined.
 198   template <typename UINT> bool SetTo(UINT n) {
 199     static constexpr int nBits{CHAR_BIT * sizeof n};
 200     if constexpr (precision >= nBits) {
 201       value_ = n;
 202       guard_ = 0;
 203       return 0;
 204     } else {
 205       int shift{common::BitsNeededFor(n) - precision};
 206       if (shift <= 0) {
 207         value_ = n;
 208         guard_ = 0;
 209         return 0;
 210       } else {
 211         value_ = n >> shift;
 212         exponent_ += shift;
 213         n <<= nBits - shift;
 214         guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0);
 215         return shift;
 216       }
 217     }
 218   }
 219
 220   void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; }
 221   bool IsFull() const { return value_ >= topBit; }
 222   void AdjustExponent(int by) { exponent_ += by; }
 223   void SetGuard(int g) {
 224     guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1);
 225   }
 226
 227   ConversionToBinaryResult<PREC> ToBinary(
 228       bool isNegative, FortranRounding) const;
 229
 230 private:
 231   static constexpr int guardBits{3}; // guard, round, sticky
 232   using GuardType = int;
 233   static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)};
 234
 235   IntType value_{0};
 236   GuardType guard_{0};
 237   int exponent_{0};
 238 };
 239
 240 template <int PREC>
 241 ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
 242     bool isNegative, FortranRounding rounding) const {
 243   using Binary = BinaryFloatingPointNumber<PREC>;
 244   // Create a fraction with a binary point to the left of the integer
 245   // value_, and bias the exponent.
 246   IntType fraction{value_};
 247   GuardType guard{guard_};
 248   int expo{exponent_ + Binary::exponentBias + (precision - 1)};
 249   while (expo < 1 && (fraction > 0 || guard > oneHalf)) {
 250     guard = (guard & 1) | (guard >> 1) |
 251         ((static_cast<GuardType>(fraction) & 1) << (guardBits - 1));
 252     fraction >>= 1;
 253     ++expo;
 254   }
 255   int flags{Exact};
 256   if (guard != 0) {
 257     flags |= Inexact;
 258   }
 259   if (fraction == 0 && guard <= oneHalf) {
 260     return {Binary{}, static_cast<enum ConversionResultFlags>(flags)};
 261   }
 262   // The value is nonzero; normalize it.
 263   while (fraction < topBit && expo > 1) {
 264     --expo;
 265     fraction = fraction * 2 + (guard >> (guardBits - 2));
 266     guard = (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1);
 267   }
 268   // Apply rounding
 269   bool incr{false};
 270   switch (rounding) {
 271   case RoundNearest:
 272     incr = guard > oneHalf || (guard == oneHalf && (fraction & 1));
 273     break;
 274   case RoundUp:
 275     incr = guard != 0 && !isNegative;
 276     break;
 277   case RoundDown:
 278     incr = guard != 0 && isNegative;
 279     break;
 280   case RoundToZero:
 281     break;
 282   case RoundCompatible:
 283     incr = guard >= oneHalf;
 284     break;
 285   }
 286   if (incr) {
 287     if (fraction == mask) {
 288       // rounding causes a carry
 289       ++expo;
 290       fraction = topBit;
 291     } else {
 292       ++fraction;
 293     }
 294   }
 295   if (expo == 1 && fraction < topBit) {
 296     expo = 0; // subnormal
 297   }
 298   if (expo >= Binary::maxExponent) {
 299     expo = Binary::maxExponent; // Inf
 300     flags |= Overflow;
 301     fraction = 0;
 302   }
 303   using Raw = typename Binary::RawType;
 304   Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1);
 305   raw |= static_cast<Raw>(expo) << Binary::significandBits;
 306   if constexpr (Binary::isImplicitMSB) {
 307     fraction &= ~topBit;
 308   }
 309   raw |= fraction;
 310   return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)};
 311 }
 312
 313 template <int PREC, int LOG10RADIX>
 314 ConversionToBinaryResult<PREC>
 315 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
 316   // On entry, *this holds a multi-precision integer value in a radix of a
 317   // large power of ten.  Its radix point is defined to be to the right of its
 318   // digits, and "exponent_" is the power of ten by which it is to be scaled.
 319   Normalize();
 320   if (digits_ == 0) { // zero value
 321     return {Real{SignBit()}};
 322   }
 323   // The value is not zero:  x = D. * 10.**E
 324   // Shift our perspective on the radix (& decimal) point so that
 325   // it sits to the *left* of the digits: i.e., x = .D * 10.**E
 326   exponent_ += digits_ * log10Radix;
 327   // Sanity checks for ridiculous exponents
 328   static constexpr int crazy{2 * Real::decimalRange + log10Radix};
 329   if (exponent_ < -crazy) { // underflow to +/-0.
 330     return {Real{SignBit()}, Inexact};
 331   } else if (exponent_ > crazy) { // overflow to +/-Inf.
 332     return {Real{Infinity()}, Overflow};
 333   }
 334   // Apply any negative decimal exponent by multiplication
 335   // by a power of two, adjusting the binary exponent to compensate.
 336   IntermediateFloat<PREC> f;
 337   while (exponent_ < log10Radix) {
 338     // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9)
 339     f.AdjustExponent(-9);
 340     digitLimit_ = digits_;
 341     if (int carry{MultiplyWithoutNormalization<512>()}) {
 342       // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
 343       PushCarry(carry);
 344       exponent_ += log10Radix;
 345     }
 346   }
 347   // Apply any positive decimal exponent greater than
 348   // is needed to treat the topmost digit as an integer
 349   // part by multiplying by 10 or 10000 repeatedly.
 350   while (exponent_ > log10Radix) {
 351     digitLimit_ = digits_;
 352     int carry;
 353     if (exponent_ >= log10Radix + 4) {
 354       // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4)
 355       exponent_ -= 4;
 356       carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>();
 357       f.AdjustExponent(4);
 358     } else {
 359       // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1)
 360       --exponent_;
 361       carry = MultiplyWithoutNormalization<5>();
 362       f.AdjustExponent(1);
 363     }
 364     if (carry != 0) {
 365       // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
 366       PushCarry(carry);
 367       exponent_ += log10Radix;
 368     }
 369   }
 370   // So exponent_ is now log10Radix, meaning that the
 371   // MSD can be taken as an integer part and transferred
 372   // to the binary result.
 373   // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex)
 374   int guardShift{f.SetTo(digit_[--digits_])};
 375   // Transfer additional bits until the result is normal.
 376   digitLimit_ = digits_;
 377   while (!f.IsFull()) {
 378     // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1)
 379     f.AdjustExponent(-1);
 380     std::uint32_t carry = MultiplyWithoutNormalization<2>();
 381     f.ShiftIn(carry);
 382   }
 383   // Get the next few bits for rounding.  Allow for some guard bits
 384   // that may have already been set in f.SetTo() above.
 385   int guard{0};
 386   if (guardShift == 0) {
 387     guard = MultiplyWithoutNormalization<4>();
 388   } else if (guardShift == 1) {
 389     guard = MultiplyWithoutNormalization<2>();
 390   }
 391   guard = guard + guard + !IsZero();
 392   f.SetGuard(guard);
 393   return f.ToBinary(isNegative_, rounding_);
 394 }
 395
 396 template <int PREC, int LOG10RADIX>
 397 ConversionToBinaryResult<PREC>
 398 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary(
 399     const char *&p, const char *limit) {
 400   bool inexact{false};
 401   if (ParseNumber(p, inexact, limit)) {
 402     auto result{ConvertToBinary()};
 403     if (inexact) {
 404       result.flags =
 405           static_cast<enum ConversionResultFlags>(result.flags | Inexact);
 406     }
 407     return result;
 408   } else {
 409     // Could not parse a decimal floating-point number.  p has been
 410     // advanced over any leading spaces.
 411     if ((!limit || limit >= p + 3) && toupper(p[0]) == 'N' &&
 412         toupper(p[1]) == 'A' && toupper(p[2]) == 'N') {
 413       // NaN
 414       p += 3;
 415       if ((!limit || p < limit) && *p == '(') {
 416         int depth{1};
 417         do {
 418           ++p;
 419           if (limit && p >= limit) {
 420             // Invalid input
 421             return {Real{NaN()}, Invalid};
 422           } else if (*p == '(') {
 423             ++depth;
 424           } else if (*p == ')') {
 425             --depth;
 426           }
 427         } while (depth > 0);
 428         ++p;
 429       }
 430       return {Real{NaN()}};
 431     } else {
 432       // Try to parse Inf, maybe with a sign
 433       const char *q{p};
 434       if (!limit || q < limit) {
 435         isNegative_ = *q == '-';
 436         if (isNegative_ || *q == '+') {
 437           ++q;
 438         }
 439       }
 440       if ((!limit || limit >= q + 3) && toupper(q[0]) == 'I' &&
 441           toupper(q[1]) == 'N' && toupper(q[2]) == 'F') {
 442         if ((!limit || limit >= q + 8) && toupper(q[3]) == 'I' &&
 443             toupper(q[4]) == 'N' && toupper(q[5]) == 'I' &&
 444             toupper(q[6]) == 'T' && toupper(q[7]) == 'Y') {
 445           p = q + 8;
 446         } else {
 447           p = q + 3;
 448         }
 449         return {Real{Infinity()}};
 450       } else {
 451         // Invalid input
 452         return {Real{NaN()}, Invalid};
 453       }
 454     }
 455   }
 456 }
 457
 458 template <int PREC>
 459 ConversionToBinaryResult<PREC> ConvertToBinary(
 460     const char *&p, enum FortranRounding rounding, const char *end) {
 461   return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p, end);
 462 }
 463
 464 template ConversionToBinaryResult<8> ConvertToBinary<8>(
 465     const char *&, enum FortranRounding, const char *end);
 466 template ConversionToBinaryResult<11> ConvertToBinary<11>(
 467     const char *&, enum FortranRounding, const char *end);
 468 template ConversionToBinaryResult<24> ConvertToBinary<24>(
 469     const char *&, enum FortranRounding, const char *end);
 470 template ConversionToBinaryResult<53> ConvertToBinary<53>(
 471     const char *&, enum FortranRounding, const char *end);
 472 template ConversionToBinaryResult<64> ConvertToBinary<64>(
 473     const char *&, enum FortranRounding, const char *end);
 474 template ConversionToBinaryResult<113> ConvertToBinary<113>(
 475     const char *&, enum FortranRounding, const char *end);
 476
 477 extern "C" {
 478 enum ConversionResultFlags ConvertDecimalToFloat(
 479     const char **p, float *f, enum FortranRounding rounding) {
 480   auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)};
 481   std::memcpy(reinterpret_cast<void *>(f),
 482       reinterpret_cast<const void *>(&result.binary), sizeof *f);
 483   return result.flags;
 484 }
 485 enum ConversionResultFlags ConvertDecimalToDouble(
 486     const char **p, double *d, enum FortranRounding rounding) {
 487   auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)};
 488   std::memcpy(reinterpret_cast<void *>(d),
 489       reinterpret_cast<const void *>(&result.binary), sizeof *d);
 490   return result.flags;
 491 }
 492 enum ConversionResultFlags ConvertDecimalToLongDouble(
 493     const char **p, long double *ld, enum FortranRounding rounding) {
 494   auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)};
 495   std::memcpy(reinterpret_cast<void *>(ld),
 496       reinterpret_cast<const void *>(&result.binary), sizeof *ld);
 497   return result.flags;
 498 }
 499 }
 500 } // namespace Fortran::decimal