flang/lib/Decimal/big-radix-floating-point.h

   1 //===-- lib/Decimal/big-radix-floating-point.h ------------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #ifndef FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_
  10 #define FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_
  11
  12 // This is a helper class for use in floating-point conversions between
  13 // binary and decimal representations.  It holds a multiple-precision
  14 // integer value using digits of a radix that is a large even power of ten
  15 // (10,000,000,000,000,000 by default, 10**16).  These digits are accompanied
  16 // by a signed exponent that denotes multiplication by a power of ten.
  17 // The effective radix point is to the right of the digits (i.e., they do
  18 // not represent a fraction).
  19 //
  20 // The operations supported by this class are limited to those required
  21 // for conversions between binary and decimal representations; it is not
  22 // a general-purpose facility.
  23
  24 #include "flang/Common/bit-population-count.h"
  25 #include "flang/Common/leading-zero-bit-count.h"
  26 #include "flang/Common/uint128.h"
  27 #include "flang/Decimal/binary-floating-point.h"
  28 #include "flang/Decimal/decimal.h"
  29 #include <cinttypes>
  30 #include <limits>
  31 #include <type_traits>
  32
  33 namespace Fortran::decimal {
  34
  35 static constexpr std::uint64_t TenToThe(int power) {
  36   return power <= 0 ? 1 : 10 * TenToThe(power - 1);
  37 }
  38
  39 // 10**(LOG10RADIX + 3) must be < 2**wordbits, and LOG10RADIX must be
  40 // even, so that pairs of decimal digits do not straddle Digits.
  41 // So LOG10RADIX must be 16 or 6.
  42 template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
  43 public:
  44   using Real = BinaryFloatingPointNumber<PREC>;
  45   static constexpr int log10Radix{LOG10RADIX};
  46
  47 private:
  48   static constexpr std::uint64_t uint64Radix{TenToThe(log10Radix)};
  49   static constexpr int minDigitBits{
  50       64 - common::LeadingZeroBitCount(uint64Radix)};
  51   using Digit = common::HostUnsignedIntType<minDigitBits>;
  52   static constexpr Digit radix{uint64Radix};
  53   static_assert(radix < std::numeric_limits<Digit>::max() / 1000,
  54       "radix is somehow too big");
  55   static_assert(radix > std::numeric_limits<Digit>::max() / 10000,
  56       "radix is somehow too small");
  57
  58   // The base-2 logarithm of the least significant bit that can arise
  59   // in a subnormal IEEE floating-point number.
  60   static constexpr int minLog2AnyBit{
  61       -Real::exponentBias - Real::binaryPrecision};
  62
  63   // The number of Digits needed to represent the smallest subnormal.
  64   static constexpr int maxDigits{3 - minLog2AnyBit / log10Radix};
  65
  66 public:
  67   explicit BigRadixFloatingPointNumber(
  68       enum FortranRounding rounding = RoundNearest)
  69       : rounding_{rounding} {}
  70
  71   // Converts a binary floating point value.
  72   explicit BigRadixFloatingPointNumber(
  73       Real, enum FortranRounding = RoundNearest);
  74
  75   BigRadixFloatingPointNumber &SetToZero() {
  76     isNegative_ = false;
  77     digits_ = 0;
  78     exponent_ = 0;
  79     return *this;
  80   }
  81
  82   // Converts decimal floating-point to binary.
  83   ConversionToBinaryResult<PREC> ConvertToBinary();
  84
  85   // Parses and converts to binary.  Handles leading spaces,
  86   // "NaN", & optionally-signed "Inf".  Does not skip internal
  87   // spaces.
  88   // The argument is a reference to a pointer that is left
  89   // pointing to the first character that wasn't parsed.
  90   ConversionToBinaryResult<PREC> ConvertToBinary(
  91       const char *&, const char *end = nullptr);
  92
  93   // Formats a decimal floating-point number to a user buffer.
  94   // May emit "NaN" or "Inf", or an possibly-signed integer.
  95   // No decimal point is written, but if it were, it would be
  96   // after the last digit; the effective decimal exponent is
  97   // returned as part of the result structure so that it can be
  98   // formatted by the client.
  99   ConversionToDecimalResult ConvertToDecimal(
 100       char *, std::size_t, enum DecimalConversionFlags, int digits) const;
 101
 102   // Discard decimal digits not needed to distinguish this value
 103   // from the decimal encodings of two others (viz., the nearest binary
 104   // floating-point numbers immediately below and above this one).
 105   // The last decimal digit may not be uniquely determined in all
 106   // cases, and will be the mean value when that is so (e.g., if
 107   // last decimal digit values 6-8 would all work, it'll be a 7).
 108   // This minimization necessarily assumes that the value will be
 109   // emitted and read back into the same (or less precise) format
 110   // with default rounding to the nearest value.
 111   void Minimize(
 112       BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more);
 113
 114   template <typename STREAM> STREAM &Dump(STREAM &) const;
 115
 116 private:
 117   BigRadixFloatingPointNumber(const BigRadixFloatingPointNumber &that)
 118       : digits_{that.digits_}, exponent_{that.exponent_},
 119         isNegative_{that.isNegative_}, rounding_{that.rounding_} {
 120     for (int j{0}; j < digits_; ++j) {
 121       digit_[j] = that.digit_[j];
 122     }
 123   }
 124
 125   bool IsZero() const {
 126     // Don't assume normalization.
 127     for (int j{0}; j < digits_; ++j) {
 128       if (digit_[j] != 0) {
 129         return false;
 130       }
 131     }
 132     return true;
 133   }
 134
 135   // Predicate: true when 10*value would cause a carry.
 136   // (When this happens during decimal-to-binary conversion,
 137   // there are more digits in the input string than can be
 138   // represented precisely.)
 139   bool IsFull() const {
 140     return digits_ == digitLimit_ && digit_[digits_ - 1] >= radix / 10;
 141   }
 142
 143   // Sets *this to an unsigned integer value.
 144   // Returns any remainder.
 145   template <typename UINT> UINT SetTo(UINT n) {
 146     static_assert(
 147         std::is_same_v<UINT, common::uint128_t> || std::is_unsigned_v<UINT>);
 148     SetToZero();
 149     while (n != 0) {
 150       auto q{n / 10u};
 151       if (n != q * 10) {
 152         break;
 153       }
 154       ++exponent_;
 155       n = q;
 156     }
 157     if constexpr (sizeof n < sizeof(Digit)) {
 158       if (n != 0) {
 159         digit_[digits_++] = n;
 160       }
 161       return 0;
 162     } else {
 163       while (n != 0 && digits_ < digitLimit_) {
 164         auto q{n / radix};
 165         digit_[digits_++] = static_cast<Digit>(n - q * radix);
 166         n = q;
 167       }
 168       return n;
 169     }
 170   }
 171
 172   int RemoveLeastOrderZeroDigits() {
 173     int remove{0};
 174     if (digits_ > 0 && digit_[0] == 0) {
 175       while (remove < digits_ && digit_[remove] == 0) {
 176         ++remove;
 177       }
 178       if (remove >= digits_) {
 179         digits_ = 0;
 180       } else if (remove > 0) {
 181 #if defined __GNUC__ && __GNUC__ < 8
 182         // (&& j + remove < maxDigits) was added to avoid GCC < 8 build failure
 183         // on -Werror=array-bounds. This can be removed if -Werror is disable.
 184         for (int j{0}; j + remove < digits_ && (j + remove < maxDigits); ++j) {
 185 #else
 186         for (int j{0}; j + remove < digits_; ++j) {
 187 #endif
 188           digit_[j] = digit_[j + remove];
 189         }
 190         digits_ -= remove;
 191       }
 192     }
 193     return remove;
 194   }
 195
 196   void RemoveLeadingZeroDigits() {
 197     while (digits_ > 0 && digit_[digits_ - 1] == 0) {
 198       --digits_;
 199     }
 200   }
 201
 202   void Normalize() {
 203     RemoveLeadingZeroDigits();
 204     exponent_ += RemoveLeastOrderZeroDigits() * log10Radix;
 205   }
 206
 207   // This limited divisibility test only works for even divisors of the radix,
 208   // which is fine since it's only ever used with 2 and 5.
 209   template <int N> bool IsDivisibleBy() const {
 210     static_assert(N > 1 && radix % N == 0, "bad modulus");
 211     return digits_ == 0 || (digit_[0] % N) == 0;
 212   }
 213
 214   template <unsigned DIVISOR> int DivideBy() {
 215     Digit remainder{0};
 216     for (int j{digits_ - 1}; j >= 0; --j) {
 217       Digit q{digit_[j] / DIVISOR};
 218       Digit nrem{digit_[j] - DIVISOR * q};
 219       digit_[j] = q + (radix / DIVISOR) * remainder;
 220       remainder = nrem;
 221     }
 222     return remainder;
 223   }
 224
 225   void DivideByPowerOfTwo(int twoPow) { // twoPow <= log10Radix
 226     Digit remainder{0};
 227     auto mask{(Digit{1} << twoPow) - 1};
 228     auto coeff{radix >> twoPow};
 229     for (int j{digits_ - 1}; j >= 0; --j) {
 230       auto nrem{digit_[j] & mask};
 231       digit_[j] = (digit_[j] >> twoPow) + coeff * remainder;
 232       remainder = nrem;
 233     }
 234   }
 235
 236   // Returns true on overflow
 237   bool DivideByPowerOfTwoInPlace(int twoPow) {
 238     if (digits_ > 0) {
 239       while (twoPow > 0) {
 240         int chunk{twoPow > log10Radix ? log10Radix : twoPow};
 241         if ((digit_[0] & ((Digit{1} << chunk) - 1)) == 0) {
 242           DivideByPowerOfTwo(chunk);
 243           twoPow -= chunk;
 244           continue;
 245         }
 246         twoPow -= chunk;
 247         if (digit_[digits_ - 1] >> chunk != 0) {
 248           if (digits_ == digitLimit_) {
 249             return true; // overflow
 250           }
 251           digit_[digits_++] = 0;
 252         }
 253         auto remainder{digit_[digits_ - 1]};
 254         exponent_ -= log10Radix;
 255         auto coeff{radix >> chunk}; // precise; radix is (5*2)**log10Radix
 256         auto mask{(Digit{1} << chunk) - 1};
 257         for (int j{digits_ - 1}; j >= 1; --j) {
 258           digit_[j] = (digit_[j - 1] >> chunk) + coeff * remainder;
 259           remainder = digit_[j - 1] & mask;
 260         }
 261         digit_[0] = coeff * remainder;
 262       }
 263     }
 264     return false; // no overflow
 265   }
 266
 267   int AddCarry(int position = 0, int carry = 1) {
 268     for (; position < digits_; ++position) {
 269       Digit v{digit_[position] + carry};
 270       if (v < radix) {
 271         digit_[position] = v;
 272         return 0;
 273       }
 274       digit_[position] = v - radix;
 275       carry = 1;
 276     }
 277     if (digits_ < digitLimit_) {
 278       digit_[digits_++] = carry;
 279       return 0;
 280     }
 281     Normalize();
 282     if (digits_ < digitLimit_) {
 283       digit_[digits_++] = carry;
 284       return 0;
 285     }
 286     return carry;
 287   }
 288
 289   void Decrement() {
 290     for (int j{0}; digit_[j]-- == 0; ++j) {
 291       digit_[j] = radix - 1;
 292     }
 293   }
 294
 295   template <int N> int MultiplyByHelper(int carry = 0) {
 296     for (int j{0}; j < digits_; ++j) {
 297       auto v{N * digit_[j] + carry};
 298       carry = v / radix;
 299       digit_[j] = v - carry * radix; // i.e., v % radix
 300     }
 301     return carry;
 302   }
 303
 304   template <int N> int MultiplyBy(int carry = 0) {
 305     if (int newCarry{MultiplyByHelper<N>(carry)}) {
 306       return AddCarry(digits_, newCarry);
 307     } else {
 308       return 0;
 309     }
 310   }
 311
 312   template <int N> int MultiplyWithoutNormalization() {
 313     if (int carry{MultiplyByHelper<N>(0)}) {
 314       if (digits_ < digitLimit_) {
 315         digit_[digits_++] = carry;
 316         return 0;
 317       } else {
 318         return carry;
 319       }
 320     } else {
 321       return 0;
 322     }
 323   }
 324
 325   void LoseLeastSignificantDigit(); // with rounding
 326
 327   void PushCarry(int carry) {
 328     if (digits_ == maxDigits && RemoveLeastOrderZeroDigits() == 0) {
 329       LoseLeastSignificantDigit();
 330       digit_[digits_ - 1] += carry;
 331     } else {
 332       digit_[digits_++] = carry;
 333     }
 334   }
 335
 336   // Adds another number and then divides by two.
 337   // Assumes same exponent and sign.
 338   // Returns true when the the result has effectively been rounded down.
 339   bool Mean(const BigRadixFloatingPointNumber &);
 340
 341   // Parses a floating-point number; leaves the pointer reference
 342   // argument pointing at the next character after what was recognized.
 343   // The "end" argument can be left null if the caller is sure that the
 344   // string is properly terminated with an addressable character that
 345   // can't be in a valid floating-point character.
 346   bool ParseNumber(const char *&, bool &inexact, const char *end);
 347
 348   using Raw = typename Real::RawType;
 349   constexpr Raw SignBit() const { return Raw{isNegative_} << (Real::bits - 1); }
 350   constexpr Raw Infinity() const {
 351     return (Raw{Real::maxExponent} << Real::significandBits) | SignBit();
 352   }
 353   static constexpr Raw NaN() {
 354     return (Raw{Real::maxExponent} << Real::significandBits) |
 355         (Raw{1} << (Real::significandBits - 2));
 356   }
 357
 358   Digit digit_[maxDigits]; // in little-endian order: digit_[0] is LSD
 359   int digits_{0}; // # of elements in digit_[] array; zero when zero
 360   int digitLimit_{maxDigits}; // precision clamp
 361   int exponent_{0}; // signed power of ten
 362   bool isNegative_{false};
 363   enum FortranRounding rounding_ { RoundNearest };
 364 };
 365 } // namespace Fortran::decimal
 366 #endif