flang/runtime/edit-input.cpp

   1 //===-- runtime/edit-input.cpp --------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "edit-input.h"
  10 #include "namelist.h"
  11 #include "utf.h"
  12 #include "flang/Common/real.h"
  13 #include "flang/Common/uint128.h"
  14 #include <algorithm>
  15 #include <cfenv>
  16
  17 namespace Fortran::runtime::io {
  18
  19 // Checks that a list-directed input value has been entirely consumed and
  20 // doesn't contain unparsed characters before the next value separator.
  21 static inline bool IsCharValueSeparator(const DataEdit &edit, char32_t ch) {
  22   char32_t comma{
  23       edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}};
  24   return ch == ' ' || ch == '\t' || ch == '/' || ch == comma;
  25 }
  26
  27 static bool CheckCompleteListDirectedField(
  28     IoStatementState &io, const DataEdit &edit) {
  29   if (edit.IsListDirected()) {
  30     std::size_t byteCount;
  31     if (auto ch{io.GetCurrentChar(byteCount)}) {
  32       if (IsCharValueSeparator(edit, *ch)) {
  33         return true;
  34       } else {
  35         const auto &connection{io.GetConnectionState()};
  36         io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator,
  37             "invalid character (0x%x) after list-directed input value, "
  38             "at column %d in record %d",
  39             static_cast<unsigned>(*ch),
  40             static_cast<int>(connection.positionInRecord + 1),
  41             static_cast<int>(connection.currentRecordNumber));
  42         return false;
  43       }
  44     } else {
  45       return true; // end of record: ok
  46     }
  47   } else {
  48     return true;
  49   }
  50 }
  51
  52 template <int LOG2_BASE>
  53 static bool EditBOZInput(
  54     IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
  55   // Skip leading white space & zeroes
  56   std::optional<int> remaining{io.CueUpInput(edit)};
  57   auto start{io.GetConnectionState().positionInRecord};
  58   std::optional<char32_t> next{io.NextInField(remaining, edit)};
  59   if (next.value_or('?') == '0') {
  60     do {
  61       start = io.GetConnectionState().positionInRecord;
  62       next = io.NextInField(remaining, edit);
  63     } while (next && *next == '0');
  64   }
  65   // Count significant digits after any leading white space & zeroes
  66   int digits{0};
  67   for (; next; next = io.NextInField(remaining, edit)) {
  68     char32_t ch{*next};
  69     if (ch == ' ' || ch == '\t') {
  70       continue;
  71     }
  72     if (ch >= '0' && ch <= '1') {
  73     } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
  74     } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
  75     } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
  76     } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
  77     } else {
  78       io.GetIoErrorHandler().SignalError(
  79           "Bad character '%lc' in B/O/Z input field", ch);
  80       return false;
  81     }
  82     ++digits;
  83   }
  84   auto significantBytes{static_cast<std::size_t>(digits * LOG2_BASE + 7) / 8};
  85   if (significantBytes > bytes) {
  86     io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
  87         "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
  88     return false;
  89   }
  90   // Reset to start of significant digits
  91   io.HandleAbsolutePosition(start);
  92   remaining.reset();
  93   // Make a second pass now that the digit count is known
  94   std::memset(n, 0, bytes);
  95   int increment{isHostLittleEndian ? -1 : 1};
  96   auto *data{reinterpret_cast<unsigned char *>(n) +
  97       (isHostLittleEndian ? significantBytes - 1 : 0)};
  98   int shift{((digits - 1) * LOG2_BASE) & 7};
  99   if (shift + LOG2_BASE > 8) {
 100     shift -= 8; // misaligned octal
 101   }
 102   while (digits > 0) {
 103     char32_t ch{*io.NextInField(remaining, edit)};
 104     int digit{0};
 105     if (ch >= '0' && ch <= '9') {
 106       digit = ch - '0';
 107     } else if (ch >= 'A' && ch <= 'F') {
 108       digit = ch + 10 - 'A';
 109     } else if (ch >= 'a' && ch <= 'f') {
 110       digit = ch + 10 - 'a';
 111     } else {
 112       continue;
 113     }
 114     --digits;
 115     if (shift < 0) {
 116       shift += 8;
 117       if (shift + LOG2_BASE > 8) { // misaligned octal
 118         *data |= digit >> (8 - shift);
 119       }
 120       data += increment;
 121     }
 122     *data |= digit << shift;
 123     shift -= LOG2_BASE;
 124   }
 125   return CheckCompleteListDirectedField(io, edit);
 126 }
 127
 128 static inline char32_t GetRadixPointChar(const DataEdit &edit) {
 129   return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
 130 }
 131
 132 // Prepares input from a field, and returns the sign, if any, else '\0'.
 133 static char ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
 134     std::optional<char32_t> &next, std::optional<int> &remaining) {
 135   remaining = io.CueUpInput(edit);
 136   next = io.NextInField(remaining, edit);
 137   char sign{'\0'};
 138   if (next) {
 139     if (*next == '-' || *next == '+') {
 140       sign = *next;
 141       if (!edit.IsListDirected()) {
 142         io.SkipSpaces(remaining);
 143       }
 144       next = io.NextInField(remaining, edit);
 145     }
 146   }
 147   return sign;
 148 }
 149
 150 bool EditIntegerInput(
 151     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
 152   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
 153   switch (edit.descriptor) {
 154   case DataEdit::ListDirected:
 155     if (IsNamelistNameOrSlash(io)) {
 156       return false;
 157     }
 158     break;
 159   case 'G':
 160   case 'I':
 161     break;
 162   case 'B':
 163     return EditBOZInput<1>(io, edit, n, kind);
 164   case 'O':
 165     return EditBOZInput<3>(io, edit, n, kind);
 166   case 'Z':
 167     return EditBOZInput<4>(io, edit, n, kind);
 168   case 'A': // legacy extension
 169     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
 170   default:
 171     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 172         "Data edit descriptor '%c' may not be used with an INTEGER data item",
 173         edit.descriptor);
 174     return false;
 175   }
 176   std::optional<int> remaining;
 177   std::optional<char32_t> next;
 178   char sign{ScanNumericPrefix(io, edit, next, remaining)};
 179   common::UnsignedInt128 value{0};
 180   bool any{!!sign};
 181   bool overflow{false};
 182   for (; next; next = io.NextInField(remaining, edit)) {
 183     char32_t ch{*next};
 184     if (ch == ' ' || ch == '\t') {
 185       if (edit.modes.editingFlags & blankZero) {
 186         ch = '0'; // BZ mode - treat blank as if it were zero
 187       } else {
 188         continue;
 189       }
 190     }
 191     int digit{0};
 192     if (ch >= '0' && ch <= '9') {
 193       digit = ch - '0';
 194     } else {
 195       io.GetIoErrorHandler().SignalError(
 196           "Bad character '%lc' in INTEGER input field", ch);
 197       return false;
 198     }
 199     static constexpr auto maxu128{~common::UnsignedInt128{0}};
 200     static constexpr auto maxu128OverTen{maxu128 / 10};
 201     static constexpr int maxLastDigit{
 202         static_cast<int>(maxu128 - (maxu128OverTen * 10))};
 203     overflow |= value >= maxu128OverTen &&
 204         (value > maxu128OverTen || digit > maxLastDigit);
 205     value *= 10;
 206     value += digit;
 207     any = true;
 208   }
 209   if (!any && !remaining) {
 210     io.GetIoErrorHandler().SignalError(
 211         "Integer value absent from NAMELIST or list-directed input");
 212     return false;
 213   }
 214   auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
 215   overflow |= value >= maxForKind && (value > maxForKind || sign != '-');
 216   if (overflow) {
 217     io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
 218         "Decimal input overflows INTEGER(%d) variable", kind);
 219     return false;
 220   }
 221   if (sign == '-') {
 222     value = -value;
 223   }
 224   if (any || !io.GetConnectionState().IsAtEOF()) {
 225     std::memcpy(n, &value, kind); // a blank field means zero
 226   }
 227   return any;
 228 }
 229
 230 // Parses a REAL input number from the input source as a normalized
 231 // fraction into a supplied buffer -- there's an optional '-', a
 232 // decimal point when the input is not hexadecimal, and at least one
 233 // digit.  Replaces blanks with zeroes where appropriate.
 234 struct ScannedRealInput {
 235   // Number of characters that (should) have been written to the
 236   // buffer -- this can be larger than the buffer size, which
 237   // indicates buffer overflow.  Zero indicates an error.
 238   int got{0};
 239   int exponent{0}; // adjusted as necessary; binary if isHexadecimal
 240   bool isHexadecimal{false}; // 0X...
 241 };
 242 static ScannedRealInput ScanRealInput(
 243     char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) {
 244   std::optional<int> remaining;
 245   std::optional<char32_t> next;
 246   int got{0};
 247   std::optional<int> radixPointOffset;
 248   auto Put{[&](char ch) -> void {
 249     if (got < bufferSize) {
 250       buffer[got] = ch;
 251     }
 252     ++got;
 253   }};
 254   char sign{ScanNumericPrefix(io, edit, next, remaining)};
 255   if (sign == '-') {
 256     Put('-');
 257   }
 258   bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
 259   int exponent{0};
 260   if (!next || (!bzMode && *next == ' ')) {
 261     if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) {
 262       // An empty/blank field means zero when not list-directed.
 263       // A fixed-width field containing only a sign is also zero;
 264       // this behavior isn't standard-conforming in F'2023 but it is
 265       // required to pass FCVS.
 266       Put('0');
 267     }
 268     return {got, exponent, false};
 269   }
 270   char32_t radixPointChar{GetRadixPointChar(edit)};
 271   char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
 272   bool isHexadecimal{false};
 273   if (first == 'N' || first == 'I') {
 274     // NaN or infinity - convert to upper case
 275     // Subtle: a blank field of digits could be followed by 'E' or 'D',
 276     for (; next &&
 277          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
 278          next = io.NextInField(remaining, edit)) {
 279       if (*next >= 'a' && *next <= 'z') {
 280         Put(*next - 'a' + 'A');
 281       } else {
 282         Put(*next);
 283       }
 284     }
 285     if (next && *next == '(') { // NaN(...)
 286       Put('(');
 287       int depth{1};
 288       while (true) {
 289         next = io.NextInField(remaining, edit);
 290         if (depth == 0) {
 291           break;
 292         } else if (!next) {
 293           return {}; // error
 294         } else if (*next == '(') {
 295           ++depth;
 296         } else if (*next == ')') {
 297           --depth;
 298         }
 299         Put(*next);
 300       }
 301     }
 302   } else if (first == radixPointChar || (first >= '0' && first <= '9') ||
 303       (bzMode && (first == ' ' || first == '\t')) || first == 'E' ||
 304       first == 'D' || first == 'Q') {
 305     if (first == '0') {
 306       next = io.NextInField(remaining, edit);
 307       if (next && (*next == 'x' || *next == 'X')) { // 0X...
 308         isHexadecimal = true;
 309         next = io.NextInField(remaining, edit);
 310       } else {
 311         Put('0');
 312       }
 313     }
 314     // input field is normalized to a fraction
 315     if (!isHexadecimal) {
 316       Put('.');
 317     }
 318     auto start{got};
 319     for (; next; next = io.NextInField(remaining, edit)) {
 320       char32_t ch{*next};
 321       if (ch == ' ' || ch == '\t') {
 322         if (isHexadecimal) {
 323           return {}; // error
 324         } else if (bzMode) {
 325           ch = '0'; // BZ mode - treat blank as if it were zero
 326         } else {
 327           continue; // ignore blank in fixed field
 328         }
 329       }
 330       if (ch == '0' && got == start && !radixPointOffset) {
 331         // omit leading zeroes before the radix point
 332       } else if (ch >= '0' && ch <= '9') {
 333         Put(ch);
 334       } else if (ch == radixPointChar && !radixPointOffset) {
 335         // The radix point character is *not* copied to the buffer.
 336         radixPointOffset = got - start; // # of digits before the radix point
 337       } else if (isHexadecimal && ch >= 'A' && ch <= 'F') {
 338         Put(ch);
 339       } else if (isHexadecimal && ch >= 'a' && ch <= 'f') {
 340         Put(ch - 'a' + 'A'); // normalize to capitals
 341       } else {
 342         break;
 343       }
 344     }
 345     if (got == start) {
 346       // Nothing but zeroes and maybe a radix point.  F'2018 requires
 347       // at least one digit, but F'77 did not, and a bare "." shows up in
 348       // the FCVS suite.
 349       Put('0'); // emit at least one digit
 350     }
 351     // In list-directed input, a bad exponent is not consumed.
 352     auto nextBeforeExponent{next};
 353     auto startExponent{io.GetConnectionState().positionInRecord};
 354     bool hasGoodExponent{false};
 355     if (next) {
 356       if (isHexadecimal) {
 357         if (*next == 'p' || *next == 'P') {
 358           next = io.NextInField(remaining, edit);
 359         } else {
 360           // The binary exponent is not optional in the standard.
 361           return {}; // error
 362         }
 363       } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
 364           *next == 'q' || *next == 'Q') {
 365         // Optional exponent letter.  Blanks are allowed between the
 366         // optional exponent letter and the exponent value.
 367         io.SkipSpaces(remaining);
 368         next = io.NextInField(remaining, edit);
 369       }
 370     }
 371     if (next &&
 372         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
 373             *next == ' ' || *next == '\t')) {
 374       bool negExpo{*next == '-'};
 375       if (negExpo || *next == '+') {
 376         next = io.NextInField(remaining, edit);
 377       }
 378       for (; next; next = io.NextInField(remaining, edit)) {
 379         if (*next >= '0' && *next <= '9') {
 380           hasGoodExponent = true;
 381           if (exponent < 10000) {
 382             exponent = 10 * exponent + *next - '0';
 383           }
 384         } else if (*next == ' ' || *next == '\t') {
 385           if (isHexadecimal) {
 386             break;
 387           } else if (bzMode) {
 388             hasGoodExponent = true;
 389             exponent = 10 * exponent;
 390           }
 391         } else {
 392           break;
 393         }
 394       }
 395       if (negExpo) {
 396         exponent = -exponent;
 397       }
 398     }
 399     if (!hasGoodExponent) {
 400       if (isHexadecimal) {
 401         return {}; // error
 402       }
 403       // There isn't a good exponent; do not consume it.
 404       next = nextBeforeExponent;
 405       io.HandleAbsolutePosition(startExponent);
 406       // The default exponent is -kP, but the scale factor doesn't affect
 407       // an explicit exponent.
 408       exponent = -edit.modes.scale;
 409     }
 410     // Adjust exponent by number of digits before the radix point.
 411     if (isHexadecimal) {
 412       // Exponents for hexadecimal input are binary.
 413       exponent += radixPointOffset.value_or(got - start) * 4;
 414     } else if (radixPointOffset) {
 415       exponent += *radixPointOffset;
 416     } else {
 417       // When no redix point (or comma) appears in the value, the 'd'
 418       // part of the edit descriptor must be interpreted as the number of
 419       // digits in the value to be interpreted as being to the *right* of
 420       // the assumed radix point (13.7.2.3.2)
 421       exponent += got - start - edit.digits.value_or(0);
 422     }
 423   }
 424   // Consume the trailing ')' of a list-directed or NAMELIST complex
 425   // input value.
 426   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
 427     if (next && (*next == ' ' || *next == '\t')) {
 428       io.SkipSpaces(remaining);
 429       next = io.NextInField(remaining, edit);
 430     }
 431     if (!next) { // NextInField fails on separators like ')'
 432       std::size_t byteCount{0};
 433       next = io.GetCurrentChar(byteCount);
 434       if (next && *next == ')') {
 435         io.HandleRelativePosition(byteCount);
 436       }
 437     }
 438   } else if (remaining) {
 439     while (next && (*next == ' ' || *next == '\t')) {
 440       next = io.NextInField(remaining, edit);
 441     }
 442     if (next) {
 443       return {}; // error: unused nonblank character in fixed-width field
 444     }
 445   }
 446   return {got, exponent, isHexadecimal};
 447 }
 448
 449 static void RaiseFPExceptions(decimal::ConversionResultFlags flags) {
 450 #undef RAISE
 451 #ifdef feraisexcept // a macro in some environments; omit std::
 452 #define RAISE feraiseexcept
 453 #else
 454 #define RAISE std::feraiseexcept
 455 #endif
 456   if (flags & decimal::ConversionResultFlags::Overflow) {
 457     RAISE(FE_OVERFLOW);
 458   }
 459   if (flags & decimal::ConversionResultFlags::Inexact) {
 460     RAISE(FE_INEXACT);
 461   }
 462   if (flags & decimal::ConversionResultFlags::Invalid) {
 463     RAISE(FE_INVALID);
 464   }
 465 #undef RAISE
 466 }
 467
 468 // If no special modes are in effect and the form of the input value
 469 // that's present in the input stream is acceptable to the decimal->binary
 470 // converter without modification, this fast path for real input
 471 // saves time by avoiding memory copies and reformatting of the exponent.
 472 template <int PRECISION>
 473 static bool TryFastPathRealDecimalInput(
 474     IoStatementState &io, const DataEdit &edit, void *n) {
 475   if (edit.modes.editingFlags & (blankZero | decimalComma)) {
 476     return false;
 477   }
 478   if (edit.modes.scale != 0) {
 479     return false;
 480   }
 481   const ConnectionState &connection{io.GetConnectionState()};
 482   if (connection.internalIoCharKind > 1) {
 483     return false; // reading non-default character
 484   }
 485   const char *str{nullptr};
 486   std::size_t got{io.GetNextInputBytes(str)};
 487   if (got == 0 || str == nullptr || !connection.recordLength.has_value()) {
 488     return false; // could not access reliably-terminated input stream
 489   }
 490   const char *p{str};
 491   std::int64_t maxConsume{
 492       std::min<std::int64_t>(got, edit.width.value_or(got))};
 493   const char *limit{str + maxConsume};
 494   decimal::ConversionToBinaryResult<PRECISION> converted{
 495       decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
 496   if (converted.flags & (decimal::Invalid | decimal::Overflow)) {
 497     return false;
 498   }
 499   if (edit.digits.value_or(0) != 0) {
 500     // Edit descriptor is Fw.d (or other) with d != 0, which
 501     // implies scaling
 502     const char *q{str};
 503     for (; q < limit; ++q) {
 504       if (*q == '.' || *q == 'n' || *q == 'N') {
 505         break;
 506       }
 507     }
 508     if (q == limit) {
 509       // No explicit decimal point, and not NaN/Inf.
 510       return false;
 511     }
 512   }
 513   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
 514     // Need to consume a trailing ')', possibly with leading spaces
 515     for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
 516     }
 517     if (p < limit && *p == ')') {
 518       ++p;
 519     } else {
 520       return false;
 521     }
 522   } else if (edit.IsListDirected()) {
 523     if (p < limit && !IsCharValueSeparator(edit, *p)) {
 524       return false;
 525     }
 526   } else {
 527     for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
 528     }
 529     if (edit.width && p < str + *edit.width) {
 530       return false; // unconverted characters remain in fixed width field
 531     }
 532   }
 533   // Success on the fast path!
 534   *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
 535       converted.binary;
 536   io.HandleRelativePosition(p - str);
 537   // Set FP exception flags
 538   if (converted.flags != decimal::ConversionResultFlags::Exact) {
 539     RaiseFPExceptions(converted.flags);
 540   }
 541   return true;
 542 }
 543
 544 template <int binaryPrecision>
 545 decimal::ConversionToBinaryResult<binaryPrecision> ConvertHexadecimal(
 546     const char *&p, enum decimal::FortranRounding rounding, int expo) {
 547   using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>;
 548   using RawType = typename RealType::RawType;
 549   bool isNegative{*p == '-'};
 550   constexpr RawType one{1};
 551   RawType signBit{0};
 552   if (isNegative) {
 553     ++p;
 554     signBit = one << (RealType::bits - 1);
 555   }
 556   RawType fraction{0};
 557   // Adjust the incoming binary P+/- exponent to shift the radix point
 558   // to below the LSB and add in the bias.
 559   expo += binaryPrecision - 1 + RealType::exponentBias;
 560   // Input the fraction.
 561   int roundingBit{0};
 562   int guardBit{0};
 563   for (; *p; ++p) {
 564     fraction <<= 4;
 565     expo -= 4;
 566     if (*p >= '0' && *p <= '9') {
 567       fraction |= *p - '0';
 568     } else if (*p >= 'A' && *p <= 'F') {
 569       fraction |= *p - 'A' + 10; // data were normalized to capitals
 570     } else {
 571       break;
 572     }
 573     while (fraction >> binaryPrecision) {
 574       guardBit |= roundingBit;
 575       roundingBit = (int)fraction & 1;
 576       fraction >>= 1;
 577       ++expo;
 578     }
 579   }
 580   if (fraction) {
 581     // Boost biased expo if too small
 582     while (expo < 1) {
 583       guardBit |= roundingBit;
 584       roundingBit = (int)fraction & 1;
 585       fraction >>= 1;
 586       ++expo;
 587     }
 588     // Normalize
 589     while (expo > 1 && !(fraction >> (binaryPrecision - 1))) {
 590       fraction <<= 1;
 591       --expo;
 592     }
 593     // Rounding
 594     bool increase{false};
 595     switch (rounding) {
 596     case decimal::RoundNearest: // RN & RP
 597       increase = roundingBit && (guardBit | ((int)fraction & 1));
 598       break;
 599     case decimal::RoundUp: // RU
 600       increase = !isNegative && (roundingBit | guardBit);
 601       break;
 602     case decimal::RoundDown: // RD
 603       increase = isNegative && (roundingBit | guardBit);
 604       break;
 605     case decimal::RoundToZero: // RZ
 606       break;
 607     case decimal::RoundCompatible: // RC
 608       increase = roundingBit != 0;
 609       break;
 610     }
 611     if (increase) {
 612       ++fraction;
 613       if (fraction >> binaryPrecision) {
 614         fraction >>= 1;
 615         ++expo;
 616       }
 617     }
 618   }
 619   // Package & return result
 620   constexpr RawType significandMask{(one << RealType::significandBits) - 1};
 621   if (!fraction) {
 622     expo = 0;
 623   } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) {
 624     expo = 0; // subnormal
 625   } else if (expo >= RealType::maxExponent) {
 626     expo = RealType::maxExponent; // +/-Inf
 627     fraction = 0;
 628   } else {
 629     fraction &= significandMask; // remove explicit normalization unless x87
 630   }
 631   return decimal::ConversionToBinaryResult<binaryPrecision>{
 632       RealType{static_cast<RawType>(signBit |
 633           static_cast<RawType>(expo) << RealType::significandBits | fraction)},
 634       (roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
 635 }
 636
 637 template <int KIND>
 638 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
 639   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
 640   if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) {
 641     return CheckCompleteListDirectedField(io, edit);
 642   }
 643   // Fast path wasn't available or didn't work; go the more general route
 644   static constexpr int maxDigits{
 645       common::MaxDecimalConversionDigits(binaryPrecision)};
 646   static constexpr int bufferSize{maxDigits + 18};
 647   char buffer[bufferSize];
 648   auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)};
 649   int got{scanned.got};
 650   if (got >= maxDigits + 2) {
 651     io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
 652     return false;
 653   }
 654   if (got == 0) {
 655     const auto &connection{io.GetConnectionState()};
 656     io.GetIoErrorHandler().SignalError(IostatBadRealInput,
 657         "Bad real input data at column %d of record %d",
 658         static_cast<int>(connection.positionInRecord + 1),
 659         static_cast<int>(connection.currentRecordNumber));
 660     return false;
 661   }
 662   decimal::ConversionToBinaryResult<binaryPrecision> converted;
 663   const char *p{buffer};
 664   if (scanned.isHexadecimal) {
 665     buffer[got] = '\0';
 666     converted = ConvertHexadecimal<binaryPrecision>(
 667         p, edit.modes.round, scanned.exponent);
 668   } else {
 669     bool hadExtra{got > maxDigits};
 670     int exponent{scanned.exponent};
 671     if (exponent != 0) {
 672       buffer[got++] = 'e';
 673       if (exponent < 0) {
 674         buffer[got++] = '-';
 675         exponent = -exponent;
 676       }
 677       if (exponent > 9999) {
 678         exponent = 9999; // will convert to +/-Inf
 679       }
 680       if (exponent > 999) {
 681         int dig{exponent / 1000};
 682         buffer[got++] = '0' + dig;
 683         int rest{exponent - 1000 * dig};
 684         dig = rest / 100;
 685         buffer[got++] = '0' + dig;
 686         rest -= 100 * dig;
 687         dig = rest / 10;
 688         buffer[got++] = '0' + dig;
 689         buffer[got++] = '0' + (rest - 10 * dig);
 690       } else if (exponent > 99) {
 691         int dig{exponent / 100};
 692         buffer[got++] = '0' + dig;
 693         int rest{exponent - 100 * dig};
 694         dig = rest / 10;
 695         buffer[got++] = '0' + dig;
 696         buffer[got++] = '0' + (rest - 10 * dig);
 697       } else if (exponent > 9) {
 698         int dig{exponent / 10};
 699         buffer[got++] = '0' + dig;
 700         buffer[got++] = '0' + (exponent - 10 * dig);
 701       } else {
 702         buffer[got++] = '0' + exponent;
 703       }
 704     }
 705     buffer[got] = '\0';
 706     converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round);
 707     if (hadExtra) {
 708       converted.flags = static_cast<enum decimal::ConversionResultFlags>(
 709           converted.flags | decimal::Inexact);
 710     }
 711   }
 712   if (*p) { // unprocessed junk after value
 713     const auto &connection{io.GetConnectionState()};
 714     io.GetIoErrorHandler().SignalError(IostatBadRealInput,
 715         "Trailing characters after real input data at column %d of record %d",
 716         static_cast<int>(connection.positionInRecord + 1),
 717         static_cast<int>(connection.currentRecordNumber));
 718     return false;
 719   }
 720   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
 721       converted.binary;
 722   // Set FP exception flags
 723   if (converted.flags != decimal::ConversionResultFlags::Exact) {
 724     if (converted.flags & decimal::ConversionResultFlags::Overflow) {
 725       io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
 726       return false;
 727     }
 728     RaiseFPExceptions(converted.flags);
 729   }
 730   return CheckCompleteListDirectedField(io, edit);
 731 }
 732
 733 template <int KIND>
 734 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
 735   switch (edit.descriptor) {
 736   case DataEdit::ListDirected:
 737     if (IsNamelistNameOrSlash(io)) {
 738       return false;
 739     }
 740     return EditCommonRealInput<KIND>(io, edit, n);
 741   case DataEdit::ListDirectedRealPart:
 742   case DataEdit::ListDirectedImaginaryPart:
 743   case 'F':
 744   case 'E': // incl. EN, ES, & EX
 745   case 'D':
 746   case 'G':
 747     return EditCommonRealInput<KIND>(io, edit, n);
 748   case 'B':
 749     return EditBOZInput<1>(io, edit, n,
 750         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
 751   case 'O':
 752     return EditBOZInput<3>(io, edit, n,
 753         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
 754   case 'Z':
 755     return EditBOZInput<4>(io, edit, n,
 756         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
 757   case 'A': // legacy extension
 758     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
 759   default:
 760     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 761         "Data edit descriptor '%c' may not be used for REAL input",
 762         edit.descriptor);
 763     return false;
 764   }
 765 }
 766
 767 // 13.7.3 in Fortran 2018
 768 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
 769   switch (edit.descriptor) {
 770   case DataEdit::ListDirected:
 771     if (IsNamelistNameOrSlash(io)) {
 772       return false;
 773     }
 774     break;
 775   case 'L':
 776   case 'G':
 777     break;
 778   default:
 779     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 780         "Data edit descriptor '%c' may not be used for LOGICAL input",
 781         edit.descriptor);
 782     return false;
 783   }
 784   std::optional<int> remaining{io.CueUpInput(edit)};
 785   std::optional<char32_t> next{io.NextInField(remaining, edit)};
 786   if (next && *next == '.') { // skip optional period
 787     next = io.NextInField(remaining, edit);
 788   }
 789   if (!next) {
 790     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
 791     return false;
 792   }
 793   switch (*next) {
 794   case 'T':
 795   case 't':
 796     x = true;
 797     break;
 798   case 'F':
 799   case 'f':
 800     x = false;
 801     break;
 802   default:
 803     io.GetIoErrorHandler().SignalError(
 804         "Bad character '%lc' in LOGICAL input field", *next);
 805     return false;
 806   }
 807   if (remaining) { // ignore the rest of a fixed-width field
 808     io.HandleRelativePosition(*remaining);
 809   } else if (edit.descriptor == DataEdit::ListDirected) {
 810     while (io.NextInField(remaining, edit)) { // discard rest of field
 811     }
 812   }
 813   return CheckCompleteListDirectedField(io, edit);
 814 }
 815
 816 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
 817 template <typename CHAR>
 818 static bool EditDelimitedCharacterInput(
 819     IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
 820   bool result{true};
 821   while (true) {
 822     std::size_t byteCount{0};
 823     auto ch{io.GetCurrentChar(byteCount)};
 824     if (!ch) {
 825       if (io.AdvanceRecord()) {
 826         continue;
 827       } else {
 828         result = false; // EOF in character value
 829         break;
 830       }
 831     }
 832     io.HandleRelativePosition(byteCount);
 833     if (*ch == delimiter) {
 834       auto next{io.GetCurrentChar(byteCount)};
 835       if (next && *next == delimiter) {
 836         // Repeated delimiter: use as character value
 837         io.HandleRelativePosition(byteCount);
 838       } else {
 839         break; // closing delimiter
 840       }
 841     }
 842     if (length > 0) {
 843       *x++ = *ch;
 844       --length;
 845     }
 846   }
 847   std::fill_n(x, length, ' ');
 848   return result;
 849 }
 850
 851 template <typename CHAR>
 852 static bool EditListDirectedCharacterInput(
 853     IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
 854   std::size_t byteCount{0};
 855   auto ch{io.GetCurrentChar(byteCount)};
 856   if (ch && (*ch == '\'' || *ch == '"')) {
 857     io.HandleRelativePosition(byteCount);
 858     return EditDelimitedCharacterInput(io, x, length, *ch);
 859   }
 860   if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) {
 861     return false;
 862   }
 863   // Undelimited list-directed character input: stop at a value separator
 864   // or the end of the current record.  Subtlety: the "remaining" count
 865   // here is a dummy that's used to avoid the interpretation of separators
 866   // in NextInField.
 867   std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
 868   while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) {
 869     bool isSep{false};
 870     switch (*next) {
 871     case ' ':
 872     case '\t':
 873     case '/':
 874       isSep = true;
 875       break;
 876     case ',':
 877       isSep = !(edit.modes.editingFlags & decimalComma);
 878       break;
 879     case ';':
 880       isSep = !!(edit.modes.editingFlags & decimalComma);
 881       break;
 882     default:
 883       break;
 884     }
 885     if (isSep) {
 886       remaining = 0;
 887     } else {
 888       *x++ = *next;
 889       remaining = --length > 0 ? maxUTF8Bytes : 0;
 890     }
 891   }
 892   std::fill_n(x, length, ' ');
 893   return true;
 894 }
 895
 896 template <typename CHAR>
 897 bool EditCharacterInput(
 898     IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) {
 899   switch (edit.descriptor) {
 900   case DataEdit::ListDirected:
 901     return EditListDirectedCharacterInput(io, x, length, edit);
 902   case 'A':
 903   case 'G':
 904     break;
 905   case 'B':
 906     return EditBOZInput<1>(io, edit, x, length * sizeof *x);
 907   case 'O':
 908     return EditBOZInput<3>(io, edit, x, length * sizeof *x);
 909   case 'Z':
 910     return EditBOZInput<4>(io, edit, x, length * sizeof *x);
 911   default:
 912     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 913         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
 914         edit.descriptor);
 915     return false;
 916   }
 917   const ConnectionState &connection{io.GetConnectionState()};
 918   std::size_t remaining{length};
 919   if (edit.width && *edit.width > 0) {
 920     remaining = *edit.width;
 921   }
 922   // When the field is wider than the variable, we drop the leading
 923   // characters.  When the variable is wider than the field, there can be
 924   // trailing padding or an EOR condition.
 925   const char *input{nullptr};
 926   std::size_t ready{0};
 927   // Skip leading bytes.
 928   // These bytes don't count towards INQUIRE(IOLENGTH=).
 929   std::size_t skip{remaining > length ? remaining - length : 0};
 930   // Transfer payload bytes; these do count.
 931   while (remaining > 0) {
 932     if (ready == 0) {
 933       ready = io.GetNextInputBytes(input);
 934       if (ready == 0 || (ready < remaining && edit.modes.nonAdvancing)) {
 935         if (io.CheckForEndOfRecord(ready)) {
 936           if (ready == 0) {
 937             // PAD='YES' and no more data
 938             std::fill_n(x, length, ' ');
 939             return !io.GetIoErrorHandler().InError();
 940           } else {
 941             // Do partial read(s) then pad on last iteration
 942           }
 943         } else {
 944           return !io.GetIoErrorHandler().InError();
 945         }
 946       }
 947     }
 948     std::size_t chunk;
 949     bool skipping{skip > 0};
 950     if (connection.isUTF8) {
 951       chunk = MeasureUTF8Bytes(*input);
 952       if (skipping) {
 953         --skip;
 954       } else if (auto ucs{DecodeUTF8(input)}) {
 955         *x++ = *ucs;
 956         --length;
 957       } else if (chunk == 0) {
 958         // error recovery: skip bad encoding
 959         chunk = 1;
 960       }
 961       --remaining;
 962     } else if (connection.internalIoCharKind > 1) {
 963       // Reading from non-default character internal unit
 964       chunk = connection.internalIoCharKind;
 965       if (skipping) {
 966         --skip;
 967       } else {
 968         char32_t buffer{0};
 969         std::memcpy(&buffer, input, chunk);
 970         *x++ = buffer;
 971         --length;
 972       }
 973       --remaining;
 974     } else if constexpr (sizeof *x > 1) {
 975       // Read single byte with expansion into multi-byte CHARACTER
 976       chunk = 1;
 977       if (skipping) {
 978         --skip;
 979       } else {
 980         *x++ = static_cast<unsigned char>(*input);
 981         --length;
 982       }
 983       --remaining;
 984     } else { // single bytes -> default CHARACTER
 985       if (skipping) {
 986         chunk = std::min<std::size_t>(skip, ready);
 987         skip -= chunk;
 988       } else {
 989         chunk = std::min<std::size_t>(remaining, ready);
 990         std::memcpy(x, input, chunk);
 991         x += chunk;
 992         length -= chunk;
 993       }
 994       remaining -= chunk;
 995     }
 996     input += chunk;
 997     if (!skipping) {
 998       io.GotChar(chunk);
 999     }
1000     io.HandleRelativePosition(chunk);
1001     ready -= chunk;
1002   }
1003   // Pad the remainder of the input variable, if any.
1004   std::fill_n(x, length, ' ');
1005   return CheckCompleteListDirectedField(io, edit);
1006 }
1007
1008 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
1009 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
1010 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
1011 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
1012 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
1013 // TODO: double/double
1014 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);
1015
1016 template bool EditCharacterInput(
1017     IoStatementState &, const DataEdit &, char *, std::size_t);
1018 template bool EditCharacterInput(
1019     IoStatementState &, const DataEdit &, char16_t *, std::size_t);
1020 template bool EditCharacterInput(
1021     IoStatementState &, const DataEdit &, char32_t *, std::size_t);
1022
1023 } // namespace Fortran::runtime::io