flang/runtime/edit-input.cpp

   1 //===-- runtime/edit-input.cpp --------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "edit-input.h"
  10 #include "namelist.h"
  11 #include "utf.h"
  12 #include "flang/Common/real.h"
  13 #include "flang/Common/uint128.h"
  14 #include <algorithm>
  15 #include <cfenv>
  16
  17 namespace Fortran::runtime::io {
  18
  19 template <int LOG2_BASE>
  20 static bool EditBOZInput(
  21     IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
  22   std::optional<int> remaining;
  23   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
  24   if (next.value_or('?') == '0') {
  25     do {
  26       next = io.NextInField(remaining, edit);
  27     } while (next && *next == '0');
  28   }
  29   // Count significant digits after any leading white space & zeroes
  30   int digits{0};
  31   int chars{0};
  32   for (; next; next = io.NextInField(remaining, edit)) {
  33     ++chars;
  34     char32_t ch{*next};
  35     if (ch == ' ' || ch == '\t') {
  36       continue;
  37     }
  38     if (ch >= '0' && ch <= '1') {
  39     } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
  40     } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
  41     } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
  42     } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
  43     } else {
  44       io.GetIoErrorHandler().SignalError(
  45           "Bad character '%lc' in B/O/Z input field", ch);
  46       return false;
  47     }
  48     ++digits;
  49   }
  50   auto significantBytes{static_cast<std::size_t>(digits * LOG2_BASE + 7) / 8};
  51   if (significantBytes > bytes) {
  52     io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
  53         "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
  54     return false;
  55   }
  56   // Reset to start of significant digits
  57   io.HandleRelativePosition(-chars);
  58   remaining.reset();
  59   // Make a second pass now that the digit count is known
  60   std::memset(n, 0, bytes);
  61   int increment{isHostLittleEndian ? -1 : 1};
  62   auto *data{reinterpret_cast<unsigned char *>(n) +
  63       (isHostLittleEndian ? significantBytes - 1 : 0)};
  64   int shift{((digits - 1) * LOG2_BASE) & 7};
  65   if (shift + LOG2_BASE > 8) {
  66     shift -= 8; // misaligned octal
  67   }
  68   while (digits > 0) {
  69     char32_t ch{*io.NextInField(remaining, edit)};
  70     int digit{0};
  71     if (ch >= '0' && ch <= '9') {
  72       digit = ch - '0';
  73     } else if (ch >= 'A' && ch <= 'F') {
  74       digit = ch + 10 - 'A';
  75     } else if (ch >= 'a' && ch <= 'f') {
  76       digit = ch + 10 - 'a';
  77     } else {
  78       continue;
  79     }
  80     --digits;
  81     if (shift < 0) {
  82       shift += 8;
  83       if (shift + LOG2_BASE > 8) { // misaligned octal
  84         *data |= digit >> (8 - shift);
  85       }
  86       data += increment;
  87     }
  88     *data |= digit << shift;
  89     shift -= LOG2_BASE;
  90   }
  91   return true;
  92 }
  93
  94 static inline char32_t GetDecimalPoint(const DataEdit &edit) {
  95   return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
  96 }
  97
  98 // Prepares input from a field, and consumes the sign, if any.
  99 // Returns true if there's a '-' sign.
 100 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
 101     std::optional<char32_t> &next, std::optional<int> &remaining) {
 102   next = io.PrepareInput(edit, remaining);
 103   bool negative{false};
 104   if (next) {
 105     negative = *next == '-';
 106     if (negative || *next == '+') {
 107       io.SkipSpaces(remaining);
 108       next = io.NextInField(remaining, edit);
 109     }
 110   }
 111   return negative;
 112 }
 113
 114 bool EditIntegerInput(
 115     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
 116   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
 117   switch (edit.descriptor) {
 118   case DataEdit::ListDirected:
 119     if (IsNamelistName(io)) {
 120       return false;
 121     }
 122     break;
 123   case 'G':
 124   case 'I':
 125     break;
 126   case 'B':
 127     return EditBOZInput<1>(io, edit, n, kind);
 128   case 'O':
 129     return EditBOZInput<3>(io, edit, n, kind);
 130   case 'Z':
 131     return EditBOZInput<4>(io, edit, n, kind);
 132   case 'A': // legacy extension
 133     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
 134   default:
 135     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 136         "Data edit descriptor '%c' may not be used with an INTEGER data item",
 137         edit.descriptor);
 138     return false;
 139   }
 140   std::optional<int> remaining;
 141   std::optional<char32_t> next;
 142   bool negate{ScanNumericPrefix(io, edit, next, remaining)};
 143   common::UnsignedInt128 value{0};
 144   bool any{negate};
 145   bool overflow{false};
 146   for (; next; next = io.NextInField(remaining, edit)) {
 147     char32_t ch{*next};
 148     if (ch == ' ' || ch == '\t') {
 149       if (edit.modes.editingFlags & blankZero) {
 150         ch = '0'; // BZ mode - treat blank as if it were zero
 151       } else {
 152         continue;
 153       }
 154     }
 155     int digit{0};
 156     if (ch >= '0' && ch <= '9') {
 157       digit = ch - '0';
 158     } else {
 159       io.GetIoErrorHandler().SignalError(
 160           "Bad character '%lc' in INTEGER input field", ch);
 161       return false;
 162     }
 163     static constexpr auto maxu128{~common::UnsignedInt128{0}};
 164     static constexpr auto maxu128OverTen{maxu128 / 10};
 165     static constexpr int maxLastDigit{
 166         static_cast<int>(maxu128 - (maxu128OverTen * 10))};
 167     overflow |= value >= maxu128OverTen &&
 168         (value > maxu128OverTen || digit > maxLastDigit);
 169     value *= 10;
 170     value += digit;
 171     any = true;
 172   }
 173   auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
 174   overflow |= value >= maxForKind && (value > maxForKind || !negate);
 175   if (overflow) {
 176     io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
 177         "Decimal input overflows INTEGER(%d) variable", kind);
 178     return false;
 179   }
 180   if (negate) {
 181     value = -value;
 182   }
 183   if (any || !io.GetConnectionState().IsAtEOF()) {
 184     std::memcpy(n, &value, kind); // a blank field means zero
 185   }
 186   return any;
 187 }
 188
 189 // Parses a REAL input number from the input source as a normalized
 190 // fraction into a supplied buffer -- there's an optional '-', a
 191 // decimal point, and at least one digit.  The adjusted exponent value
 192 // is returned in a reference argument.  The returned value is the number
 193 // of characters that (should) have been written to the buffer -- this can
 194 // be larger than the buffer size and can indicate overflow.  Replaces
 195 // blanks with zeroes if appropriate.
 196 static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
 197     const DataEdit &edit, int &exponent) {
 198   std::optional<int> remaining;
 199   std::optional<char32_t> next;
 200   int got{0};
 201   std::optional<int> decimalPoint;
 202   auto Put{[&](char ch) -> void {
 203     if (got < bufferSize) {
 204       buffer[got] = ch;
 205     }
 206     ++got;
 207   }};
 208   if (ScanNumericPrefix(io, edit, next, remaining)) {
 209     Put('-');
 210   }
 211   bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
 212   if (!next || (!bzMode && *next == ' ')) { // empty/blank field means zero
 213     remaining.reset();
 214     if (!io.GetConnectionState().IsAtEOF()) {
 215       Put('0');
 216     }
 217     return got;
 218   }
 219   char32_t decimal{GetDecimalPoint(edit)};
 220   char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
 221   if (first == 'N' || first == 'I') {
 222     // NaN or infinity - convert to upper case
 223     // Subtle: a blank field of digits could be followed by 'E' or 'D',
 224     for (; next &&
 225          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
 226          next = io.NextInField(remaining, edit)) {
 227       if (*next >= 'a' && *next <= 'z') {
 228         Put(*next - 'a' + 'A');
 229       } else {
 230         Put(*next);
 231       }
 232     }
 233     if (next && *next == '(') { // NaN(...)
 234       Put('(');
 235       int depth{1};
 236       while (true) {
 237         next = io.NextInField(remaining, edit);
 238         if (depth == 0) {
 239           break;
 240         } else if (!next) {
 241           return 0; // error
 242         } else if (*next == '(') {
 243           ++depth;
 244         } else if (*next == ')') {
 245           --depth;
 246         }
 247         Put(*next);
 248       }
 249     }
 250     exponent = 0;
 251   } else if (first == decimal || (first >= '0' && first <= '9') ||
 252       (bzMode && (first == ' ' || first == '\t')) || first == 'E' ||
 253       first == 'D' || first == 'Q') {
 254     Put('.'); // input field is normalized to a fraction
 255     auto start{got};
 256     for (; next; next = io.NextInField(remaining, edit)) {
 257       char32_t ch{*next};
 258       if (ch == ' ' || ch == '\t') {
 259         if (bzMode) {
 260           ch = '0'; // BZ mode - treat blank as if it were zero
 261         } else {
 262           continue;
 263         }
 264       }
 265       if (ch == '0' && got == start && !decimalPoint) {
 266         // omit leading zeroes before the decimal
 267       } else if (ch >= '0' && ch <= '9') {
 268         Put(ch);
 269       } else if (ch == decimal && !decimalPoint) {
 270         // the decimal point is *not* copied to the buffer
 271         decimalPoint = got - start; // # of digits before the decimal point
 272       } else {
 273         break;
 274       }
 275     }
 276     if (got == start) {
 277       // Nothing but zeroes and maybe a decimal point.  F'2018 requires
 278       // at least one digit, but F'77 did not, and a bare "." shows up in
 279       // the FCVS suite.
 280       Put('0'); // emit at least one digit
 281     }
 282     if (next &&
 283         (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
 284             *next == 'q' || *next == 'Q')) {
 285       // Optional exponent letter.  Blanks are allowed between the
 286       // optional exponent letter and the exponent value.
 287       io.SkipSpaces(remaining);
 288       next = io.NextInField(remaining, edit);
 289     }
 290     // The default exponent is -kP, but the scale factor doesn't affect
 291     // an explicit exponent.
 292     exponent = -edit.modes.scale;
 293     if (next &&
 294         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
 295             *next == ' ' || *next == '\t')) {
 296       bool negExpo{*next == '-'};
 297       if (negExpo || *next == '+') {
 298         next = io.NextInField(remaining, edit);
 299       }
 300       for (exponent = 0; next; next = io.NextInField(remaining, edit)) {
 301         if (*next >= '0' && *next <= '9') {
 302           if (exponent < 10000) {
 303             exponent = 10 * exponent + *next - '0';
 304           }
 305         } else if (*next == ' ' || *next == '\t') {
 306           if (bzMode) {
 307             exponent = 10 * exponent;
 308           }
 309         } else {
 310           break;
 311         }
 312       }
 313       if (negExpo) {
 314         exponent = -exponent;
 315       }
 316     }
 317     if (decimalPoint) {
 318       exponent += *decimalPoint;
 319     } else {
 320       // When no decimal point (or comma) appears in the value, the 'd'
 321       // part of the edit descriptor must be interpreted as the number of
 322       // digits in the value to be interpreted as being to the *right* of
 323       // the assumed decimal point (13.7.2.3.2)
 324       exponent += got - start - edit.digits.value_or(0);
 325     }
 326   } else {
 327     // TODO: hex FP input
 328     exponent = 0;
 329     return 0;
 330   }
 331   // Consume the trailing ')' of a list-directed or NAMELIST complex
 332   // input value.
 333   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
 334     if (next && (*next == ' ' || *next == '\t')) {
 335       next = io.NextInField(remaining, edit);
 336     }
 337     if (!next) { // NextInField fails on separators like ')'
 338       std::size_t byteCount{0};
 339       next = io.GetCurrentChar(byteCount);
 340       if (next && *next == ')') {
 341         io.HandleRelativePosition(byteCount);
 342       }
 343     }
 344   } else if (remaining) {
 345     while (next && (*next == ' ' || *next == '\t')) {
 346       next = io.NextInField(remaining, edit);
 347     }
 348     if (next) {
 349       return 0; // error: unused nonblank character in fixed-width field
 350     }
 351   }
 352   return got;
 353 }
 354
 355 static void RaiseFPExceptions(decimal::ConversionResultFlags flags) {
 356 #undef RAISE
 357 #ifdef feraisexcept // a macro in some environments; omit std::
 358 #define RAISE feraiseexcept
 359 #else
 360 #define RAISE std::feraiseexcept
 361 #endif
 362   if (flags & decimal::ConversionResultFlags::Overflow) {
 363     RAISE(FE_OVERFLOW);
 364   }
 365   if (flags & decimal::ConversionResultFlags::Inexact) {
 366     RAISE(FE_INEXACT);
 367   }
 368   if (flags & decimal::ConversionResultFlags::Invalid) {
 369     RAISE(FE_INVALID);
 370   }
 371 #undef RAISE
 372 }
 373
 374 // If no special modes are in effect and the form of the input value
 375 // that's present in the input stream is acceptable to the decimal->binary
 376 // converter without modification, this fast path for real input
 377 // saves time by avoiding memory copies and reformatting of the exponent.
 378 template <int PRECISION>
 379 static bool TryFastPathRealInput(
 380     IoStatementState &io, const DataEdit &edit, void *n) {
 381   if (edit.modes.editingFlags & (blankZero | decimalComma)) {
 382     return false;
 383   }
 384   if (edit.modes.scale != 0) {
 385     return false;
 386   }
 387   const char *str{nullptr};
 388   std::size_t got{io.GetNextInputBytes(str)};
 389   if (got == 0 || str == nullptr ||
 390       !io.GetConnectionState().recordLength.has_value()) {
 391     return false; // could not access reliably-terminated input stream
 392   }
 393   const char *p{str};
 394   std::int64_t maxConsume{
 395       std::min<std::int64_t>(got, edit.width.value_or(got))};
 396   const char *limit{str + maxConsume};
 397   decimal::ConversionToBinaryResult<PRECISION> converted{
 398       decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
 399   if (converted.flags & (decimal::Invalid | decimal::Overflow)) {
 400     return false;
 401   }
 402   if (edit.digits.value_or(0) != 0) {
 403     // Edit descriptor is Fw.d (or other) with d != 0, which
 404     // implies scaling
 405     const char *q{str};
 406     for (; q < limit; ++q) {
 407       if (*q == '.' || *q == 'n' || *q == 'N') {
 408         break;
 409       }
 410     }
 411     if (q == limit) {
 412       // No explicit decimal point, and not NaN/Inf.
 413       return false;
 414     }
 415   }
 416   for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
 417   }
 418   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
 419     // Need to consume a trailing ')' and any white space after
 420     if (p >= limit || *p != ')') {
 421       return false;
 422     }
 423     for (++p; p < limit && (*p == ' ' || *p == '\t'); ++p) {
 424     }
 425   }
 426   if (edit.width && p < str + *edit.width) {
 427     return false; // unconverted characters remain in fixed width field
 428   }
 429   // Success on the fast path!
 430   *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
 431       converted.binary;
 432   io.HandleRelativePosition(p - str);
 433   // Set FP exception flags
 434   if (converted.flags != decimal::ConversionResultFlags::Exact) {
 435     RaiseFPExceptions(converted.flags);
 436   }
 437   return true;
 438 }
 439
 440 template <int KIND>
 441 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
 442   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
 443   if (TryFastPathRealInput<binaryPrecision>(io, edit, n)) {
 444     return true;
 445   }
 446   // Fast path wasn't available or didn't work; go the more general route
 447   static constexpr int maxDigits{
 448       common::MaxDecimalConversionDigits(binaryPrecision)};
 449   static constexpr int bufferSize{maxDigits + 18};
 450   char buffer[bufferSize];
 451   int exponent{0};
 452   int got{ScanRealInput(buffer, maxDigits + 2, io, edit, exponent)};
 453   if (got >= maxDigits + 2) {
 454     io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
 455     return false;
 456   }
 457   if (got == 0) {
 458     io.GetIoErrorHandler().SignalError(IostatBadRealInput);
 459     return false;
 460   }
 461   bool hadExtra{got > maxDigits};
 462   if (exponent != 0) {
 463     buffer[got++] = 'e';
 464     if (exponent < 0) {
 465       buffer[got++] = '-';
 466       exponent = -exponent;
 467     }
 468     if (exponent > 9999) {
 469       exponent = 9999; // will convert to +/-Inf
 470     }
 471     if (exponent > 999) {
 472       int dig{exponent / 1000};
 473       buffer[got++] = '0' + dig;
 474       int rest{exponent - 1000 * dig};
 475       dig = rest / 100;
 476       buffer[got++] = '0' + dig;
 477       rest -= 100 * dig;
 478       dig = rest / 10;
 479       buffer[got++] = '0' + dig;
 480       buffer[got++] = '0' + (rest - 10 * dig);
 481     } else if (exponent > 99) {
 482       int dig{exponent / 100};
 483       buffer[got++] = '0' + dig;
 484       int rest{exponent - 100 * dig};
 485       dig = rest / 10;
 486       buffer[got++] = '0' + dig;
 487       buffer[got++] = '0' + (rest - 10 * dig);
 488     } else if (exponent > 9) {
 489       int dig{exponent / 10};
 490       buffer[got++] = '0' + dig;
 491       buffer[got++] = '0' + (exponent - 10 * dig);
 492     } else {
 493       buffer[got++] = '0' + exponent;
 494     }
 495   }
 496   buffer[got] = '\0';
 497   const char *p{buffer};
 498   decimal::ConversionToBinaryResult<binaryPrecision> converted{
 499       decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round)};
 500   if (hadExtra) {
 501     converted.flags = static_cast<enum decimal::ConversionResultFlags>(
 502         converted.flags | decimal::Inexact);
 503   }
 504   if (*p) { // unprocessed junk after value
 505     io.GetIoErrorHandler().SignalError(IostatBadRealInput);
 506     return false;
 507   }
 508   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
 509       converted.binary;
 510   // Set FP exception flags
 511   if (converted.flags != decimal::ConversionResultFlags::Exact) {
 512     if (converted.flags & decimal::ConversionResultFlags::Overflow) {
 513       io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
 514       return false;
 515     }
 516     RaiseFPExceptions(converted.flags);
 517   }
 518   return true;
 519 }
 520
 521 template <int KIND>
 522 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
 523   switch (edit.descriptor) {
 524   case DataEdit::ListDirected:
 525     if (IsNamelistName(io)) {
 526       return false;
 527     }
 528     return EditCommonRealInput<KIND>(io, edit, n);
 529   case DataEdit::ListDirectedRealPart:
 530   case DataEdit::ListDirectedImaginaryPart:
 531   case 'F':
 532   case 'E': // incl. EN, ES, & EX
 533   case 'D':
 534   case 'G':
 535     return EditCommonRealInput<KIND>(io, edit, n);
 536   case 'B':
 537     return EditBOZInput<1>(io, edit, n,
 538         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
 539   case 'O':
 540     return EditBOZInput<3>(io, edit, n,
 541         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
 542   case 'Z':
 543     return EditBOZInput<4>(io, edit, n,
 544         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
 545   case 'A': // legacy extension
 546     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
 547   default:
 548     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 549         "Data edit descriptor '%c' may not be used for REAL input",
 550         edit.descriptor);
 551     return false;
 552   }
 553 }
 554
 555 // 13.7.3 in Fortran 2018
 556 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
 557   switch (edit.descriptor) {
 558   case DataEdit::ListDirected:
 559     if (IsNamelistName(io)) {
 560       return false;
 561     }
 562     break;
 563   case 'L':
 564   case 'G':
 565     break;
 566   default:
 567     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 568         "Data edit descriptor '%c' may not be used for LOGICAL input",
 569         edit.descriptor);
 570     return false;
 571   }
 572   std::optional<int> remaining;
 573   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
 574   if (next && *next == '.') { // skip optional period
 575     next = io.NextInField(remaining, edit);
 576   }
 577   if (!next) {
 578     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
 579     return false;
 580   }
 581   switch (*next) {
 582   case 'T':
 583   case 't':
 584     x = true;
 585     break;
 586   case 'F':
 587   case 'f':
 588     x = false;
 589     break;
 590   default:
 591     io.GetIoErrorHandler().SignalError(
 592         "Bad character '%lc' in LOGICAL input field", *next);
 593     return false;
 594   }
 595   if (remaining) { // ignore the rest of the field
 596     io.HandleRelativePosition(*remaining);
 597   } else if (edit.descriptor == DataEdit::ListDirected) {
 598     while (io.NextInField(remaining, edit)) { // discard rest of field
 599     }
 600   }
 601   return true;
 602 }
 603
 604 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
 605 template <typename CHAR>
 606 static bool EditDelimitedCharacterInput(
 607     IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
 608   bool result{true};
 609   while (true) {
 610     std::size_t byteCount{0};
 611     auto ch{io.GetCurrentChar(byteCount)};
 612     if (!ch) {
 613       if (io.AdvanceRecord()) {
 614         continue;
 615       } else {
 616         result = false; // EOF in character value
 617         break;
 618       }
 619     }
 620     io.HandleRelativePosition(byteCount);
 621     if (*ch == delimiter) {
 622       auto next{io.GetCurrentChar(byteCount)};
 623       if (next && *next == delimiter) {
 624         // Repeated delimiter: use as character value
 625         io.HandleRelativePosition(byteCount);
 626       } else {
 627         break; // closing delimiter
 628       }
 629     }
 630     if (length > 0) {
 631       *x++ = *ch;
 632       --length;
 633     }
 634   }
 635   std::fill_n(x, length, ' ');
 636   return result;
 637 }
 638
 639 template <typename CHAR>
 640 static bool EditListDirectedCharacterInput(
 641     IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
 642   std::size_t byteCount{0};
 643   auto ch{io.GetCurrentChar(byteCount)};
 644   if (ch && (*ch == '\'' || *ch == '"')) {
 645     io.HandleRelativePosition(byteCount);
 646     return EditDelimitedCharacterInput(io, x, length, *ch);
 647   }
 648   if (IsNamelistName(io) || io.GetConnectionState().IsAtEOF()) {
 649     return false;
 650   }
 651   // Undelimited list-directed character input: stop at a value separator
 652   // or the end of the current record.  Subtlety: the "remaining" count
 653   // here is a dummy that's used to avoid the interpretation of separators
 654   // in NextInField.
 655   std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
 656   while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) {
 657     bool isSep{false};
 658     switch (*next) {
 659     case ' ':
 660     case '\t':
 661     case '/':
 662       isSep = true;
 663       break;
 664     case ',':
 665       isSep = !(edit.modes.editingFlags & decimalComma);
 666       break;
 667     case ';':
 668       isSep = !!(edit.modes.editingFlags & decimalComma);
 669       break;
 670     default:
 671       break;
 672     }
 673     if (isSep) {
 674       remaining = 0;
 675     } else {
 676       *x++ = *next;
 677       remaining = --length > 0 ? maxUTF8Bytes : 0;
 678     }
 679   }
 680   std::fill_n(x, length, ' ');
 681   return true;
 682 }
 683
 684 template <typename CHAR>
 685 bool EditCharacterInput(
 686     IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) {
 687   switch (edit.descriptor) {
 688   case DataEdit::ListDirected:
 689     return EditListDirectedCharacterInput(io, x, length, edit);
 690   case 'A':
 691   case 'G':
 692     break;
 693   case 'B':
 694     return EditBOZInput<1>(io, edit, x, length * sizeof *x);
 695   case 'O':
 696     return EditBOZInput<3>(io, edit, x, length * sizeof *x);
 697   case 'Z':
 698     return EditBOZInput<4>(io, edit, x, length * sizeof *x);
 699   default:
 700     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 701         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
 702         edit.descriptor);
 703     return false;
 704   }
 705   const ConnectionState &connection{io.GetConnectionState()};
 706   std::size_t remaining{length};
 707   if (edit.width && *edit.width > 0) {
 708     remaining = *edit.width;
 709   }
 710   // When the field is wider than the variable, we drop the leading
 711   // characters.  When the variable is wider than the field, there can be
 712   // trailing padding.
 713   const char *input{nullptr};
 714   std::size_t ready{0};
 715   // Skip leading bytes.
 716   // These bytes don't count towards INQUIRE(IOLENGTH=).
 717   std::size_t skip{remaining > length ? remaining - length : 0};
 718   // Transfer payload bytes; these do count.
 719   while (remaining > 0) {
 720     if (ready == 0) {
 721       ready = io.GetNextInputBytes(input);
 722       if (ready == 0) {
 723         if (io.CheckForEndOfRecord()) {
 724           std::fill_n(x, length, ' '); // PAD='YES'
 725         }
 726         return !io.GetIoErrorHandler().InError();
 727       }
 728     }
 729     std::size_t chunk;
 730     bool skipping{skip > 0};
 731     if (connection.isUTF8) {
 732       chunk = MeasureUTF8Bytes(*input);
 733       if (skipping) {
 734         --skip;
 735       } else if (auto ucs{DecodeUTF8(input)}) {
 736         *x++ = *ucs;
 737         --length;
 738       } else if (chunk == 0) {
 739         // error recovery: skip bad encoding
 740         chunk = 1;
 741       }
 742       --remaining;
 743     } else if constexpr (sizeof *x > 1) {
 744       // Read single byte with expansion into multi-byte CHARACTER
 745       chunk = 1;
 746       if (skipping) {
 747         --skip;
 748       } else {
 749         *x++ = static_cast<unsigned char>(*input);
 750         --length;
 751       }
 752       --remaining;
 753     } else { // single bytes -> default CHARACTER
 754       if (skipping) {
 755         chunk = std::min<std::size_t>(skip, ready);
 756         skip -= chunk;
 757       } else {
 758         chunk = std::min<std::size_t>(remaining, ready);
 759         std::memcpy(x, input, chunk);
 760         x += chunk;
 761         length -= chunk;
 762       }
 763       remaining -= chunk;
 764     }
 765     input += chunk;
 766     if (!skipping) {
 767       io.GotChar(chunk);
 768     }
 769     io.HandleRelativePosition(chunk);
 770     ready -= chunk;
 771   }
 772   // Pad the remainder of the input variable, if any.
 773   std::fill_n(x, length, ' ');
 774   return true;
 775 }
 776
 777 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
 778 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
 779 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
 780 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
 781 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
 782 // TODO: double/double
 783 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);
 784
 785 template bool EditCharacterInput(
 786     IoStatementState &, const DataEdit &, char *, std::size_t);
 787 template bool EditCharacterInput(
 788     IoStatementState &, const DataEdit &, char16_t *, std::size_t);
 789 template bool EditCharacterInput(
 790     IoStatementState &, const DataEdit &, char32_t *, std::size_t);
 791
 792 } // namespace Fortran::runtime::io