flang/runtime/edit-input.cpp

   1 //===-- runtime/edit-input.cpp --------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "edit-input.h"
  10 #include "namelist.h"
  11 #include "utf.h"
  12 #include "flang/Common/real.h"
  13 #include "flang/Common/uint128.h"
  14 #include <algorithm>
  15 #include <cfenv>
  16
  17 namespace Fortran::runtime::io {
  18
  19 template <int LOG2_BASE>
  20 static bool EditBOZInput(
  21     IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
  22   // Skip leading white space & zeroes
  23   std::optional<int> remaining{io.CueUpInput(edit)};
  24   auto start{io.GetConnectionState().positionInRecord};
  25   std::optional<char32_t> next{io.NextInField(remaining, edit)};
  26   if (next.value_or('?') == '0') {
  27     do {
  28       start = io.GetConnectionState().positionInRecord;
  29       next = io.NextInField(remaining, edit);
  30     } while (next && *next == '0');
  31   }
  32   // Count significant digits after any leading white space & zeroes
  33   int digits{0};
  34   for (; next; next = io.NextInField(remaining, edit)) {
  35     char32_t ch{*next};
  36     if (ch == ' ' || ch == '\t') {
  37       continue;
  38     }
  39     if (ch >= '0' && ch <= '1') {
  40     } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
  41     } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
  42     } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
  43     } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
  44     } else {
  45       io.GetIoErrorHandler().SignalError(
  46           "Bad character '%lc' in B/O/Z input field", ch);
  47       return false;
  48     }
  49     ++digits;
  50   }
  51   auto significantBytes{static_cast<std::size_t>(digits * LOG2_BASE + 7) / 8};
  52   if (significantBytes > bytes) {
  53     io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
  54         "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
  55     return false;
  56   }
  57   // Reset to start of significant digits
  58   io.HandleAbsolutePosition(start);
  59   remaining.reset();
  60   // Make a second pass now that the digit count is known
  61   std::memset(n, 0, bytes);
  62   int increment{isHostLittleEndian ? -1 : 1};
  63   auto *data{reinterpret_cast<unsigned char *>(n) +
  64       (isHostLittleEndian ? significantBytes - 1 : 0)};
  65   int shift{((digits - 1) * LOG2_BASE) & 7};
  66   if (shift + LOG2_BASE > 8) {
  67     shift -= 8; // misaligned octal
  68   }
  69   while (digits > 0) {
  70     char32_t ch{*io.NextInField(remaining, edit)};
  71     int digit{0};
  72     if (ch >= '0' && ch <= '9') {
  73       digit = ch - '0';
  74     } else if (ch >= 'A' && ch <= 'F') {
  75       digit = ch + 10 - 'A';
  76     } else if (ch >= 'a' && ch <= 'f') {
  77       digit = ch + 10 - 'a';
  78     } else {
  79       continue;
  80     }
  81     --digits;
  82     if (shift < 0) {
  83       shift += 8;
  84       if (shift + LOG2_BASE > 8) { // misaligned octal
  85         *data |= digit >> (8 - shift);
  86       }
  87       data += increment;
  88     }
  89     *data |= digit << shift;
  90     shift -= LOG2_BASE;
  91   }
  92   return true;
  93 }
  94
  95 static inline char32_t GetDecimalPoint(const DataEdit &edit) {
  96   return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
  97 }
  98
  99 // Prepares input from a field, and consumes the sign, if any.
 100 // Returns true if there's a '-' sign.
 101 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
 102     std::optional<char32_t> &next, std::optional<int> &remaining) {
 103   remaining = io.CueUpInput(edit);
 104   next = io.NextInField(remaining, edit);
 105   bool negative{false};
 106   if (next) {
 107     negative = *next == '-';
 108     if (negative || *next == '+') {
 109       io.SkipSpaces(remaining);
 110       next = io.NextInField(remaining, edit);
 111     }
 112   }
 113   return negative;
 114 }
 115
 116 bool EditIntegerInput(
 117     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
 118   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
 119   switch (edit.descriptor) {
 120   case DataEdit::ListDirected:
 121     if (IsNamelistNameOrSlash(io)) {
 122       return false;
 123     }
 124     break;
 125   case 'G':
 126   case 'I':
 127     break;
 128   case 'B':
 129     return EditBOZInput<1>(io, edit, n, kind);
 130   case 'O':
 131     return EditBOZInput<3>(io, edit, n, kind);
 132   case 'Z':
 133     return EditBOZInput<4>(io, edit, n, kind);
 134   case 'A': // legacy extension
 135     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
 136   default:
 137     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 138         "Data edit descriptor '%c' may not be used with an INTEGER data item",
 139         edit.descriptor);
 140     return false;
 141   }
 142   std::optional<int> remaining;
 143   std::optional<char32_t> next;
 144   bool negate{ScanNumericPrefix(io, edit, next, remaining)};
 145   common::UnsignedInt128 value{0};
 146   bool any{negate};
 147   bool overflow{false};
 148   for (; next; next = io.NextInField(remaining, edit)) {
 149     char32_t ch{*next};
 150     if (ch == ' ' || ch == '\t') {
 151       if (edit.modes.editingFlags & blankZero) {
 152         ch = '0'; // BZ mode - treat blank as if it were zero
 153       } else {
 154         continue;
 155       }
 156     }
 157     int digit{0};
 158     if (ch >= '0' && ch <= '9') {
 159       digit = ch - '0';
 160     } else {
 161       io.GetIoErrorHandler().SignalError(
 162           "Bad character '%lc' in INTEGER input field", ch);
 163       return false;
 164     }
 165     static constexpr auto maxu128{~common::UnsignedInt128{0}};
 166     static constexpr auto maxu128OverTen{maxu128 / 10};
 167     static constexpr int maxLastDigit{
 168         static_cast<int>(maxu128 - (maxu128OverTen * 10))};
 169     overflow |= value >= maxu128OverTen &&
 170         (value > maxu128OverTen || digit > maxLastDigit);
 171     value *= 10;
 172     value += digit;
 173     any = true;
 174   }
 175   if (!any && !remaining) {
 176     io.GetIoErrorHandler().SignalError(
 177         "Integer value absent from NAMELIST or list-directed input");
 178     return false;
 179   }
 180   auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
 181   overflow |= value >= maxForKind && (value > maxForKind || !negate);
 182   if (overflow) {
 183     io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
 184         "Decimal input overflows INTEGER(%d) variable", kind);
 185     return false;
 186   }
 187   if (negate) {
 188     value = -value;
 189   }
 190   if (any || !io.GetConnectionState().IsAtEOF()) {
 191     std::memcpy(n, &value, kind); // a blank field means zero
 192   }
 193   return any;
 194 }
 195
 196 // Parses a REAL input number from the input source as a normalized
 197 // fraction into a supplied buffer -- there's an optional '-', a
 198 // decimal point, and at least one digit.  The adjusted exponent value
 199 // is returned in a reference argument.  The returned value is the number
 200 // of characters that (should) have been written to the buffer -- this can
 201 // be larger than the buffer size and can indicate overflow.  Replaces
 202 // blanks with zeroes if appropriate.
 203 static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
 204     const DataEdit &edit, int &exponent) {
 205   std::optional<int> remaining;
 206   std::optional<char32_t> next;
 207   int got{0};
 208   std::optional<int> decimalPoint;
 209   auto Put{[&](char ch) -> void {
 210     if (got < bufferSize) {
 211       buffer[got] = ch;
 212     }
 213     ++got;
 214   }};
 215   if (ScanNumericPrefix(io, edit, next, remaining)) {
 216     Put('-');
 217   }
 218   bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
 219   if (!next || (!bzMode && *next == ' ')) { // empty/blank field means zero
 220     remaining.reset();
 221     if (!io.GetConnectionState().IsAtEOF()) {
 222       Put('0');
 223     }
 224     return got;
 225   }
 226   char32_t decimal{GetDecimalPoint(edit)};
 227   char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
 228   if (first == 'N' || first == 'I') {
 229     // NaN or infinity - convert to upper case
 230     // Subtle: a blank field of digits could be followed by 'E' or 'D',
 231     for (; next &&
 232          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
 233          next = io.NextInField(remaining, edit)) {
 234       if (*next >= 'a' && *next <= 'z') {
 235         Put(*next - 'a' + 'A');
 236       } else {
 237         Put(*next);
 238       }
 239     }
 240     if (next && *next == '(') { // NaN(...)
 241       Put('(');
 242       int depth{1};
 243       while (true) {
 244         next = io.NextInField(remaining, edit);
 245         if (depth == 0) {
 246           break;
 247         } else if (!next) {
 248           return 0; // error
 249         } else if (*next == '(') {
 250           ++depth;
 251         } else if (*next == ')') {
 252           --depth;
 253         }
 254         Put(*next);
 255       }
 256     }
 257     exponent = 0;
 258   } else if (first == decimal || (first >= '0' && first <= '9') ||
 259       (bzMode && (first == ' ' || first == '\t')) || first == 'E' ||
 260       first == 'D' || first == 'Q') {
 261     Put('.'); // input field is normalized to a fraction
 262     auto start{got};
 263     for (; next; next = io.NextInField(remaining, edit)) {
 264       char32_t ch{*next};
 265       if (ch == ' ' || ch == '\t') {
 266         if (bzMode) {
 267           ch = '0'; // BZ mode - treat blank as if it were zero
 268         } else {
 269           continue;
 270         }
 271       }
 272       if (ch == '0' && got == start && !decimalPoint) {
 273         // omit leading zeroes before the decimal
 274       } else if (ch >= '0' && ch <= '9') {
 275         Put(ch);
 276       } else if (ch == decimal && !decimalPoint) {
 277         // the decimal point is *not* copied to the buffer
 278         decimalPoint = got - start; // # of digits before the decimal point
 279       } else {
 280         break;
 281       }
 282     }
 283     if (got == start) {
 284       // Nothing but zeroes and maybe a decimal point.  F'2018 requires
 285       // at least one digit, but F'77 did not, and a bare "." shows up in
 286       // the FCVS suite.
 287       Put('0'); // emit at least one digit
 288     }
 289     if (next &&
 290         (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
 291             *next == 'q' || *next == 'Q')) {
 292       // Optional exponent letter.  Blanks are allowed between the
 293       // optional exponent letter and the exponent value.
 294       io.SkipSpaces(remaining);
 295       next = io.NextInField(remaining, edit);
 296     }
 297     // The default exponent is -kP, but the scale factor doesn't affect
 298     // an explicit exponent.
 299     exponent = -edit.modes.scale;
 300     if (next &&
 301         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
 302             *next == ' ' || *next == '\t')) {
 303       bool negExpo{*next == '-'};
 304       if (negExpo || *next == '+') {
 305         next = io.NextInField(remaining, edit);
 306       }
 307       for (exponent = 0; next; next = io.NextInField(remaining, edit)) {
 308         if (*next >= '0' && *next <= '9') {
 309           if (exponent < 10000) {
 310             exponent = 10 * exponent + *next - '0';
 311           }
 312         } else if (*next == ' ' || *next == '\t') {
 313           if (bzMode) {
 314             exponent = 10 * exponent;
 315           }
 316         } else {
 317           break;
 318         }
 319       }
 320       if (negExpo) {
 321         exponent = -exponent;
 322       }
 323     }
 324     if (decimalPoint) {
 325       exponent += *decimalPoint;
 326     } else {
 327       // When no decimal point (or comma) appears in the value, the 'd'
 328       // part of the edit descriptor must be interpreted as the number of
 329       // digits in the value to be interpreted as being to the *right* of
 330       // the assumed decimal point (13.7.2.3.2)
 331       exponent += got - start - edit.digits.value_or(0);
 332     }
 333   } else {
 334     // TODO: hex FP input
 335     exponent = 0;
 336     return 0;
 337   }
 338   // Consume the trailing ')' of a list-directed or NAMELIST complex
 339   // input value.
 340   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
 341     if (next && (*next == ' ' || *next == '\t')) {
 342       next = io.NextInField(remaining, edit);
 343     }
 344     if (!next) { // NextInField fails on separators like ')'
 345       std::size_t byteCount{0};
 346       next = io.GetCurrentChar(byteCount);
 347       if (next && *next == ')') {
 348         io.HandleRelativePosition(byteCount);
 349       }
 350     }
 351   } else if (remaining) {
 352     while (next && (*next == ' ' || *next == '\t')) {
 353       next = io.NextInField(remaining, edit);
 354     }
 355     if (next) {
 356       return 0; // error: unused nonblank character in fixed-width field
 357     }
 358   }
 359   return got;
 360 }
 361
 362 static void RaiseFPExceptions(decimal::ConversionResultFlags flags) {
 363 #undef RAISE
 364 #ifdef feraisexcept // a macro in some environments; omit std::
 365 #define RAISE feraiseexcept
 366 #else
 367 #define RAISE std::feraiseexcept
 368 #endif
 369   if (flags & decimal::ConversionResultFlags::Overflow) {
 370     RAISE(FE_OVERFLOW);
 371   }
 372   if (flags & decimal::ConversionResultFlags::Inexact) {
 373     RAISE(FE_INEXACT);
 374   }
 375   if (flags & decimal::ConversionResultFlags::Invalid) {
 376     RAISE(FE_INVALID);
 377   }
 378 #undef RAISE
 379 }
 380
 381 // If no special modes are in effect and the form of the input value
 382 // that's present in the input stream is acceptable to the decimal->binary
 383 // converter without modification, this fast path for real input
 384 // saves time by avoiding memory copies and reformatting of the exponent.
 385 template <int PRECISION>
 386 static bool TryFastPathRealInput(
 387     IoStatementState &io, const DataEdit &edit, void *n) {
 388   if (edit.modes.editingFlags & (blankZero | decimalComma)) {
 389     return false;
 390   }
 391   if (edit.modes.scale != 0) {
 392     return false;
 393   }
 394   const ConnectionState &connection{io.GetConnectionState()};
 395   if (connection.internalIoCharKind > 1) {
 396     return false; // reading non-default character
 397   }
 398   const char *str{nullptr};
 399   std::size_t got{io.GetNextInputBytes(str)};
 400   if (got == 0 || str == nullptr || !connection.recordLength.has_value()) {
 401     return false; // could not access reliably-terminated input stream
 402   }
 403   const char *p{str};
 404   std::int64_t maxConsume{
 405       std::min<std::int64_t>(got, edit.width.value_or(got))};
 406   const char *limit{str + maxConsume};
 407   decimal::ConversionToBinaryResult<PRECISION> converted{
 408       decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
 409   if (converted.flags & (decimal::Invalid | decimal::Overflow)) {
 410     return false;
 411   }
 412   if (edit.digits.value_or(0) != 0) {
 413     // Edit descriptor is Fw.d (or other) with d != 0, which
 414     // implies scaling
 415     const char *q{str};
 416     for (; q < limit; ++q) {
 417       if (*q == '.' || *q == 'n' || *q == 'N') {
 418         break;
 419       }
 420     }
 421     if (q == limit) {
 422       // No explicit decimal point, and not NaN/Inf.
 423       return false;
 424     }
 425   }
 426   for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
 427   }
 428   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
 429     // Need to consume a trailing ')' and any white space after
 430     if (p >= limit || *p != ')') {
 431       return false;
 432     }
 433     for (++p; p < limit && (*p == ' ' || *p == '\t'); ++p) {
 434     }
 435   }
 436   if (edit.width && p < str + *edit.width) {
 437     return false; // unconverted characters remain in fixed width field
 438   }
 439   // Success on the fast path!
 440   *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
 441       converted.binary;
 442   io.HandleRelativePosition(p - str);
 443   // Set FP exception flags
 444   if (converted.flags != decimal::ConversionResultFlags::Exact) {
 445     RaiseFPExceptions(converted.flags);
 446   }
 447   return true;
 448 }
 449
 450 template <int KIND>
 451 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
 452   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
 453   if (TryFastPathRealInput<binaryPrecision>(io, edit, n)) {
 454     return true;
 455   }
 456   // Fast path wasn't available or didn't work; go the more general route
 457   static constexpr int maxDigits{
 458       common::MaxDecimalConversionDigits(binaryPrecision)};
 459   static constexpr int bufferSize{maxDigits + 18};
 460   char buffer[bufferSize];
 461   int exponent{0};
 462   int got{ScanRealInput(buffer, maxDigits + 2, io, edit, exponent)};
 463   if (got >= maxDigits + 2) {
 464     io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
 465     return false;
 466   }
 467   if (got == 0) {
 468     io.GetIoErrorHandler().SignalError(IostatBadRealInput);
 469     return false;
 470   }
 471   bool hadExtra{got > maxDigits};
 472   if (exponent != 0) {
 473     buffer[got++] = 'e';
 474     if (exponent < 0) {
 475       buffer[got++] = '-';
 476       exponent = -exponent;
 477     }
 478     if (exponent > 9999) {
 479       exponent = 9999; // will convert to +/-Inf
 480     }
 481     if (exponent > 999) {
 482       int dig{exponent / 1000};
 483       buffer[got++] = '0' + dig;
 484       int rest{exponent - 1000 * dig};
 485       dig = rest / 100;
 486       buffer[got++] = '0' + dig;
 487       rest -= 100 * dig;
 488       dig = rest / 10;
 489       buffer[got++] = '0' + dig;
 490       buffer[got++] = '0' + (rest - 10 * dig);
 491     } else if (exponent > 99) {
 492       int dig{exponent / 100};
 493       buffer[got++] = '0' + dig;
 494       int rest{exponent - 100 * dig};
 495       dig = rest / 10;
 496       buffer[got++] = '0' + dig;
 497       buffer[got++] = '0' + (rest - 10 * dig);
 498     } else if (exponent > 9) {
 499       int dig{exponent / 10};
 500       buffer[got++] = '0' + dig;
 501       buffer[got++] = '0' + (exponent - 10 * dig);
 502     } else {
 503       buffer[got++] = '0' + exponent;
 504     }
 505   }
 506   buffer[got] = '\0';
 507   const char *p{buffer};
 508   decimal::ConversionToBinaryResult<binaryPrecision> converted{
 509       decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round)};
 510   if (hadExtra) {
 511     converted.flags = static_cast<enum decimal::ConversionResultFlags>(
 512         converted.flags | decimal::Inexact);
 513   }
 514   if (*p) { // unprocessed junk after value
 515     io.GetIoErrorHandler().SignalError(IostatBadRealInput);
 516     return false;
 517   }
 518   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
 519       converted.binary;
 520   // Set FP exception flags
 521   if (converted.flags != decimal::ConversionResultFlags::Exact) {
 522     if (converted.flags & decimal::ConversionResultFlags::Overflow) {
 523       io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
 524       return false;
 525     }
 526     RaiseFPExceptions(converted.flags);
 527   }
 528   return true;
 529 }
 530
 531 template <int KIND>
 532 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
 533   switch (edit.descriptor) {
 534   case DataEdit::ListDirected:
 535     if (IsNamelistNameOrSlash(io)) {
 536       return false;
 537     }
 538     return EditCommonRealInput<KIND>(io, edit, n);
 539   case DataEdit::ListDirectedRealPart:
 540   case DataEdit::ListDirectedImaginaryPart:
 541   case 'F':
 542   case 'E': // incl. EN, ES, & EX
 543   case 'D':
 544   case 'G':
 545     return EditCommonRealInput<KIND>(io, edit, n);
 546   case 'B':
 547     return EditBOZInput<1>(io, edit, n,
 548         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
 549   case 'O':
 550     return EditBOZInput<3>(io, edit, n,
 551         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
 552   case 'Z':
 553     return EditBOZInput<4>(io, edit, n,
 554         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
 555   case 'A': // legacy extension
 556     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
 557   default:
 558     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 559         "Data edit descriptor '%c' may not be used for REAL input",
 560         edit.descriptor);
 561     return false;
 562   }
 563 }
 564
 565 // 13.7.3 in Fortran 2018
 566 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
 567   switch (edit.descriptor) {
 568   case DataEdit::ListDirected:
 569     if (IsNamelistNameOrSlash(io)) {
 570       return false;
 571     }
 572     break;
 573   case 'L':
 574   case 'G':
 575     break;
 576   default:
 577     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 578         "Data edit descriptor '%c' may not be used for LOGICAL input",
 579         edit.descriptor);
 580     return false;
 581   }
 582   std::optional<int> remaining{io.CueUpInput(edit)};
 583   std::optional<char32_t> next{io.NextInField(remaining, edit)};
 584   if (next && *next == '.') { // skip optional period
 585     next = io.NextInField(remaining, edit);
 586   }
 587   if (!next) {
 588     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
 589     return false;
 590   }
 591   switch (*next) {
 592   case 'T':
 593   case 't':
 594     x = true;
 595     break;
 596   case 'F':
 597   case 'f':
 598     x = false;
 599     break;
 600   default:
 601     io.GetIoErrorHandler().SignalError(
 602         "Bad character '%lc' in LOGICAL input field", *next);
 603     return false;
 604   }
 605   if (remaining) { // ignore the rest of the field
 606     io.HandleRelativePosition(*remaining);
 607   } else if (edit.descriptor == DataEdit::ListDirected) {
 608     while (io.NextInField(remaining, edit)) { // discard rest of field
 609     }
 610   }
 611   return true;
 612 }
 613
 614 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
 615 template <typename CHAR>
 616 static bool EditDelimitedCharacterInput(
 617     IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
 618   bool result{true};
 619   while (true) {
 620     std::size_t byteCount{0};
 621     auto ch{io.GetCurrentChar(byteCount)};
 622     if (!ch) {
 623       if (io.AdvanceRecord()) {
 624         continue;
 625       } else {
 626         result = false; // EOF in character value
 627         break;
 628       }
 629     }
 630     io.HandleRelativePosition(byteCount);
 631     if (*ch == delimiter) {
 632       auto next{io.GetCurrentChar(byteCount)};
 633       if (next && *next == delimiter) {
 634         // Repeated delimiter: use as character value
 635         io.HandleRelativePosition(byteCount);
 636       } else {
 637         break; // closing delimiter
 638       }
 639     }
 640     if (length > 0) {
 641       *x++ = *ch;
 642       --length;
 643     }
 644   }
 645   std::fill_n(x, length, ' ');
 646   return result;
 647 }
 648
 649 template <typename CHAR>
 650 static bool EditListDirectedCharacterInput(
 651     IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
 652   std::size_t byteCount{0};
 653   auto ch{io.GetCurrentChar(byteCount)};
 654   if (ch && (*ch == '\'' || *ch == '"')) {
 655     io.HandleRelativePosition(byteCount);
 656     return EditDelimitedCharacterInput(io, x, length, *ch);
 657   }
 658   if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) {
 659     return false;
 660   }
 661   // Undelimited list-directed character input: stop at a value separator
 662   // or the end of the current record.  Subtlety: the "remaining" count
 663   // here is a dummy that's used to avoid the interpretation of separators
 664   // in NextInField.
 665   std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
 666   while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) {
 667     bool isSep{false};
 668     switch (*next) {
 669     case ' ':
 670     case '\t':
 671     case '/':
 672       isSep = true;
 673       break;
 674     case ',':
 675       isSep = !(edit.modes.editingFlags & decimalComma);
 676       break;
 677     case ';':
 678       isSep = !!(edit.modes.editingFlags & decimalComma);
 679       break;
 680     default:
 681       break;
 682     }
 683     if (isSep) {
 684       remaining = 0;
 685     } else {
 686       *x++ = *next;
 687       remaining = --length > 0 ? maxUTF8Bytes : 0;
 688     }
 689   }
 690   std::fill_n(x, length, ' ');
 691   return true;
 692 }
 693
 694 template <typename CHAR>
 695 bool EditCharacterInput(
 696     IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) {
 697   switch (edit.descriptor) {
 698   case DataEdit::ListDirected:
 699     return EditListDirectedCharacterInput(io, x, length, edit);
 700   case 'A':
 701   case 'G':
 702     break;
 703   case 'B':
 704     return EditBOZInput<1>(io, edit, x, length * sizeof *x);
 705   case 'O':
 706     return EditBOZInput<3>(io, edit, x, length * sizeof *x);
 707   case 'Z':
 708     return EditBOZInput<4>(io, edit, x, length * sizeof *x);
 709   default:
 710     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 711         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
 712         edit.descriptor);
 713     return false;
 714   }
 715   const ConnectionState &connection{io.GetConnectionState()};
 716   std::size_t remaining{length};
 717   if (edit.width && *edit.width > 0) {
 718     remaining = *edit.width;
 719   }
 720   // When the field is wider than the variable, we drop the leading
 721   // characters.  When the variable is wider than the field, there can be
 722   // trailing padding.
 723   const char *input{nullptr};
 724   std::size_t ready{0};
 725   // Skip leading bytes.
 726   // These bytes don't count towards INQUIRE(IOLENGTH=).
 727   std::size_t skip{remaining > length ? remaining - length : 0};
 728   // Transfer payload bytes; these do count.
 729   while (remaining > 0) {
 730     if (ready == 0) {
 731       ready = io.GetNextInputBytes(input);
 732       if (ready == 0) {
 733         if (io.CheckForEndOfRecord()) {
 734           std::fill_n(x, length, ' '); // PAD='YES'
 735         }
 736         return !io.GetIoErrorHandler().InError();
 737       }
 738     }
 739     std::size_t chunk;
 740     bool skipping{skip > 0};
 741     if (connection.isUTF8) {
 742       chunk = MeasureUTF8Bytes(*input);
 743       if (skipping) {
 744         --skip;
 745       } else if (auto ucs{DecodeUTF8(input)}) {
 746         *x++ = *ucs;
 747         --length;
 748       } else if (chunk == 0) {
 749         // error recovery: skip bad encoding
 750         chunk = 1;
 751       }
 752       --remaining;
 753     } else if (connection.internalIoCharKind > 1) {
 754       // Reading from non-default character internal unit
 755       chunk = connection.internalIoCharKind;
 756       if (skipping) {
 757         --skip;
 758       } else {
 759         char32_t buffer{0};
 760         std::memcpy(&buffer, input, chunk);
 761         *x++ = buffer;
 762         --length;
 763       }
 764       --remaining;
 765     } else if constexpr (sizeof *x > 1) {
 766       // Read single byte with expansion into multi-byte CHARACTER
 767       chunk = 1;
 768       if (skipping) {
 769         --skip;
 770       } else {
 771         *x++ = static_cast<unsigned char>(*input);
 772         --length;
 773       }
 774       --remaining;
 775     } else { // single bytes -> default CHARACTER
 776       if (skipping) {
 777         chunk = std::min<std::size_t>(skip, ready);
 778         skip -= chunk;
 779       } else {
 780         chunk = std::min<std::size_t>(remaining, ready);
 781         std::memcpy(x, input, chunk);
 782         x += chunk;
 783         length -= chunk;
 784       }
 785       remaining -= chunk;
 786     }
 787     input += chunk;
 788     if (!skipping) {
 789       io.GotChar(chunk);
 790     }
 791     io.HandleRelativePosition(chunk);
 792     ready -= chunk;
 793   }
 794   // Pad the remainder of the input variable, if any.
 795   std::fill_n(x, length, ' ');
 796   return true;
 797 }
 798
// Explicit instantiations of EditRealInput for REAL kinds 2, 3, 4, 8, 10,
// and 16.
template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
// TODO: double/double
template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);

// Explicit instantiations of EditCharacterInput for character variables
// with char, char16_t, and char32_t elements.
template bool EditCharacterInput(
    IoStatementState &, const DataEdit &, char *, std::size_t);
template bool EditCharacterInput(
    IoStatementState &, const DataEdit &, char16_t *, std::size_t);
template bool EditCharacterInput(
    IoStatementState &, const DataEdit &, char32_t *, std::size_t);
 813
 814 } // namespace Fortran::runtime::io