flang/runtime/edit-input.cpp

   1 //===-- runtime/edit-input.cpp --------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "edit-input.h"
  10 #include "namelist.h"
  11 #include "utf.h"
  12 #include "flang/Common/optional.h"
  13 #include "flang/Common/real.h"
  14 #include "flang/Common/uint128.h"
  15 #include "flang/Runtime/freestanding-tools.h"
  16 #include <algorithm>
  17 #include <cfenv>
  18
  19 namespace Fortran::runtime::io {
  20 RT_OFFLOAD_API_GROUP_BEGIN
  21
  22 // Checks that a list-directed input value has been entirely consumed and
  23 // doesn't contain unparsed characters before the next value separator.
  24 static inline RT_API_ATTRS bool IsCharValueSeparator(
  25     const DataEdit &edit, char32_t ch) {
  26   char32_t comma{
  27       edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}};
  28   return ch == ' ' || ch == '\t' || ch == comma || ch == '/' ||
  29       (edit.IsNamelist() && (ch == '&' || ch == '$'));
  30 }
  31
  32 static RT_API_ATTRS bool CheckCompleteListDirectedField(
  33     IoStatementState &io, const DataEdit &edit) {
  34   if (edit.IsListDirected()) {
  35     std::size_t byteCount;
  36     if (auto ch{io.GetCurrentChar(byteCount)}) {
  37       if (IsCharValueSeparator(edit, *ch)) {
  38         return true;
  39       } else {
  40         const auto &connection{io.GetConnectionState()};
  41         io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator,
  42             "invalid character (0x%x) after list-directed input value, "
  43             "at column %d in record %d",
  44             static_cast<unsigned>(*ch),
  45             static_cast<int>(connection.positionInRecord + 1),
  46             static_cast<int>(connection.currentRecordNumber));
  47         return false;
  48       }
  49     } else {
  50       return true; // end of record: ok
  51     }
  52   } else {
  53     return true;
  54   }
  55 }
  56
  57 static inline RT_API_ATTRS char32_t GetSeparatorChar(const DataEdit &edit) {
  58   return edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','};
  59 }
  60
  61 template <int LOG2_BASE>
  62 static RT_API_ATTRS bool EditBOZInput(
  63     IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
  64   // Skip leading white space & zeroes
  65   Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
  66   auto start{io.GetConnectionState().positionInRecord};
  67   Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
  68   if (next.value_or('?') == '0') {
  69     do {
  70       start = io.GetConnectionState().positionInRecord;
  71       next = io.NextInField(remaining, edit);
  72     } while (next && *next == '0');
  73   }
  74   // Count significant digits after any leading white space & zeroes
  75   int digits{0};
  76   int significantBits{0};
  77   const char32_t comma{GetSeparatorChar(edit)};
  78   for (; next; next = io.NextInField(remaining, edit)) {
  79     char32_t ch{*next};
  80     if (ch == ' ' || ch == '\t') {
  81       if (edit.modes.editingFlags & blankZero) {
  82         ch = '0'; // BZ mode - treat blank as if it were zero
  83       } else {
  84         continue;
  85       }
  86     }
  87     if (ch >= '0' && ch <= '1') {
  88     } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
  89     } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
  90     } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
  91     } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
  92     } else if (ch == comma) {
  93       break; // end non-list-directed field early
  94     } else {
  95       io.GetIoErrorHandler().SignalError(
  96           "Bad character '%lc' in B/O/Z input field", ch);
  97       return false;
  98     }
  99     if (digits++ == 0) {
 100       significantBits = 4;
 101       if (ch >= '0' && ch <= '1') {
 102         significantBits = 1;
 103       } else if (ch >= '2' && ch <= '3') {
 104         significantBits = 2;
 105       } else if (ch >= '4' && ch <= '7') {
 106         significantBits = 3;
 107       } else {
 108         significantBits = 4;
 109       }
 110     } else {
 111       significantBits += LOG2_BASE;
 112     }
 113   }
 114   auto significantBytes{static_cast<std::size_t>(significantBits + 7) / 8};
 115   if (significantBytes > bytes) {
 116     io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
 117         "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
 118     return false;
 119   }
 120   // Reset to start of significant digits
 121   io.HandleAbsolutePosition(start);
 122   remaining.reset();
 123   // Make a second pass now that the digit count is known
 124   std::memset(n, 0, bytes);
 125   int increment{isHostLittleEndian ? -1 : 1};
 126   auto *data{reinterpret_cast<unsigned char *>(n) +
 127       (isHostLittleEndian ? significantBytes - 1 : bytes - significantBytes)};
 128   int shift{((digits - 1) * LOG2_BASE) & 7};
 129   while (digits > 0) {
 130     char32_t ch{*io.NextInField(remaining, edit)};
 131     int digit{0};
 132     if (ch == ' ' || ch == '\t') {
 133       if (edit.modes.editingFlags & blankZero) {
 134         ch = '0'; // BZ mode - treat blank as if it were zero
 135       } else {
 136         continue;
 137       }
 138     }
 139     --digits;
 140     if (ch >= '0' && ch <= '9') {
 141       digit = ch - '0';
 142     } else if (ch >= 'A' && ch <= 'F') {
 143       digit = ch + 10 - 'A';
 144     } else if (ch >= 'a' && ch <= 'f') {
 145       digit = ch + 10 - 'a';
 146     } else {
 147       continue;
 148     }
 149     if (shift < 0) {
 150       if (shift + LOG2_BASE > 0) { // misaligned octal
 151         *data |= digit >> -shift;
 152       }
 153       shift += 8;
 154       data += increment;
 155     }
 156     *data |= digit << shift;
 157     shift -= LOG2_BASE;
 158   }
 159   return CheckCompleteListDirectedField(io, edit);
 160 }
 161
 162 static inline RT_API_ATTRS char32_t GetRadixPointChar(const DataEdit &edit) {
 163   return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
 164 }
 165
 166 // Prepares input from a field, and returns the sign, if any, else '\0'.
 167 static RT_API_ATTRS char ScanNumericPrefix(IoStatementState &io,
 168     const DataEdit &edit, Fortran::common::optional<char32_t> &next,
 169     Fortran::common::optional<int> &remaining) {
 170   remaining = io.CueUpInput(edit);
 171   next = io.NextInField(remaining, edit);
 172   char sign{'\0'};
 173   if (next) {
 174     if (*next == '-' || *next == '+') {
 175       sign = *next;
 176       if (!edit.IsListDirected()) {
 177         io.SkipSpaces(remaining);
 178       }
 179       next = io.NextInField(remaining, edit);
 180     }
 181   }
 182   return sign;
 183 }
 184
 185 RT_API_ATTRS bool EditIntegerInput(
 186     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
 187   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
 188   switch (edit.descriptor) {
 189   case DataEdit::ListDirected:
 190     if (IsNamelistNameOrSlash(io)) {
 191       return false;
 192     }
 193     break;
 194   case 'G':
 195   case 'I':
 196     break;
 197   case 'B':
 198     return EditBOZInput<1>(io, edit, n, kind);
 199   case 'O':
 200     return EditBOZInput<3>(io, edit, n, kind);
 201   case 'Z':
 202     return EditBOZInput<4>(io, edit, n, kind);
 203   case 'A': // legacy extension
 204     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
 205   default:
 206     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 207         "Data edit descriptor '%c' may not be used with an INTEGER data item",
 208         edit.descriptor);
 209     return false;
 210   }
 211   Fortran::common::optional<int> remaining;
 212   Fortran::common::optional<char32_t> next;
 213   char sign{ScanNumericPrefix(io, edit, next, remaining)};
 214   common::UnsignedInt128 value{0};
 215   bool any{!!sign};
 216   bool overflow{false};
 217   const char32_t comma{GetSeparatorChar(edit)};
 218   for (; next; next = io.NextInField(remaining, edit)) {
 219     char32_t ch{*next};
 220     if (ch == ' ' || ch == '\t') {
 221       if (edit.modes.editingFlags & blankZero) {
 222         ch = '0'; // BZ mode - treat blank as if it were zero
 223       } else {
 224         continue;
 225       }
 226     }
 227     int digit{0};
 228     if (ch >= '0' && ch <= '9') {
 229       digit = ch - '0';
 230     } else if (ch == comma) {
 231       break; // end non-list-directed field early
 232     } else {
 233       if (edit.modes.inNamelist && ch == GetRadixPointChar(edit)) {
 234         // Ignore any fractional part that might appear in NAMELIST integer
 235         // input, like a few other Fortran compilers do.
 236         // TODO: also process exponents?  Some compilers do, but they obviously
 237         // can't just be ignored.
 238         while ((next = io.NextInField(remaining, edit))) {
 239           if (*next < '0' || *next > '9') {
 240             break;
 241           }
 242         }
 243         if (!next || *next == comma) {
 244           break;
 245         }
 246       }
 247       io.GetIoErrorHandler().SignalError(
 248           "Bad character '%lc' in INTEGER input field", ch);
 249       return false;
 250     }
 251     static constexpr auto maxu128{~common::UnsignedInt128{0}};
 252     static constexpr auto maxu128OverTen{maxu128 / 10};
 253     static constexpr int maxLastDigit{
 254         static_cast<int>(maxu128 - (maxu128OverTen * 10))};
 255     overflow |= value >= maxu128OverTen &&
 256         (value > maxu128OverTen || digit > maxLastDigit);
 257     value *= 10;
 258     value += digit;
 259     any = true;
 260   }
 261   if (!any && !remaining) {
 262     io.GetIoErrorHandler().SignalError(
 263         "Integer value absent from NAMELIST or list-directed input");
 264     return false;
 265   }
 266   auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
 267   overflow |= value >= maxForKind && (value > maxForKind || sign != '-');
 268   if (overflow) {
 269     io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
 270         "Decimal input overflows INTEGER(%d) variable", kind);
 271     return false;
 272   }
 273   if (sign == '-') {
 274     value = -value;
 275   }
 276   if (any || !io.GetIoErrorHandler().InError()) {
 277     // The value is stored in the lower order bits on big endian platform.
 278     // When memcpy, shift the value to the higher order bit.
 279     auto shft{static_cast<int>(sizeof(value.low())) - kind};
 280     // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian.
 281     if (!isHostLittleEndian && shft >= 0) {
 282       auto l{value.low() << (8 * shft)};
 283       std::memcpy(n, &l, kind);
 284     } else {
 285       std::memcpy(n, &value, kind); // a blank field means zero
 286     }
 287     return true;
 288   } else {
 289     return false;
 290   }
 291 }
 292
 293 // Parses a REAL input number from the input source as a normalized
 294 // fraction into a supplied buffer -- there's an optional '-', a
 295 // decimal point when the input is not hexadecimal, and at least one
 296 // digit.  Replaces blanks with zeroes where appropriate.
 297 struct ScannedRealInput {
 298   // Number of characters that (should) have been written to the
 299   // buffer -- this can be larger than the buffer size, which
 300   // indicates buffer overflow.  Zero indicates an error.
 301   int got{0};
 302   int exponent{0}; // adjusted as necessary; binary if isHexadecimal
 303   bool isHexadecimal{false}; // 0X...
 304 };
 305 static RT_API_ATTRS ScannedRealInput ScanRealInput(
 306     char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) {
 307   Fortran::common::optional<int> remaining;
 308   Fortran::common::optional<char32_t> next;
 309   int got{0};
 310   Fortran::common::optional<int> radixPointOffset;
 311   // The following lambda definition violates the conding style,
 312   // but cuda-11.8 nvcc hits an internal error with the brace initialization.
 313   auto Put = [&](char ch) -> void {
 314     if (got < bufferSize) {
 315       buffer[got] = ch;
 316     }
 317     ++got;
 318   };
 319   char sign{ScanNumericPrefix(io, edit, next, remaining)};
 320   if (sign == '-') {
 321     Put('-');
 322   }
 323   bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
 324   int exponent{0};
 325   if (!next || (!bzMode && *next == ' ') ||
 326       (!(edit.modes.editingFlags & decimalComma) && *next == ',')) {
 327     if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) {
 328       // An empty/blank field means zero when not list-directed.
 329       // A fixed-width field containing only a sign is also zero;
 330       // this behavior isn't standard-conforming in F'2023 but it is
 331       // required to pass FCVS.
 332       Put('0');
 333     }
 334     return {got, exponent, false};
 335   }
 336   char32_t radixPointChar{GetRadixPointChar(edit)};
 337   char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
 338   bool isHexadecimal{false};
 339   if (first == 'N' || first == 'I') {
 340     // NaN or infinity - convert to upper case
 341     // Subtle: a blank field of digits could be followed by 'E' or 'D',
 342     for (; next &&
 343          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
 344          next = io.NextInField(remaining, edit)) {
 345       if (*next >= 'a' && *next <= 'z') {
 346         Put(*next - 'a' + 'A');
 347       } else {
 348         Put(*next);
 349       }
 350     }
 351     if (next && *next == '(') { // NaN(...)
 352       Put('(');
 353       int depth{1};
 354       while (true) {
 355         next = io.NextInField(remaining, edit);
 356         if (depth == 0) {
 357           break;
 358         } else if (!next) {
 359           return {}; // error
 360         } else if (*next == '(') {
 361           ++depth;
 362         } else if (*next == ')') {
 363           --depth;
 364         }
 365         Put(*next);
 366       }
 367     }
 368   } else if (first == radixPointChar || (first >= '0' && first <= '9') ||
 369       (bzMode && (first == ' ' || first == '\t')) || first == 'E' ||
 370       first == 'D' || first == 'Q') {
 371     if (first == '0') {
 372       next = io.NextInField(remaining, edit);
 373       if (next && (*next == 'x' || *next == 'X')) { // 0X...
 374         isHexadecimal = true;
 375         next = io.NextInField(remaining, edit);
 376       } else {
 377         Put('0');
 378       }
 379     }
 380     // input field is normalized to a fraction
 381     if (!isHexadecimal) {
 382       Put('.');
 383     }
 384     auto start{got};
 385     for (; next; next = io.NextInField(remaining, edit)) {
 386       char32_t ch{*next};
 387       if (ch == ' ' || ch == '\t') {
 388         if (isHexadecimal) {
 389           return {}; // error
 390         } else if (bzMode) {
 391           ch = '0'; // BZ mode - treat blank as if it were zero
 392         } else {
 393           continue; // ignore blank in fixed field
 394         }
 395       }
 396       if (ch == '0' && got == start && !radixPointOffset) {
 397         // omit leading zeroes before the radix point
 398       } else if (ch >= '0' && ch <= '9') {
 399         Put(ch);
 400       } else if (ch == radixPointChar && !radixPointOffset) {
 401         // The radix point character is *not* copied to the buffer.
 402         radixPointOffset = got - start; // # of digits before the radix point
 403       } else if (isHexadecimal && ch >= 'A' && ch <= 'F') {
 404         Put(ch);
 405       } else if (isHexadecimal && ch >= 'a' && ch <= 'f') {
 406         Put(ch - 'a' + 'A'); // normalize to capitals
 407       } else {
 408         break;
 409       }
 410     }
 411     if (got == start) {
 412       // Nothing but zeroes and maybe a radix point.  F'2018 requires
 413       // at least one digit, but F'77 did not, and a bare "." shows up in
 414       // the FCVS suite.
 415       Put('0'); // emit at least one digit
 416     }
 417     // In list-directed input, a bad exponent is not consumed.
 418     auto nextBeforeExponent{next};
 419     auto startExponent{io.GetConnectionState().positionInRecord};
 420     bool hasGoodExponent{false};
 421     if (next) {
 422       if (isHexadecimal) {
 423         if (*next == 'p' || *next == 'P') {
 424           next = io.NextInField(remaining, edit);
 425         } else {
 426           // The binary exponent is not optional in the standard.
 427           return {}; // error
 428         }
 429       } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
 430           *next == 'q' || *next == 'Q') {
 431         // Optional exponent letter.  Blanks are allowed between the
 432         // optional exponent letter and the exponent value.
 433         io.SkipSpaces(remaining);
 434         next = io.NextInField(remaining, edit);
 435       }
 436     }
 437     if (next &&
 438         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
 439             *next == ' ' || *next == '\t')) {
 440       bool negExpo{*next == '-'};
 441       if (negExpo || *next == '+') {
 442         next = io.NextInField(remaining, edit);
 443       }
 444       for (; next; next = io.NextInField(remaining, edit)) {
 445         if (*next >= '0' && *next <= '9') {
 446           hasGoodExponent = true;
 447           if (exponent < 10000) {
 448             exponent = 10 * exponent + *next - '0';
 449           }
 450         } else if (*next == ' ' || *next == '\t') {
 451           if (isHexadecimal) {
 452             break;
 453           } else if (bzMode) {
 454             hasGoodExponent = true;
 455             exponent = 10 * exponent;
 456           }
 457         } else {
 458           break;
 459         }
 460       }
 461       if (negExpo) {
 462         exponent = -exponent;
 463       }
 464     }
 465     if (!hasGoodExponent) {
 466       if (isHexadecimal) {
 467         return {}; // error
 468       }
 469       // There isn't a good exponent; do not consume it.
 470       next = nextBeforeExponent;
 471       io.HandleAbsolutePosition(startExponent);
 472       // The default exponent is -kP, but the scale factor doesn't affect
 473       // an explicit exponent.
 474       exponent = -edit.modes.scale;
 475     }
 476     // Adjust exponent by number of digits before the radix point.
 477     if (isHexadecimal) {
 478       // Exponents for hexadecimal input are binary.
 479       exponent += radixPointOffset.value_or(got - start) * 4;
 480     } else if (radixPointOffset) {
 481       exponent += *radixPointOffset;
 482     } else {
 483       // When no redix point (or comma) appears in the value, the 'd'
 484       // part of the edit descriptor must be interpreted as the number of
 485       // digits in the value to be interpreted as being to the *right* of
 486       // the assumed radix point (13.7.2.3.2)
 487       exponent += got - start - edit.digits.value_or(0);
 488     }
 489   }
 490   // Consume the trailing ')' of a list-directed or NAMELIST complex
 491   // input value.
 492   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
 493     if (next && (*next == ' ' || *next == '\t')) {
 494       io.SkipSpaces(remaining);
 495       next = io.NextInField(remaining, edit);
 496     }
 497     if (!next) { // NextInField fails on separators like ')'
 498       std::size_t byteCount{0};
 499       next = io.GetCurrentChar(byteCount);
 500       if (next && *next == ')') {
 501         io.HandleRelativePosition(byteCount);
 502       }
 503     }
 504   } else if (remaining) {
 505     while (next && (*next == ' ' || *next == '\t')) {
 506       next = io.NextInField(remaining, edit);
 507     }
 508     if (next && (*next != ',' || (edit.modes.editingFlags & decimalComma))) {
 509       return {}; // error: unused nonblank character in fixed-width field
 510     }
 511   }
 512   return {got, exponent, isHexadecimal};
 513 }
 514
 515 static RT_API_ATTRS void RaiseFPExceptions(
 516     decimal::ConversionResultFlags flags) {
 517 #undef RAISE
 518 #if defined(RT_DEVICE_COMPILATION)
 519   Terminator terminator(__FILE__, __LINE__);
 520 #define RAISE(e) \
 521   terminator.Crash( \
 522       "not implemented yet: raising FP exception in device code: %s", #e);
 523 #else // !defined(RT_DEVICE_COMPILATION)
 524 #ifdef feraisexcept // a macro in some environments; omit std::
 525 #define RAISE feraiseexcept
 526 #else
 527 #define RAISE std::feraiseexcept
 528 #endif
 529 #endif // !defined(RT_DEVICE_COMPILATION)
 530
 531 // Some environment (e.g. emscripten, musl) don't define FE_OVERFLOW as allowed
 532 // by c99 (but not c++11) :-/
 533 #if defined(FE_OVERFLOW) || defined(RT_DEVICE_COMPILATION)
 534   if (flags & decimal::ConversionResultFlags::Overflow) {
 535     RAISE(FE_OVERFLOW);
 536   }
 537 #endif
 538 #if defined(FE_UNDERFLOW) || defined(RT_DEVICE_COMPILATION)
 539   if (flags & decimal::ConversionResultFlags::Underflow) {
 540     RAISE(FE_UNDERFLOW);
 541   }
 542 #endif
 543 #if defined(FE_INEXACT) || defined(RT_DEVICE_COMPILATION)
 544   if (flags & decimal::ConversionResultFlags::Inexact) {
 545     RAISE(FE_INEXACT);
 546   }
 547 #endif
 548 #if defined(FE_INVALID) || defined(RT_DEVICE_COMPILATION)
 549   if (flags & decimal::ConversionResultFlags::Invalid) {
 550     RAISE(FE_INVALID);
 551   }
 552 #endif
 553 #undef RAISE
 554 }
 555
 556 // If no special modes are in effect and the form of the input value
 557 // that's present in the input stream is acceptable to the decimal->binary
 558 // converter without modification, this fast path for real input
 559 // saves time by avoiding memory copies and reformatting of the exponent.
 560 template <int PRECISION>
 561 static RT_API_ATTRS bool TryFastPathRealDecimalInput(
 562     IoStatementState &io, const DataEdit &edit, void *n) {
 563   if (edit.modes.editingFlags & (blankZero | decimalComma)) {
 564     return false;
 565   }
 566   if (edit.modes.scale != 0) {
 567     return false;
 568   }
 569   const ConnectionState &connection{io.GetConnectionState()};
 570   if (connection.internalIoCharKind > 1) {
 571     return false; // reading non-default character
 572   }
 573   const char *str{nullptr};
 574   std::size_t got{io.GetNextInputBytes(str)};
 575   if (got == 0 || str == nullptr || !connection.recordLength.has_value()) {
 576     return false; // could not access reliably-terminated input stream
 577   }
 578   const char *p{str};
 579   std::int64_t maxConsume{
 580       std::min<std::int64_t>(got, edit.width.value_or(got))};
 581   const char *limit{str + maxConsume};
 582   decimal::ConversionToBinaryResult<PRECISION> converted{
 583       decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
 584   if (converted.flags & (decimal::Invalid | decimal::Overflow)) {
 585     return false;
 586   }
 587   if (edit.digits.value_or(0) != 0) {
 588     // Edit descriptor is Fw.d (or other) with d != 0, which
 589     // implies scaling
 590     const char *q{str};
 591     for (; q < limit; ++q) {
 592       if (*q == '.' || *q == 'n' || *q == 'N') {
 593         break;
 594       }
 595     }
 596     if (q == limit) {
 597       // No explicit decimal point, and not NaN/Inf.
 598       return false;
 599     }
 600   }
 601   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
 602     // Need to consume a trailing ')', possibly with leading spaces
 603     for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
 604     }
 605     if (p < limit && *p == ')') {
 606       ++p;
 607     } else {
 608       return false;
 609     }
 610   } else if (edit.IsListDirected()) {
 611     if (p < limit && !IsCharValueSeparator(edit, *p)) {
 612       return false;
 613     }
 614   } else {
 615     for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
 616     }
 617     if (edit.width && p < str + *edit.width) {
 618       return false; // unconverted characters remain in fixed width field
 619     }
 620   }
 621   // Success on the fast path!
 622   *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
 623       converted.binary;
 624   io.HandleRelativePosition(p - str);
 625   // Set FP exception flags
 626   if (converted.flags != decimal::ConversionResultFlags::Exact) {
 627     RaiseFPExceptions(converted.flags);
 628   }
 629   return true;
 630 }
 631
 632 template <int binaryPrecision>
 633 RT_API_ATTRS decimal::ConversionToBinaryResult<binaryPrecision>
 634 ConvertHexadecimal(
 635     const char *&p, enum decimal::FortranRounding rounding, int expo) {
 636   using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>;
 637   using RawType = typename RealType::RawType;
 638   bool isNegative{*p == '-'};
 639   constexpr RawType one{1};
 640   RawType signBit{0};
 641   if (isNegative) {
 642     ++p;
 643     signBit = one << (RealType::bits - 1);
 644   }
 645   RawType fraction{0};
 646   // Adjust the incoming binary P+/- exponent to shift the radix point
 647   // to below the LSB and add in the bias.
 648   expo += binaryPrecision - 1 + RealType::exponentBias;
 649   // Input the fraction.
 650   int roundingBit{0};
 651   int guardBit{0};
 652   for (; *p; ++p) {
 653     fraction <<= 4;
 654     expo -= 4;
 655     if (*p >= '0' && *p <= '9') {
 656       fraction |= *p - '0';
 657     } else if (*p >= 'A' && *p <= 'F') {
 658       fraction |= *p - 'A' + 10; // data were normalized to capitals
 659     } else {
 660       break;
 661     }
 662     if (fraction >> binaryPrecision) {
 663       while (fraction >> binaryPrecision) {
 664         guardBit |= roundingBit;
 665         roundingBit = (int)fraction & 1;
 666         fraction >>= 1;
 667         ++expo;
 668       }
 669       // Consume excess digits
 670       while (*++p) {
 671         if (*p == '0') {
 672         } else if ((*p >= '1' && *p <= '9') || (*p >= 'A' && *p <= 'F')) {
 673           guardBit = 1;
 674         } else {
 675           break;
 676         }
 677       }
 678       break;
 679     }
 680   }
 681   if (fraction) {
 682     // Boost biased expo if too small
 683     while (expo < 1) {
 684       guardBit |= roundingBit;
 685       roundingBit = (int)fraction & 1;
 686       fraction >>= 1;
 687       ++expo;
 688     }
 689     // Normalize
 690     while (expo > 1 && !(fraction >> (binaryPrecision - 1))) {
 691       fraction <<= 1;
 692       --expo;
 693       guardBit = roundingBit = 0;
 694     }
 695   }
 696   // Rounding
 697   bool increase{false};
 698   switch (rounding) {
 699   case decimal::RoundNearest: // RN & RP
 700     increase = roundingBit && (guardBit | ((int)fraction & 1));
 701     break;
 702   case decimal::RoundUp: // RU
 703     increase = !isNegative && (roundingBit | guardBit);
 704     break;
 705   case decimal::RoundDown: // RD
 706     increase = isNegative && (roundingBit | guardBit);
 707     break;
 708   case decimal::RoundToZero: // RZ
 709     break;
 710   case decimal::RoundCompatible: // RC
 711     increase = roundingBit != 0;
 712     break;
 713   }
 714   if (increase) {
 715     ++fraction;
 716     if (fraction >> binaryPrecision) {
 717       fraction >>= 1;
 718       ++expo;
 719     }
 720   }
 721   // Package & return result
 722   constexpr RawType significandMask{(one << RealType::significandBits) - 1};
 723   int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
 724   if (!fraction) {
 725     expo = 0;
 726   } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) {
 727     expo = 0; // subnormal
 728     flags |= decimal::Underflow;
 729   } else if (expo >= RealType::maxExponent) {
 730     if (rounding == decimal::RoundToZero ||
 731         (rounding == decimal::RoundDown && !isNegative) ||
 732         (rounding == decimal::RoundUp && isNegative)) {
 733       expo = RealType::maxExponent - 1; // +/-HUGE()
 734       fraction = significandMask;
 735     } else {
 736       expo = RealType::maxExponent; // +/-Inf
 737       fraction = 0;
 738       flags |= decimal::Overflow;
 739     }
 740   } else {
 741     fraction &= significandMask; // remove explicit normalization unless x87
 742   }
 743   return decimal::ConversionToBinaryResult<binaryPrecision>{
 744       RealType{static_cast<RawType>(signBit |
 745           static_cast<RawType>(expo) << RealType::significandBits | fraction)},
 746       static_cast<decimal::ConversionResultFlags>(flags)};
 747 }
 748
 749 template <int KIND>
 750 RT_API_ATTRS bool EditCommonRealInput(
 751     IoStatementState &io, const DataEdit &edit, void *n) {
 752   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
 753   if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) {
 754     return CheckCompleteListDirectedField(io, edit);
 755   }
 756   // Fast path wasn't available or didn't work; go the more general route
 757   static constexpr int maxDigits{
 758       common::MaxDecimalConversionDigits(binaryPrecision)};
 759   static constexpr int bufferSize{maxDigits + 18};
 760   char buffer[bufferSize];
 761   auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)};
 762   int got{scanned.got};
 763   if (got >= maxDigits + 2) {
 764     io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
 765     return false;
 766   }
 767   if (got == 0) {
 768     const auto &connection{io.GetConnectionState()};
 769     io.GetIoErrorHandler().SignalError(IostatBadRealInput,
 770         "Bad real input data at column %d of record %d",
 771         static_cast<int>(connection.positionInRecord + 1),
 772         static_cast<int>(connection.currentRecordNumber));
 773     return false;
 774   }
 775   decimal::ConversionToBinaryResult<binaryPrecision> converted;
 776   const char *p{buffer};
 777   if (scanned.isHexadecimal) {
 778     buffer[got] = '\0';
 779     converted = ConvertHexadecimal<binaryPrecision>(
 780         p, edit.modes.round, scanned.exponent);
 781   } else {
 782     bool hadExtra{got > maxDigits};
 783     int exponent{scanned.exponent};
 784     if (exponent != 0) {
 785       buffer[got++] = 'e';
 786       if (exponent < 0) {
 787         buffer[got++] = '-';
 788         exponent = -exponent;
 789       }
 790       if (exponent > 9999) {
 791         exponent = 9999; // will convert to +/-Inf
 792       }
 793       if (exponent > 999) {
 794         int dig{exponent / 1000};
 795         buffer[got++] = '0' + dig;
 796         int rest{exponent - 1000 * dig};
 797         dig = rest / 100;
 798         buffer[got++] = '0' + dig;
 799         rest -= 100 * dig;
 800         dig = rest / 10;
 801         buffer[got++] = '0' + dig;
 802         buffer[got++] = '0' + (rest - 10 * dig);
 803       } else if (exponent > 99) {
 804         int dig{exponent / 100};
 805         buffer[got++] = '0' + dig;
 806         int rest{exponent - 100 * dig};
 807         dig = rest / 10;
 808         buffer[got++] = '0' + dig;
 809         buffer[got++] = '0' + (rest - 10 * dig);
 810       } else if (exponent > 9) {
 811         int dig{exponent / 10};
 812         buffer[got++] = '0' + dig;
 813         buffer[got++] = '0' + (exponent - 10 * dig);
 814       } else {
 815         buffer[got++] = '0' + exponent;
 816       }
 817     }
 818     buffer[got] = '\0';
 819     converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round);
 820     if (hadExtra) {
 821       converted.flags = static_cast<enum decimal::ConversionResultFlags>(
 822           converted.flags | decimal::Inexact);
 823     }
 824   }
 825   if (*p) { // unprocessed junk after value
 826     const auto &connection{io.GetConnectionState()};
 827     io.GetIoErrorHandler().SignalError(IostatBadRealInput,
 828         "Trailing characters after real input data at column %d of record %d",
 829         static_cast<int>(connection.positionInRecord + 1),
 830         static_cast<int>(connection.currentRecordNumber));
 831     return false;
 832   }
 833   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
 834       converted.binary;
 835   // Set FP exception flags
 836   if (converted.flags != decimal::ConversionResultFlags::Exact) {
 837     if (converted.flags & decimal::ConversionResultFlags::Overflow) {
 838       io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
 839       return false;
 840     }
 841     RaiseFPExceptions(converted.flags);
 842   }
 843   return CheckCompleteListDirectedField(io, edit);
 844 }
 845
 846 template <int KIND>
 847 RT_API_ATTRS bool EditRealInput(
 848     IoStatementState &io, const DataEdit &edit, void *n) {
 849   switch (edit.descriptor) {
 850   case DataEdit::ListDirected:
 851     if (IsNamelistNameOrSlash(io)) {
 852       return false;
 853     }
 854     return EditCommonRealInput<KIND>(io, edit, n);
 855   case DataEdit::ListDirectedRealPart:
 856   case DataEdit::ListDirectedImaginaryPart:
 857   case 'F':
 858   case 'E': // incl. EN, ES, & EX
 859   case 'D':
 860   case 'G':
 861     return EditCommonRealInput<KIND>(io, edit, n);
 862   case 'B':
 863     return EditBOZInput<1>(io, edit, n,
 864         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
 865   case 'O':
 866     return EditBOZInput<3>(io, edit, n,
 867         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
 868   case 'Z':
 869     return EditBOZInput<4>(io, edit, n,
 870         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
 871   case 'A': // legacy extension
 872     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
 873   default:
 874     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 875         "Data edit descriptor '%c' may not be used for REAL input",
 876         edit.descriptor);
 877     return false;
 878   }
 879 }
 880
 881 // 13.7.3 in Fortran 2018
 882 RT_API_ATTRS bool EditLogicalInput(
 883     IoStatementState &io, const DataEdit &edit, bool &x) {
 884   switch (edit.descriptor) {
 885   case DataEdit::ListDirected:
 886     if (IsNamelistNameOrSlash(io)) {
 887       return false;
 888     }
 889     break;
 890   case 'L':
 891   case 'G':
 892     break;
 893   default:
 894     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
 895         "Data edit descriptor '%c' may not be used for LOGICAL input",
 896         edit.descriptor);
 897     return false;
 898   }
 899   Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
 900   Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
 901   if (next && *next == '.') { // skip optional period
 902     next = io.NextInField(remaining, edit);
 903   }
 904   if (!next) {
 905     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
 906     return false;
 907   }
 908   switch (*next) {
 909   case 'T':
 910   case 't':
 911     x = true;
 912     break;
 913   case 'F':
 914   case 'f':
 915     x = false;
 916     break;
 917   default:
 918     io.GetIoErrorHandler().SignalError(
 919         "Bad character '%lc' in LOGICAL input field", *next);
 920     return false;
 921   }
 922   if (remaining) { // ignore the rest of a fixed-width field
 923     io.HandleRelativePosition(*remaining);
 924   } else if (edit.descriptor == DataEdit::ListDirected) {
 925     while (io.NextInField(remaining, edit)) { // discard rest of field
 926     }
 927   }
 928   return CheckCompleteListDirectedField(io, edit);
 929 }
 930
 931 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
 932 template <typename CHAR>
 933 static RT_API_ATTRS bool EditDelimitedCharacterInput(
 934     IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
 935   bool result{true};
 936   while (true) {
 937     std::size_t byteCount{0};
 938     auto ch{io.GetCurrentChar(byteCount)};
 939     if (!ch) {
 940       if (io.AdvanceRecord()) {
 941         continue;
 942       } else {
 943         result = false; // EOF in character value
 944         break;
 945       }
 946     }
 947     io.HandleRelativePosition(byteCount);
 948     if (*ch == delimiter) {
 949       auto next{io.GetCurrentChar(byteCount)};
 950       if (next && *next == delimiter) {
 951         // Repeated delimiter: use as character value
 952         io.HandleRelativePosition(byteCount);
 953       } else {
 954         break; // closing delimiter
 955       }
 956     }
 957     if (length > 0) {
 958       *x++ = *ch;
 959       --length;
 960     }
 961   }
 962   Fortran::runtime::fill_n(x, length, ' ');
 963   return result;
 964 }
 965
 966 template <typename CHAR>
 967 static RT_API_ATTRS bool EditListDirectedCharacterInput(
 968     IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
 969   std::size_t byteCount{0};
 970   auto ch{io.GetCurrentChar(byteCount)};
 971   if (ch && (*ch == '\'' || *ch == '"')) {
 972     io.HandleRelativePosition(byteCount);
 973     return EditDelimitedCharacterInput(io, x, length, *ch);
 974   }
 975   if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) {
 976     return false;
 977   }
 978   // Undelimited list-directed character input: stop at a value separator
 979   // or the end of the current record.  Subtlety: the "remaining" count
 980   // here is a dummy that's used to avoid the interpretation of separators
 981   // in NextInField.
 982   Fortran::common::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
 983   while (Fortran::common::optional<char32_t> next{
 984       io.NextInField(remaining, edit)}) {
 985     bool isSep{false};
 986     switch (*next) {
 987     case ' ':
 988     case '\t':
 989     case '/':
 990       isSep = true;
 991       break;
 992     case '&':
 993     case '$':
 994       isSep = edit.IsNamelist();
 995       break;
 996     case ',':
 997       isSep = !(edit.modes.editingFlags & decimalComma);
 998       break;
 999     case ';':
1000       isSep = !!(edit.modes.editingFlags & decimalComma);
1001       break;
1002     default:
1003       break;
1004     }
1005     if (isSep) {
1006       remaining = 0;
1007     } else {
1008       *x++ = *next;
1009       remaining = --length > 0 ? maxUTF8Bytes : 0;
1010     }
1011   }
1012   Fortran::runtime::fill_n(x, length, ' ');
1013   return true;
1014 }
1015
1016 template <typename CHAR>
1017 RT_API_ATTRS bool EditCharacterInput(IoStatementState &io, const DataEdit &edit,
1018     CHAR *x, std::size_t lengthChars) {
1019   switch (edit.descriptor) {
1020   case DataEdit::ListDirected:
1021     return EditListDirectedCharacterInput(io, x, lengthChars, edit);
1022   case 'A':
1023   case 'G':
1024     break;
1025   case 'B':
1026     return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
1027   case 'O':
1028     return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
1029   case 'Z':
1030     return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
1031   default:
1032     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
1033         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
1034         edit.descriptor);
1035     return false;
1036   }
1037   const ConnectionState &connection{io.GetConnectionState()};
1038   std::size_t remainingChars{lengthChars};
1039   // Skip leading characters.
1040   // Their bytes don't count towards INQUIRE(IOLENGTH=).
1041   std::size_t skipChars{0};
1042   if (edit.width && *edit.width > 0) {
1043     remainingChars = *edit.width;
1044     if (remainingChars > lengthChars) {
1045       skipChars = remainingChars - lengthChars;
1046     }
1047   }
1048   // When the field is wider than the variable, we drop the leading
1049   // characters.  When the variable is wider than the field, there can be
1050   // trailing padding or an EOR condition.
1051   const char *input{nullptr};
1052   std::size_t readyBytes{0};
1053   // Transfer payload bytes; these do count.
1054   while (remainingChars > 0) {
1055     if (readyBytes == 0) {
1056       readyBytes = io.GetNextInputBytes(input);
1057       if (readyBytes == 0 ||
1058           (readyBytes < remainingChars && edit.modes.nonAdvancing)) {
1059         if (io.CheckForEndOfRecord(readyBytes)) {
1060           if (readyBytes == 0) {
1061             // PAD='YES' and no more data
1062             Fortran::runtime::fill_n(x, lengthChars, ' ');
1063             return !io.GetIoErrorHandler().InError();
1064           } else {
1065             // Do partial read(s) then pad on last iteration
1066           }
1067         } else {
1068           return !io.GetIoErrorHandler().InError();
1069         }
1070       }
1071     }
1072     std::size_t chunkBytes;
1073     std::size_t chunkChars{1};
1074     bool skipping{skipChars > 0};
1075     if (connection.isUTF8) {
1076       chunkBytes = MeasureUTF8Bytes(*input);
1077       if (skipping) {
1078         --skipChars;
1079       } else if (auto ucs{DecodeUTF8(input)}) {
1080         if ((sizeof *x == 1 && *ucs > 0xff) ||
1081             (sizeof *x == 2 && *ucs > 0xffff)) {
1082           *x++ = '?';
1083         } else {
1084           *x++ = *ucs;
1085         }
1086         --lengthChars;
1087       } else if (chunkBytes == 0) {
1088         // error recovery: skip bad encoding
1089         chunkBytes = 1;
1090       }
1091     } else if (connection.internalIoCharKind > 1) {
1092       // Reading from non-default character internal unit
1093       chunkBytes = connection.internalIoCharKind;
1094       if (skipping) {
1095         --skipChars;
1096       } else {
1097         char32_t buffer{0};
1098         std::memcpy(&buffer, input, chunkBytes);
1099         if ((sizeof *x == 1 && buffer > 0xff) ||
1100             (sizeof *x == 2 && buffer > 0xffff)) {
1101           *x++ = '?';
1102         } else {
1103           *x++ = buffer;
1104         }
1105         --lengthChars;
1106       }
1107     } else if constexpr (sizeof *x > 1) {
1108       // Read single byte with expansion into multi-byte CHARACTER
1109       chunkBytes = 1;
1110       if (skipping) {
1111         --skipChars;
1112       } else {
1113         *x++ = static_cast<unsigned char>(*input);
1114         --lengthChars;
1115       }
1116     } else { // single bytes -> default CHARACTER
1117       if (skipping) {
1118         chunkBytes = std::min<std::size_t>(skipChars, readyBytes);
1119         chunkChars = chunkBytes;
1120         skipChars -= chunkChars;
1121       } else {
1122         chunkBytes = std::min<std::size_t>(remainingChars, readyBytes);
1123         chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes);
1124         chunkChars = chunkBytes;
1125         std::memcpy(x, input, chunkBytes);
1126         x += chunkBytes;
1127         lengthChars -= chunkChars;
1128       }
1129     }
1130     input += chunkBytes;
1131     remainingChars -= chunkChars;
1132     if (!skipping) {
1133       io.GotChar(chunkBytes);
1134     }
1135     io.HandleRelativePosition(chunkBytes);
1136     readyBytes -= chunkBytes;
1137   }
1138   // Pad the remainder of the input variable, if any.
1139   Fortran::runtime::fill_n(x, lengthChars, ' ');
1140   return CheckCompleteListDirectedField(io, edit);
1141 }
1142
1143 template RT_API_ATTRS bool EditRealInput<2>(
1144     IoStatementState &, const DataEdit &, void *);
1145 template RT_API_ATTRS bool EditRealInput<3>(
1146     IoStatementState &, const DataEdit &, void *);
1147 template RT_API_ATTRS bool EditRealInput<4>(
1148     IoStatementState &, const DataEdit &, void *);
1149 template RT_API_ATTRS bool EditRealInput<8>(
1150     IoStatementState &, const DataEdit &, void *);
1151 template RT_API_ATTRS bool EditRealInput<10>(
1152     IoStatementState &, const DataEdit &, void *);
1153 // TODO: double/double
1154 template RT_API_ATTRS bool EditRealInput<16>(
1155     IoStatementState &, const DataEdit &, void *);
1156
1157 template RT_API_ATTRS bool EditCharacterInput(
1158     IoStatementState &, const DataEdit &, char *, std::size_t);
1159 template RT_API_ATTRS bool EditCharacterInput(
1160     IoStatementState &, const DataEdit &, char16_t *, std::size_t);
1161 template RT_API_ATTRS bool EditCharacterInput(
1162     IoStatementState &, const DataEdit &, char32_t *, std::size_t);
1163
1164 RT_OFFLOAD_API_GROUP_END
1165 } // namespace Fortran::runtime::io