[flang] Accept polymorphic component element in storage_size
[llvm-project.git] / flang / runtime / edit-input.cpp
blob7287d2e99979d9047cdee76b18fb55d0e7583ac0
//===-- runtime/edit-input.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
9 #include "edit-input.h"
10 #include "namelist.h"
11 #include "utf.h"
12 #include "flang/Common/real.h"
13 #include "flang/Common/uint128.h"
14 #include <algorithm>
15 #include <cfenv>
17 namespace Fortran::runtime::io {
19 template <int LOG2_BASE>
20 static bool EditBOZInput(
21 IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
22 // Skip leading white space & zeroes
23 std::optional<int> remaining{io.CueUpInput(edit)};
24 auto start{io.GetConnectionState().positionInRecord};
25 std::optional<char32_t> next{io.NextInField(remaining, edit)};
26 if (next.value_or('?') == '0') {
27 do {
28 start = io.GetConnectionState().positionInRecord;
29 next = io.NextInField(remaining, edit);
30 } while (next && *next == '0');
32 // Count significant digits after any leading white space & zeroes
33 int digits{0};
34 for (; next; next = io.NextInField(remaining, edit)) {
35 char32_t ch{*next};
36 if (ch == ' ' || ch == '\t') {
37 continue;
39 if (ch >= '0' && ch <= '1') {
40 } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
41 } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
42 } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
43 } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
44 } else {
45 io.GetIoErrorHandler().SignalError(
46 "Bad character '%lc' in B/O/Z input field", ch);
47 return false;
49 ++digits;
51 auto significantBytes{static_cast<std::size_t>(digits * LOG2_BASE + 7) / 8};
52 if (significantBytes > bytes) {
53 io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
54 "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
55 return false;
57 // Reset to start of significant digits
58 io.HandleAbsolutePosition(start);
59 remaining.reset();
60 // Make a second pass now that the digit count is known
61 std::memset(n, 0, bytes);
62 int increment{isHostLittleEndian ? -1 : 1};
63 auto *data{reinterpret_cast<unsigned char *>(n) +
64 (isHostLittleEndian ? significantBytes - 1 : 0)};
65 int shift{((digits - 1) * LOG2_BASE) & 7};
66 if (shift + LOG2_BASE > 8) {
67 shift -= 8; // misaligned octal
69 while (digits > 0) {
70 char32_t ch{*io.NextInField(remaining, edit)};
71 int digit{0};
72 if (ch >= '0' && ch <= '9') {
73 digit = ch - '0';
74 } else if (ch >= 'A' && ch <= 'F') {
75 digit = ch + 10 - 'A';
76 } else if (ch >= 'a' && ch <= 'f') {
77 digit = ch + 10 - 'a';
78 } else {
79 continue;
81 --digits;
82 if (shift < 0) {
83 shift += 8;
84 if (shift + LOG2_BASE > 8) { // misaligned octal
85 *data |= digit >> (8 - shift);
87 data += increment;
89 *data |= digit << shift;
90 shift -= LOG2_BASE;
92 return true;
95 static inline char32_t GetDecimalPoint(const DataEdit &edit) {
96 return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
99 // Prepares input from a field, and consumes the sign, if any.
100 // Returns true if there's a '-' sign.
101 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
102 std::optional<char32_t> &next, std::optional<int> &remaining) {
103 remaining = io.CueUpInput(edit);
104 next = io.NextInField(remaining, edit);
105 bool negative{false};
106 if (next) {
107 negative = *next == '-';
108 if (negative || *next == '+') {
109 io.SkipSpaces(remaining);
110 next = io.NextInField(remaining, edit);
113 return negative;
116 bool EditIntegerInput(
117 IoStatementState &io, const DataEdit &edit, void *n, int kind) {
118 RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
119 switch (edit.descriptor) {
120 case DataEdit::ListDirected:
121 if (IsNamelistNameOrSlash(io)) {
122 return false;
124 break;
125 case 'G':
126 case 'I':
127 break;
128 case 'B':
129 return EditBOZInput<1>(io, edit, n, kind);
130 case 'O':
131 return EditBOZInput<3>(io, edit, n, kind);
132 case 'Z':
133 return EditBOZInput<4>(io, edit, n, kind);
134 case 'A': // legacy extension
135 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
136 default:
137 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
138 "Data edit descriptor '%c' may not be used with an INTEGER data item",
139 edit.descriptor);
140 return false;
142 std::optional<int> remaining;
143 std::optional<char32_t> next;
144 bool negate{ScanNumericPrefix(io, edit, next, remaining)};
145 common::UnsignedInt128 value{0};
146 bool any{negate};
147 bool overflow{false};
148 for (; next; next = io.NextInField(remaining, edit)) {
149 char32_t ch{*next};
150 if (ch == ' ' || ch == '\t') {
151 if (edit.modes.editingFlags & blankZero) {
152 ch = '0'; // BZ mode - treat blank as if it were zero
153 } else {
154 continue;
157 int digit{0};
158 if (ch >= '0' && ch <= '9') {
159 digit = ch - '0';
160 } else {
161 io.GetIoErrorHandler().SignalError(
162 "Bad character '%lc' in INTEGER input field", ch);
163 return false;
165 static constexpr auto maxu128{~common::UnsignedInt128{0}};
166 static constexpr auto maxu128OverTen{maxu128 / 10};
167 static constexpr int maxLastDigit{
168 static_cast<int>(maxu128 - (maxu128OverTen * 10))};
169 overflow |= value >= maxu128OverTen &&
170 (value > maxu128OverTen || digit > maxLastDigit);
171 value *= 10;
172 value += digit;
173 any = true;
175 if (!any && !remaining) {
176 io.GetIoErrorHandler().SignalError(
177 "Integer value absent from NAMELIST or list-directed input");
178 return false;
180 auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
181 overflow |= value >= maxForKind && (value > maxForKind || !negate);
182 if (overflow) {
183 io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
184 "Decimal input overflows INTEGER(%d) variable", kind);
185 return false;
187 if (negate) {
188 value = -value;
190 if (any || !io.GetConnectionState().IsAtEOF()) {
191 std::memcpy(n, &value, kind); // a blank field means zero
193 return any;
196 // Parses a REAL input number from the input source as a normalized
197 // fraction into a supplied buffer -- there's an optional '-', a
198 // decimal point, and at least one digit. The adjusted exponent value
199 // is returned in a reference argument. The returned value is the number
200 // of characters that (should) have been written to the buffer -- this can
201 // be larger than the buffer size and can indicate overflow. Replaces
202 // blanks with zeroes if appropriate.
203 static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
204 const DataEdit &edit, int &exponent) {
205 std::optional<int> remaining;
206 std::optional<char32_t> next;
207 int got{0};
208 std::optional<int> decimalPoint;
209 auto Put{[&](char ch) -> void {
210 if (got < bufferSize) {
211 buffer[got] = ch;
213 ++got;
215 if (ScanNumericPrefix(io, edit, next, remaining)) {
216 Put('-');
218 bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
219 if (!next || (!bzMode && *next == ' ')) { // empty/blank field means zero
220 remaining.reset();
221 if (!io.GetConnectionState().IsAtEOF()) {
222 Put('0');
224 return got;
226 char32_t decimal{GetDecimalPoint(edit)};
227 char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
228 if (first == 'N' || first == 'I') {
229 // NaN or infinity - convert to upper case
230 // Subtle: a blank field of digits could be followed by 'E' or 'D',
231 for (; next &&
232 ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
233 next = io.NextInField(remaining, edit)) {
234 if (*next >= 'a' && *next <= 'z') {
235 Put(*next - 'a' + 'A');
236 } else {
237 Put(*next);
240 if (next && *next == '(') { // NaN(...)
241 Put('(');
242 int depth{1};
243 while (true) {
244 next = io.NextInField(remaining, edit);
245 if (depth == 0) {
246 break;
247 } else if (!next) {
248 return 0; // error
249 } else if (*next == '(') {
250 ++depth;
251 } else if (*next == ')') {
252 --depth;
254 Put(*next);
257 exponent = 0;
258 } else if (first == decimal || (first >= '0' && first <= '9') ||
259 (bzMode && (first == ' ' || first == '\t')) || first == 'E' ||
260 first == 'D' || first == 'Q') {
261 Put('.'); // input field is normalized to a fraction
262 auto start{got};
263 for (; next; next = io.NextInField(remaining, edit)) {
264 char32_t ch{*next};
265 if (ch == ' ' || ch == '\t') {
266 if (bzMode) {
267 ch = '0'; // BZ mode - treat blank as if it were zero
268 } else {
269 continue;
272 if (ch == '0' && got == start && !decimalPoint) {
273 // omit leading zeroes before the decimal
274 } else if (ch >= '0' && ch <= '9') {
275 Put(ch);
276 } else if (ch == decimal && !decimalPoint) {
277 // the decimal point is *not* copied to the buffer
278 decimalPoint = got - start; // # of digits before the decimal point
279 } else {
280 break;
283 if (got == start) {
284 // Nothing but zeroes and maybe a decimal point. F'2018 requires
285 // at least one digit, but F'77 did not, and a bare "." shows up in
286 // the FCVS suite.
287 Put('0'); // emit at least one digit
289 if (next &&
290 (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
291 *next == 'q' || *next == 'Q')) {
292 // Optional exponent letter. Blanks are allowed between the
293 // optional exponent letter and the exponent value.
294 io.SkipSpaces(remaining);
295 next = io.NextInField(remaining, edit);
297 // The default exponent is -kP, but the scale factor doesn't affect
298 // an explicit exponent.
299 exponent = -edit.modes.scale;
300 if (next &&
301 (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
302 *next == ' ' || *next == '\t')) {
303 bool negExpo{*next == '-'};
304 if (negExpo || *next == '+') {
305 next = io.NextInField(remaining, edit);
307 for (exponent = 0; next; next = io.NextInField(remaining, edit)) {
308 if (*next >= '0' && *next <= '9') {
309 if (exponent < 10000) {
310 exponent = 10 * exponent + *next - '0';
312 } else if (*next == ' ' || *next == '\t') {
313 if (bzMode) {
314 exponent = 10 * exponent;
316 } else {
317 break;
320 if (negExpo) {
321 exponent = -exponent;
324 if (decimalPoint) {
325 exponent += *decimalPoint;
326 } else {
327 // When no decimal point (or comma) appears in the value, the 'd'
328 // part of the edit descriptor must be interpreted as the number of
329 // digits in the value to be interpreted as being to the *right* of
330 // the assumed decimal point (13.7.2.3.2)
331 exponent += got - start - edit.digits.value_or(0);
333 } else {
334 // TODO: hex FP input
335 exponent = 0;
336 return 0;
338 // Consume the trailing ')' of a list-directed or NAMELIST complex
339 // input value.
340 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
341 if (next && (*next == ' ' || *next == '\t')) {
342 next = io.NextInField(remaining, edit);
344 if (!next) { // NextInField fails on separators like ')'
345 std::size_t byteCount{0};
346 next = io.GetCurrentChar(byteCount);
347 if (next && *next == ')') {
348 io.HandleRelativePosition(byteCount);
351 } else if (remaining) {
352 while (next && (*next == ' ' || *next == '\t')) {
353 next = io.NextInField(remaining, edit);
355 if (next) {
356 return 0; // error: unused nonblank character in fixed-width field
359 return got;
362 static void RaiseFPExceptions(decimal::ConversionResultFlags flags) {
363 #undef RAISE
364 #ifdef feraisexcept // a macro in some environments; omit std::
365 #define RAISE feraiseexcept
366 #else
367 #define RAISE std::feraiseexcept
368 #endif
369 if (flags & decimal::ConversionResultFlags::Overflow) {
370 RAISE(FE_OVERFLOW);
372 if (flags & decimal::ConversionResultFlags::Inexact) {
373 RAISE(FE_INEXACT);
375 if (flags & decimal::ConversionResultFlags::Invalid) {
376 RAISE(FE_INVALID);
378 #undef RAISE
381 // If no special modes are in effect and the form of the input value
382 // that's present in the input stream is acceptable to the decimal->binary
383 // converter without modification, this fast path for real input
384 // saves time by avoiding memory copies and reformatting of the exponent.
385 template <int PRECISION>
386 static bool TryFastPathRealInput(
387 IoStatementState &io, const DataEdit &edit, void *n) {
388 if (edit.modes.editingFlags & (blankZero | decimalComma)) {
389 return false;
391 if (edit.modes.scale != 0) {
392 return false;
394 const ConnectionState &connection{io.GetConnectionState()};
395 if (connection.internalIoCharKind > 1) {
396 return false; // reading non-default character
398 const char *str{nullptr};
399 std::size_t got{io.GetNextInputBytes(str)};
400 if (got == 0 || str == nullptr || !connection.recordLength.has_value()) {
401 return false; // could not access reliably-terminated input stream
403 const char *p{str};
404 std::int64_t maxConsume{
405 std::min<std::int64_t>(got, edit.width.value_or(got))};
406 const char *limit{str + maxConsume};
407 decimal::ConversionToBinaryResult<PRECISION> converted{
408 decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
409 if (converted.flags & (decimal::Invalid | decimal::Overflow)) {
410 return false;
412 if (edit.digits.value_or(0) != 0) {
413 // Edit descriptor is Fw.d (or other) with d != 0, which
414 // implies scaling
415 const char *q{str};
416 for (; q < limit; ++q) {
417 if (*q == '.' || *q == 'n' || *q == 'N') {
418 break;
421 if (q == limit) {
422 // No explicit decimal point, and not NaN/Inf.
423 return false;
426 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
428 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
429 // Need to consume a trailing ')' and any white space after
430 if (p >= limit || *p != ')') {
431 return false;
433 for (++p; p < limit && (*p == ' ' || *p == '\t'); ++p) {
436 if (edit.width && p < str + *edit.width) {
437 return false; // unconverted characters remain in fixed width field
439 // Success on the fast path!
440 *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
441 converted.binary;
442 io.HandleRelativePosition(p - str);
443 // Set FP exception flags
444 if (converted.flags != decimal::ConversionResultFlags::Exact) {
445 RaiseFPExceptions(converted.flags);
447 return true;
450 template <int KIND>
451 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
452 constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
453 if (TryFastPathRealInput<binaryPrecision>(io, edit, n)) {
454 return true;
456 // Fast path wasn't available or didn't work; go the more general route
457 static constexpr int maxDigits{
458 common::MaxDecimalConversionDigits(binaryPrecision)};
459 static constexpr int bufferSize{maxDigits + 18};
460 char buffer[bufferSize];
461 int exponent{0};
462 int got{ScanRealInput(buffer, maxDigits + 2, io, edit, exponent)};
463 if (got >= maxDigits + 2) {
464 io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
465 return false;
467 if (got == 0) {
468 io.GetIoErrorHandler().SignalError(IostatBadRealInput);
469 return false;
471 bool hadExtra{got > maxDigits};
472 if (exponent != 0) {
473 buffer[got++] = 'e';
474 if (exponent < 0) {
475 buffer[got++] = '-';
476 exponent = -exponent;
478 if (exponent > 9999) {
479 exponent = 9999; // will convert to +/-Inf
481 if (exponent > 999) {
482 int dig{exponent / 1000};
483 buffer[got++] = '0' + dig;
484 int rest{exponent - 1000 * dig};
485 dig = rest / 100;
486 buffer[got++] = '0' + dig;
487 rest -= 100 * dig;
488 dig = rest / 10;
489 buffer[got++] = '0' + dig;
490 buffer[got++] = '0' + (rest - 10 * dig);
491 } else if (exponent > 99) {
492 int dig{exponent / 100};
493 buffer[got++] = '0' + dig;
494 int rest{exponent - 100 * dig};
495 dig = rest / 10;
496 buffer[got++] = '0' + dig;
497 buffer[got++] = '0' + (rest - 10 * dig);
498 } else if (exponent > 9) {
499 int dig{exponent / 10};
500 buffer[got++] = '0' + dig;
501 buffer[got++] = '0' + (exponent - 10 * dig);
502 } else {
503 buffer[got++] = '0' + exponent;
506 buffer[got] = '\0';
507 const char *p{buffer};
508 decimal::ConversionToBinaryResult<binaryPrecision> converted{
509 decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round)};
510 if (hadExtra) {
511 converted.flags = static_cast<enum decimal::ConversionResultFlags>(
512 converted.flags | decimal::Inexact);
514 if (*p) { // unprocessed junk after value
515 io.GetIoErrorHandler().SignalError(IostatBadRealInput);
516 return false;
518 *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
519 converted.binary;
520 // Set FP exception flags
521 if (converted.flags != decimal::ConversionResultFlags::Exact) {
522 if (converted.flags & decimal::ConversionResultFlags::Overflow) {
523 io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
524 return false;
526 RaiseFPExceptions(converted.flags);
528 return true;
531 template <int KIND>
532 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
533 switch (edit.descriptor) {
534 case DataEdit::ListDirected:
535 if (IsNamelistNameOrSlash(io)) {
536 return false;
538 return EditCommonRealInput<KIND>(io, edit, n);
539 case DataEdit::ListDirectedRealPart:
540 case DataEdit::ListDirectedImaginaryPart:
541 case 'F':
542 case 'E': // incl. EN, ES, & EX
543 case 'D':
544 case 'G':
545 return EditCommonRealInput<KIND>(io, edit, n);
546 case 'B':
547 return EditBOZInput<1>(io, edit, n,
548 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
549 case 'O':
550 return EditBOZInput<3>(io, edit, n,
551 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
552 case 'Z':
553 return EditBOZInput<4>(io, edit, n,
554 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
555 case 'A': // legacy extension
556 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
557 default:
558 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
559 "Data edit descriptor '%c' may not be used for REAL input",
560 edit.descriptor);
561 return false;
565 // 13.7.3 in Fortran 2018
566 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
567 switch (edit.descriptor) {
568 case DataEdit::ListDirected:
569 if (IsNamelistNameOrSlash(io)) {
570 return false;
572 break;
573 case 'L':
574 case 'G':
575 break;
576 default:
577 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
578 "Data edit descriptor '%c' may not be used for LOGICAL input",
579 edit.descriptor);
580 return false;
582 std::optional<int> remaining{io.CueUpInput(edit)};
583 std::optional<char32_t> next{io.NextInField(remaining, edit)};
584 if (next && *next == '.') { // skip optional period
585 next = io.NextInField(remaining, edit);
587 if (!next) {
588 io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
589 return false;
591 switch (*next) {
592 case 'T':
593 case 't':
594 x = true;
595 break;
596 case 'F':
597 case 'f':
598 x = false;
599 break;
600 default:
601 io.GetIoErrorHandler().SignalError(
602 "Bad character '%lc' in LOGICAL input field", *next);
603 return false;
605 if (remaining) { // ignore the rest of the field
606 io.HandleRelativePosition(*remaining);
607 } else if (edit.descriptor == DataEdit::ListDirected) {
608 while (io.NextInField(remaining, edit)) { // discard rest of field
611 return true;
614 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
615 template <typename CHAR>
616 static bool EditDelimitedCharacterInput(
617 IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
618 bool result{true};
619 while (true) {
620 std::size_t byteCount{0};
621 auto ch{io.GetCurrentChar(byteCount)};
622 if (!ch) {
623 if (io.AdvanceRecord()) {
624 continue;
625 } else {
626 result = false; // EOF in character value
627 break;
630 io.HandleRelativePosition(byteCount);
631 if (*ch == delimiter) {
632 auto next{io.GetCurrentChar(byteCount)};
633 if (next && *next == delimiter) {
634 // Repeated delimiter: use as character value
635 io.HandleRelativePosition(byteCount);
636 } else {
637 break; // closing delimiter
640 if (length > 0) {
641 *x++ = *ch;
642 --length;
645 std::fill_n(x, length, ' ');
646 return result;
649 template <typename CHAR>
650 static bool EditListDirectedCharacterInput(
651 IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
652 std::size_t byteCount{0};
653 auto ch{io.GetCurrentChar(byteCount)};
654 if (ch && (*ch == '\'' || *ch == '"')) {
655 io.HandleRelativePosition(byteCount);
656 return EditDelimitedCharacterInput(io, x, length, *ch);
658 if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) {
659 return false;
661 // Undelimited list-directed character input: stop at a value separator
662 // or the end of the current record. Subtlety: the "remaining" count
663 // here is a dummy that's used to avoid the interpretation of separators
664 // in NextInField.
665 std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
666 while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) {
667 bool isSep{false};
668 switch (*next) {
669 case ' ':
670 case '\t':
671 case '/':
672 isSep = true;
673 break;
674 case ',':
675 isSep = !(edit.modes.editingFlags & decimalComma);
676 break;
677 case ';':
678 isSep = !!(edit.modes.editingFlags & decimalComma);
679 break;
680 default:
681 break;
683 if (isSep) {
684 remaining = 0;
685 } else {
686 *x++ = *next;
687 remaining = --length > 0 ? maxUTF8Bytes : 0;
690 std::fill_n(x, length, ' ');
691 return true;
694 template <typename CHAR>
695 bool EditCharacterInput(
696 IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) {
697 switch (edit.descriptor) {
698 case DataEdit::ListDirected:
699 return EditListDirectedCharacterInput(io, x, length, edit);
700 case 'A':
701 case 'G':
702 break;
703 case 'B':
704 return EditBOZInput<1>(io, edit, x, length * sizeof *x);
705 case 'O':
706 return EditBOZInput<3>(io, edit, x, length * sizeof *x);
707 case 'Z':
708 return EditBOZInput<4>(io, edit, x, length * sizeof *x);
709 default:
710 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
711 "Data edit descriptor '%c' may not be used with a CHARACTER data item",
712 edit.descriptor);
713 return false;
715 const ConnectionState &connection{io.GetConnectionState()};
716 std::size_t remaining{length};
717 if (edit.width && *edit.width > 0) {
718 remaining = *edit.width;
720 // When the field is wider than the variable, we drop the leading
721 // characters. When the variable is wider than the field, there can be
722 // trailing padding.
723 const char *input{nullptr};
724 std::size_t ready{0};
725 // Skip leading bytes.
726 // These bytes don't count towards INQUIRE(IOLENGTH=).
727 std::size_t skip{remaining > length ? remaining - length : 0};
728 // Transfer payload bytes; these do count.
729 while (remaining > 0) {
730 if (ready == 0) {
731 ready = io.GetNextInputBytes(input);
732 if (ready == 0) {
733 if (io.CheckForEndOfRecord()) {
734 std::fill_n(x, length, ' '); // PAD='YES'
736 return !io.GetIoErrorHandler().InError();
739 std::size_t chunk;
740 bool skipping{skip > 0};
741 if (connection.isUTF8) {
742 chunk = MeasureUTF8Bytes(*input);
743 if (skipping) {
744 --skip;
745 } else if (auto ucs{DecodeUTF8(input)}) {
746 *x++ = *ucs;
747 --length;
748 } else if (chunk == 0) {
749 // error recovery: skip bad encoding
750 chunk = 1;
752 --remaining;
753 } else if (connection.internalIoCharKind > 1) {
754 // Reading from non-default character internal unit
755 chunk = connection.internalIoCharKind;
756 if (skipping) {
757 --skip;
758 } else {
759 char32_t buffer{0};
760 std::memcpy(&buffer, input, chunk);
761 *x++ = buffer;
762 --length;
764 --remaining;
765 } else if constexpr (sizeof *x > 1) {
766 // Read single byte with expansion into multi-byte CHARACTER
767 chunk = 1;
768 if (skipping) {
769 --skip;
770 } else {
771 *x++ = static_cast<unsigned char>(*input);
772 --length;
774 --remaining;
775 } else { // single bytes -> default CHARACTER
776 if (skipping) {
777 chunk = std::min<std::size_t>(skip, ready);
778 skip -= chunk;
779 } else {
780 chunk = std::min<std::size_t>(remaining, ready);
781 std::memcpy(x, input, chunk);
782 x += chunk;
783 length -= chunk;
785 remaining -= chunk;
787 input += chunk;
788 if (!skipping) {
789 io.GotChar(chunk);
791 io.HandleRelativePosition(chunk);
792 ready -= chunk;
794 // Pad the remainder of the input variable, if any.
795 std::fill_n(x, length, ' ');
796 return true;
799 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
800 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
801 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
802 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
803 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
804 // TODO: double/double
805 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);
807 template bool EditCharacterInput(
808 IoStatementState &, const DataEdit &, char *, std::size_t);
809 template bool EditCharacterInput(
810 IoStatementState &, const DataEdit &, char16_t *, std::size_t);
811 template bool EditCharacterInput(
812 IoStatementState &, const DataEdit &, char32_t *, std::size_t);
814 } // namespace Fortran::runtime::io