[libc++][Android] Allow testing libc++ with clang-r536225 (#116149)
[llvm-project.git] / flang / runtime / edit-input.cpp
blob2cee35e23f31a3f5cbc2d36cb542c2a6cce9555f
1 //===-- runtime/edit-input.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "edit-input.h"
10 #include "namelist.h"
11 #include "utf.h"
12 #include "flang/Common/optional.h"
13 #include "flang/Common/real.h"
14 #include "flang/Common/uint128.h"
15 #include "flang/Runtime/freestanding-tools.h"
16 #include <algorithm>
17 #include <cfenv>
19 namespace Fortran::runtime::io {
20 RT_OFFLOAD_API_GROUP_BEGIN
22 // Checks that a list-directed input value has been entirely consumed and
23 // doesn't contain unparsed characters before the next value separator.
24 static inline RT_API_ATTRS bool IsCharValueSeparator(
25 const DataEdit &edit, char32_t ch) {
26 char32_t comma{
27 edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}};
28 return ch == ' ' || ch == '\t' || ch == comma || ch == '/' ||
29 (edit.IsNamelist() && (ch == '&' || ch == '$'));
32 static RT_API_ATTRS bool CheckCompleteListDirectedField(
33 IoStatementState &io, const DataEdit &edit) {
34 if (edit.IsListDirected()) {
35 std::size_t byteCount;
36 if (auto ch{io.GetCurrentChar(byteCount)}) {
37 if (IsCharValueSeparator(edit, *ch)) {
38 return true;
39 } else {
40 const auto &connection{io.GetConnectionState()};
41 io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator,
42 "invalid character (0x%x) after list-directed input value, "
43 "at column %d in record %d",
44 static_cast<unsigned>(*ch),
45 static_cast<int>(connection.positionInRecord + 1),
46 static_cast<int>(connection.currentRecordNumber));
47 return false;
49 } else {
50 return true; // end of record: ok
52 } else {
53 return true;
57 static inline RT_API_ATTRS char32_t GetSeparatorChar(const DataEdit &edit) {
58 return edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','};
61 template <int LOG2_BASE>
62 static RT_API_ATTRS bool EditBOZInput(
63 IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
64 // Skip leading white space & zeroes
65 Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
66 auto start{io.GetConnectionState().positionInRecord};
67 Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
68 if (next.value_or('?') == '0') {
69 do {
70 start = io.GetConnectionState().positionInRecord;
71 next = io.NextInField(remaining, edit);
72 } while (next && *next == '0');
74 // Count significant digits after any leading white space & zeroes
75 int digits{0};
76 int significantBits{0};
77 const char32_t comma{GetSeparatorChar(edit)};
78 for (; next; next = io.NextInField(remaining, edit)) {
79 char32_t ch{*next};
80 if (ch == ' ' || ch == '\t') {
81 if (edit.modes.editingFlags & blankZero) {
82 ch = '0'; // BZ mode - treat blank as if it were zero
83 } else {
84 continue;
87 if (ch >= '0' && ch <= '1') {
88 } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
89 } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
90 } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
91 } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
92 } else if (ch == comma) {
93 break; // end non-list-directed field early
94 } else {
95 io.GetIoErrorHandler().SignalError(
96 "Bad character '%lc' in B/O/Z input field", ch);
97 return false;
99 if (digits++ == 0) {
100 significantBits = 4;
101 if (ch >= '0' && ch <= '1') {
102 significantBits = 1;
103 } else if (ch >= '2' && ch <= '3') {
104 significantBits = 2;
105 } else if (ch >= '4' && ch <= '7') {
106 significantBits = 3;
107 } else {
108 significantBits = 4;
110 } else {
111 significantBits += LOG2_BASE;
114 auto significantBytes{static_cast<std::size_t>(significantBits + 7) / 8};
115 if (significantBytes > bytes) {
116 io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
117 "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
118 return false;
120 // Reset to start of significant digits
121 io.HandleAbsolutePosition(start);
122 remaining.reset();
123 // Make a second pass now that the digit count is known
124 std::memset(n, 0, bytes);
125 int increment{isHostLittleEndian ? -1 : 1};
126 auto *data{reinterpret_cast<unsigned char *>(n) +
127 (isHostLittleEndian ? significantBytes - 1 : bytes - significantBytes)};
128 int shift{((digits - 1) * LOG2_BASE) & 7};
129 while (digits > 0) {
130 char32_t ch{*io.NextInField(remaining, edit)};
131 int digit{0};
132 if (ch == ' ' || ch == '\t') {
133 if (edit.modes.editingFlags & blankZero) {
134 ch = '0'; // BZ mode - treat blank as if it were zero
135 } else {
136 continue;
139 --digits;
140 if (ch >= '0' && ch <= '9') {
141 digit = ch - '0';
142 } else if (ch >= 'A' && ch <= 'F') {
143 digit = ch + 10 - 'A';
144 } else if (ch >= 'a' && ch <= 'f') {
145 digit = ch + 10 - 'a';
146 } else {
147 continue;
149 if (shift < 0) {
150 if (shift + LOG2_BASE > 0) { // misaligned octal
151 *data |= digit >> -shift;
153 shift += 8;
154 data += increment;
156 *data |= digit << shift;
157 shift -= LOG2_BASE;
159 return CheckCompleteListDirectedField(io, edit);
162 static inline RT_API_ATTRS char32_t GetRadixPointChar(const DataEdit &edit) {
163 return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
166 // Prepares input from a field, and returns the sign, if any, else '\0'.
167 static RT_API_ATTRS char ScanNumericPrefix(IoStatementState &io,
168 const DataEdit &edit, Fortran::common::optional<char32_t> &next,
169 Fortran::common::optional<int> &remaining) {
170 remaining = io.CueUpInput(edit);
171 next = io.NextInField(remaining, edit);
172 char sign{'\0'};
173 if (next) {
174 if (*next == '-' || *next == '+') {
175 sign = *next;
176 if (!edit.IsListDirected()) {
177 io.SkipSpaces(remaining);
179 next = io.NextInField(remaining, edit);
182 return sign;
185 RT_API_ATTRS bool EditIntegerInput(
186 IoStatementState &io, const DataEdit &edit, void *n, int kind) {
187 RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
188 switch (edit.descriptor) {
189 case DataEdit::ListDirected:
190 if (IsNamelistNameOrSlash(io)) {
191 return false;
193 break;
194 case 'G':
195 case 'I':
196 break;
197 case 'B':
198 return EditBOZInput<1>(io, edit, n, kind);
199 case 'O':
200 return EditBOZInput<3>(io, edit, n, kind);
201 case 'Z':
202 return EditBOZInput<4>(io, edit, n, kind);
203 case 'A': // legacy extension
204 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
205 default:
206 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
207 "Data edit descriptor '%c' may not be used with an INTEGER data item",
208 edit.descriptor);
209 return false;
211 Fortran::common::optional<int> remaining;
212 Fortran::common::optional<char32_t> next;
213 char sign{ScanNumericPrefix(io, edit, next, remaining)};
214 common::UnsignedInt128 value{0};
215 bool any{!!sign};
216 bool overflow{false};
217 const char32_t comma{GetSeparatorChar(edit)};
218 for (; next; next = io.NextInField(remaining, edit)) {
219 char32_t ch{*next};
220 if (ch == ' ' || ch == '\t') {
221 if (edit.modes.editingFlags & blankZero) {
222 ch = '0'; // BZ mode - treat blank as if it were zero
223 } else {
224 continue;
227 int digit{0};
228 if (ch >= '0' && ch <= '9') {
229 digit = ch - '0';
230 } else if (ch == comma) {
231 break; // end non-list-directed field early
232 } else {
233 if (edit.modes.inNamelist && ch == GetRadixPointChar(edit)) {
234 // Ignore any fractional part that might appear in NAMELIST integer
235 // input, like a few other Fortran compilers do.
236 // TODO: also process exponents? Some compilers do, but they obviously
237 // can't just be ignored.
238 while ((next = io.NextInField(remaining, edit))) {
239 if (*next < '0' || *next > '9') {
240 break;
243 if (!next || *next == comma) {
244 break;
247 io.GetIoErrorHandler().SignalError(
248 "Bad character '%lc' in INTEGER input field", ch);
249 return false;
251 static constexpr auto maxu128{~common::UnsignedInt128{0}};
252 static constexpr auto maxu128OverTen{maxu128 / 10};
253 static constexpr int maxLastDigit{
254 static_cast<int>(maxu128 - (maxu128OverTen * 10))};
255 overflow |= value >= maxu128OverTen &&
256 (value > maxu128OverTen || digit > maxLastDigit);
257 value *= 10;
258 value += digit;
259 any = true;
261 if (!any && !remaining) {
262 io.GetIoErrorHandler().SignalError(
263 "Integer value absent from NAMELIST or list-directed input");
264 return false;
266 auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
267 overflow |= value >= maxForKind && (value > maxForKind || sign != '-');
268 if (overflow) {
269 io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
270 "Decimal input overflows INTEGER(%d) variable", kind);
271 return false;
273 if (sign == '-') {
274 value = -value;
276 if (any || !io.GetIoErrorHandler().InError()) {
277 // The value is stored in the lower order bits on big endian platform.
278 // When memcpy, shift the value to the higher order bit.
279 auto shft{static_cast<int>(sizeof(value.low())) - kind};
280 // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian.
281 if (!isHostLittleEndian && shft >= 0) {
282 auto l{value.low() << (8 * shft)};
283 std::memcpy(n, &l, kind);
284 } else {
285 std::memcpy(n, &value, kind); // a blank field means zero
287 return true;
288 } else {
289 return false;
293 // Parses a REAL input number from the input source as a normalized
294 // fraction into a supplied buffer -- there's an optional '-', a
295 // decimal point when the input is not hexadecimal, and at least one
296 // digit. Replaces blanks with zeroes where appropriate.
297 struct ScannedRealInput {
298 // Number of characters that (should) have been written to the
299 // buffer -- this can be larger than the buffer size, which
300 // indicates buffer overflow. Zero indicates an error.
301 int got{0};
302 int exponent{0}; // adjusted as necessary; binary if isHexadecimal
303 bool isHexadecimal{false}; // 0X...
305 static RT_API_ATTRS ScannedRealInput ScanRealInput(
306 char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) {
307 Fortran::common::optional<int> remaining;
308 Fortran::common::optional<char32_t> next;
309 int got{0};
310 Fortran::common::optional<int> radixPointOffset;
311 // The following lambda definition violates the conding style,
312 // but cuda-11.8 nvcc hits an internal error with the brace initialization.
313 auto Put = [&](char ch) -> void {
314 if (got < bufferSize) {
315 buffer[got] = ch;
317 ++got;
319 char sign{ScanNumericPrefix(io, edit, next, remaining)};
320 if (sign == '-') {
321 Put('-');
323 bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
324 int exponent{0};
325 if (!next || (!bzMode && *next == ' ') ||
326 (!(edit.modes.editingFlags & decimalComma) && *next == ',')) {
327 if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) {
328 // An empty/blank field means zero when not list-directed.
329 // A fixed-width field containing only a sign is also zero;
330 // this behavior isn't standard-conforming in F'2023 but it is
331 // required to pass FCVS.
332 Put('0');
334 return {got, exponent, false};
336 char32_t radixPointChar{GetRadixPointChar(edit)};
337 char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
338 bool isHexadecimal{false};
339 if (first == 'N' || first == 'I') {
340 // NaN or infinity - convert to upper case
341 // Subtle: a blank field of digits could be followed by 'E' or 'D',
342 for (; next &&
343 ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
344 next = io.NextInField(remaining, edit)) {
345 if (*next >= 'a' && *next <= 'z') {
346 Put(*next - 'a' + 'A');
347 } else {
348 Put(*next);
351 if (next && *next == '(') { // NaN(...)
352 Put('(');
353 int depth{1};
354 while (true) {
355 next = io.NextInField(remaining, edit);
356 if (depth == 0) {
357 break;
358 } else if (!next) {
359 return {}; // error
360 } else if (*next == '(') {
361 ++depth;
362 } else if (*next == ')') {
363 --depth;
365 Put(*next);
368 } else if (first == radixPointChar || (first >= '0' && first <= '9') ||
369 (bzMode && (first == ' ' || first == '\t')) || first == 'E' ||
370 first == 'D' || first == 'Q') {
371 if (first == '0') {
372 next = io.NextInField(remaining, edit);
373 if (next && (*next == 'x' || *next == 'X')) { // 0X...
374 isHexadecimal = true;
375 next = io.NextInField(remaining, edit);
376 } else {
377 Put('0');
380 // input field is normalized to a fraction
381 if (!isHexadecimal) {
382 Put('.');
384 auto start{got};
385 for (; next; next = io.NextInField(remaining, edit)) {
386 char32_t ch{*next};
387 if (ch == ' ' || ch == '\t') {
388 if (isHexadecimal) {
389 return {}; // error
390 } else if (bzMode) {
391 ch = '0'; // BZ mode - treat blank as if it were zero
392 } else {
393 continue; // ignore blank in fixed field
396 if (ch == '0' && got == start && !radixPointOffset) {
397 // omit leading zeroes before the radix point
398 } else if (ch >= '0' && ch <= '9') {
399 Put(ch);
400 } else if (ch == radixPointChar && !radixPointOffset) {
401 // The radix point character is *not* copied to the buffer.
402 radixPointOffset = got - start; // # of digits before the radix point
403 } else if (isHexadecimal && ch >= 'A' && ch <= 'F') {
404 Put(ch);
405 } else if (isHexadecimal && ch >= 'a' && ch <= 'f') {
406 Put(ch - 'a' + 'A'); // normalize to capitals
407 } else {
408 break;
411 if (got == start) {
412 // Nothing but zeroes and maybe a radix point. F'2018 requires
413 // at least one digit, but F'77 did not, and a bare "." shows up in
414 // the FCVS suite.
415 Put('0'); // emit at least one digit
417 // In list-directed input, a bad exponent is not consumed.
418 auto nextBeforeExponent{next};
419 auto startExponent{io.GetConnectionState().positionInRecord};
420 bool hasGoodExponent{false};
421 if (next) {
422 if (isHexadecimal) {
423 if (*next == 'p' || *next == 'P') {
424 next = io.NextInField(remaining, edit);
425 } else {
426 // The binary exponent is not optional in the standard.
427 return {}; // error
429 } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
430 *next == 'q' || *next == 'Q') {
431 // Optional exponent letter. Blanks are allowed between the
432 // optional exponent letter and the exponent value.
433 io.SkipSpaces(remaining);
434 next = io.NextInField(remaining, edit);
437 if (next &&
438 (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
439 *next == ' ' || *next == '\t')) {
440 bool negExpo{*next == '-'};
441 if (negExpo || *next == '+') {
442 next = io.NextInField(remaining, edit);
444 for (; next; next = io.NextInField(remaining, edit)) {
445 if (*next >= '0' && *next <= '9') {
446 hasGoodExponent = true;
447 if (exponent < 10000) {
448 exponent = 10 * exponent + *next - '0';
450 } else if (*next == ' ' || *next == '\t') {
451 if (isHexadecimal) {
452 break;
453 } else if (bzMode) {
454 hasGoodExponent = true;
455 exponent = 10 * exponent;
457 } else {
458 break;
461 if (negExpo) {
462 exponent = -exponent;
465 if (!hasGoodExponent) {
466 if (isHexadecimal) {
467 return {}; // error
469 // There isn't a good exponent; do not consume it.
470 next = nextBeforeExponent;
471 io.HandleAbsolutePosition(startExponent);
472 // The default exponent is -kP, but the scale factor doesn't affect
473 // an explicit exponent.
474 exponent = -edit.modes.scale;
476 // Adjust exponent by number of digits before the radix point.
477 if (isHexadecimal) {
478 // Exponents for hexadecimal input are binary.
479 exponent += radixPointOffset.value_or(got - start) * 4;
480 } else if (radixPointOffset) {
481 exponent += *radixPointOffset;
482 } else {
483 // When no redix point (or comma) appears in the value, the 'd'
484 // part of the edit descriptor must be interpreted as the number of
485 // digits in the value to be interpreted as being to the *right* of
486 // the assumed radix point (13.7.2.3.2)
487 exponent += got - start - edit.digits.value_or(0);
490 // Consume the trailing ')' of a list-directed or NAMELIST complex
491 // input value.
492 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
493 if (next && (*next == ' ' || *next == '\t')) {
494 io.SkipSpaces(remaining);
495 next = io.NextInField(remaining, edit);
497 if (!next) { // NextInField fails on separators like ')'
498 std::size_t byteCount{0};
499 next = io.GetCurrentChar(byteCount);
500 if (next && *next == ')') {
501 io.HandleRelativePosition(byteCount);
504 } else if (remaining) {
505 while (next && (*next == ' ' || *next == '\t')) {
506 next = io.NextInField(remaining, edit);
508 if (next && (*next != ',' || (edit.modes.editingFlags & decimalComma))) {
509 return {}; // error: unused nonblank character in fixed-width field
512 return {got, exponent, isHexadecimal};
515 static RT_API_ATTRS void RaiseFPExceptions(
516 decimal::ConversionResultFlags flags) {
517 #undef RAISE
518 #if defined(RT_DEVICE_COMPILATION)
519 Terminator terminator(__FILE__, __LINE__);
520 #define RAISE(e) \
521 terminator.Crash( \
522 "not implemented yet: raising FP exception in device code: %s", #e);
523 #else // !defined(RT_DEVICE_COMPILATION)
524 #ifdef feraisexcept // a macro in some environments; omit std::
525 #define RAISE feraiseexcept
526 #else
527 #define RAISE std::feraiseexcept
528 #endif
529 #endif // !defined(RT_DEVICE_COMPILATION)
531 // Some environment (e.g. emscripten, musl) don't define FE_OVERFLOW as allowed
532 // by c99 (but not c++11) :-/
533 #if defined(FE_OVERFLOW) || defined(RT_DEVICE_COMPILATION)
534 if (flags & decimal::ConversionResultFlags::Overflow) {
535 RAISE(FE_OVERFLOW);
537 #endif
538 #if defined(FE_UNDERFLOW) || defined(RT_DEVICE_COMPILATION)
539 if (flags & decimal::ConversionResultFlags::Underflow) {
540 RAISE(FE_UNDERFLOW);
542 #endif
543 #if defined(FE_INEXACT) || defined(RT_DEVICE_COMPILATION)
544 if (flags & decimal::ConversionResultFlags::Inexact) {
545 RAISE(FE_INEXACT);
547 #endif
548 #if defined(FE_INVALID) || defined(RT_DEVICE_COMPILATION)
549 if (flags & decimal::ConversionResultFlags::Invalid) {
550 RAISE(FE_INVALID);
552 #endif
553 #undef RAISE
556 // If no special modes are in effect and the form of the input value
557 // that's present in the input stream is acceptable to the decimal->binary
558 // converter without modification, this fast path for real input
559 // saves time by avoiding memory copies and reformatting of the exponent.
560 template <int PRECISION>
561 static RT_API_ATTRS bool TryFastPathRealDecimalInput(
562 IoStatementState &io, const DataEdit &edit, void *n) {
563 if (edit.modes.editingFlags & (blankZero | decimalComma)) {
564 return false;
566 if (edit.modes.scale != 0) {
567 return false;
569 const ConnectionState &connection{io.GetConnectionState()};
570 if (connection.internalIoCharKind > 1) {
571 return false; // reading non-default character
573 const char *str{nullptr};
574 std::size_t got{io.GetNextInputBytes(str)};
575 if (got == 0 || str == nullptr || !connection.recordLength.has_value()) {
576 return false; // could not access reliably-terminated input stream
578 const char *p{str};
579 std::int64_t maxConsume{
580 std::min<std::int64_t>(got, edit.width.value_or(got))};
581 const char *limit{str + maxConsume};
582 decimal::ConversionToBinaryResult<PRECISION> converted{
583 decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
584 if (converted.flags & (decimal::Invalid | decimal::Overflow)) {
585 return false;
587 if (edit.digits.value_or(0) != 0) {
588 // Edit descriptor is Fw.d (or other) with d != 0, which
589 // implies scaling
590 const char *q{str};
591 for (; q < limit; ++q) {
592 if (*q == '.' || *q == 'n' || *q == 'N') {
593 break;
596 if (q == limit) {
597 // No explicit decimal point, and not NaN/Inf.
598 return false;
601 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
602 // Need to consume a trailing ')', possibly with leading spaces
603 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
605 if (p < limit && *p == ')') {
606 ++p;
607 } else {
608 return false;
610 } else if (edit.IsListDirected()) {
611 if (p < limit && !IsCharValueSeparator(edit, *p)) {
612 return false;
614 } else {
615 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
617 if (edit.width && p < str + *edit.width) {
618 return false; // unconverted characters remain in fixed width field
621 // Success on the fast path!
622 *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
623 converted.binary;
624 io.HandleRelativePosition(p - str);
625 // Set FP exception flags
626 if (converted.flags != decimal::ConversionResultFlags::Exact) {
627 RaiseFPExceptions(converted.flags);
629 return true;
632 template <int binaryPrecision>
633 RT_API_ATTRS decimal::ConversionToBinaryResult<binaryPrecision>
634 ConvertHexadecimal(
635 const char *&p, enum decimal::FortranRounding rounding, int expo) {
636 using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>;
637 using RawType = typename RealType::RawType;
638 bool isNegative{*p == '-'};
639 constexpr RawType one{1};
640 RawType signBit{0};
641 if (isNegative) {
642 ++p;
643 signBit = one << (RealType::bits - 1);
645 RawType fraction{0};
646 // Adjust the incoming binary P+/- exponent to shift the radix point
647 // to below the LSB and add in the bias.
648 expo += binaryPrecision - 1 + RealType::exponentBias;
649 // Input the fraction.
650 int roundingBit{0};
651 int guardBit{0};
652 for (; *p; ++p) {
653 fraction <<= 4;
654 expo -= 4;
655 if (*p >= '0' && *p <= '9') {
656 fraction |= *p - '0';
657 } else if (*p >= 'A' && *p <= 'F') {
658 fraction |= *p - 'A' + 10; // data were normalized to capitals
659 } else {
660 break;
662 if (fraction >> binaryPrecision) {
663 while (fraction >> binaryPrecision) {
664 guardBit |= roundingBit;
665 roundingBit = (int)fraction & 1;
666 fraction >>= 1;
667 ++expo;
669 // Consume excess digits
670 while (*++p) {
671 if (*p == '0') {
672 } else if ((*p >= '1' && *p <= '9') || (*p >= 'A' && *p <= 'F')) {
673 guardBit = 1;
674 } else {
675 break;
678 break;
681 if (fraction) {
682 // Boost biased expo if too small
683 while (expo < 1) {
684 guardBit |= roundingBit;
685 roundingBit = (int)fraction & 1;
686 fraction >>= 1;
687 ++expo;
689 // Normalize
690 while (expo > 1 && !(fraction >> (binaryPrecision - 1))) {
691 fraction <<= 1;
692 --expo;
693 guardBit = roundingBit = 0;
696 // Rounding
697 bool increase{false};
698 switch (rounding) {
699 case decimal::RoundNearest: // RN & RP
700 increase = roundingBit && (guardBit | ((int)fraction & 1));
701 break;
702 case decimal::RoundUp: // RU
703 increase = !isNegative && (roundingBit | guardBit);
704 break;
705 case decimal::RoundDown: // RD
706 increase = isNegative && (roundingBit | guardBit);
707 break;
708 case decimal::RoundToZero: // RZ
709 break;
710 case decimal::RoundCompatible: // RC
711 increase = roundingBit != 0;
712 break;
714 if (increase) {
715 ++fraction;
716 if (fraction >> binaryPrecision) {
717 fraction >>= 1;
718 ++expo;
721 // Package & return result
722 constexpr RawType significandMask{(one << RealType::significandBits) - 1};
723 int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
724 if (!fraction) {
725 expo = 0;
726 } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) {
727 expo = 0; // subnormal
728 flags |= decimal::Underflow;
729 } else if (expo >= RealType::maxExponent) {
730 if (rounding == decimal::RoundToZero ||
731 (rounding == decimal::RoundDown && !isNegative) ||
732 (rounding == decimal::RoundUp && isNegative)) {
733 expo = RealType::maxExponent - 1; // +/-HUGE()
734 fraction = significandMask;
735 } else {
736 expo = RealType::maxExponent; // +/-Inf
737 fraction = 0;
738 flags |= decimal::Overflow;
740 } else {
741 fraction &= significandMask; // remove explicit normalization unless x87
743 return decimal::ConversionToBinaryResult<binaryPrecision>{
744 RealType{static_cast<RawType>(signBit |
745 static_cast<RawType>(expo) << RealType::significandBits | fraction)},
746 static_cast<decimal::ConversionResultFlags>(flags)};
749 template <int KIND>
750 RT_API_ATTRS bool EditCommonRealInput(
751 IoStatementState &io, const DataEdit &edit, void *n) {
752 constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
753 if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) {
754 return CheckCompleteListDirectedField(io, edit);
756 // Fast path wasn't available or didn't work; go the more general route
757 static constexpr int maxDigits{
758 common::MaxDecimalConversionDigits(binaryPrecision)};
759 static constexpr int bufferSize{maxDigits + 18};
760 char buffer[bufferSize];
761 auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)};
762 int got{scanned.got};
763 if (got >= maxDigits + 2) {
764 io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
765 return false;
767 if (got == 0) {
768 const auto &connection{io.GetConnectionState()};
769 io.GetIoErrorHandler().SignalError(IostatBadRealInput,
770 "Bad real input data at column %d of record %d",
771 static_cast<int>(connection.positionInRecord + 1),
772 static_cast<int>(connection.currentRecordNumber));
773 return false;
775 decimal::ConversionToBinaryResult<binaryPrecision> converted;
776 const char *p{buffer};
777 if (scanned.isHexadecimal) {
778 buffer[got] = '\0';
779 converted = ConvertHexadecimal<binaryPrecision>(
780 p, edit.modes.round, scanned.exponent);
781 } else {
782 bool hadExtra{got > maxDigits};
783 int exponent{scanned.exponent};
784 if (exponent != 0) {
785 buffer[got++] = 'e';
786 if (exponent < 0) {
787 buffer[got++] = '-';
788 exponent = -exponent;
790 if (exponent > 9999) {
791 exponent = 9999; // will convert to +/-Inf
793 if (exponent > 999) {
794 int dig{exponent / 1000};
795 buffer[got++] = '0' + dig;
796 int rest{exponent - 1000 * dig};
797 dig = rest / 100;
798 buffer[got++] = '0' + dig;
799 rest -= 100 * dig;
800 dig = rest / 10;
801 buffer[got++] = '0' + dig;
802 buffer[got++] = '0' + (rest - 10 * dig);
803 } else if (exponent > 99) {
804 int dig{exponent / 100};
805 buffer[got++] = '0' + dig;
806 int rest{exponent - 100 * dig};
807 dig = rest / 10;
808 buffer[got++] = '0' + dig;
809 buffer[got++] = '0' + (rest - 10 * dig);
810 } else if (exponent > 9) {
811 int dig{exponent / 10};
812 buffer[got++] = '0' + dig;
813 buffer[got++] = '0' + (exponent - 10 * dig);
814 } else {
815 buffer[got++] = '0' + exponent;
818 buffer[got] = '\0';
819 converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round);
820 if (hadExtra) {
821 converted.flags = static_cast<enum decimal::ConversionResultFlags>(
822 converted.flags | decimal::Inexact);
825 if (*p) { // unprocessed junk after value
826 const auto &connection{io.GetConnectionState()};
827 io.GetIoErrorHandler().SignalError(IostatBadRealInput,
828 "Trailing characters after real input data at column %d of record %d",
829 static_cast<int>(connection.positionInRecord + 1),
830 static_cast<int>(connection.currentRecordNumber));
831 return false;
833 *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
834 converted.binary;
835 // Set FP exception flags
836 if (converted.flags != decimal::ConversionResultFlags::Exact) {
837 if (converted.flags & decimal::ConversionResultFlags::Overflow) {
838 io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
839 return false;
841 RaiseFPExceptions(converted.flags);
843 return CheckCompleteListDirectedField(io, edit);
846 template <int KIND>
847 RT_API_ATTRS bool EditRealInput(
848 IoStatementState &io, const DataEdit &edit, void *n) {
849 switch (edit.descriptor) {
850 case DataEdit::ListDirected:
851 if (IsNamelistNameOrSlash(io)) {
852 return false;
854 return EditCommonRealInput<KIND>(io, edit, n);
855 case DataEdit::ListDirectedRealPart:
856 case DataEdit::ListDirectedImaginaryPart:
857 case 'F':
858 case 'E': // incl. EN, ES, & EX
859 case 'D':
860 case 'G':
861 return EditCommonRealInput<KIND>(io, edit, n);
862 case 'B':
863 return EditBOZInput<1>(io, edit, n,
864 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
865 case 'O':
866 return EditBOZInput<3>(io, edit, n,
867 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
868 case 'Z':
869 return EditBOZInput<4>(io, edit, n,
870 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
871 case 'A': // legacy extension
872 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
873 default:
874 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
875 "Data edit descriptor '%c' may not be used for REAL input",
876 edit.descriptor);
877 return false;
881 // 13.7.3 in Fortran 2018
882 RT_API_ATTRS bool EditLogicalInput(
883 IoStatementState &io, const DataEdit &edit, bool &x) {
884 switch (edit.descriptor) {
885 case DataEdit::ListDirected:
886 if (IsNamelistNameOrSlash(io)) {
887 return false;
889 break;
890 case 'L':
891 case 'G':
892 break;
893 default:
894 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
895 "Data edit descriptor '%c' may not be used for LOGICAL input",
896 edit.descriptor);
897 return false;
899 Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
900 Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
901 if (next && *next == '.') { // skip optional period
902 next = io.NextInField(remaining, edit);
904 if (!next) {
905 io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
906 return false;
908 switch (*next) {
909 case 'T':
910 case 't':
911 x = true;
912 break;
913 case 'F':
914 case 'f':
915 x = false;
916 break;
917 default:
918 io.GetIoErrorHandler().SignalError(
919 "Bad character '%lc' in LOGICAL input field", *next);
920 return false;
922 if (remaining) { // ignore the rest of a fixed-width field
923 io.HandleRelativePosition(*remaining);
924 } else if (edit.descriptor == DataEdit::ListDirected) {
925 while (io.NextInField(remaining, edit)) { // discard rest of field
928 return CheckCompleteListDirectedField(io, edit);
931 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
932 template <typename CHAR>
933 static RT_API_ATTRS bool EditDelimitedCharacterInput(
934 IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
935 bool result{true};
936 while (true) {
937 std::size_t byteCount{0};
938 auto ch{io.GetCurrentChar(byteCount)};
939 if (!ch) {
940 if (io.AdvanceRecord()) {
941 continue;
942 } else {
943 result = false; // EOF in character value
944 break;
947 io.HandleRelativePosition(byteCount);
948 if (*ch == delimiter) {
949 auto next{io.GetCurrentChar(byteCount)};
950 if (next && *next == delimiter) {
951 // Repeated delimiter: use as character value
952 io.HandleRelativePosition(byteCount);
953 } else {
954 break; // closing delimiter
957 if (length > 0) {
958 *x++ = *ch;
959 --length;
962 Fortran::runtime::fill_n(x, length, ' ');
963 return result;
966 template <typename CHAR>
967 static RT_API_ATTRS bool EditListDirectedCharacterInput(
968 IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
969 std::size_t byteCount{0};
970 auto ch{io.GetCurrentChar(byteCount)};
971 if (ch && (*ch == '\'' || *ch == '"')) {
972 io.HandleRelativePosition(byteCount);
973 return EditDelimitedCharacterInput(io, x, length, *ch);
975 if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) {
976 return false;
978 // Undelimited list-directed character input: stop at a value separator
979 // or the end of the current record. Subtlety: the "remaining" count
980 // here is a dummy that's used to avoid the interpretation of separators
981 // in NextInField.
982 Fortran::common::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
983 while (Fortran::common::optional<char32_t> next{
984 io.NextInField(remaining, edit)}) {
985 bool isSep{false};
986 switch (*next) {
987 case ' ':
988 case '\t':
989 case '/':
990 isSep = true;
991 break;
992 case '&':
993 case '$':
994 isSep = edit.IsNamelist();
995 break;
996 case ',':
997 isSep = !(edit.modes.editingFlags & decimalComma);
998 break;
999 case ';':
1000 isSep = !!(edit.modes.editingFlags & decimalComma);
1001 break;
1002 default:
1003 break;
1005 if (isSep) {
1006 remaining = 0;
1007 } else {
1008 *x++ = *next;
1009 remaining = --length > 0 ? maxUTF8Bytes : 0;
1012 Fortran::runtime::fill_n(x, length, ' ');
1013 return true;
1016 template <typename CHAR>
1017 RT_API_ATTRS bool EditCharacterInput(IoStatementState &io, const DataEdit &edit,
1018 CHAR *x, std::size_t lengthChars) {
1019 switch (edit.descriptor) {
1020 case DataEdit::ListDirected:
1021 return EditListDirectedCharacterInput(io, x, lengthChars, edit);
1022 case 'A':
1023 case 'G':
1024 break;
1025 case 'B':
1026 return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
1027 case 'O':
1028 return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
1029 case 'Z':
1030 return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
1031 default:
1032 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
1033 "Data edit descriptor '%c' may not be used with a CHARACTER data item",
1034 edit.descriptor);
1035 return false;
1037 const ConnectionState &connection{io.GetConnectionState()};
1038 std::size_t remainingChars{lengthChars};
1039 // Skip leading characters.
1040 // Their bytes don't count towards INQUIRE(IOLENGTH=).
1041 std::size_t skipChars{0};
1042 if (edit.width && *edit.width > 0) {
1043 remainingChars = *edit.width;
1044 if (remainingChars > lengthChars) {
1045 skipChars = remainingChars - lengthChars;
1048 // When the field is wider than the variable, we drop the leading
1049 // characters. When the variable is wider than the field, there can be
1050 // trailing padding or an EOR condition.
1051 const char *input{nullptr};
1052 std::size_t readyBytes{0};
1053 // Transfer payload bytes; these do count.
1054 while (remainingChars > 0) {
1055 if (readyBytes == 0) {
1056 readyBytes = io.GetNextInputBytes(input);
1057 if (readyBytes == 0 ||
1058 (readyBytes < remainingChars && edit.modes.nonAdvancing)) {
1059 if (io.CheckForEndOfRecord(readyBytes)) {
1060 if (readyBytes == 0) {
1061 // PAD='YES' and no more data
1062 Fortran::runtime::fill_n(x, lengthChars, ' ');
1063 return !io.GetIoErrorHandler().InError();
1064 } else {
1065 // Do partial read(s) then pad on last iteration
1067 } else {
1068 return !io.GetIoErrorHandler().InError();
1072 std::size_t chunkBytes;
1073 std::size_t chunkChars{1};
1074 bool skipping{skipChars > 0};
1075 if (connection.isUTF8) {
1076 chunkBytes = MeasureUTF8Bytes(*input);
1077 if (skipping) {
1078 --skipChars;
1079 } else if (auto ucs{DecodeUTF8(input)}) {
1080 if ((sizeof *x == 1 && *ucs > 0xff) ||
1081 (sizeof *x == 2 && *ucs > 0xffff)) {
1082 *x++ = '?';
1083 } else {
1084 *x++ = *ucs;
1086 --lengthChars;
1087 } else if (chunkBytes == 0) {
1088 // error recovery: skip bad encoding
1089 chunkBytes = 1;
1091 } else if (connection.internalIoCharKind > 1) {
1092 // Reading from non-default character internal unit
1093 chunkBytes = connection.internalIoCharKind;
1094 if (skipping) {
1095 --skipChars;
1096 } else {
1097 char32_t buffer{0};
1098 std::memcpy(&buffer, input, chunkBytes);
1099 if ((sizeof *x == 1 && buffer > 0xff) ||
1100 (sizeof *x == 2 && buffer > 0xffff)) {
1101 *x++ = '?';
1102 } else {
1103 *x++ = buffer;
1105 --lengthChars;
1107 } else if constexpr (sizeof *x > 1) {
1108 // Read single byte with expansion into multi-byte CHARACTER
1109 chunkBytes = 1;
1110 if (skipping) {
1111 --skipChars;
1112 } else {
1113 *x++ = static_cast<unsigned char>(*input);
1114 --lengthChars;
1116 } else { // single bytes -> default CHARACTER
1117 if (skipping) {
1118 chunkBytes = std::min<std::size_t>(skipChars, readyBytes);
1119 chunkChars = chunkBytes;
1120 skipChars -= chunkChars;
1121 } else {
1122 chunkBytes = std::min<std::size_t>(remainingChars, readyBytes);
1123 chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes);
1124 chunkChars = chunkBytes;
1125 std::memcpy(x, input, chunkBytes);
1126 x += chunkBytes;
1127 lengthChars -= chunkChars;
1130 input += chunkBytes;
1131 remainingChars -= chunkChars;
1132 if (!skipping) {
1133 io.GotChar(chunkBytes);
1135 io.HandleRelativePosition(chunkBytes);
1136 readyBytes -= chunkBytes;
1138 // Pad the remainder of the input variable, if any.
1139 Fortran::runtime::fill_n(x, lengthChars, ' ');
1140 return CheckCompleteListDirectedField(io, edit);
1143 template RT_API_ATTRS bool EditRealInput<2>(
1144 IoStatementState &, const DataEdit &, void *);
1145 template RT_API_ATTRS bool EditRealInput<3>(
1146 IoStatementState &, const DataEdit &, void *);
1147 template RT_API_ATTRS bool EditRealInput<4>(
1148 IoStatementState &, const DataEdit &, void *);
1149 template RT_API_ATTRS bool EditRealInput<8>(
1150 IoStatementState &, const DataEdit &, void *);
1151 template RT_API_ATTRS bool EditRealInput<10>(
1152 IoStatementState &, const DataEdit &, void *);
1153 // TODO: double/double
1154 template RT_API_ATTRS bool EditRealInput<16>(
1155 IoStatementState &, const DataEdit &, void *);
1157 template RT_API_ATTRS bool EditCharacterInput(
1158 IoStatementState &, const DataEdit &, char *, std::size_t);
1159 template RT_API_ATTRS bool EditCharacterInput(
1160 IoStatementState &, const DataEdit &, char16_t *, std::size_t);
1161 template RT_API_ATTRS bool EditCharacterInput(
1162 IoStatementState &, const DataEdit &, char32_t *, std::size_t);
1164 RT_OFFLOAD_API_GROUP_END
1165 } // namespace Fortran::runtime::io