1 //===-- runtime/format-implementation.h -------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Implements out-of-line member functions of template class FormatControl
11 #ifndef FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
12 #define FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
14 #include "emit-encoded.h"
18 #include "flang/Common/format.h"
19 #include "flang/Decimal/decimal.h"
20 #include "flang/Runtime/main.h"
25 namespace Fortran::runtime::io
{
27 template <typename CONTEXT
>
28 RT_API_ATTRS FormatControl
<CONTEXT
>::FormatControl(const Terminator
&terminator
,
29 const CharType
*format
, std::size_t formatLength
,
30 const Descriptor
*formatDescriptor
, int maxHeight
)
31 : maxHeight_
{static_cast<std::uint8_t>(maxHeight
)}, format_
{format
},
32 formatLength_
{static_cast<int>(formatLength
)} {
33 RUNTIME_CHECK(terminator
, maxHeight
== maxHeight_
);
34 if (!format
&& formatDescriptor
) {
35 // The format is a character array passed via a descriptor.
36 std::size_t elements
{formatDescriptor
->Elements()};
37 std::size_t elementBytes
{formatDescriptor
->ElementBytes()};
38 formatLength
= elements
* elementBytes
/ sizeof(CharType
);
39 formatLength_
= static_cast<int>(formatLength
);
40 if (formatDescriptor
->IsContiguous()) {
41 // Treat the contiguous array as a single character value.
42 format_
= const_cast<const CharType
*>(
43 reinterpret_cast<CharType
*>(formatDescriptor
->raw().base_addr
));
45 // Concatenate its elements into a temporary array.
46 char *p
{reinterpret_cast<char *>(
47 AllocateMemoryOrCrash(terminator
, formatLength
* sizeof(CharType
)))};
49 SubscriptValue at
[maxRank
];
50 formatDescriptor
->GetLowerBounds(at
);
51 for (std::size_t j
{0}; j
< elements
; ++j
) {
52 std::memcpy(p
, formatDescriptor
->Element
<char>(at
), elementBytes
);
54 formatDescriptor
->IncrementSubscripts(at
);
60 terminator
, formatLength
== static_cast<std::size_t>(formatLength_
));
61 stack_
[0].start
= offset_
;
62 stack_
[0].remaining
= Iteration::unlimited
; // 13.4(8)
65 template <typename CONTEXT
>
66 RT_API_ATTRS
int FormatControl
<CONTEXT
>::GetIntField(
67 IoErrorHandler
&handler
, CharType firstCh
, bool *hadError
) {
68 CharType ch
{firstCh
? firstCh
: PeekNext()};
69 bool negate
{ch
== '-'};
70 if (negate
|| ch
== '+') {
78 if (ch
< '0' || ch
> '9') {
79 handler
.SignalError(IostatErrorInFormat
,
80 "Invalid FORMAT: integer expected at '%c'", static_cast<char>(ch
));
87 while (ch
>= '0' && ch
<= '9') {
88 constexpr int tenth
{std::numeric_limits
<int>::max() / 10};
90 ch
- '0' > std::numeric_limits
<int>::max() - 10 * result
) {
92 IostatErrorInFormat
, "FORMAT integer field out of range");
98 result
= 10 * result
+ ch
- '0';
106 if (negate
&& (result
*= -1) > 0) {
108 IostatErrorInFormat
, "FORMAT integer field out of range");
117 template <typename CONTEXT
>
118 static RT_API_ATTRS
bool RelativeTabbing(CONTEXT
&context
, int n
) {
119 ConnectionState
&connection
{context
.GetConnectionState()};
120 if constexpr (std::is_same_v
<CONTEXT
,
121 ExternalFormattedIoStatementState
<Direction::Input
>> ||
122 std::is_same_v
<CONTEXT
,
123 ExternalFormattedIoStatementState
<Direction::Output
>>) {
124 if (n
!= 0 && connection
.isUTF8
) {
126 if (n
> 0) { // Xn or TRn
127 // Skip 'n' multi-byte characters. If that's more than are in the
128 // current record, that's valid -- the program can position past the
129 // end and then reposition back with Tn or TLn.
130 std::size_t bytesLeft
{context
.ViewBytesInRecord(p
, true)};
131 for (; n
> 0 && bytesLeft
&& p
; --n
) {
132 std::size_t byteCount
{MeasureUTF8Bytes(*p
)};
133 if (byteCount
> bytesLeft
) {
136 context
.HandleRelativePosition(byteCount
);
137 bytesLeft
-= byteCount
;
138 // Don't call GotChar(byteCount), these don't count towards SIZE=
141 } else { // n < 0: TLn
143 if (std::int64_t excess
{connection
.positionInRecord
-
144 connection
.recordLength
.value_or(connection
.positionInRecord
)};
146 // Have tabbed past the end of the record
148 context
.HandleRelativePosition(-n
);
151 context
.HandleRelativePosition(-excess
);
154 std::size_t bytesLeft
{context
.ViewBytesInRecord(p
, false)};
155 // Go back 'n' multi-byte characters.
156 for (; n
> 0 && bytesLeft
&& p
; --n
) {
157 std::size_t byteCount
{MeasurePreviousUTF8Bytes(p
, bytesLeft
)};
158 context
.HandleRelativePosition(-byteCount
);
159 bytesLeft
-= byteCount
;
165 if (connection
.internalIoCharKind
> 1) {
166 n
*= connection
.internalIoCharKind
;
168 context
.HandleRelativePosition(n
);
173 template <typename CONTEXT
>
174 static RT_API_ATTRS
bool AbsoluteTabbing(CONTEXT
&context
, int n
) {
175 ConnectionState
&connection
{context
.GetConnectionState()};
176 n
= n
> 0 ? n
- 1 : 0; // convert 1-based position to 0-based offset
177 if constexpr (std::is_same_v
<CONTEXT
,
178 ExternalFormattedIoStatementState
<Direction::Input
>> ||
179 std::is_same_v
<CONTEXT
,
180 ExternalFormattedIoStatementState
<Direction::Output
>>) {
181 if (connection
.isUTF8
) {
182 // Reset to the beginning of the record, then TR(n-1)
183 connection
.HandleAbsolutePosition(0);
184 return RelativeTabbing(context
, n
);
187 if (connection
.internalIoCharKind
> 1) {
188 n
*= connection
.internalIoCharKind
;
190 context
.HandleAbsolutePosition(n
);
194 template <typename CONTEXT
>
195 static RT_API_ATTRS
void HandleControl(
196 CONTEXT
&context
, char ch
, char next
, int n
) {
197 MutableModes
&modes
{context
.mutableModes()};
201 modes
.editingFlags
|= blankZero
;
205 modes
.editingFlags
&= ~blankZero
;
211 modes
.editingFlags
|= decimalComma
;
215 modes
.editingFlags
&= ~decimalComma
;
221 modes
.scale
= n
; // kP - decimal scaling by 10**k
228 modes
.round
= decimal::RoundNearest
;
231 modes
.round
= decimal::RoundToZero
;
234 modes
.round
= decimal::RoundUp
;
237 modes
.round
= decimal::RoundDown
;
240 modes
.round
= decimal::RoundCompatible
;
243 modes
.round
= executionEnvironment
.defaultOutputRoundingMode
;
250 if (!next
&& RelativeTabbing(context
, n
)) {
256 modes
.editingFlags
|= signPlus
;
259 if (!next
|| next
== 'S') {
260 modes
.editingFlags
&= ~signPlus
;
266 if (AbsoluteTabbing(context
, n
)) {
269 } else if (next
== 'R' || next
== 'L') { // TRn / TLn
270 if (RelativeTabbing(context
, next
== 'L' ? -n
: n
)) {
279 context
.SignalError(IostatErrorInFormat
,
280 "Unknown '%c%c' edit descriptor in FORMAT", ch
, next
);
283 IostatErrorInFormat
, "Unknown '%c' edit descriptor in FORMAT", ch
);
287 // Locates the next data edit descriptor in the format.
288 // Handles all repetition counts and control edit descriptors.
289 // Generally assumes that the format string has survived the common
290 // format validator gauntlet.
291 template <typename CONTEXT
>
292 RT_API_ATTRS
int FormatControl
<CONTEXT
>::CueUpNextDataEdit(
293 Context
&context
, bool stop
) {
294 bool hitUnlimitedLoopEnd
{false};
295 // Do repetitions remain on an unparenthesized data edit?
296 while (height_
> 1 && format_
[stack_
[height_
- 1].start
] != '(') {
297 offset_
= stack_
[height_
- 1].start
;
298 int repeat
{stack_
[height_
- 1].remaining
};
305 Fortran::common::optional
<int> repeat
;
306 bool unlimited
{false};
307 auto maybeReversionPoint
{offset_
};
308 CharType ch
{GetNextChar(context
)};
309 while (ch
== ',' || ch
== ':') {
310 // Skip commas, and don't complain if they're missing; the format
311 // validator does that.
312 if (stop
&& ch
== ':') {
315 ch
= GetNextChar(context
);
317 if (ch
== '-' || ch
== '+' || (ch
>= '0' && ch
<= '9')) {
318 bool hadSign
{ch
== '-' || ch
== '+'};
319 repeat
= GetIntField(context
, ch
);
320 ch
= GetNextChar(context
);
321 if (hadSign
&& ch
!= 'p' && ch
!= 'P') {
322 ReportBadFormat(context
,
323 "Invalid FORMAT: signed integer may appear only before 'P",
324 maybeReversionPoint
);
327 } else if (ch
== '*') {
329 ch
= GetNextChar(context
);
331 ReportBadFormat(context
,
332 "Invalid FORMAT: '*' may appear only before '('",
333 maybeReversionPoint
);
337 ReportBadFormat(context
,
338 "Invalid FORMAT: '*' must be nested in exactly one set of "
340 maybeReversionPoint
);
346 if (height_
>= maxHeight_
) {
347 ReportBadFormat(context
,
348 "FORMAT stack overflow: too many nested parentheses",
349 maybeReversionPoint
);
352 stack_
[height_
].start
= offset_
- 1; // the '('
353 RUNTIME_CHECK(context
, format_
[stack_
[height_
].start
] == '(');
354 if (unlimited
|| height_
== 0) {
355 stack_
[height_
].remaining
= Iteration::unlimited
;
358 *repeat
= 1; // error recovery
360 stack_
[height_
].remaining
= *repeat
- 1;
362 stack_
[height_
].remaining
= 0;
364 if (height_
== 1 && !hitEnd_
) {
365 // Subtle point (F'2018 13.4 para 9): the last parenthesized group
366 // at height 1 becomes the restart point after control reaches the
367 // end of the format, including its repeat count.
368 stack_
[0].start
= maybeReversionPoint
;
371 } else if (height_
== 0) {
372 ReportBadFormat(context
, "FORMAT lacks initial '('", maybeReversionPoint
);
374 } else if (ch
== ')') {
378 return 0; // end of FORMAT and no data items remain
380 context
.AdvanceRecord(); // implied / before rightmost )
382 auto restart
{stack_
[height_
- 1].start
};
383 if (format_
[restart
] == '(') {
386 if (stack_
[height_
- 1].remaining
== Iteration::unlimited
) {
387 if (height_
> 1 && GetNextChar(context
) != ')') {
388 ReportBadFormat(context
,
389 "Unlimited repetition in FORMAT may not be followed by more "
394 if (hitUnlimitedLoopEnd
) {
395 ReportBadFormat(context
,
396 "Unlimited repetition in FORMAT lacks data edit descriptors",
400 hitUnlimitedLoopEnd
= true;
402 } else if (stack_
[height_
- 1].remaining
-- > 0) {
407 } else if (ch
== '\'' || ch
== '"') {
408 // Quoted 'character literal'
411 while (offset_
< formatLength_
&& format_
[offset_
] != quote
) {
414 if (offset_
>= formatLength_
) {
415 ReportBadFormat(context
,
416 "FORMAT missing closing quote on character literal",
417 maybeReversionPoint
);
422 static_cast<std::size_t>(&format_
[offset_
] - &format_
[start
])};
423 if (offset_
< formatLength_
&& format_
[offset_
] == quote
) {
424 // subtle: handle doubled quote character in a literal by including
425 // the first in the output, then treating the second as the start
426 // of another character literal.
430 EmitAscii(context
, format_
+ start
, chars
);
431 } else if (ch
== 'H') {
433 if (!repeat
|| *repeat
< 1 || offset_
+ *repeat
> formatLength_
) {
434 ReportBadFormat(context
, "Invalid width on Hollerith in FORMAT",
435 maybeReversionPoint
);
438 EmitAscii(context
, format_
+ offset_
, static_cast<std::size_t>(*repeat
));
440 } else if (ch
>= 'A' && ch
<= 'Z') {
441 int start
{offset_
- 1};
443 if (ch
!= 'P') { // 1PE5.2 - comma not required (C1302)
444 CharType peek
{Capitalize(PeekNext())};
445 if (peek
>= 'A' && peek
<= 'Z') {
446 if ((ch
== 'A' && peek
== 'T' /* anticipate F'202X AT editing */) ||
447 ch
== 'B' || ch
== 'D' || ch
== 'E' || ch
== 'R' || ch
== 'S' ||
449 // Assume a two-letter edit descriptor
453 // extension: assume a comma between 'ch' and 'peek'
458 (ch
== 'A' || ch
== 'I' || ch
== 'B' || ch
== 'E' || ch
== 'D' ||
459 ch
== 'O' || ch
== 'Z' || ch
== 'F' || ch
== 'G' ||
461 (ch
== 'E' && (next
== 'N' || next
== 'S' || next
== 'X')) ||
462 (ch
== 'D' && next
== 'T')) {
463 // Data edit descriptor found
465 return repeat
&& *repeat
> 0 ? *repeat
: 1;
467 // Control edit descriptor
468 if (ch
== 'T') { // Tn, TLn, TRn
469 repeat
= GetIntField(context
);
471 HandleControl(context
, static_cast<char>(ch
), static_cast<char>(next
),
472 repeat
? *repeat
: 1);
474 } else if (ch
== '/') {
475 context
.AdvanceRecord(repeat
&& *repeat
> 0 ? *repeat
: 1);
476 } else if (ch
== '$' || ch
== '\\') {
477 context
.mutableModes().nonAdvancing
= true;
478 } else if (ch
== '\t' || ch
== '\v') {
480 // TODO: any other raw characters?
481 EmitAscii(context
, format_
+ offset_
- 1, 1);
484 context
, "Invalid character in FORMAT", maybeReversionPoint
);
490 // Returns the next data edit descriptor
491 template <typename CONTEXT
>
492 RT_API_ATTRS
Fortran::common::optional
<DataEdit
>
493 FormatControl
<CONTEXT
>::GetNextDataEdit(Context
&context
, int maxRepeat
) {
494 int repeat
{CueUpNextDataEdit(context
)};
497 edit
.modes
= context
.mutableModes();
498 // Handle repeated nonparenthesized edit descriptors
499 edit
.repeat
= std::min(repeat
, maxRepeat
); // 0 if maxRepeat==0
500 if (repeat
> maxRepeat
) {
501 stack_
[height_
].start
= start
; // after repeat count
502 stack_
[height_
].remaining
= repeat
- edit
.repeat
;
505 edit
.descriptor
= static_cast<char>(Capitalize(GetNextChar(context
)));
506 if (edit
.descriptor
== 'D' && Capitalize(PeekNext()) == 'T') {
507 // DT['iotype'][(v_list)] defined I/O
508 edit
.descriptor
= DataEdit::DefinedDerivedType
;
510 if (auto quote
{static_cast<char>(PeekNext())};
511 quote
== '\'' || quote
== '"') {
512 // Capture the quoted 'iotype'
514 for (++offset_
; offset_
< formatLength_
;) {
515 auto ch
{static_cast<char>(format_
[offset_
++])};
517 (offset_
== formatLength_
||
518 static_cast<char>(format_
[offset_
]) != quote
)) {
520 break; // that was terminating quote
522 if (edit
.ioTypeChars
>= edit
.maxIoTypeChars
) {
523 ReportBadFormat(context
, "Excessive DT'iotype' in FORMAT", start
);
524 return Fortran::common::nullopt
;
526 edit
.ioType
[edit
.ioTypeChars
++] = ch
;
532 ReportBadFormat(context
, "Unclosed DT'iotype' in FORMAT", start
);
533 return Fortran::common::nullopt
;
536 if (PeekNext() == '(') {
537 // Capture the v_list arguments
539 for (++offset_
; offset_
< formatLength_
;) {
540 bool hadError
{false};
541 int n
{GetIntField(context
, '\0', &hadError
)};
546 if (edit
.vListEntries
>= edit
.maxVListEntries
) {
547 ReportBadFormat(context
, "Excessive DT(v_list) in FORMAT", start
);
548 return Fortran::common::nullopt
;
550 edit
.vList
[edit
.vListEntries
++] = n
;
551 auto ch
{static_cast<char>(GetNextChar(context
))};
558 ReportBadFormat(context
, "Unclosed DT(v_list) in FORMAT", start
);
559 return Fortran::common::nullopt
;
562 } else { // not DT'iotype'
563 if (edit
.descriptor
== 'E') {
564 if (auto next
{static_cast<char>(Capitalize(PeekNext()))};
565 next
== 'N' || next
== 'S' || next
== 'X') {
566 edit
.variation
= next
;
570 // Width is optional for A[w] in the standard and optional
571 // for Lw in most compilers.
572 // Intel & (presumably, from bug report) Fujitsu allow
573 // a missing 'w' & 'd'/'m' for other edit descriptors -- but not
574 // 'd'/'m' with a missing 'w' -- and so interpret "(E)" as "(E0)".
575 if (CharType ch
{PeekNext()}; (ch
>= '0' && ch
<= '9') || ch
== '.') {
576 edit
.width
= GetIntField(context
);
577 if constexpr (std::is_base_of_v
<InputStatementState
, CONTEXT
>) {
578 if (edit
.width
.value_or(-1) == 0) {
579 ReportBadFormat(context
, "Input field width is zero", start
);
582 if (PeekNext() == '.') {
584 edit
.digits
= GetIntField(context
);
585 if (CharType ch
{PeekNext()};
586 ch
== 'e' || ch
== 'E' || ch
== 'd' || ch
== 'D') {
588 edit
.expoDigits
= GetIntField(context
);
596 template <typename CONTEXT
>
597 RT_API_ATTRS
void FormatControl
<CONTEXT
>::Finish(Context
&context
) {
598 CueUpNextDataEdit(context
, true /* stop at colon or end of FORMAT */);
600 FreeMemory(const_cast<CharType
*>(format_
));
603 } // namespace Fortran::runtime::io
604 #endif // FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_