1 //===-- lib/Parser/token-parsers.h ------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef FORTRAN_PARSER_TOKEN_PARSERS_H_
10 #define FORTRAN_PARSER_TOKEN_PARSERS_H_
12 // These parsers are driven by the parsers of the Fortran grammar to consume
13 // the prescanned character stream and recognize context-sensitive tokens.
15 #include "basic-parsers.h"
16 #include "type-parsers.h"
17 #include "flang/Common/idioms.h"
18 #include "flang/Parser/char-set.h"
19 #include "flang/Parser/characters.h"
20 #include "flang/Parser/instrumented-parser.h"
21 #include "flang/Parser/provenance.h"
30 namespace Fortran::parser
{
32 // "xyz"_ch matches one instance of the characters x, y, or z without skipping
33 // any spaces before or after. The parser returns the location of the character
37 using resultType
= const char *;
38 constexpr AnyOfChars(const AnyOfChars
&) = default;
39 constexpr AnyOfChars(SetOfChars set
) : set_
{set
} {}
40 std::optional
<const char *> Parse(ParseState
&state
) const {
41 if (std::optional
<const char *> at
{state
.PeekAtNextChar()}) {
43 state
.UncheckedAdvance();
44 state
.set_anyTokenMatched();
48 state
.Say(MessageExpectedText
{set_
});
53 const SetOfChars set_
;
56 constexpr AnyOfChars
operator""_ch(const char str
[], std::size_t n
) {
57 return AnyOfChars
{SetOfChars(str
, n
)};
60 constexpr auto letter
{"abcdefghijklmnopqrstuvwxyz"_ch
};
61 constexpr auto digit
{"0123456789"_ch
};
63 // Skips over optional spaces. Always succeeds.
65 using resultType
= Success
;
67 static std::optional
<Success
> Parse(ParseState
&state
) {
68 while (std::optional
<const char *> p
{state
.PeekAtNextChar()}) {
72 state
.UncheckedAdvance();
77 constexpr Space space
;
79 // Skips a space that in free form requires a warning if it precedes a
80 // character that could begin an identifier or keyword. Always succeeds.
81 inline void MissingSpace(ParseState
&state
) {
82 if (!state
.inFixedForm()) {
84 LanguageFeature::OptionalFreeFormSpace
, "missing space"_port_en_US
);
89 using resultType
= Success
;
90 constexpr SpaceCheck() {}
91 static std::optional
<Success
> Parse(ParseState
&state
) {
92 if (std::optional
<const char *> p
{state
.PeekAtNextChar()}) {
95 state
.UncheckedAdvance();
96 return space
.Parse(state
);
98 if (IsLegalInIdentifier(ch
)) {
105 constexpr SpaceCheck spaceCheck
;
107 // Matches a token string. Spaces in the token string denote where
108 // spaces may appear in the source; they can be made mandatory for
109 // some free form keyword sequences. Missing mandatory spaces in free
110 // form elicit a warning; they are not necessary for recognition.
111 // Spaces before and after the token are also skipped.
113 // Token strings appear in the grammar as C++ user-defined literals
114 // like "BIND ( C )"_tok and "SYNC ALL"_sptok. The _tok suffix is implied
115 // when a string literal appears before the sequencing operator >> or
116 // after the sequencing operator /. The literal "..."_id parses a
117 // token that cannot be a prefix of a longer identifier.
118 template <bool MandatoryFreeFormSpace
= false, bool MustBeComplete
= false>
119 class TokenStringMatch
{
121 using resultType
= Success
;
122 constexpr TokenStringMatch(const TokenStringMatch
&) = default;
123 constexpr TokenStringMatch(const char *str
, std::size_t n
)
124 : str_
{str
}, bytes_
{n
} {}
125 explicit constexpr TokenStringMatch(const char *str
) : str_
{str
} {}
126 std::optional
<Success
> Parse(ParseState
&state
) const {
128 const char *start
{state
.GetLocation()};
130 std::optional
<const char *> at
; // initially empty
131 for (std::size_t j
{0}; j
< bytes_
&& *p
!= '\0'; ++j
, ++p
) {
132 bool spaceSkipping
{*p
== ' '};
134 if (j
+ 1 == bytes_
|| p
[1] == ' ' || p
[1] == '\0') {
135 continue; // redundant; ignore
139 at
= nextCh
.Parse(state
);
146 at
= nextCh
.Parse(state
);
150 } else if constexpr (MandatoryFreeFormSpace
) {
153 // 'at' remains full for next iteration
154 } else if (**at
== ToLowerCaseLetter(*p
)) {
157 state
.Say(start
, MessageExpectedText
{str_
, bytes_
});
161 if constexpr (MustBeComplete
) {
162 if (auto after
{state
.PeekAtNextChar()}) {
163 if (IsLegalInIdentifier(**after
)) {
164 state
.Say(start
, MessageExpectedText
{str_
, bytes_
});
169 state
.set_anyTokenMatched();
170 if (IsLegalInIdentifier(p
[-1])) {
171 return spaceCheck
.Parse(state
);
173 return space
.Parse(state
);
178 const char *const str_
;
179 const std::size_t bytes_
{std::string::npos
};
182 constexpr TokenStringMatch
<> operator""_tok(const char str
[], std::size_t n
) {
186 constexpr TokenStringMatch
<true> operator""_sptok(
187 const char str
[], std::size_t n
) {
191 constexpr TokenStringMatch
<false, true> operator""_id(
192 const char str
[], std::size_t n
) {
197 inline constexpr std::enable_if_t
<std::is_class_v
<PA
>,
198 SequenceParser
<TokenStringMatch
<>, PA
>>
199 operator>>(const char *str
, const PA
&p
) {
200 return SequenceParser
<TokenStringMatch
<>, PA
>{TokenStringMatch
<>{str
}, p
};
204 inline constexpr std::enable_if_t
<std::is_class_v
<PA
>,
205 FollowParser
<PA
, TokenStringMatch
<>>>
206 operator/(const PA
&p
, const char *str
) {
207 return FollowParser
<PA
, TokenStringMatch
<>>{p
, TokenStringMatch
<>{str
}};
210 template <class PA
> inline constexpr auto parenthesized(const PA
&p
) {
211 return "(" >> p
/ ")";
214 template <class PA
> inline constexpr auto bracketed(const PA
&p
) {
215 return "[" >> p
/ "]";
218 // Quoted character literal constants.
219 struct CharLiteralChar
{
220 using resultType
= std::pair
<char, bool /* was escaped */>;
221 static std::optional
<resultType
> Parse(ParseState
&state
) {
222 auto at
{state
.GetLocation()};
223 if (std::optional
<const char *> cp
{nextCh
.Parse(state
)}) {
226 state
.Say(CharBlock
{at
, state
.GetLocation()},
227 "Unclosed character constant"_err_en_US
);
231 // Most escape sequences in character literals are processed later,
232 // but we have to look for quotes here so that doubled quotes work.
233 if (std::optional
<const char *> next
{state
.PeekAtNextChar()}) {
234 char escaped
{**next
};
235 if (escaped
== '\'' || escaped
== '"' || escaped
== '\\') {
236 state
.UncheckedAdvance();
237 return std::make_pair(escaped
, true);
241 return std::make_pair(ch
, false);
247 template <char quote
> struct CharLiteral
{
248 using resultType
= std::string
;
249 static std::optional
<std::string
> Parse(ParseState
&state
) {
251 static constexpr auto nextch
{attempt(CharLiteralChar
{})};
252 while (auto ch
{nextch
.Parse(state
)}) {
255 } else if (ch
->first
== quote
) {
256 static constexpr auto doubled
{attempt(AnyOfChars
{SetOfChars
{quote
}})};
257 if (!doubled
.Parse(state
)) {
267 // Parse "BOZ" binary literal quoted constants.
268 // As extensions, support X as an alternate hexadecimal marker, and allow
269 // BOZX markers to appear as suffixes.
271 using resultType
= std::string
;
272 static std::optional
<resultType
> Parse(ParseState
&state
) {
274 auto baseChar
{[&base
](char ch
) -> bool {
290 const char *start
{state
.GetLocation()};
291 std::optional
<const char *> at
{nextCh
.Parse(state
)};
296 !state
.IsNonstandardOk(LanguageFeature::BOZExtensions
,
297 "nonstandard BOZ literal"_port_en_US
)) {
300 if (baseChar(**at
)) {
301 at
= nextCh
.Parse(state
);
308 if (quote
!= '\'' && quote
!= '"') {
314 at
= nextCh
.Parse(state
);
324 if (!IsHexadecimalDigit(**at
)) {
327 content
+= ToLowerCaseLetter(**at
);
331 // extension: base allowed to appear as suffix, too
332 if (!(at
= nextCh
.Parse(state
)) || !baseChar(**at
) ||
333 !state
.IsNonstandardOk(LanguageFeature::BOZExtensions
,
334 "nonstandard BOZ literal"_port_en_US
)) {
337 spaceCheck
.Parse(state
);
340 if (content
.empty()) {
341 state
.Say(start
, "no digit in BOZ literal"_err_en_US
);
344 return {std::string
{base
} + '"' + content
+ '"'};
348 // R711 digit-string -> digit [digit]...
349 // N.B. not a token -- no space is skipped
351 using resultType
= CharBlock
;
352 static std::optional
<resultType
> Parse(ParseState
&state
) {
353 if (std::optional
<const char *> ch1
{state
.PeekAtNextChar()}) {
354 if (IsDecimalDigit(**ch1
)) {
355 state
.UncheckedAdvance();
356 while (std::optional
<const char *> p
{state
.PeekAtNextChar()}) {
357 if (!IsDecimalDigit(**p
)) {
360 state
.UncheckedAdvance();
362 return CharBlock
{*ch1
, state
.GetLocation()};
368 constexpr DigitString digitString
;
370 struct SignedIntLiteralConstantWithoutKind
{
371 using resultType
= CharBlock
;
372 static std::optional
<resultType
> Parse(ParseState
&state
) {
373 resultType result
{state
.GetLocation()};
374 static constexpr auto sign
{maybe("+-"_ch
/ space
)};
375 if (sign
.Parse(state
)) {
376 if (auto digits
{digitString
.Parse(state
)}) {
377 result
.ExtendToCover(*digits
);
385 struct DigitString64
{
386 using resultType
= std::uint64_t;
387 static std::optional
<std::uint64_t> Parse(ParseState
&state
) {
388 std::optional
<const char *> firstDigit
{digit
.Parse(state
)};
392 std::uint64_t value
= **firstDigit
- '0';
393 bool overflow
{false};
394 static constexpr auto getDigit
{attempt(digit
)};
395 while (auto nextDigit
{getDigit
.Parse(state
)}) {
396 if (value
> std::numeric_limits
<std::uint64_t>::max() / 10) {
400 int digitValue
= **nextDigit
- '0';
401 if (value
> std::numeric_limits
<std::uint64_t>::max() - digitValue
) {
407 state
.Say(*firstDigit
, "overflow in decimal literal"_err_en_US
);
412 constexpr DigitString64 digitString64
;
414 // R707 signed-int-literal-constant -> [sign] int-literal-constant
415 // N.B. Spaces are consumed before and after the sign, since the sign
416 // and the int-literal-constant are distinct tokens. Does not
417 // handle a trailing kind parameter.
418 static std::optional
<std::int64_t> SignedInteger(
419 const std::optional
<std::uint64_t> &x
, Location at
, bool negate
,
424 std::uint64_t limit
{std::numeric_limits
<std::int64_t>::max()};
426 limit
= -(limit
+ 1);
429 state
.Say(at
, "overflow in signed decimal literal"_err_en_US
);
431 std::int64_t value
= *x
;
432 return std::make_optional
<std::int64_t>(negate
? -value
: value
);
435 // R710 signed-digit-string -> [sign] digit-string
436 // N.B. Not a complete token -- no space is skipped.
437 // Used only in the exponent parts of real literal constants.
438 struct SignedDigitString
{
439 using resultType
= std::int64_t;
440 static std::optional
<std::int64_t> Parse(ParseState
&state
) {
441 std::optional
<const char *> sign
{state
.PeekAtNextChar()};
445 bool negate
{**sign
== '-'};
446 if (negate
|| **sign
== '+') {
447 state
.UncheckedAdvance();
449 return SignedInteger(digitString64
.Parse(state
), *sign
, negate
, state
);
453 // Variants of the above for use in FORMAT specifications, where spaces
455 struct DigitStringIgnoreSpaces
{
456 using resultType
= std::uint64_t;
457 static std::optional
<std::uint64_t> Parse(ParseState
&state
) {
458 static constexpr auto getFirstDigit
{space
>> digit
};
459 std::optional
<const char *> firstDigit
{getFirstDigit
.Parse(state
)};
463 std::uint64_t value
= **firstDigit
- '0';
464 bool overflow
{false};
465 static constexpr auto getDigit
{space
>> attempt(digit
)};
466 while (auto nextDigit
{getDigit
.Parse(state
)}) {
467 if (value
> std::numeric_limits
<std::uint64_t>::max() / 10) {
471 int digitValue
= **nextDigit
- '0';
472 if (value
> std::numeric_limits
<std::uint64_t>::max() - digitValue
) {
478 state
.Say(*firstDigit
, "overflow in decimal literal"_err_en_US
);
484 struct PositiveDigitStringIgnoreSpaces
{
485 using resultType
= std::int64_t;
486 static std::optional
<std::int64_t> Parse(ParseState
&state
) {
487 Location at
{state
.GetLocation()};
488 return SignedInteger(
489 DigitStringIgnoreSpaces
{}.Parse(state
), at
, false /*positive*/, state
);
493 struct SignedDigitStringIgnoreSpaces
{
494 using resultType
= std::int64_t;
495 static std::optional
<std::int64_t> Parse(ParseState
&state
) {
496 static constexpr auto getSign
{space
>> attempt("+-"_ch
)};
498 if (std::optional
<const char *> sign
{getSign
.Parse(state
)}) {
499 negate
= **sign
== '-';
501 Location at
{state
.GetLocation()};
502 return SignedInteger(
503 DigitStringIgnoreSpaces
{}.Parse(state
), at
, negate
, state
);
507 // Legacy feature: Hollerith literal constants
508 struct HollerithLiteral
{
509 using resultType
= std::string
;
510 static std::optional
<std::string
> Parse(ParseState
&state
) {
512 const char *start
{state
.GetLocation()};
513 std::optional
<std::uint64_t> charCount
{
514 DigitStringIgnoreSpaces
{}.Parse(state
)};
515 if (!charCount
|| *charCount
< 1) {
518 static constexpr auto letterH
{"h"_ch
};
519 std::optional
<const char *> h
{letterH
.Parse(state
)};
524 for (auto j
{*charCount
}; j
-- > 0;) {
525 int chBytes
{UTF_8CharacterBytes(state
.GetLocation())};
526 for (int bytes
{chBytes
}; bytes
> 0; --bytes
) {
527 if (std::optional
<const char *> at
{nextCh
.Parse(state
)}) {
528 if (chBytes
== 1 && !IsPrintable(**at
)) {
529 state
.Say(start
, "Bad character in Hollerith"_err_en_US
);
534 state
.Say(start
, "Insufficient characters in Hollerith"_err_en_US
);
543 struct ConsumedAllInputParser
{
544 using resultType
= Success
;
545 constexpr ConsumedAllInputParser() {}
546 static inline std::optional
<Success
> Parse(ParseState
&state
) {
547 if (state
.IsAtEnd()) {
553 constexpr ConsumedAllInputParser consumedAllInput
;
555 template <char goal
> struct SkipPast
{
556 using resultType
= Success
;
557 constexpr SkipPast() {}
558 constexpr SkipPast(const SkipPast
&) {}
559 static std::optional
<Success
> Parse(ParseState
&state
) {
560 while (std::optional
<const char *> p
{state
.GetNextChar()}) {
563 } else if (**p
== '\n') {
571 template <char goal
> struct SkipTo
{
572 using resultType
= Success
;
573 constexpr SkipTo() {}
574 constexpr SkipTo(const SkipTo
&) {}
575 static std::optional
<Success
> Parse(ParseState
&state
) {
576 while (std::optional
<const char *> p
{state
.PeekAtNextChar()}) {
579 } else if (**p
== '\n') {
582 state
.UncheckedAdvance();
589 template <char left
, char right
> struct SkipPastNested
{
590 using resultType
= Success
;
591 constexpr SkipPastNested() {}
592 constexpr SkipPastNested(const SkipPastNested
&) {}
593 static std::optional
<Success
> Parse(ParseState
&state
) {
595 while (std::optional
<const char *> p
{state
.GetNextChar()}) {
600 } else if (**p
== left
) {
602 } else if (**p
== '\n') {
610 // A common idiom in the Fortran grammar is an optional item (usually
611 // a nonempty comma-separated list) that, if present, must follow a comma
612 // and precede a doubled colon. When the item is absent, the comma must
613 // not appear, and the doubled colons are optional.
614 // [[, xyz] ::] is optionalBeforeColons(xyz)
615 // [[, xyz]... ::] is optionalBeforeColons(nonemptyList(xyz))
616 template <typename PA
> inline constexpr auto optionalBeforeColons(const PA
&p
) {
617 using resultType
= std::optional
<typename
PA::resultType
>;
618 return "," >> construct
<resultType
>(p
) / "::" ||
619 ("::"_tok
|| !","_tok
) >> pure
<resultType
>();
621 template <typename PA
>
622 inline constexpr auto optionalListBeforeColons(const PA
&p
) {
623 using resultType
= std::list
<typename
PA::resultType
>;
624 return "," >> nonemptyList(p
) / "::" ||
625 ("::"_tok
|| !","_tok
) >> pure
<resultType
>();
628 // Skip over empty lines, leading spaces, and some compiler directives (viz.,
629 // the ones that specify the source form) that might appear before the
630 // next statement. Skip over empty statements (bare semicolons) when
631 // not in strict standard conformance mode. Always succeeds.
632 struct SkipStuffBeforeStatement
{
633 using resultType
= Success
;
634 static std::optional
<Success
> Parse(ParseState
&state
) {
635 if (UserState
* ustate
{state
.userState()}) {
636 if (ParsingLog
* log
{ustate
->log()}) {
637 // Save memory: vacate the parsing log before each statement unless
638 // we're logging the whole parse for debugging.
639 if (!ustate
->instrumentedParse()) {
644 while (std::optional
<const char *> at
{state
.PeekAtNextChar()}) {
645 if (**at
== '\n' || **at
== ' ') {
646 state
.UncheckedAdvance();
647 } else if (**at
== '!') {
648 static const char fixed
[] = "!dir$ fixed\n", free
[] = "!dir$ free\n";
649 static constexpr std::size_t fixedBytes
{sizeof fixed
- 1};
650 static constexpr std::size_t freeBytes
{sizeof free
- 1};
651 std::size_t remain
{state
.BytesRemaining()};
652 if (remain
>= fixedBytes
&& std::memcmp(*at
, fixed
, fixedBytes
) == 0) {
653 state
.set_inFixedForm(true).UncheckedAdvance(fixedBytes
);
654 } else if (remain
>= freeBytes
&&
655 std::memcmp(*at
, free
, freeBytes
) == 0) {
656 state
.set_inFixedForm(false).UncheckedAdvance(freeBytes
);
660 } else if (**at
== ';' &&
661 state
.IsNonstandardOk(
662 LanguageFeature::EmptyStatement
, "empty statement"_port_en_US
)) {
663 state
.UncheckedAdvance();
671 constexpr SkipStuffBeforeStatement skipStuffBeforeStatement
;
673 // R602 underscore -> _
674 constexpr auto underscore
{"_"_ch
};
676 // Characters besides letters and digits that may appear in names.
677 // N.B. Don't accept an underscore if it is immediately followed by a
678 // quotation mark, so that kindParam_"character literal" is parsed properly.
679 // PGI and ifort accept '$' in identifiers, even as the initial character.
680 // Cray and gfortran accept '$', but not as the first character.
681 // Cray accepts '@' as well.
682 constexpr auto otherIdChar
{underscore
/ !"'\""_ch
||
683 extension
<LanguageFeature::PunctuationInNames
>(
684 "nonstandard usage: punctuation in name"_port_en_US
, "$@"_ch
)};
686 constexpr auto logicalTRUE
{
688 extension
<LanguageFeature::LogicalAbbreviations
>(
689 "nonstandard usage: .T. spelling of .TRUE."_port_en_US
,
692 constexpr auto logicalFALSE
{
694 extension
<LanguageFeature::LogicalAbbreviations
>(
695 "nonstandard usage: .F. spelling of .FALSE."_port_en_US
,
699 // deprecated: Hollerith literals
700 constexpr auto rawHollerithLiteral
{
701 deprecated
<LanguageFeature::Hollerith
>(HollerithLiteral
{})};
703 template <typename A
> constexpr decltype(auto) verbatim(A x
) {
704 return sourced(construct
<Verbatim
>(x
));
707 } // namespace Fortran::parser
708 #endif // FORTRAN_PARSER_TOKEN_PARSERS_H_