1 //===-- lib/Parser/token-parsers.h ------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef FORTRAN_PARSER_TOKEN_PARSERS_H_
10 #define FORTRAN_PARSER_TOKEN_PARSERS_H_
12 // These parsers are driven by the parsers of the Fortran grammar to consume
13 // the prescanned character stream and recognize context-sensitive tokens.
15 #include "basic-parsers.h"
16 #include "type-parsers.h"
17 #include "flang/Common/idioms.h"
18 #include "flang/Parser/char-set.h"
19 #include "flang/Parser/characters.h"
20 #include "flang/Parser/instrumented-parser.h"
21 #include "flang/Parser/provenance.h"
31 namespace Fortran::parser
{
33 // "xyz"_ch matches one instance of the characters x, y, or z without skipping
34 // any spaces before or after. The parser returns the location of the character
38 using resultType
= const char *;
39 constexpr AnyOfChars(const AnyOfChars
&) = default;
40 constexpr AnyOfChars(SetOfChars set
) : set_
{set
} {}
41 std::optional
<const char *> Parse(ParseState
&state
) const {
42 if (std::optional
<const char *> at
{state
.PeekAtNextChar()}) {
44 state
.UncheckedAdvance();
45 state
.set_anyTokenMatched();
49 state
.Say(MessageExpectedText
{set_
});
54 const SetOfChars set_
;
57 constexpr AnyOfChars
operator""_ch(const char str
[], std::size_t n
) {
58 return AnyOfChars
{SetOfChars(str
, n
)};
61 constexpr auto letter
{"abcdefghijklmnopqrstuvwxyz"_ch
};
62 constexpr auto digit
{"0123456789"_ch
};
64 // Skips over optional spaces. Always succeeds.
66 using resultType
= Success
;
68 static std::optional
<Success
> Parse(ParseState
&state
) {
69 while (std::optional
<const char *> p
{state
.PeekAtNextChar()}) {
73 state
.UncheckedAdvance();
78 constexpr Space space
;
80 // Skips a space that in free form requires a warning if it precedes a
81 // character that could begin an identifier or keyword. Always succeeds.
82 inline void MissingSpace(ParseState
&state
) {
83 if (!state
.inFixedForm()) {
85 LanguageFeature::OptionalFreeFormSpace
, "missing space"_port_en_US
);
90 using resultType
= Success
;
91 constexpr SpaceCheck() {}
92 static std::optional
<Success
> Parse(ParseState
&state
) {
93 if (std::optional
<const char *> p
{state
.PeekAtNextChar()}) {
96 state
.UncheckedAdvance();
97 return space
.Parse(state
);
99 if (IsLegalInIdentifier(ch
)) {
106 constexpr SpaceCheck spaceCheck
;
108 // Matches a token string. Spaces in the token string denote where
109 // spaces may appear in the source; they can be made mandatory for
110 // some free form keyword sequences. Missing mandatory spaces in free
111 // form elicit a warning; they are not necessary for recognition.
112 // Spaces before and after the token are also skipped.
114 // Token strings appear in the grammar as C++ user-defined literals
115 // like "BIND ( C )"_tok and "SYNC ALL"_sptok. The _tok suffix is implied
116 // when a string literal appears before the sequencing operator >> or
117 // after the sequencing operator /. The literal "..."_id parses a
118 // token that cannot be a prefix of a longer identifier.
119 template <bool MandatoryFreeFormSpace
= false, bool MustBeComplete
= false>
120 class TokenStringMatch
{
122 using resultType
= Success
;
123 constexpr TokenStringMatch(const TokenStringMatch
&) = default;
124 constexpr TokenStringMatch(const char *str
, std::size_t n
)
125 : str_
{str
}, bytes_
{n
} {}
126 explicit constexpr TokenStringMatch(const char *str
) : str_
{str
} {}
127 std::optional
<Success
> Parse(ParseState
&state
) const {
129 const char *start
{state
.GetLocation()};
131 std::optional
<const char *> at
; // initially empty
132 for (std::size_t j
{0}; j
< bytes_
&& *p
!= '\0'; ++j
, ++p
) {
133 bool spaceSkipping
{*p
== ' '};
135 if (j
+ 1 == bytes_
|| p
[1] == ' ' || p
[1] == '\0') {
136 continue; // redundant; ignore
140 at
= nextCh
.Parse(state
);
147 at
= nextCh
.Parse(state
);
151 } else if constexpr (MandatoryFreeFormSpace
) {
154 // 'at' remains full for next iteration
155 } else if (**at
== ToLowerCaseLetter(*p
)) {
158 state
.Say(start
, MessageExpectedText
{str_
, bytes_
});
162 if constexpr (MustBeComplete
) {
163 if (auto after
{state
.PeekAtNextChar()}) {
164 if (IsLegalInIdentifier(**after
)) {
165 state
.Say(start
, MessageExpectedText
{str_
, bytes_
});
170 state
.set_anyTokenMatched();
171 if (IsLegalInIdentifier(p
[-1])) {
172 return spaceCheck
.Parse(state
);
174 return space
.Parse(state
);
179 const char *const str_
;
180 const std::size_t bytes_
{std::string::npos
};
183 constexpr TokenStringMatch
<> operator""_tok(const char str
[], std::size_t n
) {
187 constexpr TokenStringMatch
<true> operator""_sptok(
188 const char str
[], std::size_t n
) {
192 constexpr TokenStringMatch
<false, true> operator""_id(
193 const char str
[], std::size_t n
) {
198 inline constexpr std::enable_if_t
<std::is_class_v
<PA
>,
199 SequenceParser
<TokenStringMatch
<>, PA
>>
200 operator>>(const char *str
, const PA
&p
) {
201 return SequenceParser
<TokenStringMatch
<>, PA
>{TokenStringMatch
<>{str
}, p
};
205 inline constexpr std::enable_if_t
<std::is_class_v
<PA
>,
206 FollowParser
<PA
, TokenStringMatch
<>>>
207 operator/(const PA
&p
, const char *str
) {
208 return FollowParser
<PA
, TokenStringMatch
<>>{p
, TokenStringMatch
<>{str
}};
211 template <class PA
> inline constexpr auto parenthesized(const PA
&p
) {
212 return "(" >> p
/ ")";
215 template <class PA
> inline constexpr auto bracketed(const PA
&p
) {
216 return "[" >> p
/ "]";
219 // Quoted character literal constants.
220 struct CharLiteralChar
{
221 using resultType
= std::pair
<char, bool /* was escaped */>;
222 static std::optional
<resultType
> Parse(ParseState
&state
) {
223 auto at
{state
.GetLocation()};
224 if (std::optional
<const char *> cp
{nextCh
.Parse(state
)}) {
227 state
.Say(CharBlock
{at
, state
.GetLocation()},
228 "Unclosed character constant"_err_en_US
);
232 // Most escape sequences in character literals are processed later,
233 // but we have to look for quotes here so that doubled quotes work.
234 if (std::optional
<const char *> next
{state
.PeekAtNextChar()}) {
235 char escaped
{**next
};
236 if (escaped
== '\'' || escaped
== '"' || escaped
== '\\') {
237 state
.UncheckedAdvance();
238 return std::make_pair(escaped
, true);
242 return std::make_pair(ch
, false);
248 template <char quote
> struct CharLiteral
{
249 using resultType
= std::string
;
250 static std::optional
<std::string
> Parse(ParseState
&state
) {
252 static constexpr auto nextch
{attempt(CharLiteralChar
{})};
253 while (auto ch
{nextch
.Parse(state
)}) {
256 } else if (ch
->first
== quote
) {
257 static constexpr auto doubled
{attempt(AnyOfChars
{SetOfChars
{quote
}})};
258 if (!doubled
.Parse(state
)) {
268 // Parse "BOZ" binary literal quoted constants.
269 // As extensions, support X as an alternate hexadecimal marker, and allow
270 // BOZX markers to appear as suffixes.
272 using resultType
= std::string
;
273 static std::optional
<resultType
> Parse(ParseState
&state
) {
275 auto baseChar
{[&base
](char ch
) -> bool {
291 const char *start
{state
.GetLocation()};
292 std::optional
<const char *> at
{nextCh
.Parse(state
)};
297 !state
.IsNonstandardOk(LanguageFeature::BOZExtensions
,
298 "nonstandard BOZ literal"_port_en_US
)) {
301 if (baseChar(**at
)) {
302 at
= nextCh
.Parse(state
);
309 if (quote
!= '\'' && quote
!= '"') {
315 at
= nextCh
.Parse(state
);
325 if (!IsHexadecimalDigit(**at
)) {
328 content
+= ToLowerCaseLetter(**at
);
332 // extension: base allowed to appear as suffix, too
333 if (!(at
= nextCh
.Parse(state
)) || !baseChar(**at
) ||
334 !state
.IsNonstandardOk(LanguageFeature::BOZExtensions
,
335 "nonstandard BOZ literal"_port_en_US
)) {
338 spaceCheck
.Parse(state
);
341 if (content
.empty()) {
342 state
.Say(start
, "no digit in BOZ literal"_err_en_US
);
345 return {std::string
{base
} + '"' + content
+ '"'};
349 // R711 digit-string -> digit [digit]...
350 // N.B. not a token -- no space is skipped
352 using resultType
= CharBlock
;
353 static std::optional
<resultType
> Parse(ParseState
&state
) {
354 if (std::optional
<const char *> ch1
{state
.PeekAtNextChar()}) {
355 if (IsDecimalDigit(**ch1
)) {
356 state
.UncheckedAdvance();
357 while (std::optional
<const char *> p
{state
.PeekAtNextChar()}) {
358 if (!IsDecimalDigit(**p
)) {
361 state
.UncheckedAdvance();
363 return CharBlock
{*ch1
, state
.GetLocation()};
369 constexpr DigitString digitString
;
371 struct SignedIntLiteralConstantWithoutKind
{
372 using resultType
= CharBlock
;
373 static std::optional
<resultType
> Parse(ParseState
&state
) {
374 resultType result
{state
.GetLocation()};
375 static constexpr auto sign
{maybe("+-"_ch
/ space
)};
376 if (sign
.Parse(state
)) {
377 if (auto digits
{digitString
.Parse(state
)}) {
378 result
.ExtendToCover(*digits
);
386 struct DigitString64
{
387 using resultType
= std::uint64_t;
388 static std::optional
<std::uint64_t> Parse(ParseState
&state
) {
389 std::optional
<const char *> firstDigit
{digit
.Parse(state
)};
393 std::uint64_t value
= **firstDigit
- '0';
394 bool overflow
{false};
395 static constexpr auto getDigit
{attempt(digit
)};
396 while (auto nextDigit
{getDigit
.Parse(state
)}) {
397 if (value
> std::numeric_limits
<std::uint64_t>::max() / 10) {
401 int digitValue
= **nextDigit
- '0';
402 if (value
> std::numeric_limits
<std::uint64_t>::max() - digitValue
) {
408 state
.Say(*firstDigit
, "overflow in decimal literal"_err_en_US
);
413 constexpr DigitString64 digitString64
;
415 // R707 signed-int-literal-constant -> [sign] int-literal-constant
416 // N.B. Spaces are consumed before and after the sign, since the sign
417 // and the int-literal-constant are distinct tokens. Does not
418 // handle a trailing kind parameter.
419 static std::optional
<std::int64_t> SignedInteger(
420 const std::optional
<std::uint64_t> &x
, Location at
, bool negate
,
425 std::uint64_t limit
{std::numeric_limits
<std::int64_t>::max()};
427 limit
= -(limit
+ 1);
430 state
.Say(at
, "overflow in signed decimal literal"_err_en_US
);
432 std::int64_t value
= *x
;
433 return std::make_optional
<std::int64_t>(negate
? -value
: value
);
436 // R710 signed-digit-string -> [sign] digit-string
437 // N.B. Not a complete token -- no space is skipped.
438 // Used only in the exponent parts of real literal constants.
439 struct SignedDigitString
{
440 using resultType
= std::int64_t;
441 static std::optional
<std::int64_t> Parse(ParseState
&state
) {
442 std::optional
<const char *> sign
{state
.PeekAtNextChar()};
446 bool negate
{**sign
== '-'};
447 if (negate
|| **sign
== '+') {
448 state
.UncheckedAdvance();
450 return SignedInteger(digitString64
.Parse(state
), *sign
, negate
, state
);
454 // Variants of the above for use in FORMAT specifications, where spaces
456 struct DigitStringIgnoreSpaces
{
457 using resultType
= std::uint64_t;
458 static std::optional
<std::uint64_t> Parse(ParseState
&state
) {
459 static constexpr auto getFirstDigit
{space
>> digit
};
460 std::optional
<const char *> firstDigit
{getFirstDigit
.Parse(state
)};
464 std::uint64_t value
= **firstDigit
- '0';
465 bool overflow
{false};
466 static constexpr auto getDigit
{space
>> attempt(digit
)};
467 while (auto nextDigit
{getDigit
.Parse(state
)}) {
468 if (value
> std::numeric_limits
<std::uint64_t>::max() / 10) {
472 int digitValue
= **nextDigit
- '0';
473 if (value
> std::numeric_limits
<std::uint64_t>::max() - digitValue
) {
479 state
.Say(*firstDigit
, "overflow in decimal literal"_err_en_US
);
485 struct PositiveDigitStringIgnoreSpaces
{
486 using resultType
= std::int64_t;
487 static std::optional
<std::int64_t> Parse(ParseState
&state
) {
488 Location at
{state
.GetLocation()};
489 return SignedInteger(
490 DigitStringIgnoreSpaces
{}.Parse(state
), at
, false /*positive*/, state
);
494 struct SignedDigitStringIgnoreSpaces
{
495 using resultType
= std::int64_t;
496 static std::optional
<std::int64_t> Parse(ParseState
&state
) {
497 static constexpr auto getSign
{space
>> attempt("+-"_ch
)};
499 if (std::optional
<const char *> sign
{getSign
.Parse(state
)}) {
500 negate
= **sign
== '-';
502 Location at
{state
.GetLocation()};
503 return SignedInteger(
504 DigitStringIgnoreSpaces
{}.Parse(state
), at
, negate
, state
);
508 // Legacy feature: Hollerith literal constants
509 struct HollerithLiteral
{
510 using resultType
= std::string
;
511 static std::optional
<std::string
> Parse(ParseState
&state
) {
513 const char *start
{state
.GetLocation()};
514 std::optional
<std::uint64_t> charCount
{
515 DigitStringIgnoreSpaces
{}.Parse(state
)};
516 if (!charCount
|| *charCount
< 1) {
519 static constexpr auto letterH
{"h"_ch
};
520 std::optional
<const char *> h
{letterH
.Parse(state
)};
525 for (auto j
{*charCount
}; j
-- > 0;) {
526 int chBytes
{UTF_8CharacterBytes(state
.GetLocation())};
527 for (int bytes
{chBytes
}; bytes
> 0; --bytes
) {
528 if (std::optional
<const char *> at
{nextCh
.Parse(state
)}) {
529 if (chBytes
== 1 && !std::isprint(**at
)) {
530 state
.Say(start
, "Bad character in Hollerith"_err_en_US
);
535 state
.Say(start
, "Insufficient characters in Hollerith"_err_en_US
);
544 struct ConsumedAllInputParser
{
545 using resultType
= Success
;
546 constexpr ConsumedAllInputParser() {}
547 static inline std::optional
<Success
> Parse(ParseState
&state
) {
548 if (state
.IsAtEnd()) {
554 constexpr ConsumedAllInputParser consumedAllInput
;
556 template <char goal
> struct SkipPast
{
557 using resultType
= Success
;
558 constexpr SkipPast() {}
559 constexpr SkipPast(const SkipPast
&) {}
560 static std::optional
<Success
> Parse(ParseState
&state
) {
561 while (std::optional
<const char *> p
{state
.GetNextChar()}) {
570 template <char goal
> struct SkipTo
{
571 using resultType
= Success
;
572 constexpr SkipTo() {}
573 constexpr SkipTo(const SkipTo
&) {}
574 static std::optional
<Success
> Parse(ParseState
&state
) {
575 while (std::optional
<const char *> p
{state
.PeekAtNextChar()}) {
579 state
.UncheckedAdvance();
585 // A common idiom in the Fortran grammar is an optional item (usually
586 // a nonempty comma-separated list) that, if present, must follow a comma
587 // and precede a doubled colon. When the item is absent, the comma must
588 // not appear, and the doubled colons are optional.
589 // [[, xyz] ::] is optionalBeforeColons(xyz)
590 // [[, xyz]... ::] is optionalBeforeColons(nonemptyList(xyz))
591 template <typename PA
> inline constexpr auto optionalBeforeColons(const PA
&p
) {
592 using resultType
= std::optional
<typename
PA::resultType
>;
593 return "," >> construct
<resultType
>(p
) / "::" ||
594 ("::"_tok
|| !","_tok
) >> pure
<resultType
>();
596 template <typename PA
>
597 inline constexpr auto optionalListBeforeColons(const PA
&p
) {
598 using resultType
= std::list
<typename
PA::resultType
>;
599 return "," >> nonemptyList(p
) / "::" ||
600 ("::"_tok
|| !","_tok
) >> pure
<resultType
>();
603 // Skip over empty lines, leading spaces, and some compiler directives (viz.,
604 // the ones that specify the source form) that might appear before the
605 // next statement. Skip over empty statements (bare semicolons) when
606 // not in strict standard conformance mode. Always succeeds.
607 struct SkipStuffBeforeStatement
{
608 using resultType
= Success
;
609 static std::optional
<Success
> Parse(ParseState
&state
) {
610 if (UserState
* ustate
{state
.userState()}) {
611 if (ParsingLog
* log
{ustate
->log()}) {
612 // Save memory: vacate the parsing log before each statement unless
613 // we're logging the whole parse for debugging.
614 if (!ustate
->instrumentedParse()) {
619 while (std::optional
<const char *> at
{state
.PeekAtNextChar()}) {
620 if (**at
== '\n' || **at
== ' ') {
621 state
.UncheckedAdvance();
622 } else if (**at
== '!') {
623 static const char fixed
[] = "!dir$ fixed\n", free
[] = "!dir$ free\n";
624 static constexpr std::size_t fixedBytes
{sizeof fixed
- 1};
625 static constexpr std::size_t freeBytes
{sizeof free
- 1};
626 std::size_t remain
{state
.BytesRemaining()};
627 if (remain
>= fixedBytes
&& std::memcmp(*at
, fixed
, fixedBytes
) == 0) {
628 state
.set_inFixedForm(true).UncheckedAdvance(fixedBytes
);
629 } else if (remain
>= freeBytes
&&
630 std::memcmp(*at
, free
, freeBytes
) == 0) {
631 state
.set_inFixedForm(false).UncheckedAdvance(freeBytes
);
635 } else if (**at
== ';' &&
636 state
.IsNonstandardOk(
637 LanguageFeature::EmptyStatement
, "empty statement"_port_en_US
)) {
638 state
.UncheckedAdvance();
646 constexpr SkipStuffBeforeStatement skipStuffBeforeStatement
;
648 // R602 underscore -> _
649 constexpr auto underscore
{"_"_ch
};
651 // Characters besides letters and digits that may appear in names.
652 // N.B. Don't accept an underscore if it is immediately followed by a
653 // quotation mark, so that kindParam_"character literal" is parsed properly.
654 // PGI and ifort accept '$' in identifiers, even as the initial character.
655 // Cray and gfortran accept '$', but not as the first character.
656 // Cray accepts '@' as well.
657 constexpr auto otherIdChar
{underscore
/ !"'\""_ch
||
658 extension
<LanguageFeature::PunctuationInNames
>(
659 "nonstandard usage: punctuation in name"_port_en_US
, "$@"_ch
)};
661 constexpr auto logicalTRUE
{
663 extension
<LanguageFeature::LogicalAbbreviations
>(
664 "nonstandard usage: .T. spelling of .TRUE."_port_en_US
,
667 constexpr auto logicalFALSE
{
669 extension
<LanguageFeature::LogicalAbbreviations
>(
670 "nonstandard usage: .F. spelling of .FALSE."_port_en_US
,
674 // deprecated: Hollerith literals
675 constexpr auto rawHollerithLiteral
{
676 deprecated
<LanguageFeature::Hollerith
>(HollerithLiteral
{})};
678 template <typename A
> constexpr decltype(auto) verbatim(A x
) {
679 return sourced(construct
<Verbatim
>(x
));
682 } // namespace Fortran::parser
683 #endif // FORTRAN_PARSER_TOKEN_PARSERS_H_