1 //===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef FORTRAN_PARSER_PRESCAN_H_
10 #define FORTRAN_PARSER_PRESCAN_H_
12 // Defines a fast Fortran source prescanning phase that implements some
13 // character-level features of the language that can be inefficient to
14 // support directly in a backtracking parser. This phase handles Fortran
15 // line continuation, comment removal, card image margins, padding out
16 // fixed form character literals on truncated card images, file
17 // inclusion, and driving the Fortran source preprocessor.
19 #include "flang/Common/Fortran-features.h"
20 #include "flang/Parser/characters.h"
21 #include "flang/Parser/message.h"
22 #include "flang/Parser/provenance.h"
23 #include "flang/Parser/token-sequence.h"
27 #include <unordered_set>
29 namespace Fortran::parser
{
36 Prescanner(Messages
&, CookedSource
&, Preprocessor
&,
37 common::LanguageFeatureControl
);
// NOTE(review): this looks like the tail of another constructor declaration
// whose opening line is missing from this view. The visible parameters are a
// const Prescanner reference, a Preprocessor reference, and a bool named
// isNestedInIncludeDirective.
39 const Prescanner
&, Preprocessor
&, bool isNestedInIncludeDirective
);
40 Prescanner(const Prescanner
&) = delete;
41 Prescanner(Prescanner
&&) = delete;
43 const AllSources
&allSources() const { return allSources_
; }
44 AllSources
&allSources() { return allSources_
; }
45 const Messages
&messages() const { return messages_
; }
46 Messages
&messages() { return messages_
; }
47 const Preprocessor
&preprocessor() const { return preprocessor_
; }
48 Preprocessor
&preprocessor() { return preprocessor_
; }
49 common::LanguageFeatureControl
&features() { return features_
; }
// Option setters. Each is declared to return a Prescanner reference
// (presumably *this, to allow call chaining -- the return statements and
// closing braces are not visible in this view; TODO confirm).

// Stores `yes` in preprocessingOnly_.
51 Prescanner
&set_preprocessingOnly(bool yes
) {
52 preprocessingOnly_
= yes
;
// Stores `yes` in expandIncludeLines_.
55 Prescanner
&set_expandIncludeLines(bool yes
) {
56 expandIncludeLines_
= yes
;
// Body not visible in this view; presumably updates inFixedForm_ -- TODO
// confirm.
59 Prescanner
&set_fixedForm(bool yes
) {
// Body not visible in this view; presumably updates encoding_ -- TODO
// confirm.
63 Prescanner
&set_encoding(Encoding code
) {
// Stores `limit` in fixedFormColumnLimit_.
67 Prescanner
&set_fixedFormColumnLimit(int limit
) {
68 fixedFormColumnLimit_
= limit
;
72 Prescanner
&AddCompilerDirectiveSentinel(const std::string
&);
74 void Prescan(ProvenanceRange
);
78 // Callbacks for use by Preprocessor.
79 bool IsAtEnd() const { return nextLine_
>= limit_
; }
80 bool IsNextLinePreprocessorDirective() const;
81 TokenSequence
TokenizePreprocessorDirective();
82 Provenance
GetCurrentProvenance() const { return GetProvenance(at_
); }
84 const char *IsCompilerDirectiveSentinel(const char *, std::size_t) const;
85 const char *IsCompilerDirectiveSentinel(CharBlock
) const;
86 // 'first' is the sentinel, 'second' is beginning of payload
87 std::optional
<std::pair
<const char *, const char *>>
88 IsCompilerDirectiveSentinel(const char *p
) const;
// Forwards every argument unchanged (via std::forward) to messages_.Say() and
// returns the Message reference that call yields.
// NOTE(review): the closing brace of this function is not visible in this
// view.
90 template <typename
... A
> Message
&Say(A
&&...a
) {
91 return messages_
.Say(std::forward
<A
>(a
)...);
// Describes how a source line was classified: a kind, the byte offset of its
// payload, and (for compiler directives) its sentinel.
// NOTE(review): this view is missing several lines of the struct -- the
// declaration of Kind and its first enumerators, the `kind` member that the
// constructor initializes, and the closing brace.
95 struct LineClassification
{
// Visible portion of the list of line kinds.
98 ConditionalCompilationDirective
,
99 IncludeDirective
, // #include
100 DefinitionDirective
, // #define & #undef
101 PreprocessorDirective
,
102 IncludeLine
, // Fortran INCLUDE
// Constructs from a kind; the payload offset defaults to 0 and the sentinel
// defaults to nullptr.
106 LineClassification(Kind k
, std::size_t po
= 0, const char *s
= nullptr)
107 : kind
{k
}, payloadOffset
{po
}, sentinel
{s
} {}
// Move construction and move assignment use the compiler-generated versions.
108 LineClassification(LineClassification
&&) = default;
109 LineClassification
&operator=(LineClassification
&&) = default;
111 std::size_t payloadOffset
; // byte offset of content
112 const char *sentinel
; // if it's a compiler directive
// Starts processing of the source line at `at`. The only statement visible
// here clears tabInCurrentLine_; other lines of the body (and its closing
// brace) are missing from this view.
115 void BeginSourceLine(const char *at
) {
118 tabInCurrentLine_
= false;
// Begins the source line at nextLine_ via BeginSourceLine(). The remainder of
// the body is not visible here (presumably it then advances to the following
// line, per the name -- TODO confirm).
121 void BeginSourceLineAndAdvance() {
122 BeginSourceLine(nextLine_
);
// Calls BeginSourceLineAndAdvance() and then resets the per-statement state:
// slashInCurrentStatement_, preventHollerith_, parenthesisNesting_,
// continuationLines_, isPossibleMacroCall_, and disableSourceContinuation_.
126 void BeginStatementAndAdvance() {
127 BeginSourceLineAndAdvance();
128 slashInCurrentStatement_
= false;
129 preventHollerith_
= false;
130 parenthesisNesting_
= 0;
131 continuationLines_
= 0;
132 isPossibleMacroCall_
= false;
133 disableSourceContinuation_
= false;
// Converts a pointer into the current source content to a Provenance by
// adding its offset from start_ to startProvenance_.
// (The closing brace is not visible in this view.)
136 Provenance
GetProvenance(const char *sourceChar
) const {
137 return startProvenance_
+ (sourceChar
- start_
);
// Builds a ProvenanceRange for [first, afterLast): it starts at the
// provenance of `first` (startProvenance_ plus the offset from start_) and
// covers afterLast - first bytes.
// (The closing brace is not visible in this view.)
140 ProvenanceRange
GetProvenanceRange(
141 const char *first
, const char *afterLast
) const {
142 std::size_t bytes
= afterLast
- first
;
143 return {startProvenance_
+ (first
- start_
), bytes
};
// Appends `ch` to `tokens`, tagged with the provenance of the current
// position (GetCurrentProvenance()).
// (The closing brace is not visible in this view.)
146 void EmitChar(TokenSequence
&tokens
, char ch
) {
147 tokens
.PutNextTokenChar(ch
, GetCurrentProvenance());
// Appends `ch` to `tokens`, tagged with the provenance that
// allSources_.CompilerInsertionProvenance(ch) returns instead of a position
// in the source.
// (The closing brace is not visible in this view.)
150 void EmitInsertedChar(TokenSequence
&tokens
, char ch
) {
151 Provenance provenance
{allSources_
.CompilerInsertionProvenance(ch
)};
152 tokens
.PutNextTokenChar(ch
, provenance
);
// Emits `ch` via EmitChar(). The rest of the body, including the statement
// that produces the char return value, is not visible in this view
// (presumably it advances the current position -- TODO confirm).
155 char EmitCharAndAdvance(TokenSequence
&tokens
, char ch
) {
156 EmitChar(tokens
, ch
);
161 bool InCompilerDirective() const { return directiveSentinel_
!= nullptr; }
// True when inFixedForm_ is set and the prescanner is neither in a
// preprocessor directive nor in a compiler directive.
// (The closing brace is not visible in this view.)
162 bool InFixedFormSource() const {
163 return inFixedForm_
&& !inPreprocessorDirective_
&& !InCompilerDirective();
// Tests whether `p` points at the two characters "/*". That match only counts
// when inPreprocessorDirective_ is set or a second condition holds; the
// second condition involves common::LanguageFeature::ClassicCComments, but
// part of that expression is missing from this view.
// Reads p[0] and p[1], so both must be readable.
166 bool IsCComment(const char *p
) const {
167 return p
[0] == '/' && p
[1] == '*' &&
168 (inPreprocessorDirective_
||
171 common::LanguageFeature::ClassicCComments
)));
174 void CheckAndEmitLine(TokenSequence
&, Provenance newlineProvenance
);
175 void LabelField(TokenSequence
&);
176 void EnforceStupidEndStatementRules(const TokenSequence
&);
177 void SkipToEndOfLine();
178 bool MustSkipToEndOfLine() const;
180 // True when input flowed to a continuation line
181 bool SkipToNextSignificantCharacter();
182 void SkipCComments();
184 static const char *SkipWhiteSpace(const char *);
185 const char *SkipWhiteSpaceAndCComments(const char *) const;
186 const char *SkipCComment(const char *) const;
187 bool NextToken(TokenSequence
&);
188 bool ExponentAndKind(TokenSequence
&);
189 void QuotedCharacterLiteral(TokenSequence
&, const char *start
);
190 void Hollerith(TokenSequence
&, int count
, const char *start
);
191 bool PadOutCharacterLiteral(TokenSequence
&);
192 bool SkipCommentLine(bool afterAmpersand
);
193 bool IsFixedFormCommentLine(const char *) const;
194 const char *IsFreeFormComment(const char *) const;
195 std::optional
<std::size_t> IsIncludeLine(const char *) const;
196 void FortranInclude(const char *quote
);
197 const char *IsPreprocessorDirectiveLine(const char *) const;
198 const char *FixedFormContinuationLine(bool mightNeedSpace
);
199 const char *FreeFormContinuationLine(bool ampersand
);
200 bool IsImplicitContinuation() const;
201 bool FixedFormContinuation(bool mightNeedSpace
);
202 bool FreeFormContinuation();
203 bool Continuation(bool mightNeedFixedFormSpace
);
// Two declarations that each return std::optional<LineClassification>.
// NOTE(review): the parameter lists and terminators of both declarations are
// missing from this view.
204 std::optional
<LineClassification
> IsFixedFormCompilerDirectiveLine(
206 std::optional
<LineClassification
> IsFreeFormCompilerDirectiveLine(
208 LineClassification
ClassifyLine(const char *) const;
209 LineClassification
ClassifyLine(
210 TokenSequence
&, Provenance newlineProvenance
) const;
211 void SourceFormChange(std::string
&&);
212 bool CompilerDirectiveContinuation(TokenSequence
&, const char *sentinel
);
213 bool SourceLineContinuation(TokenSequence
&);
216 CookedSource
&cooked_
;
217 Preprocessor
&preprocessor_
;
218 AllSources
&allSources_
;
219 common::LanguageFeatureControl features_
;
220 bool preprocessingOnly_
{false};
221 bool expandIncludeLines_
{true};
222 bool isNestedInIncludeDirective_
{false};
223 bool backslashFreeFormContinuation_
{false};
224 bool inFixedForm_
{false};
225 int fixedFormColumnLimit_
{72};
226 Encoding encoding_
{Encoding::UTF_8
};
227 int parenthesisNesting_
{0};
228 int prescannerNesting_
{0};
229 int continuationLines_
{0};
230 bool isPossibleMacroCall_
{false};
231 bool afterPreprocessingDirective_
{false};
232 bool disableSourceContinuation_
{false};
234 Provenance startProvenance_
;
235 const char *start_
{nullptr}; // beginning of current source file content
236 const char *limit_
{nullptr}; // first address after end of current source
237 const char *nextLine_
{nullptr}; // next line to process; <= limit_
238 const char *directiveSentinel_
{nullptr}; // current compiler directive
240 // These data members are state for processing the source line containing
241 // "at_", which extends up to the newline character before "nextLine_".
// Next character to process; < nextLine_.
const char *at_{nullptr};
// Card image column position of the next character.
int column_{1};
bool tabInCurrentLine_{false};
bool slashInCurrentStatement_{false};
// Set so that CHARACTER*4HIMOM is not treated as Hollerith.
bool preventHollerith_{false};
bool inCharLiteral_{false};
bool continuationInCharLiteral_{false};
bool inPreprocessorDirective_{false};
// Some edge cases of compiler directive continuation lines require the
// line break to be treated as a space character; setting this flag
// requests that. It is documented as being cleared by EmitChar().
// NOTE(review): the part of EmitChar() visible in this header does not touch
// this flag -- confirm where it is actually cleared.
bool insertASpace_{false};
256 // When a free form continuation marker (&) appears at the end of a line
257 // before an INCLUDE or #include, we delete it and omit the newline, so
258 // that the first line of the included file is truly a continuation of
259 // the line before. Also used when the & appears at the end of the last
260 // line in an include file.
// Both flags are initially clear.
bool omitNewline_{false};
bool skipLeadingAmpersand_{false};
264 const std::size_t firstCookedCharacterOffset_
{cooked_
.BufferedBytes()};
266 const Provenance spaceProvenance_
{
267 allSources_
.CompilerInsertionProvenance(' ')};
268 const Provenance backslashProvenance_
{
269 allSources_
.CompilerInsertionProvenance('\\')};
// The set of active compiler directive sentinel strings is not probed on
// every comment line; candidates are checked first against a cheap Bloom
// filter.
static const int prime1{1019};
static const int prime2{1021};
std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes
std::unordered_set<std::string> compilerDirectiveSentinels_;
277 } // namespace Fortran::parser
278 #endif // FORTRAN_PARSER_PRESCAN_H_