1 //===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef FORTRAN_PARSER_PRESCAN_H_
10 #define FORTRAN_PARSER_PRESCAN_H_
12 // Defines a fast Fortran source prescanning phase that implements some
13 // character-level features of the language that can be inefficient to
14 // support directly in a backtracking parser. This phase handles Fortran
15 // line continuation, comment removal, card image margins, padding out
16 // fixed form character literals on truncated card images, file
17 // inclusion, and driving the Fortran source preprocessor.
19 #include "token-sequence.h"
20 #include "flang/Common/Fortran-features.h"
21 #include "flang/Parser/characters.h"
22 #include "flang/Parser/message.h"
23 #include "flang/Parser/provenance.h"
27 #include <unordered_set>
29 namespace Fortran::parser
{
// Constructor: takes references to a Messages object, a CookedSource, and a
// Preprocessor, plus a common::LanguageFeatureControl value.
// Only the declaration appears here; the definition is elsewhere.
36 Prescanner(Messages
&, CookedSource
&, Preprocessor
&,
37 common::LanguageFeatureControl
);
// Copy constructor (declaration only).
38 Prescanner(const Prescanner
&);
// Paired const / non-const accessors. Each returns a reference to the
// corresponding data member (allSources_, messages_, preprocessor_).
40 const AllSources
&allSources() const { return allSources_
; }
41 AllSources
&allSources() { return allSources_
; }
42 const Messages
&messages() const { return messages_
; }
43 Messages
&messages() { return messages_
; }
44 const Preprocessor
&preprocessor() const { return preprocessor_
; }
45 Preprocessor
&preprocessor() { return preprocessor_
; }
// Configuration setters. Each returns Prescanner &.
// NOTE(review): presumably *this, so that calls can be chained -- the return
// statements are not visible in this view; confirm against the full file.
47 Prescanner
&set_fixedForm(bool yes
) {
51 Prescanner
&set_encoding(Encoding code
) {
// Stores the given limit in fixedFormColumnLimit_.
55 Prescanner
&set_fixedFormColumnLimit(int limit
) {
56 fixedFormColumnLimit_
= limit
;
// Declaration only; takes the sentinel as a const std::string reference.
60 Prescanner
&AddCompilerDirectiveSentinel(const std::string
&);
// Entry point taking a ProvenanceRange (declaration only).
62 void Prescan(ProvenanceRange
);
66 // Callbacks for use by Preprocessor.
// True once nextLine_ has reached (or passed) limit_.
67 bool IsAtEnd() const { return nextLine_
>= limit_
; }
68 bool IsNextLinePreprocessorDirective() const;
69 TokenSequence
TokenizePreprocessorDirective();
// Provenance of the character at at_, computed by GetProvenance().
70 Provenance
GetCurrentProvenance() const { return GetProvenance(at_
); }
// Two overloads: one takes a pointer and a length, the other a CharBlock.
// Both return a const char * (declarations only).
72 const char *IsCompilerDirectiveSentinel(const char *, std::size_t) const;
73 const char *IsCompilerDirectiveSentinel(CharBlock
) const;
// Perfect-forwards all arguments to messages_.Say() and returns the
// resulting Message reference.
75 template <typename
... A
> Message
&Say(A
&&...a
) {
76 return messages_
.Say(std::forward
<A
>(a
)...);
// Describes how a source line was classified; this is the return type of
// ClassifyLine() and is wrapped in std::optional by the
// Is...CompilerDirectiveLine() functions.
// NOTE(review): the enumeration header, some enumerators, and the "kind"
// member declaration are not visible in this view.
80 struct LineClassification
{
83 ConditionalCompilationDirective
,
84 IncludeDirective
, // #include
85 DefinitionDirective
, // #define & #undef
86 PreprocessorDirective
,
87 IncludeLine
, // Fortran INCLUDE
// Builds a classification from a kind; the payload offset defaults to 0
// and the sentinel defaults to nullptr.
91 LineClassification(Kind k
, std::size_t po
= 0, const char *s
= nullptr)
92 : kind
{k
}, payloadOffset
{po
}, sentinel
{s
} {}
// Defaulted move constructor.
93 LineClassification(LineClassification
&&) = default;
95 std::size_t payloadOffset
; // byte offset of content
96 const char *sentinel
; // if it's a compiler directive
// Starts processing of the source line at the given address; among other
// things (not all visible in this view) it clears tabInCurrentLine_.
99 void BeginSourceLine(const char *at
) {
102 tabInCurrentLine_
= false;
// Begins the line at nextLine_.
// NOTE(review): the "advance" step implied by the name is not visible here.
105 void BeginSourceLineAndAdvance() {
106 BeginSourceLine(nextLine_
);
// Begins a new line and resets the per-statement state: the slash and
// Hollerith-prevention flags, the parenthesis nesting depth, the
// continuation line count, and the possible-macro-call flag.
110 void BeginStatementAndAdvance() {
111 BeginSourceLineAndAdvance();
112 slashInCurrentStatement_
= false;
113 preventHollerith_
= false;
114 parenthesisNesting_
= 0;
115 continuationLines_
= 0;
116 isPossibleMacroCall_
= false;
// Maps a pointer into the current source to a Provenance by adding its
// offset from start_ to startProvenance_.
119 Provenance
GetProvenance(const char *sourceChar
) const {
120 return startProvenance_
+ (sourceChar
- start_
);
// Builds the ProvenanceRange for [first, afterLast): it starts at the
// provenance of "first" and has a length of (afterLast - first) bytes.
123 ProvenanceRange
GetProvenanceRange(
124 const char *first
, const char *afterLast
) const {
125 std::size_t bytes
= afterLast
- first
;
126 return {startProvenance_
+ (first
- start_
), bytes
};
// Appends ch to the token sequence, tagged with the provenance of the
// current source position (GetCurrentProvenance()).
129 void EmitChar(TokenSequence
&tokens
, char ch
) {
130 tokens
.PutNextTokenChar(ch
, GetCurrentProvenance());
// Appends ch to the token sequence, tagged with the provenance that
// allSources_.CompilerInsertionProvenance(ch) returns rather than with a
// source position.
133 void EmitInsertedChar(TokenSequence
&tokens
, char ch
) {
134 Provenance provenance
{allSources_
.CompilerInsertionProvenance(ch
)};
135 tokens
.PutNextTokenChar(ch
, provenance
);
// Emits ch via EmitChar().
// NOTE(review): the advance step and the returned char are not visible in
// this view.
138 char EmitCharAndAdvance(TokenSequence
&tokens
, char ch
) {
139 EmitChar(tokens
, ch
);
// True while directiveSentinel_ is non-null.
144 bool InCompilerDirective() const { return directiveSentinel_
!= nullptr; }
// True only when fixed form is selected and the prescanner is in neither
// a preprocessor directive nor a compiler directive.
145 bool InFixedFormSource() const {
146 return inFixedForm_
&& !inPreprocessorDirective_
&& !InCompilerDirective();
// True when p points at "/*" and C comments apply: always inside a
// preprocessor directive; otherwise the condition involves
// common::LanguageFeature::ClassicCComments.
// NOTE(review): the middle of this condition is not visible in this view.
149 bool IsCComment(const char *p
) const {
150 return p
[0] == '/' && p
[1] == '*' &&
151 (inPreprocessorDirective_
||
154 common::LanguageFeature::ClassicCComments
)));
// Helper declarations only; none of these has a body in this header view.
157 void LabelField(TokenSequence
&);
158 void EnforceStupidEndStatementRules(const TokenSequence
&);
159 void SkipToEndOfLine();
160 bool MustSkipToEndOfLine() const;
162 void SkipToNextSignificantCharacter();
163 void SkipCComments();
// SkipWhiteSpace is static; the two other pointer-returning skippers below
// are const member functions.
165 static const char *SkipWhiteSpace(const char *);
166 const char *SkipWhiteSpaceAndCComments(const char *) const;
167 const char *SkipCComment(const char *) const;
// Token-level helpers that take the TokenSequence being built.
168 bool NextToken(TokenSequence
&);
169 bool ExponentAndKind(TokenSequence
&);
170 void QuotedCharacterLiteral(TokenSequence
&, const char *start
);
171 void Hollerith(TokenSequence
&, int count
, const char *start
);
172 bool PadOutCharacterLiteral(TokenSequence
&);
// Line-level predicates and include handling.
173 bool SkipCommentLine(bool afterAmpersand
);
174 bool IsFixedFormCommentLine(const char *) const;
175 const char *IsFreeFormComment(const char *) const;
176 std::optional
<std::size_t> IsIncludeLine(const char *) const;
177 void FortranInclude(const char *quote
);
178 const char *IsPreprocessorDirectiveLine(const char *) const;
// Continuation-line handling for fixed and free source form.
179 const char *FixedFormContinuationLine(bool mightNeedSpace
);
180 const char *FreeFormContinuationLine(bool ampersand
);
181 bool IsImplicitContinuation() const;
182 bool FixedFormContinuation(bool mightNeedSpace
);
183 bool FreeFormContinuation();
184 bool Continuation(bool mightNeedFixedFormSpace
);
// NOTE(review): the parameter lists of the next two declarations are cut
// off in this view; both return std::optional<LineClassification>.
185 std::optional
<LineClassification
> IsFixedFormCompilerDirectiveLine(
187 std::optional
<LineClassification
> IsFreeFormCompilerDirectiveLine(
189 LineClassification
ClassifyLine(const char *) const;
190 void SourceFormChange(std::string
&&);
191 bool CompilerDirectiveContinuation(TokenSequence
&, const char *sentinel
);
192 bool SourceLineContinuation(TokenSequence
&);
// References to the cooked source, the preprocessor, and the collection of
// all sources, followed by a by-value copy of the language feature control.
195 CookedSource
&cooked_
;
196 Preprocessor
&preprocessor_
;
197 AllSources
&allSources_
;
198 common::LanguageFeatureControl features_
;
// Source form settings with their defaults: free form, a column limit of
// 72, and UTF-8 encoding.
199 bool inFixedForm_
{false};
200 int fixedFormColumnLimit_
{72};
201 Encoding encoding_
{Encoding::UTF_8
};
// parenthesisNesting_, continuationLines_, and isPossibleMacroCall_ are
// reset by BeginStatementAndAdvance().
202 int parenthesisNesting_
{0};
203 int prescannerNesting_
{0};
204 int continuationLines_
{0};
205 bool isPossibleMacroCall_
{false};
// Provenance corresponding to start_; GetProvenance() adds a pointer's
// offset from start_ to this value.
207 Provenance startProvenance_
;
208 const char *start_
{nullptr}; // beginning of current source file content
209 const char *limit_
{nullptr}; // first address after end of current source
210 const char *nextLine_
{nullptr}; // next line to process; <= limit_
211 const char *directiveSentinel_
{nullptr}; // current compiler directive
213 // These data members are state for processing the source line containing
214 // "at_", which extends up to the newline character before "nextLine_".
215 const char *at_
{nullptr}; // next character to process; < nextLine_
216 int column_
{1}; // card image column position of next character
// tabInCurrentLine_ is cleared by BeginSourceLine(); the slash and
// Hollerith flags below are cleared by BeginStatementAndAdvance().
217 bool tabInCurrentLine_
{false};
218 bool slashInCurrentStatement_
{false};
219 bool preventHollerith_
{false}; // CHARACTER*4HIMOM not Hollerith
220 bool inCharLiteral_
{false};
221 bool continuationInCharLiteral_
{false};
// Consulted by InFixedFormSource() and IsCComment().
222 bool inPreprocessorDirective_
{false};
224 // In some edge cases of compiler directive continuation lines, it
225 // is necessary to treat the line break as a space character by
226 // setting this flag, which is cleared by EmitChar().
227 bool insertASpace_
{false};
229 // When a free form continuation marker (&) appears at the end of a line
230 // before an INCLUDE or #include, we delete it and omit the newline, so
231 // that the first line of the included file is truly a continuation of
232 // the line before. Also used when the & appears at the end of the last
233 // line in an include file.
234 bool omitNewline_
{false};
235 bool skipLeadingAmpersand_
{false};
// Provenances for a compiler-inserted space and backslash, obtained from
// allSources_ in default member initializers. allSources_ is declared
// earlier in the class, so it is already bound when these initializers run.
237 const Provenance spaceProvenance_
{
238 allSources_
.CompilerInsertionProvenance(' ')};
239 const Provenance backslashProvenance_
{
240 allSources_
.CompilerInsertionProvenance('\\')};
242 // To avoid probing the set of active compiler directive sentinel strings
243 // on every comment line, they're checked first with a cheap Bloom filter.
// prime2 is the size of the filter in bits (1021 bits, which rounds up to
// the 128 bytes noted below).
// NOTE(review): how prime1 is used is not visible in this view.
244 static const int prime1
{1019}, prime2
{1021};
245 std::bitset
<prime2
> compilerDirectiveBloomFilter_
; // 128 bytes
246 std::unordered_set
<std::string
> compilerDirectiveSentinels_
;
248 } // namespace Fortran::parser
249 #endif // FORTRAN_PARSER_PRESCAN_H_