Break circular dependency between FIR dialect and utilities
[llvm-project.git] / flang / lib / Parser / prescan.h
blobad8b90b0560e51039e6d11a7e51a0cd2099a0509
1 //===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef FORTRAN_PARSER_PRESCAN_H_
10 #define FORTRAN_PARSER_PRESCAN_H_
12 // Defines a fast Fortran source prescanning phase that implements some
13 // character-level features of the language that can be inefficient to
14 // support directly in a backtracking parser. This phase handles Fortran
15 // line continuation, comment removal, card image margins, padding out
16 // fixed form character literals on truncated card images, file
17 // inclusion, and driving the Fortran source preprocessor.
19 #include "token-sequence.h"
20 #include "flang/Common/Fortran-features.h"
21 #include "flang/Parser/characters.h"
22 #include "flang/Parser/message.h"
23 #include "flang/Parser/provenance.h"
24 #include <bitset>
25 #include <optional>
26 #include <string>
27 #include <unordered_set>
29 namespace Fortran::parser {
31 class Messages;
32 class Preprocessor;
// Prescanner implements the source prescanning phase described in the
// comment at the top of this header.
34 class Prescanner {
35 public:
// Constructs a prescanner from a message sink, a cooked source, a
// preprocessor, and a set of language feature controls.  Defined out of
// line; presumably these initialize the like-named data members -- confirm
// against the implementation.
36 Prescanner(Messages &, CookedSource &, Preprocessor &,
37 common::LanguageFeatureControl);
// Copy constructor; user-declared and defined out of line.
38 Prescanner(const Prescanner &);
// Paired const / non-const accessors returning references to the
// AllSources, Messages, and Preprocessor objects this prescanner refers to.
40 const AllSources &allSources() const { return allSources_; }
41 AllSources &allSources() { return allSources_; }
42 const Messages &messages() const { return messages_; }
43 Messages &messages() { return messages_; }
44 const Preprocessor &preprocessor() const { return preprocessor_; }
45 Preprocessor &preprocessor() { return preprocessor_; }
// Records whether the source is to be treated as fixed form.
// Returns *this so that calls to the setters can be chained.
47 Prescanner &set_fixedForm(bool yes) {
48 inFixedForm_ = yes;
49 return *this;
// Records the character encoding to use (the member defaults to UTF-8).
// Returns *this so that calls to the setters can be chained.
51 Prescanner &set_encoding(Encoding code) {
52 encoding_ = code;
53 return *this;
// Records the fixed form column limit (the member defaults to 72).
// No range checking is done on the value here.
// Returns *this so that calls to the setters can be chained.
55 Prescanner &set_fixedFormColumnLimit(int limit) {
56 fixedFormColumnLimit_ = limit;
57 return *this;
// The member functions below are only declared here; their definitions are
// out of line, so their exact behavior is not visible in this header.
//
// Takes a compiler directive sentinel string and returns Prescanner &, the
// same return type as the set_*() setters.
// NOTE(review): presumably it records the sentinel in
// compilerDirectiveSentinels_ and the Bloom filter -- confirm.
60 Prescanner &AddCompilerDirectiveSentinel(const std::string &);
// Entry point taking the provenance range to be prescanned.
62 void Prescan(ProvenanceRange);
63 void Statement();
// Called by BeginSourceLineAndAdvance() after it has begun the line at
// nextLine_.
64 void NextLine();
66 // Callbacks for use by Preprocessor.
// True once the next line to process is at or beyond the end of the
// current source content.
67 bool IsAtEnd() const { return nextLine_ >= limit_; }
// Declared here, defined out of line.
68 bool IsNextLinePreprocessorDirective() const;
69 TokenSequence TokenizePreprocessorDirective();
// Provenance of the next character to be processed (at_).
70 Provenance GetCurrentProvenance() const { return GetProvenance(at_); }
// Convenience wrapper: perfectly forwards all arguments to messages_.Say()
// and returns the Message reference that call yields.
72 template <typename... A> Message &Say(A &&...a) {
73 return messages_.Say(std::forward<A>(a)...);
76 private:
// Describes one classified source line: its kind, plus a payload offset and
// a directive sentinel that are meaningful only for some kinds.  This is the
// return type of ClassifyLine() and, wrapped in std::optional, of the
// Is*CompilerDirectiveLine() functions.
77 struct LineClassification {
// The kinds of line that a LineClassification can describe.
78 enum class Kind {
79 Comment,
80 ConditionalCompilationDirective,
81 IncludeDirective, // #include
82 DefinitionDirective, // #define & #undef
83 PreprocessorDirective,
84 IncludeLine, // Fortran INCLUDE
85 CompilerDirective,
86 Source
// Not explicit, so a bare Kind converts implicitly to a LineClassification
// with a zero payload offset and a null sentinel.
88 LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr)
89 : kind{k}, payloadOffset{po}, sentinel{s} {}
// Because a move constructor is declared, the implicitly-declared copy
// constructor and copy assignment operator are defined as deleted.
90 LineClassification(LineClassification &&) = default;
91 Kind kind;
92 std::size_t payloadOffset; // byte offset of content
93 const char *sentinel; // if it's a compiler directive
// Starts processing a source line whose first character is at "at":
// positions at_ there, resets the column to 1, and clears the per-line
// tab flag.
96 void BeginSourceLine(const char *at) {
97 at_ = at;
98 column_ = 1;
99 tabInCurrentLine_ = false;
// Starts processing the line at nextLine_, then calls NextLine().
// NOTE(review): NextLine() is defined out of line; presumably it moves
// nextLine_ on to the following line -- confirm.
102 void BeginSourceLineAndAdvance() {
103 BeginSourceLine(nextLine_);
104 NextLine();
// Starts a new statement: begins the next source line (as in
// BeginSourceLineAndAdvance()) and then resets the per-statement state --
// the slash-seen and prevent-Hollerith flags, the delimiter nesting depth,
// and the continuation line count.
107 void BeginStatementAndAdvance() {
108 BeginSourceLineAndAdvance();
109 slashInCurrentStatement_ = false;
110 preventHollerith_ = false;
111 delimiterNesting_ = 0;
112 continuationLines_ = 0;
// Maps a pointer into the current source content to a Provenance by adding
// its byte offset from start_ to startProvenance_.  No bounds check is done
// here.
115 Provenance GetProvenance(const char *sourceChar) const {
116 return startProvenance_ + (sourceChar - start_);
// Builds the ProvenanceRange for the half-open span [first, afterLast) of
// the current source content: it starts at the provenance of "first" and
// covers afterLast - first bytes.  The subtraction is stored in a
// std::size_t, so callers must pass afterLast >= first.
119 ProvenanceRange GetProvenanceRange(
120 const char *first, const char *afterLast) const {
121 std::size_t bytes = afterLast - first;
122 return {startProvenance_ + (first - start_), bytes};
// Appends ch to the token being built in "tokens", tagged with the
// provenance of the current source position (at_).
125 void EmitChar(TokenSequence &tokens, char ch) {
126 tokens.PutNextTokenChar(ch, GetCurrentProvenance());
// Appends ch to the token being built in "tokens", tagged with a
// compiler-insertion provenance obtained from allSources_ rather than with
// a position in the source.
129 void EmitInsertedChar(TokenSequence &tokens, char ch) {
130 Provenance provenance{allSources_.CompilerInsertionProvenance(ch)};
131 tokens.PutNextTokenChar(ch, provenance);
// Emits ch (see EmitChar()), calls NextChar(), and returns the character
// that at_ points to afterwards.
134 char EmitCharAndAdvance(TokenSequence &tokens, char ch) {
135 EmitChar(tokens, ch);
136 NextChar();
137 return *at_;
// True while a compiler directive sentinel is current (directiveSentinel_
// is non-null).
140 bool InCompilerDirective() const { return directiveSentinel_ != nullptr; }
// True only when fixed form is selected and the prescanner is in neither a
// preprocessor directive nor a compiler directive.
141 bool InFixedFormSource() const {
142 return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective();
// True when p points at the two characters "/*" and either a preprocessor
// directive is being processed, or we are outside a character literal and
// the ClassicCComments language feature is enabled.
// p[1] is read only when p[0] is '/', because && short-circuits.
145 bool IsCComment(const char *p) const {
146 return p[0] == '/' && p[1] == '*' &&
147 (inPreprocessorDirective_ ||
148 (!inCharLiteral_ &&
149 features_.IsEnabled(
150 common::LanguageFeature::ClassicCComments)));
// Private member functions that are only declared in this header.  Their
// definitions are out of line, so their exact behavior cannot be read from
// here; consult the implementation before relying on any of them.
153 void LabelField(TokenSequence &);
154 void EnforceStupidEndStatementRules(const TokenSequence &);
155 void SkipToEndOfLine();
156 bool MustSkipToEndOfLine() const;
// Called by EmitCharAndAdvance() between emitting a character and reading
// *at_.
157 void NextChar();
158 void SkipToNextSignificantCharacter();
159 void SkipCComments();
160 void SkipSpaces();
// Static: takes and returns a character pointer without using any
// prescanner state.
161 static const char *SkipWhiteSpace(const char *);
162 const char *SkipWhiteSpaceAndCComments(const char *) const;
163 const char *SkipCComment(const char *) const;
164 bool NextToken(TokenSequence &);
165 bool ExponentAndKind(TokenSequence &);
166 void QuotedCharacterLiteral(TokenSequence &, const char *start);
167 void Hollerith(TokenSequence &, int count, const char *start);
168 bool PadOutCharacterLiteral(TokenSequence &);
169 bool SkipCommentLine(bool afterAmpersand);
170 bool IsFixedFormCommentLine(const char *) const;
171 const char *IsFreeFormComment(const char *) const;
172 std::optional<std::size_t> IsIncludeLine(const char *) const;
173 void FortranInclude(const char *quote);
174 const char *IsPreprocessorDirectiveLine(const char *) const;
175 const char *FixedFormContinuationLine(bool mightNeedSpace);
176 const char *FreeFormContinuationLine(bool ampersand);
177 bool IsImplicitContinuation() const;
178 bool FixedFormContinuation(bool mightNeedSpace);
179 bool FreeFormContinuation();
180 bool Continuation(bool mightNeedFixedFormSpace);
181 std::optional<LineClassification> IsFixedFormCompilerDirectiveLine(
182 const char *) const;
183 std::optional<LineClassification> IsFreeFormCompilerDirectiveLine(
184 const char *) const;
185 const char *IsCompilerDirectiveSentinel(const char *) const;
// Returns a LineClassification (see the struct above) for the line passed.
186 LineClassification ClassifyLine(const char *) const;
187 void SourceFormChange(std::string &&);
// Collaborating objects, held by reference; the language feature controls
// are held by value.
189 Messages &messages_;
190 CookedSource &cooked_;
191 Preprocessor &preprocessor_;
192 AllSources &allSources_;
193 common::LanguageFeatureControl features_;
// Settings changed through the set_*() member functions.
194 bool inFixedForm_{false};
195 int fixedFormColumnLimit_{72};
196 Encoding encoding_{Encoding::UTF_8};
// delimiterNesting_ and continuationLines_ are reset to zero at the start
// of each statement by BeginStatementAndAdvance().
197 int delimiterNesting_{0};
198 int prescannerNesting_{0};
199 int continuationLines_{0};
// Base provenance and extent of the current source content; see
// GetProvenance() for how they map pointers to provenances.
201 Provenance startProvenance_;
202 const char *start_{nullptr}; // beginning of current source file content
203 const char *limit_{nullptr}; // first address after end of current source
204 const char *nextLine_{nullptr}; // next line to process; <= limit_
205 const char *directiveSentinel_{nullptr}; // current compiler directive
207 // These data members are state for processing the source line containing
208 // "at_", which goes up to the newline character before "nextLine_".
209 const char *at_{nullptr}; // next character to process; < nextLine_
210 int column_{1}; // card image column position of next character
211 bool tabInCurrentLine_{false};
212 bool slashInCurrentStatement_{false};
213 bool preventHollerith_{false}; // CHARACTER*4HIMOM not Hollerith
214 bool inCharLiteral_{false};
215 bool inPreprocessorDirective_{false};
217 // In some edge cases of compiler directive continuation lines, it
218 // is necessary to treat the line break as a space character by
219 // setting this flag, which is cleared by EmitChar().
// NOTE(review): the inline EmitChar() defined in this header does not
// modify this flag; confirm where it is actually cleared and update the
// comment above if needed.
220 bool insertASpace_{false};
222 // When a free form continuation marker (&) appears at the end of a line
223 // before an INCLUDE or #include, we delete it and omit the newline, so
224 // that the first line of the included file is truly a continuation of
225 // the line before. Also used when the & appears at the end of the last
226 // line in an include file.
227 bool omitNewline_{false};
228 bool skipLeadingAmpersand_{false};
// Compiler-insertion provenances for a space and a backslash, obtained
// once.  These initializers read allSources_, which is declared earlier in
// the class and is therefore initialized before them.
230 const Provenance spaceProvenance_{
231 allSources_.CompilerInsertionProvenance(' ')};
232 const Provenance backslashProvenance_{
233 allSources_.CompilerInsertionProvenance('\\')};
235 // To avoid probing the set of active compiler directive sentinel strings
236 // on every comment line, they're checked first with a cheap Bloom filter.
// The filter holds prime2 bits.
// NOTE(review): prime1 and prime2 are presumably the moduli of the two hash
// probes into the filter -- confirm against the implementation.
237 static const int prime1{1019}, prime2{1021};
238 std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes
239 std::unordered_set<std::string> compilerDirectiveSentinels_;
241 } // namespace Fortran::parser
242 #endif // FORTRAN_PARSER_PRESCAN_H_