[VectorCombine] foldInsExtVectorToShuffle - canonicalize new shuffle(undef,x) ->...
[llvm-project.git] / flang / lib / Parser / prescan.h
blob08041f93b14b6c771a83982f954e86bfa4dd19f4
1 //===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef FORTRAN_PARSER_PRESCAN_H_
10 #define FORTRAN_PARSER_PRESCAN_H_
12 // Defines a fast Fortran source prescanning phase that implements some
13 // character-level features of the language that can be inefficient to
14 // support directly in a backtracking parser. This phase handles Fortran
15 // line continuation, comment removal, card image margins, padding out
16 // fixed form character literals on truncated card images, file
17 // inclusion, and driving the Fortran source preprocessor.
19 #include "flang/Common/Fortran-features.h"
20 #include "flang/Parser/characters.h"
21 #include "flang/Parser/message.h"
22 #include "flang/Parser/provenance.h"
23 #include "flang/Parser/token-sequence.h"
24 #include <bitset>
25 #include <optional>
26 #include <string>
27 #include <unordered_set>
29 namespace Fortran::parser {
// Forward declarations: this header only refers to these classes by
// reference, so their full definitions are not required here.
class Messages;
class Preprocessor;
34 class Prescanner {
35 public:
36 Prescanner(Messages &, CookedSource &, Preprocessor &,
37 common::LanguageFeatureControl);
38 Prescanner(
39 const Prescanner &, Preprocessor &, bool isNestedInIncludeDirective);
40 Prescanner(const Prescanner &) = delete;
41 Prescanner(Prescanner &&) = delete;
43 const AllSources &allSources() const { return allSources_; }
44 AllSources &allSources() { return allSources_; }
45 const Messages &messages() const { return messages_; }
46 Messages &messages() { return messages_; }
47 const Preprocessor &preprocessor() const { return preprocessor_; }
48 Preprocessor &preprocessor() { return preprocessor_; }
49 common::LanguageFeatureControl &features() { return features_; }
51 Prescanner &set_preprocessingOnly(bool yes) {
52 preprocessingOnly_ = yes;
53 return *this;
55 Prescanner &set_expandIncludeLines(bool yes) {
56 expandIncludeLines_ = yes;
57 return *this;
59 Prescanner &set_fixedForm(bool yes) {
60 inFixedForm_ = yes;
61 return *this;
63 Prescanner &set_encoding(Encoding code) {
64 encoding_ = code;
65 return *this;
67 Prescanner &set_fixedFormColumnLimit(int limit) {
68 fixedFormColumnLimit_ = limit;
69 return *this;
72 Prescanner &AddCompilerDirectiveSentinel(const std::string &);
74 void Prescan(ProvenanceRange);
75 void Statement();
76 void NextLine();
78 // Callbacks for use by Preprocessor.
79 bool IsAtEnd() const { return nextLine_ >= limit_; }
80 bool IsNextLinePreprocessorDirective() const;
81 TokenSequence TokenizePreprocessorDirective();
82 Provenance GetCurrentProvenance() const { return GetProvenance(at_); }
84 const char *IsCompilerDirectiveSentinel(const char *, std::size_t) const;
85 const char *IsCompilerDirectiveSentinel(CharBlock) const;
86 // 'first' is the sentinel, 'second' is beginning of payload
87 std::optional<std::pair<const char *, const char *>>
88 IsCompilerDirectiveSentinel(const char *p) const;
90 template <typename... A> Message &Say(A &&...a) {
91 return messages_.Say(std::forward<A>(a)...);
94 private:
95 struct LineClassification {
96 enum class Kind {
97 Comment,
98 ConditionalCompilationDirective,
99 IncludeDirective, // #include
100 DefinitionDirective, // #define & #undef
101 PreprocessorDirective,
102 IncludeLine, // Fortran INCLUDE
103 CompilerDirective,
104 Source
106 LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr)
107 : kind{k}, payloadOffset{po}, sentinel{s} {}
108 LineClassification(LineClassification &&) = default;
109 LineClassification &operator=(LineClassification &&) = default;
110 Kind kind;
111 std::size_t payloadOffset; // byte offset of content
112 const char *sentinel; // if it's a compiler directive
115 void BeginSourceLine(const char *at) {
116 at_ = at;
117 column_ = 1;
118 tabInCurrentLine_ = false;
121 void BeginSourceLineAndAdvance() {
122 BeginSourceLine(nextLine_);
123 NextLine();
126 void BeginStatementAndAdvance() {
127 BeginSourceLineAndAdvance();
128 slashInCurrentStatement_ = false;
129 preventHollerith_ = false;
130 parenthesisNesting_ = 0;
131 continuationLines_ = 0;
132 isPossibleMacroCall_ = false;
133 disableSourceContinuation_ = false;
136 Provenance GetProvenance(const char *sourceChar) const {
137 return startProvenance_ + (sourceChar - start_);
140 ProvenanceRange GetProvenanceRange(
141 const char *first, const char *afterLast) const {
142 std::size_t bytes = afterLast - first;
143 return {startProvenance_ + (first - start_), bytes};
146 void EmitChar(TokenSequence &tokens, char ch) {
147 tokens.PutNextTokenChar(ch, GetCurrentProvenance());
150 void EmitInsertedChar(TokenSequence &tokens, char ch) {
151 Provenance provenance{allSources_.CompilerInsertionProvenance(ch)};
152 tokens.PutNextTokenChar(ch, provenance);
155 char EmitCharAndAdvance(TokenSequence &tokens, char ch) {
156 EmitChar(tokens, ch);
157 NextChar();
158 return *at_;
161 bool InCompilerDirective() const { return directiveSentinel_ != nullptr; }
162 bool InFixedFormSource() const {
163 return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective();
166 bool IsCComment(const char *p) const {
167 return p[0] == '/' && p[1] == '*' &&
168 (inPreprocessorDirective_ ||
169 (!inCharLiteral_ &&
170 features_.IsEnabled(
171 common::LanguageFeature::ClassicCComments)));
174 void CheckAndEmitLine(TokenSequence &, Provenance newlineProvenance);
175 void LabelField(TokenSequence &);
176 void EnforceStupidEndStatementRules(const TokenSequence &);
177 void SkipToEndOfLine();
178 bool MustSkipToEndOfLine() const;
179 void NextChar();
180 // True when input flowed to a continuation line
181 bool SkipToNextSignificantCharacter();
182 void SkipCComments();
183 void SkipSpaces();
184 static const char *SkipWhiteSpace(const char *);
185 const char *SkipWhiteSpaceAndCComments(const char *) const;
186 const char *SkipCComment(const char *) const;
187 bool NextToken(TokenSequence &);
188 bool ExponentAndKind(TokenSequence &);
189 void QuotedCharacterLiteral(TokenSequence &, const char *start);
190 void Hollerith(TokenSequence &, int count, const char *start);
191 bool PadOutCharacterLiteral(TokenSequence &);
192 bool SkipCommentLine(bool afterAmpersand);
193 bool IsFixedFormCommentLine(const char *) const;
194 const char *IsFreeFormComment(const char *) const;
195 std::optional<std::size_t> IsIncludeLine(const char *) const;
196 void FortranInclude(const char *quote);
197 const char *IsPreprocessorDirectiveLine(const char *) const;
198 const char *FixedFormContinuationLine(bool mightNeedSpace);
199 const char *FreeFormContinuationLine(bool ampersand);
200 bool IsImplicitContinuation() const;
201 bool FixedFormContinuation(bool mightNeedSpace);
202 bool FreeFormContinuation();
203 bool Continuation(bool mightNeedFixedFormSpace);
204 std::optional<LineClassification> IsFixedFormCompilerDirectiveLine(
205 const char *) const;
206 std::optional<LineClassification> IsFreeFormCompilerDirectiveLine(
207 const char *) const;
208 LineClassification ClassifyLine(const char *) const;
209 LineClassification ClassifyLine(
210 TokenSequence &, Provenance newlineProvenance) const;
211 void SourceFormChange(std::string &&);
212 bool CompilerDirectiveContinuation(TokenSequence &, const char *sentinel);
213 bool SourceLineContinuation(TokenSequence &);
215 Messages &messages_;
216 CookedSource &cooked_;
217 Preprocessor &preprocessor_;
218 AllSources &allSources_;
219 common::LanguageFeatureControl features_;
220 bool preprocessingOnly_{false};
221 bool expandIncludeLines_{true};
222 bool isNestedInIncludeDirective_{false};
223 bool backslashFreeFormContinuation_{false};
224 bool inFixedForm_{false};
225 int fixedFormColumnLimit_{72};
226 Encoding encoding_{Encoding::UTF_8};
227 int parenthesisNesting_{0};
228 int prescannerNesting_{0};
229 int continuationLines_{0};
230 bool isPossibleMacroCall_{false};
231 bool afterPreprocessingDirective_{false};
232 bool disableSourceContinuation_{false};
234 Provenance startProvenance_;
235 const char *start_{nullptr}; // beginning of current source file content
236 const char *limit_{nullptr}; // first address after end of current source
237 const char *nextLine_{nullptr}; // next line to process; <= limit_
238 const char *directiveSentinel_{nullptr}; // current compiler directive
240 // These data members are state for processing the source line containing
241 // "at_", which goes to up to the newline character before "nextLine_".
242 const char *at_{nullptr}; // next character to process; < nextLine_
243 int column_{1}; // card image column position of next character
244 bool tabInCurrentLine_{false};
245 bool slashInCurrentStatement_{false};
246 bool preventHollerith_{false}; // CHARACTER*4HIMOM not Hollerith
247 bool inCharLiteral_{false};
248 bool continuationInCharLiteral_{false};
249 bool inPreprocessorDirective_{false};
251 // In some edge cases of compiler directive continuation lines, it
252 // is necessary to treat the line break as a space character by
253 // setting this flag, which is cleared by EmitChar().
254 bool insertASpace_{false};
256 // When a free form continuation marker (&) appears at the end of a line
257 // before a INCLUDE or #include, we delete it and omit the newline, so
258 // that the first line of the included file is truly a continuation of
259 // the line before. Also used when the & appears at the end of the last
260 // line in an include file.
261 bool omitNewline_{false};
262 bool skipLeadingAmpersand_{false};
264 const std::size_t firstCookedCharacterOffset_{cooked_.BufferedBytes()};
266 const Provenance spaceProvenance_{
267 allSources_.CompilerInsertionProvenance(' ')};
268 const Provenance backslashProvenance_{
269 allSources_.CompilerInsertionProvenance('\\')};
271 // To avoid probing the set of active compiler directive sentinel strings
272 // on every comment line, they're checked first with a cheap Bloom filter.
273 static const int prime1{1019}, prime2{1021};
274 std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes
275 std::unordered_set<std::string> compilerDirectiveSentinels_;
277 } // namespace Fortran::parser
278 #endif // FORTRAN_PARSER_PRESCAN_H_