[Clang] Fix expansion of response files in -Wp after integrated-cc1 change
[llvm-project.git] / llvm / lib / TableGen / TGLexer.h
blob6d10af348674061211539890b16a05c3ec29f8c4
1 //===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class represents the Lexer for tablegen files.
11 //===----------------------------------------------------------------------===//
13 #ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
14 #define LLVM_LIB_TABLEGEN_TGLEXER_H
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/DataTypes.h"
20 #include "llvm/Support/SMLoc.h"
21 #include <cassert>
22 #include <memory>
23 #include <set>
24 #include <string>
26 namespace llvm {
27 class SourceMgr;
28 class SMLoc;
29 class Twine;
31 namespace tgtok {
32 enum TokKind {
33 // Markers
34 Eof, Error,
36 // Tokens with no info.
37 minus, plus, // - +
38 l_square, r_square, // [ ]
39 l_brace, r_brace, // { }
40 l_paren, r_paren, // ( )
41 less, greater, // < >
42 colon, semi, // : ;
43 comma, period, // , .
44 equal, question, // = ?
45 paste, // #
47 // Keywords. ('ElseKW' is named to distinguish it from the existing 'Else'
48 // that means the preprocessor #else.)
49 Bit, Bits, Class, Code, Dag, Def, Foreach, Defm, Field, In, Int, Let, List,
50 MultiClass, String, Defset, Defvar, If, Then, ElseKW,
52 // !keywords.
53 XConcat, XADD, XMUL, XAND, XOR, XSRA, XSRL, XSHL, XListConcat, XListSplat,
54 XStrConcat, XCast, XSubst, XForEach, XFoldl, XHead, XTail, XSize, XEmpty,
55 XIf, XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetOp, XGetOp,
57 // Integer value.
58 IntVal,
60 // Binary constant. Note that these are sized according to the number of
61 // bits given.
62 BinaryIntVal,
64 // String valued tokens.
65 Id, StrVal, VarName, CodeFragment,
67 // Preprocessing tokens for internal usage by the lexer.
68 // They are never returned as a result of Lex().
69 Ifdef, Ifndef, Else, Endif, Define
73 /// TGLexer - TableGen Lexer class.
74 class TGLexer {
75 SourceMgr &SrcMgr;
77 const char *CurPtr = nullptr;
78 StringRef CurBuf;
80 // Information about the current token.
81 const char *TokStart = nullptr;
82 tgtok::TokKind CurCode = tgtok::TokKind::Eof;
83 std::string CurStrVal; // This is valid for ID, STRVAL, VARNAME, CODEFRAGMENT
84 int64_t CurIntVal = 0; // This is valid for INTVAL.
86 /// CurBuffer - This is the current buffer index we're lexing from as managed
87 /// by the SourceMgr object.
88 unsigned CurBuffer = 0;
90 public:
91 typedef std::set<std::string> DependenciesSetTy;
93 private:
94 /// Dependencies - This is the list of all included files.
95 DependenciesSetTy Dependencies;
97 public:
98 TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
100 tgtok::TokKind Lex() {
101 return CurCode = LexToken(CurPtr == CurBuf.begin());
104 const DependenciesSetTy &getDependencies() const {
105 return Dependencies;
108 tgtok::TokKind getCode() const { return CurCode; }
110 const std::string &getCurStrVal() const {
111 assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
112 CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
113 "This token doesn't have a string value");
114 return CurStrVal;
116 int64_t getCurIntVal() const {
117 assert(CurCode == tgtok::IntVal && "This token isn't an integer");
118 return CurIntVal;
120 std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
121 assert(CurCode == tgtok::BinaryIntVal &&
122 "This token isn't a binary integer");
123 return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
126 SMLoc getLoc() const;
128 private:
129 /// LexToken - Read the next token and return its code.
130 tgtok::TokKind LexToken(bool FileOrLineStart = false);
132 tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
133 tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
135 int getNextChar();
136 int peekNextChar(int Index) const;
137 void SkipBCPLComment();
138 bool SkipCComment();
139 tgtok::TokKind LexIdentifier();
140 bool LexInclude();
141 tgtok::TokKind LexString();
142 tgtok::TokKind LexVarName();
143 tgtok::TokKind LexNumber();
144 tgtok::TokKind LexBracket();
145 tgtok::TokKind LexExclaim();
147 // Process EOF encountered in LexToken().
148 // If EOF is met in an include file, then the method will update
149 // CurPtr, CurBuf and preprocessing include stack, and return true.
150 // If EOF is met in the top-level file, then the method will
151 // update and check the preprocessing include stack, and return false.
152 bool processEOF();
154 // *** Structures and methods for preprocessing support ***
156 // A set of macro names that are defined either via command line or
157 // by using:
158 // #define NAME
159 StringSet<> DefinedMacros;
161 // Each of #ifdef and #else directives has a descriptor associated
162 // with it.
164 // An ordered list of preprocessing controls defined by #ifdef/#else
165 // directives that are in effect currently is called preprocessing
166 // control stack. It is represented as a vector of PreprocessorControlDesc's.
168 // The control stack is updated according to the following rules:
170 // For each #ifdef we add an element to the control stack.
171 // For each #else we replace the top element with a descriptor
172 // with an inverted IsDefined value.
173 // For each #endif we pop the top element from the control stack.
175 // When CurPtr reaches the current buffer's end, the control stack
176 // must be empty, i.e. #ifdef and the corresponding #endif
177 // must be located in the same file.
178 struct PreprocessorControlDesc {
179 // Either tgtok::Ifdef or tgtok::Else.
180 tgtok::TokKind Kind;
182 // True, if the condition for this directive is true, false - otherwise.
183 // Examples:
184 // #ifdef NAME : true, if NAME is defined, false - otherwise.
185 // ...
186 // #else : false, if NAME is defined, true - otherwise.
187 bool IsDefined;
189 // Pointer into CurBuf to the beginning of the preprocessing directive
190 // word, e.g.:
191 // #ifdef NAME
192 // ^ - SrcPos
193 SMLoc SrcPos;
196 // We want to disallow code like this:
197 // file1.td:
198 // #define NAME
199 // #ifdef NAME
200 // include "file2.td"
201 // EOF
202 // file2.td:
203 // #endif
204 // EOF
206 // To do this, we clear the preprocessing control stack on entry
207 // to each included file. PrepIncludeStack is used to store
208 // preprocessing control stacks for the current file and all its
209 // parent files. The back() element is the preprocessing control
210 // stack for the current file.
211 std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
212 PrepIncludeStack;
214 // Validate that the current preprocessing control stack is empty,
215 // since we are about to exit a file, and pop the include stack.
217 // If IncludeStackMustBeEmpty is true, the include stack must be empty
218 // after the popping, otherwise, the include stack must not be empty
219 // after the popping. Basically, the include stack must be empty
220 // only if we exit the "top-level" file (i.e. finish lexing).
222 // The method returns false, if the current preprocessing control stack
223 // is not empty (e.g. there is an unterminated #ifdef/#else),
224 // true - otherwise.
225 bool prepExitInclude(bool IncludeStackMustBeEmpty);
227 // Look ahead for a preprocessing directive starting from CurPtr. The caller
228 // must only call this method, if *(CurPtr - 1) is '#'. If the method matches
229 // a preprocessing directive word followed by a whitespace, then it returns
230 // one of the internal token kinds, e.g. Ifdef, Else, Endif, Define.
232 // CurPtr is not adjusted by this method.
233 tgtok::TokKind prepIsDirective() const;
235 // Given a preprocessing token kind, adjusts CurPtr to the end
236 // of the preprocessing directive word. Returns true, unless
237 // an unsupported token kind is passed in.
239 // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
240 // to avoid adjusting CurPtr before we are sure that '#' is followed
241 // by a preprocessing directive. If it is not, then we fall back to
242 // tgtok::paste interpretation of '#'.
243 bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
245 // The main "exit" point from the token parsing to preprocessor.
247 // The method is called for CurPtr, when prepIsDirective() returns one of
248 // the preprocessing token kinds. The first parameter is that result,
249 // denoting the actual preprocessor directive to be processed.
251 // If the preprocessing directive disables the tokens processing, e.g.:
252 // #ifdef NAME // NAME is undefined
253 // then lexPreprocessor() enters the lines-skipping mode.
254 // In this mode, it does not parse any tokens, because the code under
255 // the #ifdef may not even be a correct tablegen code. The preprocessor
256 // looks for lines containing other preprocessing directives, which
257 // may be prepended with whitespaces and C-style comments. If the line
258 // does not contain a preprocessing directive, it is skipped completely.
259 // Otherwise, the preprocessing directive is processed by recursively
260 // calling lexPreprocessor(). The processing of the encountered
261 // preprocessing directives includes updating preprocessing control stack
262 // and adding new macros into DefinedMacros set.
264 // The second parameter controls whether lexPreprocessor() is called from
265 // LexToken() (true) or recursively from lexPreprocessor() (false).
267 // If ReturnNextLiveToken is true, the method returns the next
268 // LEX token following the current directive or following the end
269 // of the disabled preprocessing region corresponding to this directive.
270 // If ReturnNextLiveToken is false, the method returns the first parameter,
271 // unless there were errors encountered in the disabled preprocessing
272 // region - in this case, it returns tgtok::Error.
273 tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
274 bool ReturnNextLiveToken = true);
276 // Worker method for lexPreprocessor() to skip lines after some
277 // preprocessing directive up to the buffer end or to the directive
278 // that re-enables token processing. The method returns true
279 // upon processing the next directive that re-enables tokens
280 // processing. False is returned if an error was encountered.
282 // Note that prepSkipRegion() calls lexPreprocessor() to process
283 // encountered preprocessing directives. In this case, the second
284 // parameter to lexPreprocessor() is set to false. Being passed
285 // false ReturnNextLiveToken, lexPreprocessor() must never call
286 // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
287 // to prepSkipRegion() and checking that it is never set to false.
288 bool prepSkipRegion(bool MustNeverBeFalse);
290 // Lex name of the macro after either #ifdef or #define. We could have used
291 // LexIdentifier(), but it has special handling of "include" word, which
292 // could result in awkward diagnostic errors. Consider:
293 // ----
294 // #ifdef include
295 // class ...
296 // ----
297 // LexIdentifier() will engage LexInclude(), which will complain about
298 // missing file with name "class". Instead, prepLexMacroName() will treat
299 // "include" as a normal macro name.
301 // On entry, CurPtr points to the end of a preprocessing directive word.
302 // The method allows for whitespaces between the preprocessing directive
303 // and the macro name. The allowed whitespaces are ' ' and '\t'.
305 // If the first non-whitespace symbol after the preprocessing directive
306 // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
307 // the method updates TokStart to the position of the first non-whitespace
308 // symbol, sets CurPtr to the position of the macro name's last symbol,
309 // and returns a string reference to the macro name. Otherwise,
310 // TokStart is set to the first non-whitespace symbol after the preprocessing
311 // directive, and the method returns an empty string reference.
313 // In all cases, TokStart may be used to point to the word following
314 // the preprocessing directive.
315 StringRef prepLexMacroName();
317 // Skip any whitespaces starting from CurPtr. The method is used
318 // only in the lines-skipping mode to find the first non-whitespace
319 // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
320 // and '\r'. The method skips C-style comments as well, because
321 // it is used to find the beginning of the preprocessing directive.
322 // If we do not handle C-style comments the following code would
323 // result in incorrect detection of a preprocessing directive:
324 // /*
325 // #ifdef NAME
326 // */
327 // As long as we skip C-style comments, the following code is correctly
328 // recognized as a preprocessing directive:
329 // /* first line comment
330 // second line comment */ #ifdef NAME
332 // The method returns true upon reaching the first non-whitespace symbol
333 // or EOF, CurPtr is set to point to this symbol. The method returns false,
334 // if an error occurred during skipping of a C-style comment.
335 bool prepSkipLineBegin();
337 // Skip any whitespaces or comments after a preprocessing directive.
338 // The method returns true upon reaching either end of the line
339 // or end of the file. If there is a multiline C-style comment
340 // after the preprocessing directive, the method skips
341 // the comment, so the final CurPtr may point to one of the next lines.
342 // The method returns false, if an error occurred during skipping
343 // C- or C++-style comment, or a non-whitespace symbol appears
344 // after the preprocessing directive.
346 // The method may be called both during lines-skipping and tokens
347 // processing. It actually verifies that only whitespaces and/or
348 // comments follow a preprocessing directive.
350 // After the execution of this method, CurPtr points either to new line
351 // symbol, buffer end or non-whitespace symbol following the preprocessing
352 // directive.
353 bool prepSkipDirectiveEnd();
355 // Skip all symbols to the end of the line/file.
356 // The method adjusts CurPtr, so that it points to either new line
357 // symbol in the current line or the buffer end.
358 void prepSkipToLineEnd();
360 // Return true, if the current preprocessor control stack is such that
361 // we should allow lexer to process the next token, false - otherwise.
363 // In particular, the method returns true, if all the #ifdef/#else
364 // controls on the stack have their IsDefined member set to true.
365 bool prepIsProcessingEnabled();
367 // Report an error, if we reach EOF with non-empty preprocessing control
368 // stack. This means there is no matching #endif for the previous
369 // #ifdef/#else.
370 void prepReportPreprocessorStackError();
373 } // end namespace llvm
375 #endif