1 //===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This class represents the Lexer for tablegen files.
11 //===----------------------------------------------------------------------===//
13 #ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
14 #define LLVM_LIB_TABLEGEN_TGLEXER_H
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/DataTypes.h"
20 #include "llvm/Support/SMLoc.h"
36 // Tokens with no info.
38 l_square
, r_square
, // [ ]
39 l_brace
, r_brace
, // { }
40 l_paren
, r_paren
, // ( )
44 equal
, question
, // = ?
47 // Keywords. ('ElseKW' is named to distinguish it from the existing 'Else'
48 // that means the preprocessor #else.)
49 Bit
, Bits
, Class
, Code
, Dag
, Def
, Foreach
, Defm
, Field
, In
, Int
, Let
, List
,
50 MultiClass
, String
, Defset
, Defvar
, If
, Then
, ElseKW
,
53 XConcat
, XADD
, XMUL
, XAND
, XOR
, XSRA
, XSRL
, XSHL
, XListConcat
, XListSplat
,
54 XStrConcat
, XCast
, XSubst
, XForEach
, XFoldl
, XHead
, XTail
, XSize
, XEmpty
,
55 XIf
, XCond
, XEq
, XIsA
, XDag
, XNe
, XLe
, XLt
, XGe
, XGt
, XSetOp
, XGetOp
,
60 // Binary constant. Note that these are sized according to the number of
64 // String valued tokens.
65 Id
, StrVal
, VarName
, CodeFragment
,
67 // Preprocessing tokens for internal usage by the lexer.
68 // They are never returned as a result of Lex().
69 Ifdef
, Ifndef
, Else
, Endif
, Define
73 /// TGLexer - TableGen Lexer class.
// Current position within the buffer being lexed. Lex() compares it with
// CurBuf.begin() to tell LexToken() whether lexing is still at the very start
// of the buffer, and the prep*() helpers adjust it while handling
// preprocessing directives.
77 const char *CurPtr
= nullptr;
80 // Information about the current token.
// Start of the current token. getCurBinaryIntVal() measures the token as the
// distance from here to CurPtr, and prepLexMacroName() repositions it onto the
// word that follows a preprocessing directive.
81 const char *TokStart
= nullptr;
// Kind of the current token; Lex() stores the result of LexToken() here.
82 tgtok::TokKind CurCode
= tgtok::TokKind::Eof
;
83 std::string CurStrVal
; // Valid when CurCode is tgtok::Id, StrVal, VarName or CodeFragment.
84 int64_t CurIntVal
= 0; // Valid when CurCode is tgtok::IntVal or tgtok::BinaryIntVal.
86 /// CurBuffer - This is the current buffer index we're lexing from as managed
87 /// by the SourceMgr object.
88 unsigned CurBuffer
= 0;
/// DependenciesSetTy - Ordered set of strings; this is the type of the
/// Dependencies member and of the reference returned by getDependencies().
91 typedef std::set
<std::string
> DependenciesSetTy
;
94 /// Dependencies - This is the list of all included files.
95 DependenciesSetTy Dependencies
;
/// Construct a lexer working on buffers managed by \p SrcMgr.
/// \p Macros is a list of macro names. NOTE(review): presumably these are the
/// command-line macros that seed DefinedMacros -- confirm against the
/// constructor's definition, which is not part of this header.
98 TGLexer(SourceMgr
&SrcMgr
, ArrayRef
<std::string
> Macros
);
/// Lex - Read the next token with LexToken(), remember its kind in CurCode
/// and return that kind. LexToken()'s FileOrLineStart argument is true only
/// while CurPtr still equals the beginning of CurBuf.
100 tgtok::TokKind
Lex() {
101 return CurCode
= LexToken(CurPtr
== CurBuf
.begin());
104 const DependenciesSetTy
&getDependencies() const {
/// getCode - Return the kind of the current token (CurCode).
108 tgtok::TokKind
getCode() const { return CurCode
; }
/// getCurStrVal - Accessor for the string value of the current token.
/// Asserts that the current token is one of tgtok::Id, tgtok::StrVal,
/// tgtok::VarName or tgtok::CodeFragment, i.e. a kind that carries a string.
110 const std::string
&getCurStrVal() const {
111 assert((CurCode
== tgtok::Id
|| CurCode
== tgtok::StrVal
||
112 CurCode
== tgtok::VarName
|| CurCode
== tgtok::CodeFragment
) &&
113 "This token doesn't have a string value");
/// getCurIntVal - Accessor for the integer value of the current token.
/// Asserts that the current token is tgtok::IntVal; binary integer tokens
/// are read through getCurBinaryIntVal() instead.
116 int64_t getCurIntVal() const {
117 assert(CurCode
== tgtok::IntVal
&& "This token isn't an integer");
/// getCurBinaryIntVal - Return the value of the current binary integer token
/// paired with a size computed from the token's extent in the buffer.
/// Asserts that the current token is tgtok::BinaryIntVal.
120 std::pair
<int64_t, unsigned> getCurBinaryIntVal() const {
121 assert(CurCode
== tgtok::BinaryIntVal
&&
122 "This token isn't a binary integer");
// The second element is the number of characters between TokStart and CurPtr
// minus 2. NOTE(review): the 2 presumably discounts a "0b" prefix so that the
// result is the digit count -- confirm against LexNumber().
123 return std::make_pair(CurIntVal
, (CurPtr
- TokStart
)-2);
126 SMLoc
getLoc() const;
129 /// LexToken - Read the next token and return its code.
130 tgtok::TokKind
LexToken(bool FileOrLineStart
= false);
// Error helpers: take a location (either an SMLoc or a raw pointer) and a
// message, and return a token kind so callers can return the result directly.
// NOTE(review): presumably the returned kind is always tgtok::Error and the
// message is printed as a diagnostic -- confirm in the implementation.
132 tgtok::TokKind
ReturnError(SMLoc Loc
, const Twine
&Msg
);
133 tgtok::TokKind
ReturnError(const char *Loc
, const Twine
&Msg
);
// Low-level lexing helpers.
// peekNextChar() is const, so it inspects input without moving the lexer.
// NOTE(review): Index is presumably an offset relative to CurPtr -- confirm.
136 int peekNextChar(int Index
) const;
// NOTE(review): presumably skips a "//" comment up to the end of the line.
137 void SkipBCPLComment();
// Each Lex*() helper below returns a token kind.
// LexIdentifier() handles the word "include" specially and may engage
// LexInclude(); prepLexMacroName() exists so that macro names avoid this.
139 tgtok::TokKind
LexIdentifier();
141 tgtok::TokKind
LexString();
142 tgtok::TokKind
LexVarName();
143 tgtok::TokKind
LexNumber();
144 tgtok::TokKind
LexBracket();
145 tgtok::TokKind
LexExclaim();
147 // Process EOF encountered in LexToken().
148 // If EOF is met in an include file, then the method will update
149 // CurPtr, CurBuf and preprocessing include stack, and return true.
150 // If EOF is met in the top-level file, then the method will
151 // update and check the preprocessing include stack, and return false.
154 // *** Structures and methods for preprocessing support ***
156 // A set of macro names that are defined either via command line or
159 StringSet
<> DefinedMacros
;
161 // Each of #ifdef and #else directives has a descriptor associated
164 // An ordered list of preprocessing controls defined by #ifdef/#else
165 // directives that are in effect currently is called preprocessing
166 // control stack. It is represented as a vector of PreprocessorControlDesc's.
168 // The control stack is updated according to the following rules:
170 // For each #ifdef we add an element to the control stack.
171 // For each #else we replace the top element with a descriptor
172 // with an inverted IsDefined value.
173 // For each #endif we pop the top element from the control stack.
175 // When CurPtr reaches the current buffer's end, the control stack
176 // must be empty, i.e. #ifdef and the corresponding #endif
177 // must be located in the same file.
178 struct PreprocessorControlDesc
{
179 // Either tgtok::Ifdef or tgtok::Else.
182 // True, if the condition for this directive is true, false - otherwise.
184 // #ifdef NAME : true, if NAME is defined, false - otherwise.
186 // #else : false, if NAME is defined, true - otherwise.
189 // Pointer into CurBuf to the beginning of the preprocessing directive
196 // We want to disallow code like this:
200 // include "file2.td"
206 // To do this, we clear the preprocessing control stack on entry
207 // to each of the included file. PrepIncludeStack is used to store
208 // preprocessing control stacks for the current file and all its
209 // parent files. The back() element is the preprocessing control
210 // stack for the current file.
211 std::vector
<std::unique_ptr
<std::vector
<PreprocessorControlDesc
>>>
214 // Validate that the current preprocessing control stack is empty,
215 // since we are about to exit a file, and pop the include stack.
217 // If IncludeStackMustBeEmpty is true, the include stack must be empty
218 // after the popping, otherwise, the include stack must not be empty
219 // after the popping. Basically, the include stack must be empty
220 // only if we exit the "top-level" file (i.e. finish lexing).
222 // The method returns false, if the current preprocessing control stack
223 // is not empty (e.g. there is an unterminated #ifdef/#else),
225 bool prepExitInclude(bool IncludeStackMustBeEmpty
);
227 // Look ahead for a preprocessing directive starting from CurPtr. The caller
228 // must only call this method, if *(CurPtr - 1) is '#'. If the method matches
229 // a preprocessing directive word followed by a whitespace, then it returns
230 // one of the internal token kinds, i.e. Ifdef, Ifndef, Else, Endif, Define.
232 // CurPtr is not adjusted by this method.
233 tgtok::TokKind
prepIsDirective() const;
235 // Given a preprocessing token kind, adjusts CurPtr to the end
236 // of the preprocessing directive word. Returns true, unless
237 // an unsupported token kind is passed in.
239 // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
240 // to avoid adjusting CurPtr before we are sure that '#' is followed
241 // by a preprocessing directive. If it is not, then we fall back to
242 // tgtok::paste interpretation of '#'.
243 bool prepEatPreprocessorDirective(tgtok::TokKind Kind
);
245 // The main "exit" point from the token parsing to preprocessor.
247 // The method is called for CurPtr, when prepIsDirective() recognizes
248 // a directive. The first parameter matches the result of prepIsDirective(),
249 // denoting the actual preprocessor directive to be processed.
251 // If the preprocessing directive disables the tokens processing, e.g.:
252 // #ifdef NAME // NAME is undefined
253 // then lexPreprocessor() enters the lines-skipping mode.
254 // In this mode, it does not parse any tokens, because the code under
255 // the #ifdef may not even be a correct tablegen code. The preprocessor
256 // looks for lines containing other preprocessing directives, which
257 // may be prepended with whitespaces and C-style comments. If the line
258 // does not contain a preprocessing directive, it is skipped completely.
259 // Otherwise, the preprocessing directive is processed by recursively
260 // calling lexPreprocessor(). The processing of the encountered
261 // preprocessing directives includes updating preprocessing control stack
262 // and adding new macros into DefinedMacros set.
264 // The second parameter controls whether lexPreprocessor() is called from
265 // LexToken() (true) or recursively from lexPreprocessor() (false).
267 // If ReturnNextLiveToken is true, the method returns the next
268 // LEX token following the current directive or following the end
269 // of the disabled preprocessing region corresponding to this directive.
270 // If ReturnNextLiveToken is false, the method returns the first parameter,
271 // unless there were errors encountered in the disabled preprocessing
272 // region - in this case, it returns tgtok::Error.
273 tgtok::TokKind
lexPreprocessor(tgtok::TokKind Kind
,
274 bool ReturnNextLiveToken
= true);
276 // Worker method for lexPreprocessor() to skip lines after some
277 // preprocessing directive up to the buffer end or to the directive
278 // that re-enables token processing. The method returns true
279 // upon processing the next directive that re-enables tokens
280 // processing. False is returned if an error was encountered.
282 // Note that prepSkipRegion() calls lexPreprocessor() to process
283 // encountered preprocessing directives. In this case, the second
284 // parameter to lexPreprocessor() is set to false. Being passed
285 // false ReturnNextLiveToken, lexPreprocessor() must never call
286 // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
287 // to prepSkipRegion() and checking that it is never set to false.
288 bool prepSkipRegion(bool MustNeverBeFalse
);
290 // Lex name of the macro after either #ifdef or #define. We could have used
291 // LexIdentifier(), but it has special handling of "include" word, which
292 // could result in awkward diagnostic errors. Consider:
297 // LexIdentifier() will engage LexInclude(), which will complain about
298 // missing file with name "class". Instead, prepLexMacroName() will treat
299 // "include" as a normal macro name.
301 // On entry, CurPtr points to the end of a preprocessing directive word.
302 // The method allows for whitespaces between the preprocessing directive
303 // and the macro name. The allowed whitespaces are ' ' and '\t'.
305 // If the first non-whitespace symbol after the preprocessing directive
306 // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
307 // the method updates TokStart to the position of the first non-whitespace
308 // symbol, sets CurPtr to the position of the macro name's last symbol,
309 // and returns a string reference to the macro name. Otherwise,
310 // TokStart is set to the first non-whitespace symbol after the preprocessing
311 // directive, and the method returns an empty string reference.
313 // In all cases, TokStart may be used to point to the word following
314 // the preprocessing directive.
315 StringRef
prepLexMacroName();
317 // Skip any whitespaces starting from CurPtr. The method is used
318 // only in the lines-skipping mode to find the first non-whitespace
319 // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
320 // and '\r'. The method skips C-style comments as well, because
321 // it is used to find the beginning of the preprocessing directive.
322 // If we do not handle C-style comments the following code would
323 // result in incorrect detection of a preprocessing directive:
327 // As long as we skip C-style comments, the following code is correctly
328 // recognized as a preprocessing directive:
329 // /* first line comment
330 // second line comment */ #ifdef NAME
332 // The method returns true upon reaching the first non-whitespace symbol
333 // or EOF, CurPtr is set to point to this symbol. The method returns false,
334 // if an error occurred during skipping of a C-style comment.
335 bool prepSkipLineBegin();
337 // Skip any whitespaces or comments after a preprocessing directive.
338 // The method returns true upon reaching either end of the line
339 // or end of the file. If there is a multiline C-style comment
340 // after the preprocessing directive, the method skips
341 // the comment, so the final CurPtr may point to one of the next lines.
342 // The method returns false, if an error occurred during skipping
343 // C- or C++-style comment, or a non-whitespace symbol appears
344 // after the preprocessing directive.
346 // The method may be called both during lines-skipping and tokens
347 // processing. It actually verifies that only whitespaces or/and
348 // comments follow a preprocessing directive.
350 // After the execution of this method, CurPtr points either to new line
351 // symbol, buffer end or non-whitespace symbol following the preprocessing
353 bool prepSkipDirectiveEnd();
355 // Skip all symbols to the end of the line/file.
356 // The method adjusts CurPtr, so that it points to either new line
357 // symbol in the current line or the buffer end.
358 void prepSkipToLineEnd();
360 // Return true, if the current preprocessor control stack is such that
361 // we should allow lexer to process the next token, false - otherwise.
363 // In particular, the method returns true, if all the #ifdef/#else
364 // controls on the stack have their IsDefined member set to true.
365 bool prepIsProcessingEnabled();
367 // Report an error, if we reach EOF with non-empty preprocessing control
368 // stack. This means there is no matching #endif for the previous
370 void prepReportPreprocessorStackError();
373 } // end namespace llvm