1 //===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This class represents the Lexer for tablegen files.
11 //===----------------------------------------------------------------------===//
13 #ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
14 #define LLVM_LIB_TABLEGEN_TGLEXER_H
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSet.h"
18 #include "llvm/Support/DataTypes.h"
19 #include "llvm/Support/SMLoc.h"
// Forward declaration of the ArrayRef class template. In the visible part of
// this header it is used as the type of the TGLexer constructor's Macros
// parameter (ArrayRef<std::string>).
27 template <typename T
> class ArrayRef
;
36 // Tokens with no info.
38 l_square
, r_square
, // [ ]
39 l_brace
, r_brace
, // { }
40 l_paren
, r_paren
, // ( )
44 equal
, question
, // = ?
48 // Reserved keywords. ('ElseKW' is named to distinguish it from the
49 // existing 'Else' that means the preprocessor #else.)
50 Assert
, Bit
, Bits
, Class
, Code
, Dag
, Def
, Defm
, Defset
, Defvar
, ElseKW
,
51 FalseKW
, Field
, Foreach
, If
, In
, Include
, Int
, Let
, List
, MultiClass
,
55 XConcat
, XADD
, XSUB
, XMUL
, XNOT
, XAND
, XOR
, XXOR
, XSRA
, XSRL
, XSHL
,
56 XListConcat
, XListSplat
, XStrConcat
, XInterleave
, XSubstr
, XFind
, XCast
,
57 XSubst
, XForEach
, XFilter
, XFoldl
, XHead
, XTail
, XSize
, XEmpty
, XIf
,
58 XCond
, XEq
, XIsA
, XDag
, XNe
, XLe
, XLt
, XGe
, XGt
, XSetDagOp
, XGetDagOp
,
66 // Binary constant. Note that these are sized according to the number of
// bits given in the literal (see getCurBinaryIntVal()).
70 // String valued tokens.
71 Id
, StrVal
, VarName
, CodeFragment
,
73 // Preprocessing tokens for internal usage by the lexer.
74 // They are never returned as a result of Lex().
75 Ifdef
, Ifndef
, Else
, Endif
, Define
79 /// TGLexer - TableGen Lexer class.
// Pointer to the lexer's current position. Lex() compares it against
// CurBuf.begin() to tell LexToken() whether lexing is at the very start of
// the buffer; the preprocessing helpers declared in this class adjust it.
83 const char *CurPtr
= nullptr;
86 // Information about the current token.
// Start of the current token; getCurBinaryIntVal() uses CurPtr - TokStart as
// the length of the token text.
87 const char *TokStart
= nullptr;
// Kind of the current token: assigned by Lex(), read by getCode().
// Initialized to Eof.
88 tgtok::TokKind CurCode
= tgtok::TokKind::Eof
;
89 std::string CurStrVal
; // This is valid for Id, StrVal, VarName, CodeFragment
90 int64_t CurIntVal
= 0; // This is valid for IntVal and BinaryIntVal.
92 /// CurBuffer - This is the current buffer index we're lexing from as managed
93 /// by the SourceMgr object.
94 unsigned CurBuffer
= 0;
/// Ordered, duplicate-free set of file names. This is the type of the
/// Dependencies member and the type returned (by const reference) from
/// getDependencies().
using DependenciesSetTy = std::set<std::string>;
100 /// Dependencies - This is the set of all included files.
101 DependenciesSetTy Dependencies
;
/// Construct a lexer bound to the given SourceMgr.
/// \param SrcMgr  source manager; CurBuffer is documented as an index into
///                the buffers it manages.
/// \param Macros  list of strings; presumably the macro names predefined on
///                the command line that seed DefinedMacros -- TODO confirm
///                against the constructor definition.
104 TGLexer(SourceMgr
&SrcMgr
, ArrayRef
<std::string
> Macros
);
/// Lex the next token: call LexToken(), store the resulting kind in CurCode
/// and return it. The FileOrLineStart argument passed to LexToken() is true
/// only when CurPtr still equals CurBuf.begin(), i.e. nothing has been
/// consumed from the current buffer yet.
106 tgtok::TokKind
Lex() {
107 return CurCode
= LexToken(CurPtr
== CurBuf
.begin());
110 const DependenciesSetTy
&getDependencies() const {
/// Return the kind of the current token (CurCode). Does not advance the lexer.
114 tgtok::TokKind
getCode() const { return CurCode
; }
/// Accessor for the string value of the current token. Asserts that the
/// current token kind is one of Id, StrVal, VarName or CodeFragment -- the
/// kinds for which CurStrVal is documented as valid.
/// NOTE(review): the return statement is not visible in this listing;
/// presumably it returns CurStrVal -- confirm.
116 const std::string
&getCurStrVal() const {
117 assert((CurCode
== tgtok::Id
|| CurCode
== tgtok::StrVal
||
118 CurCode
== tgtok::VarName
|| CurCode
== tgtok::CodeFragment
) &&
119 "This token doesn't have a string value");
/// Accessor for the integer value of the current token. Asserts that the
/// current token kind is IntVal.
/// NOTE(review): the return statement is not visible in this listing;
/// presumably it returns CurIntVal -- confirm.
122 int64_t getCurIntVal() const {
123 assert(CurCode
== tgtok::IntVal
&& "This token isn't an integer");
/// Return the current binary integer token as a (value, width) pair.
/// Asserts that the current token kind is BinaryIntVal.
/// \returns first = CurIntVal; second = length of the token text
///          (CurPtr - TokStart) minus 2.
126 std::pair
<int64_t, unsigned> getCurBinaryIntVal() const {
127 assert(CurCode
== tgtok::BinaryIntVal
&&
128 "This token isn't a binary integer");
// The subtracted 2 presumably discounts a two-character "0b" prefix so that
// only the digits are counted -- TODO confirm against LexNumber().
129 return std::make_pair(CurIntVal
, (CurPtr
- TokStart
)-2);
132 SMLoc
getLoc() const;
135 /// LexToken - Read the next token and return its code.
/// \param FileOrLineStart  defaults to false; Lex() passes true when CurPtr
///        is still at CurBuf.begin(). Going by the name, it tells the lexer
///        that the token starts a file or a line -- confirm how it is used in
///        the definition.
136 tgtok::TokKind
LexToken(bool FileOrLineStart
= false);
138 tgtok::TokKind
ReturnError(SMLoc Loc
, const Twine
&Msg
);
139 tgtok::TokKind
ReturnError(const char *Loc
, const Twine
&Msg
);
142 int peekNextChar(int Index
) const;
143 void SkipBCPLComment();
145 tgtok::TokKind
LexIdentifier();
147 tgtok::TokKind
LexString();
148 tgtok::TokKind
LexVarName();
149 tgtok::TokKind
LexNumber();
150 tgtok::TokKind
LexBracket();
151 tgtok::TokKind
LexExclaim();
153 // Process EOF encountered in LexToken().
154 // If EOF is met in an include file, then the method will update
155 // CurPtr, CurBuf and preprocessing include stack, and return true.
156 // If EOF is met in the top-level file, then the method will
157 // update and check the preprocessing include stack, and return false.
160 // *** Structures and methods for preprocessing support ***
162 // A set of macro names that are defined either via command line or
// by a #define directive processed by the lexer.
165 StringSet
<> DefinedMacros
;
167 // Each of #ifdef and #else directives has a descriptor associated
// with it (a PreprocessorControlDesc).
170 // An ordered list of preprocessing controls defined by #ifdef/#else
171 // directives that are in effect currently is called preprocessing
172 // control stack. It is represented as a vector of PreprocessorControlDesc's.
174 // The control stack is updated according to the following rules:
176 // For each #ifdef we add an element to the control stack.
177 // For each #else we replace the top element with a descriptor
178 // with an inverted IsDefined value.
179 // For each #endif we pop the top element from the control stack.
181 // When CurPtr reaches the current buffer's end, the control stack
182 // must be empty, i.e. #ifdef and the corresponding #endif
183 // must be located in the same file.
184 struct PreprocessorControlDesc
{
185 // Either tgtok::Ifdef or tgtok::Else.
188 // True, if the condition for this directive is true, false - otherwise.
190 // #ifdef NAME : true, if NAME is defined, false - otherwise.
192 // #else : false, if NAME is defined, true - otherwise.
195 // Pointer into CurBuf to the beginning of the preprocessing directive
202 // We want to disallow code like this:
206 // include "file2.td"
212 // To do this, we clear the preprocessing control stack on entry
213 // to each of the included file. PrepIncludeStack is used to store
214 // preprocessing control stacks for the current file and all its
215 // parent files. The back() element is the preprocessing control
216 // stack for the current file.
217 std::vector
<std::unique_ptr
<std::vector
<PreprocessorControlDesc
>>>
220 // Validate that the current preprocessing control stack is empty,
221 // since we are about to exit a file, and pop the include stack.
223 // If IncludeStackMustBeEmpty is true, the include stack must be empty
224 // after the popping, otherwise, the include stack must not be empty
225 // after the popping. Basically, the include stack must be empty
226 // only if we exit the "top-level" file (i.e. finish lexing).
228 // The method returns false, if the current preprocessing control stack
229 // is not empty (e.g. there is an unterminated #ifdef/#else),
231 bool prepExitInclude(bool IncludeStackMustBeEmpty
);
233 // Look ahead for a preprocessing directive starting from CurPtr. The caller
234 // must only call this method, if *(CurPtr - 1) is '#'. If the method matches
235 // a preprocessing directive word followed by a whitespace, then it returns
236 // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
238 // CurPtr is not adjusted by this method.
239 tgtok::TokKind
prepIsDirective() const;
241 // Given a preprocessing token kind, adjusts CurPtr to the end
242 // of the preprocessing directive word. Returns true, unless
243 // an unsupported token kind is passed in.
245 // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
246 // to avoid adjusting CurPtr before we are sure that '#' is followed
247 // by a preprocessing directive. If it is not, then we fall back to
248 // tgtok::paste interpretation of '#'.
249 bool prepEatPreprocessorDirective(tgtok::TokKind Kind
);
251 // The main "exit" point from the token parsing to preprocessor.
253 // The method is called for CurPtr, when prepIsDirective() returns
254 // true. The first parameter matches the result of prepIsDirective(),
255 // denoting the actual preprocessor directive to be processed.
257 // If the preprocessing directive disables the tokens processing, e.g.:
258 // #ifdef NAME // NAME is undefined
259 // then lexPreprocessor() enters the lines-skipping mode.
260 // In this mode, it does not parse any tokens, because the code under
261 // the #ifdef may not even be a correct tablegen code. The preprocessor
262 // looks for lines containing other preprocessing directives, which
263 // may be prepended with whitespaces and C-style comments. If the line
264 // does not contain a preprocessing directive, it is skipped completely.
265 // Otherwise, the preprocessing directive is processed by recursively
266 // calling lexPreprocessor(). The processing of the encountered
267 // preprocessing directives includes updating preprocessing control stack
268 // and adding new macros into DefinedMacros set.
270 // The second parameter controls whether lexPreprocessor() is called from
271 // LexToken() (true) or recursively from lexPreprocessor() (false).
273 // If ReturnNextLiveToken is true, the method returns the next
274 // LEX token following the current directive or following the end
275 // of the disabled preprocessing region corresponding to this directive.
276 // If ReturnNextLiveToken is false, the method returns the first parameter,
277 // unless there were errors encountered in the disabled preprocessing
278 // region - in this case, it returns tgtok::Error.
279 tgtok::TokKind
lexPreprocessor(tgtok::TokKind Kind
,
280 bool ReturnNextLiveToken
= true);
282 // Worker method for lexPreprocessor() to skip lines after some
283 // preprocessing directive up to the buffer end or to the directive
284 // that re-enables token processing. The method returns true
285 // upon processing the next directive that re-enables tokens
286 // processing. False is returned if an error was encountered.
288 // Note that prepSkipRegion() calls lexPreprocessor() to process
289 // encountered preprocessing directives. In this case, the second
290 // parameter to lexPreprocessor() is set to false. Being passed
291 // false ReturnNextLiveToken, lexPreprocessor() must never call
292 // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
293 // to prepSkipRegion() and checking that it is never set to false.
294 bool prepSkipRegion(bool MustNeverBeFalse
);
296 // Lex name of the macro after either #ifdef or #define. We could have used
297 // LexIdentifier(), but it has special handling of "include" word, which
298 // could result in awkward diagnostic errors. Consider:
303 // LexIdentifier() will engage LexInclude(), which will complain about
304 // missing file with name "class". Instead, prepLexMacroName() will treat
305 // "include" as a normal macro name.
307 // On entry, CurPtr points to the end of a preprocessing directive word.
308 // The method allows for whitespaces between the preprocessing directive
309 // and the macro name. The allowed whitespaces are ' ' and '\t'.
311 // If the first non-whitespace symbol after the preprocessing directive
312 // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
313 // the method updates TokStart to the position of the first non-whitespace
314 // symbol, sets CurPtr to the position of the macro name's last symbol,
315 // and returns a string reference to the macro name. Otherwise,
316 // TokStart is set to the first non-whitespace symbol after the preprocessing
317 // directive, and the method returns an empty string reference.
319 // In all cases, TokStart may be used to point to the word following
320 // the preprocessing directive.
321 StringRef
prepLexMacroName();
323 // Skip any whitespaces starting from CurPtr. The method is used
324 // only in the lines-skipping mode to find the first non-whitespace
325 // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
326 // and '\r'. The method skips C-style comments as well, because
327 // it is used to find the beginning of the preprocessing directive.
328 // If we do not handle C-style comments, the following code would
329 // result in incorrect detection of a preprocessing directive:
333 // As long as we skip C-style comments, the following code is correctly
334 // recognized as a preprocessing directive:
335 // /* first line comment
336 // second line comment */ #ifdef NAME
338 // The method returns true upon reaching the first non-whitespace symbol
339 // or EOF; CurPtr is set to point to this symbol. The method returns false
340 // if an error occurred during skipping of a C-style comment.
341 bool prepSkipLineBegin();
343 // Skip any whitespaces or comments after a preprocessing directive.
344 // The method returns true upon reaching either end of the line
345 // or end of the file. If there is a multiline C-style comment
346 // after the preprocessing directive, the method skips
347 // the comment, so the final CurPtr may point to one of the next lines.
348 // The method returns false if an error occurred during skipping of a
349 // C- or C++-style comment, or a non-whitespace symbol appears
350 // after the preprocessing directive.
352 // The method may be called both during lines-skipping and tokens
353 // processing. It actually verifies that only whitespaces and/or
354 // comments follow a preprocessing directive.
356 // After the execution of this method, CurPtr points either to new line
357 // symbol, buffer end or non-whitespace symbol following the preprocessing
// directive.
359 bool prepSkipDirectiveEnd();
361 // Skip all symbols to the end of the line/file.
362 // The method adjusts CurPtr, so that it points to either new line
363 // symbol in the current line or the buffer end.
364 void prepSkipToLineEnd();
366 // Return true, if the current preprocessor control stack is such that
367 // we should allow lexer to process the next token, false - otherwise.
369 // In particular, the method returns true, if all the #ifdef/#else
370 // controls on the stack have their IsDefined member set to true.
371 bool prepIsProcessingEnabled();
373 // Report an error, if we reach EOF with non-empty preprocessing control
374 // stack. This means there is no matching #endif for the previous
// #ifdef/#else.
376 void prepReportPreprocessorStackError();
379 } // end namespace llvm