//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This class represents the Lexer for tablegen files.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
#define LLVM_LIB_TABLEGEN_TGLEXER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
namespace llvm {

template <typename T> class ArrayRef;
class SourceMgr;
class Twine;
namespace tgtok {
enum TokKind {
  // Tokens with no info.

  // Binary constant. Note that these are sized according to the number of
  // bits given.

  // Preprocessing tokens for internal usage by the lexer.
  // They are never returned as a result of Lex().

  // Reserved keywords. ('ElseKW' is named to distinguish it from the
  // existing 'Else' that means the preprocessor #else.)

  // Object start tokens.
  Assert = OBJECT_START_FIRST,
  OBJECT_START_LAST = MultiClass,

  // Bang operators.
  XConcat = BANG_OPERATOR_FIRST,
  BANG_OPERATOR_LAST = XRepr,

  // String valued tokens.
  Id = STRING_VALUE_FIRST,
  STRING_VALUE_LAST = CodeFragment,
};
/// isBangOperator - Return true if this is a bang operator.
static inline bool isBangOperator(tgtok::TokKind Kind) {
  return tgtok::BANG_OPERATOR_FIRST <= Kind && Kind <= BANG_OPERATOR_LAST;
}
/// isObjectStart - Return true if this is a valid first token for a statement.
static inline bool isObjectStart(tgtok::TokKind Kind) {
  return tgtok::OBJECT_START_FIRST <= Kind && Kind <= OBJECT_START_LAST;
}
/// isStringValue - Return true if this is a string value.
static inline bool isStringValue(tgtok::TokKind Kind) {
  return tgtok::STRING_VALUE_FIRST <= Kind && Kind <= STRING_VALUE_LAST;
}
} // namespace tgtok

/// TGLexer - TableGen Lexer class.
class TGLexer {
  StringRef CurBuf;
  const char *CurPtr = nullptr;
  // Information about the current token.
  const char *TokStart = nullptr;
  tgtok::TokKind CurCode = tgtok::TokKind::Eof;
  std::string CurStrVal; // This is valid for Id, StrVal, VarName, CodeFragment
  int64_t CurIntVal = 0; // This is valid for IntVal.
  /// CurBuffer - This is the current buffer index we're lexing from as managed
  /// by the SourceMgr object.
  unsigned CurBuffer = 0;
  typedef std::set<std::string> DependenciesSetTy;

  /// Dependencies - This is the list of all included files.
  DependenciesSetTy Dependencies;
public:
  TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
  tgtok::TokKind Lex() {
    return CurCode = LexToken(CurPtr == CurBuf.begin());
  }
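
  // A minimal usage sketch (hypothetical driver code; SrcMgr is assumed to
  // already hold the main buffer):
  //
  //   TGLexer Lexer(SrcMgr, /*Macros=*/{});
  //   while (Lexer.Lex() != tgtok::Eof) {
  //     if (Lexer.getCode() == tgtok::Error)
  //       break;
  //     // Inspect Lexer.getCurStrVal() / Lexer.getCurIntVal() as needed.
  //   }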
  const DependenciesSetTy &getDependencies() const {
    return Dependencies;
  }
  tgtok::TokKind getCode() const { return CurCode; }
  const std::string &getCurStrVal() const {
    assert(tgtok::isStringValue(CurCode) &&
           "This token doesn't have a string value");
    return CurStrVal;
  }
  int64_t getCurIntVal() const {
    assert(CurCode == tgtok::IntVal && "This token isn't an integer");
    return CurIntVal;
  }
  std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
    assert(CurCode == tgtok::BinaryIntVal &&
           "This token isn't a binary integer");
    return std::make_pair(CurIntVal, (CurPtr - TokStart) - 2);
  }
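
  // For example (a sketch of the arithmetic above): for the token "0b1010",
  // TokStart points at the leading '0' and CurPtr points one past the last
  // digit, so the returned pair is {10, 4}: the value and the number of bits,
  // with the two characters of the "0b" prefix accounted for by the "- 2".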
  SMLoc getLoc() const;
  SMRange getLocRange() const;
private:
  /// LexToken - Read the next token and return its code.
  tgtok::TokKind LexToken(bool FileOrLineStart = false);
  tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
  tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
  int peekNextChar(int Index) const;
  void SkipBCPLComment();
  tgtok::TokKind LexIdentifier();
  bool LexInclude();
  tgtok::TokKind LexString();
  tgtok::TokKind LexVarName();
  tgtok::TokKind LexNumber();
  tgtok::TokKind LexBracket();
  tgtok::TokKind LexExclaim();
  // Process EOF encountered in LexToken().
  // If EOF is met in an include file, then the method will update
  // CurPtr, CurBuf and preprocessing include stack, and return true.
  // If EOF is met in the top-level file, then the method will
  // update and check the preprocessing include stack, and return false.
  bool processEOF();
  // *** Structures and methods for preprocessing support ***

  // A set of macro names that are defined either via command line or
  // by using the #define directive.
  StringSet<> DefinedMacros;
  // Each of #ifdef and #else directives has a descriptor associated
  // with it.
  //
  // An ordered list of preprocessing controls defined by #ifdef/#else
  // directives that are currently in effect is called the preprocessing
  // control stack. It is represented as a vector of PreprocessorControlDesc's.
  // The control stack is updated according to the following rules
  // (see the sketch below):
  //
  // For each #ifdef we add an element to the control stack.
  // For each #else we replace the top element with a descriptor
  // with an inverted IsDefined value.
  // For each #endif we pop the top element from the control stack.
  //
  // When CurPtr reaches the current buffer's end, the control stack
  // must be empty, i.e. #ifdef and the corresponding #endif
  // must be located in the same file.
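  //
  // For example (a sketch; NAME is an arbitrary macro name):
  //
  //   #ifdef NAME   // push {Kind: Ifdef, IsDefined: defined(NAME)}
  //   ...
  //   #else         // replace the top with {Kind: Else, IsDefined: !defined(NAME)}
  //   ...
  //   #endif        // pop the top element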
  struct PreprocessorControlDesc {
    // Either tgtok::Ifdef or tgtok::Else.
    tgtok::TokKind Kind;

    // True, if the condition for this directive is true, false otherwise:
    //   #ifdef NAME : true, if NAME is defined, false otherwise.
    //   #else       : false, if NAME is defined, true otherwise.
    bool IsDefined;
    // Pointer into CurBuf to the beginning of the preprocessing directive
    // word.
    SMLoc SrcPos;
  };
  // We want to disallow code like this:
  //
  //   file1.td:
  //     #ifdef NAME
  //     include "file2.td"
  //
  //   file2.td:
  //     #endif
  //
  // To do this, we clear the preprocessing control stack on entry
  // to each of the included files. PrepIncludeStack is used to store
  // preprocessing control stacks for the current file and all its
  // parent files. The back() element is the preprocessing control
  // stack for the current file.
  std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
      PrepIncludeStack;
  // Validate that the current preprocessing control stack is empty,
  // since we are about to exit a file, and pop the include stack.
  //
  // If IncludeStackMustBeEmpty is true, the include stack must be empty
  // after the popping, otherwise, the include stack must not be empty
  // after the popping. Basically, the include stack must be empty
  // only if we exit the "top-level" file (i.e. finish lexing).
  //
  // The method returns false, if the current preprocessing control stack
  // is not empty (e.g. there is an unterminated #ifdef/#else),
  // true - otherwise.
  bool prepExitInclude(bool IncludeStackMustBeEmpty);
  // Look ahead for a preprocessing directive starting from CurPtr. The caller
  // must only call this method, if *(CurPtr - 1) is '#'. If the method matches
  // a preprocessing directive word followed by a whitespace, then it returns
  // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
  //
  // CurPtr is not adjusted by this method.
  tgtok::TokKind prepIsDirective() const;
  // Given a preprocessing token kind, adjusts CurPtr to the end
  // of the preprocessing directive word. Returns true, unless
  // an unsupported token kind is passed in.
  //
  // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
  // to avoid adjusting CurPtr before we are sure that '#' is followed
  // by a preprocessing directive. If it is not, then we fall back to
  // the tgtok::paste interpretation of '#', as sketched below.
  bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
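
  // For example (a sketch): in "a # b" the '#' is the paste operator and is
  // lexed as tgtok::paste, while in "#ifdef NAME" the look-ahead matches the
  // "ifdef" word, so prepEatPreprocessorDirective(tgtok::Ifdef) advances
  // CurPtr past it and control passes to lexPreprocessor().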
  // The main "exit" point from the token parsing to the preprocessor.
  //
  // The method is called for CurPtr, when prepIsDirective() detects a
  // preprocessing directive. The first parameter matches the result of
  // prepIsDirective(), denoting the actual preprocessor directive to be
  // processed.
  //
  // If the preprocessing directive disables the tokens processing, e.g.:
  //   #ifdef NAME   // NAME is undefined
  // then lexPreprocessor() enters the lines-skipping mode.
  // In this mode, it does not parse any tokens, because the code under
  // the #ifdef may not even be correct tablegen code. The preprocessor
  // looks for lines containing other preprocessing directives, which
  // may be prepended with whitespaces and C-style comments. If the line
  // does not contain a preprocessing directive, it is skipped completely.
  // Otherwise, the preprocessing directive is processed by recursively
  // calling lexPreprocessor(). The processing of the encountered
  // preprocessing directives includes updating the preprocessing control
  // stack and adding new macros into the DefinedMacros set.
  //
  // The second parameter controls whether lexPreprocessor() is called from
  // LexToken() (true) or recursively from lexPreprocessor() (false).
  //
  // If ReturnNextLiveToken is true, the method returns the next
  // LEX token following the current directive or following the end
  // of the disabled preprocessing region corresponding to this directive.
  // If ReturnNextLiveToken is false, the method returns the first parameter,
  // unless there were errors encountered in the disabled preprocessing
  // region - in this case, it returns tgtok::Error.
  tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
                                 bool ReturnNextLiveToken = true);
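
  // For example (a sketch), with NAME undefined:
  //
  //   #ifdef NAME
  //   this line is skipped without being lexed as tablegen tokens
  //   #endif
  //   class Foo;
  //
  // With ReturnNextLiveToken set to true, the call for the #ifdef returns
  // the first live token after the #endif (the "class" keyword here).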
  // Worker method for lexPreprocessor() to skip lines after some
  // preprocessing directive up to the buffer end or to the directive
  // that re-enables token processing. The method returns true
  // upon processing the next directive that re-enables token
  // processing. False is returned if an error was encountered.
  //
  // Note that prepSkipRegion() calls lexPreprocessor() to process
  // encountered preprocessing directives. In this case, the second
  // parameter to lexPreprocessor() is set to false. Being passed
  // false ReturnNextLiveToken, lexPreprocessor() must never call
  // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
  // to prepSkipRegion() and checking that it is never set to false.
  bool prepSkipRegion(bool MustNeverBeFalse);
  // Lex name of the macro after either #ifdef or #define. We could have used
  // LexIdentifier(), but it has special handling of the "include" word, which
  // could result in awkward diagnostic errors. Consider:
  //
  //   #ifdef include
  //   class ...
  //
  // LexIdentifier() will engage LexInclude(), which will complain about a
  // missing file with name "class". Instead, prepLexMacroName() will treat
  // "include" as a normal macro name.
  //
  // On entry, CurPtr points to the end of a preprocessing directive word.
  // The method allows for whitespaces between the preprocessing directive
  // and the macro name. The allowed whitespaces are ' ' and '\t'.
  //
  // If the first non-whitespace symbol after the preprocessing directive
  // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
  // the method updates TokStart to the position of the first non-whitespace
  // symbol, sets CurPtr to the position of the macro name's last symbol,
  // and returns a string reference to the macro name. Otherwise,
  // TokStart is set to the first non-whitespace symbol after the preprocessing
  // directive, and the method returns an empty string reference.
  //
  // In all cases, TokStart may be used to point to the word following
  // the preprocessing directive.
  StringRef prepLexMacroName();
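
  // For example (a sketch of the positions described above; MY_MACRO is an
  // arbitrary name):
  //
  //   #ifdef   MY_MACRO
  //            ^ - TokStart (first non-whitespace symbol after the directive)
  //                   ^ - CurPtr (last symbol of the macro name)
  //
  // The returned StringRef would be "MY_MACRO".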
  // Skip any whitespaces starting from CurPtr. The method is used
  // only in the lines-skipping mode to find the first non-whitespace
  // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
  // and '\r'. The method skips C-style comments as well, because
  // it is used to find the beginning of the preprocessing directive.
  // If we do not handle C-style comments, the following code would
  // result in incorrect detection of a preprocessing directive:
  //
  //   /*
  //   #ifdef NAME
  //   */
  //
  // As long as we skip C-style comments, the following code is correctly
  // recognized as a preprocessing directive:
  //
  //   /* first line comment
  //      second line comment */ #ifdef NAME
  //
  // The method returns true upon reaching the first non-whitespace symbol
  // or EOF; CurPtr is set to point to this symbol. The method returns false,
  // if an error occurred during skipping of a C-style comment.
  bool prepSkipLineBegin();
  // Skip any whitespaces or comments after a preprocessing directive.
  // The method returns true upon reaching either end of the line
  // or end of the file. If there is a multiline C-style comment
  // after the preprocessing directive, the method skips
  // the comment, so the final CurPtr may point to one of the next lines.
  // The method returns false, if an error occurred during skipping
  // of a C- or C++-style comment, or a non-whitespace symbol appears
  // after the preprocessing directive.
  //
  // The method may be called both during lines-skipping and tokens
  // processing. It actually verifies that only whitespaces and/or
  // comments follow a preprocessing directive.
  //
  // After the execution of this method, CurPtr points either to the new line
  // symbol, the buffer end, or the non-whitespace symbol following the
  // preprocessing directive (see the sketch below).
  bool prepSkipDirectiveEnd();
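
  // For example (a sketch):
  //
  //   #define NAME /* trailing comment */   <- ok, only a comment follows
  //   #define NAME foo                      <- error, unexpected symbol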
  // Skip all symbols to the end of the line/file.
  // The method adjusts CurPtr, so that it points to either new line
  // symbol in the current line or the buffer end.
  void prepSkipToLineEnd();
  // Return true, if the current preprocessor control stack is such that
  // we should allow the lexer to process the next token, false - otherwise.
  //
  // In particular, the method returns true, if all the #ifdef/#else
  // controls on the stack have their IsDefined member set to true.
  bool prepIsProcessingEnabled();
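
  // For example (a sketch), while lexing the body of:
  //
  //   #ifdef A   // A is defined
  //   #ifdef B   // B is undefined
  //
  // the control stack holds {Ifdef, true} and {Ifdef, false}, so the method
  // returns false and token processing is disabled.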
  // Report an error, if we reach EOF with a non-empty preprocessing control
  // stack. This means there is no matching #endif for the previous
  // #ifdef or #else.
  void prepReportPreprocessorStackError();
};

} // end namespace llvm

#endif