1 //===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
10 #define LLVM_MC_MCPARSER_MCASMLEXER_H
12 #include "llvm/ADT/ArrayRef.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/MC/MCAsmMacro.h"
23 /// A callback class which is notified of each comment in an assembly file as
/// it is lexed. Implementations override HandleComment(), which is declared
/// pure virtual below.
25 class AsmCommentConsumer
{
/// Virtual so that destruction is dispatched dynamically when an
/// implementation is destroyed through an AsmCommentConsumer pointer.
27 virtual ~AsmCommentConsumer() = default;
29 /// Callback function for when a comment is lexed. \p Loc is the start of the
30 /// comment text (excluding the comment-start marker). \p CommentText is the
31 /// text of the comment, excluding the comment start and end markers, and the
32 /// newline for single-line comments.
33 virtual void HandleComment(SMLoc Loc
, StringRef CommentText
) = 0;
37 /// Generic assembler lexer interface, for use by target-specific assembly lexers.
40 /// The current token, stored in the base class for faster access.
41 SmallVector
<AsmToken
, 1> CurTok
;
43 /// The location and description of the current error
47 protected: // Can only create subclasses.
48 const char *TokStart
= nullptr;
49 bool SkipSpace
= true;
50 bool AllowAtInIdentifier
;
51 bool IsAtStartOfStatement
= true;
52 bool LexMasmIntegers
= false;
53 AsmCommentConsumer
*CommentConsumer
= nullptr;
57 virtual AsmToken
LexToken() = 0;
/// Report an error at \p errLoc described by \p err.
/// NOTE(review): the function body is not visible in this excerpt; presumably
/// it records both values for later retrieval (e.g. via getErr()) -- confirm.
59 void SetError(SMLoc errLoc
, const std::string
&err
) {
65 MCAsmLexer(const MCAsmLexer
&) = delete;
66 MCAsmLexer
&operator=(const MCAsmLexer
&) = delete;
67 virtual ~MCAsmLexer();
69 /// Consume the next token from the input stream and return it.
///
71 /// The lexer will continuously return the end-of-file token once the end of
72 /// the main input file has been reached.
///
/// The returned reference designates the front element of CurTok, so it
/// should not be relied upon once CurTok is modified again (for example by a
/// later Lex() or UnLex() call).
73 const AsmToken
&Lex() {
// The buffer must hold a current token to consume.
74 assert(!CurTok
.empty());
75 // Remember whether the token being consumed is an EndOfStatement; if so, the
// next token begins a new statement.
76 IsAtStartOfStatement
= CurTok
.front().getKind() == AsmToken::EndOfStatement
;
// Drop the consumed token from the front of the buffer.
77 CurTok
.erase(CurTok
.begin());
78 // LexToken may generate multiple tokens via UnLex but will always return
79 // the first one. Place the returned value at the head of the CurTok vector.
81 AsmToken T
= LexToken();
82 CurTok
.insert(CurTok
.begin(), T
);
84 return CurTok
.front();
/// Push \p Token back onto the front of the token buffer, making it the new
/// front element of CurTok.
///
/// Also clears IsAtStartOfStatement.
87 void UnLex(AsmToken
const &Token
) {
88 IsAtStartOfStatement
= false;
// Inserting at begin() shifts any tokens already buffered one slot back.
89 CurTok
.insert(CurTok
.begin(), Token
);
92 bool isAtStartOfStatement() { return IsAtStartOfStatement
; }
94 virtual StringRef
LexUntilEndOfStatement() = 0;
96 /// Get the current source location.
99 /// Get the current (last) lexed token.
/// NOTE(review): the function body is not visible in this excerpt; presumably
/// it returns the front element of CurTok -- confirm.
100 const AsmToken
&getTok() const {
104 /// Look ahead at the next token to be lexed.
///
/// Convenience wrapper that asks peekTokens() for a single token.
///
/// \param ShouldSkipSpace forwarded unchanged to peekTokens().
105 const AsmToken
peekTok(bool ShouldSkipSpace
= true) {
// Wrap Tok in a buffer view for peekTokens() to fill.
// NOTE(review): the declaration of Tok is not visible in this excerpt.
108 MutableArrayRef
<AsmToken
> Buf(Tok
);
109 size_t ReadCount
= peekTokens(Buf
, ShouldSkipSpace
);
// Exactly one token is expected back.
111 assert(ReadCount
== 1);
117 /// Look ahead an arbitrary number of tokens.
118 virtual size_t peekTokens(MutableArrayRef
<AsmToken
> Buf
,
119 bool ShouldSkipSpace
= true) = 0;
121 /// Get the current error location
126 /// Get the current error string.
/// NOTE(review): the function body is not visible in this excerpt; presumably
/// it returns the description stored by SetError() -- confirm.
127 const std::string
&getErr() {
131 /// Get the kind of current token.
132 AsmToken::TokenKind
getKind() const { return getTok().getKind(); }
134 /// Check if the current token has kind \p K.
135 bool is(AsmToken::TokenKind K
) const { return getTok().is(K
); }
137 /// Check if the current token has kind \p K.
138 bool isNot(AsmToken::TokenKind K
) const { return getTok().isNot(K
); }
140 /// Set whether spaces should be ignored by the lexer
141 void setSkipSpace(bool val
) { SkipSpace
= val
; }
143 bool getAllowAtInIdentifier() { return AllowAtInIdentifier
; }
144 void setAllowAtInIdentifier(bool v
) { AllowAtInIdentifier
= v
; }
/// Store \p CommentConsumer in the CommentConsumer member. The pointer is
/// stored as given; nothing else is done with it here.
146 void setCommentConsumer(AsmCommentConsumer
*CommentConsumer
) {
// `this->` is required because the parameter shadows the member of the same
// name.
147 this->CommentConsumer
= CommentConsumer
;
150 /// Set whether to lex masm-style binary and hex literals. They look like
151 /// 0b1101 and 0ABCh respectively.
152 void setLexMasmIntegers(bool V
) { LexMasmIntegers
= V
; }
155 } // end namespace llvm
157 #endif // LLVM_MC_MCPARSER_MCASMLEXER_H