[LLVM] Fix Maintainers.md formatting (NFC)
[llvm-project.git] / mlir / lib / Tools / PDLL / Parser / Lexer.h
blobcd9c2b770f69a82826085c4b0565c957b54e7b58
1 //===- Lexer.h - MLIR PDLL Frontend Lexer -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LIB_TOOLS_PDLL_PARSER_LEXER_H_
10 #define LIB_TOOLS_PDLL_PARSER_LEXER_H_
12 #include "mlir/Support/LLVM.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/SMLoc.h"
16 namespace llvm {
17 class SourceMgr;
18 } // namespace llvm
20 namespace mlir {
21 namespace pdll {
22 class CodeCompleteContext;
24 namespace ast {
25 class DiagnosticEngine;
26 } // namespace ast
28 //===----------------------------------------------------------------------===//
29 // Token
30 //===----------------------------------------------------------------------===//
32 class Token {
33 public:
34 enum Kind {
35 /// Markers.
36 eof,
37 error,
38 /// Token signifying a code completion location.
39 code_complete,
40 /// Token signifying a code completion location within a string.
41 code_complete_string,
43 /// Keywords.
44 KW_BEGIN,
45 /// Dependent keywords, i.e. those that are treated as keywords depending on
46 /// the current parser context.
47 KW_DEPENDENT_BEGIN,
48 kw_attr,
49 kw_op,
50 kw_type,
51 KW_DEPENDENT_END,
53 /// General keywords.
54 kw_Attr,
55 kw_erase,
56 kw_let,
57 kw_Constraint,
58 kw_not,
59 kw_Op,
60 kw_OpName,
61 kw_Pattern,
62 kw_replace,
63 kw_return,
64 kw_rewrite,
65 kw_Rewrite,
66 kw_Type,
67 kw_TypeRange,
68 kw_Value,
69 kw_ValueRange,
70 kw_with,
71 KW_END,
73 /// Punctuation.
74 arrow,
75 colon,
76 comma,
77 dot,
78 equal,
79 equal_arrow,
80 semicolon,
81 /// Paired punctuation.
82 less,
83 greater,
84 l_brace,
85 r_brace,
86 l_paren,
87 r_paren,
88 l_square,
89 r_square,
90 underscore,
92 /// Tokens.
93 directive,
94 identifier,
95 integer,
96 string_block,
97 string
99 Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {}
101 /// Given a token containing a string literal, return its value, including
102 /// removing the quote characters and unescaping the contents of the string.
103 std::string getStringValue() const;
105 /// Returns true if the current token is a string literal.
106 bool isString() const { return isAny(Token::string, Token::string_block); }
108 /// Returns true if the current token is a keyword.
109 bool isKeyword() const {
110 return kind > Token::KW_BEGIN && kind < Token::KW_END;
113 /// Returns true if the current token is a keyword in a dependent context, and
114 /// in any other situation (e.g. variable names) may be treated as an
115 /// identifier.
116 bool isDependentKeyword() const {
117 return kind > Token::KW_DEPENDENT_BEGIN && kind < Token::KW_DEPENDENT_END;
120 /// Return the bytes that make up this token.
121 StringRef getSpelling() const { return spelling; }
123 /// Return the kind of this token.
124 Kind getKind() const { return kind; }
126 /// Return true if this token is one of the specified kinds.
127 bool isAny(Kind k1, Kind k2) const { return is(k1) || is(k2); }
128 template <typename... T>
129 bool isAny(Kind k1, Kind k2, Kind k3, T... others) const {
130 return is(k1) || isAny(k2, k3, others...);
133 /// Return if the token does not have the given kind.
134 bool isNot(Kind k) const { return k != kind; }
135 template <typename... T>
136 bool isNot(Kind k1, Kind k2, T... others) const {
137 return !isAny(k1, k2, others...);
140 /// Return if the token has the given kind.
141 bool is(Kind k) const { return kind == k; }
143 /// Return a location for the start of this token.
144 SMLoc getStartLoc() const { return SMLoc::getFromPointer(spelling.data()); }
145 /// Return a location at the end of this token.
146 SMLoc getEndLoc() const {
147 return SMLoc::getFromPointer(spelling.data() + spelling.size());
149 /// Return a location for the range of this token.
150 SMRange getLoc() const { return SMRange(getStartLoc(), getEndLoc()); }
152 private:
153 /// Discriminator that indicates the kind of token this is.
154 Kind kind;
156 /// A reference to the entire token contents; this is always a pointer into
157 /// a memory buffer owned by the source manager.
158 StringRef spelling;
161 //===----------------------------------------------------------------------===//
162 // Lexer
163 //===----------------------------------------------------------------------===//
165 class Lexer {
166 public:
167 Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine,
168 CodeCompleteContext *codeCompleteContext);
169 ~Lexer();
171 /// Return a reference to the source manager used by the lexer.
172 llvm::SourceMgr &getSourceMgr() { return srcMgr; }
174 /// Return a reference to the diagnostic engine used by the lexer.
175 ast::DiagnosticEngine &getDiagEngine() { return diagEngine; }
177 /// Push an include of the given file. This will cause the lexer to start
178 /// processing the provided file. Returns failure if the file could not be
179 /// opened, success otherwise.
180 LogicalResult pushInclude(StringRef filename, SMRange includeLoc);
182 /// Lex the next token and return it.
183 Token lexToken();
185 /// Change the position of the lexer cursor. The next token we lex will start
186 /// at the designated point in the input.
187 void resetPointer(const char *newPointer) { curPtr = newPointer; }
189 /// Emit an error to the lexer with the given location and message.
190 Token emitError(SMRange loc, const Twine &msg);
191 Token emitError(const char *loc, const Twine &msg);
192 Token emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc,
193 const Twine &note);
195 private:
196 Token formToken(Token::Kind kind, const char *tokStart) {
197 return Token(kind, StringRef(tokStart, curPtr - tokStart));
200 /// Return the next character in the stream.
201 int getNextChar();
203 /// Lex methods.
204 void lexComment();
205 Token lexDirective(const char *tokStart);
206 Token lexIdentifier(const char *tokStart);
207 Token lexNumber(const char *tokStart);
208 Token lexString(const char *tokStart, bool isStringBlock);
210 llvm::SourceMgr &srcMgr;
211 int curBufferID;
212 StringRef curBuffer;
213 const char *curPtr;
215 /// The engine used to emit diagnostics during lexing/parsing.
216 ast::DiagnosticEngine &diagEngine;
218 /// A flag indicating if we added a default diagnostic handler to the provided
219 /// diagEngine.
220 bool addedHandlerToDiagEngine;
222 /// The optional code completion point within the input file.
223 const char *codeCompletionLocation;
225 } // namespace pdll
226 } // namespace mlir
228 #endif // LIB_TOOLS_PDLL_PARSER_LEXER_H_