1 //===- Token.cpp - MLIR Token Implementation ------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the Token class for the MLIR textual form.
11 //===----------------------------------------------------------------------===//
14 #include "mlir/Support/LLVM.h"
15 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/Support/ErrorHandling.h"
24 SMLoc
Token::getLoc() const { return SMLoc::getFromPointer(spelling
.data()); }
26 SMLoc
Token::getEndLoc() const {
27 return SMLoc::getFromPointer(spelling
.data() + spelling
.size());
30 SMRange
Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); }
32 /// For an integer token, return its value as an unsigned. If it doesn't fit,
33 /// return std::nullopt.
34 std::optional
<unsigned> Token::getUnsignedIntegerValue() const {
35 bool isHex
= spelling
.size() > 1 && spelling
[1] == 'x';
38 if (spelling
.getAsInteger(isHex
? 0 : 10, result
))
43 /// For an integer token, return its value as a uint64_t. If it doesn't fit,
44 /// return std::nullopt.
45 std::optional
<uint64_t> Token::getUInt64IntegerValue(StringRef spelling
) {
46 bool isHex
= spelling
.size() > 1 && spelling
[1] == 'x';
49 if (spelling
.getAsInteger(isHex
? 0 : 10, result
))
54 /// For a floatliteral, return its value as a double. Return std::nullopt if the
55 /// value underflows or overflows.
56 std::optional
<double> Token::getFloatingPointValue() const {
58 if (spelling
.getAsDouble(result
))
63 /// For an inttype token, return its bitwidth.
64 std::optional
<unsigned> Token::getIntTypeBitwidth() const {
65 assert(getKind() == inttype
);
66 unsigned bitwidthStart
= (spelling
[0] == 'i' ? 1 : 2);
68 if (spelling
.drop_front(bitwidthStart
).getAsInteger(10, result
))
73 std::optional
<bool> Token::getIntTypeSignedness() const {
74 assert(getKind() == inttype
);
75 if (spelling
[0] == 'i')
77 if (spelling
[0] == 's')
79 assert(spelling
[0] == 'u');
83 /// Given a token containing a string literal, return its value, including
84 /// removing the quote characters and unescaping the contents of the string. The
85 /// lexer has already verified that this token is valid.
86 std::string
Token::getStringValue() const {
87 assert(getKind() == string
|| getKind() == code_complete
||
88 (getKind() == at_identifier
&& getSpelling()[1] == '"'));
89 // Start by dropping the quotes.
90 StringRef bytes
= getSpelling().drop_front();
91 if (getKind() != Token::code_complete
) {
92 bytes
= bytes
.drop_back();
93 if (getKind() == at_identifier
)
94 bytes
= bytes
.drop_front();
98 result
.reserve(bytes
.size());
99 for (unsigned i
= 0, e
= bytes
.size(); i
!= e
;) {
106 assert(i
+ 1 <= e
&& "invalid string should be caught by lexer");
107 auto c1
= bytes
[i
++];
111 result
.push_back(c1
);
114 result
.push_back('\n');
117 result
.push_back('\t');
123 assert(i
+ 1 <= e
&& "invalid string should be caught by lexer");
124 auto c2
= bytes
[i
++];
126 assert(llvm::isHexDigit(c1
) && llvm::isHexDigit(c2
) && "invalid escape");
127 result
.push_back((llvm::hexDigitValue(c1
) << 4) | llvm::hexDigitValue(c2
));
133 /// Given a token containing a hex string literal, return its value or
134 /// std::nullopt if the token does not contain a valid hex string.
135 std::optional
<std::string
> Token::getHexStringValue() const {
136 assert(getKind() == string
);
138 // Get the internal string data, without the quotes.
139 StringRef bytes
= getSpelling().drop_front().drop_back();
141 // Try to extract the binary data from the hex string. We expect the hex
142 // string to start with `0x` and have an even number of hex nibbles (nibbles
143 // should come in pairs).
145 if (!bytes
.consume_front("0x") || (bytes
.size() & 1) ||
146 !llvm::tryGetFromHex(bytes
, hex
))
151 /// Given a token containing a symbol reference, return the unescaped string
153 std::string
Token::getSymbolReference() const {
154 assert(is(Token::at_identifier
) && "expected valid @-identifier");
155 StringRef nameStr
= getSpelling().drop_front();
157 // Check to see if the reference is a string literal, or a bare identifier.
158 if (nameStr
.front() == '"')
159 return getStringValue();
160 return std::string(nameStr
);
163 /// Given a hash_identifier token like #123, try to parse the number out of
164 /// the identifier, returning std::nullopt if it is a named identifier like #x
165 /// or if the integer doesn't fit.
166 std::optional
<unsigned> Token::getHashIdentifierNumber() const {
167 assert(getKind() == hash_identifier
);
169 if (spelling
.drop_front().getAsInteger(10, result
))
174 /// Given a punctuation or keyword token kind, return the spelling of the
175 /// token as a string. Warning: This will abort on markers, identifiers and
176 /// literal tokens since they have no fixed spelling.
177 StringRef
Token::getTokenSpelling(Kind kind
) {
180 llvm_unreachable("This token kind has no fixed spelling");
181 #define TOK_PUNCTUATION(NAME, SPELLING) \
184 #define TOK_KEYWORD(SPELLING) \
185 case kw_##SPELLING: \
187 #include "TokenKinds.def"
191 /// Return true if this is one of the keyword token kinds (e.g. kw_if).
192 bool Token::isKeyword() const {
196 #define TOK_KEYWORD(SPELLING) \
197 case kw_##SPELLING: \
199 #include "TokenKinds.def"
// Checks whether this token is a code completion token that matches `kind`.
// The visible code first tests `!isCodeCompletion() || spelling.empty()`, then
// compares the first character of the spelling against a per-kind marker:
// '#' for hash_identifier, '%' for percent_identifier, '^' for
// caret_identifier and '!' for exclamation_identifier. There is also a
// comparison against '"' whose `case` label is not visible.
// NOTE(review): this copy of the function is extraction-damaged. The
// statement guarded by the first `if`, the `switch` header, the first `case`
// label and everything after the last comparison (including the closing
// braces) are not visible here. Restore them from the upstream file before
// relying on this block.
203 bool Token::isCodeCompletionFor(Kind kind
) const {
204 if (!isCodeCompletion() || spelling
.empty())
208 return spelling
[0] == '"';
209 case Kind::hash_identifier
:
210 return spelling
[0] == '#';
211 case Kind::percent_identifier
:
212 return spelling
[0] == '%';
213 case Kind::caret_identifier
:
214 return spelling
[0] == '^';
215 case Kind::exclamation_identifier
:
216 return spelling
[0] == '!';