mlir/lib/AsmParser/Token.cpp

   1 //===- Token.cpp - MLIR Token Implementation ------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements the Token class for the MLIR textual form.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "Token.h"
  14 #include "mlir/Support/LLVM.h"
  15 #include "llvm/ADT/StringExtras.h"
  16 #include "llvm/Support/ErrorHandling.h"
  17 #include <cassert>
  18 #include <cstdint>
  19 #include <optional>
  20 #include <string>
  21
  22 using namespace mlir;
  23
  24 SMLoc Token::getLoc() const { return SMLoc::getFromPointer(spelling.data()); }
  25
  26 SMLoc Token::getEndLoc() const {
  27   return SMLoc::getFromPointer(spelling.data() + spelling.size());
  28 }
  29
  30 SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); }
  31
  32 /// For an integer token, return its value as an unsigned.  If it doesn't fit,
  33 /// return std::nullopt.
  34 std::optional<unsigned> Token::getUnsignedIntegerValue() const {
  35   bool isHex = spelling.size() > 1 && spelling[1] == 'x';
  36
  37   unsigned result = 0;
  38   if (spelling.getAsInteger(isHex ? 0 : 10, result))
  39     return std::nullopt;
  40   return result;
  41 }
  42
  43 /// For an integer token, return its value as a uint64_t.  If it doesn't fit,
  44 /// return std::nullopt.
  45 std::optional<uint64_t> Token::getUInt64IntegerValue(StringRef spelling) {
  46   bool isHex = spelling.size() > 1 && spelling[1] == 'x';
  47
  48   uint64_t result = 0;
  49   if (spelling.getAsInteger(isHex ? 0 : 10, result))
  50     return std::nullopt;
  51   return result;
  52 }
  53
  54 /// For a floatliteral, return its value as a double. Return std::nullopt if the
  55 /// value underflows or overflows.
  56 std::optional<double> Token::getFloatingPointValue() const {
  57   double result = 0;
  58   if (spelling.getAsDouble(result))
  59     return std::nullopt;
  60   return result;
  61 }
  62
  63 /// For an inttype token, return its bitwidth.
  64 std::optional<unsigned> Token::getIntTypeBitwidth() const {
  65   assert(getKind() == inttype);
  66   unsigned bitwidthStart = (spelling[0] == 'i' ? 1 : 2);
  67   unsigned result = 0;
  68   if (spelling.drop_front(bitwidthStart).getAsInteger(10, result))
  69     return std::nullopt;
  70   return result;
  71 }
  72
  73 std::optional<bool> Token::getIntTypeSignedness() const {
  74   assert(getKind() == inttype);
  75   if (spelling[0] == 'i')
  76     return std::nullopt;
  77   if (spelling[0] == 's')
  78     return true;
  79   assert(spelling[0] == 'u');
  80   return false;
  81 }
  82
  83 /// Given a token containing a string literal, return its value, including
  84 /// removing the quote characters and unescaping the contents of the string. The
  85 /// lexer has already verified that this token is valid.
  86 std::string Token::getStringValue() const {
  87   assert(getKind() == string || getKind() == code_complete ||
  88          (getKind() == at_identifier && getSpelling()[1] == '"'));
  89   // Start by dropping the quotes.
  90   StringRef bytes = getSpelling().drop_front();
  91   if (getKind() != Token::code_complete) {
  92     bytes = bytes.drop_back();
  93     if (getKind() == at_identifier)
  94       bytes = bytes.drop_front();
  95   }
  96
  97   std::string result;
  98   result.reserve(bytes.size());
  99   for (unsigned i = 0, e = bytes.size(); i != e;) {
 100     auto c = bytes[i++];
 101     if (c != '\\') {
 102       result.push_back(c);
 103       continue;
 104     }
 105
 106     assert(i + 1 <= e && "invalid string should be caught by lexer");
 107     auto c1 = bytes[i++];
 108     switch (c1) {
 109     case '"':
 110     case '\\':
 111       result.push_back(c1);
 112       continue;
 113     case 'n':
 114       result.push_back('\n');
 115       continue;
 116     case 't':
 117       result.push_back('\t');
 118       continue;
 119     default:
 120       break;
 121     }
 122
 123     assert(i + 1 <= e && "invalid string should be caught by lexer");
 124     auto c2 = bytes[i++];
 125
 126     assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
 127     result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2));
 128   }
 129
 130   return result;
 131 }
 132
 133 /// Given a token containing a hex string literal, return its value or
 134 /// std::nullopt if the token does not contain a valid hex string.
 135 std::optional<std::string> Token::getHexStringValue() const {
 136   assert(getKind() == string);
 137
 138   // Get the internal string data, without the quotes.
 139   StringRef bytes = getSpelling().drop_front().drop_back();
 140
 141   // Try to extract the binary data from the hex string. We expect the hex
 142   // string to start with `0x` and have an even number of hex nibbles (nibbles
 143   // should come in pairs).
 144   std::string hex;
 145   if (!bytes.consume_front("0x") || (bytes.size() & 1) ||
 146       !llvm::tryGetFromHex(bytes, hex))
 147     return std::nullopt;
 148   return hex;
 149 }
 150
 151 /// Given a token containing a symbol reference, return the unescaped string
 152 /// value.
 153 std::string Token::getSymbolReference() const {
 154   assert(is(Token::at_identifier) && "expected valid @-identifier");
 155   StringRef nameStr = getSpelling().drop_front();
 156
 157   // Check to see if the reference is a string literal, or a bare identifier.
 158   if (nameStr.front() == '"')
 159     return getStringValue();
 160   return std::string(nameStr);
 161 }
 162
 163 /// Given a hash_identifier token like #123, try to parse the number out of
 164 /// the identifier, returning std::nullopt if it is a named identifier like #x
 165 /// or if the integer doesn't fit.
 166 std::optional<unsigned> Token::getHashIdentifierNumber() const {
 167   assert(getKind() == hash_identifier);
 168   unsigned result = 0;
 169   if (spelling.drop_front().getAsInteger(10, result))
 170     return std::nullopt;
 171   return result;
 172 }
 173
 174 /// Given a punctuation or keyword token kind, return the spelling of the
 175 /// token as a string.  Warning: This will abort on markers, identifiers and
 176 /// literal tokens since they have no fixed spelling.
 177 StringRef Token::getTokenSpelling(Kind kind) {
 178   switch (kind) {
 179   default:
 180     llvm_unreachable("This token kind has no fixed spelling");
 181 #define TOK_PUNCTUATION(NAME, SPELLING)                                        \
 182   case NAME:                                                                   \
 183     return SPELLING;
 184 #define TOK_KEYWORD(SPELLING)                                                  \
 185   case kw_##SPELLING:                                                          \
 186     return #SPELLING;
 187 #include "TokenKinds.def"
 188   }
 189 }
 190
 191 /// Return true if this is one of the keyword token kinds (e.g. kw_if).
 192 bool Token::isKeyword() const {
 193   switch (kind) {
 194   default:
 195     return false;
 196 #define TOK_KEYWORD(SPELLING)                                                  \
 197   case kw_##SPELLING:                                                          \
 198     return true;
 199 #include "TokenKinds.def"
 200   }
 201 }
 202
 203 bool Token::isCodeCompletionFor(Kind kind) const {
 204   if (!isCodeCompletion() || spelling.empty())
 205     return false;
 206   switch (kind) {
 207   case Kind::string:
 208     return spelling[0] == '"';
 209   case Kind::hash_identifier:
 210     return spelling[0] == '#';
 211   case Kind::percent_identifier:
 212     return spelling[0] == '%';
 213   case Kind::caret_identifier:
 214     return spelling[0] == '^';
 215   case Kind::exclamation_identifier:
 216     return spelling[0] == '!';
 217   default:
 218     return false;
 219   }
 220 }