lib/MC/MCParser/AsmLexer.cpp

   1 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This class implements the lexer for assembly files.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "llvm/MC/MCParser/AsmLexer.h"
  14 #include "llvm/ADT/APInt.h"
  15 #include "llvm/ADT/ArrayRef.h"
  16 #include "llvm/ADT/StringExtras.h"
  17 #include "llvm/ADT/StringRef.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/MC/MCAsmInfo.h"
  20 #include "llvm/MC/MCParser/MCAsmLexer.h"
  21 #include "llvm/Support/SMLoc.h"
  22 #include "llvm/Support/SaveAndRestore.h"
  23 #include <cassert>
  24 #include <cctype>
  25 #include <cstdio>
  26 #include <cstring>
  27 #include <string>
  28 #include <tuple>
  29 #include <utility>
  30
  31 using namespace llvm;
  32
  33 AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
  34   AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
  35 }
  36
  37 AsmLexer::~AsmLexer() = default;
  38
  39 void AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
  40   CurBuf = Buf;
  41
  42   if (ptr)
  43     CurPtr = ptr;
  44   else
  45     CurPtr = CurBuf.begin();
  46
  47   TokStart = nullptr;
  48 }
  49
  50 /// ReturnError - Set the error to the specified string at the specified
  51 /// location.  This is defined to always return AsmToken::Error.
  52 AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
  53   SetError(SMLoc::getFromPointer(Loc), Msg);
  54
  55   return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
  56 }
  57
  58 int AsmLexer::getNextChar() {
  59   if (CurPtr == CurBuf.end())
  60     return EOF;
  61   return (unsigned char)*CurPtr++;
  62 }
  63
  64 /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
  65 ///
  66 /// The leading integral digit sequence and dot should have already been
  67 /// consumed, some or all of the fractional digit sequence *can* have been
  68 /// consumed.
  69 AsmToken AsmLexer::LexFloatLiteral() {
  70   // Skip the fractional digit sequence.
  71   while (isDigit(*CurPtr))
  72     ++CurPtr;
  73
  74   // Check for exponent; we intentionally accept a slighlty wider set of
  75   // literals here and rely on the upstream client to reject invalid ones (e.g.,
  76   // "1e+").
  77   if (*CurPtr == 'e' || *CurPtr == 'E') {
  78     ++CurPtr;
  79     if (*CurPtr == '-' || *CurPtr == '+')
  80       ++CurPtr;
  81     while (isDigit(*CurPtr))
  82       ++CurPtr;
  83   }
  84
  85   return AsmToken(AsmToken::Real,
  86                   StringRef(TokStart, CurPtr - TokStart));
  87 }
  88
  89 /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
  90 /// while making sure there are enough actual digits around for the constant to
  91 /// be valid.
  92 ///
  93 /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
  94 /// before we get here.
  95 AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
  96   assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
  97          "unexpected parse state in floating hex");
  98   bool NoFracDigits = true;
  99
 100   // Skip the fractional part if there is one
 101   if (*CurPtr == '.') {
 102     ++CurPtr;
 103
 104     const char *FracStart = CurPtr;
 105     while (isHexDigit(*CurPtr))
 106       ++CurPtr;
 107
 108     NoFracDigits = CurPtr == FracStart;
 109   }
 110
 111   if (NoIntDigits && NoFracDigits)
 112     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
 113                                  "expected at least one significand digit");
 114
 115   // Make sure we do have some kind of proper exponent part
 116   if (*CurPtr != 'p' && *CurPtr != 'P')
 117     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
 118                                  "expected exponent part 'p'");
 119   ++CurPtr;
 120
 121   if (*CurPtr == '+' || *CurPtr == '-')
 122     ++CurPtr;
 123
 124   // N.b. exponent digits are *not* hex
 125   const char *ExpStart = CurPtr;
 126   while (isDigit(*CurPtr))
 127     ++CurPtr;
 128
 129   if (CurPtr == ExpStart)
 130     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
 131                                  "expected at least one exponent digit");
 132
 133   return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
 134 }
 135
 136 /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
 137 static bool IsIdentifierChar(char c, bool AllowAt) {
 138   return isAlnum(c) || c == '_' || c == '$' || c == '.' ||
 139          (c == '@' && AllowAt) || c == '?';
 140 }
 141
 142 AsmToken AsmLexer::LexIdentifier() {
 143   // Check for floating point literals.
 144   if (CurPtr[-1] == '.' && isDigit(*CurPtr)) {
 145     // Disambiguate a .1243foo identifier from a floating literal.
 146     while (isDigit(*CurPtr))
 147       ++CurPtr;
 148     if (*CurPtr == 'e' || *CurPtr == 'E' ||
 149         !IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
 150       return LexFloatLiteral();
 151   }
 152
 153   while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
 154     ++CurPtr;
 155
 156   // Handle . as a special case.
 157   if (CurPtr == TokStart+1 && TokStart[0] == '.')
 158     return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
 159
 160   return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
 161 }
 162
 163 /// LexSlash: Slash: /
 164 ///           C-Style Comment: /* ... */
 165 AsmToken AsmLexer::LexSlash() {
 166   switch (*CurPtr) {
 167   case '*':
 168     IsAtStartOfStatement = false;
 169     break; // C style comment.
 170   case '/':
 171     ++CurPtr;
 172     return LexLineComment();
 173   default:
 174     IsAtStartOfStatement = false;
 175     return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
 176   }
 177
 178   // C Style comment.
 179   ++CurPtr;  // skip the star.
 180   const char *CommentTextStart = CurPtr;
 181   while (CurPtr != CurBuf.end()) {
 182     switch (*CurPtr++) {
 183     case '*':
 184       // End of the comment?
 185       if (*CurPtr != '/')
 186         break;
 187       // If we have a CommentConsumer, notify it about the comment.
 188       if (CommentConsumer) {
 189         CommentConsumer->HandleComment(
 190             SMLoc::getFromPointer(CommentTextStart),
 191             StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
 192       }
 193       ++CurPtr;   // End the */.
 194       return AsmToken(AsmToken::Comment,
 195                       StringRef(TokStart, CurPtr - TokStart));
 196     }
 197   }
 198   return ReturnError(TokStart, "unterminated comment");
 199 }
 200
 201 /// LexLineComment: Comment: #[^\n]*
 202 ///                        : //[^\n]*
 203 AsmToken AsmLexer::LexLineComment() {
 204   // Mark This as an end of statement with a body of the
 205   // comment. While it would be nicer to leave this two tokens,
 206   // backwards compatability with TargetParsers makes keeping this in this form
 207   // better.
 208   const char *CommentTextStart = CurPtr;
 209   int CurChar = getNextChar();
 210   while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
 211     CurChar = getNextChar();
 212   if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')
 213     ++CurPtr;
 214
 215   // If we have a CommentConsumer, notify it about the comment.
 216   if (CommentConsumer) {
 217     CommentConsumer->HandleComment(
 218         SMLoc::getFromPointer(CommentTextStart),
 219         StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
 220   }
 221
 222   IsAtStartOfLine = true;
 223   // This is a whole line comment. leave newline
 224   if (IsAtStartOfStatement)
 225     return AsmToken(AsmToken::EndOfStatement,
 226                     StringRef(TokStart, CurPtr - TokStart));
 227   IsAtStartOfStatement = true;
 228
 229   return AsmToken(AsmToken::EndOfStatement,
 230                   StringRef(TokStart, CurPtr - 1 - TokStart));
 231 }
 232
 233 static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
 234   // Skip ULL, UL, U, L and LL suffices.
 235   if (CurPtr[0] == 'U')
 236     ++CurPtr;
 237   if (CurPtr[0] == 'L')
 238     ++CurPtr;
 239   if (CurPtr[0] == 'L')
 240     ++CurPtr;
 241 }
 242
 243 // Look ahead to search for first non-hex digit, if it's [hH], then we treat the
 244 // integer as a hexadecimal, possibly with leading zeroes.
 245 static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
 246                                bool LexHex) {
 247   const char *FirstNonDec = nullptr;
 248   const char *LookAhead = CurPtr;
 249   while (true) {
 250     if (isDigit(*LookAhead)) {
 251       ++LookAhead;
 252     } else {
 253       if (!FirstNonDec)
 254         FirstNonDec = LookAhead;
 255
 256       // Keep going if we are looking for a 'h' suffix.
 257       if (LexHex && isHexDigit(*LookAhead))
 258         ++LookAhead;
 259       else
 260         break;
 261     }
 262   }
 263   bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
 264   CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
 265   if (isHex)
 266     return 16;
 267   return DefaultRadix;
 268 }
 269
 270 static AsmToken intToken(StringRef Ref, APInt &Value)
 271 {
 272   if (Value.isIntN(64))
 273     return AsmToken(AsmToken::Integer, Ref, Value);
 274   return AsmToken(AsmToken::BigNum, Ref, Value);
 275 }
 276
 277 /// LexDigit: First character is [0-9].
 278 ///   Local Label: [0-9][:]
 279 ///   Forward/Backward Label: [0-9][fb]
 280 ///   Binary integer: 0b[01]+
 281 ///   Octal integer: 0[0-7]+
 282 ///   Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
 283 ///   Decimal integer: [1-9][0-9]*
 284 AsmToken AsmLexer::LexDigit() {
 285   // MASM-flavor binary integer: [01]+[bB]
 286   // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
 287   if (LexMasmIntegers && isdigit(CurPtr[-1])) {
 288     const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
 289                                    CurPtr - 1 : nullptr;
 290     const char *OldCurPtr = CurPtr;
 291     while (isHexDigit(*CurPtr)) {
 292       if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
 293         FirstNonBinary = CurPtr;
 294       ++CurPtr;
 295     }
 296
 297     unsigned Radix = 0;
 298     if (*CurPtr == 'h' || *CurPtr == 'H') {
 299       // hexadecimal number
 300       ++CurPtr;
 301       Radix = 16;
 302     } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
 303                (*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
 304       Radix = 2;
 305
 306     if (Radix == 2 || Radix == 16) {
 307       StringRef Result(TokStart, CurPtr - TokStart);
 308       APInt Value(128, 0, true);
 309
 310       if (Result.drop_back().getAsInteger(Radix, Value))
 311         return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
 312                              "invalid hexdecimal number");
 313
 314       // MSVC accepts and ignores type suffices on integer literals.
 315       SkipIgnoredIntegerSuffix(CurPtr);
 316
 317       return intToken(Result, Value);
 318    }
 319
 320     // octal/decimal integers, or floating point numbers, fall through
 321     CurPtr = OldCurPtr;
 322   }
 323
 324   // Decimal integer: [1-9][0-9]*
 325   if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
 326     unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
 327     bool isHex = Radix == 16;
 328     // Check for floating point literals.
 329     if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
 330       ++CurPtr;
 331       return LexFloatLiteral();
 332     }
 333
 334     StringRef Result(TokStart, CurPtr - TokStart);
 335
 336     APInt Value(128, 0, true);
 337     if (Result.getAsInteger(Radix, Value))
 338       return ReturnError(TokStart, !isHex ? "invalid decimal number" :
 339                            "invalid hexdecimal number");
 340
 341     // Consume the [hH].
 342     if (LexMasmIntegers && Radix == 16)
 343       ++CurPtr;
 344
 345     // The darwin/x86 (and x86-64) assembler accepts and ignores type
 346     // suffices on integer literals.
 347     SkipIgnoredIntegerSuffix(CurPtr);
 348
 349     return intToken(Result, Value);
 350   }
 351
 352   if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
 353     ++CurPtr;
 354     // See if we actually have "0b" as part of something like "jmp 0b\n"
 355     if (!isDigit(CurPtr[0])) {
 356       --CurPtr;
 357       StringRef Result(TokStart, CurPtr - TokStart);
 358       return AsmToken(AsmToken::Integer, Result, 0);
 359     }
 360     const char *NumStart = CurPtr;
 361     while (CurPtr[0] == '0' || CurPtr[0] == '1')
 362       ++CurPtr;
 363
 364     // Requires at least one binary digit.
 365     if (CurPtr == NumStart)
 366       return ReturnError(TokStart, "invalid binary number");
 367
 368     StringRef Result(TokStart, CurPtr - TokStart);
 369
 370     APInt Value(128, 0, true);
 371     if (Result.substr(2).getAsInteger(2, Value))
 372       return ReturnError(TokStart, "invalid binary number");
 373
 374     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
 375     // suffixes on integer literals.
 376     SkipIgnoredIntegerSuffix(CurPtr);
 377
 378     return intToken(Result, Value);
 379   }
 380
 381   if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
 382     ++CurPtr;
 383     const char *NumStart = CurPtr;
 384     while (isHexDigit(CurPtr[0]))
 385       ++CurPtr;
 386
 387     // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
 388     // diagnosed by LexHexFloatLiteral).
 389     if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
 390       return LexHexFloatLiteral(NumStart == CurPtr);
 391
 392     // Otherwise requires at least one hex digit.
 393     if (CurPtr == NumStart)
 394       return ReturnError(CurPtr-2, "invalid hexadecimal number");
 395
 396     APInt Result(128, 0);
 397     if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
 398       return ReturnError(TokStart, "invalid hexadecimal number");
 399
 400     // Consume the optional [hH].
 401     if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
 402       ++CurPtr;
 403
 404     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
 405     // suffixes on integer literals.
 406     SkipIgnoredIntegerSuffix(CurPtr);
 407
 408     return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
 409   }
 410
 411   // Either octal or hexadecimal.
 412   APInt Value(128, 0, true);
 413   unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
 414   bool isHex = Radix == 16;
 415   StringRef Result(TokStart, CurPtr - TokStart);
 416   if (Result.getAsInteger(Radix, Value))
 417     return ReturnError(TokStart, !isHex ? "invalid octal number" :
 418                        "invalid hexdecimal number");
 419
 420   // Consume the [hH].
 421   if (Radix == 16)
 422     ++CurPtr;
 423
 424   // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
 425   // suffixes on integer literals.
 426   SkipIgnoredIntegerSuffix(CurPtr);
 427
 428   return intToken(Result, Value);
 429 }
 430
 431 /// LexSingleQuote: Integer: 'b'
 432 AsmToken AsmLexer::LexSingleQuote() {
 433   int CurChar = getNextChar();
 434
 435   if (CurChar == '\\')
 436     CurChar = getNextChar();
 437
 438   if (CurChar == EOF)
 439     return ReturnError(TokStart, "unterminated single quote");
 440
 441   CurChar = getNextChar();
 442
 443   if (CurChar != '\'')
 444     return ReturnError(TokStart, "single quote way too long");
 445
 446   // The idea here being that 'c' is basically just an integral
 447   // constant.
 448   StringRef Res = StringRef(TokStart,CurPtr - TokStart);
 449   long long Value;
 450
 451   if (Res.startswith("\'\\")) {
 452     char theChar = Res[2];
 453     switch (theChar) {
 454       default: Value = theChar; break;
 455       case '\'': Value = '\''; break;
 456       case 't': Value = '\t'; break;
 457       case 'n': Value = '\n'; break;
 458       case 'b': Value = '\b'; break;
 459     }
 460   } else
 461     Value = TokStart[1];
 462
 463   return AsmToken(AsmToken::Integer, Res, Value);
 464 }
 465
 466 /// LexQuote: String: "..."
 467 AsmToken AsmLexer::LexQuote() {
 468   int CurChar = getNextChar();
 469   // TODO: does gas allow multiline string constants?
 470   while (CurChar != '"') {
 471     if (CurChar == '\\') {
 472       // Allow \", etc.
 473       CurChar = getNextChar();
 474     }
 475
 476     if (CurChar == EOF)
 477       return ReturnError(TokStart, "unterminated string constant");
 478
 479     CurChar = getNextChar();
 480   }
 481
 482   return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
 483 }
 484
 485 StringRef AsmLexer::LexUntilEndOfStatement() {
 486   TokStart = CurPtr;
 487
 488   while (!isAtStartOfComment(CurPtr) &&     // Start of line comment.
 489          !isAtStatementSeparator(CurPtr) && // End of statement marker.
 490          *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
 491     ++CurPtr;
 492   }
 493   return StringRef(TokStart, CurPtr-TokStart);
 494 }
 495
 496 StringRef AsmLexer::LexUntilEndOfLine() {
 497   TokStart = CurPtr;
 498
 499   while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
 500     ++CurPtr;
 501   }
 502   return StringRef(TokStart, CurPtr-TokStart);
 503 }
 504
 505 size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
 506                             bool ShouldSkipSpace) {
 507   SaveAndRestore<const char *> SavedTokenStart(TokStart);
 508   SaveAndRestore<const char *> SavedCurPtr(CurPtr);
 509   SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine);
 510   SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement);
 511   SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace);
 512   SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true);
 513   std::string SavedErr = getErr();
 514   SMLoc SavedErrLoc = getErrLoc();
 515
 516   size_t ReadCount;
 517   for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
 518     AsmToken Token = LexToken();
 519
 520     Buf[ReadCount] = Token;
 521
 522     if (Token.is(AsmToken::Eof))
 523       break;
 524   }
 525
 526   SetError(SavedErrLoc, SavedErr);
 527   return ReadCount;
 528 }
 529
 530 bool AsmLexer::isAtStartOfComment(const char *Ptr) {
 531   StringRef CommentString = MAI.getCommentString();
 532
 533   if (CommentString.size() == 1)
 534     return CommentString[0] == Ptr[0];
 535
 536   // Allow # preprocessor commments also be counted as comments for "##" cases
 537   if (CommentString[1] == '#')
 538     return CommentString[0] == Ptr[0];
 539
 540   return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
 541 }
 542
 543 bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
 544   return strncmp(Ptr, MAI.getSeparatorString(),
 545                  strlen(MAI.getSeparatorString())) == 0;
 546 }
 547
 548 AsmToken AsmLexer::LexToken() {
 549   TokStart = CurPtr;
 550   // This always consumes at least one character.
 551   int CurChar = getNextChar();
 552
 553   if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
 554     // If this starts with a '#', this may be a cpp
 555     // hash directive and otherwise a line comment.
 556     AsmToken TokenBuf[2];
 557     MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
 558     size_t num = peekTokens(Buf, true);
 559     // There cannot be a space preceeding this
 560     if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
 561         TokenBuf[1].is(AsmToken::String)) {
 562       CurPtr = TokStart; // reset curPtr;
 563       StringRef s = LexUntilEndOfLine();
 564       UnLex(TokenBuf[1]);
 565       UnLex(TokenBuf[0]);
 566       return AsmToken(AsmToken::HashDirective, s);
 567     }
 568     return LexLineComment();
 569   }
 570
 571   if (isAtStartOfComment(TokStart))
 572     return LexLineComment();
 573
 574   if (isAtStatementSeparator(TokStart)) {
 575     CurPtr += strlen(MAI.getSeparatorString()) - 1;
 576     IsAtStartOfLine = true;
 577     IsAtStartOfStatement = true;
 578     return AsmToken(AsmToken::EndOfStatement,
 579                     StringRef(TokStart, strlen(MAI.getSeparatorString())));
 580   }
 581
 582   // If we're missing a newline at EOF, make sure we still get an
 583   // EndOfStatement token before the Eof token.
 584   if (CurChar == EOF && !IsAtStartOfStatement) {
 585     IsAtStartOfLine = true;
 586     IsAtStartOfStatement = true;
 587     return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
 588   }
 589   IsAtStartOfLine = false;
 590   bool OldIsAtStartOfStatement = IsAtStartOfStatement;
 591   IsAtStartOfStatement = false;
 592   switch (CurChar) {
 593   default:
 594     // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
 595     if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
 596       return LexIdentifier();
 597
 598     // Unknown character, emit an error.
 599     return ReturnError(TokStart, "invalid character in input");
 600   case EOF:
 601     IsAtStartOfLine = true;
 602     IsAtStartOfStatement = true;
 603     return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
 604   case 0:
 605   case ' ':
 606   case '\t':
 607     IsAtStartOfStatement = OldIsAtStartOfStatement;
 608     while (*CurPtr == ' ' || *CurPtr == '\t')
 609       CurPtr++;
 610     if (SkipSpace)
 611       return LexToken(); // Ignore whitespace.
 612     else
 613       return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
 614   case '\r': {
 615     IsAtStartOfLine = true;
 616     IsAtStartOfStatement = true;
 617     // If this is a CR followed by LF, treat that as one token.
 618     if (CurPtr != CurBuf.end() && *CurPtr == '\n')
 619       ++CurPtr;
 620     return AsmToken(AsmToken::EndOfStatement,
 621                     StringRef(TokStart, CurPtr - TokStart));
 622   }
 623   case '\n':
 624     IsAtStartOfLine = true;
 625     IsAtStartOfStatement = true;
 626     return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
 627   case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
 628   case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
 629   case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
 630   case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
 631   case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
 632   case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
 633   case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
 634   case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
 635   case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
 636   case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
 637   case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
 638   case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
 639   case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
 640   case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
 641   case '=':
 642     if (*CurPtr == '=') {
 643       ++CurPtr;
 644       return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
 645     }
 646     return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
 647   case '-':
 648     if (*CurPtr == '>') {
 649       ++CurPtr;
 650       return AsmToken(AsmToken::MinusGreater, StringRef(TokStart, 2));
 651     }
 652     return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
 653   case '|':
 654     if (*CurPtr == '|') {
 655       ++CurPtr;
 656       return AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
 657     }
 658     return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
 659   case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
 660   case '&':
 661     if (*CurPtr == '&') {
 662       ++CurPtr;
 663       return AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
 664     }
 665     return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
 666   case '!':
 667     if (*CurPtr == '=') {
 668       ++CurPtr;
 669       return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
 670     }
 671     return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
 672   case '%':
 673     if (MAI.hasMipsExpressions()) {
 674       AsmToken::TokenKind Operator;
 675       unsigned OperatorLength;
 676
 677       std::tie(Operator, OperatorLength) =
 678           StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>(
 679               StringRef(CurPtr))
 680               .StartsWith("call16", {AsmToken::PercentCall16, 7})
 681               .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
 682               .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
 683               .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
 684               .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
 685               .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
 686               .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
 687               .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
 688               .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
 689               .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
 690               .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
 691               .StartsWith("got", {AsmToken::PercentGot, 4})
 692               .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
 693               .StartsWith("higher", {AsmToken::PercentHigher, 7})
 694               .StartsWith("highest", {AsmToken::PercentHighest, 8})
 695               .StartsWith("hi", {AsmToken::PercentHi, 3})
 696               .StartsWith("lo", {AsmToken::PercentLo, 3})
 697               .StartsWith("neg", {AsmToken::PercentNeg, 4})
 698               .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
 699               .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
 700               .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
 701               .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
 702               .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
 703               .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
 704               .Default({AsmToken::Percent, 1});
 705
 706       if (Operator != AsmToken::Percent) {
 707         CurPtr += OperatorLength - 1;
 708         return AsmToken(Operator, StringRef(TokStart, OperatorLength));
 709       }
 710     }
 711     return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
 712   case '/':
 713     IsAtStartOfStatement = OldIsAtStartOfStatement;
 714     return LexSlash();
 715   case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
 716   case '\'': return LexSingleQuote();
 717   case '"': return LexQuote();
 718   case '0': case '1': case '2': case '3': case '4':
 719   case '5': case '6': case '7': case '8': case '9':
 720     return LexDigit();
 721   case '<':
 722     switch (*CurPtr) {
 723     case '<':
 724       ++CurPtr;
 725       return AsmToken(AsmToken::LessLess, StringRef(TokStart, 2));
 726     case '=':
 727       ++CurPtr;
 728       return AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2));
 729     case '>':
 730       ++CurPtr;
 731       return AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2));
 732     default:
 733       return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
 734     }
 735   case '>':
 736     switch (*CurPtr) {
 737     case '>':
 738       ++CurPtr;
 739       return AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2));
 740     case '=':
 741       ++CurPtr;
 742       return AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2));
 743     default:
 744       return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
 745     }
 746
 747   // TODO: Quoted identifiers (objc methods etc)
 748   // local labels: [0-9][:]
 749   // Forward/backward labels: [0-9][fb]
 750   // Integers, fp constants, character constants.
 751   }
 752 }