clang/unittests/Tooling/Syntax/TokensTest.cpp

   1 //===- TokensTest.cpp -----------------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
#include "clang/Tooling/Syntax/Tokens.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/Expr.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.def"
#include "clang/Basic/TokenKinds.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendAction.h"
#include "clang/Frontend/Utils.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Testing/Annotations/Annotations.h"
#include "llvm/Testing/Support/SupportHelpers.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <memory>
#include <optional>
#include <ostream>
#include <string>
  49
  50 using namespace clang;
  51 using namespace clang::syntax;
  52
  53 using llvm::ValueIs;
  54 using ::testing::_;
  55 using ::testing::AllOf;
  56 using ::testing::Contains;
  57 using ::testing::ElementsAre;
  58 using ::testing::Field;
  59 using ::testing::IsEmpty;
  60 using ::testing::Matcher;
  61 using ::testing::Not;
  62 using ::testing::Pointee;
  63 using ::testing::StartsWith;
  64
  65 namespace {
  66 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
  67 // argument.
  68 MATCHER_P(SameRange, A, "") {
  69   return A.begin() == arg.begin() && A.end() == arg.end();
  70 }
  71
  72 Matcher<TokenBuffer::Expansion>
  73 IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
  74             Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
  75   return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled),
  76                Field(&TokenBuffer::Expansion::Expanded, Expanded));
  77 }
  78 // Matchers for syntax::Token.
  79 MATCHER_P(Kind, K, "") { return arg.kind() == K; }
  80 MATCHER_P2(HasText, Text, SourceMgr, "") {
  81   return arg.text(*SourceMgr) == Text;
  82 }
  83 /// Checks the start and end location of a token are equal to SourceRng.
  84 MATCHER_P(RangeIs, SourceRng, "") {
  85   return arg.location() == SourceRng.first &&
  86          arg.endLocation() == SourceRng.second;
  87 }
  88
  89 class TokenCollectorTest : public ::testing::Test {
  90 public:
  91   /// Run the clang frontend, collect the preprocessed tokens from the frontend
  92   /// invocation and store them in this->Buffer.
  93   /// This also clears SourceManager before running the compiler.
  94   void recordTokens(llvm::StringRef Code) {
  95     class RecordTokens : public ASTFrontendAction {
  96     public:
  97       explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
  98
  99       bool BeginSourceFileAction(CompilerInstance &CI) override {
 100         assert(!Collector && "expected only a single call to BeginSourceFile");
 101         Collector.emplace(CI.getPreprocessor());
 102         return true;
 103       }
 104       void EndSourceFileAction() override {
 105         assert(Collector && "BeginSourceFileAction was never called");
 106         Result = std::move(*Collector).consume();
 107         Result.indexExpandedTokens();
 108       }
 109
 110       std::unique_ptr<ASTConsumer>
 111       CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
 112         return std::make_unique<ASTConsumer>();
 113       }
 114
 115     private:
 116       TokenBuffer &Result;
 117       std::optional<TokenCollector> Collector;
 118     };
 119
 120     constexpr const char *FileName = "./input.cpp";
 121     FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
 122     // Prepare to run a compiler.
 123     if (!Diags->getClient())
 124       Diags->setClient(new IgnoringDiagConsumer);
 125     std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
 126                                       FileName};
 127     CreateInvocationOptions CIOpts;
 128     CIOpts.Diags = Diags;
 129     CIOpts.VFS = FS;
 130     auto CI = createInvocation(Args, std::move(CIOpts));
 131     assert(CI);
 132     CI->getFrontendOpts().DisableFree = false;
 133     CI->getPreprocessorOpts().addRemappedFile(
 134         FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
 135     CompilerInstance Compiler;
 136     Compiler.setInvocation(std::move(CI));
 137     Compiler.setDiagnostics(Diags.get());
 138     Compiler.setFileManager(FileMgr.get());
 139     Compiler.setSourceManager(SourceMgr.get());
 140
 141     this->Buffer = TokenBuffer(*SourceMgr);
 142     RecordTokens Recorder(this->Buffer);
 143     ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
 144         << "failed to run the frontend";
 145   }
 146
 147   /// Record the tokens and return a test dump of the resulting buffer.
 148   std::string collectAndDump(llvm::StringRef Code) {
 149     recordTokens(Code);
 150     return Buffer.dumpForTests();
 151   }
 152
 153   // Adds a file to the test VFS.
 154   void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
 155     if (!FS->addFile(Path, time_t(),
 156                      llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
 157       ADD_FAILURE() << "could not add a file to VFS: " << Path;
 158     }
 159   }
 160
 161   /// Add a new file, run syntax::tokenize() on the range if any, run it on the
 162   /// whole file otherwise and return the results.
 163   std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
 164     llvm::Annotations Annot(Text);
 165     auto FID = SourceMgr->createFileID(
 166         llvm::MemoryBuffer::getMemBufferCopy(Annot.code()));
 167     // FIXME: pass proper LangOptions.
 168     if (Annot.ranges().empty())
 169       return syntax::tokenize(FID, *SourceMgr, LangOptions());
 170     return syntax::tokenize(
 171         syntax::FileRange(FID, Annot.range().Begin, Annot.range().End),
 172         *SourceMgr, LangOptions());
 173   }
 174
 175   // Specialized versions of matchers that hide the SourceManager from clients.
 176   Matcher<syntax::Token> HasText(std::string Text) const {
 177     return ::HasText(Text, SourceMgr.get());
 178   }
 179   Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
 180     std::pair<SourceLocation, SourceLocation> Ls;
 181     Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
 182                    .getLocWithOffset(R.Begin);
 183     Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
 184                     .getLocWithOffset(R.End);
 185     return ::RangeIs(Ls);
 186   }
 187
 188   /// Finds a subrange in O(n * m).
 189   template <class T, class U, class Eq>
 190   llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
 191                                  llvm::ArrayRef<T> Range, Eq F) {
 192     assert(Subrange.size() >= 1);
 193     if (Range.size() < Subrange.size())
 194       return llvm::ArrayRef(Range.end(), Range.end());
 195     for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size();
 196          Begin <= Last; ++Begin) {
 197       auto It = Begin;
 198       for (auto ItSub = Subrange.begin(); ItSub != Subrange.end();
 199            ++ItSub, ++It) {
 200         if (!F(*ItSub, *It))
 201           goto continue_outer;
 202       }
 203       return llvm::ArrayRef(Begin, It);
 204     continue_outer:;
 205     }
 206     return llvm::ArrayRef(Range.end(), Range.end());
 207   }
 208
 209   /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
 210   /// The match should be unique. \p Query is a whitespace-separated list of
 211   /// tokens to search for.
 212   llvm::ArrayRef<syntax::Token>
 213   findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
 214     llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
 215     Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
 216     if (QueryTokens.empty()) {
 217       ADD_FAILURE() << "will not look for an empty list of tokens";
 218       std::abort();
 219     }
 220     // An equality test for search.
 221     auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
 222       return Q == T.text(*SourceMgr);
 223     };
 224     // Find a match.
 225     auto Found = findSubrange(llvm::ArrayRef(QueryTokens), Tokens, TextMatches);
 226     if (Found.begin() == Tokens.end()) {
 227       ADD_FAILURE() << "could not find the subrange for " << Query;
 228       std::abort();
 229     }
 230     // Check that the match is unique.
 231     if (findSubrange(llvm::ArrayRef(QueryTokens),
 232                      llvm::ArrayRef(Found.end(), Tokens.end()), TextMatches)
 233             .begin() != Tokens.end()) {
 234       ADD_FAILURE() << "match is not unique for " << Query;
 235       std::abort();
 236     }
 237     return Found;
 238   };
 239
 240   // Specialized versions of findTokenRange for expanded and spelled tokens.
 241   llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
 242     return findTokenRange(Query, Buffer.expandedTokens());
 243   }
 244   llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
 245                                             FileID File = FileID()) {
 246     if (!File.isValid())
 247       File = SourceMgr->getMainFileID();
 248     return findTokenRange(Query, Buffer.spelledTokens(File));
 249   }
 250
 251   // Data fields.
 252   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
 253       new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
 254   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
 255       new llvm::vfs::InMemoryFileSystem;
 256   llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
 257       new FileManager(FileSystemOptions(), FS);
 258   llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
 259       new SourceManager(*Diags, *FileMgr);
 260   /// Contains last result of calling recordTokens().
 261   TokenBuffer Buffer = TokenBuffer(*SourceMgr);
 262 };
 263
 264 TEST_F(TokenCollectorTest, RawMode) {
 265   EXPECT_THAT(tokenize("int main() {}"),
 266               ElementsAre(Kind(tok::kw_int),
 267                           AllOf(HasText("main"), Kind(tok::identifier)),
 268                           Kind(tok::l_paren), Kind(tok::r_paren),
 269                           Kind(tok::l_brace), Kind(tok::r_brace)));
 270   // Comments are ignored for now.
 271   EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
 272               ElementsAre(Kind(tok::kw_int),
 273                           AllOf(HasText("a"), Kind(tok::identifier)),
 274                           Kind(tok::semi)));
 275   EXPECT_THAT(tokenize("int [[main() {]]}"),
 276               ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)),
 277                           Kind(tok::l_paren), Kind(tok::r_paren),
 278                           Kind(tok::l_brace)));
 279   EXPECT_THAT(tokenize("int [[main() {   ]]}"),
 280               ElementsAre(AllOf(HasText("main"), Kind(tok::identifier)),
 281                           Kind(tok::l_paren), Kind(tok::r_paren),
 282                           Kind(tok::l_brace)));
 283   // First token is partially parsed, last token is fully included even though
 284   // only a part of it is contained in the range.
 285   EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}"),
 286               ElementsAre(AllOf(HasText("ain"), Kind(tok::identifier)),
 287                           Kind(tok::l_paren), Kind(tok::r_paren),
 288                           Kind(tok::l_brace), Kind(tok::kw_return)));
 289 }
 290
 291 TEST_F(TokenCollectorTest, Basic) {
 292   std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
 293       {"int main() {}",
 294        R"(expanded tokens:
 295   int main ( ) { }
 296 file './input.cpp'
 297   spelled tokens:
 298     int main ( ) { }
 299   no mappings.
 300 )"},
 301       // All kinds of whitespace are ignored.
 302       {"\t\n  int\t\n  main\t\n  (\t\n  )\t\n{\t\n  }\t\n",
 303        R"(expanded tokens:
 304   int main ( ) { }
 305 file './input.cpp'
 306   spelled tokens:
 307     int main ( ) { }
 308   no mappings.
 309 )"},
 310       // Annotation tokens are ignored.
 311       {R"cpp(
 312         #pragma GCC visibility push (public)
 313         #pragma GCC visibility pop
 314       )cpp",
 315        R"(expanded tokens:
 316   <empty>
 317 file './input.cpp'
 318   spelled tokens:
 319     # pragma GCC visibility push ( public ) # pragma GCC visibility pop
 320   mappings:
 321     ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
 322 )"},
 323       // Empty files should not crash.
 324       {R"cpp()cpp", R"(expanded tokens:
 325   <empty>
 326 file './input.cpp'
 327   spelled tokens:
 328     <empty>
 329   no mappings.
 330 )"},
 331       // Should not crash on errors inside '#define' directives. Error is that
 332       // stringification (#B) does not refer to a macro parameter.
 333       {
 334           R"cpp(
 335 a
 336 #define MACRO() A #B
 337 )cpp",
 338           R"(expanded tokens:
 339   a
 340 file './input.cpp'
 341   spelled tokens:
 342     a # define MACRO ( ) A # B
 343   mappings:
 344     ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
 345 )"}};
 346   for (auto &Test : TestCases)
 347     EXPECT_EQ(collectAndDump(Test.first), Test.second)
 348         << collectAndDump(Test.first);
 349 }
 350
 351 TEST_F(TokenCollectorTest, Locations) {
 352   // Check locations of the tokens.
 353   llvm::Annotations Code(R"cpp(
 354     $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
 355   )cpp");
 356   recordTokens(Code.code());
 357   // Check expanded tokens.
 358   EXPECT_THAT(
 359       Buffer.expandedTokens(),
 360       ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
 361                   AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
 362                   AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
 363                   AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
 364                   AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
 365                   Kind(tok::eof)));
 366   // Check spelled tokens.
 367   EXPECT_THAT(
 368       Buffer.spelledTokens(SourceMgr->getMainFileID()),
 369       ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
 370                   AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
 371                   AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
 372                   AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
 373                   AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
 374
 375   auto StartLoc = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID());
 376   for (auto &R : Code.ranges()) {
 377     EXPECT_THAT(Buffer.spelledTokenAt(StartLoc.getLocWithOffset(R.Begin)),
 378                 Pointee(RangeIs(R)));
 379   }
 380 }
 381
 382 TEST_F(TokenCollectorTest, MacroDirectives) {
 383   // Macro directives are not stored anywhere at the moment.
 384   std::string Code = R"cpp(
 385     #define FOO a
 386     #include "unresolved_file.h"
 387     #undef FOO
 388     #ifdef X
 389     #else
 390     #endif
 391     #ifndef Y
 392     #endif
 393     #if 1
 394     #elif 2
 395     #else
 396     #endif
 397     #pragma once
 398     #pragma something lalala
 399
 400     int a;
 401   )cpp";
 402   std::string Expected =
 403       "expanded tokens:\n"
 404       "  int a ;\n"
 405       "file './input.cpp'\n"
 406       "  spelled tokens:\n"
 407       "    # define FOO a # include \"unresolved_file.h\" # undef FOO "
 408       "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
 409       "# endif # pragma once # pragma something lalala int a ;\n"
 410       "  mappings:\n"
 411       "    ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
 412   EXPECT_EQ(collectAndDump(Code), Expected);
 413 }
 414
 415 TEST_F(TokenCollectorTest, MacroReplacements) {
 416   std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
 417       // A simple object-like macro.
 418       {R"cpp(
 419     #define INT int const
 420     INT a;
 421   )cpp",
 422        R"(expanded tokens:
 423   int const a ;
 424 file './input.cpp'
 425   spelled tokens:
 426     # define INT int const INT a ;
 427   mappings:
 428     ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
 429     ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
 430 )"},
 431       // A simple function-like macro.
 432       {R"cpp(
 433     #define INT(a) const int
 434     INT(10+10) a;
 435   )cpp",
 436        R"(expanded tokens:
 437   const int a ;
 438 file './input.cpp'
 439   spelled tokens:
 440     # define INT ( a ) const int INT ( 10 + 10 ) a ;
 441   mappings:
 442     ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
 443     ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
 444 )"},
 445       // Recursive macro replacements.
 446       {R"cpp(
 447     #define ID(X) X
 448     #define INT int const
 449     ID(ID(INT)) a;
 450   )cpp",
 451        R"(expanded tokens:
 452   int const a ;
 453 file './input.cpp'
 454   spelled tokens:
 455     # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
 456   mappings:
 457     ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
 458     ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
 459 )"},
 460       // A little more complicated recursive macro replacements.
 461       {R"cpp(
 462     #define ADD(X, Y) X+Y
 463     #define MULT(X, Y) X*Y
 464
 465     int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
 466   )cpp",
 467        "expanded tokens:\n"
 468        "  int a = 1 * 2 + 3 * 4 + 5 ;\n"
 469        "file './input.cpp'\n"
 470        "  spelled tokens:\n"
 471        "    # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
 472        "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
 473        "  mappings:\n"
 474        "    ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
 475        "    ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
 476       // Empty macro replacement.
 477       // FIXME: the #define directives should not be glued together.
 478       {R"cpp(
 479     #define EMPTY
 480     #define EMPTY_FUNC(X)
 481     EMPTY
 482     EMPTY_FUNC(1+2+3)
 483     )cpp",
 484        R"(expanded tokens:
 485   <empty>
 486 file './input.cpp'
 487   spelled tokens:
 488     # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
 489   mappings:
 490     ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
 491     ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
 492     ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
 493 )"},
 494       // File ends with a macro replacement.
 495       {R"cpp(
 496     #define FOO 10+10;
 497     int a = FOO
 498     )cpp",
 499        R"(expanded tokens:
 500   int a = 10 + 10 ;
 501 file './input.cpp'
 502   spelled tokens:
 503     # define FOO 10 + 10 ; int a = FOO
 504   mappings:
 505     ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
 506     ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
 507 )"},
 508       {R"cpp(
 509          #define NUM 42
 510          #define ID(a) a
 511          #define M 1 + ID
 512          M(NUM)
 513        )cpp",
 514        R"(expanded tokens:
 515   1 + 42
 516 file './input.cpp'
 517   spelled tokens:
 518     # define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM )
 519   mappings:
 520     ['#'_0, 'M'_17) => ['1'_0, '1'_0)
 521     ['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3)
 522 )"},
 523   };
 524
 525   for (auto &Test : TestCases) {
 526     std::string Dump = collectAndDump(Test.first);
 527     EXPECT_EQ(Test.second, Dump) << Dump;
 528   }
 529 }
 530
 531 TEST_F(TokenCollectorTest, SpecialTokens) {
 532   // Tokens coming from concatenations.
 533   recordTokens(R"cpp(
 534     #define CONCAT(a, b) a ## b
 535     int a = CONCAT(1, 2);
 536   )cpp");
 537   EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
 538               Contains(HasText("12")));
 539   // Multi-line tokens with slashes at the end.
 540   recordTokens("i\\\nn\\\nt");
 541   EXPECT_THAT(Buffer.expandedTokens(),
 542               ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
 543                           Kind(tok::eof)));
 544   // FIXME: test tokens with digraphs and UCN identifiers.
 545 }
 546
 547 TEST_F(TokenCollectorTest, LateBoundTokens) {
 548   // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
 549   // but we choose to record them as a single token (for now).
 550   llvm::Annotations Code(R"cpp(
 551     template <class T>
 552     struct foo { int a; };
 553     int bar = foo<foo<int$br[[>>]]().a;
 554     int baz = 10 $op[[>>]] 2;
 555   )cpp");
 556   recordTokens(Code.code());
 557   EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
 558               AllOf(Contains(AllOf(Kind(tok::greatergreater),
 559                                    RangeIs(Code.range("br")))),
 560                     Contains(AllOf(Kind(tok::greatergreater),
 561                                    RangeIs(Code.range("op"))))));
 562 }
 563
 564 TEST_F(TokenCollectorTest, DelayedParsing) {
 565   llvm::StringLiteral Code = R"cpp(
 566     struct Foo {
 567       int method() {
 568         // Parser will visit method bodies and initializers multiple times, but
 569         // TokenBuffer should only record the first walk over the tokens;
 570         return 100;
 571       }
 572       int a = 10;
 573
 574       struct Subclass {
 575         void foo() {
 576           Foo().method();
 577         }
 578       };
 579     };
 580   )cpp";
 581   std::string ExpectedTokens =
 582       "expanded tokens:\n"
 583       "  struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
 584       "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
 585   EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
 586 }
 587
 588 TEST_F(TokenCollectorTest, MultiFile) {
 589   addFile("./foo.h", R"cpp(
 590     #define ADD(X, Y) X+Y
 591     int a = 100;
 592     #include "bar.h"
 593   )cpp");
 594   addFile("./bar.h", R"cpp(
 595     int b = ADD(1, 2);
 596     #define MULT(X, Y) X*Y
 597   )cpp");
 598   llvm::StringLiteral Code = R"cpp(
 599     #include "foo.h"
 600     int c = ADD(1, MULT(2,3));
 601   )cpp";
 602
 603   std::string Expected = R"(expanded tokens:
 604   int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
 605 file './input.cpp'
 606   spelled tokens:
 607     # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
 608   mappings:
 609     ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
 610     ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
 611 file './foo.h'
 612   spelled tokens:
 613     # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
 614   mappings:
 615     ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
 616     ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
 617 file './bar.h'
 618   spelled tokens:
 619     int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
 620   mappings:
 621     ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
 622     ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
 623 )";
 624
 625   EXPECT_EQ(Expected, collectAndDump(Code))
 626       << "input: " << Code << "\nresults: " << collectAndDump(Code);
 627 }
 628
// Fixture for the TokenBuffer query tests. It adds nothing of its own and
// reuses every helper and data member of TokenCollectorTest.
class TokenBufferTest : public TokenCollectorTest {};
 630
 631 TEST_F(TokenBufferTest, SpelledByExpanded) {
 632   recordTokens(R"cpp(
 633     a1 a2 a3 b1 b2
 634   )cpp");
 635
 636   // Expanded and spelled tokens are stored separately.
 637   EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
 638   // Searching for subranges of expanded tokens should give the corresponding
 639   // spelled ones.
 640   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
 641               ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
 642   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
 643               ValueIs(SameRange(findSpelled("a1 a2 a3"))));
 644   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
 645               ValueIs(SameRange(findSpelled("b1 b2"))));
 646
 647   // Test search on simple macro expansions.
 648   recordTokens(R"cpp(
 649     #define A a1 a2 a3
 650     #define B b1 b2
 651
 652     A split B
 653   )cpp");
 654   // Ranges going across expansion boundaries.
 655   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
 656               ValueIs(SameRange(findSpelled("A split B"))));
 657   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
 658               ValueIs(SameRange(findSpelled("A split").drop_back())));
 659   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
 660               ValueIs(SameRange(findSpelled("split B").drop_front())));
 661   // Ranges not fully covering macro invocations should fail.
 662   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), std::nullopt);
 663   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), std::nullopt);
 664   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
 665             std::nullopt);
 666
 667   // Recursive macro invocations.
 668   recordTokens(R"cpp(
 669     #define ID(x) x
 670     #define B b1 b2
 671
 672     ID(ID(ID(a1) a2 a3)) split ID(B)
 673   )cpp");
 674
 675   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
 676               ValueIs(SameRange(findSpelled("( B").drop_front())));
 677   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
 678               ValueIs(SameRange(findSpelled(
 679                   "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
 680   // Mixed ranges with expanded and spelled tokens.
 681   EXPECT_THAT(
 682       Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split")),
 683       ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split"))));
 684   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2")),
 685               ValueIs(SameRange(findSpelled("split ID ( B )"))));
 686   // Macro arguments
 687   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1")),
 688               ValueIs(SameRange(findSpelled("a1"))));
 689   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2")),
 690               ValueIs(SameRange(findSpelled("a2"))));
 691   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3")),
 692               ValueIs(SameRange(findSpelled("a3"))));
 693   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2")),
 694               ValueIs(SameRange(findSpelled("ID ( a1 ) a2"))));
 695   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
 696               ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3"))));
 697
 698   // Empty macro expansions.
 699   recordTokens(R"cpp(
 700     #define EMPTY
 701     #define ID(X) X
 702
 703     EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
 704     EMPTY EMPTY ID(4 5 6) split2
 705     ID(7 8 9) EMPTY EMPTY
 706   )cpp");
 707   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
 708               ValueIs(SameRange(findSpelled("1 2 3"))));
 709   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
 710               ValueIs(SameRange(findSpelled("4 5 6"))));
 711   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
 712               ValueIs(SameRange(findSpelled("7 8 9"))));
 713
 714   // Empty mappings coming from various directives.
 715   recordTokens(R"cpp(
 716     #define ID(X) X
 717     ID(1)
 718     #pragma lalala
 719     not_mapped
 720   )cpp");
 721   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
 722               ValueIs(SameRange(findSpelled("not_mapped"))));
 723
 724   // Multiple macro arguments
 725   recordTokens(R"cpp(
 726     #define ID(X) X
 727     #define ID2(X, Y) X Y
 728
 729     ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7)
 730   )cpp");
 731   // Should fail, spans multiple arguments.
 732   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), std::nullopt);
 733   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3")),
 734               ValueIs(SameRange(findSpelled("ID ( a2 ) a3"))));
 735   EXPECT_THAT(
 736       Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
 737       ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )"))));
 738   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6")),
 739               ValueIs(SameRange(findSpelled("a5 a6"))));
 740   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7")),
 741               ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )"))));
 742   // Should fail, spans multiple invocations.
 743   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")),
 744             std::nullopt);
 745
 746   // https://github.com/clangd/clangd/issues/1289
 747   recordTokens(R"cpp(
 748     #define FOO(X) foo(X)
 749     #define INDIRECT FOO(y)
 750     INDIRECT // expands to foo(y)
 751   )cpp");
 752   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("y")), std::nullopt);
 753
 754   recordTokens(R"cpp(
 755     #define FOO(X) a X b
 756     FOO(y)
 757   )cpp");
 758   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("y")),
 759               ValueIs(SameRange(findSpelled("y"))));
 760
 761   recordTokens(R"cpp(
 762     #define ID(X) X
 763     #define BAR ID(1)
 764     BAR
 765   )cpp");
 766   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1")),
 767               ValueIs(SameRange(findSpelled(") BAR").drop_front())));
 768
 769   // Critical cases for mapping of Prev/Next in spelledForExpandedSlow.
 770   recordTokens(R"cpp(
 771     #define ID(X) X
 772     ID(prev good)
 773     ID(prev ID(good2))
 774     #define LARGE ID(prev ID(bad))
 775     LARGE
 776   )cpp");
 777   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
 778               ValueIs(SameRange(findSpelled("good"))));
 779   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good2")),
 780               ValueIs(SameRange(findSpelled("good2"))));
 781   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);
 782
 783   recordTokens(R"cpp(
 784     #define PREV prev
 785     #define ID(X) X
 786     PREV ID(good)
 787     #define LARGE PREV ID(bad)
 788     LARGE
 789   )cpp");
 790   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
 791               ValueIs(SameRange(findSpelled("good"))));
 792   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);
 793
 794   recordTokens(R"cpp(
 795     #define ID(X) X
 796     #define ID2(X, Y) X Y
 797     ID2(prev, good)
 798     ID2(prev, ID(good2))
 799     #define LARGE ID2(prev, bad)
 800     LARGE
 801   )cpp");
 802   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
 803               ValueIs(SameRange(findSpelled("good"))));
 804   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good2")),
 805               ValueIs(SameRange(findSpelled("good2"))));
 806   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);
 807
 808   // Prev from macro body.
 809   recordTokens(R"cpp(
 810     #define ID(X) X
 811     #define ID2(X, Y) X prev ID(Y)
 812     ID2(not_prev, good)
 813   )cpp");
 814   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
 815               ValueIs(SameRange(findSpelled("good"))));
 816   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("prev good")), std::nullopt);
 817 }
 818
 819 TEST_F(TokenBufferTest, ExpandedTokensForRange) {
       // Checks Buffer.expandedTokens(SourceRange) on a snippet where some of the
       // expanded tokens are produced by a token-pasting macro.
 820   recordTokens(R"cpp(
 821     #define SIGN(X) X##_washere
 822     A SIGN(B) C SIGN(D) E SIGN(F) G
 823   )cpp");
 824
       // R starts at the location of expanded `C` and ends at the location of
       // expanded `F_washere`.
 825   SourceRange R(findExpanded("C").front().location(),
 826                 findExpanded("F_washere").front().location());
 827   // Expect every expanded token from `C` through `F_washere`, ends included.
 828   EXPECT_THAT(Buffer.expandedTokens(R),
 829               SameRange(findExpanded("C D_washere E F_washere")));
       // A default-constructed SourceRange is expected to yield no tokens.
 830   EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty());
 831 }
 832
 833 TEST_F(TokenBufferTest, ExpansionsOverlapping) {
       // Exercises Buffer.expansionStartingAt() and Buffer.expansionsOverlapping()
       // on object-like macros, function-like macros (including a nested call) and
       // preprocessor directives. Each recordTokens() call below starts a fresh
       // scenario, so the find*() helpers always refer to the latest snippet.
 834   // Object-like macro expansions.
 835   recordTokens(R"cpp(
 836     #define FOO 3+4
 837     int a = FOO 1;
 838     int b = FOO 2;
 839   )cpp");
 840
       // Foo1 is the spelled pair `FOO 1`; drop_back() leaves just the `FOO` use.
       // Likewise `3 + 4 1` minus its last token is the expanded `3 + 4`.
 841   llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1");
 842   EXPECT_THAT(
 843       Buffer.expansionStartingAt(Foo1.data()),
 844       ValueIs(IsExpansion(SameRange(Foo1.drop_back()),
 845                           SameRange(findExpanded("3 + 4 1").drop_back()))));
 846   EXPECT_THAT(
 847       Buffer.expansionsOverlapping(Foo1),
 848       ElementsAre(IsExpansion(SameRange(Foo1.drop_back()),
 849                               SameRange(findExpanded("3 + 4 1").drop_back()))));
 850
 851   llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2");
 852   EXPECT_THAT(
 853       Buffer.expansionStartingAt(Foo2.data()),
 854       ValueIs(IsExpansion(SameRange(Foo2.drop_back()),
 855                           SameRange(findExpanded("3 + 4 2").drop_back()))));
       // A spelled range reaching from the first `FOO` use to the end of the
       // second one is expected to report both expansions, in that order.
 856   EXPECT_THAT(
 857       Buffer.expansionsOverlapping(llvm::ArrayRef(Foo1.begin(), Foo2.end())),
 858       ElementsAre(IsExpansion(SameRange(Foo1.drop_back()), _),
 859                   IsExpansion(SameRange(Foo2.drop_back()), _)));
 860
 861   // Function-like macro expansions.
 862   recordTokens(R"cpp(
 863     #define ID(X) X
 864     int a = ID(1+2+3);
 865     int b = ID(ID(2+3+4));
 866   )cpp");
 867
       // The whole call `ID ( ... )` is the spelled side of the expansion; the
       // argument tokens are the expanded side.
 868   llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )");
 869   EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()),
 870               ValueIs(IsExpansion(SameRange(ID1),
 871                                   SameRange(findExpanded("1 + 2 + 3")))));
 872   // Only the first spelled token should be found.
 873   for (const auto &T : ID1.drop_front())
 874     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
 875
       // Nested call: a single expansion is expected, spanning the outermost
       // spelled call. The loop below also covers the inner `ID` token.
 876   llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
 877   EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()),
 878               ValueIs(IsExpansion(SameRange(ID2),
 879                                   SameRange(findExpanded("2 + 3 + 4")))));
 880   // Only the first spelled token should be found.
 881   for (const auto &T : ID2.drop_front())
 882     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
 883
       // The query range is built from two token pointers: it starts at `1` inside
       // the first call's arguments and its end pointer is the `4` inside the
       // second call's arguments (presumably exclusive -- confirm against
       // llvm::ArrayRef). Both calls are only partially covered, yet both
       // expansions are expected.
 884   EXPECT_THAT(Buffer.expansionsOverlapping(llvm::ArrayRef(
 885                   findSpelled("1 + 2").data(), findSpelled("4").data())),
 886               ElementsAre(IsExpansion(SameRange(ID1), _),
 887                           IsExpansion(SameRange(ID2), _)));
 888
 889   // PP directives.
 890   recordTokens(R"cpp(
 891 #define FOO 1
 892 int a = FOO;
 893 #pragma once
 894 int b = 1;
 895   )cpp");
 896
       // A directive is expected to be reported like an expansion whose expanded
       // side is empty: take_front(0) is a zero-length range positioned at the
       // first expanded token that follows the directive (`int` of `int a`).
 897   llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1");
 898   EXPECT_THAT(
 899       Buffer.expansionStartingAt(&DefineFoo.front()),
 900       ValueIs(IsExpansion(SameRange(DefineFoo),
 901                           SameRange(findExpanded("int a").take_front(0)))));
 902   // Only the first spelled token should be found.
 903   for (const auto &T : DefineFoo.drop_front())
 904     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
 905
       // Same shape for the pragma: empty expanded side, positioned at `int b`.
 906   llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once");
 907   EXPECT_THAT(
 908       Buffer.expansionStartingAt(&PragmaOnce.front()),
 909       ValueIs(IsExpansion(SameRange(PragmaOnce),
 910                           SameRange(findExpanded("int b").take_front(0)))));
 911   // Only the first spelled token should be found.
 912   for (const auto &T : PragmaOnce.drop_front())
 913     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
 914
       // A spelled range covering the `FOO` use, `;` and the first two tokens of
       // the pragma line is expected to report the macro expansion followed by
       // the whole `# pragma once` directive.
 915   EXPECT_THAT(
 916       Buffer.expansionsOverlapping(findSpelled("FOO ; # pragma")),
 917       ElementsAre(IsExpansion(SameRange(findSpelled("FOO ;").drop_back()), _),
 918                   IsExpansion(SameRange(PragmaOnce), _)));
 919 }
 920
 921 TEST_F(TokenBufferTest, TokensToFileRange) {
       // Checks the conversion of expanded tokens to FileRange: both the
       // single-token Token::range(SM) and the two-token static
       // Token::range(SM, First, Last). The expected offsets come from the
       // annotations in the snippet: `$i` wraps `int`, `$all` wraps the whole
       // `int a = FOO;` statement.
       // NOTE(review): addFile() presumably registers ./foo.h with the given
       // contents so that the #include below resolves -- confirm in the fixture.
 922   addFile("./foo.h", "token_from_header");
 923   llvm::Annotations Code(R"cpp(
 924     #define FOO token_from_expansion
 925     #include "./foo.h"
 926     $all[[$i[[int]] a = FOO;]]
 927   )cpp");
 928   recordTokens(Code.code());
 929
 930   auto &SM = *SourceMgr;
 931
 932   // Two simple examples.
 933   auto Int = findExpanded("int").front();
 934   auto Semi = findExpanded(";").front();
       // A single token maps to exactly the `$i` range in the main file.
 935   EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
 936                                      Code.range("i").End));
       // The pair (`int`, `;`) maps to the `$all` range, which also spans the
       // macro use `FOO` in between.
 937   EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
 938             FileRange(SM.getMainFileID(), Code.range("all").Begin,
 939                       Code.range("all").End));
 940   // We don't test assertion failures because death tests are slow.
 941 }
 942
 943 TEST_F(TokenBufferTest, MacroExpansions) {
       // Checks Buffer.macroExpansions() for the main file. Each `$macro` range
       // wraps the macro-name token of a macro use written directly in the file:
       // `FOO`, `CALL` (without its argument list) and `G`. The uses that only
       // appear inside macro bodies (`CALL(FOO2)` within G) are not annotated.
 944   llvm::Annotations Code(R"cpp(
 945     #define FOO B
 946     #define FOO2 BA
 947     #define CALL(X) int X
 948     #define G CALL(FOO2)
 949     int B;
 950     $macro[[FOO]];
 951     $macro[[CALL]](A);
 952     $macro[[G]];
 953   )cpp");
 954   recordTokens(Code.code());
 955   auto &SM = *SourceMgr;
 956   auto Expansions = Buffer.macroExpansions(SM.getMainFileID());
       // Expected: one main-file FileRange per `$macro` annotation.
 957   std::vector<FileRange> ExpectedMacroRanges;
 958   for (auto Range : Code.ranges("macro"))
 959     ExpectedMacroRanges.push_back(
 960         FileRange(SM.getMainFileID(), Range.Begin, Range.End));
       // Actual: the file range of each reported expansion.
 961   std::vector<FileRange> ActualMacroRanges;
 962   for (auto Expansion : Expansions)
 963     ActualMacroRanges.push_back(Expansion->range(SM));
       // Vector equality is element-wise, so both the set of ranges and their
       // order must match.
 964   EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges);
 965 }
 966
 967 TEST_F(TokenBufferTest, Touching) {
       // Checks spelledTokensTouching() and spelledIdentifierTouching() at every
       // `^`-annotated offset of the snippet `int ab=1;`.
 968   llvm::Annotations Code("^i^nt^ ^a^b^=^1;^");
 969   recordTokens(Code.code());
       // Verify the number of annotated points before any of them is indexed, so a
       // change to the annotated snippet fails here instead of reading past the
       // end of Code.points() in the lambdas below.
       ASSERT_EQ(Code.points().size(), 8u);
 970
       // Spelled tokens touching the Index-th annotated point of the main file.
 971   auto Touching = [&](int Index) {
 972     SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
 973                                                    Code.points()[Index]);
 974     return spelledTokensTouching(Loc, Buffer);
 975   };
       // Text of the identifier touching the Index-th annotated point, or "" when
       // spelledIdentifierTouching() returns null.
 976   auto Identifier = [&](int Index) {
 977     SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
 978                                                    Code.points()[Index]);
 979     const syntax::Token *Tok = spelledIdentifierTouching(Loc, Buffer);
 980     return Tok ? Tok->text(*SourceMgr) : "";
 981   };
 982
       // Points 0-2: before, inside and right after `int`. The token touches, but
       // no identifier is reported (presumably because `int` is a keyword).
 983   EXPECT_THAT(Touching(0), SameRange(findSpelled("int")));
 984   EXPECT_EQ(Identifier(0), "");
 985   EXPECT_THAT(Touching(1), SameRange(findSpelled("int")));
 986   EXPECT_EQ(Identifier(1), "");
 987   EXPECT_THAT(Touching(2), SameRange(findSpelled("int")));
 988   EXPECT_EQ(Identifier(2), "");
 989
       // Points 3-4: before and inside the identifier `ab`.
 990   EXPECT_THAT(Touching(3), SameRange(findSpelled("ab")));
 991   EXPECT_EQ(Identifier(3), "ab");
 992   EXPECT_THAT(Touching(4), SameRange(findSpelled("ab")));
 993   EXPECT_EQ(Identifier(4), "ab");
 994
       // Point 5: between `ab` and `=`. Both tokens touch; `ab` is the identifier.
 995   EXPECT_THAT(Touching(5), SameRange(findSpelled("ab =")));
 996   EXPECT_EQ(Identifier(5), "ab");
 997
       // Point 6: between `=` and `1`. Both tokens touch; neither is an identifier.
 998   EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1")));
 999   EXPECT_EQ(Identifier(6), "");
1000
       // Point 7: at the very end, right after `;`.
1001   EXPECT_THAT(Touching(7), SameRange(findSpelled(";")));
1002   EXPECT_EQ(Identifier(7), "");
1005 }
1006
1007 TEST_F(TokenBufferTest, ExpandedBySpelled) {
       // Exercises Buffer.expandedForSpelled(): mapping a range of spelled tokens
       // to the range(s) of expanded tokens it produced. Each recordTokens() call
       // below starts a fresh scenario.
1008   recordTokens(R"cpp(
1009     a1 a2 a3 b1 b2
1010   )cpp");
1011   // Expanded and spelled tokens are stored separately.
1012   EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
1013   // Mapping subranges of spelled tokens should give the corresponding
1014   // expanded ones.
1015   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")),
1016               ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2"))));
1017   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")),
1018               ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1019   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")),
1020               ElementsAre(SameRange(findExpanded("b1 b2"))));
1021
1022   // Test search on simple macro expansions.
1023   recordTokens(R"cpp(
1024     #define A a1 a2 a3
1025     #define B b1 b2
1026
1027     A split B
1028   )cpp");
1029   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")),
1030               ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
       // drop_back()/drop_front() strip `split`, leaving only the macro name `A`
       // (resp. `B`) as the spelled range.
1031   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()),
1032               ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1033   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()),
1034               ElementsAre(SameRange(findExpanded("b1 b2"))));
1035
1036   // Ranges not fully covering macro expansions should fail.
1037   recordTokens(R"cpp(
1038     #define ID(x) x
1039
1040     ID(a)
1041   )cpp");
1042   // Spelled range misses the leading `ID` token -> empty result.
1043   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )")), IsEmpty());
1044   // Spelled range misses the closing `)` token -> empty result.
1045   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a")), IsEmpty());
1046
1047   // Recursive macro invocations.
1048   recordTokens(R"cpp(
1049     #define ID(x) x
1050     #define B b1 b2
1051
1052     ID(ID(ID(a1) a2 a3)) split ID(B)
1053   )cpp");
1054
1055   EXPECT_THAT(
1056       Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")),
1057       ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1058   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")),
1059               ElementsAre(SameRange(findExpanded("b1 b2"))));
1060   EXPECT_THAT(Buffer.expandedForSpelled(
1061                   findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")),
1062               ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
1063   // FIXME: these should succeed, but we do not support macro arguments yet.
1064   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty());
1065   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")),
1066               IsEmpty());
1067
1068   // Empty macro expansions.
1069   recordTokens(R"cpp(
1070     #define EMPTY
1071     #define ID(X) X
1072
1073     EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
1074     EMPTY EMPTY ID(4 5 6) split2
1075     ID(7 8 9) EMPTY EMPTY
1076   )cpp");
1077   // Surrounded by empty expansions on one or both sides.
1078   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")),
1079               ElementsAre(SameRange(findExpanded("1 2 3"))));
1080   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")),
1081               ElementsAre(SameRange(findExpanded("4 5 6"))));
1082   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")),
1083               ElementsAre(SameRange(findExpanded("7 8 9"))));
1084   // Including the empty macro expansions on the side.
1085   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")),
1086               ElementsAre(SameRange(findExpanded("1 2 3"))));
1087   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")),
1088               ElementsAre(SameRange(findExpanded("1 2 3"))));
1089   EXPECT_THAT(
1090       Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")),
1091       ElementsAre(SameRange(findExpanded("1 2 3"))));
1092
1093   // Empty mappings coming from various directives.
1094   recordTokens(R"cpp(
1095     #define ID(X) X
1096     ID(1)
1097     #pragma lalala
1098     not_mapped
1099   )cpp");
1100   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")),
1101               IsEmpty());
1102   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")),
1103               IsEmpty());
1104
1105   // Empty macro expansion.
1106   recordTokens(R"cpp(
1107     #define EMPTY
1108     EMPTY int a = 100;
1109   )cpp");
       // drop_back() strips `int`, so the spelled range is the lone `EMPTY` use;
       // it expands to nothing, hence the empty result.
1110   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()),
1111               IsEmpty());
1112 }
1113
1114 TEST_F(TokenCollectorTest, Pragmas) {
1115   // A #pragma directive inside a function body. There are no expectations:
       // the test only requires that recording tokens for this input completes.
       // NOTE(review): presumably a regression guard against a crash or assertion
       // failure in the collector -- confirm against the change that added it.
1116   recordTokens(R"cpp(
1117     void foo() {
1118       #pragma unroll 4
1119       for(int i=0;i<4;++i);
1120     }
1121   )cpp");
1122 }
1123 } // namespace