1 //===- TokensTest.cpp -----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "clang/Tooling/Syntax/Tokens.h"
10 #include "clang/AST/ASTConsumer.h"
11 #include "clang/AST/Expr.h"
12 #include "clang/Basic/Diagnostic.h"
13 #include "clang/Basic/DiagnosticIDs.h"
14 #include "clang/Basic/DiagnosticOptions.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/FileSystemOptions.h"
17 #include "clang/Basic/LLVM.h"
18 #include "clang/Basic/LangOptions.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.def"
22 #include "clang/Basic/TokenKinds.h"
23 #include "clang/Frontend/CompilerInstance.h"
24 #include "clang/Frontend/FrontendAction.h"
25 #include "clang/Frontend/Utils.h"
26 #include "clang/Lex/Lexer.h"
27 #include "clang/Lex/PreprocessorOptions.h"
28 #include "clang/Lex/Token.h"
29 #include "clang/Tooling/Tooling.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/IntrusiveRefCntPtr.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/StringRef.h"
34 #include "llvm/Support/FormatVariadic.h"
35 #include "llvm/Support/MemoryBuffer.h"
36 #include "llvm/Support/VirtualFileSystem.h"
37 #include "llvm/Support/raw_os_ostream.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "llvm/Testing/Annotations/Annotations.h"
40 #include "llvm/Testing/Support/SupportHelpers.h"
43 #include <gmock/gmock.h>
44 #include <gtest/gtest.h>
50 using namespace clang
;
51 using namespace clang::syntax
;
55 using ::testing::AllOf
;
56 using ::testing::Contains
;
57 using ::testing::ElementsAre
;
58 using ::testing::Field
;
59 using ::testing::IsEmpty
;
60 using ::testing::Matcher
;
62 using ::testing::Pointee
;
63 using ::testing::StartsWith
;
66 // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
68 MATCHER_P(SameRange
, A
, "") {
69 return A
.begin() == arg
.begin() && A
.end() == arg
.end();
72 Matcher
<TokenBuffer::Expansion
>
73 IsExpansion(Matcher
<llvm::ArrayRef
<syntax::Token
>> Spelled
,
74 Matcher
<llvm::ArrayRef
<syntax::Token
>> Expanded
) {
75 return AllOf(Field(&TokenBuffer::Expansion::Spelled
, Spelled
),
76 Field(&TokenBuffer::Expansion::Expanded
, Expanded
));
78 // Matchers for syntax::Token.
79 MATCHER_P(Kind
, K
, "") { return arg
.kind() == K
; }
80 MATCHER_P2(HasText
, Text
, SourceMgr
, "") {
81 return arg
.text(*SourceMgr
) == Text
;
83 /// Checks the start and end location of a token are equal to SourceRng.
84 MATCHER_P(RangeIs
, SourceRng
, "") {
85 return arg
.location() == SourceRng
.first
&&
86 arg
.endLocation() == SourceRng
.second
;
89 class TokenCollectorTest
: public ::testing::Test
{
91 /// Run the clang frontend, collect the preprocessed tokens from the frontend
92 /// invocation and store them in this->Buffer.
93 /// This also clears SourceManager before running the compiler.
94 void recordTokens(llvm::StringRef Code
) {
95 class RecordTokens
: public ASTFrontendAction
{
97 explicit RecordTokens(TokenBuffer
&Result
) : Result(Result
) {}
99 bool BeginSourceFileAction(CompilerInstance
&CI
) override
{
100 assert(!Collector
&& "expected only a single call to BeginSourceFile");
101 Collector
.emplace(CI
.getPreprocessor());
104 void EndSourceFileAction() override
{
105 assert(Collector
&& "BeginSourceFileAction was never called");
106 Result
= std::move(*Collector
).consume();
107 Result
.indexExpandedTokens();
110 std::unique_ptr
<ASTConsumer
>
111 CreateASTConsumer(CompilerInstance
&CI
, StringRef InFile
) override
{
112 return std::make_unique
<ASTConsumer
>();
117 std::optional
<TokenCollector
> Collector
;
120 constexpr const char *FileName
= "./input.cpp";
121 FS
->addFile(FileName
, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
122 // Prepare to run a compiler.
123 if (!Diags
->getClient())
124 Diags
->setClient(new IgnoringDiagConsumer
);
125 std::vector
<const char *> Args
= {"tok-test", "-std=c++03", "-fsyntax-only",
127 CreateInvocationOptions CIOpts
;
128 CIOpts
.Diags
= Diags
;
130 auto CI
= createInvocation(Args
, std::move(CIOpts
));
132 CI
->getFrontendOpts().DisableFree
= false;
133 CI
->getPreprocessorOpts().addRemappedFile(
134 FileName
, llvm::MemoryBuffer::getMemBufferCopy(Code
).release());
135 CompilerInstance Compiler
;
136 Compiler
.setInvocation(std::move(CI
));
137 Compiler
.setDiagnostics(Diags
.get());
138 Compiler
.setFileManager(FileMgr
.get());
139 Compiler
.setSourceManager(SourceMgr
.get());
141 this->Buffer
= TokenBuffer(*SourceMgr
);
142 RecordTokens
Recorder(this->Buffer
);
143 ASSERT_TRUE(Compiler
.ExecuteAction(Recorder
))
144 << "failed to run the frontend";
147 /// Record the tokens and return a test dump of the resulting buffer.
148 std::string
collectAndDump(llvm::StringRef Code
) {
150 return Buffer
.dumpForTests();
153 // Adds a file to the test VFS.
154 void addFile(llvm::StringRef Path
, llvm::StringRef Contents
) {
155 if (!FS
->addFile(Path
, time_t(),
156 llvm::MemoryBuffer::getMemBufferCopy(Contents
))) {
157 ADD_FAILURE() << "could not add a file to VFS: " << Path
;
161 /// Add a new file, run syntax::tokenize() on the range if any, run it on the
162 /// whole file otherwise and return the results.
163 std::vector
<syntax::Token
> tokenize(llvm::StringRef Text
) {
164 llvm::Annotations
Annot(Text
);
165 auto FID
= SourceMgr
->createFileID(
166 llvm::MemoryBuffer::getMemBufferCopy(Annot
.code()));
167 // FIXME: pass proper LangOptions.
168 if (Annot
.ranges().empty())
169 return syntax::tokenize(FID
, *SourceMgr
, LangOptions());
170 return syntax::tokenize(
171 syntax::FileRange(FID
, Annot
.range().Begin
, Annot
.range().End
),
172 *SourceMgr
, LangOptions());
175 // Specialized versions of matchers that hide the SourceManager from clients.
176 Matcher
<syntax::Token
> HasText(std::string Text
) const {
177 return ::HasText(Text
, SourceMgr
.get());
179 Matcher
<syntax::Token
> RangeIs(llvm::Annotations::Range R
) const {
180 std::pair
<SourceLocation
, SourceLocation
> Ls
;
181 Ls
.first
= SourceMgr
->getLocForStartOfFile(SourceMgr
->getMainFileID())
182 .getLocWithOffset(R
.Begin
);
183 Ls
.second
= SourceMgr
->getLocForStartOfFile(SourceMgr
->getMainFileID())
184 .getLocWithOffset(R
.End
);
185 return ::RangeIs(Ls
);
188 /// Finds a subrange in O(n * m).
189 template <class T
, class U
, class Eq
>
190 llvm::ArrayRef
<T
> findSubrange(llvm::ArrayRef
<U
> Subrange
,
191 llvm::ArrayRef
<T
> Range
, Eq F
) {
192 assert(Subrange
.size() >= 1);
193 if (Range
.size() < Subrange
.size())
194 return llvm::ArrayRef(Range
.end(), Range
.end());
195 for (auto Begin
= Range
.begin(), Last
= Range
.end() - Subrange
.size();
196 Begin
<= Last
; ++Begin
) {
198 for (auto ItSub
= Subrange
.begin(); ItSub
!= Subrange
.end();
203 return llvm::ArrayRef(Begin
, It
);
206 return llvm::ArrayRef(Range
.end(), Range
.end());
209 /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
210 /// The match should be unique. \p Query is a whitespace-separated list of
211 /// tokens to search for.
212 llvm::ArrayRef
<syntax::Token
>
213 findTokenRange(llvm::StringRef Query
, llvm::ArrayRef
<syntax::Token
> Tokens
) {
214 llvm::SmallVector
<llvm::StringRef
, 8> QueryTokens
;
215 Query
.split(QueryTokens
, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
216 if (QueryTokens
.empty()) {
217 ADD_FAILURE() << "will not look for an empty list of tokens";
220 // An equality test for search.
221 auto TextMatches
= [this](llvm::StringRef Q
, const syntax::Token
&T
) {
222 return Q
== T
.text(*SourceMgr
);
225 auto Found
= findSubrange(llvm::ArrayRef(QueryTokens
), Tokens
, TextMatches
);
226 if (Found
.begin() == Tokens
.end()) {
227 ADD_FAILURE() << "could not find the subrange for " << Query
;
230 // Check that the match is unique.
231 if (findSubrange(llvm::ArrayRef(QueryTokens
),
232 llvm::ArrayRef(Found
.end(), Tokens
.end()), TextMatches
)
233 .begin() != Tokens
.end()) {
234 ADD_FAILURE() << "match is not unique for " << Query
;
240 // Specialized versions of findTokenRange for expanded and spelled tokens.
241 llvm::ArrayRef
<syntax::Token
> findExpanded(llvm::StringRef Query
) {
242 return findTokenRange(Query
, Buffer
.expandedTokens());
244 llvm::ArrayRef
<syntax::Token
> findSpelled(llvm::StringRef Query
,
245 FileID File
= FileID()) {
247 File
= SourceMgr
->getMainFileID();
248 return findTokenRange(Query
, Buffer
.spelledTokens(File
));
252 llvm::IntrusiveRefCntPtr
<DiagnosticsEngine
> Diags
=
253 new DiagnosticsEngine(new DiagnosticIDs
, new DiagnosticOptions
);
254 IntrusiveRefCntPtr
<llvm::vfs::InMemoryFileSystem
> FS
=
255 new llvm::vfs::InMemoryFileSystem
;
256 llvm::IntrusiveRefCntPtr
<FileManager
> FileMgr
=
257 new FileManager(FileSystemOptions(), FS
);
258 llvm::IntrusiveRefCntPtr
<SourceManager
> SourceMgr
=
259 new SourceManager(*Diags
, *FileMgr
);
260 /// Contains last result of calling recordTokens().
261 TokenBuffer Buffer
= TokenBuffer(*SourceMgr
);
264 TEST_F(TokenCollectorTest
, RawMode
) {
265 EXPECT_THAT(tokenize("int main() {}"),
266 ElementsAre(Kind(tok::kw_int
),
267 AllOf(HasText("main"), Kind(tok::identifier
)),
268 Kind(tok::l_paren
), Kind(tok::r_paren
),
269 Kind(tok::l_brace
), Kind(tok::r_brace
)));
270 // Comments are ignored for now.
271 EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
272 ElementsAre(Kind(tok::kw_int
),
273 AllOf(HasText("a"), Kind(tok::identifier
)),
275 EXPECT_THAT(tokenize("int [[main() {]]}"),
276 ElementsAre(AllOf(HasText("main"), Kind(tok::identifier
)),
277 Kind(tok::l_paren
), Kind(tok::r_paren
),
278 Kind(tok::l_brace
)));
279 EXPECT_THAT(tokenize("int [[main() { ]]}"),
280 ElementsAre(AllOf(HasText("main"), Kind(tok::identifier
)),
281 Kind(tok::l_paren
), Kind(tok::r_paren
),
282 Kind(tok::l_brace
)));
283 // First token is partially parsed, last token is fully included even though
284 // only a part of it is contained in the range.
285 EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}"),
286 ElementsAre(AllOf(HasText("ain"), Kind(tok::identifier
)),
287 Kind(tok::l_paren
), Kind(tok::r_paren
),
288 Kind(tok::l_brace
), Kind(tok::kw_return
)));
291 TEST_F(TokenCollectorTest
, Basic
) {
292 std::pair
</*Input*/ std::string
, /*Expected*/ std::string
> TestCases
[] = {
301 // All kinds of whitespace are ignored.
302 {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n",
310 // Annotation tokens are ignored.
312 #pragma GCC visibility push (public)
313 #pragma GCC visibility pop
319 # pragma GCC visibility push ( public ) # pragma GCC visibility pop
321 ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
323 // Empty files should not crash.
324 {R
"cpp()cpp", R
"(expanded tokens:
331 // Should not crash on errors inside '#define' directives. Error is that
332 // stringification (#B) does not refer to a macro parameter.
342 a # define MACRO ( ) A # B
344 ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
346 for (auto &Test
: TestCases
)
347 EXPECT_EQ(collectAndDump(Test
.first
), Test
.second
)
348 << collectAndDump(Test
.first
);
351 TEST_F(TokenCollectorTest
, Locations
) {
352 // Check locations of the tokens.
353 llvm::Annotations
Code(R
"cpp(
354 $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz
"]] $r5[[;]]
356 recordTokens(Code
.code());
357 // Check expanded tokens.
359 Buffer
.expandedTokens(),
360 ElementsAre(AllOf(Kind(tok::kw_int
), RangeIs(Code
.range("r1"))),
361 AllOf(Kind(tok::identifier
), RangeIs(Code
.range("r2"))),
362 AllOf(Kind(tok::equal
), RangeIs(Code
.range("r3"))),
363 AllOf(Kind(tok::string_literal
), RangeIs(Code
.range("r4"))),
364 AllOf(Kind(tok::semi
), RangeIs(Code
.range("r5"))),
366 // Check spelled tokens.
368 Buffer
.spelledTokens(SourceMgr
->getMainFileID()),
369 ElementsAre(AllOf(Kind(tok::kw_int
), RangeIs(Code
.range("r1"))),
370 AllOf(Kind(tok::identifier
), RangeIs(Code
.range("r2"))),
371 AllOf(Kind(tok::equal
), RangeIs(Code
.range("r3"))),
372 AllOf(Kind(tok::string_literal
), RangeIs(Code
.range("r4"))),
373 AllOf(Kind(tok::semi
), RangeIs(Code
.range("r5")))));
375 auto StartLoc
= SourceMgr
->getLocForStartOfFile(SourceMgr
->getMainFileID());
376 for (auto &R
: Code
.ranges()) {
378 Buffer
.spelledTokenContaining(StartLoc
.getLocWithOffset(R
.Begin
)),
379 Pointee(RangeIs(R
)));
383 TEST_F(TokenCollectorTest
, LocationInMiddleOfSpelledToken
) {
384 llvm::Annotations
Code(R
"cpp(
385 int foo = [[baa^aar]];
387 recordTokens(Code
.code());
388 // Check spelled tokens.
389 auto StartLoc
= SourceMgr
->getLocForStartOfFile(SourceMgr
->getMainFileID());
391 Buffer
.spelledTokenContaining(StartLoc
.getLocWithOffset(Code
.point())),
392 Pointee(RangeIs(Code
.range())));
395 TEST_F(TokenCollectorTest
, MacroDirectives
) {
396 // Macro directives are not stored anywhere at the moment.
397 std::string Code
= R
"cpp(
399 #include "unresolved_file
.h
"
411 #pragma something lalala
415 std::string Expected
=
418 "file './input.cpp'\n"
420 " # define FOO a # include \"unresolved_file.h\" # undef FOO "
421 "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
422 "# endif # pragma once # pragma something lalala int a ;\n"
424 " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
425 EXPECT_EQ(collectAndDump(Code
), Expected
);
428 TEST_F(TokenCollectorTest
, MacroReplacements
) {
429 std::pair
</*Input*/ std::string
, /*Expected*/ std::string
> TestCases
[] = {
430 // A simple object-like macro.
432 #define INT int const
439 # define INT int const INT a ;
441 ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
442 ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
444 // A simple function-like macro.
446 #define INT(a) const int
453 # define INT ( a ) const int INT ( 10 + 10 ) a ;
455 ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
456 ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
458 // Recursive macro replacements.
461 #define INT int const
468 # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
470 ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
471 ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
473 // A little more complicated recursive macro replacements.
475 #define ADD(X, Y) X+Y
476 #define MULT(X, Y) X*Y
478 int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
481 " int a = 1 * 2 + 3 * 4 + 5 ;\n"
482 "file './input.cpp'\n"
484 " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
485 "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
487 " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
488 " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
489 // Empty macro replacement.
490 // FIXME: the #define directives should not be glued together.
493 #define EMPTY_FUNC(X)
501 # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
503 ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
504 ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
505 ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
507 // File ends with a macro replacement.
516 # define FOO 10 + 10 ; int a = FOO
518 ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
519 ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
531 # define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM )
533 ['#'_0, 'M'_17) => ['1'_0, '1'_0)
534 ['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3)
538 for (auto &Test
: TestCases
) {
539 std::string Dump
= collectAndDump(Test
.first
);
540 EXPECT_EQ(Test
.second
, Dump
) << Dump
;
544 TEST_F(TokenCollectorTest
, SpecialTokens
) {
545 // Tokens coming from concatenations.
547 #define CONCAT(a, b) a ## b
548 int a = CONCAT(1, 2);
550 EXPECT_THAT(std::vector
<syntax::Token
>(Buffer
.expandedTokens()),
551 Contains(HasText("12")));
552 // Multi-line tokens with slashes at the end.
553 recordTokens("i\\\nn\\\nt");
554 EXPECT_THAT(Buffer
.expandedTokens(),
555 ElementsAre(AllOf(Kind(tok::kw_int
), HasText("i\\\nn\\\nt")),
557 // FIXME: test tokens with digraphs and UCN identifiers.
560 TEST_F(TokenCollectorTest
, LateBoundTokens
) {
561 // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
562 // but we choose to record them as a single token (for now).
563 llvm::Annotations
Code(R
"cpp(
565 struct foo { int a; };
566 int bar = foo<foo<int$br[[>>]]().a;
567 int baz = 10 $op[[>>]] 2;
569 recordTokens(Code
.code());
570 EXPECT_THAT(std::vector
<syntax::Token
>(Buffer
.expandedTokens()),
571 AllOf(Contains(AllOf(Kind(tok::greatergreater
),
572 RangeIs(Code
.range("br")))),
573 Contains(AllOf(Kind(tok::greatergreater
),
574 RangeIs(Code
.range("op"))))));
577 TEST_F(TokenCollectorTest
, DelayedParsing
) {
578 llvm::StringLiteral Code
= R
"cpp(
581 // Parser will visit method bodies and initializers multiple times, but
582 // TokenBuffer should only record the first walk over the tokens;
594 std::string ExpectedTokens
=
596 " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
597 "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
598 EXPECT_THAT(collectAndDump(Code
), StartsWith(ExpectedTokens
));
601 TEST_F(TokenCollectorTest
, MultiFile
) {
602 addFile("./foo.h", R
"cpp(
603 #define ADD(X, Y) X+Y
607 addFile("./bar.h", R
"cpp(
609 #define MULT(X, Y) X*Y
611 llvm::StringLiteral Code
= R
"cpp(
613 int c = ADD(1, MULT(2,3));
616 std::string Expected
= R
"(expanded tokens:
617 int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
620 # include "foo
.h
" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
622 ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
623 ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
626 # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar
.h
"
628 ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
629 ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
632 int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
634 ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
635 ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
638 EXPECT_EQ(Expected
, collectAndDump(Code
))
639 << "input: " << Code
<< "\nresults: " << collectAndDump(Code
);
642 class TokenBufferTest
: public TokenCollectorTest
{};
644 TEST_F(TokenBufferTest
, SpelledByExpanded
) {
649 // Expanded and spelled tokens are stored separately.
650 EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
651 // Searching for subranges of expanded tokens should give the corresponding spelled ones.
653 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
654 ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
655 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("a1 a2 a3")),
656 ValueIs(SameRange(findSpelled("a1 a2 a3"))));
657 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("b1 b2")),
658 ValueIs(SameRange(findSpelled("b1 b2"))));
660 // Test search on simple macro expansions.
667 // Ranges going across expansion boundaries.
668 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
669 ValueIs(SameRange(findSpelled("A split B"))));
670 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("a1 a2 a3")),
671 ValueIs(SameRange(findSpelled("A split").drop_back())));
672 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("b1 b2")),
673 ValueIs(SameRange(findSpelled("split B").drop_front())));
674 // Ranges not fully covering macro invocations should fail.
675 EXPECT_EQ(Buffer
.spelledForExpanded(findExpanded("a1 a2")), std::nullopt
);
676 EXPECT_EQ(Buffer
.spelledForExpanded(findExpanded("b2")), std::nullopt
);
677 EXPECT_EQ(Buffer
.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
680 // Recursive macro invocations.
685 ID(ID(ID(a1) a2 a3)) split ID(B)
688 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("b1 b2")),
689 ValueIs(SameRange(findSpelled("( B").drop_front())));
690 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
691 ValueIs(SameRange(findSpelled(
692 "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
693 // Mixed ranges with expanded and spelled tokens.
695 Buffer
.spelledForExpanded(findExpanded("a1 a2 a3 split")),
696 ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split"))));
697 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("split b1 b2")),
698 ValueIs(SameRange(findSpelled("split ID ( B )"))));
700 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("a1")),
701 ValueIs(SameRange(findSpelled("a1"))));
702 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("a2")),
703 ValueIs(SameRange(findSpelled("a2"))));
704 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("a3")),
705 ValueIs(SameRange(findSpelled("a3"))));
706 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("a1 a2")),
707 ValueIs(SameRange(findSpelled("ID ( a1 ) a2"))));
708 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("a1 a2 a3")),
709 ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3"))));
711 // Empty macro expansions.
716 EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
717 EMPTY EMPTY ID(4 5 6) split2
718 ID(7 8 9) EMPTY EMPTY
720 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("1 2 3")),
721 ValueIs(SameRange(findSpelled("1 2 3"))));
722 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("4 5 6")),
723 ValueIs(SameRange(findSpelled("4 5 6"))));
724 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("7 8 9")),
725 ValueIs(SameRange(findSpelled("7 8 9"))));
727 // Empty mappings coming from various directives.
734 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("not_mapped")),
735 ValueIs(SameRange(findSpelled("not_mapped"))));
737 // Multiple macro arguments
740 #define ID2(X, Y) X Y
742 ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7)
744 // Should fail, spans multiple arguments.
745 EXPECT_EQ(Buffer
.spelledForExpanded(findExpanded("a1 a2")), std::nullopt
);
746 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("a2 a3")),
747 ValueIs(SameRange(findSpelled("ID ( a2 ) a3"))));
749 Buffer
.spelledForExpanded(findExpanded("a1 a2 a3")),
750 ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )"))));
751 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("a5 a6")),
752 ValueIs(SameRange(findSpelled("a5 a6"))));
753 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("a4 a5 a6 a7")),
754 ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )"))));
755 // Should fail, spans multiple invocations.
756 EXPECT_EQ(Buffer
.spelledForExpanded(findExpanded("a1 a2 a3 a4")),
759 // https://github.com/clangd/clangd/issues/1289
761 #define FOO(X) foo(X)
762 #define INDIRECT FOO(y)
763 INDIRECT // expands to foo(y)
765 EXPECT_EQ(Buffer
.spelledForExpanded(findExpanded("y")), std::nullopt
);
771 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("y")),
772 ValueIs(SameRange(findSpelled("y"))));
779 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("1")),
780 ValueIs(SameRange(findSpelled(") BAR").drop_front())));
782 // Critical cases for mapping of Prev/Next in spelledForExpandedSlow.
787 #define LARGE ID(prev ID(bad))
790 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("good")),
791 ValueIs(SameRange(findSpelled("good"))));
792 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("good2")),
793 ValueIs(SameRange(findSpelled("good2"))));
794 EXPECT_EQ(Buffer
.spelledForExpanded(findExpanded("bad")), std::nullopt
);
800 #define LARGE PREV ID(bad)
803 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("good")),
804 ValueIs(SameRange(findSpelled("good"))));
805 EXPECT_EQ(Buffer
.spelledForExpanded(findExpanded("bad")), std::nullopt
);
809 #define ID2(X, Y) X Y
812 #define LARGE ID2(prev, bad)
815 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("good")),
816 ValueIs(SameRange(findSpelled("good"))));
817 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("good2")),
818 ValueIs(SameRange(findSpelled("good2"))));
819 EXPECT_EQ(Buffer
.spelledForExpanded(findExpanded("bad")), std::nullopt
);
821 // Prev from macro body.
824 #define ID2(X, Y) X prev ID(Y)
827 EXPECT_THAT(Buffer
.spelledForExpanded(findExpanded("good")),
828 ValueIs(SameRange(findSpelled("good"))));
829 EXPECT_EQ(Buffer
.spelledForExpanded(findExpanded("prev good")), std::nullopt
);
832 TEST_F(TokenBufferTest
, NoCrashForEofToken
) {
836 ASSERT_TRUE(!Buffer
.expandedTokens().empty());
837 ASSERT_EQ(Buffer
.expandedTokens().back().kind(), tok::eof
);
838 // Expanded range including `eof` is handled gracefully (`eof` is ignored).
840 Buffer
.spelledForExpanded(Buffer
.expandedTokens()),
841 ValueIs(SameRange(Buffer
.spelledTokens(SourceMgr
->getMainFileID()))));
844 TEST_F(TokenBufferTest
, ExpandedTokensForRange
) {
846 #define SIGN(X) X##_washere
847 A SIGN(B) C SIGN(D) E SIGN(F) G
850 SourceRange
R(findExpanded("C").front().location(),
851 findExpanded("F_washere").front().location());
852 // The expanded tokens covered by R are exactly those from C through F_washere.
853 EXPECT_THAT(Buffer
.expandedTokens(R
),
854 SameRange(findExpanded("C D_washere E F_washere")));
855 EXPECT_THAT(Buffer
.expandedTokens(SourceRange()), testing::IsEmpty());
858 TEST_F(TokenBufferTest
, ExpansionsOverlapping
) {
859 // Object-like macro expansions.
866 llvm::ArrayRef
<syntax::Token
> Foo1
= findSpelled("FOO 1");
868 Buffer
.expansionStartingAt(Foo1
.data()),
869 ValueIs(IsExpansion(SameRange(Foo1
.drop_back()),
870 SameRange(findExpanded("3 + 4 1").drop_back()))));
872 Buffer
.expansionsOverlapping(Foo1
),
873 ElementsAre(IsExpansion(SameRange(Foo1
.drop_back()),
874 SameRange(findExpanded("3 + 4 1").drop_back()))));
876 llvm::ArrayRef
<syntax::Token
> Foo2
= findSpelled("FOO 2");
878 Buffer
.expansionStartingAt(Foo2
.data()),
879 ValueIs(IsExpansion(SameRange(Foo2
.drop_back()),
880 SameRange(findExpanded("3 + 4 2").drop_back()))));
882 Buffer
.expansionsOverlapping(llvm::ArrayRef(Foo1
.begin(), Foo2
.end())),
883 ElementsAre(IsExpansion(SameRange(Foo1
.drop_back()), _
),
884 IsExpansion(SameRange(Foo2
.drop_back()), _
)));
886 // Function-like macro expansions.
890 int b = ID(ID(2+3+4));
893 llvm::ArrayRef
<syntax::Token
> ID1
= findSpelled("ID ( 1 + 2 + 3 )");
894 EXPECT_THAT(Buffer
.expansionStartingAt(&ID1
.front()),
895 ValueIs(IsExpansion(SameRange(ID1
),
896 SameRange(findExpanded("1 + 2 + 3")))));
897 // Only the first spelled token should be found.
898 for (const auto &T
: ID1
.drop_front())
899 EXPECT_EQ(Buffer
.expansionStartingAt(&T
), std::nullopt
);
901 llvm::ArrayRef
<syntax::Token
> ID2
= findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
902 EXPECT_THAT(Buffer
.expansionStartingAt(&ID2
.front()),
903 ValueIs(IsExpansion(SameRange(ID2
),
904 SameRange(findExpanded("2 + 3 + 4")))));
905 // Only the first spelled token should be found.
906 for (const auto &T
: ID2
.drop_front())
907 EXPECT_EQ(Buffer
.expansionStartingAt(&T
), std::nullopt
);
909 EXPECT_THAT(Buffer
.expansionsOverlapping(llvm::ArrayRef(
910 findSpelled("1 + 2").data(), findSpelled("4").data())),
911 ElementsAre(IsExpansion(SameRange(ID1
), _
),
912 IsExpansion(SameRange(ID2
), _
)));
922 llvm::ArrayRef
<syntax::Token
> DefineFoo
= findSpelled("# define FOO 1");
924 Buffer
.expansionStartingAt(&DefineFoo
.front()),
925 ValueIs(IsExpansion(SameRange(DefineFoo
),
926 SameRange(findExpanded("int a").take_front(0)))));
927 // Only the first spelled token should be found.
928 for (const auto &T
: DefineFoo
.drop_front())
929 EXPECT_EQ(Buffer
.expansionStartingAt(&T
), std::nullopt
);
931 llvm::ArrayRef
<syntax::Token
> PragmaOnce
= findSpelled("# pragma once");
933 Buffer
.expansionStartingAt(&PragmaOnce
.front()),
934 ValueIs(IsExpansion(SameRange(PragmaOnce
),
935 SameRange(findExpanded("int b").take_front(0)))));
936 // Only the first spelled token should be found.
937 for (const auto &T
: PragmaOnce
.drop_front())
938 EXPECT_EQ(Buffer
.expansionStartingAt(&T
), std::nullopt
);
941 Buffer
.expansionsOverlapping(findSpelled("FOO ; # pragma")),
942 ElementsAre(IsExpansion(SameRange(findSpelled("FOO ;").drop_back()), _
),
943 IsExpansion(SameRange(PragmaOnce
), _
)));
946 TEST_F(TokenBufferTest
, TokensToFileRange
) {
947 addFile("./foo.h", "token_from_header");
948 llvm::Annotations
Code(R
"cpp(
949 #define FOO token_from_expansion
951 $all[[$i[[int]] a = FOO;]]
953 recordTokens(Code
.code());
955 auto &SM
= *SourceMgr
;
957 // Two simple examples.
958 auto Int
= findExpanded("int").front();
959 auto Semi
= findExpanded(";").front();
960 EXPECT_EQ(Int
.range(SM
), FileRange(SM
.getMainFileID(), Code
.range("i").Begin
,
961 Code
.range("i").End
));
962 EXPECT_EQ(syntax::Token::range(SM
, Int
, Semi
),
963 FileRange(SM
.getMainFileID(), Code
.range("all").Begin
,
964 Code
.range("all").End
));
965 // We don't test assertion failures because death tests are slow.
968 TEST_F(TokenBufferTest
, MacroExpansions
) {
969 llvm::Annotations
Code(R
"cpp(
972 #define CALL(X) int X
979 recordTokens(Code
.code());
980 auto &SM
= *SourceMgr
;
981 auto Expansions
= Buffer
.macroExpansions(SM
.getMainFileID());
982 std::vector
<FileRange
> ExpectedMacroRanges
;
983 for (auto Range
: Code
.ranges("macro"))
984 ExpectedMacroRanges
.push_back(
985 FileRange(SM
.getMainFileID(), Range
.Begin
, Range
.End
));
986 std::vector
<FileRange
> ActualMacroRanges
;
987 for (auto Expansion
: Expansions
)
988 ActualMacroRanges
.push_back(Expansion
->range(SM
));
989 EXPECT_EQ(ExpectedMacroRanges
, ActualMacroRanges
);
992 TEST_F(TokenBufferTest
, Touching
) {
993 llvm::Annotations
Code("^i^nt^ ^a^b^=^1;^");
994 recordTokens(Code
.code());
996 auto Touching
= [&](int Index
) {
997 SourceLocation Loc
= SourceMgr
->getComposedLoc(SourceMgr
->getMainFileID(),
998 Code
.points()[Index
]);
999 return spelledTokensTouching(Loc
, Buffer
);
1001 auto Identifier
= [&](int Index
) {
1002 SourceLocation Loc
= SourceMgr
->getComposedLoc(SourceMgr
->getMainFileID(),
1003 Code
.points()[Index
]);
1004 const syntax::Token
*Tok
= spelledIdentifierTouching(Loc
, Buffer
);
1005 return Tok
? Tok
->text(*SourceMgr
) : "";
1008 EXPECT_THAT(Touching(0), SameRange(findSpelled("int")));
1009 EXPECT_EQ(Identifier(0), "");
1010 EXPECT_THAT(Touching(1), SameRange(findSpelled("int")));
1011 EXPECT_EQ(Identifier(1), "");
1012 EXPECT_THAT(Touching(2), SameRange(findSpelled("int")));
1013 EXPECT_EQ(Identifier(2), "");
1015 EXPECT_THAT(Touching(3), SameRange(findSpelled("ab")));
1016 EXPECT_EQ(Identifier(3), "ab");
1017 EXPECT_THAT(Touching(4), SameRange(findSpelled("ab")));
1018 EXPECT_EQ(Identifier(4), "ab");
1020 EXPECT_THAT(Touching(5), SameRange(findSpelled("ab =")));
1021 EXPECT_EQ(Identifier(5), "ab");
1023 EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1")));
1024 EXPECT_EQ(Identifier(6), "");
1026 EXPECT_THAT(Touching(7), SameRange(findSpelled(";")));
1027 EXPECT_EQ(Identifier(7), "");
1029 ASSERT_EQ(Code
.points().size(), 8u);
1032 TEST_F(TokenBufferTest
, ExpandedBySpelled
) {
1036 // Expanded and spelled tokens are stored separately.
1037 EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
1038 // Searching for subranges of spelled tokens should give the corresponding expanded ones.
1040 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")),
1041 ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2"))));
1042 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("a1 a2 a3")),
1043 ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1044 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("b1 b2")),
1045 ElementsAre(SameRange(findExpanded("b1 b2"))));
1047 // Test search on simple macro expansions.
1054 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("A split B")),
1055 ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
1056 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("A split").drop_back()),
1057 ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1058 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("split B").drop_front()),
1059 ElementsAre(SameRange(findExpanded("b1 b2"))));
1061 // Ranges not fully covering macro expansions should fail.
1067 // Spelled tokens don't cover the entire mapping (missing ID token) -> empty result
1068 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("( a )")), IsEmpty());
1069 // Spelled tokens don't cover the entire mapping (missing ) token) -> empty result
1070 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("ID ( a")), IsEmpty());
1072 // Recursive macro invocations.
1077 ID(ID(ID(a1) a2 a3)) split ID(B)
1081 Buffer
.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")),
1082 ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
1083 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("ID ( B )")),
1084 ElementsAre(SameRange(findExpanded("b1 b2"))));
1085 EXPECT_THAT(Buffer
.expandedForSpelled(
1086 findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")),
1087 ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
1088 // FIXME: these should succeed, but we do not support macro arguments yet.
1089 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("a1")), IsEmpty());
1090 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("ID ( a1 ) a2")),
1093 // Empty macro expansions.
1098 EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
1099 EMPTY EMPTY ID(4 5 6) split2
1100 ID(7 8 9) EMPTY EMPTY
1102 // Covered by empty expansions on one or both of the sides.
1103 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("ID ( 1 2 3 )")),
1104 ElementsAre(SameRange(findExpanded("1 2 3"))));
1105 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("ID ( 4 5 6 )")),
1106 ElementsAre(SameRange(findExpanded("4 5 6"))));
1107 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("ID ( 7 8 9 )")),
1108 ElementsAre(SameRange(findExpanded("7 8 9"))));
1109 // Including the empty macro expansions on the side.
1110 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")),
1111 ElementsAre(SameRange(findExpanded("1 2 3"))));
1112 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")),
1113 ElementsAre(SameRange(findExpanded("1 2 3"))));
1115 Buffer
.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")),
1116 ElementsAre(SameRange(findExpanded("1 2 3"))));
1118 // Empty mappings coming from various directives.
1125 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("# define ID ( X ) X")),
1127 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("# pragma lalala")),
1130 // Empty macro expansion.
1135 EXPECT_THAT(Buffer
.expandedForSpelled(findSpelled("EMPTY int").drop_back()),
1139 TEST_F(TokenCollectorTest
, Pragmas
) {
1140 // Tokens coming from concatenations.
1144 for(int i=0;i<4;++i);