1 //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "clang/Lex/Lexer.h"
10 #include "clang/Basic/Diagnostic.h"
11 #include "clang/Basic/DiagnosticOptions.h"
12 #include "clang/Basic/FileManager.h"
13 #include "clang/Basic/LangOptions.h"
14 #include "clang/Basic/SourceLocation.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Basic/TargetInfo.h"
17 #include "clang/Basic/TargetOptions.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "clang/Lex/HeaderSearch.h"
20 #include "clang/Lex/HeaderSearchOptions.h"
21 #include "clang/Lex/LiteralSupport.h"
22 #include "clang/Lex/MacroArgs.h"
23 #include "clang/Lex/MacroInfo.h"
24 #include "clang/Lex/ModuleLoader.h"
25 #include "clang/Lex/Preprocessor.h"
26 #include "clang/Lex/PreprocessorOptions.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/StringRef.h"
29 #include "llvm/Testing/Annotations/Annotations.h"
30 #include "gmock/gmock.h"
31 #include "gtest/gtest.h"
37 using namespace clang
;
38 using testing::ElementsAre
;
41 class LexerTest
: public ::testing::Test
{
44 : FileMgr(FileMgrOpts
),
45 DiagID(new DiagnosticIDs()),
46 Diags(DiagID
, new DiagnosticOptions
, new IgnoringDiagConsumer()),
47 SourceMgr(Diags
, FileMgr
),
48 TargetOpts(new TargetOptions
)
50 TargetOpts
->Triple
= "x86_64-apple-darwin11.1.0";
51 Target
= TargetInfo::CreateTargetInfo(Diags
, TargetOpts
);
54 std::unique_ptr
<Preprocessor
> CreatePP(StringRef Source
,
55 TrivialModuleLoader
&ModLoader
) {
56 std::unique_ptr
<llvm::MemoryBuffer
> Buf
=
57 llvm::MemoryBuffer::getMemBuffer(Source
);
58 SourceMgr
.setMainFileID(SourceMgr
.createFileID(std::move(Buf
)));
60 HeaderSearch
HeaderInfo(std::make_shared
<HeaderSearchOptions
>(), SourceMgr
,
61 Diags
, LangOpts
, Target
.get());
62 std::unique_ptr
<Preprocessor
> PP
= std::make_unique
<Preprocessor
>(
63 std::make_shared
<PreprocessorOptions
>(), Diags
, LangOpts
, SourceMgr
,
64 HeaderInfo
, ModLoader
,
65 /*IILookup =*/nullptr,
66 /*OwnsHeaderSearch =*/false);
67 PP
->Initialize(*Target
);
68 PP
->EnterMainSourceFile();
72 std::vector
<Token
> Lex(StringRef Source
) {
73 TrivialModuleLoader ModLoader
;
74 PP
= CreatePP(Source
, ModLoader
);
76 std::vector
<Token
> toks
;
77 PP
->LexTokensUntilEOF(&toks
);
82 std::vector
<Token
> CheckLex(StringRef Source
,
83 ArrayRef
<tok::TokenKind
> ExpectedTokens
) {
84 auto toks
= Lex(Source
);
85 EXPECT_EQ(ExpectedTokens
.size(), toks
.size());
86 for (unsigned i
= 0, e
= ExpectedTokens
.size(); i
!= e
; ++i
) {
87 EXPECT_EQ(ExpectedTokens
[i
], toks
[i
].getKind());
93 std::string
getSourceText(Token Begin
, Token End
) {
96 Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange(
97 Begin
.getLocation(), End
.getLocation())),
98 SourceMgr
, LangOpts
, &Invalid
);
101 return std::string(Str
);
104 FileSystemOptions FileMgrOpts
;
106 IntrusiveRefCntPtr
<DiagnosticIDs
> DiagID
;
107 DiagnosticsEngine Diags
;
108 SourceManager SourceMgr
;
109 LangOptions LangOpts
;
110 std::shared_ptr
<TargetOptions
> TargetOpts
;
111 IntrusiveRefCntPtr
<TargetInfo
> Target
;
112 std::unique_ptr
<Preprocessor
> PP
;
115 TEST_F(LexerTest
, GetSourceTextExpandsToMaximumInMacroArgument
) {
116 std::vector
<tok::TokenKind
> ExpectedTokens
;
117 ExpectedTokens
.push_back(tok::identifier
);
118 ExpectedTokens
.push_back(tok::l_paren
);
119 ExpectedTokens
.push_back(tok::identifier
);
120 ExpectedTokens
.push_back(tok::r_paren
);
122 std::vector
<Token
> toks
= CheckLex("#define M(x) x\n"
126 EXPECT_EQ("M(i)", getSourceText(toks
[2], toks
[2]));
129 TEST_F(LexerTest
, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro
) {
130 std::vector
<tok::TokenKind
> ExpectedTokens
;
131 ExpectedTokens
.push_back(tok::identifier
);
132 ExpectedTokens
.push_back(tok::identifier
);
134 std::vector
<Token
> toks
= CheckLex("#define M(x) x\n"
138 EXPECT_EQ("M(i)", getSourceText(toks
[0], toks
[0]));
141 TEST_F(LexerTest
, GetSourceTextExpandsInMacroArgumentForBeginOfMacro
) {
142 std::vector
<tok::TokenKind
> ExpectedTokens
;
143 ExpectedTokens
.push_back(tok::identifier
);
144 ExpectedTokens
.push_back(tok::identifier
);
145 ExpectedTokens
.push_back(tok::identifier
);
147 std::vector
<Token
> toks
= CheckLex("#define M(x) x\n"
151 EXPECT_EQ("c M(i)", getSourceText(toks
[1], toks
[2]));
154 TEST_F(LexerTest
, GetSourceTextExpandsInMacroArgumentForEndOfMacro
) {
155 std::vector
<tok::TokenKind
> ExpectedTokens
;
156 ExpectedTokens
.push_back(tok::identifier
);
157 ExpectedTokens
.push_back(tok::identifier
);
158 ExpectedTokens
.push_back(tok::identifier
);
160 std::vector
<Token
> toks
= CheckLex("#define M(x) x\n"
164 EXPECT_EQ("M(i) c", getSourceText(toks
[0], toks
[1]));
167 TEST_F(LexerTest
, GetSourceTextInSeparateFnMacros
) {
168 std::vector
<tok::TokenKind
> ExpectedTokens
;
169 ExpectedTokens
.push_back(tok::identifier
);
170 ExpectedTokens
.push_back(tok::identifier
);
171 ExpectedTokens
.push_back(tok::identifier
);
172 ExpectedTokens
.push_back(tok::identifier
);
174 std::vector
<Token
> toks
= CheckLex("#define M(x) x\n"
175 "M(c M(i)) M(M(i) c)",
178 EXPECT_EQ("<INVALID>", getSourceText(toks
[1], toks
[2]));
181 TEST_F(LexerTest
, GetSourceTextWorksAcrossTokenPastes
) {
182 std::vector
<tok::TokenKind
> ExpectedTokens
;
183 ExpectedTokens
.push_back(tok::identifier
);
184 ExpectedTokens
.push_back(tok::l_paren
);
185 ExpectedTokens
.push_back(tok::identifier
);
186 ExpectedTokens
.push_back(tok::r_paren
);
188 std::vector
<Token
> toks
= CheckLex("#define M(x) x\n"
189 "#define C(x) M(x##c)\n"
193 EXPECT_EQ("C(i)", getSourceText(toks
[2], toks
[2]));
196 TEST_F(LexerTest
, GetSourceTextExpandsAcrossMultipleMacroCalls
) {
197 std::vector
<tok::TokenKind
> ExpectedTokens
;
198 ExpectedTokens
.push_back(tok::identifier
);
199 ExpectedTokens
.push_back(tok::l_paren
);
200 ExpectedTokens
.push_back(tok::identifier
);
201 ExpectedTokens
.push_back(tok::r_paren
);
203 std::vector
<Token
> toks
= CheckLex("#define M(x) x\n"
206 EXPECT_EQ("M(M(i))", getSourceText(toks
[2], toks
[2]));
209 TEST_F(LexerTest
, GetSourceTextInMiddleOfMacroArgument
) {
210 std::vector
<tok::TokenKind
> ExpectedTokens
;
211 ExpectedTokens
.push_back(tok::identifier
);
212 ExpectedTokens
.push_back(tok::l_paren
);
213 ExpectedTokens
.push_back(tok::identifier
);
214 ExpectedTokens
.push_back(tok::r_paren
);
216 std::vector
<Token
> toks
= CheckLex("#define M(x) x\n"
219 EXPECT_EQ("i", getSourceText(toks
[2], toks
[2]));
222 TEST_F(LexerTest
, GetSourceTextExpandsAroundDifferentMacroCalls
) {
223 std::vector
<tok::TokenKind
> ExpectedTokens
;
224 ExpectedTokens
.push_back(tok::identifier
);
225 ExpectedTokens
.push_back(tok::l_paren
);
226 ExpectedTokens
.push_back(tok::identifier
);
227 ExpectedTokens
.push_back(tok::r_paren
);
229 std::vector
<Token
> toks
= CheckLex("#define M(x) x\n"
233 EXPECT_EQ("C(M(i))", getSourceText(toks
[2], toks
[2]));
236 TEST_F(LexerTest
, GetSourceTextOnlyExpandsIfFirstTokenInMacro
) {
237 std::vector
<tok::TokenKind
> ExpectedTokens
;
238 ExpectedTokens
.push_back(tok::identifier
);
239 ExpectedTokens
.push_back(tok::l_paren
);
240 ExpectedTokens
.push_back(tok::identifier
);
241 ExpectedTokens
.push_back(tok::identifier
);
242 ExpectedTokens
.push_back(tok::r_paren
);
244 std::vector
<Token
> toks
= CheckLex("#define M(x) x\n"
248 EXPECT_EQ("M(i)", getSourceText(toks
[3], toks
[3]));
251 TEST_F(LexerTest
, GetSourceTextExpandsRecursively
) {
252 std::vector
<tok::TokenKind
> ExpectedTokens
;
253 ExpectedTokens
.push_back(tok::identifier
);
254 ExpectedTokens
.push_back(tok::identifier
);
255 ExpectedTokens
.push_back(tok::l_paren
);
256 ExpectedTokens
.push_back(tok::identifier
);
257 ExpectedTokens
.push_back(tok::r_paren
);
259 std::vector
<Token
> toks
= CheckLex("#define M(x) x\n"
260 "#define C(x) c M(x)\n"
263 EXPECT_EQ("M(i)", getSourceText(toks
[3], toks
[3]));
266 TEST_F(LexerTest
, LexAPI
) {
267 std::vector
<tok::TokenKind
> ExpectedTokens
;
268 // Line 1 (after the #defines)
269 ExpectedTokens
.push_back(tok::l_square
);
270 ExpectedTokens
.push_back(tok::identifier
);
271 ExpectedTokens
.push_back(tok::r_square
);
272 ExpectedTokens
.push_back(tok::l_square
);
273 ExpectedTokens
.push_back(tok::identifier
);
274 ExpectedTokens
.push_back(tok::r_square
);
276 ExpectedTokens
.push_back(tok::identifier
);
277 ExpectedTokens
.push_back(tok::identifier
);
278 ExpectedTokens
.push_back(tok::identifier
);
279 ExpectedTokens
.push_back(tok::identifier
);
281 std::vector
<Token
> toks
= CheckLex("#define M(x) [x]\n"
284 "#define NOF1 INN(val)\n"
287 "N(INN(val)) N(NOF1) N(NOF2) N(val)",
290 SourceLocation lsqrLoc
= toks
[0].getLocation();
291 SourceLocation idLoc
= toks
[1].getLocation();
292 SourceLocation rsqrLoc
= toks
[2].getLocation();
293 CharSourceRange macroRange
= SourceMgr
.getExpansionRange(lsqrLoc
);
296 EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc
, SourceMgr
, LangOpts
, &Loc
));
297 EXPECT_EQ(Loc
, macroRange
.getBegin());
298 EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc
, SourceMgr
, LangOpts
));
299 EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc
, SourceMgr
, LangOpts
));
300 EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc
, SourceMgr
, LangOpts
, &Loc
));
301 EXPECT_EQ(Loc
, macroRange
.getEnd());
302 EXPECT_TRUE(macroRange
.isTokenRange());
304 CharSourceRange range
= Lexer::makeFileCharRange(
305 CharSourceRange::getTokenRange(lsqrLoc
, idLoc
), SourceMgr
, LangOpts
);
306 EXPECT_TRUE(range
.isInvalid());
307 range
= Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc
, rsqrLoc
),
308 SourceMgr
, LangOpts
);
309 EXPECT_TRUE(range
.isInvalid());
310 range
= Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc
, rsqrLoc
),
311 SourceMgr
, LangOpts
);
312 EXPECT_TRUE(!range
.isTokenRange());
313 EXPECT_EQ(range
.getAsRange(),
314 SourceRange(macroRange
.getBegin(),
315 macroRange
.getEnd().getLocWithOffset(1)));
317 StringRef text
= Lexer::getSourceText(
318 CharSourceRange::getTokenRange(lsqrLoc
, rsqrLoc
),
319 SourceMgr
, LangOpts
);
320 EXPECT_EQ(text
, "M(foo)");
322 SourceLocation macroLsqrLoc
= toks
[3].getLocation();
323 SourceLocation macroIdLoc
= toks
[4].getLocation();
324 SourceLocation macroRsqrLoc
= toks
[5].getLocation();
325 SourceLocation fileLsqrLoc
= SourceMgr
.getSpellingLoc(macroLsqrLoc
);
326 SourceLocation fileIdLoc
= SourceMgr
.getSpellingLoc(macroIdLoc
);
327 SourceLocation fileRsqrLoc
= SourceMgr
.getSpellingLoc(macroRsqrLoc
);
329 range
= Lexer::makeFileCharRange(
330 CharSourceRange::getTokenRange(macroLsqrLoc
, macroIdLoc
),
331 SourceMgr
, LangOpts
);
332 EXPECT_EQ(SourceRange(fileLsqrLoc
, fileIdLoc
.getLocWithOffset(3)),
335 range
= Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc
, macroRsqrLoc
),
336 SourceMgr
, LangOpts
);
337 EXPECT_EQ(SourceRange(fileIdLoc
, fileRsqrLoc
.getLocWithOffset(1)),
340 macroRange
= SourceMgr
.getExpansionRange(macroLsqrLoc
);
341 range
= Lexer::makeFileCharRange(
342 CharSourceRange::getTokenRange(macroLsqrLoc
, macroRsqrLoc
),
343 SourceMgr
, LangOpts
);
344 EXPECT_EQ(SourceRange(macroRange
.getBegin(), macroRange
.getEnd().getLocWithOffset(1)),
347 text
= Lexer::getSourceText(
348 CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc
, macroIdLoc
)),
349 SourceMgr
, LangOpts
);
350 EXPECT_EQ(text
, "[bar");
353 SourceLocation idLoc1
= toks
[6].getLocation();
354 SourceLocation idLoc2
= toks
[7].getLocation();
355 SourceLocation idLoc3
= toks
[8].getLocation();
356 SourceLocation idLoc4
= toks
[9].getLocation();
357 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1
, SourceMgr
, LangOpts
));
358 EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2
, SourceMgr
, LangOpts
));
359 EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3
, SourceMgr
, LangOpts
));
360 EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4
, SourceMgr
, LangOpts
));
363 TEST_F(LexerTest
, HandlesSplitTokens
) {
364 std::vector
<tok::TokenKind
> ExpectedTokens
;
365 // Line 1 (after the #defines)
366 ExpectedTokens
.push_back(tok::identifier
);
367 ExpectedTokens
.push_back(tok::less
);
368 ExpectedTokens
.push_back(tok::identifier
);
369 ExpectedTokens
.push_back(tok::less
);
370 ExpectedTokens
.push_back(tok::greatergreater
);
372 ExpectedTokens
.push_back(tok::identifier
);
373 ExpectedTokens
.push_back(tok::less
);
374 ExpectedTokens
.push_back(tok::identifier
);
375 ExpectedTokens
.push_back(tok::less
);
376 ExpectedTokens
.push_back(tok::greatergreater
);
378 std::vector
<Token
> toks
= CheckLex("#define TY ty\n"
379 "#define RANGLE ty<ty<>>\n"
384 SourceLocation outerTyLoc
= toks
[0].getLocation();
385 SourceLocation innerTyLoc
= toks
[2].getLocation();
386 SourceLocation gtgtLoc
= toks
[4].getLocation();
387 // Split the token to simulate the action of the parser and force creation of
388 // an `ExpansionTokenRange`.
389 SourceLocation rangleLoc
= PP
->SplitToken(gtgtLoc
, 1);
391 // Verify that it only captures the first greater-than and not the second one.
392 CharSourceRange range
= Lexer::makeFileCharRange(
393 CharSourceRange::getTokenRange(innerTyLoc
, rangleLoc
), SourceMgr
,
395 EXPECT_TRUE(range
.isCharRange());
396 EXPECT_EQ(range
.getAsRange(),
397 SourceRange(innerTyLoc
, gtgtLoc
.getLocWithOffset(1)));
399 // Verify case where range begins in a macro expansion.
400 range
= Lexer::makeFileCharRange(
401 CharSourceRange::getTokenRange(outerTyLoc
, rangleLoc
), SourceMgr
,
403 EXPECT_TRUE(range
.isCharRange());
404 EXPECT_EQ(range
.getAsRange(),
405 SourceRange(SourceMgr
.getExpansionLoc(outerTyLoc
),
406 gtgtLoc
.getLocWithOffset(1)));
408 SourceLocation macroInnerTyLoc
= toks
[7].getLocation();
409 SourceLocation macroGtgtLoc
= toks
[9].getLocation();
410 // Split the token to simulate the action of the parser and force creation of
411 // an `ExpansionTokenRange`.
412 SourceLocation macroRAngleLoc
= PP
->SplitToken(macroGtgtLoc
, 1);
414 // Verify that it fails (because it only captures the first greater-than and
415 // not the second one, so it doesn't span the entire macro expansion).
416 range
= Lexer::makeFileCharRange(
417 CharSourceRange::getTokenRange(macroInnerTyLoc
, macroRAngleLoc
),
418 SourceMgr
, LangOpts
);
419 EXPECT_TRUE(range
.isInvalid());
// Regression test for the size reported for the FileID that holds a macro
// argument expansion.
//
// The lexed snippet defines an object-like macro `helper1`, declares a
// variadic function `helper2`, and defines two variadic function-like macros:
// `M1` forwards to `helper2`, and `M2` forwards to `M1` while inserting
// `helper1` as an extra argument. `f1` then invokes `M2("a", "b")`.
// Only the raw token stream from Lex() is used; token kinds are not checked.
422 TEST_F(LexerTest
, DontMergeMacroArgsFromDifferentMacroFiles
) {
423 std::vector
<Token
> toks
=
424 Lex("#define helper1 0\n"
425 "void helper2(const char *, ...);\n"
426 "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
427 "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
428 "void f1() { M2(\"a\", \"b\"); }");
430 // Check the file corresponding to the "helper1" macro arg in M2.
432 // The lexer used to report its size as 31, meaning that the end of the
433 // expansion would be on the *next line* (just past `M2("a", "b")`). Make
434 // sure that we get the correct end location (the comma after "helper1").
// NOTE(review): index 20 is a hard-coded position in the expanded token
// stream; per the variable name it is presumably the token produced from the
// `helper1` argument -- re-confirm the index if the snippet above changes.
435 SourceLocation helper1ArgLoc
= toks
[20].getLocation();
// The FileID containing that location must report a size of exactly 8.
436 EXPECT_EQ(SourceMgr
.getFileIDSize(SourceMgr
.getFileID(helper1ArgLoc
)), 8U);
439 TEST_F(LexerTest
, DontOverallocateStringifyArgs
) {
440 TrivialModuleLoader ModLoader
;
441 auto PP
= CreatePP("\"StrArg\", 5, 'C'", ModLoader
);
443 llvm::BumpPtrAllocator Allocator
;
444 std::array
<IdentifierInfo
*, 3> ParamList
;
445 MacroInfo
*MI
= PP
->AllocateMacroInfo({});
446 MI
->setIsFunctionLike();
447 MI
->setParameterList(ParamList
, Allocator
);
448 EXPECT_EQ(3u, MI
->getNumParams());
449 EXPECT_TRUE(MI
->isFunctionLike());
452 Eof
.setKind(tok::eof
);
453 std::vector
<Token
> ArgTokens
;
457 if (tok
.is(tok::eof
)) {
458 ArgTokens
.push_back(Eof
);
461 if (tok
.is(tok::comma
))
462 ArgTokens
.push_back(Eof
);
464 ArgTokens
.push_back(tok
);
467 auto MacroArgsDeleter
= [&PP
](MacroArgs
*M
) { M
->destroy(*PP
); };
468 std::unique_ptr
<MacroArgs
, decltype(MacroArgsDeleter
)> MA(
469 MacroArgs::create(MI
, ArgTokens
, false, *PP
), MacroArgsDeleter
);
470 auto StringifyArg
= [&](int ArgNo
) {
471 return MA
->StringifyArgument(MA
->getUnexpArgument(ArgNo
), *PP
,
472 /*Charify=*/false, {}, {});
474 Token Result
= StringifyArg(0);
475 EXPECT_EQ(tok::string_literal
, Result
.getKind());
476 EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result
.getLiteralData());
477 Result
= StringifyArg(1);
478 EXPECT_EQ(tok::string_literal
, Result
.getKind());
479 EXPECT_STREQ("\"5\"", Result
.getLiteralData());
480 Result
= StringifyArg(2);
481 EXPECT_EQ(tok::string_literal
, Result
.getKind());
482 EXPECT_STREQ("\"'C'\"", Result
.getLiteralData());
483 #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
484 EXPECT_DEATH(StringifyArg(3), "Invalid arg #");
// Exercises Lexer::isNewLineEscaped on small hand-written buffers.
//
// The helper lambda passes the start of the C string as the first argument
// and a pointer to its *last* character (S + strlen(S) - 1) as the second, so
// every query asks about the final character of the buffer.
488 TEST_F(LexerTest
, IsNewLineEscapedValid
) {
489 auto hasNewLineEscaped
= [](const char *S
) {
// The second argument points at the last character of S, not one past it.
490 return Lexer::isNewLineEscaped(S
, S
+ strlen(S
) - 1);
// Expected true: a backslash followed by a single line ending (\r, \n, \r\n
// or \n\r), optionally with space/\t/\v/\f between the backslash and the
// line ending.
493 EXPECT_TRUE(hasNewLineEscaped("\\\r"));
494 EXPECT_TRUE(hasNewLineEscaped("\\\n"));
495 EXPECT_TRUE(hasNewLineEscaped("\\\r\n"));
496 EXPECT_TRUE(hasNewLineEscaped("\\\n\r"));
497 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r"));
498 EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n"));
// Expected false: a backslash followed by two line endings where the queried
// (last) one is a repeated \r or \n, and buffers with no backslash at all.
500 EXPECT_FALSE(hasNewLineEscaped("\\\r\r"));
501 EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n"));
502 EXPECT_FALSE(hasNewLineEscaped("\\\n\n"));
503 EXPECT_FALSE(hasNewLineEscaped("\r"));
504 EXPECT_FALSE(hasNewLineEscaped("\n"));
505 EXPECT_FALSE(hasNewLineEscaped("\r\n"));
506 EXPECT_FALSE(hasNewLineEscaped("\n\r"));
507 EXPECT_FALSE(hasNewLineEscaped("\r\r"));
508 EXPECT_FALSE(hasNewLineEscaped("\n\n"));
511 TEST_F(LexerTest
, GetBeginningOfTokenWithEscapedNewLine
) {
512 // Each line should have the same length for
513 // further offset calculation to be more straightforward.
514 const unsigned IdentifierLength
= 8;
515 std::string TextToLex
= "rabarbar\n"
520 std::vector
<tok::TokenKind
> ExpectedTokens
{5, tok::identifier
};
521 std::vector
<Token
> LexedTokens
= CheckLex(TextToLex
, ExpectedTokens
);
523 for (const Token
&Tok
: LexedTokens
) {
524 std::pair
<FileID
, unsigned> OriginalLocation
=
525 SourceMgr
.getDecomposedLoc(Tok
.getLocation());
526 for (unsigned Offset
= 0; Offset
< IdentifierLength
; ++Offset
) {
527 SourceLocation LookupLocation
=
528 Tok
.getLocation().getLocWithOffset(Offset
);
530 std::pair
<FileID
, unsigned> FoundLocation
=
531 SourceMgr
.getDecomposedExpansionLoc(
532 Lexer::GetBeginningOfToken(LookupLocation
, SourceMgr
, LangOpts
));
534 // Check that location returned by the GetBeginningOfToken
535 // is the same as original token location reported by Lexer.
536 EXPECT_EQ(FoundLocation
.second
, OriginalLocation
.second
);
// Lexes three inputs that end in backslashes at or next to the end of the
// buffer and expects each to yield an empty token vector:
//   - a line comment ending in backslash-newline,
//   - an unterminated `#include <` followed by two backslashes,
//   - the same `#include` with a trailing newline.
// NOTE(review): per the test name these inputs presumably once made the lexer
// read past the end of the buffer; the assertions themselves only check that
// no tokens are produced.
541 TEST_F(LexerTest
, AvoidPastEndOfStringDereference
) {
542 EXPECT_TRUE(Lex(" // \\\n").empty());
543 EXPECT_TRUE(Lex("#include <\\\\").empty());
544 EXPECT_TRUE(Lex("#include <\\\\\n").empty());
547 TEST_F(LexerTest
, StringizingRasString
) {
548 // For "std::string Lexer::Stringify(StringRef Str, bool Charify)".
549 std::string String1
= R
"(foo
552 // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
553 SmallString
<128> String2
;
554 String2
+= String1
.c_str();
557 std::string String3
= R
"(\
561 SmallString
<128> String4
;
562 String4
+= String3
.c_str();
563 std::string String5
= R
"(a\
567 SmallString
<128> String6
;
568 String6
+= String5
.c_str();
570 String1
= Lexer::Stringify(StringRef(String1
));
571 Lexer::Stringify(String2
);
572 String3
= Lexer::Stringify(StringRef(String3
));
573 Lexer::Stringify(String4
);
574 String5
= Lexer::Stringify(StringRef(String5
));
575 Lexer::Stringify(String6
);
577 EXPECT_EQ(String1
, R
"(foo\n {\"bar
\":[]}\n baz
)");
578 EXPECT_EQ(String2, R"(foo
\n {\"bar
\":[]}\n baz
)");
579 EXPECT_EQ(String3, R"(\\\n \\n
\n \\\\n
\n \\\\)");
580 EXPECT_EQ(String4, R"(\\\n \\n
\n \\\\n
\n \\\\)");
581 EXPECT_EQ(String5, R"(a
\\\n\n\n \\\\b
)");
582 EXPECT_EQ(String6, R"(a
\\\n\n\n \\\\b
)");
585 TEST_F(LexerTest, CharRangeOffByOne) {
586 std::vector<Token> toks = Lex(R"(#define MOO 1
587 void foo() { MOO
; })");
588 const Token &moo = toks[5];
590 EXPECT_EQ(getSourceText(moo, moo), "MOO
");
592 SourceRange R{moo.getLocation(), moo.getLocation()};
595 Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
597 Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));
599 CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts);
601 EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO
"); // Was "MO
".
604 TEST_F(LexerTest, FindNextToken) {
605 Lex("int abcd
= 0;\n"
606 "int xyz
= abcd
;\n");
607 std::vector<std::string> GeneratedByNextToken;
609 SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
611 auto T = Lexer::findNextToken(Loc, SourceMgr, LangOpts);
615 GeneratedByNextToken.push_back(getSourceText(*T, *T));
616 Loc = T->getLocation();
618 EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd
", "=", "0", ";", "int",
619 "xyz
", "=", "abcd
", ";"));
622 TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
623 TrivialModuleLoader ModLoader;
624 auto PP = CreatePP("", ModLoader);
625 PP->LexTokensUntilEOF();
626 EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()),
630 TEST_F(LexerTest, RawAndNormalLexSameForLineComments) {
631 const llvm::StringLiteral Source = R"cpp(
632 // First line comment.
633 //* Second line comment which is ambigious.
634 ; // Have a non-comment token to make sure something is lexed.
636 LangOpts.LineComment = false;
637 auto Toks = Lex(Source);
638 auto &SM = PP->getSourceManager();
639 auto SrcBuffer = SM.getBufferData(SM.getMainFileID());
640 Lexer L(SM.getLocForStartOfFile(SM.getMainFileID()), PP->getLangOpts(),
641 SrcBuffer.data(), SrcBuffer.data(),
642 SrcBuffer.data() + SrcBuffer.size());
644 auto ToksView = llvm::ArrayRef(Toks);
646 EXPECT_FALSE(ToksView.empty());
647 while (!L.LexFromRawLexer(T)) {
648 ASSERT_TRUE(!ToksView.empty());
649 EXPECT_EQ(T.getKind(), ToksView.front().getKind());
650 ToksView = ToksView.drop_front();
652 EXPECT_TRUE(ToksView.empty());
655 TEST(LexerPreambleTest, PreambleBounds) {
656 std::vector<std::string> Cases = {
667 ]]// trailing comment
677 for (const auto& Case : Cases) {
678 llvm::Annotations A(Case);
679 clang::LangOptions LangOpts;
680 LangOpts.CPlusPlusModules = true;
681 auto Bounds = Lexer::ComputePreamble(A.code(), LangOpts);
682 EXPECT_EQ(Bounds.Size, A.range().End) << Case;
686 } // anonymous namespace