1 //===--- Record.cpp - Record compiler events ------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "clang-include-cleaner/Record.h"
10 #include "clang-include-cleaner/Types.h"
11 #include "clang/AST/ASTConsumer.h"
12 #include "clang/AST/ASTContext.h"
13 #include "clang/AST/DeclGroup.h"
14 #include "clang/Basic/FileEntry.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/LLVM.h"
17 #include "clang/Basic/SourceLocation.h"
18 #include "clang/Basic/SourceManager.h"
19 #include "clang/Basic/Specifiers.h"
20 #include "clang/Frontend/CompilerInstance.h"
21 #include "clang/Lex/DirectoryLookup.h"
22 #include "clang/Lex/MacroInfo.h"
23 #include "clang/Lex/PPCallbacks.h"
24 #include "clang/Lex/Preprocessor.h"
25 #include "clang/Tooling/Inclusions/HeaderAnalysis.h"
26 #include "clang/Tooling/Inclusions/StandardLibrary.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/StringRef.h"
33 #include "llvm/ADT/iterator_range.h"
34 #include "llvm/Support/Allocator.h"
35 #include "llvm/Support/Error.h"
36 #include "llvm/Support/FileSystem/UniqueID.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/StringSaver.h"
47 namespace clang::include_cleaner
{
// Preprocessor callbacks that feed a RecordedPP: #include directives are added
// to Recorded.Includes and macro uses are appended to
// Recorded.MacroReferences.
50 class PPRecorder
: public PPCallbacks
{
52 PPRecorder(RecordedPP
&Recorded
, const Preprocessor
&PP
)
53 : Recorded(Recorded
), PP(PP
), SM(PP
.getSourceManager()) {
54 for (const auto &Dir
: PP
.getHeaderSearchInfo().search_dir_range())
55 if (Dir
.getLookupType() == DirectoryLookup::LT_NormalDir
)
56 Recorded
.Includes
.addSearchDirectory(Dir
.getDirRef()->getName());
59 void FileChanged(SourceLocation Loc
, FileChangeReason Reason
,
60 SrcMgr::CharacteristicKind FileType
,
61 FileID PrevFID
) override
{
62 Active
= SM
.isWrittenInMainFile(Loc
);
// Records one #include directive: stores the location of the '#', its
// spelling line number and the filename as spelled into `I`, then adds `I` to
// Recorded.Includes.
// NOTE(review): the declaration of `I` is not among the lines visible here —
// presumably a local `Include`; confirm against the full file.
65 void InclusionDirective(SourceLocation Hash
, const Token
&IncludeTok
,
66 StringRef SpelledFilename
, bool IsAngled
,
67 CharSourceRange FilenameRange
,
68 OptionalFileEntryRef File
, StringRef SearchPath
,
69 StringRef RelativePath
, const Module
*SuggestedModule
,
71 SrcMgr::CharacteristicKind
) override
{
76 I
.HashLocation
= Hash
;
// Line is taken from the spelling location of the '#'.
78 I
.Line
= SM
.getSpellingLineNumber(Hash
);
79 I
.Spelled
= SpelledFilename
;
81 Recorded
.Includes
.add(I
);
// Records an expanded macro as a reference, using recordMacroRef's default
// reference type.
// MD.getMacroInfo() is dereferenced without a null check in the visible code.
84 void MacroExpands(const Token
&MacroName
, const MacroDefinition
&MD
,
85 SourceRange Range
, const MacroArgs
*Args
) override
{
88 recordMacroRef(MacroName
, *MD
.getMacroInfo());
// Walks the body tokens of a newly defined macro and records a reference for
// each token that names another macro known to the preprocessor.
// NOTE(review): the statement controlled by the filtering `if` below is not
// visible in this extract — presumably it skips the token; confirm.
91 void MacroDefined(const Token
&MacroName
, const MacroDirective
*MD
) override
{
95 const auto *MI
= MD
->getMacroInfo();
96 // The tokens of a macro definition could refer to a macro.
97 // Formally this reference isn't resolved until this macro is expanded,
98 // but we want to treat it as a reference anyway.
99 for (const auto &Tok
: MI
->tokens()) {
100 auto *II
= Tok
.getIdentifierInfo();
101 // Could this token be a reference to a macro? (Not param to this macro).
102 if (!II
|| !II
->hadMacroDefinition() ||
103 llvm::is_contained(MI
->params(), II
))
// The inner `MI` shadows the outer one: it is the referenced macro's info,
// looked up by identifier.
105 if (const MacroInfo
*MI
= PP
.getMacroInfo(II
))
106 recordMacroRef(Tok
, *MI
);
// Records the macro named in an #undef as a reference (default reference
// type), provided MD has a MacroInfo.
110 void MacroUndefined(const Token
&MacroName
, const MacroDefinition
&MD
,
111 const MacroDirective
*) override
{
114 if (const auto *MI
= MD
.getMacroInfo())
115 recordMacroRef(MacroName
, *MI
);
// Records the macro tested by #ifdef as a RefType::Ambiguous reference,
// provided MD has a MacroInfo.
118 void Ifdef(SourceLocation Loc
, const Token
&MacroNameTok
,
119 const MacroDefinition
&MD
) override
{
122 if (const auto *MI
= MD
.getMacroInfo())
123 recordMacroRef(MacroNameTok
, *MI
, RefType::Ambiguous
);
// Records the macro tested by #ifndef as a RefType::Ambiguous reference,
// provided MD has a MacroInfo.
126 void Ifndef(SourceLocation Loc
, const Token
&MacroNameTok
,
127 const MacroDefinition
&MD
) override
{
130 if (const auto *MI
= MD
.getMacroInfo())
131 recordMacroRef(MacroNameTok
, *MI
, RefType::Ambiguous
);
// Using-declarations keep the base-class Elifdef/Elifndef overloads visible;
// presumably so the overrides in this class do not hide the overloads that
// are not overridden here.
134 using PPCallbacks::Elifdef
;
135 using PPCallbacks::Elifndef
;
// Records the macro tested by #elifdef as a RefType::Ambiguous reference,
// provided MD has a MacroInfo.
136 void Elifdef(SourceLocation Loc
, const Token
&MacroNameTok
,
137 const MacroDefinition
&MD
) override
{
140 if (const auto *MI
= MD
.getMacroInfo())
141 recordMacroRef(MacroNameTok
, *MI
, RefType::Ambiguous
);
// Records the macro tested by #elifndef as a RefType::Ambiguous reference,
// provided MD has a MacroInfo.
143 void Elifndef(SourceLocation Loc
, const Token
&MacroNameTok
,
144 const MacroDefinition
&MD
) override
{
147 if (const auto *MI
= MD
.getMacroInfo())
148 recordMacroRef(MacroNameTok
, *MI
, RefType::Ambiguous
);
// Records the macro tested by a `defined` operator as a RefType::Ambiguous
// reference, provided MD has a MacroInfo.
151 void Defined(const Token
&MacroNameTok
, const MacroDefinition
&MD
,
152 SourceRange Range
) override
{
155 if (const auto *MI
= MD
.getMacroInfo())
156 recordMacroRef(MacroNameTok
, *MI
, RefType::Ambiguous
);
160 void recordMacroRef(const Token
&Tok
, const MacroInfo
&MI
,
161 RefType RT
= RefType::Explicit
) {
162 if (MI
.isBuiltinMacro())
163 return; // __FILE__ is not a reference.
164 Recorded
.MacroReferences
.push_back(
165 SymbolReference
{Macro
{Tok
.getIdentifierInfo(), MI
.getDefinitionLoc()},
166 Tok
.getLocation(), RT
});
// Output sink: receives the recorded includes and macro references.
170 RecordedPP
&Recorded
;
// Preprocessor being observed; used to look up MacroInfo for identifiers.
171 const Preprocessor
&PP
;
// PP's source manager; used for main-file checks and line numbers.
172 const SourceManager
&SM
;
// Preprocessor callback and comment handler that parses IWYU pragmas out of
// comments and records their effects (keep, export, private, ...) on the
// PragmaIncludes it was created for.
177 class PragmaIncludes::RecordPragma
: public PPCallbacks
, public CommentHandler
{
179 RecordPragma(const CompilerInstance
&CI
, PragmaIncludes
*Out
)
180 : RecordPragma(CI
.getPreprocessor(), Out
) {}
181 RecordPragma(const Preprocessor
&P
, PragmaIncludes
*Out
)
182 : SM(P
.getSourceManager()), HeaderInfo(P
.getHeaderSearchInfo()), Out(Out
),
183 Arena(std::make_shared
<llvm::BumpPtrAllocator
>()),
184 UniqueStrings(*Arena
),
185 MainFileStem(llvm::sys::path::stem(
186 SM
.getNonBuiltinFilenameForID(SM
.getMainFileID()).value_or(""))) {}
// Updates InMainFile from `Loc` on every file change and, when a file is
// exited, updates Out->NonSelfContainedFiles for the file identified by
// PrevFID based on tooling::isSelfContainedHeader.
// NOTE(review): the erase/insert pair below presumably forms an if/else
// (erase when self-contained, insert otherwise); the `else` is not visible in
// this extract — confirm against the full file.
188 void FileChanged(SourceLocation Loc
, FileChangeReason Reason
,
189 SrcMgr::CharacteristicKind FileType
,
190 FileID PrevFID
) override
{
191 InMainFile
= SM
.isWrittenInMainFile(Loc
);
193 if (Reason
== PPCallbacks::ExitFile
) {
194 // At file exit time HeaderSearchInfo is valid and can be used to
195 // determine whether the file was a self-contained header or not.
196 if (OptionalFileEntryRef FE
= SM
.getFileEntryRefForID(PrevFID
)) {
197 if (tooling::isSelfContainedHeader(*FE
, SM
, HeaderInfo
))
198 Out
->NonSelfContainedFiles
.erase(FE
->getUniqueID());
200 Out
->NonSelfContainedFiles
.insert(FE
->getUniqueID());
205 void EndOfMainFile() override
{
206 for (auto &It
: Out
->IWYUExportBy
) {
207 llvm::sort(It
.getSecond());
208 It
.getSecond().erase(
209 std::unique(It
.getSecond().begin(), It
.getSecond().end()),
210 It
.getSecond().end());
212 Out
->Arena
.emplace_back(std::move(Arena
));
// Handles one #include directive: computes the file and line of the '#',
// resolves the included header (a recognized standard-library header if the
// lookup by "<FileName>" succeeds, otherwise the resolved file if any), and
// then runs the export, keep and associated-header checks on it.
// NOTE(review): a line appears to be missing between the IncludedHeader
// declaration and the stdlib lookup in this extract (IsAngled is otherwise
// unused here) — confirm whether the lookup is conditional.
215 void InclusionDirective(SourceLocation HashLoc
, const Token
&IncludeTok
,
216 llvm::StringRef FileName
, bool IsAngled
,
217 CharSourceRange
/*FilenameRange*/,
218 OptionalFileEntryRef File
,
219 llvm::StringRef
/*SearchPath*/,
220 llvm::StringRef
/*RelativePath*/,
221 const clang::Module
* /*SuggestedModule*/,
222 bool /*ModuleImported*/,
223 SrcMgr::CharacteristicKind FileKind
) override
{
224 FileID HashFID
= SM
.getFileID(HashLoc
);
225 int HashLine
= SM
.getLineNumber(HashFID
, SM
.getFileOffset(HashLoc
));
226 std::optional
<Header
> IncludedHeader
;
228 if (auto StandardHeader
=
229 tooling::stdlib::Header::named("<" + FileName
.str() + ">")) {
230 IncludedHeader
= *StandardHeader
;
// Fall back to the resolved file when no standard header matched.
232 if (!IncludedHeader
&& File
)
233 IncludedHeader
= *File
;
234 checkForExport(HashFID
, HashLine
, IncludedHeader
, File
);
235 checkForKeep(HashLine
, File
);
236 checkForDeducedAssociated(IncludedHeader
);
// Applies the innermost open export pragma (top of ExportStack) to an
// #include at HashLine of IncludingFile: when the include is covered, the
// pragma's file path is recorded as an exporter of the included file / standard
// header, and main-file exports are additionally marked as ShouldKeep.
// NOTE(review): the statements controlled by the two guard `if`s at the top
// (empty stack, pragma seen in a different file) are not visible in this
// extract — presumably early returns; confirm. The IWYUExportBy update below
// dereferences IncludedFile; any guard for it is likewise not visible.
239 void checkForExport(FileID IncludingFile
, int HashLine
,
240 std::optional
<Header
> IncludedHeader
,
241 OptionalFileEntryRef IncludedFile
) {
242 if (ExportStack
.empty())
244 auto &Top
= ExportStack
.back();
245 if (Top
.SeenAtFile
!= IncludingFile
)
247 // Make sure current include is covered by the export pragma.
// Covered means: inside an open block pragma (after its line), or on the
// same line as the pragma.
248 if ((Top
.Block
&& HashLine
> Top
.SeenAtLine
) ||
249 Top
.SeenAtLine
== HashLine
) {
251 Out
->IWYUExportBy
[IncludedFile
->getUniqueID()].push_back(Top
.Path
);
252 if (IncludedHeader
&& IncludedHeader
->kind() == Header::Standard
)
253 Out
->StdIWYUExportBy
[IncludedHeader
->standard()].push_back(Top
.Path
);
254 // main-file #include with export pragma should never be removed.
255 if (Top
.SeenAtFile
== SM
.getMainFileID() && IncludedFile
)
256 Out
->ShouldKeep
.insert(IncludedFile
->getUniqueID());
258 if (!Top
.Block
) // Pop immediately for single-line export pragma.
259 ExportStack
.pop_back();
// Applies the innermost open keep pragma (top of KeepStack) to an #include at
// HashLine: a resolved include that is covered by the pragma is inserted into
// Out->ShouldKeep.
// NOTE(review): the statement controlled by the first guard `if` and the
// condition guarding the final pop_back are not visible in this extract —
// presumably an early return and a `!Top.Block` check; confirm.
262 void checkForKeep(int HashLine
, OptionalFileEntryRef IncludedFile
) {
263 if (!InMainFile
|| KeepStack
.empty())
265 KeepPragma
&Top
= KeepStack
.back();
266 // Check if the current include is covered by a keep pragma.
267 if (IncludedFile
&& ((Top
.Block
&& HashLine
> Top
.SeenAtLine
) ||
268 Top
.SeenAtLine
== HashLine
)) {
269 Out
->ShouldKeep
.insert(IncludedFile
->getUniqueID());
273 KeepStack
.pop_back(); // Pop immediately for single-line keep pragma.
276 // Consider marking H as the "associated header" of the main file.
279 // - it must be the first #include in the main file
280 // - it must have the same name stem as the main file (foo.h and foo.cpp)
281 // (IWYU pragma: associated is also supported, just not by this function).
283 // We consider the associated header as if it had a keep pragma.
284 // (Unlike IWYU, we don't treat #includes inside the associated header as if
285 // they were written in the main file.)
// Implementation: a physical header H whose POSIX-style path stem equals
// MainFileStem has its unique ID inserted into Out->ShouldKeep.
// NOTE(review): the statements controlled by the two guard `if`s below are not
// visible in this extract — presumably early returns; confirm.
286 void checkForDeducedAssociated(std::optional
<Header
> H
) {
287 namespace path
= llvm::sys::path
;
288 if (!InMainFile
|| SeenAssociatedCandidate
)
290 SeenAssociatedCandidate
= true; // Only the first #include is our candidate.
291 if (!H
|| H
->kind() != Header::Physical
)
293 if (path::stem(H
->physical().getName(), path::Style::posix
) == MainFileStem
)
294 Out
->ShouldKeep
.insert(H
->physical().getUniqueID());
// Comment-handler entry point: parses an IWYU pragma from the comment starting
// at Range.getBegin() and updates recorder state accordingly:
//  - keep / associated / begin_keep / end_keep push to or pop from KeepStack;
//  - private [, include <spelling>] records a mapping in Out->IWYUPublic;
//  - always_keep inserts the commenting file into Out->ShouldKeep;
//  - export / begin_exports / end_exports push to or pop from ExportStack.
// NOTE(review): several lines are not visible in this extract, including the
// declaration of `Pragma` (presumably bound to the parseIWYUPragma result),
// the guards around the keep handling and the missing-FileEntry case, the
// assignment target of the save(...) call, and all return statements —
// confirm against the full file before relying on control-flow details.
297 bool HandleComment(Preprocessor
&PP
, SourceRange Range
) override
{
298 auto &SM
= PP
.getSourceManager();
300 tooling::parseIWYUPragma(SM
.getCharacterData(Range
.getBegin()));
// File and 1-based line of the comment; used to match pragmas to includes.
304 auto [CommentFID
, CommentOffset
] = SM
.getDecomposedLoc(Range
.getBegin());
305 int CommentLine
= SM
.getLineNumber(CommentFID
, CommentOffset
);
// KeepPragma entries are {line, is-block}.
308 if (Pragma
->starts_with("keep") ||
309 // Limited support for associated headers: never consider unused.
310 Pragma
->starts_with("associated")) {
311 KeepStack
.push_back({CommentLine
, false});
312 } else if (Pragma
->starts_with("begin_keep")) {
313 KeepStack
.push_back({CommentLine
, true});
314 } else if (Pragma
->starts_with("end_keep") && !KeepStack
.empty()) {
315 assert(KeepStack
.back().Block
);
316 KeepStack
.pop_back();
320 auto FE
= SM
.getFileEntryRefForID(CommentFID
);
322 // This can only happen when the buffer was registered virtually into
323 // SourceManager and FileManager has no idea about it. In such a scenario,
324 // that file cannot be discovered by HeaderSearch, therefore no "explicit"
325 // includes for that file.
328 auto CommentUID
= FE
->getUniqueID();
329 if (Pragma
->consume_front("private")) {
330 StringRef PublicHeader
;
331 if (Pragma
->consume_front(", include ")) {
332 // We always insert using the spelling from the pragma.
// A spelling without <> or "" delimiters is wrapped in double quotes.
334 save(Pragma
->starts_with("<") || Pragma
->starts_with("\"")
336 : ("\"" + *Pragma
+ "\"").str());
338 Out
->IWYUPublic
.insert({CommentUID
, PublicHeader
});
341 if (Pragma
->consume_front("always_keep")) {
342 Out
->ShouldKeep
.insert(CommentUID
);
345 auto Filename
= FE
->getName();
346 // Record export pragma.
// ExportPragma entries are {line, file, interned path, is-block}.
347 if (Pragma
->starts_with("export")) {
348 ExportStack
.push_back({CommentLine
, CommentFID
, save(Filename
), false});
349 } else if (Pragma
->starts_with("begin_exports")) {
350 ExportStack
.push_back({CommentLine
, CommentFID
, save(Filename
), true});
351 } else if (Pragma
->starts_with("end_exports")) {
352 // FIXME: be robust on unmatching cases. We should only pop the stack if
353 // the begin_exports and end_exports are in the same file.
354 if (!ExportStack
.empty()) {
355 assert(ExportStack
.back().Block
);
356 ExportStack
.pop_back();
363 StringRef
save(llvm::StringRef S
) { return UniqueStrings
.save(S
); }
// True while the preprocessor is in the main file; refreshed by FileChanged.
365 bool InMainFile
= false;
// Source manager of the preprocessor being recorded.
366 const SourceManager
&SM
;
// Header search info; passed to tooling::isSelfContainedHeader.
367 const HeaderSearch
&HeaderInfo
;
// Backing allocator for UniqueStrings; moved into Out->Arena by
// EndOfMainFile.
369 std::shared_ptr
<llvm::BumpPtrAllocator
> Arena
;
370 /// Intern table for strings. Contents are on the arena.
371 llvm::StringSaver UniqueStrings
;
372 // Used when deducing associated header.
373 llvm::StringRef MainFileStem
;
// Set once the first main-file #include has been considered as the
// associated-header candidate.
374 bool SeenAssociatedCandidate
= false;
// One open export pragma (single-line `export` or block `begin_exports`).
// NOTE(review): the declarations of the fields described by the comments
// below (used elsewhere as SeenAtFile, Path and Block) are not visible in
// this extract.
376 struct ExportPragma
{
377 // The line number where we saw the begin_exports or export pragma.
378 int SeenAtLine
= 0; // 1-based line number.
379 // The file where we saw the pragma.
381 // Name (per FileEntry::getName()) of the file SeenAtFile.
383 // true if it is a block begin/end_exports pragma; false if it is a
384 // single-line export pragma.
387 // A stack for tracking all open begin_exports or single-line export.
388 std::vector
<ExportPragma
> ExportStack
;
// NOTE(review): the fields below appear to belong to the KeepPragma struct
// whose header line (and Block field declaration) is not visible in this
// extract — confirm against the full file.
391 // The line number where we saw the begin_keep or keep pragma.
392 int SeenAtLine
= 0; // 1-based line number.
393 // true if it is a block begin/end_keep pragma; false if it is a
394 // single-line keep pragma.
397 // A stack for tracking all open begin_keep pragmas or single-line keeps.
398 std::vector
<KeepPragma
> KeepStack
;
401 void PragmaIncludes::record(const CompilerInstance
&CI
) {
402 auto Record
= std::make_unique
<RecordPragma
>(CI
, this);
403 CI
.getPreprocessor().addCommentHandler(Record
.get());
404 CI
.getPreprocessor().addPPCallbacks(std::move(Record
));
407 void PragmaIncludes::record(Preprocessor
&P
) {
408 auto Record
= std::make_unique
<RecordPragma
>(P
, this);
409 P
.addCommentHandler(Record
.get());
410 P
.addPPCallbacks(std::move(Record
));
// Looks up F's unique ID in IWYUPublic and returns the mapped public header
// spelling when an entry exists.
// NOTE(review): the value returned when no entry exists is not visible in
// this extract — presumably an empty string; confirm.
413 llvm::StringRef
PragmaIncludes::getPublic(const FileEntry
*F
) const {
414 auto It
= IWYUPublic
.find(F
->getUniqueID());
415 if (It
== IWYUPublic
.end())
417 return It
->getSecond();
// Resolves each name in FileNames through FM and collects the file refs that
// could be found; names that FM cannot resolve are silently skipped.
// NOTE(review): the final return statement is not visible in this extract —
// presumably `return Results;`; confirm.
420 static llvm::SmallVector
<FileEntryRef
>
421 toFileEntries(llvm::ArrayRef
<StringRef
> FileNames
, FileManager
&FM
) {
422 llvm::SmallVector
<FileEntryRef
> Results
;
424 for (auto FName
: FileNames
) {
425 // FIXME: log the failing cases?
426 if (auto FE
= FM
.getOptionalFileRef(FName
))
427 Results
.push_back(*FE
);
// Returns the files recorded in IWYUExportBy as exporters of File, resolved
// to file refs through FM.
// NOTE(review): the value returned when File has no entry is not visible in
// this extract — presumably an empty vector; confirm.
431 llvm::SmallVector
<FileEntryRef
>
432 PragmaIncludes::getExporters(const FileEntry
*File
, FileManager
&FM
) const {
433 auto It
= IWYUExportBy
.find(File
->getUniqueID());
434 if (It
== IWYUExportBy
.end())
437 return toFileEntries(It
->getSecond(), FM
);
// Returns the files recorded in StdIWYUExportBy as exporters of the standard
// library header StdHeader, resolved to file refs through FM.
// NOTE(review): the value returned when StdHeader has no entry is not visible
// in this extract — presumably an empty vector; confirm.
439 llvm::SmallVector
<FileEntryRef
>
440 PragmaIncludes::getExporters(tooling::stdlib::Header StdHeader
,
441 FileManager
&FM
) const {
442 auto It
= StdIWYUExportBy
.find(StdHeader
);
443 if (It
== StdIWYUExportBy
.end())
445 return toFileEntries(It
->getSecond(), FM
);
448 bool PragmaIncludes::isSelfContained(const FileEntry
*FE
) const {
449 return !NonSelfContainedFiles
.contains(FE
->getUniqueID());
452 bool PragmaIncludes::isPrivate(const FileEntry
*FE
) const {
453 return IWYUPublic
.contains(FE
->getUniqueID());
456 bool PragmaIncludes::shouldKeep(const FileEntry
*FE
) const {
457 return ShouldKeep
.contains(FE
->getUniqueID()) ||
458 NonSelfContainedFiles
.contains(FE
->getUniqueID());
// Returns whether D, when it is a declaration of type T, has template
// specialization kind TSK_ImplicitInstantiation.
// NOTE(review): the result for a D that is not a T is not visible in this
// extract — presumably `false`; confirm.
462 template <typename T
> bool isImplicitTemplateSpecialization(const Decl
*D
) {
463 if (const auto *TD
= dyn_cast
<T
>(D
))
464 return TD
->getTemplateSpecializationKind() == TSK_ImplicitInstantiation
;
// Creates an ASTConsumer that fills in this RecordedAST: Initialize stores the
// ASTContext in Ctx, and HandleTopLevelDecl appends declarations to Roots
// after filtering on main-file location and implicit template
// specializations.
// NOTE(review): the loop that introduces `D`, the statements controlled by the
// two filtering `if`s (presumably `continue`), and the Recorder's `Out` member
// declaration are not visible in this extract — confirm against the full file.
469 std::unique_ptr
<ASTConsumer
> RecordedAST::record() {
470 class Recorder
: public ASTConsumer
{
474 Recorder(RecordedAST
*Out
) : Out(Out
) {}
475 void Initialize(ASTContext
&Ctx
) override
{ Out
->Ctx
= &Ctx
; }
476 bool HandleTopLevelDecl(DeclGroupRef DG
) override
{
477 const auto &SM
= Out
->Ctx
->getSourceManager();
// The main-file test uses the expansion location of the declaration.
479 if (!SM
.isWrittenInMainFile(SM
.getExpansionLoc(D
->getLocation())))
481 if (isImplicitTemplateSpecialization
<FunctionDecl
>(D
) ||
482 isImplicitTemplateSpecialization
<CXXRecordDecl
>(D
) ||
483 isImplicitTemplateSpecialization
<VarDecl
>(D
))
485 // FIXME: Filter out certain Obj-C as well.
486 Out
->Roots
.push_back(D
);
488 return ASTConsumer::HandleTopLevelDecl(DG
);
492 return std::make_unique
<Recorder
>(this);
495 std::unique_ptr
<PPCallbacks
> RecordedPP::record(const Preprocessor
&PP
) {
496 return std::make_unique
<PPRecorder
>(*this, PP
);
499 } // namespace clang::include_cleaner