Bump version to 19.1.0git
[llvm-project.git] / clang-tools-extra / include-cleaner / lib / Record.cpp
blob6b5be956ec10827edc654ad3fe1c5eab0bb505a8
1 //===--- Record.cpp - Record compiler events ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "clang-include-cleaner/Record.h"
10 #include "clang-include-cleaner/Types.h"
11 #include "clang/AST/ASTConsumer.h"
12 #include "clang/AST/ASTContext.h"
13 #include "clang/AST/DeclGroup.h"
14 #include "clang/Basic/FileEntry.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/LLVM.h"
17 #include "clang/Basic/SourceLocation.h"
18 #include "clang/Basic/SourceManager.h"
19 #include "clang/Basic/Specifiers.h"
20 #include "clang/Frontend/CompilerInstance.h"
21 #include "clang/Lex/DirectoryLookup.h"
22 #include "clang/Lex/MacroInfo.h"
23 #include "clang/Lex/PPCallbacks.h"
24 #include "clang/Lex/Preprocessor.h"
25 #include "clang/Tooling/Inclusions/HeaderAnalysis.h"
26 #include "clang/Tooling/Inclusions/StandardLibrary.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/StringRef.h"
33 #include "llvm/ADT/iterator_range.h"
34 #include "llvm/Support/Allocator.h"
35 #include "llvm/Support/Error.h"
36 #include "llvm/Support/FileSystem/UniqueID.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/StringSaver.h"
39 #include <algorithm>
40 #include <assert.h>
41 #include <memory>
42 #include <optional>
43 #include <set>
44 #include <utility>
45 #include <vector>
47 namespace clang::include_cleaner {
48 namespace {
50 class PPRecorder : public PPCallbacks {
51 public:
52 PPRecorder(RecordedPP &Recorded, const Preprocessor &PP)
53 : Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) {
54 for (const auto &Dir : PP.getHeaderSearchInfo().search_dir_range())
55 if (Dir.getLookupType() == DirectoryLookup::LT_NormalDir)
56 Recorded.Includes.addSearchDirectory(Dir.getDirRef()->getName());
59 void FileChanged(SourceLocation Loc, FileChangeReason Reason,
60 SrcMgr::CharacteristicKind FileType,
61 FileID PrevFID) override {
62 Active = SM.isWrittenInMainFile(Loc);
65 void InclusionDirective(SourceLocation Hash, const Token &IncludeTok,
66 StringRef SpelledFilename, bool IsAngled,
67 CharSourceRange FilenameRange,
68 OptionalFileEntryRef File, StringRef SearchPath,
69 StringRef RelativePath, const Module *SuggestedModule,
70 bool ModuleImported,
71 SrcMgr::CharacteristicKind) override {
72 if (!Active)
73 return;
75 Include I;
76 I.HashLocation = Hash;
77 I.Resolved = File;
78 I.Line = SM.getSpellingLineNumber(Hash);
79 I.Spelled = SpelledFilename;
80 I.Angled = IsAngled;
81 Recorded.Includes.add(I);
84 void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
85 SourceRange Range, const MacroArgs *Args) override {
86 if (!Active)
87 return;
88 recordMacroRef(MacroName, *MD.getMacroInfo());
91 void MacroDefined(const Token &MacroName, const MacroDirective *MD) override {
92 if (!Active)
93 return;
95 const auto *MI = MD->getMacroInfo();
96 // The tokens of a macro definition could refer to a macro.
97 // Formally this reference isn't resolved until this macro is expanded,
98 // but we want to treat it as a reference anyway.
99 for (const auto &Tok : MI->tokens()) {
100 auto *II = Tok.getIdentifierInfo();
101 // Could this token be a reference to a macro? (Not param to this macro).
102 if (!II || !II->hadMacroDefinition() ||
103 llvm::is_contained(MI->params(), II))
104 continue;
105 if (const MacroInfo *MI = PP.getMacroInfo(II))
106 recordMacroRef(Tok, *MI);
110 void MacroUndefined(const Token &MacroName, const MacroDefinition &MD,
111 const MacroDirective *) override {
112 if (!Active)
113 return;
114 if (const auto *MI = MD.getMacroInfo())
115 recordMacroRef(MacroName, *MI);
118 void Ifdef(SourceLocation Loc, const Token &MacroNameTok,
119 const MacroDefinition &MD) override {
120 if (!Active)
121 return;
122 if (const auto *MI = MD.getMacroInfo())
123 recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
126 void Ifndef(SourceLocation Loc, const Token &MacroNameTok,
127 const MacroDefinition &MD) override {
128 if (!Active)
129 return;
130 if (const auto *MI = MD.getMacroInfo())
131 recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
134 using PPCallbacks::Elifdef;
135 using PPCallbacks::Elifndef;
136 void Elifdef(SourceLocation Loc, const Token &MacroNameTok,
137 const MacroDefinition &MD) override {
138 if (!Active)
139 return;
140 if (const auto *MI = MD.getMacroInfo())
141 recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
143 void Elifndef(SourceLocation Loc, const Token &MacroNameTok,
144 const MacroDefinition &MD) override {
145 if (!Active)
146 return;
147 if (const auto *MI = MD.getMacroInfo())
148 recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
151 void Defined(const Token &MacroNameTok, const MacroDefinition &MD,
152 SourceRange Range) override {
153 if (!Active)
154 return;
155 if (const auto *MI = MD.getMacroInfo())
156 recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
159 private:
160 void recordMacroRef(const Token &Tok, const MacroInfo &MI,
161 RefType RT = RefType::Explicit) {
162 if (MI.isBuiltinMacro())
163 return; // __FILE__ is not a reference.
164 Recorded.MacroReferences.push_back(
165 SymbolReference{Macro{Tok.getIdentifierInfo(), MI.getDefinitionLoc()},
166 Tok.getLocation(), RT});
169 bool Active = false;
170 RecordedPP &Recorded;
171 const Preprocessor &PP;
172 const SourceManager &SM;
175 } // namespace
177 class PragmaIncludes::RecordPragma : public PPCallbacks, public CommentHandler {
178 public:
179 RecordPragma(const CompilerInstance &CI, PragmaIncludes *Out)
180 : RecordPragma(CI.getPreprocessor(), Out) {}
181 RecordPragma(const Preprocessor &P, PragmaIncludes *Out)
182 : SM(P.getSourceManager()), HeaderInfo(P.getHeaderSearchInfo()), Out(Out),
183 Arena(std::make_shared<llvm::BumpPtrAllocator>()),
184 UniqueStrings(*Arena),
185 MainFileStem(llvm::sys::path::stem(
186 SM.getNonBuiltinFilenameForID(SM.getMainFileID()).value_or(""))) {}
188 void FileChanged(SourceLocation Loc, FileChangeReason Reason,
189 SrcMgr::CharacteristicKind FileType,
190 FileID PrevFID) override {
191 InMainFile = SM.isWrittenInMainFile(Loc);
193 if (Reason == PPCallbacks::ExitFile) {
194 // At file exit time HeaderSearchInfo is valid and can be used to
195 // determine whether the file was a self-contained header or not.
196 if (OptionalFileEntryRef FE = SM.getFileEntryRefForID(PrevFID)) {
197 if (tooling::isSelfContainedHeader(*FE, SM, HeaderInfo))
198 Out->NonSelfContainedFiles.erase(FE->getUniqueID());
199 else
200 Out->NonSelfContainedFiles.insert(FE->getUniqueID());
205 void EndOfMainFile() override {
206 for (auto &It : Out->IWYUExportBy) {
207 llvm::sort(It.getSecond());
208 It.getSecond().erase(
209 std::unique(It.getSecond().begin(), It.getSecond().end()),
210 It.getSecond().end());
212 Out->Arena.emplace_back(std::move(Arena));
215 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
216 llvm::StringRef FileName, bool IsAngled,
217 CharSourceRange /*FilenameRange*/,
218 OptionalFileEntryRef File,
219 llvm::StringRef /*SearchPath*/,
220 llvm::StringRef /*RelativePath*/,
221 const clang::Module * /*SuggestedModule*/,
222 bool /*ModuleImported*/,
223 SrcMgr::CharacteristicKind FileKind) override {
224 FileID HashFID = SM.getFileID(HashLoc);
225 int HashLine = SM.getLineNumber(HashFID, SM.getFileOffset(HashLoc));
226 std::optional<Header> IncludedHeader;
227 if (IsAngled)
228 if (auto StandardHeader =
229 tooling::stdlib::Header::named("<" + FileName.str() + ">")) {
230 IncludedHeader = *StandardHeader;
232 if (!IncludedHeader && File)
233 IncludedHeader = *File;
234 checkForExport(HashFID, HashLine, IncludedHeader, File);
235 checkForKeep(HashLine, File);
236 checkForDeducedAssociated(IncludedHeader);
239 void checkForExport(FileID IncludingFile, int HashLine,
240 std::optional<Header> IncludedHeader,
241 OptionalFileEntryRef IncludedFile) {
242 if (ExportStack.empty())
243 return;
244 auto &Top = ExportStack.back();
245 if (Top.SeenAtFile != IncludingFile)
246 return;
247 // Make sure current include is covered by the export pragma.
248 if ((Top.Block && HashLine > Top.SeenAtLine) ||
249 Top.SeenAtLine == HashLine) {
250 if (IncludedFile)
251 Out->IWYUExportBy[IncludedFile->getUniqueID()].push_back(Top.Path);
252 if (IncludedHeader && IncludedHeader->kind() == Header::Standard)
253 Out->StdIWYUExportBy[IncludedHeader->standard()].push_back(Top.Path);
254 // main-file #include with export pragma should never be removed.
255 if (Top.SeenAtFile == SM.getMainFileID() && IncludedFile)
256 Out->ShouldKeep.insert(IncludedFile->getUniqueID());
258 if (!Top.Block) // Pop immediately for single-line export pragma.
259 ExportStack.pop_back();
262 void checkForKeep(int HashLine, OptionalFileEntryRef IncludedFile) {
263 if (!InMainFile || KeepStack.empty())
264 return;
265 KeepPragma &Top = KeepStack.back();
266 // Check if the current include is covered by a keep pragma.
267 if (IncludedFile && ((Top.Block && HashLine > Top.SeenAtLine) ||
268 Top.SeenAtLine == HashLine)) {
269 Out->ShouldKeep.insert(IncludedFile->getUniqueID());
272 if (!Top.Block)
273 KeepStack.pop_back(); // Pop immediately for single-line keep pragma.
276 // Consider marking H as the "associated header" of the main file.
278 // Our heuristic:
279 // - it must be the first #include in the main file
280 // - it must have the same name stem as the main file (foo.h and foo.cpp)
281 // (IWYU pragma: associated is also supported, just not by this function).
283 // We consider the associated header as if it had a keep pragma.
284 // (Unlike IWYU, we don't treat #includes inside the associated header as if
285 // they were written in the main file.)
286 void checkForDeducedAssociated(std::optional<Header> H) {
287 namespace path = llvm::sys::path;
288 if (!InMainFile || SeenAssociatedCandidate)
289 return;
290 SeenAssociatedCandidate = true; // Only the first #include is our candidate.
291 if (!H || H->kind() != Header::Physical)
292 return;
293 if (path::stem(H->physical().getName(), path::Style::posix) == MainFileStem)
294 Out->ShouldKeep.insert(H->physical().getUniqueID());
297 bool HandleComment(Preprocessor &PP, SourceRange Range) override {
298 auto &SM = PP.getSourceManager();
299 auto Pragma =
300 tooling::parseIWYUPragma(SM.getCharacterData(Range.getBegin()));
301 if (!Pragma)
302 return false;
304 auto [CommentFID, CommentOffset] = SM.getDecomposedLoc(Range.getBegin());
305 int CommentLine = SM.getLineNumber(CommentFID, CommentOffset);
307 if (InMainFile) {
308 if (Pragma->starts_with("keep") ||
309 // Limited support for associated headers: never consider unused.
310 Pragma->starts_with("associated")) {
311 KeepStack.push_back({CommentLine, false});
312 } else if (Pragma->starts_with("begin_keep")) {
313 KeepStack.push_back({CommentLine, true});
314 } else if (Pragma->starts_with("end_keep") && !KeepStack.empty()) {
315 assert(KeepStack.back().Block);
316 KeepStack.pop_back();
320 auto FE = SM.getFileEntryRefForID(CommentFID);
321 if (!FE) {
322 // This can only happen when the buffer was registered virtually into
323 // SourceManager and FileManager has no idea about it. In such a scenario,
324 // that file cannot be discovered by HeaderSearch, therefore no "explicit"
325 // includes for that file.
326 return false;
328 auto CommentUID = FE->getUniqueID();
329 if (Pragma->consume_front("private")) {
330 StringRef PublicHeader;
331 if (Pragma->consume_front(", include ")) {
332 // We always insert using the spelling from the pragma.
333 PublicHeader =
334 save(Pragma->starts_with("<") || Pragma->starts_with("\"")
335 ? (*Pragma)
336 : ("\"" + *Pragma + "\"").str());
338 Out->IWYUPublic.insert({CommentUID, PublicHeader});
339 return false;
341 if (Pragma->consume_front("always_keep")) {
342 Out->ShouldKeep.insert(CommentUID);
343 return false;
345 auto Filename = FE->getName();
346 // Record export pragma.
347 if (Pragma->starts_with("export")) {
348 ExportStack.push_back({CommentLine, CommentFID, save(Filename), false});
349 } else if (Pragma->starts_with("begin_exports")) {
350 ExportStack.push_back({CommentLine, CommentFID, save(Filename), true});
351 } else if (Pragma->starts_with("end_exports")) {
352 // FIXME: be robust on unmatching cases. We should only pop the stack if
353 // the begin_exports and end_exports is in the same file.
354 if (!ExportStack.empty()) {
355 assert(ExportStack.back().Block);
356 ExportStack.pop_back();
359 return false;
362 private:
363 StringRef save(llvm::StringRef S) { return UniqueStrings.save(S); }
365 bool InMainFile = false;
366 const SourceManager &SM;
367 const HeaderSearch &HeaderInfo;
368 PragmaIncludes *Out;
369 std::shared_ptr<llvm::BumpPtrAllocator> Arena;
370 /// Intern table for strings. Contents are on the arena.
371 llvm::StringSaver UniqueStrings;
372 // Used when deducing associated header.
373 llvm::StringRef MainFileStem;
374 bool SeenAssociatedCandidate = false;
376 struct ExportPragma {
377 // The line number where we saw the begin_exports or export pragma.
378 int SeenAtLine = 0; // 1-based line number.
379 // The file where we saw the pragma.
380 FileID SeenAtFile;
381 // Name (per FileEntry::getName()) of the file SeenAtFile.
382 StringRef Path;
383 // true if it is a block begin/end_exports pragma; false if it is a
384 // single-line export pragma.
385 bool Block = false;
387 // A stack for tracking all open begin_exports or single-line export.
388 std::vector<ExportPragma> ExportStack;
390 struct KeepPragma {
391 // The line number where we saw the begin_keep or keep pragma.
392 int SeenAtLine = 0; // 1-based line number.
393 // true if it is a block begin/end_keep pragma; false if it is a
394 // single-line keep pragma.
395 bool Block = false;
397 // A stack for tracking all open begin_keep pragmas or single-line keeps.
398 std::vector<KeepPragma> KeepStack;
401 void PragmaIncludes::record(const CompilerInstance &CI) {
402 auto Record = std::make_unique<RecordPragma>(CI, this);
403 CI.getPreprocessor().addCommentHandler(Record.get());
404 CI.getPreprocessor().addPPCallbacks(std::move(Record));
407 void PragmaIncludes::record(Preprocessor &P) {
408 auto Record = std::make_unique<RecordPragma>(P, this);
409 P.addCommentHandler(Record.get());
410 P.addPPCallbacks(std::move(Record));
413 llvm::StringRef PragmaIncludes::getPublic(const FileEntry *F) const {
414 auto It = IWYUPublic.find(F->getUniqueID());
415 if (It == IWYUPublic.end())
416 return "";
417 return It->getSecond();
420 static llvm::SmallVector<FileEntryRef>
421 toFileEntries(llvm::ArrayRef<StringRef> FileNames, FileManager &FM) {
422 llvm::SmallVector<FileEntryRef> Results;
424 for (auto FName : FileNames) {
425 // FIMXE: log the failing cases?
426 if (auto FE = FM.getOptionalFileRef(FName))
427 Results.push_back(*FE);
429 return Results;
431 llvm::SmallVector<FileEntryRef>
432 PragmaIncludes::getExporters(const FileEntry *File, FileManager &FM) const {
433 auto It = IWYUExportBy.find(File->getUniqueID());
434 if (It == IWYUExportBy.end())
435 return {};
437 return toFileEntries(It->getSecond(), FM);
439 llvm::SmallVector<FileEntryRef>
440 PragmaIncludes::getExporters(tooling::stdlib::Header StdHeader,
441 FileManager &FM) const {
442 auto It = StdIWYUExportBy.find(StdHeader);
443 if (It == StdIWYUExportBy.end())
444 return {};
445 return toFileEntries(It->getSecond(), FM);
448 bool PragmaIncludes::isSelfContained(const FileEntry *FE) const {
449 return !NonSelfContainedFiles.contains(FE->getUniqueID());
452 bool PragmaIncludes::isPrivate(const FileEntry *FE) const {
453 return IWYUPublic.contains(FE->getUniqueID());
456 bool PragmaIncludes::shouldKeep(const FileEntry *FE) const {
457 return ShouldKeep.contains(FE->getUniqueID()) ||
458 NonSelfContainedFiles.contains(FE->getUniqueID());
461 namespace {
462 template <typename T> bool isImplicitTemplateSpecialization(const Decl *D) {
463 if (const auto *TD = dyn_cast<T>(D))
464 return TD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation;
465 return false;
467 } // namespace
469 std::unique_ptr<ASTConsumer> RecordedAST::record() {
470 class Recorder : public ASTConsumer {
471 RecordedAST *Out;
473 public:
474 Recorder(RecordedAST *Out) : Out(Out) {}
475 void Initialize(ASTContext &Ctx) override { Out->Ctx = &Ctx; }
476 bool HandleTopLevelDecl(DeclGroupRef DG) override {
477 const auto &SM = Out->Ctx->getSourceManager();
478 for (Decl *D : DG) {
479 if (!SM.isWrittenInMainFile(SM.getExpansionLoc(D->getLocation())))
480 continue;
481 if (isImplicitTemplateSpecialization<FunctionDecl>(D) ||
482 isImplicitTemplateSpecialization<CXXRecordDecl>(D) ||
483 isImplicitTemplateSpecialization<VarDecl>(D))
484 continue;
485 // FIXME: Filter out certain Obj-C as well.
486 Out->Roots.push_back(D);
488 return ASTConsumer::HandleTopLevelDecl(DG);
492 return std::make_unique<Recorder>(this);
495 std::unique_ptr<PPCallbacks> RecordedPP::record(const Preprocessor &PP) {
496 return std::make_unique<PPRecorder>(*this, PP);
499 } // namespace clang::include_cleaner