workflows: Fix typo from 2d3d0f50ceb938c155a7283e684f28190d24d6ba
[llvm-project.git] / clang-tools-extra / clangd / IncludeCleaner.cpp
blob0bdf32c6d69dd2c1ec3d5c587e0a893e7250ecd8
1 //===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "IncludeCleaner.h"
10 #include "Config.h"
11 #include "Headers.h"
12 #include "ParsedAST.h"
13 #include "Protocol.h"
14 #include "SourceCode.h"
15 #include "index/CanonicalIncludes.h"
16 #include "support/Logger.h"
17 #include "support/Trace.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/ExprCXX.h"
20 #include "clang/AST/RecursiveASTVisitor.h"
21 #include "clang/Basic/SourceLocation.h"
22 #include "clang/Basic/SourceManager.h"
23 #include "clang/Lex/HeaderSearch.h"
24 #include "clang/Lex/Preprocessor.h"
25 #include "clang/Tooling/Syntax/Tokens.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/STLFunctionalExtras.h"
28 #include "llvm/ADT/SmallString.h"
29 #include "llvm/ADT/StringSet.h"
30 #include "llvm/Support/FormatVariadic.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/Regex.h"
33 #include <functional>
35 namespace clang {
36 namespace clangd {
38 static bool AnalyzeStdlib = false;
39 void setIncludeCleanerAnalyzesStdlib(bool B) { AnalyzeStdlib = B; }
41 namespace {
43 /// Crawler traverses the AST and feeds in the locations of (sometimes
44 /// implicitly) used symbols into \p Result.
45 class ReferencedLocationCrawler
46 : public RecursiveASTVisitor<ReferencedLocationCrawler> {
47 public:
48 ReferencedLocationCrawler(ReferencedLocations &Result,
49 const SourceManager &SM)
50 : Result(Result), SM(SM) {}
52 bool VisitDeclRefExpr(DeclRefExpr *DRE) {
53 add(DRE->getDecl());
54 add(DRE->getFoundDecl());
55 return true;
58 bool VisitMemberExpr(MemberExpr *ME) {
59 add(ME->getMemberDecl());
60 add(ME->getFoundDecl().getDecl());
61 return true;
64 bool VisitTagType(TagType *TT) {
65 add(TT->getDecl());
66 return true;
69 bool VisitFunctionDecl(FunctionDecl *FD) {
70 // Function definition will require redeclarations to be included.
71 if (FD->isThisDeclarationADefinition())
72 add(FD);
73 return true;
76 bool VisitCXXConstructExpr(CXXConstructExpr *CCE) {
77 add(CCE->getConstructor());
78 return true;
81 bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) {
82 // Using templateName case is handled by the override TraverseTemplateName.
83 if (TST->getTemplateName().getKind() == TemplateName::UsingTemplate)
84 return true;
85 add(TST->getAsCXXRecordDecl()); // Specialization
86 return true;
89 // There is no VisitTemplateName in RAV, thus we override the Traverse version
90 // to handle the Using TemplateName case.
91 bool TraverseTemplateName(TemplateName TN) {
92 VisitTemplateName(TN);
93 return Base::TraverseTemplateName(TN);
95 // A pseudo VisitTemplateName, dispatched by the above TraverseTemplateName!
96 bool VisitTemplateName(TemplateName TN) {
97 if (const auto *USD = TN.getAsUsingShadowDecl()) {
98 add(USD);
99 return true;
101 add(TN.getAsTemplateDecl()); // Primary template.
102 return true;
105 bool VisitUsingType(UsingType *UT) {
106 add(UT->getFoundDecl());
107 return true;
110 bool VisitTypedefType(TypedefType *TT) {
111 add(TT->getDecl());
112 return true;
115 // Consider types of any subexpression used, even if the type is not named.
116 // This is helpful in getFoo().bar(), where Foo must be complete.
117 // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to
118 // consider types "used" when they are not directly spelled in code.
119 bool VisitExpr(Expr *E) {
120 TraverseType(E->getType());
121 return true;
124 bool TraverseType(QualType T) {
125 if (isNew(T.getTypePtrOrNull())) // don't care about quals
126 Base::TraverseType(T);
127 return true;
130 bool VisitUsingDecl(UsingDecl *D) {
131 for (const auto *Shadow : D->shadows())
132 add(Shadow->getTargetDecl());
133 return true;
136 // Enums may be usefully forward-declared as *complete* types by specifying
137 // an underlying type. In this case, the definition should see the declaration
138 // so they can be checked for compatibility.
139 bool VisitEnumDecl(EnumDecl *D) {
140 if (D->isThisDeclarationADefinition() && D->getIntegerTypeSourceInfo())
141 add(D);
142 return true;
145 // When the overload is not resolved yet, mark all candidates as used.
146 bool VisitOverloadExpr(OverloadExpr *E) {
147 for (const auto *ResolutionDecl : E->decls())
148 add(ResolutionDecl);
149 return true;
152 private:
153 using Base = RecursiveASTVisitor<ReferencedLocationCrawler>;
155 void add(const Decl *D) {
156 if (!D || !isNew(D->getCanonicalDecl()))
157 return;
158 if (auto SS = StdRecognizer(D)) {
159 Result.Stdlib.insert(*SS);
160 return;
162 // Special case RecordDecls, as it is common for them to be forward
163 // declared multiple times. The most common cases are:
164 // - Definition available in TU, only mark that one as usage. The rest is
165 // likely to be unnecessary. This might result in false positives when an
166 // internal definition is visible.
167 // - There's a forward declaration in the main file, no need for other
168 // redecls.
169 if (const auto *RD = llvm::dyn_cast<RecordDecl>(D)) {
170 if (const auto *Definition = RD->getDefinition()) {
171 Result.User.insert(Definition->getLocation());
172 return;
174 if (SM.isInMainFile(RD->getMostRecentDecl()->getLocation()))
175 return;
177 for (const Decl *Redecl : D->redecls())
178 Result.User.insert(Redecl->getLocation());
181 bool isNew(const void *P) { return P && Visited.insert(P).second; }
183 ReferencedLocations &Result;
184 llvm::DenseSet<const void *> Visited;
185 const SourceManager &SM;
186 tooling::stdlib::Recognizer StdRecognizer;
189 // Given a set of referenced FileIDs, determines all the potentially-referenced
190 // files and macros by traversing expansion/spelling locations of macro IDs.
191 // This is used to map the referenced SourceLocations onto real files.
192 struct ReferencedFilesBuilder {
193 ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {}
194 llvm::DenseSet<FileID> Files;
195 llvm::DenseSet<FileID> Macros;
196 const SourceManager &SM;
198 void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); }
200 void add(FileID FID, SourceLocation Loc) {
201 if (FID.isInvalid())
202 return;
203 assert(SM.isInFileID(Loc, FID));
204 if (Loc.isFileID()) {
205 Files.insert(FID);
206 return;
208 // Don't process the same macro FID twice.
209 if (!Macros.insert(FID).second)
210 return;
211 const auto &Exp = SM.getSLocEntry(FID).getExpansion();
212 add(Exp.getSpellingLoc());
213 add(Exp.getExpansionLocStart());
214 add(Exp.getExpansionLocEnd());
218 // Returns the range starting at '#' and ending at EOL. Escaped newlines are not
219 // handled.
220 clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) {
221 clangd::Range Result;
222 Result.end = Result.start = offsetToPosition(Code, HashOffset);
224 // Span the warning until the EOL or EOF.
225 Result.end.character +=
226 lspLength(Code.drop_front(HashOffset).take_until([](char C) {
227 return C == '\n' || C == '\r';
228 }));
229 return Result;
232 // Finds locations of macros referenced from within the main file. That includes
233 // references that were not yet expanded, e.g `BAR` in `#define FOO BAR`.
234 void findReferencedMacros(const SourceManager &SM, Preprocessor &PP,
235 const syntax::TokenBuffer *Tokens,
236 ReferencedLocations &Result) {
237 trace::Span Tracer("IncludeCleaner::findReferencedMacros");
238 // FIXME(kirillbobyrev): The macros from the main file are collected in
239 // ParsedAST's MainFileMacros. However, we can't use it here because it
240 // doesn't handle macro references that were not expanded, e.g. in macro
241 // definitions or preprocessor-disabled sections.
243 // Extending MainFileMacros to collect missing references and switching to
244 // this mechanism (as opposed to iterating through all tokens) will improve
245 // the performance of findReferencedMacros and also improve other features
246 // relying on MainFileMacros.
247 for (const syntax::Token &Tok : Tokens->spelledTokens(SM.getMainFileID())) {
248 auto Macro = locateMacroAt(Tok, PP);
249 if (!Macro)
250 continue;
251 auto Loc = Macro->Info->getDefinitionLoc();
252 if (Loc.isValid())
253 Result.User.insert(Loc);
254 // FIXME: support stdlib macros
258 static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST,
259 const Config &Cfg) {
260 if (Inc.BehindPragmaKeep)
261 return false;
263 // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
264 // System headers are likely to be standard library headers.
265 // Until we have good support for umbrella headers, don't warn about them.
266 if (Inc.Written.front() == '<') {
267 if (AnalyzeStdlib && tooling::stdlib::Header::named(Inc.Written))
268 return true;
269 return false;
271 assert(Inc.HeaderID);
272 auto HID = static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID);
273 // FIXME: Ignore the headers with IWYU export pragmas for now, remove this
274 // check when we have more precise tracking of exported headers.
275 if (AST.getIncludeStructure().hasIWYUExport(HID))
276 return false;
277 auto FE = AST.getSourceManager().getFileManager().getFileRef(
278 AST.getIncludeStructure().getRealPath(HID));
279 assert(FE);
280 // Headers without include guards have side effects and are not
281 // self-contained, skip them.
282 if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
283 &FE->getFileEntry())) {
284 dlog("{0} doesn't have header guard and will not be considered unused",
285 FE->getName());
286 return false;
288 for (auto &Filter : Cfg.Diagnostics.Includes.IgnoreHeader) {
289 // Convert the path to Unix slashes and try to match against the filter.
290 llvm::SmallString<64> Path(Inc.Resolved);
291 llvm::sys::path::native(Path, llvm::sys::path::Style::posix);
292 if (Filter(Inc.Resolved)) {
293 dlog("{0} header is filtered out by the configuration", FE->getName());
294 return false;
297 return true;
300 // In case symbols are coming from non self-contained header, we need to find
301 // its first includer that is self-contained. This is the header users can
302 // include, so it will be responsible for bringing the symbols from given
303 // header into the scope.
304 FileID headerResponsible(FileID ID, const SourceManager &SM,
305 const IncludeStructure &Includes) {
306 // Unroll the chain of non self-contained headers until we find the one that
307 // can be included.
308 for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID();
309 FE = SM.getFileEntryForID(ID)) {
310 // If FE is nullptr, we consider it to be the responsible header.
311 if (!FE)
312 break;
313 auto HID = Includes.getID(FE);
314 assert(HID && "We're iterating over headers already existing in "
315 "IncludeStructure");
316 if (Includes.isSelfContained(*HID))
317 break;
318 // The header is not self-contained: put the responsibility for its symbols
319 // on its includer.
320 ID = SM.getFileID(SM.getIncludeLoc(ID));
322 return ID;
325 } // namespace
327 ReferencedLocations findReferencedLocations(ASTContext &Ctx, Preprocessor &PP,
328 const syntax::TokenBuffer *Tokens) {
329 trace::Span Tracer("IncludeCleaner::findReferencedLocations");
330 ReferencedLocations Result;
331 const auto &SM = Ctx.getSourceManager();
332 ReferencedLocationCrawler Crawler(Result, SM);
333 Crawler.TraverseAST(Ctx);
334 if (Tokens)
335 findReferencedMacros(SM, PP, Tokens, Result);
336 return Result;
339 ReferencedLocations findReferencedLocations(ParsedAST &AST) {
340 return findReferencedLocations(AST.getASTContext(), AST.getPreprocessor(),
341 &AST.getTokens());
344 ReferencedFiles findReferencedFiles(
345 const ReferencedLocations &Locs, const SourceManager &SM,
346 llvm::function_ref<FileID(FileID)> HeaderResponsible,
347 llvm::function_ref<Optional<StringRef>(FileID)> UmbrellaHeader) {
348 std::vector<SourceLocation> Sorted{Locs.User.begin(), Locs.User.end()};
349 llvm::sort(Sorted); // Group by FileID.
350 ReferencedFilesBuilder Builder(SM);
351 for (auto It = Sorted.begin(); It < Sorted.end();) {
352 FileID FID = SM.getFileID(*It);
353 Builder.add(FID, *It);
354 // Cheaply skip over all the other locations from the same FileID.
355 // This avoids lots of redundant Loc->File lookups for the same file.
357 ++It;
358 while (It != Sorted.end() && SM.isInFileID(*It, FID));
361 // If a header is not self-contained, we consider its symbols a logical part
362 // of the including file. Therefore, mark the parents of all used
363 // non-self-contained FileIDs as used. Perform this on FileIDs rather than
364 // HeaderIDs, as each inclusion of a non-self-contained file is distinct.
365 llvm::DenseSet<FileID> UserFiles;
366 llvm::StringSet<> PublicHeaders;
367 for (FileID ID : Builder.Files) {
368 UserFiles.insert(HeaderResponsible(ID));
369 if (auto PublicHeader = UmbrellaHeader(ID)) {
370 PublicHeaders.insert(*PublicHeader);
374 llvm::DenseSet<tooling::stdlib::Header> StdlibFiles;
375 for (const auto &Symbol : Locs.Stdlib)
376 for (const auto &Header : Symbol.headers())
377 StdlibFiles.insert(Header);
379 return {std::move(UserFiles), std::move(StdlibFiles),
380 std::move(PublicHeaders)};
383 ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs,
384 const IncludeStructure &Includes,
385 const CanonicalIncludes &CanonIncludes,
386 const SourceManager &SM) {
387 return findReferencedFiles(
388 Locs, SM,
389 [&SM, &Includes](FileID ID) {
390 return headerResponsible(ID, SM, Includes);
392 [&SM, &CanonIncludes](FileID ID) -> Optional<StringRef> {
393 auto Entry = SM.getFileEntryRefForID(ID);
394 if (!Entry)
395 return llvm::None;
396 auto PublicHeader = CanonIncludes.mapHeader(*Entry);
397 if (PublicHeader.empty())
398 return llvm::None;
399 return PublicHeader;
403 std::vector<const Inclusion *>
404 getUnused(ParsedAST &AST,
405 const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles,
406 const llvm::StringSet<> &ReferencedPublicHeaders) {
407 trace::Span Tracer("IncludeCleaner::getUnused");
408 const Config &Cfg = Config::current();
409 std::vector<const Inclusion *> Unused;
410 for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) {
411 if (!MFI.HeaderID)
412 continue;
413 if (ReferencedPublicHeaders.contains(MFI.Written))
414 continue;
415 auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID);
416 bool Used = ReferencedFiles.contains(IncludeID);
417 if (!Used && !mayConsiderUnused(MFI, AST, Cfg)) {
418 dlog("{0} was not used, but is not eligible to be diagnosed as unused",
419 MFI.Written);
420 continue;
422 if (!Used)
423 Unused.push_back(&MFI);
424 dlog("{0} is {1}", MFI.Written, Used ? "USED" : "UNUSED");
426 return Unused;
429 #ifndef NDEBUG
430 // Is FID a <built-in>, <scratch space> etc?
431 static bool isSpecialBuffer(FileID FID, const SourceManager &SM) {
432 const SrcMgr::FileInfo &FI = SM.getSLocEntry(FID).getFile();
433 return FI.getName().startswith("<");
435 #endif
437 llvm::DenseSet<IncludeStructure::HeaderID>
438 translateToHeaderIDs(const ReferencedFiles &Files,
439 const IncludeStructure &Includes,
440 const SourceManager &SM) {
441 trace::Span Tracer("IncludeCleaner::translateToHeaderIDs");
442 llvm::DenseSet<IncludeStructure::HeaderID> TranslatedHeaderIDs;
443 TranslatedHeaderIDs.reserve(Files.User.size());
444 for (FileID FID : Files.User) {
445 const FileEntry *FE = SM.getFileEntryForID(FID);
446 if (!FE) {
447 assert(isSpecialBuffer(FID, SM));
448 continue;
450 const auto File = Includes.getID(FE);
451 assert(File);
452 TranslatedHeaderIDs.insert(*File);
454 for (tooling::stdlib::Header StdlibUsed : Files.Stdlib)
455 for (auto HID : Includes.StdlibHeaders.lookup(StdlibUsed))
456 TranslatedHeaderIDs.insert(HID);
457 return TranslatedHeaderIDs;
460 std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST) {
461 const auto &SM = AST.getSourceManager();
463 auto Refs = findReferencedLocations(AST);
464 auto ReferencedFiles =
465 findReferencedFiles(Refs, AST.getIncludeStructure(),
466 AST.getCanonicalIncludes(), AST.getSourceManager());
467 auto ReferencedHeaders =
468 translateToHeaderIDs(ReferencedFiles, AST.getIncludeStructure(), SM);
469 return getUnused(AST, ReferencedHeaders, ReferencedFiles.SpelledUmbrellas);
472 std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST,
473 llvm::StringRef Code) {
474 const Config &Cfg = Config::current();
475 if (Cfg.Diagnostics.UnusedIncludes != Config::UnusedIncludesPolicy::Strict ||
476 Cfg.Diagnostics.SuppressAll ||
477 Cfg.Diagnostics.Suppress.contains("unused-includes"))
478 return {};
479 trace::Span Tracer("IncludeCleaner::issueUnusedIncludesDiagnostics");
480 std::vector<Diag> Result;
481 std::string FileName =
482 AST.getSourceManager()
483 .getFileEntryRefForID(AST.getSourceManager().getMainFileID())
484 ->getName()
485 .str();
486 for (const auto *Inc : computeUnusedIncludes(AST)) {
487 Diag D;
488 D.Message =
489 llvm::formatv("included header {0} is not used directly",
490 llvm::sys::path::filename(
491 Inc->Written.substr(1, Inc->Written.size() - 2),
492 llvm::sys::path::Style::posix));
493 D.Name = "unused-includes";
494 D.Source = Diag::DiagSource::Clangd;
495 D.File = FileName;
496 D.Severity = DiagnosticsEngine::Warning;
497 D.Tags.push_back(Unnecessary);
498 D.Range = getDiagnosticRange(Code, Inc->HashOffset);
499 // FIXME(kirillbobyrev): Removing inclusion might break the code if the
500 // used headers are only reachable transitively through this one. Suggest
501 // including them directly instead.
502 // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas
503 // (keep/export) remove the warning once we support IWYU pragmas.
504 D.Fixes.emplace_back();
505 D.Fixes.back().Message = "remove #include directive";
506 D.Fixes.back().Edits.emplace_back();
507 D.Fixes.back().Edits.back().range.start.line = Inc->HashLine;
508 D.Fixes.back().Edits.back().range.end.line = Inc->HashLine + 1;
509 D.InsideMainFile = true;
510 Result.push_back(std::move(D));
512 return Result;
515 } // namespace clangd
516 } // namespace clang