1 //===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "IncludeCleaner.h"
12 #include "ParsedAST.h"
14 #include "SourceCode.h"
15 #include "index/CanonicalIncludes.h"
16 #include "support/Logger.h"
17 #include "support/Trace.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/ExprCXX.h"
20 #include "clang/AST/RecursiveASTVisitor.h"
21 #include "clang/Basic/SourceLocation.h"
22 #include "clang/Basic/SourceManager.h"
23 #include "clang/Lex/HeaderSearch.h"
24 #include "clang/Lex/Preprocessor.h"
25 #include "clang/Tooling/Syntax/Tokens.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/STLFunctionalExtras.h"
28 #include "llvm/ADT/SmallString.h"
29 #include "llvm/ADT/StringSet.h"
30 #include "llvm/Support/FormatVariadic.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/Regex.h"
38 static bool AnalyzeStdlib
= false;
39 void setIncludeCleanerAnalyzesStdlib(bool B
) { AnalyzeStdlib
= B
; }
43 /// Crawler traverses the AST and feeds in the locations of (sometimes
44 /// implicitly) used symbols into \p Result.
45 class ReferencedLocationCrawler
46 : public RecursiveASTVisitor
<ReferencedLocationCrawler
> {
48 ReferencedLocationCrawler(ReferencedLocations
&Result
,
49 const SourceManager
&SM
)
50 : Result(Result
), SM(SM
) {}
52 bool VisitDeclRefExpr(DeclRefExpr
*DRE
) {
54 add(DRE
->getFoundDecl());
58 bool VisitMemberExpr(MemberExpr
*ME
) {
59 add(ME
->getMemberDecl());
60 add(ME
->getFoundDecl().getDecl());
64 bool VisitTagType(TagType
*TT
) {
69 bool VisitFunctionDecl(FunctionDecl
*FD
) {
70 // Function definition will require redeclarations to be included.
71 if (FD
->isThisDeclarationADefinition())
76 bool VisitCXXConstructExpr(CXXConstructExpr
*CCE
) {
77 add(CCE
->getConstructor());
81 bool VisitTemplateSpecializationType(TemplateSpecializationType
*TST
) {
82 // Using templateName case is handled by the override TraverseTemplateName.
83 if (TST
->getTemplateName().getKind() == TemplateName::UsingTemplate
)
85 add(TST
->getAsCXXRecordDecl()); // Specialization
89 // There is no VisitTemplateName in RAV, thus we override the Traverse version
90 // to handle the Using TemplateName case.
91 bool TraverseTemplateName(TemplateName TN
) {
92 VisitTemplateName(TN
);
93 return Base::TraverseTemplateName(TN
);
95 // A pseudo VisitTemplateName, dispatched by the above TraverseTemplateName!
96 bool VisitTemplateName(TemplateName TN
) {
97 if (const auto *USD
= TN
.getAsUsingShadowDecl()) {
101 add(TN
.getAsTemplateDecl()); // Primary template.
105 bool VisitUsingType(UsingType
*UT
) {
106 add(UT
->getFoundDecl());
110 bool VisitTypedefType(TypedefType
*TT
) {
115 // Consider types of any subexpression used, even if the type is not named.
116 // This is helpful in getFoo().bar(), where Foo must be complete.
117 // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to
118 // consider types "used" when they are not directly spelled in code.
119 bool VisitExpr(Expr
*E
) {
120 TraverseType(E
->getType());
124 bool TraverseType(QualType T
) {
125 if (isNew(T
.getTypePtrOrNull())) // don't care about quals
126 Base::TraverseType(T
);
130 bool VisitUsingDecl(UsingDecl
*D
) {
131 for (const auto *Shadow
: D
->shadows())
132 add(Shadow
->getTargetDecl());
136 // Enums may be usefully forward-declared as *complete* types by specifying
137 // an underlying type. In this case, the definition should see the declaration
138 // so they can be checked for compatibility.
139 bool VisitEnumDecl(EnumDecl
*D
) {
140 if (D
->isThisDeclarationADefinition() && D
->getIntegerTypeSourceInfo())
145 // When the overload is not resolved yet, mark all candidates as used.
146 bool VisitOverloadExpr(OverloadExpr
*E
) {
147 for (const auto *ResolutionDecl
: E
->decls())
153 using Base
= RecursiveASTVisitor
<ReferencedLocationCrawler
>;
155 void add(const Decl
*D
) {
156 if (!D
|| !isNew(D
->getCanonicalDecl()))
158 if (auto SS
= StdRecognizer(D
)) {
159 Result
.Stdlib
.insert(*SS
);
162 // Special case RecordDecls, as it is common for them to be forward
163 // declared multiple times. The most common cases are:
164 // - Definition available in TU, only mark that one as usage. The rest is
165 // likely to be unnecessary. This might result in false positives when an
166 // internal definition is visible.
167 // - There's a forward declaration in the main file, no need for other
169 if (const auto *RD
= llvm::dyn_cast
<RecordDecl
>(D
)) {
170 if (const auto *Definition
= RD
->getDefinition()) {
171 Result
.User
.insert(Definition
->getLocation());
174 if (SM
.isInMainFile(RD
->getMostRecentDecl()->getLocation()))
177 for (const Decl
*Redecl
: D
->redecls())
178 Result
.User
.insert(Redecl
->getLocation());
181 bool isNew(const void *P
) { return P
&& Visited
.insert(P
).second
; }
183 ReferencedLocations
&Result
;
184 llvm::DenseSet
<const void *> Visited
;
185 const SourceManager
&SM
;
186 tooling::stdlib::Recognizer StdRecognizer
;
189 // Given a set of referenced FileIDs, determines all the potentially-referenced
190 // files and macros by traversing expansion/spelling locations of macro IDs.
191 // This is used to map the referenced SourceLocations onto real files.
192 struct ReferencedFilesBuilder
{
193 ReferencedFilesBuilder(const SourceManager
&SM
) : SM(SM
) {}
194 llvm::DenseSet
<FileID
> Files
;
195 llvm::DenseSet
<FileID
> Macros
;
196 const SourceManager
&SM
;
198 void add(SourceLocation Loc
) { add(SM
.getFileID(Loc
), Loc
); }
200 void add(FileID FID
, SourceLocation Loc
) {
203 assert(SM
.isInFileID(Loc
, FID
));
204 if (Loc
.isFileID()) {
208 // Don't process the same macro FID twice.
209 if (!Macros
.insert(FID
).second
)
211 const auto &Exp
= SM
.getSLocEntry(FID
).getExpansion();
212 add(Exp
.getSpellingLoc());
213 add(Exp
.getExpansionLocStart());
214 add(Exp
.getExpansionLocEnd());
218 // Returns the range starting at '#' and ending at EOL. Escaped newlines are not
220 clangd::Range
getDiagnosticRange(llvm::StringRef Code
, unsigned HashOffset
) {
221 clangd::Range Result
;
222 Result
.end
= Result
.start
= offsetToPosition(Code
, HashOffset
);
224 // Span the warning until the EOL or EOF.
225 Result
.end
.character
+=
226 lspLength(Code
.drop_front(HashOffset
).take_until([](char C
) {
227 return C
== '\n' || C
== '\r';
232 // Finds locations of macros referenced from within the main file. That includes
233 // references that were not yet expanded, e.g `BAR` in `#define FOO BAR`.
234 void findReferencedMacros(const SourceManager
&SM
, Preprocessor
&PP
,
235 const syntax::TokenBuffer
*Tokens
,
236 ReferencedLocations
&Result
) {
237 trace::Span
Tracer("IncludeCleaner::findReferencedMacros");
238 // FIXME(kirillbobyrev): The macros from the main file are collected in
239 // ParsedAST's MainFileMacros. However, we can't use it here because it
240 // doesn't handle macro references that were not expanded, e.g. in macro
241 // definitions or preprocessor-disabled sections.
243 // Extending MainFileMacros to collect missing references and switching to
244 // this mechanism (as opposed to iterating through all tokens) will improve
245 // the performance of findReferencedMacros and also improve other features
246 // relying on MainFileMacros.
247 for (const syntax::Token
&Tok
: Tokens
->spelledTokens(SM
.getMainFileID())) {
248 auto Macro
= locateMacroAt(Tok
, PP
);
251 auto Loc
= Macro
->Info
->getDefinitionLoc();
253 Result
.User
.insert(Loc
);
254 // FIXME: support stdlib macros
258 static bool mayConsiderUnused(const Inclusion
&Inc
, ParsedAST
&AST
,
260 if (Inc
.BehindPragmaKeep
)
263 // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
264 // System headers are likely to be standard library headers.
265 // Until we have good support for umbrella headers, don't warn about them.
266 if (Inc
.Written
.front() == '<') {
267 if (AnalyzeStdlib
&& tooling::stdlib::Header::named(Inc
.Written
))
271 assert(Inc
.HeaderID
);
272 auto HID
= static_cast<IncludeStructure::HeaderID
>(*Inc
.HeaderID
);
273 // FIXME: Ignore the headers with IWYU export pragmas for now, remove this
274 // check when we have more precise tracking of exported headers.
275 if (AST
.getIncludeStructure().hasIWYUExport(HID
))
277 auto FE
= AST
.getSourceManager().getFileManager().getFileRef(
278 AST
.getIncludeStructure().getRealPath(HID
));
280 // Headers without include guards have side effects and are not
281 // self-contained, skip them.
282 if (!AST
.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
283 &FE
->getFileEntry())) {
284 dlog("{0} doesn't have header guard and will not be considered unused",
288 for (auto &Filter
: Cfg
.Diagnostics
.Includes
.IgnoreHeader
) {
289 // Convert the path to Unix slashes and try to match against the filter.
290 llvm::SmallString
<64> Path(Inc
.Resolved
);
291 llvm::sys::path::native(Path
, llvm::sys::path::Style::posix
);
292 if (Filter(Inc
.Resolved
)) {
293 dlog("{0} header is filtered out by the configuration", FE
->getName());
300 // In case symbols are coming from non self-contained header, we need to find
301 // its first includer that is self-contained. This is the header users can
302 // include, so it will be responsible for bringing the symbols from given
303 // header into the scope.
304 FileID
headerResponsible(FileID ID
, const SourceManager
&SM
,
305 const IncludeStructure
&Includes
) {
306 // Unroll the chain of non self-contained headers until we find the one that
308 for (const FileEntry
*FE
= SM
.getFileEntryForID(ID
); ID
!= SM
.getMainFileID();
309 FE
= SM
.getFileEntryForID(ID
)) {
310 // If FE is nullptr, we consider it to be the responsible header.
313 auto HID
= Includes
.getID(FE
);
314 assert(HID
&& "We're iterating over headers already existing in "
316 if (Includes
.isSelfContained(*HID
))
318 // The header is not self-contained: put the responsibility for its symbols
320 ID
= SM
.getFileID(SM
.getIncludeLoc(ID
));
327 ReferencedLocations
findReferencedLocations(ASTContext
&Ctx
, Preprocessor
&PP
,
328 const syntax::TokenBuffer
*Tokens
) {
329 trace::Span
Tracer("IncludeCleaner::findReferencedLocations");
330 ReferencedLocations Result
;
331 const auto &SM
= Ctx
.getSourceManager();
332 ReferencedLocationCrawler
Crawler(Result
, SM
);
333 Crawler
.TraverseAST(Ctx
);
335 findReferencedMacros(SM
, PP
, Tokens
, Result
);
339 ReferencedLocations
findReferencedLocations(ParsedAST
&AST
) {
340 return findReferencedLocations(AST
.getASTContext(), AST
.getPreprocessor(),
344 ReferencedFiles
findReferencedFiles(
345 const ReferencedLocations
&Locs
, const SourceManager
&SM
,
346 llvm::function_ref
<FileID(FileID
)> HeaderResponsible
,
347 llvm::function_ref
<Optional
<StringRef
>(FileID
)> UmbrellaHeader
) {
348 std::vector
<SourceLocation
> Sorted
{Locs
.User
.begin(), Locs
.User
.end()};
349 llvm::sort(Sorted
); // Group by FileID.
350 ReferencedFilesBuilder
Builder(SM
);
351 for (auto It
= Sorted
.begin(); It
< Sorted
.end();) {
352 FileID FID
= SM
.getFileID(*It
);
353 Builder
.add(FID
, *It
);
354 // Cheaply skip over all the other locations from the same FileID.
355 // This avoids lots of redundant Loc->File lookups for the same file.
358 while (It
!= Sorted
.end() && SM
.isInFileID(*It
, FID
));
361 // If a header is not self-contained, we consider its symbols a logical part
362 // of the including file. Therefore, mark the parents of all used
363 // non-self-contained FileIDs as used. Perform this on FileIDs rather than
364 // HeaderIDs, as each inclusion of a non-self-contained file is distinct.
365 llvm::DenseSet
<FileID
> UserFiles
;
366 llvm::StringSet
<> PublicHeaders
;
367 for (FileID ID
: Builder
.Files
) {
368 UserFiles
.insert(HeaderResponsible(ID
));
369 if (auto PublicHeader
= UmbrellaHeader(ID
)) {
370 PublicHeaders
.insert(*PublicHeader
);
374 llvm::DenseSet
<tooling::stdlib::Header
> StdlibFiles
;
375 for (const auto &Symbol
: Locs
.Stdlib
)
376 for (const auto &Header
: Symbol
.headers())
377 StdlibFiles
.insert(Header
);
379 return {std::move(UserFiles
), std::move(StdlibFiles
),
380 std::move(PublicHeaders
)};
383 ReferencedFiles
findReferencedFiles(const ReferencedLocations
&Locs
,
384 const IncludeStructure
&Includes
,
385 const CanonicalIncludes
&CanonIncludes
,
386 const SourceManager
&SM
) {
387 return findReferencedFiles(
389 [&SM
, &Includes
](FileID ID
) {
390 return headerResponsible(ID
, SM
, Includes
);
392 [&SM
, &CanonIncludes
](FileID ID
) -> Optional
<StringRef
> {
393 auto Entry
= SM
.getFileEntryRefForID(ID
);
396 auto PublicHeader
= CanonIncludes
.mapHeader(*Entry
);
397 if (PublicHeader
.empty())
403 std::vector
<const Inclusion
*>
404 getUnused(ParsedAST
&AST
,
405 const llvm::DenseSet
<IncludeStructure::HeaderID
> &ReferencedFiles
,
406 const llvm::StringSet
<> &ReferencedPublicHeaders
) {
407 trace::Span
Tracer("IncludeCleaner::getUnused");
408 const Config
&Cfg
= Config::current();
409 std::vector
<const Inclusion
*> Unused
;
410 for (const Inclusion
&MFI
: AST
.getIncludeStructure().MainFileIncludes
) {
413 if (ReferencedPublicHeaders
.contains(MFI
.Written
))
415 auto IncludeID
= static_cast<IncludeStructure::HeaderID
>(*MFI
.HeaderID
);
416 bool Used
= ReferencedFiles
.contains(IncludeID
);
417 if (!Used
&& !mayConsiderUnused(MFI
, AST
, Cfg
)) {
418 dlog("{0} was not used, but is not eligible to be diagnosed as unused",
423 Unused
.push_back(&MFI
);
424 dlog("{0} is {1}", MFI
.Written
, Used
? "USED" : "UNUSED");
430 // Is FID a <built-in>, <scratch space> etc?
431 static bool isSpecialBuffer(FileID FID
, const SourceManager
&SM
) {
432 const SrcMgr::FileInfo
&FI
= SM
.getSLocEntry(FID
).getFile();
433 return FI
.getName().startswith("<");
437 llvm::DenseSet
<IncludeStructure::HeaderID
>
438 translateToHeaderIDs(const ReferencedFiles
&Files
,
439 const IncludeStructure
&Includes
,
440 const SourceManager
&SM
) {
441 trace::Span
Tracer("IncludeCleaner::translateToHeaderIDs");
442 llvm::DenseSet
<IncludeStructure::HeaderID
> TranslatedHeaderIDs
;
443 TranslatedHeaderIDs
.reserve(Files
.User
.size());
444 for (FileID FID
: Files
.User
) {
445 const FileEntry
*FE
= SM
.getFileEntryForID(FID
);
447 assert(isSpecialBuffer(FID
, SM
));
450 const auto File
= Includes
.getID(FE
);
452 TranslatedHeaderIDs
.insert(*File
);
454 for (tooling::stdlib::Header StdlibUsed
: Files
.Stdlib
)
455 for (auto HID
: Includes
.StdlibHeaders
.lookup(StdlibUsed
))
456 TranslatedHeaderIDs
.insert(HID
);
457 return TranslatedHeaderIDs
;
460 std::vector
<const Inclusion
*> computeUnusedIncludes(ParsedAST
&AST
) {
461 const auto &SM
= AST
.getSourceManager();
463 auto Refs
= findReferencedLocations(AST
);
464 auto ReferencedFiles
=
465 findReferencedFiles(Refs
, AST
.getIncludeStructure(),
466 AST
.getCanonicalIncludes(), AST
.getSourceManager());
467 auto ReferencedHeaders
=
468 translateToHeaderIDs(ReferencedFiles
, AST
.getIncludeStructure(), SM
);
469 return getUnused(AST
, ReferencedHeaders
, ReferencedFiles
.SpelledUmbrellas
);
472 std::vector
<Diag
> issueUnusedIncludesDiagnostics(ParsedAST
&AST
,
473 llvm::StringRef Code
) {
474 const Config
&Cfg
= Config::current();
475 if (Cfg
.Diagnostics
.UnusedIncludes
!= Config::UnusedIncludesPolicy::Strict
||
476 Cfg
.Diagnostics
.SuppressAll
||
477 Cfg
.Diagnostics
.Suppress
.contains("unused-includes"))
479 trace::Span
Tracer("IncludeCleaner::issueUnusedIncludesDiagnostics");
480 std::vector
<Diag
> Result
;
481 std::string FileName
=
482 AST
.getSourceManager()
483 .getFileEntryRefForID(AST
.getSourceManager().getMainFileID())
486 for (const auto *Inc
: computeUnusedIncludes(AST
)) {
489 llvm::formatv("included header {0} is not used directly",
490 llvm::sys::path::filename(
491 Inc
->Written
.substr(1, Inc
->Written
.size() - 2),
492 llvm::sys::path::Style::posix
));
493 D
.Name
= "unused-includes";
494 D
.Source
= Diag::DiagSource::Clangd
;
496 D
.Severity
= DiagnosticsEngine::Warning
;
497 D
.Tags
.push_back(Unnecessary
);
498 D
.Range
= getDiagnosticRange(Code
, Inc
->HashOffset
);
499 // FIXME(kirillbobyrev): Removing inclusion might break the code if the
500 // used headers are only reachable transitively through this one. Suggest
501 // including them directly instead.
502 // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas
503 // (keep/export) remove the warning once we support IWYU pragmas.
504 D
.Fixes
.emplace_back();
505 D
.Fixes
.back().Message
= "remove #include directive";
506 D
.Fixes
.back().Edits
.emplace_back();
507 D
.Fixes
.back().Edits
.back().range
.start
.line
= Inc
->HashLine
;
508 D
.Fixes
.back().Edits
.back().range
.end
.line
= Inc
->HashLine
+ 1;
509 D
.InsideMainFile
= true;
510 Result
.push_back(std::move(D
));
515 } // namespace clangd