1 //===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "IncludeCleaner.h"
10 #include "Diagnostics.h"
12 #include "ParsedAST.h"
15 #include "SourceCode.h"
16 #include "clang-include-cleaner/Analysis.h"
17 #include "clang-include-cleaner/IncludeSpeller.h"
18 #include "clang-include-cleaner/Record.h"
19 #include "clang-include-cleaner/Types.h"
20 #include "support/Logger.h"
21 #include "support/Path.h"
22 #include "support/Trace.h"
23 #include "clang/AST/ASTContext.h"
24 #include "clang/Basic/Diagnostic.h"
25 #include "clang/Basic/LLVM.h"
26 #include "clang/Basic/SourceLocation.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/Format/Format.h"
29 #include "clang/Lex/DirectoryLookup.h"
30 #include "clang/Lex/HeaderSearch.h"
31 #include "clang/Lex/Preprocessor.h"
32 #include "clang/Tooling/Core/Replacement.h"
33 #include "clang/Tooling/Inclusions/HeaderIncludes.h"
34 #include "clang/Tooling/Inclusions/StandardLibrary.h"
35 #include "clang/Tooling/Syntax/Tokens.h"
36 #include "llvm/ADT/ArrayRef.h"
37 #include "llvm/ADT/DenseSet.h"
38 #include "llvm/ADT/GenericUniformityImpl.h"
39 #include "llvm/ADT/STLExtras.h"
40 #include "llvm/ADT/SmallString.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/StringRef.h"
43 #include "llvm/Support/Error.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Support/FormatVariadic.h"
46 #include "llvm/Support/Path.h"
47 #include "llvm/Support/Regex.h"
57 namespace clang::clangd
{
60 bool isIgnored(llvm::StringRef HeaderPath
, HeaderFilter IgnoreHeaders
) {
61 // Convert the path to Unix slashes and try to match against the filter.
62 llvm::SmallString
<64> NormalizedPath(HeaderPath
);
63 llvm::sys::path::native(NormalizedPath
, llvm::sys::path::Style::posix
);
64 for (auto &Filter
: IgnoreHeaders
) {
65 if (Filter(NormalizedPath
))
71 bool mayConsiderUnused(const Inclusion
&Inc
, ParsedAST
&AST
,
72 const include_cleaner::PragmaIncludes
*PI
) {
74 auto HID
= static_cast<IncludeStructure::HeaderID
>(*Inc
.HeaderID
);
75 auto FE
= AST
.getSourceManager().getFileManager().getFileRef(
76 AST
.getIncludeStructure().getRealPath(HID
));
78 if (FE
->getDir() == AST
.getPreprocessor()
79 .getHeaderSearchInfo()
83 if (PI
&& PI
->shouldKeep(*FE
))
85 // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
86 // System headers are likely to be standard library headers.
87 // Until we have good support for umbrella headers, don't warn about them.
88 if (Inc
.Written
.front() == '<')
89 return tooling::stdlib::Header::named(Inc
.Written
).has_value();
91 // Check if main file is the public interface for a private header. If so we
92 // shouldn't diagnose it as unused.
93 if (auto PHeader
= PI
->getPublic(*FE
); !PHeader
.empty()) {
94 PHeader
= PHeader
.trim("<>\"");
95 // Since most private -> public mappings happen in a verbatim way, we
96 // check textually here. This might go wrong in presence of symlinks or
97 // header mappings. But that's no different from the rest of the places.
98 if (AST
.tuPath().endswith(PHeader
))
102 // Headers without include guards have side effects and are not
103 // self-contained, skip them.
104 if (!AST
.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
106 dlog("{0} doesn't have header guard and will not be considered unused",
113 std::vector
<Diag
> generateMissingIncludeDiagnostics(
114 ParsedAST
&AST
, llvm::ArrayRef
<MissingIncludeDiagInfo
> MissingIncludes
,
115 llvm::StringRef Code
, HeaderFilter IgnoreHeaders
) {
116 std::vector
<Diag
> Result
;
117 const SourceManager
&SM
= AST
.getSourceManager();
118 const FileEntry
*MainFile
= SM
.getFileEntryForID(SM
.getMainFileID());
120 auto FileStyle
= format::getStyle(
121 format::DefaultFormatStyle
, AST
.tuPath(), format::DefaultFallbackStyle
,
122 Code
, &SM
.getFileManager().getVirtualFileSystem());
124 elog("Couldn't infer style", FileStyle
.takeError());
125 FileStyle
= format::getLLVMStyle();
128 tooling::HeaderIncludes
HeaderIncludes(AST
.tuPath(), Code
,
129 FileStyle
->IncludeStyle
);
130 for (const auto &SymbolWithMissingInclude
: MissingIncludes
) {
131 llvm::StringRef ResolvedPath
=
132 SymbolWithMissingInclude
.Providers
.front().resolvedPath();
133 if (isIgnored(ResolvedPath
, IgnoreHeaders
)) {
134 dlog("IncludeCleaner: not diagnosing missing include {0}, filtered by "
140 std::string Spelling
= include_cleaner::spellHeader(
141 {SymbolWithMissingInclude
.Providers
.front(),
142 AST
.getPreprocessor().getHeaderSearchInfo(), MainFile
});
144 llvm::StringRef HeaderRef
{Spelling
};
145 bool Angled
= HeaderRef
.starts_with("<");
146 // We might suggest insertion of an existing include in edge cases, e.g.,
147 // include is present in a PP-disabled region, or spelling of the header
148 // turns out to be the same as one of the unresolved includes in the
150 std::optional
<tooling::Replacement
> Replacement
= HeaderIncludes
.insert(
151 HeaderRef
.trim("\"<>"), Angled
, tooling::IncludeDirective::Include
);
152 if (!Replacement
.has_value())
155 Diag
&D
= Result
.emplace_back();
157 llvm::formatv("No header providing \"{0}\" is directly included",
158 SymbolWithMissingInclude
.Symbol
.name());
159 D
.Name
= "missing-includes";
160 D
.Source
= Diag::DiagSource::Clangd
;
161 D
.File
= AST
.tuPath();
162 D
.InsideMainFile
= true;
163 // We avoid the "warning" severity here in favor of LSP's "information".
165 // Users treat most warnings on code being edited as high-priority.
166 // They don't think of include cleanups the same way: they want to edit
167 // lines with existing violations without fixing them.
168 // Diagnostics at the same level tend to be visually indistinguishable,
169 // and a few missing includes can cause many diagnostics.
170 // Marking these as "information" leaves them visible, but less intrusive.
172 // (These concerns don't apply to unused #include warnings: these are fewer,
173 // they appear on infrequently-edited lines with few other warnings, and
174 // the 'Unnecessary' tag often results in a different rendering)
176 // Clang's "note" severity usually has special semantics, being
177 // translated into LSP RelatedInformation of a parent diagnostic.
178 // But not here: these aren't processed by clangd's DiagnosticConsumer.
179 D
.Severity
= DiagnosticsEngine::Note
;
180 D
.Range
= clangd::Range
{
181 offsetToPosition(Code
,
182 SymbolWithMissingInclude
.SymRefRange
.beginOffset()),
183 offsetToPosition(Code
,
184 SymbolWithMissingInclude
.SymRefRange
.endOffset())};
185 auto &F
= D
.Fixes
.emplace_back();
186 F
.Message
= "#include " + Spelling
;
187 TextEdit Edit
= replacementToEdit(Code
, *Replacement
);
188 F
.Edits
.emplace_back(std::move(Edit
));
193 std::vector
<Diag
> generateUnusedIncludeDiagnostics(
194 PathRef FileName
, llvm::ArrayRef
<const Inclusion
*> UnusedIncludes
,
195 llvm::StringRef Code
, HeaderFilter IgnoreHeaders
) {
196 std::vector
<Diag
> Result
;
197 for (const auto *Inc
: UnusedIncludes
) {
198 if (isIgnored(Inc
->Resolved
, IgnoreHeaders
))
200 Diag
&D
= Result
.emplace_back();
202 llvm::formatv("included header {0} is not used directly",
203 llvm::sys::path::filename(
204 Inc
->Written
.substr(1, Inc
->Written
.size() - 2),
205 llvm::sys::path::Style::posix
));
206 D
.Name
= "unused-includes";
207 D
.Source
= Diag::DiagSource::Clangd
;
209 D
.InsideMainFile
= true;
210 D
.Severity
= DiagnosticsEngine::Warning
;
211 D
.Tags
.push_back(Unnecessary
);
212 D
.Range
= rangeTillEOL(Code
, Inc
->HashOffset
);
213 // FIXME(kirillbobyrev): Removing inclusion might break the code if the
214 // used headers are only reachable transitively through this one. Suggest
215 // including them directly instead.
216 // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas
217 // (keep/export) to remove the warning once we support IWYU pragmas.
218 auto &F
= D
.Fixes
.emplace_back();
219 F
.Message
= "remove #include directive";
220 F
.Edits
.emplace_back();
221 F
.Edits
.back().range
.start
.line
= Inc
->HashLine
;
222 F
.Edits
.back().range
.end
.line
= Inc
->HashLine
+ 1;
228 removeAllUnusedIncludes(llvm::ArrayRef
<Diag
> UnusedIncludes
) {
229 if (UnusedIncludes
.empty())
233 RemoveAll
.Message
= "remove all unused includes";
234 for (const auto &Diag
: UnusedIncludes
) {
235 assert(Diag
.Fixes
.size() == 1 && "Expected exactly one fix.");
236 RemoveAll
.Edits
.insert(RemoveAll
.Edits
.end(),
237 Diag
.Fixes
.front().Edits
.begin(),
238 Diag
.Fixes
.front().Edits
.end());
241 // TODO(hokein): emit a suitable text for the label.
242 ChangeAnnotation Annotation
= {/*label=*/"",
243 /*needsConfirmation=*/true,
245 static const ChangeAnnotationIdentifier RemoveAllUnusedID
=
246 "RemoveAllUnusedIncludes";
247 for (unsigned I
= 0; I
< RemoveAll
.Edits
.size(); ++I
) {
248 ChangeAnnotationIdentifier ID
= RemoveAllUnusedID
+ std::to_string(I
);
249 RemoveAll
.Edits
[I
].annotationId
= ID
;
250 RemoveAll
.Annotations
.push_back({ID
, Annotation
});
256 addAllMissingIncludes(llvm::ArrayRef
<Diag
> MissingIncludeDiags
) {
257 if (MissingIncludeDiags
.empty())
261 AddAllMissing
.Message
= "add all missing includes";
262 // A map to deduplicate the edits with the same new text.
263 // newText (#include "my_missing_header.h") -> TextEdit.
264 std::map
<std::string
, TextEdit
> Edits
;
265 for (const auto &Diag
: MissingIncludeDiags
) {
266 assert(Diag
.Fixes
.size() == 1 && "Expected exactly one fix.");
267 for (const auto &Edit
: Diag
.Fixes
.front().Edits
) {
268 Edits
.try_emplace(Edit
.newText
, Edit
);
271 // FIXME(hokein): emit used symbol reference in the annotation.
272 ChangeAnnotation Annotation
= {/*label=*/"",
273 /*needsConfirmation=*/true,
275 static const ChangeAnnotationIdentifier AddAllMissingID
=
276 "AddAllMissingIncludes";
278 for (auto &It
: Edits
) {
279 ChangeAnnotationIdentifier ID
= AddAllMissingID
+ std::to_string(I
++);
280 AddAllMissing
.Edits
.push_back(std::move(It
.second
));
281 AddAllMissing
.Edits
.back().annotationId
= ID
;
283 AddAllMissing
.Annotations
.push_back({ID
, Annotation
});
285 return AddAllMissing
;
287 Fix
fixAll(const Fix
&RemoveAllUnused
, const Fix
&AddAllMissing
) {
289 FixAll
.Message
= "fix all includes";
291 for (const auto &F
: RemoveAllUnused
.Edits
)
292 FixAll
.Edits
.push_back(F
);
293 for (const auto &F
: AddAllMissing
.Edits
)
294 FixAll
.Edits
.push_back(F
);
296 for (const auto &A
: RemoveAllUnused
.Annotations
)
297 FixAll
.Annotations
.push_back(A
);
298 for (const auto &A
: AddAllMissing
.Annotations
)
299 FixAll
.Annotations
.push_back(A
);
303 std::vector
<const Inclusion
*>
304 getUnused(ParsedAST
&AST
,
305 const llvm::DenseSet
<IncludeStructure::HeaderID
> &ReferencedFiles
) {
306 trace::Span
Tracer("IncludeCleaner::getUnused");
307 std::vector
<const Inclusion
*> Unused
;
308 for (const Inclusion
&MFI
: AST
.getIncludeStructure().MainFileIncludes
) {
311 auto IncludeID
= static_cast<IncludeStructure::HeaderID
>(*MFI
.HeaderID
);
312 if (ReferencedFiles
.contains(IncludeID
))
314 if (!mayConsiderUnused(MFI
, AST
, AST
.getPragmaIncludes().get())) {
315 dlog("{0} was not used, but is not eligible to be diagnosed as unused",
319 Unused
.push_back(&MFI
);
326 std::vector
<include_cleaner::SymbolReference
>
327 collectMacroReferences(ParsedAST
&AST
) {
328 const auto &SM
= AST
.getSourceManager();
329 auto &PP
= AST
.getPreprocessor();
330 std::vector
<include_cleaner::SymbolReference
> Macros
;
331 for (const auto &[_
, Refs
] : AST
.getMacros().MacroRefs
) {
332 for (const auto &Ref
: Refs
) {
333 auto Loc
= SM
.getComposedLoc(SM
.getMainFileID(), Ref
.StartOffset
);
334 const auto *Tok
= AST
.getTokens().spelledTokenAt(Loc
);
337 auto Macro
= locateMacroAt(*Tok
, PP
);
340 auto DefLoc
= Macro
->NameLoc
;
341 if (!DefLoc
.isValid())
344 {include_cleaner::Macro
{/*Name=*/PP
.getIdentifierInfo(Tok
->text(SM
)),
347 Ref
.InConditionalDirective
? include_cleaner::RefType::Ambiguous
348 : include_cleaner::RefType::Explicit
});
355 include_cleaner::Includes
convertIncludes(const ParsedAST
&AST
) {
356 auto &SM
= AST
.getSourceManager();
358 include_cleaner::Includes ConvertedIncludes
;
359 // We satisfy Includes's contract that search dirs and included files have
360 // matching path styles: both ultimately use FileManager::getCanonicalName().
361 for (const auto &Dir
: AST
.getIncludeStructure().SearchPathsCanonical
)
362 ConvertedIncludes
.addSearchDirectory(Dir
);
364 for (const Inclusion
&Inc
: AST
.getIncludeStructure().MainFileIncludes
) {
365 include_cleaner::Include TransformedInc
;
366 llvm::StringRef WrittenRef
= llvm::StringRef(Inc
.Written
);
367 TransformedInc
.Spelled
= WrittenRef
.trim("\"<>");
368 TransformedInc
.HashLocation
=
369 SM
.getComposedLoc(SM
.getMainFileID(), Inc
.HashOffset
);
370 TransformedInc
.Line
= Inc
.HashLine
+ 1;
371 TransformedInc
.Angled
= WrittenRef
.starts_with("<");
372 // Inc.Resolved is canonicalized with clangd::getCanonicalPath(),
373 // which is based on FileManager::getCanonicalName(ParentDir).
374 auto FE
= SM
.getFileManager().getFileRef(Inc
.Resolved
);
376 elog("IncludeCleaner: Failed to get an entry for resolved path {0}: {1}",
377 Inc
.Resolved
, FE
.takeError());
380 TransformedInc
.Resolved
= *FE
;
381 ConvertedIncludes
.add(std::move(TransformedInc
));
383 return ConvertedIncludes
;
386 IncludeCleanerFindings
computeIncludeCleanerFindings(ParsedAST
&AST
) {
387 // Interaction is only polished for C/CPP.
388 if (AST
.getLangOpts().ObjC
)
390 const auto &SM
= AST
.getSourceManager();
391 include_cleaner::Includes ConvertedIncludes
= convertIncludes(AST
);
392 const FileEntry
*MainFile
= SM
.getFileEntryForID(SM
.getMainFileID());
393 auto PreamblePatch
= PreamblePatch::getPatchEntry(AST
.tuPath(), SM
);
395 std::vector
<include_cleaner::SymbolReference
> Macros
=
396 collectMacroReferences(AST
);
397 std::vector
<MissingIncludeDiagInfo
> MissingIncludes
;
398 llvm::DenseSet
<IncludeStructure::HeaderID
> Used
;
399 trace::Span
Tracer("include_cleaner::walkUsed");
400 const DirectoryEntry
*ResourceDir
= AST
.getPreprocessor()
401 .getHeaderSearchInfo()
404 include_cleaner::walkUsed(
405 AST
.getLocalTopLevelDecls(), /*MacroRefs=*/Macros
,
406 AST
.getPragmaIncludes().get(), AST
.getPreprocessor(),
407 [&](const include_cleaner::SymbolReference
&Ref
,
408 llvm::ArrayRef
<include_cleaner::Header
> Providers
) {
409 bool Satisfied
= false;
410 for (const auto &H
: Providers
) {
411 if (H
.kind() == include_cleaner::Header::Physical
&&
412 (H
.physical() == MainFile
|| H
.physical() == PreamblePatch
||
413 H
.physical().getDir() == ResourceDir
)) {
417 for (auto *Inc
: ConvertedIncludes
.match(H
)) {
420 AST
.getIncludeStructure().getID(&Inc
->Resolved
->getFileEntry());
421 assert(HeaderID
.has_value() &&
422 "ConvertedIncludes only contains resolved includes.");
423 Used
.insert(*HeaderID
);
427 if (Satisfied
|| Providers
.empty() ||
428 Ref
.RT
!= include_cleaner::RefType::Explicit
)
431 // We actually always want to map usages to their spellings, but
432 // spelling locations can point into preamble section. Using these
433 // offsets could lead to crashes in the presence of stale preambles. Hence
434 // we use "getFileLoc" instead to make sure it always points into main
436 // FIXME: Use presumed locations to map such usages back to patched
438 auto Loc
= SM
.getFileLoc(Ref
.RefLocation
);
439 // File locations can be outside of the main file if macro is expanded
440 // through an #include.
441 while (SM
.getFileID(Loc
) != SM
.getMainFileID())
442 Loc
= SM
.getIncludeLoc(SM
.getFileID(Loc
));
443 auto TouchingTokens
=
444 syntax::spelledTokensTouching(Loc
, AST
.getTokens());
445 assert(!TouchingTokens
.empty());
446 // Loc points to the start offset of the ref token, here we use the last
447 // element of the TouchingTokens, e.g. avoid getting the "::" for
449 MissingIncludeDiagInfo DiagInfo
{
450 Ref
.Target
, TouchingTokens
.back().range(SM
), Providers
};
451 MissingIncludes
.push_back(std::move(DiagInfo
));
453 // Put possibly equal diagnostics together for deduplication.
454 // The duplicates might be from macro arguments that get expanded multiple
456 llvm::stable_sort(MissingIncludes
, [](const MissingIncludeDiagInfo
&LHS
,
457 const MissingIncludeDiagInfo
&RHS
) {
458 // First sort by reference location.
459 if (LHS
.SymRefRange
!= RHS
.SymRefRange
) {
460 // We can get away just by comparing the offsets as all the ranges are in
462 return LHS
.SymRefRange
.beginOffset() < RHS
.SymRefRange
.beginOffset();
464 // For the same location, break ties using the symbol. Note that this won't
465 // be stable across runs.
466 using MapInfo
= llvm::DenseMapInfo
<include_cleaner::Symbol
>;
467 return MapInfo::getHashValue(LHS
.Symbol
) <
468 MapInfo::getHashValue(RHS
.Symbol
);
470 MissingIncludes
.erase(llvm::unique(MissingIncludes
), MissingIncludes
.end());
471 std::vector
<const Inclusion
*> UnusedIncludes
= getUnused(AST
, Used
);
472 return {std::move(UnusedIncludes
), std::move(MissingIncludes
)};
475 bool isPreferredProvider(const Inclusion
&Inc
,
476 const include_cleaner::Includes
&Includes
,
477 llvm::ArrayRef
<include_cleaner::Header
> Providers
) {
478 for (const auto &H
: Providers
) {
479 auto Matches
= Includes
.match(H
);
480 for (const include_cleaner::Include
*Match
: Matches
)
481 if (Match
->Line
== unsigned(Inc
.HashLine
+ 1))
482 return true; // this header is (equal) best
483 if (!Matches
.empty())
484 return false; // another header is better
486 return false; // no header provides the symbol
490 issueIncludeCleanerDiagnostics(ParsedAST
&AST
, llvm::StringRef Code
,
491 const IncludeCleanerFindings
&Findings
,
492 HeaderFilter IgnoreHeaders
) {
493 trace::Span
Tracer("IncludeCleaner::issueIncludeCleanerDiagnostics");
494 std::vector
<Diag
> UnusedIncludes
= generateUnusedIncludeDiagnostics(
495 AST
.tuPath(), Findings
.UnusedIncludes
, Code
, IgnoreHeaders
);
496 std::optional
<Fix
> RemoveAllUnused
= removeAllUnusedIncludes(UnusedIncludes
);
498 std::vector
<Diag
> MissingIncludeDiags
= generateMissingIncludeDiagnostics(
499 AST
, Findings
.MissingIncludes
, Code
, IgnoreHeaders
);
500 std::optional
<Fix
> AddAllMissing
= addAllMissingIncludes(MissingIncludeDiags
);
502 std::optional
<Fix
> FixAll
;
503 if (RemoveAllUnused
&& AddAllMissing
)
504 FixAll
= fixAll(*RemoveAllUnused
, *AddAllMissing
);
506 auto AddBatchFix
= [](const std::optional
<Fix
> &F
, clang::clangd::Diag
*Out
) {
509 Out
->Fixes
.push_back(*F
);
511 for (auto &Diag
: MissingIncludeDiags
) {
512 AddBatchFix(MissingIncludeDiags
.size() > 1 ? AddAllMissing
: std::nullopt
,
514 AddBatchFix(FixAll
, &Diag
);
516 for (auto &Diag
: UnusedIncludes
) {
517 AddBatchFix(UnusedIncludes
.size() > 1 ? RemoveAllUnused
: std::nullopt
,
519 AddBatchFix(FixAll
, &Diag
);
522 auto Result
= std::move(MissingIncludeDiags
);
523 llvm::move(UnusedIncludes
, std::back_inserter(Result
));
527 } // namespace clang::clangd