Bump version to 19.1.0 (final)
[llvm-project.git] / clang-tools-extra / clangd / IncludeCleaner.cpp
blobe34706172f0bf6a48271fd78e1e93aef17fc7904
1 //===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "IncludeCleaner.h"
10 #include "Diagnostics.h"
11 #include "Headers.h"
12 #include "ParsedAST.h"
13 #include "Preamble.h"
14 #include "Protocol.h"
15 #include "SourceCode.h"
16 #include "clang-include-cleaner/Analysis.h"
17 #include "clang-include-cleaner/IncludeSpeller.h"
18 #include "clang-include-cleaner/Record.h"
19 #include "clang-include-cleaner/Types.h"
20 #include "support/Logger.h"
21 #include "support/Path.h"
22 #include "support/Trace.h"
23 #include "clang/AST/ASTContext.h"
24 #include "clang/Basic/Diagnostic.h"
25 #include "clang/Basic/LLVM.h"
26 #include "clang/Basic/SourceLocation.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/Format/Format.h"
29 #include "clang/Lex/DirectoryLookup.h"
30 #include "clang/Lex/HeaderSearch.h"
31 #include "clang/Lex/Preprocessor.h"
32 #include "clang/Tooling/Core/Replacement.h"
33 #include "clang/Tooling/Inclusions/HeaderIncludes.h"
34 #include "clang/Tooling/Inclusions/StandardLibrary.h"
35 #include "clang/Tooling/Syntax/Tokens.h"
36 #include "llvm/ADT/ArrayRef.h"
37 #include "llvm/ADT/DenseSet.h"
38 #include "llvm/ADT/GenericUniformityImpl.h"
39 #include "llvm/ADT/STLExtras.h"
40 #include "llvm/ADT/SmallString.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/StringRef.h"
43 #include "llvm/Support/Error.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Support/FormatVariadic.h"
46 #include "llvm/Support/Path.h"
47 #include "llvm/Support/Regex.h"
48 #include <cassert>
49 #include <iterator>
50 #include <map>
51 #include <optional>
52 #include <string>
53 #include <utility>
54 #include <vector>
56 namespace clang::clangd {
57 namespace {
59 bool isIgnored(llvm::StringRef HeaderPath, HeaderFilter IgnoreHeaders) {
60 // Convert the path to Unix slashes and try to match against the filter.
61 llvm::SmallString<64> NormalizedPath(HeaderPath);
62 llvm::sys::path::native(NormalizedPath, llvm::sys::path::Style::posix);
63 for (auto &Filter : IgnoreHeaders) {
64 if (Filter(NormalizedPath))
65 return true;
67 return false;
70 bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST,
71 const include_cleaner::PragmaIncludes *PI,
72 bool AnalyzeAngledIncludes) {
73 assert(Inc.HeaderID);
74 auto HID = static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID);
75 auto FE = AST.getSourceManager().getFileManager().getFileRef(
76 AST.getIncludeStructure().getRealPath(HID));
77 assert(FE);
78 if (FE->getDir() == AST.getPreprocessor()
79 .getHeaderSearchInfo()
80 .getModuleMap()
81 .getBuiltinDir())
82 return false;
83 if (PI && PI->shouldKeep(*FE))
84 return false;
85 // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
86 // System headers are likely to be standard library headers.
87 // Until we have good support for umbrella headers, don't warn about them
88 // (unless analysis is explicitly enabled).
89 if (Inc.Written.front() == '<') {
90 if (tooling::stdlib::Header::named(Inc.Written))
91 return true;
92 if (!AnalyzeAngledIncludes)
93 return false;
95 if (PI) {
96 // Check if main file is the public interface for a private header. If so we
97 // shouldn't diagnose it as unused.
98 if (auto PHeader = PI->getPublic(*FE); !PHeader.empty()) {
99 PHeader = PHeader.trim("<>\"");
100 // Since most private -> public mappings happen in a verbatim way, we
101 // check textually here. This might go wrong in presence of symlinks or
102 // header mappings. But that's not different than rest of the places.
103 if (AST.tuPath().ends_with(PHeader))
104 return false;
107 // Headers without include guards have side effects and are not
108 // self-contained, skip them.
109 if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
110 *FE)) {
111 dlog("{0} doesn't have header guard and will not be considered unused",
112 FE->getName());
113 return false;
115 return true;
118 std::vector<Diag> generateMissingIncludeDiagnostics(
119 ParsedAST &AST, llvm::ArrayRef<MissingIncludeDiagInfo> MissingIncludes,
120 llvm::StringRef Code, HeaderFilter IgnoreHeaders, const ThreadsafeFS &TFS) {
121 std::vector<Diag> Result;
122 const SourceManager &SM = AST.getSourceManager();
123 const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());
125 auto FileStyle = getFormatStyleForFile(AST.tuPath(), Code, TFS, false);
127 tooling::HeaderIncludes HeaderIncludes(AST.tuPath(), Code,
128 FileStyle.IncludeStyle);
129 for (const auto &SymbolWithMissingInclude : MissingIncludes) {
130 llvm::StringRef ResolvedPath =
131 SymbolWithMissingInclude.Providers.front().resolvedPath();
132 if (isIgnored(ResolvedPath, IgnoreHeaders)) {
133 dlog("IncludeCleaner: not diagnosing missing include {0}, filtered by "
134 "config",
135 ResolvedPath);
136 continue;
139 std::string Spelling = include_cleaner::spellHeader(
140 {SymbolWithMissingInclude.Providers.front(),
141 AST.getPreprocessor().getHeaderSearchInfo(), MainFile});
143 llvm::StringRef HeaderRef{Spelling};
144 bool Angled = HeaderRef.starts_with("<");
145 // We might suggest insertion of an existing include in edge cases, e.g.,
146 // include is present in a PP-disabled region, or spelling of the header
147 // turns out to be the same as one of the unresolved includes in the
148 // main file.
149 std::optional<tooling::Replacement> Replacement = HeaderIncludes.insert(
150 HeaderRef.trim("\"<>"), Angled, tooling::IncludeDirective::Include);
151 if (!Replacement.has_value())
152 continue;
154 Diag &D = Result.emplace_back();
155 D.Message =
156 llvm::formatv("No header providing \"{0}\" is directly included",
157 SymbolWithMissingInclude.Symbol.name());
158 D.Name = "missing-includes";
159 D.Source = Diag::DiagSource::Clangd;
160 D.File = AST.tuPath();
161 D.InsideMainFile = true;
162 // We avoid the "warning" severity here in favor of LSP's "information".
164 // Users treat most warnings on code being edited as high-priority.
165 // They don't think of include cleanups the same way: they want to edit
166 // lines with existing violations without fixing them.
167 // Diagnostics at the same level tend to be visually indistinguishable,
168 // and a few missing includes can cause many diagnostics.
169 // Marking these as "information" leaves them visible, but less intrusive.
171 // (These concerns don't apply to unused #include warnings: these are fewer,
172 // they appear on infrequently-edited lines with few other warnings, and
173 // the 'Unneccesary' tag often result in a different rendering)
175 // Usually clang's "note" severity usually has special semantics, being
176 // translated into LSP RelatedInformation of a parent diagnostic.
177 // But not here: these aren't processed by clangd's DiagnosticConsumer.
178 D.Severity = DiagnosticsEngine::Note;
179 D.Range = clangd::Range{
180 offsetToPosition(Code,
181 SymbolWithMissingInclude.SymRefRange.beginOffset()),
182 offsetToPosition(Code,
183 SymbolWithMissingInclude.SymRefRange.endOffset())};
184 auto &F = D.Fixes.emplace_back();
185 F.Message = "#include " + Spelling;
186 TextEdit Edit = replacementToEdit(Code, *Replacement);
187 F.Edits.emplace_back(std::move(Edit));
189 return Result;
192 std::vector<Diag> generateUnusedIncludeDiagnostics(
193 PathRef FileName, llvm::ArrayRef<const Inclusion *> UnusedIncludes,
194 llvm::StringRef Code, HeaderFilter IgnoreHeaders) {
195 std::vector<Diag> Result;
196 for (const auto *Inc : UnusedIncludes) {
197 if (isIgnored(Inc->Resolved, IgnoreHeaders))
198 continue;
199 Diag &D = Result.emplace_back();
200 D.Message =
201 llvm::formatv("included header {0} is not used directly",
202 llvm::sys::path::filename(
203 Inc->Written.substr(1, Inc->Written.size() - 2),
204 llvm::sys::path::Style::posix));
205 D.Name = "unused-includes";
206 D.Source = Diag::DiagSource::Clangd;
207 D.File = FileName;
208 D.InsideMainFile = true;
209 D.Severity = DiagnosticsEngine::Warning;
210 D.Tags.push_back(Unnecessary);
211 D.Range = rangeTillEOL(Code, Inc->HashOffset);
212 // FIXME(kirillbobyrev): Removing inclusion might break the code if the
213 // used headers are only reachable transitively through this one. Suggest
214 // including them directly instead.
215 // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas
216 // (keep/export) remove the warning once we support IWYU pragmas.
217 auto &F = D.Fixes.emplace_back();
218 F.Message = "remove #include directive";
219 F.Edits.emplace_back();
220 F.Edits.back().range.start.line = Inc->HashLine;
221 F.Edits.back().range.end.line = Inc->HashLine + 1;
223 return Result;
226 std::optional<Fix>
227 removeAllUnusedIncludes(llvm::ArrayRef<Diag> UnusedIncludes) {
228 if (UnusedIncludes.empty())
229 return std::nullopt;
231 Fix RemoveAll;
232 RemoveAll.Message = "remove all unused includes";
233 for (const auto &Diag : UnusedIncludes) {
234 assert(Diag.Fixes.size() == 1 && "Expected exactly one fix.");
235 RemoveAll.Edits.insert(RemoveAll.Edits.end(),
236 Diag.Fixes.front().Edits.begin(),
237 Diag.Fixes.front().Edits.end());
239 return RemoveAll;
242 std::optional<Fix>
243 addAllMissingIncludes(llvm::ArrayRef<Diag> MissingIncludeDiags) {
244 if (MissingIncludeDiags.empty())
245 return std::nullopt;
247 Fix AddAllMissing;
248 AddAllMissing.Message = "add all missing includes";
249 // A map to deduplicate the edits with the same new text.
250 // newText (#include "my_missing_header.h") -> TextEdit.
251 std::map<std::string, TextEdit> Edits;
252 for (const auto &Diag : MissingIncludeDiags) {
253 assert(Diag.Fixes.size() == 1 && "Expected exactly one fix.");
254 for (const auto &Edit : Diag.Fixes.front().Edits) {
255 Edits.try_emplace(Edit.newText, Edit);
258 for (auto &It : Edits)
259 AddAllMissing.Edits.push_back(std::move(It.second));
260 return AddAllMissing;
262 Fix fixAll(const Fix &RemoveAllUnused, const Fix &AddAllMissing) {
263 Fix FixAll;
264 FixAll.Message = "fix all includes";
266 for (const auto &F : RemoveAllUnused.Edits)
267 FixAll.Edits.push_back(F);
268 for (const auto &F : AddAllMissing.Edits)
269 FixAll.Edits.push_back(F);
270 return FixAll;
273 std::vector<const Inclusion *>
274 getUnused(ParsedAST &AST,
275 const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles,
276 bool AnalyzeAngledIncludes) {
277 trace::Span Tracer("IncludeCleaner::getUnused");
278 std::vector<const Inclusion *> Unused;
279 for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) {
280 if (!MFI.HeaderID)
281 continue;
282 auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID);
283 if (ReferencedFiles.contains(IncludeID))
284 continue;
285 if (!mayConsiderUnused(MFI, AST, &AST.getPragmaIncludes(),
286 AnalyzeAngledIncludes)) {
287 dlog("{0} was not used, but is not eligible to be diagnosed as unused",
288 MFI.Written);
289 continue;
291 Unused.push_back(&MFI);
293 return Unused;
296 } // namespace
298 std::vector<include_cleaner::SymbolReference>
299 collectMacroReferences(ParsedAST &AST) {
300 const auto &SM = AST.getSourceManager();
301 auto &PP = AST.getPreprocessor();
302 std::vector<include_cleaner::SymbolReference> Macros;
303 for (const auto &[_, Refs] : AST.getMacros().MacroRefs) {
304 for (const auto &Ref : Refs) {
305 auto Loc = SM.getComposedLoc(SM.getMainFileID(), Ref.StartOffset);
306 const auto *Tok = AST.getTokens().spelledTokenContaining(Loc);
307 if (!Tok)
308 continue;
309 auto Macro = locateMacroAt(*Tok, PP);
310 if (!Macro)
311 continue;
312 auto DefLoc = Macro->NameLoc;
313 if (!DefLoc.isValid())
314 continue;
315 Macros.push_back(
316 {include_cleaner::Macro{/*Name=*/PP.getIdentifierInfo(Tok->text(SM)),
317 DefLoc},
318 Tok->location(),
319 Ref.InConditionalDirective ? include_cleaner::RefType::Ambiguous
320 : include_cleaner::RefType::Explicit});
324 return Macros;
327 include_cleaner::Includes convertIncludes(const ParsedAST &AST) {
328 auto &SM = AST.getSourceManager();
330 include_cleaner::Includes ConvertedIncludes;
331 // We satisfy Includes's contract that search dirs and included files have
332 // matching path styles: both ultimately use FileManager::getCanonicalName().
333 for (const auto &Dir : AST.getIncludeStructure().SearchPathsCanonical)
334 ConvertedIncludes.addSearchDirectory(Dir);
336 for (const Inclusion &Inc : AST.getIncludeStructure().MainFileIncludes) {
337 include_cleaner::Include TransformedInc;
338 llvm::StringRef WrittenRef = llvm::StringRef(Inc.Written);
339 TransformedInc.Spelled = WrittenRef.trim("\"<>");
340 TransformedInc.HashLocation =
341 SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset);
342 TransformedInc.Line = Inc.HashLine + 1;
343 TransformedInc.Angled = WrittenRef.starts_with("<");
344 // Inc.Resolved is canonicalized with clangd::getCanonicalPath(),
345 // which is based on FileManager::getCanonicalName(ParentDir).
346 auto FE = SM.getFileManager().getFileRef(Inc.Resolved);
347 if (!FE) {
348 elog("IncludeCleaner: Failed to get an entry for resolved path {0}: {1}",
349 Inc.Resolved, FE.takeError());
350 continue;
352 TransformedInc.Resolved = *FE;
353 ConvertedIncludes.add(std::move(TransformedInc));
355 return ConvertedIncludes;
358 IncludeCleanerFindings
359 computeIncludeCleanerFindings(ParsedAST &AST, bool AnalyzeAngledIncludes) {
360 // Interaction is only polished for C/CPP.
361 if (AST.getLangOpts().ObjC)
362 return {};
363 const auto &SM = AST.getSourceManager();
364 include_cleaner::Includes ConvertedIncludes = convertIncludes(AST);
365 const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());
366 auto PreamblePatch = PreamblePatch::getPatchEntry(AST.tuPath(), SM);
368 std::vector<include_cleaner::SymbolReference> Macros =
369 collectMacroReferences(AST);
370 std::vector<MissingIncludeDiagInfo> MissingIncludes;
371 llvm::DenseSet<IncludeStructure::HeaderID> Used;
372 trace::Span Tracer("include_cleaner::walkUsed");
373 OptionalDirectoryEntryRef ResourceDir = AST.getPreprocessor()
374 .getHeaderSearchInfo()
375 .getModuleMap()
376 .getBuiltinDir();
377 include_cleaner::walkUsed(
378 AST.getLocalTopLevelDecls(), /*MacroRefs=*/Macros,
379 &AST.getPragmaIncludes(), AST.getPreprocessor(),
380 [&](const include_cleaner::SymbolReference &Ref,
381 llvm::ArrayRef<include_cleaner::Header> Providers) {
382 bool Satisfied = false;
383 for (const auto &H : Providers) {
384 if (H.kind() == include_cleaner::Header::Physical &&
385 (H.physical() == MainFile || H.physical() == PreamblePatch ||
386 H.physical().getDir() == ResourceDir)) {
387 Satisfied = true;
388 continue;
390 for (auto *Inc : ConvertedIncludes.match(H)) {
391 Satisfied = true;
392 auto HeaderID =
393 AST.getIncludeStructure().getID(&Inc->Resolved->getFileEntry());
394 assert(HeaderID.has_value() &&
395 "ConvertedIncludes only contains resolved includes.");
396 Used.insert(*HeaderID);
400 if (Satisfied || Providers.empty() ||
401 Ref.RT != include_cleaner::RefType::Explicit)
402 return;
404 // Check if we have any headers with the same spelling, in edge cases
405 // like `#include_next "foo.h"`, the user can't ever include the
406 // physical foo.h, but can have a spelling that refers to it.
407 // We postpone this check because spelling a header for every usage is
408 // expensive.
409 std::string Spelling = include_cleaner::spellHeader(
410 {Providers.front(), AST.getPreprocessor().getHeaderSearchInfo(),
411 MainFile});
412 for (auto *Inc :
413 ConvertedIncludes.match(include_cleaner::Header{Spelling})) {
414 Satisfied = true;
415 auto HeaderID =
416 AST.getIncludeStructure().getID(&Inc->Resolved->getFileEntry());
417 assert(HeaderID.has_value() &&
418 "ConvertedIncludes only contains resolved includes.");
419 Used.insert(*HeaderID);
421 if (Satisfied)
422 return;
424 // We actually always want to map usages to their spellings, but
425 // spelling locations can point into preamble section. Using these
426 // offsets could lead into crashes in presence of stale preambles. Hence
427 // we use "getFileLoc" instead to make sure it always points into main
428 // file.
429 // FIXME: Use presumed locations to map such usages back to patched
430 // locations safely.
431 auto Loc = SM.getFileLoc(Ref.RefLocation);
432 // File locations can be outside of the main file if macro is expanded
433 // through an #include.
434 while (SM.getFileID(Loc) != SM.getMainFileID())
435 Loc = SM.getIncludeLoc(SM.getFileID(Loc));
436 auto TouchingTokens =
437 syntax::spelledTokensTouching(Loc, AST.getTokens());
438 assert(!TouchingTokens.empty());
439 // Loc points to the start offset of the ref token, here we use the last
440 // element of the TouchingTokens, e.g. avoid getting the "::" for
441 // "ns::^abc".
442 MissingIncludeDiagInfo DiagInfo{
443 Ref.Target, TouchingTokens.back().range(SM), Providers};
444 MissingIncludes.push_back(std::move(DiagInfo));
446 // Put possibly equal diagnostics together for deduplication.
447 // The duplicates might be from macro arguments that get expanded multiple
448 // times.
449 llvm::stable_sort(MissingIncludes, [](const MissingIncludeDiagInfo &LHS,
450 const MissingIncludeDiagInfo &RHS) {
451 // First sort by reference location.
452 if (LHS.SymRefRange != RHS.SymRefRange) {
453 // We can get away just by comparing the offsets as all the ranges are in
454 // main file.
455 return LHS.SymRefRange.beginOffset() < RHS.SymRefRange.beginOffset();
457 // For the same location, break ties using the symbol. Note that this won't
458 // be stable across runs.
459 using MapInfo = llvm::DenseMapInfo<include_cleaner::Symbol>;
460 return MapInfo::getHashValue(LHS.Symbol) <
461 MapInfo::getHashValue(RHS.Symbol);
463 MissingIncludes.erase(llvm::unique(MissingIncludes), MissingIncludes.end());
464 std::vector<const Inclusion *> UnusedIncludes =
465 getUnused(AST, Used, AnalyzeAngledIncludes);
466 return {std::move(UnusedIncludes), std::move(MissingIncludes)};
469 bool isPreferredProvider(const Inclusion &Inc,
470 const include_cleaner::Includes &Includes,
471 llvm::ArrayRef<include_cleaner::Header> Providers) {
472 for (const auto &H : Providers) {
473 auto Matches = Includes.match(H);
474 for (const include_cleaner::Include *Match : Matches)
475 if (Match->Line == unsigned(Inc.HashLine + 1))
476 return true; // this header is (equal) best
477 if (!Matches.empty())
478 return false; // another header is better
480 return false; // no header provides the symbol
483 std::vector<Diag>
484 issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code,
485 const IncludeCleanerFindings &Findings,
486 const ThreadsafeFS &TFS,
487 HeaderFilter IgnoreHeaders) {
488 trace::Span Tracer("IncludeCleaner::issueIncludeCleanerDiagnostics");
489 std::vector<Diag> UnusedIncludes = generateUnusedIncludeDiagnostics(
490 AST.tuPath(), Findings.UnusedIncludes, Code, IgnoreHeaders);
491 std::optional<Fix> RemoveAllUnused = removeAllUnusedIncludes(UnusedIncludes);
493 std::vector<Diag> MissingIncludeDiags = generateMissingIncludeDiagnostics(
494 AST, Findings.MissingIncludes, Code, IgnoreHeaders, TFS);
495 std::optional<Fix> AddAllMissing = addAllMissingIncludes(MissingIncludeDiags);
497 std::optional<Fix> FixAll;
498 if (RemoveAllUnused && AddAllMissing)
499 FixAll = fixAll(*RemoveAllUnused, *AddAllMissing);
501 auto AddBatchFix = [](const std::optional<Fix> &F, clang::clangd::Diag *Out) {
502 if (!F)
503 return;
504 Out->Fixes.push_back(*F);
506 for (auto &Diag : MissingIncludeDiags) {
507 AddBatchFix(MissingIncludeDiags.size() > 1 ? AddAllMissing : std::nullopt,
508 &Diag);
509 AddBatchFix(FixAll, &Diag);
511 for (auto &Diag : UnusedIncludes) {
512 AddBatchFix(UnusedIncludes.size() > 1 ? RemoveAllUnused : std::nullopt,
513 &Diag);
514 AddBatchFix(FixAll, &Diag);
517 auto Result = std::move(MissingIncludeDiags);
518 llvm::move(UnusedIncludes, std::back_inserter(Result));
519 return Result;
522 } // namespace clang::clangd