[LLD] [COFF] Handle undefined weak symbols in LTO (#70430)
[llvm-project.git] / clang-tools-extra / clangd / refactor / Rename.cpp
blob11f9e4627af76095471dd58e8a87ab22ea0e33ed
1 //===--- Rename.cpp - Symbol-rename refactorings -----------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "refactor/Rename.h"
10 #include "AST.h"
11 #include "FindTarget.h"
12 #include "ParsedAST.h"
13 #include "Selection.h"
14 #include "SourceCode.h"
15 #include "index/SymbolCollector.h"
16 #include "support/Logger.h"
17 #include "support/Trace.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/ASTTypeTraits.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/DeclCXX.h"
22 #include "clang/AST/DeclObjC.h"
23 #include "clang/AST/DeclTemplate.h"
24 #include "clang/AST/ParentMapContext.h"
25 #include "clang/AST/Stmt.h"
26 #include "clang/Basic/CharInfo.h"
27 #include "clang/Basic/LLVM.h"
28 #include "clang/Basic/SourceLocation.h"
29 #include "clang/Tooling/Syntax/Tokens.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/Error.h"
34 #include "llvm/Support/FormatVariadic.h"
35 #include "llvm/Support/JSON.h"
36 #include <algorithm>
37 #include <optional>
39 namespace clang {
40 namespace clangd {
41 namespace {
43 std::optional<std::string> filePath(const SymbolLocation &Loc,
44 llvm::StringRef HintFilePath) {
45 if (!Loc)
46 return std::nullopt;
47 auto Path = URI::resolve(Loc.FileURI, HintFilePath);
48 if (!Path) {
49 elog("Could not resolve URI {0}: {1}", Loc.FileURI, Path.takeError());
50 return std::nullopt;
53 return *Path;
56 // Returns true if the given location is expanded from any macro body.
57 bool isInMacroBody(const SourceManager &SM, SourceLocation Loc) {
58 while (Loc.isMacroID()) {
59 if (SM.isMacroBodyExpansion(Loc))
60 return true;
61 Loc = SM.getImmediateMacroCallerLoc(Loc);
64 return false;
67 // Canonical declarations help simplify the process of renaming. Examples:
68 // - Template's canonical decl is the templated declaration (i.e.
69 // ClassTemplateDecl is canonicalized to its child CXXRecordDecl,
70 // FunctionTemplateDecl - to child FunctionDecl)
71 // - Given a constructor/destructor, canonical declaration is the parent
72 // CXXRecordDecl because we want to rename both type name and its ctor/dtor.
73 // - All specializations are canonicalized to the primary template. For example:
75 // template <typename T, int U>
76 // bool Foo = true; (1)
78 // template <typename T>
79 // bool Foo<T, 0> = true; (2)
81 // template <>
82 // bool Foo<int, 0> = true; (3)
84 // Here, both partial (2) and full (3) specializations are canonicalized to (1)
85 // which ensures all three of them are renamed.
86 const NamedDecl *canonicalRenameDecl(const NamedDecl *D) {
87 if (const auto *VarTemplate = dyn_cast<VarTemplateSpecializationDecl>(D))
88 return canonicalRenameDecl(
89 VarTemplate->getSpecializedTemplate()->getTemplatedDecl());
90 if (const auto *Template = dyn_cast<TemplateDecl>(D))
91 if (const NamedDecl *TemplatedDecl = Template->getTemplatedDecl())
92 return canonicalRenameDecl(TemplatedDecl);
93 if (const auto *ClassTemplateSpecialization =
94 dyn_cast<ClassTemplateSpecializationDecl>(D))
95 return canonicalRenameDecl(
96 ClassTemplateSpecialization->getSpecializedTemplate()
97 ->getTemplatedDecl());
98 if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) {
99 if (Method->getDeclKind() == Decl::Kind::CXXConstructor ||
100 Method->getDeclKind() == Decl::Kind::CXXDestructor)
101 return canonicalRenameDecl(Method->getParent());
102 if (const FunctionDecl *InstantiatedMethod =
103 Method->getInstantiatedFromMemberFunction())
104 return canonicalRenameDecl(InstantiatedMethod);
105 // FIXME(kirillbobyrev): For virtual methods with
106 // size_overridden_methods() > 1, this will not rename all functions it
107 // overrides, because this code assumes there is a single canonical
108 // declaration.
109 if (Method->isVirtual() && Method->size_overridden_methods())
110 return canonicalRenameDecl(*Method->overridden_methods().begin());
112 if (const auto *Function = dyn_cast<FunctionDecl>(D))
113 if (const FunctionTemplateDecl *Template = Function->getPrimaryTemplate())
114 return canonicalRenameDecl(Template);
115 if (const auto *Field = dyn_cast<FieldDecl>(D)) {
116 // This is a hacky way to do something like
117 // CXXMethodDecl::getInstantiatedFromMemberFunction for the field because
118 // Clang AST does not store relevant information about the field that is
119 // instantiated.
120 const auto *FieldParent =
121 dyn_cast_or_null<CXXRecordDecl>(Field->getParent());
122 if (!FieldParent)
123 return Field->getCanonicalDecl();
124 FieldParent = FieldParent->getTemplateInstantiationPattern();
125 // Field is not instantiation.
126 if (!FieldParent || Field->getParent() == FieldParent)
127 return Field->getCanonicalDecl();
128 for (const FieldDecl *Candidate : FieldParent->fields())
129 if (Field->getDeclName() == Candidate->getDeclName())
130 return Candidate->getCanonicalDecl();
131 elog("FieldParent should have field with the same name as Field.");
133 if (const auto *VD = dyn_cast<VarDecl>(D)) {
134 if (const VarDecl *OriginalVD = VD->getInstantiatedFromStaticDataMember())
135 return canonicalRenameDecl(OriginalVD);
137 if (const auto *UD = dyn_cast<UsingShadowDecl>(D)) {
138 if (const auto *TargetDecl = UD->getTargetDecl())
139 return canonicalRenameDecl(TargetDecl);
141 return dyn_cast<NamedDecl>(D->getCanonicalDecl());
144 // Some AST nodes can reference multiple declarations. We try to pick the
145 // relevant one to rename here.
146 const NamedDecl *pickInterestingTarget(const NamedDecl *D) {
147 // We only support renaming the class name, not the category name. This has
148 // to be done outside of canonicalization since we don't want a category name
149 // reference to be canonicalized to the class.
150 if (const auto *CD = dyn_cast<ObjCCategoryDecl>(D))
151 if (const auto CI = CD->getClassInterface())
152 return CI;
153 return D;
156 llvm::DenseSet<const NamedDecl *> locateDeclAt(ParsedAST &AST,
157 SourceLocation TokenStartLoc) {
158 unsigned Offset =
159 AST.getSourceManager().getDecomposedSpellingLoc(TokenStartLoc).second;
161 SelectionTree Selection = SelectionTree::createRight(
162 AST.getASTContext(), AST.getTokens(), Offset, Offset);
163 const SelectionTree::Node *SelectedNode = Selection.commonAncestor();
164 if (!SelectedNode)
165 return {};
167 llvm::DenseSet<const NamedDecl *> Result;
168 for (const NamedDecl *D :
169 targetDecl(SelectedNode->ASTNode,
170 DeclRelation::Alias | DeclRelation::TemplatePattern,
171 AST.getHeuristicResolver())) {
172 D = pickInterestingTarget(D);
173 Result.insert(canonicalRenameDecl(D));
175 return Result;
178 void filterRenameTargets(llvm::DenseSet<const NamedDecl *> &Decls) {
179 // For something like
180 // namespace ns { void foo(); }
181 // void bar() { using ns::f^oo; foo(); }
182 // locateDeclAt() will return a UsingDecl and foo's actual declaration.
183 // For renaming, we're only interested in foo's declaration, so drop the other
184 // one. There should never be more than one UsingDecl here, otherwise the
185 // rename would be ambiguos anyway.
186 auto UD = std::find_if(Decls.begin(), Decls.end(), [](const NamedDecl *D) {
187 return llvm::isa<UsingDecl>(D);
189 if (UD != Decls.end()) {
190 Decls.erase(UD);
194 // By default, we exclude symbols from system headers and protobuf symbols as
195 // renaming these symbols would change system/generated files which are unlikely
196 // to be good candidates for modification.
197 bool isExcluded(const NamedDecl &RenameDecl) {
198 const auto &SM = RenameDecl.getASTContext().getSourceManager();
199 return SM.isInSystemHeader(RenameDecl.getLocation()) ||
200 isProtoFile(RenameDecl.getLocation(), SM);
203 enum class ReasonToReject {
204 NoSymbolFound,
205 NoIndexProvided,
206 NonIndexable,
207 UnsupportedSymbol,
208 AmbiguousSymbol,
210 // name validation. FIXME: reconcile with InvalidName
211 SameName,
214 std::optional<ReasonToReject> renameable(const NamedDecl &RenameDecl,
215 StringRef MainFilePath,
216 const SymbolIndex *Index,
217 const RenameOptions &Opts) {
218 trace::Span Tracer("Renameable");
219 if (!Opts.RenameVirtual) {
220 if (const auto *S = llvm::dyn_cast<CXXMethodDecl>(&RenameDecl)) {
221 if (S->isVirtual())
222 return ReasonToReject::UnsupportedSymbol;
225 // Filter out symbols that are unsupported in both rename modes.
226 if (llvm::isa<NamespaceDecl>(&RenameDecl))
227 return ReasonToReject::UnsupportedSymbol;
228 if (const auto *FD = llvm::dyn_cast<FunctionDecl>(&RenameDecl)) {
229 if (FD->isOverloadedOperator())
230 return ReasonToReject::UnsupportedSymbol;
232 // function-local symbols is safe to rename.
233 if (RenameDecl.getParentFunctionOrMethod())
234 return std::nullopt;
236 if (isExcluded(RenameDecl))
237 return ReasonToReject::UnsupportedSymbol;
239 // Check whether the symbol being rename is indexable.
240 auto &ASTCtx = RenameDecl.getASTContext();
241 bool MainFileIsHeader = isHeaderFile(MainFilePath, ASTCtx.getLangOpts());
242 bool DeclaredInMainFile =
243 isInsideMainFile(RenameDecl.getBeginLoc(), ASTCtx.getSourceManager());
244 bool IsMainFileOnly = true;
245 if (MainFileIsHeader)
246 // main file is a header, the symbol can't be main file only.
247 IsMainFileOnly = false;
248 else if (!DeclaredInMainFile)
249 IsMainFileOnly = false;
250 // If the symbol is not indexable, we disallow rename.
251 if (!SymbolCollector::shouldCollectSymbol(
252 RenameDecl, RenameDecl.getASTContext(), SymbolCollector::Options(),
253 IsMainFileOnly))
254 return ReasonToReject::NonIndexable;
256 return std::nullopt;
259 llvm::Error makeError(ReasonToReject Reason) {
260 auto Message = [](ReasonToReject Reason) {
261 switch (Reason) {
262 case ReasonToReject::NoSymbolFound:
263 return "there is no symbol at the given location";
264 case ReasonToReject::NoIndexProvided:
265 return "no index provided";
266 case ReasonToReject::NonIndexable:
267 return "symbol may be used in other files (not eligible for indexing)";
268 case ReasonToReject::UnsupportedSymbol:
269 return "symbol is not a supported kind (e.g. namespace, macro)";
270 case ReasonToReject::AmbiguousSymbol:
271 return "there are multiple symbols at the given location";
272 case ReasonToReject::SameName:
273 return "new name is the same as the old name";
275 llvm_unreachable("unhandled reason kind");
277 return error("Cannot rename symbol: {0}", Message(Reason));
280 // Return all rename occurrences in the main file.
281 std::vector<SourceLocation> findOccurrencesWithinFile(ParsedAST &AST,
282 const NamedDecl &ND) {
283 trace::Span Tracer("FindOccurrencesWithinFile");
284 assert(canonicalRenameDecl(&ND) == &ND &&
285 "ND should be already canonicalized.");
287 std::vector<SourceLocation> Results;
288 for (Decl *TopLevelDecl : AST.getLocalTopLevelDecls()) {
289 findExplicitReferences(
290 TopLevelDecl,
291 [&](ReferenceLoc Ref) {
292 if (Ref.Targets.empty())
293 return;
294 for (const auto *Target : Ref.Targets) {
295 if (canonicalRenameDecl(Target) == &ND) {
296 Results.push_back(Ref.NameLoc);
297 return;
301 AST.getHeuristicResolver());
304 return Results;
307 // Detect name conflict with othter DeclStmts in the same enclosing scope.
308 const NamedDecl *lookupSiblingWithinEnclosingScope(ASTContext &Ctx,
309 const NamedDecl &RenamedDecl,
310 StringRef NewName) {
311 // Store Parents list outside of GetSingleParent, so that returned pointer is
312 // not invalidated.
313 DynTypedNodeList Storage(DynTypedNode::create(RenamedDecl));
314 auto GetSingleParent = [&](const DynTypedNode &Node) -> const DynTypedNode * {
315 Storage = Ctx.getParents(Node);
316 return (Storage.size() == 1) ? Storage.begin() : nullptr;
319 // We need to get to the enclosing scope: NamedDecl's parent is typically
320 // DeclStmt (or FunctionProtoTypeLoc in case of function arguments), so
321 // enclosing scope would be the second order parent.
322 const auto *Parent = GetSingleParent(DynTypedNode::create(RenamedDecl));
323 if (!Parent || !(Parent->get<DeclStmt>() || Parent->get<TypeLoc>()))
324 return nullptr;
325 Parent = GetSingleParent(*Parent);
327 // The following helpers check corresponding AST nodes for variable
328 // declarations with the name collision.
329 auto CheckDeclStmt = [&](const DeclStmt *DS,
330 StringRef Name) -> const NamedDecl * {
331 if (!DS)
332 return nullptr;
333 for (const auto &Child : DS->getDeclGroup())
334 if (const auto *ND = dyn_cast<NamedDecl>(Child))
335 if (ND != &RenamedDecl && ND->getDeclName().isIdentifier() &&
336 ND->getName() == Name)
337 return ND;
338 return nullptr;
340 auto CheckCompoundStmt = [&](const Stmt *S,
341 StringRef Name) -> const NamedDecl * {
342 if (const auto *CS = dyn_cast_or_null<CompoundStmt>(S))
343 for (const auto *Node : CS->children())
344 if (const auto *Result = CheckDeclStmt(dyn_cast<DeclStmt>(Node), Name))
345 return Result;
346 return nullptr;
348 auto CheckConditionVariable = [&](const auto *Scope,
349 StringRef Name) -> const NamedDecl * {
350 if (!Scope)
351 return nullptr;
352 return CheckDeclStmt(Scope->getConditionVariableDeclStmt(), Name);
355 // CompoundStmt is the most common enclosing scope for function-local symbols
356 // In the simplest case we just iterate through sibling DeclStmts and check
357 // for collisions.
358 if (const auto *EnclosingCS = Parent->get<CompoundStmt>()) {
359 if (const auto *Result = CheckCompoundStmt(EnclosingCS, NewName))
360 return Result;
361 const auto *ScopeParent = GetSingleParent(*Parent);
362 // CompoundStmt may be found within if/while/for. In these cases, rename can
363 // collide with the init-statement variable decalaration, they should be
364 // checked.
365 if (const auto *Result =
366 CheckConditionVariable(ScopeParent->get<IfStmt>(), NewName))
367 return Result;
368 if (const auto *Result =
369 CheckConditionVariable(ScopeParent->get<WhileStmt>(), NewName))
370 return Result;
371 if (const auto *For = ScopeParent->get<ForStmt>())
372 if (const auto *Result = CheckDeclStmt(
373 dyn_cast_or_null<DeclStmt>(For->getInit()), NewName))
374 return Result;
375 // Also check if there is a name collision with function arguments.
376 if (const auto *Function = ScopeParent->get<FunctionDecl>())
377 for (const auto *Parameter : Function->parameters())
378 if (Parameter->getName() == NewName)
379 return Parameter;
380 return nullptr;
383 // When renaming a variable within init-statement within if/while/for
384 // condition, also check the CompoundStmt in the body.
385 if (const auto *EnclosingIf = Parent->get<IfStmt>()) {
386 if (const auto *Result = CheckCompoundStmt(EnclosingIf->getElse(), NewName))
387 return Result;
388 return CheckCompoundStmt(EnclosingIf->getThen(), NewName);
390 if (const auto *EnclosingWhile = Parent->get<WhileStmt>())
391 return CheckCompoundStmt(EnclosingWhile->getBody(), NewName);
392 if (const auto *EnclosingFor = Parent->get<ForStmt>()) {
393 // Check for conflicts with other declarations within initialization
394 // statement.
395 if (const auto *Result = CheckDeclStmt(
396 dyn_cast_or_null<DeclStmt>(EnclosingFor->getInit()), NewName))
397 return Result;
398 return CheckCompoundStmt(EnclosingFor->getBody(), NewName);
400 if (const auto *EnclosingFunction = Parent->get<FunctionDecl>()) {
401 // Check for conflicts with other arguments.
402 for (const auto *Parameter : EnclosingFunction->parameters())
403 if (Parameter != &RenamedDecl && Parameter->getName() == NewName)
404 return Parameter;
405 // FIXME: We don't modify all references to function parameters when
406 // renaming from forward declaration now, so using a name colliding with
407 // something in the definition's body is a valid transformation.
408 if (!EnclosingFunction->doesThisDeclarationHaveABody())
409 return nullptr;
410 return CheckCompoundStmt(EnclosingFunction->getBody(), NewName);
413 return nullptr;
416 // Lookup the declarations (if any) with the given Name in the context of
417 // RenameDecl.
418 const NamedDecl *lookupSiblingsWithinContext(ASTContext &Ctx,
419 const NamedDecl &RenamedDecl,
420 llvm::StringRef NewName) {
421 const auto &II = Ctx.Idents.get(NewName);
422 DeclarationName LookupName(&II);
423 DeclContextLookupResult LookupResult;
424 const auto *DC = RenamedDecl.getDeclContext();
425 while (DC->isTransparentContext())
426 DC = DC->getParent();
427 switch (DC->getDeclKind()) {
428 // The enclosing DeclContext may not be the enclosing scope, it might have
429 // false positives and negatives, so we only choose "confident" DeclContexts
430 // that don't have any subscopes that are neither DeclContexts nor
431 // transparent.
433 // Notably, FunctionDecl is excluded -- because local variables are not scoped
434 // to the function, but rather to the CompoundStmt that is its body. Lookup
435 // will not find function-local variables.
436 case Decl::TranslationUnit:
437 case Decl::Namespace:
438 case Decl::Record:
439 case Decl::Enum:
440 case Decl::CXXRecord:
441 LookupResult = DC->lookup(LookupName);
442 break;
443 default:
444 break;
446 // Lookup may contain the RenameDecl itself, exclude it.
447 for (const auto *D : LookupResult)
448 if (D->getCanonicalDecl() != RenamedDecl.getCanonicalDecl())
449 return D;
450 return nullptr;
453 const NamedDecl *lookupSiblingWithName(ASTContext &Ctx,
454 const NamedDecl &RenamedDecl,
455 llvm::StringRef NewName) {
456 trace::Span Tracer("LookupSiblingWithName");
457 if (const auto *Result =
458 lookupSiblingsWithinContext(Ctx, RenamedDecl, NewName))
459 return Result;
460 return lookupSiblingWithinEnclosingScope(Ctx, RenamedDecl, NewName);
463 struct InvalidName {
464 enum Kind {
465 Keywords,
466 Conflict,
467 BadIdentifier,
469 Kind K;
470 std::string Details;
472 std::string toString(InvalidName::Kind K) {
473 switch (K) {
474 case InvalidName::Keywords:
475 return "Keywords";
476 case InvalidName::Conflict:
477 return "Conflict";
478 case InvalidName::BadIdentifier:
479 return "BadIdentifier";
481 llvm_unreachable("unhandled InvalidName kind");
484 llvm::Error makeError(InvalidName Reason) {
485 auto Message = [](const InvalidName &Reason) {
486 switch (Reason.K) {
487 case InvalidName::Keywords:
488 return llvm::formatv("the chosen name \"{0}\" is a keyword",
489 Reason.Details);
490 case InvalidName::Conflict:
491 return llvm::formatv("conflict with the symbol in {0}", Reason.Details);
492 case InvalidName::BadIdentifier:
493 return llvm::formatv("the chosen name \"{0}\" is not a valid identifier",
494 Reason.Details);
496 llvm_unreachable("unhandled InvalidName kind");
498 return error("invalid name: {0}", Message(Reason));
501 static bool mayBeValidIdentifier(llvm::StringRef Ident) {
502 assert(llvm::json::isUTF8(Ident));
503 if (Ident.empty())
504 return false;
505 // We don't check all the rules for non-ascii characters (most are allowed).
506 bool AllowDollar = true; // lenient
507 if (llvm::isASCII(Ident.front()) &&
508 !isAsciiIdentifierStart(Ident.front(), AllowDollar))
509 return false;
510 for (char C : Ident) {
511 if (llvm::isASCII(C) && !isAsciiIdentifierContinue(C, AllowDollar))
512 return false;
514 return true;
517 // Check if we can rename the given RenameDecl into NewName.
518 // Return details if the rename would produce a conflict.
519 std::optional<InvalidName> checkName(const NamedDecl &RenameDecl,
520 llvm::StringRef NewName) {
521 trace::Span Tracer("CheckName");
522 static constexpr trace::Metric InvalidNameMetric(
523 "rename_name_invalid", trace::Metric::Counter, "invalid_kind");
524 auto &ASTCtx = RenameDecl.getASTContext();
525 std::optional<InvalidName> Result;
526 if (isKeyword(NewName, ASTCtx.getLangOpts()))
527 Result = InvalidName{InvalidName::Keywords, NewName.str()};
528 else if (!mayBeValidIdentifier(NewName))
529 Result = InvalidName{InvalidName::BadIdentifier, NewName.str()};
530 else {
531 // Name conflict detection.
532 // Function conflicts are subtle (overloading), so ignore them.
533 if (RenameDecl.getKind() != Decl::Function &&
534 RenameDecl.getKind() != Decl::CXXMethod) {
535 if (auto *Conflict = lookupSiblingWithName(ASTCtx, RenameDecl, NewName))
536 Result = InvalidName{
537 InvalidName::Conflict,
538 Conflict->getLocation().printToString(ASTCtx.getSourceManager())};
541 if (Result)
542 InvalidNameMetric.record(1, toString(Result->K));
543 return Result;
546 // AST-based rename, it renames all occurrences in the main file.
547 llvm::Expected<tooling::Replacements>
548 renameWithinFile(ParsedAST &AST, const NamedDecl &RenameDecl,
549 llvm::StringRef NewName) {
550 trace::Span Tracer("RenameWithinFile");
551 const SourceManager &SM = AST.getSourceManager();
553 tooling::Replacements FilteredChanges;
554 for (SourceLocation Loc : findOccurrencesWithinFile(AST, RenameDecl)) {
555 SourceLocation RenameLoc = Loc;
556 // We don't rename in any macro bodies, but we allow rename the symbol
557 // spelled in a top-level macro argument in the main file.
558 if (RenameLoc.isMacroID()) {
559 if (isInMacroBody(SM, RenameLoc))
560 continue;
561 RenameLoc = SM.getSpellingLoc(Loc);
563 // Filter out locations not from main file.
564 // We traverse only main file decls, but locations could come from an
565 // non-preamble #include file e.g.
566 // void test() {
567 // int f^oo;
568 // #include "use_foo.inc"
569 // }
570 if (!isInsideMainFile(RenameLoc, SM))
571 continue;
572 if (auto Err = FilteredChanges.add(tooling::Replacement(
573 SM, CharSourceRange::getTokenRange(RenameLoc), NewName)))
574 return std::move(Err);
576 return FilteredChanges;
579 Range toRange(const SymbolLocation &L) {
580 Range R;
581 R.start.line = L.Start.line();
582 R.start.character = L.Start.column();
583 R.end.line = L.End.line();
584 R.end.character = L.End.column();
585 return R;
588 // Walk down from a virtual method to overriding methods, we rename them as a
589 // group. Note that canonicalRenameDecl() ensures we're starting from the base
590 // method.
591 void insertTransitiveOverrides(SymbolID Base, llvm::DenseSet<SymbolID> &IDs,
592 const SymbolIndex &Index) {
593 RelationsRequest Req;
594 Req.Predicate = RelationKind::OverriddenBy;
596 llvm::DenseSet<SymbolID> Pending = {Base};
597 while (!Pending.empty()) {
598 Req.Subjects = std::move(Pending);
599 Pending.clear();
601 Index.relations(Req, [&](const SymbolID &, const Symbol &Override) {
602 if (IDs.insert(Override.ID).second)
603 Pending.insert(Override.ID);
608 // Return all rename occurrences (using the index) outside of the main file,
609 // grouped by the absolute file path.
610 llvm::Expected<llvm::StringMap<std::vector<Range>>>
611 findOccurrencesOutsideFile(const NamedDecl &RenameDecl,
612 llvm::StringRef MainFile, const SymbolIndex &Index,
613 size_t MaxLimitFiles) {
614 trace::Span Tracer("FindOccurrencesOutsideFile");
615 RefsRequest RQuest;
616 RQuest.IDs.insert(getSymbolID(&RenameDecl));
618 if (const auto *MethodDecl = llvm::dyn_cast<CXXMethodDecl>(&RenameDecl))
619 if (MethodDecl->isVirtual())
620 insertTransitiveOverrides(*RQuest.IDs.begin(), RQuest.IDs, Index);
622 // Absolute file path => rename occurrences in that file.
623 llvm::StringMap<std::vector<Range>> AffectedFiles;
624 bool HasMore = Index.refs(RQuest, [&](const Ref &R) {
625 if (AffectedFiles.size() >= MaxLimitFiles)
626 return;
627 if ((R.Kind & RefKind::Spelled) == RefKind::Unknown)
628 return;
629 if (auto RefFilePath = filePath(R.Location, /*HintFilePath=*/MainFile)) {
630 if (!pathEqual(*RefFilePath, MainFile))
631 AffectedFiles[*RefFilePath].push_back(toRange(R.Location));
635 if (AffectedFiles.size() >= MaxLimitFiles)
636 return error("The number of affected files exceeds the max limit {0}",
637 MaxLimitFiles);
638 if (HasMore)
639 return error("The symbol {0} has too many occurrences",
640 RenameDecl.getQualifiedNameAsString());
641 // Sort and deduplicate the results, in case that index returns duplications.
642 for (auto &FileAndOccurrences : AffectedFiles) {
643 auto &Ranges = FileAndOccurrences.getValue();
644 llvm::sort(Ranges);
645 Ranges.erase(std::unique(Ranges.begin(), Ranges.end()), Ranges.end());
647 SPAN_ATTACH(Tracer, FileAndOccurrences.first(),
648 static_cast<int64_t>(Ranges.size()));
650 return AffectedFiles;
653 // Index-based rename, it renames all occurrences outside of the main file.
655 // The cross-file rename is purely based on the index, as we don't want to
656 // build all ASTs for affected files, which may cause a performance hit.
657 // We choose to trade off some correctness for performance and scalability.
659 // Clangd builds a dynamic index for all opened files on top of the static
660 // index of the whole codebase. Dynamic index is up-to-date (respects dirty
661 // buffers) as long as clangd finishes processing opened files, while static
662 // index (background index) is relatively stale. We choose the dirty buffers
663 // as the file content we rename on, and fallback to file content on disk if
664 // there is no dirty buffer.
665 llvm::Expected<FileEdits>
666 renameOutsideFile(const NamedDecl &RenameDecl, llvm::StringRef MainFilePath,
667 llvm::StringRef NewName, const SymbolIndex &Index,
668 size_t MaxLimitFiles, llvm::vfs::FileSystem &FS) {
669 trace::Span Tracer("RenameOutsideFile");
670 auto AffectedFiles = findOccurrencesOutsideFile(RenameDecl, MainFilePath,
671 Index, MaxLimitFiles);
672 if (!AffectedFiles)
673 return AffectedFiles.takeError();
674 FileEdits Results;
675 for (auto &FileAndOccurrences : *AffectedFiles) {
676 llvm::StringRef FilePath = FileAndOccurrences.first();
678 auto ExpBuffer = FS.getBufferForFile(FilePath);
679 if (!ExpBuffer) {
680 elog("Fail to read file content: Fail to open file {0}: {1}", FilePath,
681 ExpBuffer.getError().message());
682 continue;
685 auto AffectedFileCode = (*ExpBuffer)->getBuffer();
686 auto RenameRanges =
687 adjustRenameRanges(AffectedFileCode, RenameDecl.getNameAsString(),
688 std::move(FileAndOccurrences.second),
689 RenameDecl.getASTContext().getLangOpts());
690 if (!RenameRanges) {
691 // Our heuristics fails to adjust rename ranges to the current state of
692 // the file, it is most likely the index is stale, so we give up the
693 // entire rename.
694 return error("Index results don't match the content of file {0} "
695 "(the index may be stale)",
696 FilePath);
698 auto RenameEdit =
699 buildRenameEdit(FilePath, AffectedFileCode, *RenameRanges, NewName);
700 if (!RenameEdit)
701 return error("failed to rename in file {0}: {1}", FilePath,
702 RenameEdit.takeError());
703 if (!RenameEdit->Replacements.empty())
704 Results.insert({FilePath, std::move(*RenameEdit)});
706 return Results;
709 // A simple edit is either changing line or column, but not both.
710 bool impliesSimpleEdit(const Position &LHS, const Position &RHS) {
711 return LHS.line == RHS.line || LHS.character == RHS.character;
714 // Performs a DFS to enumerate all possible near-miss matches.
715 // It finds the locations where the indexed occurrences are now spelled in
716 // Lexed occurrences, a near miss is defined as:
717 // - a near miss maps all of the **name** occurrences from the index onto a
718 // *subset* of lexed occurrences (we allow a single name refers to more
719 // than one symbol)
720 // - all indexed occurrences must be mapped, and Result must be distinct and
721 // preserve order (only support detecting simple edits to ensure a
722 // robust mapping)
723 // - each indexed -> lexed occurrences mapping correspondence may change the
724 // *line* or *column*, but not both (increases chance of a robust mapping)
725 void findNearMiss(
726 std::vector<size_t> &PartialMatch, ArrayRef<Range> IndexedRest,
727 ArrayRef<Range> LexedRest, int LexedIndex, int &Fuel,
728 llvm::function_ref<void(const std::vector<size_t> &)> MatchedCB) {
729 if (--Fuel < 0)
730 return;
731 if (IndexedRest.size() > LexedRest.size())
732 return;
733 if (IndexedRest.empty()) {
734 MatchedCB(PartialMatch);
735 return;
737 if (impliesSimpleEdit(IndexedRest.front().start, LexedRest.front().start)) {
738 PartialMatch.push_back(LexedIndex);
739 findNearMiss(PartialMatch, IndexedRest.drop_front(), LexedRest.drop_front(),
740 LexedIndex + 1, Fuel, MatchedCB);
741 PartialMatch.pop_back();
743 findNearMiss(PartialMatch, IndexedRest, LexedRest.drop_front(),
744 LexedIndex + 1, Fuel, MatchedCB);
747 } // namespace
749 llvm::Expected<RenameResult> rename(const RenameInputs &RInputs) {
750 assert(!RInputs.Index == !RInputs.FS &&
751 "Index and FS must either both be specified or both null.");
752 trace::Span Tracer("Rename flow");
753 const auto &Opts = RInputs.Opts;
754 ParsedAST &AST = RInputs.AST;
755 const SourceManager &SM = AST.getSourceManager();
756 llvm::StringRef MainFileCode = SM.getBufferData(SM.getMainFileID());
757 // Try to find the tokens adjacent to the cursor position.
758 auto Loc = sourceLocationInMainFile(SM, RInputs.Pos);
759 if (!Loc)
760 return Loc.takeError();
761 const syntax::Token *IdentifierToken =
762 spelledIdentifierTouching(*Loc, AST.getTokens());
764 // Renames should only triggered on identifiers.
765 if (!IdentifierToken)
766 return makeError(ReasonToReject::NoSymbolFound);
767 Range CurrentIdentifier = halfOpenToRange(
768 SM, CharSourceRange::getCharRange(IdentifierToken->location(),
769 IdentifierToken->endLocation()));
770 // FIXME: Renaming macros is not supported yet, the macro-handling code should
771 // be moved to rename tooling library.
772 if (locateMacroAt(*IdentifierToken, AST.getPreprocessor()))
773 return makeError(ReasonToReject::UnsupportedSymbol);
775 auto DeclsUnderCursor = locateDeclAt(AST, IdentifierToken->location());
776 filterRenameTargets(DeclsUnderCursor);
777 if (DeclsUnderCursor.empty())
778 return makeError(ReasonToReject::NoSymbolFound);
779 if (DeclsUnderCursor.size() > 1)
780 return makeError(ReasonToReject::AmbiguousSymbol);
781 const auto &RenameDecl = **DeclsUnderCursor.begin();
782 const auto *ID = RenameDecl.getIdentifier();
783 if (!ID)
784 return makeError(ReasonToReject::UnsupportedSymbol);
785 if (ID->getName() == RInputs.NewName)
786 return makeError(ReasonToReject::SameName);
787 auto Invalid = checkName(RenameDecl, RInputs.NewName);
788 if (Invalid)
789 return makeError(std::move(*Invalid));
791 auto Reject =
792 renameable(RenameDecl, RInputs.MainFilePath, RInputs.Index, Opts);
793 if (Reject)
794 return makeError(*Reject);
796 // We have two implementations of the rename:
797 // - AST-based rename: used for renaming local symbols, e.g. variables
798 // defined in a function body;
799 // - index-based rename: used for renaming non-local symbols, and not
800 // feasible for local symbols (as by design our index don't index these
801 // symbols by design;
802 // To make cross-file rename work for local symbol, we use a hybrid solution:
803 // - run AST-based rename on the main file;
804 // - run index-based rename on other affected files;
805 auto MainFileRenameEdit = renameWithinFile(AST, RenameDecl, RInputs.NewName);
806 if (!MainFileRenameEdit)
807 return MainFileRenameEdit.takeError();
809 // Check the rename-triggering location is actually being renamed.
810 // This is a robustness check to avoid surprising rename results -- if the
811 // the triggering location is not actually the name of the node we identified
812 // (e.g. for broken code), then rename is likely not what users expect, so we
813 // reject this kind of rename.
814 auto StartOffset = positionToOffset(MainFileCode, CurrentIdentifier.start);
815 auto EndOffset = positionToOffset(MainFileCode, CurrentIdentifier.end);
816 if (!StartOffset)
817 return StartOffset.takeError();
818 if (!EndOffset)
819 return EndOffset.takeError();
820 if (llvm::none_of(
821 *MainFileRenameEdit,
822 [&StartOffset, &EndOffset](const clang::tooling::Replacement &R) {
823 return R.getOffset() == *StartOffset &&
824 R.getLength() == *EndOffset - *StartOffset;
825 })) {
826 return makeError(ReasonToReject::NoSymbolFound);
828 RenameResult Result;
829 Result.Target = CurrentIdentifier;
830 Edit MainFileEdits = Edit(MainFileCode, std::move(*MainFileRenameEdit));
831 for (const TextEdit &TE : MainFileEdits.asTextEdits())
832 Result.LocalChanges.push_back(TE.range);
834 // return the main file edit if this is a within-file rename or the symbol
835 // being renamed is function local.
836 if (RenameDecl.getParentFunctionOrMethod()) {
837 Result.GlobalChanges = FileEdits(
838 {std::make_pair(RInputs.MainFilePath, std::move(MainFileEdits))});
839 return Result;
842 // If the index is nullptr, we don't know the completeness of the result, so
843 // we don't populate the field GlobalChanges.
844 if (!RInputs.Index) {
845 assert(Result.GlobalChanges.empty());
846 return Result;
849 auto OtherFilesEdits = renameOutsideFile(
850 RenameDecl, RInputs.MainFilePath, RInputs.NewName, *RInputs.Index,
851 Opts.LimitFiles == 0 ? std::numeric_limits<size_t>::max()
852 : Opts.LimitFiles,
853 *RInputs.FS);
854 if (!OtherFilesEdits)
855 return OtherFilesEdits.takeError();
856 Result.GlobalChanges = *OtherFilesEdits;
857 // Attach the rename edits for the main file.
858 Result.GlobalChanges.try_emplace(RInputs.MainFilePath,
859 std::move(MainFileEdits));
860 return Result;
863 llvm::Expected<Edit> buildRenameEdit(llvm::StringRef AbsFilePath,
864 llvm::StringRef InitialCode,
865 std::vector<Range> Occurrences,
866 llvm::StringRef NewName) {
867 trace::Span Tracer("BuildRenameEdit");
868 SPAN_ATTACH(Tracer, "file_path", AbsFilePath);
869 SPAN_ATTACH(Tracer, "rename_occurrences",
870 static_cast<int64_t>(Occurrences.size()));
872 assert(llvm::is_sorted(Occurrences));
873 assert(std::unique(Occurrences.begin(), Occurrences.end()) ==
874 Occurrences.end() &&
875 "Occurrences must be unique");
877 // These two always correspond to the same position.
878 Position LastPos{0, 0};
879 size_t LastOffset = 0;
881 auto Offset = [&](const Position &P) -> llvm::Expected<size_t> {
882 assert(LastPos <= P && "malformed input");
883 Position Shifted = {
884 P.line - LastPos.line,
885 P.line > LastPos.line ? P.character : P.character - LastPos.character};
886 auto ShiftedOffset =
887 positionToOffset(InitialCode.substr(LastOffset), Shifted);
888 if (!ShiftedOffset)
889 return error("fail to convert the position {0} to offset ({1})", P,
890 ShiftedOffset.takeError());
891 LastPos = P;
892 LastOffset += *ShiftedOffset;
893 return LastOffset;
896 std::vector<std::pair</*start*/ size_t, /*end*/ size_t>> OccurrencesOffsets;
897 for (const auto &R : Occurrences) {
898 auto StartOffset = Offset(R.start);
899 if (!StartOffset)
900 return StartOffset.takeError();
901 auto EndOffset = Offset(R.end);
902 if (!EndOffset)
903 return EndOffset.takeError();
904 OccurrencesOffsets.push_back({*StartOffset, *EndOffset});
907 tooling::Replacements RenameEdit;
908 for (const auto &R : OccurrencesOffsets) {
909 auto ByteLength = R.second - R.first;
910 if (auto Err = RenameEdit.add(
911 tooling::Replacement(AbsFilePath, R.first, ByteLength, NewName)))
912 return std::move(Err);
914 return Edit(InitialCode, std::move(RenameEdit));
917 // Details:
918 // - lex the draft code to get all rename candidates, this yields a superset
919 // of candidates.
920 // - apply range patching heuristics to generate "authoritative" occurrences,
921 // cases we consider:
922 // (a) index returns a subset of candidates, we use the indexed results.
923 // - fully equal, we are sure the index is up-to-date
924 // - proper subset, index is correct in most cases? there may be false
925 // positives (e.g. candidates got appended), but rename is still safe
926 // (b) index returns non-candidate results, we attempt to map the indexed
927 // ranges onto candidates in a plausible way (e.g. guess that lines
928 // were inserted). If such a "near miss" is found, the rename is still
929 // possible
930 std::optional<std::vector<Range>>
931 adjustRenameRanges(llvm::StringRef DraftCode, llvm::StringRef Identifier,
932 std::vector<Range> Indexed, const LangOptions &LangOpts) {
933 trace::Span Tracer("AdjustRenameRanges");
934 assert(!Indexed.empty());
935 assert(llvm::is_sorted(Indexed));
936 std::vector<Range> Lexed =
937 collectIdentifierRanges(Identifier, DraftCode, LangOpts);
938 llvm::sort(Lexed);
939 return getMappedRanges(Indexed, Lexed);
942 std::optional<std::vector<Range>> getMappedRanges(ArrayRef<Range> Indexed,
943 ArrayRef<Range> Lexed) {
944 trace::Span Tracer("GetMappedRanges");
945 assert(!Indexed.empty());
946 assert(llvm::is_sorted(Indexed));
947 assert(llvm::is_sorted(Lexed));
949 if (Indexed.size() > Lexed.size()) {
950 vlog("The number of lexed occurrences is less than indexed occurrences");
951 SPAN_ATTACH(
952 Tracer, "error",
953 "The number of lexed occurrences is less than indexed occurrences");
954 return std::nullopt;
956 // Fast check for the special subset case.
957 if (std::includes(Indexed.begin(), Indexed.end(), Lexed.begin(), Lexed.end()))
958 return Indexed.vec();
960 std::vector<size_t> Best;
961 size_t BestCost = std::numeric_limits<size_t>::max();
962 bool HasMultiple = false;
963 std::vector<size_t> ResultStorage;
964 int Fuel = 10000;
965 findNearMiss(ResultStorage, Indexed, Lexed, 0, Fuel,
966 [&](const std::vector<size_t> &Matched) {
967 size_t MCost =
968 renameRangeAdjustmentCost(Indexed, Lexed, Matched);
969 if (MCost < BestCost) {
970 BestCost = MCost;
971 Best = std::move(Matched);
972 HasMultiple = false; // reset
973 return;
975 if (MCost == BestCost)
976 HasMultiple = true;
978 if (HasMultiple) {
979 vlog("The best near miss is not unique.");
980 SPAN_ATTACH(Tracer, "error", "The best near miss is not unique");
981 return std::nullopt;
983 if (Best.empty()) {
984 vlog("Didn't find a near miss.");
985 SPAN_ATTACH(Tracer, "error", "Didn't find a near miss");
986 return std::nullopt;
988 std::vector<Range> Mapped;
989 for (auto I : Best)
990 Mapped.push_back(Lexed[I]);
991 SPAN_ATTACH(Tracer, "mapped_ranges", static_cast<int64_t>(Mapped.size()));
992 return Mapped;
995 // The cost is the sum of the implied edit sizes between successive diffs, only
996 // simple edits are considered:
997 // - insert/remove a line (change line offset)
998 // - insert/remove a character on an existing line (change column offset)
1000 // Example I, total result is 1 + 1 = 2.
1001 // diff[0]: line + 1 <- insert a line before edit 0.
1002 // diff[1]: line + 1
1003 // diff[2]: line + 1
1004 // diff[3]: line + 2 <- insert a line before edits 2 and 3.
1006 // Example II, total result is 1 + 1 + 1 = 3.
1007 // diff[0]: line + 1 <- insert a line before edit 0.
1008 // diff[1]: column + 1 <- remove a line between edits 0 and 1, and insert a
1009 // character on edit 1.
1010 size_t renameRangeAdjustmentCost(ArrayRef<Range> Indexed, ArrayRef<Range> Lexed,
1011 ArrayRef<size_t> MappedIndex) {
1012 assert(Indexed.size() == MappedIndex.size());
1013 assert(llvm::is_sorted(Indexed));
1014 assert(llvm::is_sorted(Lexed));
1016 int LastLine = -1;
1017 int LastDLine = 0, LastDColumn = 0;
1018 int Cost = 0;
1019 for (size_t I = 0; I < Indexed.size(); ++I) {
1020 int DLine = Indexed[I].start.line - Lexed[MappedIndex[I]].start.line;
1021 int DColumn =
1022 Indexed[I].start.character - Lexed[MappedIndex[I]].start.character;
1023 int Line = Indexed[I].start.line;
1024 if (Line != LastLine)
1025 LastDColumn = 0; // column offsets don't carry cross lines.
1026 Cost += abs(DLine - LastDLine) + abs(DColumn - LastDColumn);
1027 std::tie(LastLine, LastDLine, LastDColumn) = std::tie(Line, DLine, DColumn);
1029 return Cost;
1032 } // namespace clangd
1033 } // namespace clang