[mlir][sparse] implement non-permutation MapRef encoding (#69406)
[llvm-project.git] / clang-tools-extra / clangd / Selection.cpp
blob8c6d5750ecefdba4e970282cdcbfc34ff5920881
1 //===--- Selection.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "Selection.h"
10 #include "AST.h"
11 #include "support/Logger.h"
12 #include "support/Trace.h"
13 #include "clang/AST/ASTConcept.h"
14 #include "clang/AST/ASTTypeTraits.h"
15 #include "clang/AST/Decl.h"
16 #include "clang/AST/DeclCXX.h"
17 #include "clang/AST/Expr.h"
18 #include "clang/AST/ExprCXX.h"
19 #include "clang/AST/PrettyPrinter.h"
20 #include "clang/AST/RecursiveASTVisitor.h"
21 #include "clang/AST/TypeLoc.h"
22 #include "clang/Basic/OperatorKinds.h"
23 #include "clang/Basic/SourceLocation.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Basic/TokenKinds.h"
26 #include "clang/Lex/Lexer.h"
27 #include "clang/Tooling/Syntax/Tokens.h"
28 #include "llvm/ADT/BitVector.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include <algorithm>
34 #include <optional>
35 #include <set>
36 #include <string>
38 namespace clang {
39 namespace clangd {
40 namespace {
41 using Node = SelectionTree::Node;
43 // Measure the fraction of selections that were enabled by recovery AST.
44 void recordMetrics(const SelectionTree &S, const LangOptions &Lang) {
45 if (!trace::enabled())
46 return;
47 const char *LanguageLabel = Lang.CPlusPlus ? "C++" : Lang.ObjC ? "ObjC" : "C";
48 static constexpr trace::Metric SelectionUsedRecovery(
49 "selection_recovery", trace::Metric::Distribution, "language");
50 static constexpr trace::Metric RecoveryType(
51 "selection_recovery_type", trace::Metric::Distribution, "language");
52 const auto *Common = S.commonAncestor();
53 for (const auto *N = Common; N; N = N->Parent) {
54 if (const auto *RE = N->ASTNode.get<RecoveryExpr>()) {
55 SelectionUsedRecovery.record(1, LanguageLabel); // used recovery ast.
56 RecoveryType.record(RE->isTypeDependent() ? 0 : 1, LanguageLabel);
57 return;
60 if (Common)
61 SelectionUsedRecovery.record(0, LanguageLabel); // unused.
64 // Return the range covering a node and all its children.
65 SourceRange getSourceRange(const DynTypedNode &N) {
66 // MemberExprs to implicitly access anonymous fields should not claim any
67 // tokens for themselves. Given:
68 // struct A { struct { int b; }; };
69 // The clang AST reports the following nodes for an access to b:
70 // A().b;
71 // [----] MemberExpr, base = A().<anonymous>, member = b
72 // [----] MemberExpr: base = A(), member = <anonymous>
73 // [-] CXXConstructExpr
74 // For our purposes, we don't want the second MemberExpr to own any tokens,
75 // so we reduce its range to match the CXXConstructExpr.
76 // (It's not clear that changing the clang AST would be correct in general).
77 if (const auto *ME = N.get<MemberExpr>()) {
78 if (!ME->getMemberDecl()->getDeclName())
79 return ME->getBase()
80 ? getSourceRange(DynTypedNode::create(*ME->getBase()))
81 : SourceRange();
83 return N.getSourceRange();
86 // An IntervalSet maintains a set of disjoint subranges of an array.
88 // Initially, it contains the entire array.
89 // [-----------------------------------------------------------]
91 // When a range is erased(), it will typically split the array in two.
92 // Claim: [--------------------]
93 // after: [----------------] [-------------------]
95 // erase() returns the segments actually erased. Given the state above:
96 // Claim: [---------------------------------------]
97 // Out: [---------] [------]
98 // After: [-----] [-----------]
100 // It is used to track (expanded) tokens not yet associated with an AST node.
101 // On traversing an AST node, its token range is erased from the unclaimed set.
102 // The tokens actually removed are associated with that node, and hit-tested
103 // against the selection to determine whether the node is selected.
104 template <typename T> class IntervalSet {
105 public:
106 IntervalSet(llvm::ArrayRef<T> Range) { UnclaimedRanges.insert(Range); }
108 // Removes the elements of Claim from the set, modifying or removing ranges
109 // that overlap it.
110 // Returns the continuous subranges of Claim that were actually removed.
111 llvm::SmallVector<llvm::ArrayRef<T>> erase(llvm::ArrayRef<T> Claim) {
112 llvm::SmallVector<llvm::ArrayRef<T>> Out;
113 if (Claim.empty())
114 return Out;
116 // General case:
117 // Claim: [-----------------]
118 // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-]
119 // Overlap: ^first ^second
120 // Ranges C and D are fully included. Ranges B and E must be trimmed.
121 auto Overlap = std::make_pair(
122 UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C
123 UnclaimedRanges.lower_bound({Claim.end(), Claim.end()})); // F
124 // Rewind to cover B.
125 if (Overlap.first != UnclaimedRanges.begin()) {
126 --Overlap.first;
127 // ...unless B isn't selected at all.
128 if (Overlap.first->end() <= Claim.begin())
129 ++Overlap.first;
131 if (Overlap.first == Overlap.second)
132 return Out;
134 // First, copy all overlapping ranges into the output.
135 auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second);
136 // If any of the overlapping ranges were sliced by the claim, split them:
137 // - restrict the returned range to the claimed part
138 // - save the unclaimed part so it can be reinserted
139 llvm::ArrayRef<T> RemainingHead, RemainingTail;
140 if (Claim.begin() > OutFirst->begin()) {
141 RemainingHead = {OutFirst->begin(), Claim.begin()};
142 *OutFirst = {Claim.begin(), OutFirst->end()};
144 if (Claim.end() < Out.back().end()) {
145 RemainingTail = {Claim.end(), Out.back().end()};
146 Out.back() = {Out.back().begin(), Claim.end()};
149 // Erase all the overlapping ranges (invalidating all iterators).
150 UnclaimedRanges.erase(Overlap.first, Overlap.second);
151 // Reinsert ranges that were merely trimmed.
152 if (!RemainingHead.empty())
153 UnclaimedRanges.insert(RemainingHead);
154 if (!RemainingTail.empty())
155 UnclaimedRanges.insert(RemainingTail);
157 return Out;
160 private:
161 using TokenRange = llvm::ArrayRef<T>;
162 struct RangeLess {
163 bool operator()(llvm::ArrayRef<T> L, llvm::ArrayRef<T> R) const {
164 return L.begin() < R.begin();
168 // Disjoint sorted unclaimed ranges of expanded tokens.
169 std::set<llvm::ArrayRef<T>, RangeLess> UnclaimedRanges;
172 // Sentinel value for the selectedness of a node where we've seen no tokens yet.
173 // This resolves to Unselected if no tokens are ever seen.
174 // But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete.
175 // This value is never exposed publicly.
176 constexpr SelectionTree::Selection NoTokens =
177 static_cast<SelectionTree::Selection>(
178 static_cast<unsigned char>(SelectionTree::Complete + 1));
180 // Nodes start with NoTokens, and then use this function to aggregate the
181 // selectedness as more tokens are found.
182 void update(SelectionTree::Selection &Result, SelectionTree::Selection New) {
183 if (New == NoTokens)
184 return;
185 if (Result == NoTokens)
186 Result = New;
187 else if (Result != New)
188 // Can only be completely selected (or unselected) if all tokens are.
189 Result = SelectionTree::Partial;
192 // As well as comments, don't count semicolons as real tokens.
193 // They're not properly claimed as expr-statement is missing from the AST.
194 bool shouldIgnore(const syntax::Token &Tok) {
195 switch (Tok.kind()) {
196 // Even "attached" comments are not considered part of a node's range.
197 case tok::comment:
198 // The AST doesn't directly store locations for terminating semicolons.
199 case tok::semi:
200 // We don't have locations for cvr-qualifiers: see QualifiedTypeLoc.
201 case tok::kw_const:
202 case tok::kw_volatile:
203 case tok::kw_restrict:
204 return true;
205 default:
206 return false;
210 // Determine whether 'Target' is the first expansion of the macro
211 // argument whose top-level spelling location is 'SpellingLoc'.
212 bool isFirstExpansion(FileID Target, SourceLocation SpellingLoc,
213 const SourceManager &SM) {
214 SourceLocation Prev = SpellingLoc;
215 while (true) {
216 // If the arg is expanded multiple times, getMacroArgExpandedLocation()
217 // returns the first expansion.
218 SourceLocation Next = SM.getMacroArgExpandedLocation(Prev);
219 // So if we reach the target, target is the first-expansion of the
220 // first-expansion ...
221 if (SM.getFileID(Next) == Target)
222 return true;
224 // Otherwise, if the FileID stops changing, we've reached the innermost
225 // macro expansion, and Target was on a different branch.
226 if (SM.getFileID(Next) == SM.getFileID(Prev))
227 return false;
229 Prev = Next;
231 return false;
234 // SelectionTester can determine whether a range of tokens from the PP-expanded
235 // stream (corresponding to an AST node) is considered selected.
237 // When the tokens result from macro expansions, the appropriate tokens in the
238 // main file are examined (macro invocation or args). Similarly for #includes.
239 // However, only the first expansion of a given spelled token is considered
240 // selected.
242 // It tests each token in the range (not just the endpoints) as contiguous
243 // expanded tokens may not have contiguous spellings (with macros).
245 // Non-token text, and tokens not modeled in the AST (comments, semicolons)
246 // are ignored when determining selectedness.
247 class SelectionTester {
248 public:
249 // The selection is offsets [SelBegin, SelEnd) in SelFile.
250 SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile,
251 unsigned SelBegin, unsigned SelEnd, const SourceManager &SM)
252 : SelFile(SelFile), SelFileBounds(SM.getLocForStartOfFile(SelFile),
253 SM.getLocForEndOfFile(SelFile)),
254 SM(SM) {
255 // Find all tokens (partially) selected in the file.
256 auto AllSpelledTokens = Buf.spelledTokens(SelFile);
257 const syntax::Token *SelFirst =
258 llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) {
259 return SM.getFileOffset(Tok.endLocation()) <= SelBegin;
261 const syntax::Token *SelLimit = std::partition_point(
262 SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) {
263 return SM.getFileOffset(Tok.location()) < SelEnd;
265 auto Sel = llvm::ArrayRef(SelFirst, SelLimit);
266 // Find which of these are preprocessed to nothing and should be ignored.
267 llvm::BitVector PPIgnored(Sel.size(), false);
268 for (const syntax::TokenBuffer::Expansion &X :
269 Buf.expansionsOverlapping(Sel)) {
270 if (X.Expanded.empty()) {
271 for (const syntax::Token &Tok : X.Spelled) {
272 if (&Tok >= SelFirst && &Tok < SelLimit)
273 PPIgnored[&Tok - SelFirst] = true;
277 // Precompute selectedness and offset for selected spelled tokens.
278 for (unsigned I = 0; I < Sel.size(); ++I) {
279 if (shouldIgnore(Sel[I]) || PPIgnored[I])
280 continue;
281 SelectedSpelled.emplace_back();
282 Tok &S = SelectedSpelled.back();
283 S.Offset = SM.getFileOffset(Sel[I].location());
284 if (S.Offset >= SelBegin && S.Offset + Sel[I].length() <= SelEnd)
285 S.Selected = SelectionTree::Complete;
286 else
287 S.Selected = SelectionTree::Partial;
289 MaybeSelectedExpanded = computeMaybeSelectedExpandedTokens(Buf);
292 // Test whether a consecutive range of tokens is selected.
293 // The tokens are taken from the expanded token stream.
294 SelectionTree::Selection
295 test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const {
296 if (ExpandedTokens.empty())
297 return NoTokens;
298 if (SelectedSpelled.empty())
299 return SelectionTree::Unselected;
300 // Cheap (pointer) check whether any of the tokens could touch selection.
301 // In most cases, the node's overall source range touches ExpandedTokens,
302 // or we would have failed mayHit(). However now we're only considering
303 // the *unclaimed* spans of expanded tokens.
304 // This is a significant performance improvement when a lot of nodes
305 // surround the selection, including when generated by macros.
306 if (MaybeSelectedExpanded.empty() ||
307 &ExpandedTokens.front() > &MaybeSelectedExpanded.back() ||
308 &ExpandedTokens.back() < &MaybeSelectedExpanded.front()) {
309 return SelectionTree::Unselected;
312 // The eof token is used as a sentinel.
313 // In general, source range from an AST node should not claim the eof token,
314 // but it could occur for unmatched-bracket cases.
315 // FIXME: fix it in TokenBuffer, expandedTokens(SourceRange) should not
316 // return the eof token.
317 if (ExpandedTokens.back().kind() == tok::eof)
318 ExpandedTokens = ExpandedTokens.drop_back();
320 SelectionTree::Selection Result = NoTokens;
321 while (!ExpandedTokens.empty()) {
322 // Take consecutive tokens from the same context together for efficiency.
323 SourceLocation Start = ExpandedTokens.front().location();
324 FileID FID = SM.getFileID(Start);
325 // Comparing SourceLocations against bounds is cheaper than getFileID().
326 SourceLocation Limit = SM.getComposedLoc(FID, SM.getFileIDSize(FID));
327 auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) {
328 return T.location() >= Start && T.location() < Limit;
330 assert(!Batch.empty());
331 ExpandedTokens = ExpandedTokens.drop_front(Batch.size());
333 update(Result, testChunk(FID, Batch));
335 return Result;
338 // Cheap check whether any of the tokens in R might be selected.
339 // If it returns false, test() will return NoTokens or Unselected.
340 // If it returns true, test() may return any value.
341 bool mayHit(SourceRange R) const {
342 if (SelectedSpelled.empty() || MaybeSelectedExpanded.empty())
343 return false;
344 // If the node starts after the selection ends, it is not selected.
345 // Tokens a macro location might claim are >= its expansion start.
346 // So if the expansion start > last selected token, we can prune it.
347 // (This is particularly helpful for GTest's TEST macro).
348 if (auto B = offsetInSelFile(getExpansionStart(R.getBegin())))
349 if (*B > SelectedSpelled.back().Offset)
350 return false;
351 // If the node ends before the selection begins, it is not selected.
352 SourceLocation EndLoc = R.getEnd();
353 while (EndLoc.isMacroID())
354 EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd();
355 // In the rare case that the expansion range is a char range, EndLoc is
356 // ~one token too far to the right. We may fail to prune, that's OK.
357 if (auto E = offsetInSelFile(EndLoc))
358 if (*E < SelectedSpelled.front().Offset)
359 return false;
360 return true;
363 private:
364 // Plausible expanded tokens that might be affected by the selection.
365 // This is an overestimate, it may contain tokens that are not selected.
366 // The point is to allow cheap pruning in test()
367 llvm::ArrayRef<syntax::Token>
368 computeMaybeSelectedExpandedTokens(const syntax::TokenBuffer &Toks) {
369 if (SelectedSpelled.empty())
370 return {};
372 auto LastAffectedToken = [&](SourceLocation Loc) {
373 auto Offset = offsetInSelFile(Loc);
374 while (Loc.isValid() && !Offset) {
375 Loc = Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getEnd()
376 : SM.getIncludeLoc(SM.getFileID(Loc));
377 Offset = offsetInSelFile(Loc);
379 return Offset;
381 auto FirstAffectedToken = [&](SourceLocation Loc) {
382 auto Offset = offsetInSelFile(Loc);
383 while (Loc.isValid() && !Offset) {
384 Loc = Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getBegin()
385 : SM.getIncludeLoc(SM.getFileID(Loc));
386 Offset = offsetInSelFile(Loc);
388 return Offset;
391 const syntax::Token *Start = llvm::partition_point(
392 Toks.expandedTokens(),
393 [&, First = SelectedSpelled.front().Offset](const syntax::Token &Tok) {
394 if (Tok.kind() == tok::eof)
395 return false;
396 // Implausible if upperbound(Tok) < First.
397 if (auto Offset = LastAffectedToken(Tok.location()))
398 return *Offset < First;
399 // A prefix of the expanded tokens may be from an implicit
400 // inclusion (e.g. preamble patch, or command-line -include).
401 return true;
404 bool EndInvalid = false;
405 const syntax::Token *End = std::partition_point(
406 Start, Toks.expandedTokens().end(),
407 [&, Last = SelectedSpelled.back().Offset](const syntax::Token &Tok) {
408 if (Tok.kind() == tok::eof)
409 return false;
410 // Plausible if lowerbound(Tok) <= Last.
411 if (auto Offset = FirstAffectedToken(Tok.location()))
412 return *Offset <= Last;
413 // Shouldn't happen: once we've seen tokens traceable to the main
414 // file, there shouldn't be any more implicit inclusions.
415 assert(false && "Expanded token could not be resolved to main file!");
416 EndInvalid = true;
417 return true; // conservatively assume this token can overlap
419 if (EndInvalid)
420 End = Toks.expandedTokens().end();
422 return llvm::ArrayRef(Start, End);
425 // Hit-test a consecutive range of tokens from a single file ID.
426 SelectionTree::Selection
427 testChunk(FileID FID, llvm::ArrayRef<syntax::Token> Batch) const {
428 assert(!Batch.empty());
429 SourceLocation StartLoc = Batch.front().location();
430 // There are several possible categories of FileID depending on how the
431 // preprocessor was used to generate these tokens:
432 // main file, #included file, macro args, macro bodies.
433 // We need to identify the main-file tokens that represent Batch, and
434 // determine whether we want to exclusively claim them. Regular tokens
435 // represent one AST construct, but a macro invocation can represent many.
437 // Handle tokens written directly in the main file.
438 if (FID == SelFile) {
439 return testTokenRange(*offsetInSelFile(Batch.front().location()),
440 *offsetInSelFile(Batch.back().location()));
443 // Handle tokens in another file #included into the main file.
444 // Check if the #include is selected, but don't claim it exclusively.
445 if (StartLoc.isFileID()) {
446 for (SourceLocation Loc = Batch.front().location(); Loc.isValid();
447 Loc = SM.getIncludeLoc(SM.getFileID(Loc))) {
448 if (auto Offset = offsetInSelFile(Loc))
449 // FIXME: use whole #include directive, not just the filename string.
450 return testToken(*Offset);
452 return NoTokens;
455 assert(StartLoc.isMacroID());
456 // Handle tokens that were passed as a macro argument.
457 SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc);
458 if (auto ArgOffset = offsetInSelFile(ArgStart)) {
459 if (isFirstExpansion(FID, ArgStart, SM)) {
460 SourceLocation ArgEnd =
461 SM.getTopMacroCallerLoc(Batch.back().location());
462 return testTokenRange(*ArgOffset, *offsetInSelFile(ArgEnd));
463 } else { // NOLINT(llvm-else-after-return)
464 /* fall through and treat as part of the macro body */
468 // Handle tokens produced by non-argument macro expansion.
469 // Check if the macro name is selected, don't claim it exclusively.
470 if (auto ExpansionOffset = offsetInSelFile(getExpansionStart(StartLoc)))
471 // FIXME: also check ( and ) for function-like macros?
472 return testToken(*ExpansionOffset);
473 return NoTokens;
476 // Is the closed token range [Begin, End] selected?
477 SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const {
478 assert(Begin <= End);
479 // Outside the selection entirely?
480 if (End < SelectedSpelled.front().Offset ||
481 Begin > SelectedSpelled.back().Offset)
482 return SelectionTree::Unselected;
484 // Compute range of tokens.
485 auto B = llvm::partition_point(
486 SelectedSpelled, [&](const Tok &T) { return T.Offset < Begin; });
487 auto E = std::partition_point(B, SelectedSpelled.end(), [&](const Tok &T) {
488 return T.Offset <= End;
491 // Aggregate selectedness of tokens in range.
492 bool ExtendsOutsideSelection = Begin < SelectedSpelled.front().Offset ||
493 End > SelectedSpelled.back().Offset;
494 SelectionTree::Selection Result =
495 ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens;
496 for (auto It = B; It != E; ++It)
497 update(Result, It->Selected);
498 return Result;
501 // Is the token at `Offset` selected?
502 SelectionTree::Selection testToken(unsigned Offset) const {
503 // Outside the selection entirely?
504 if (Offset < SelectedSpelled.front().Offset ||
505 Offset > SelectedSpelled.back().Offset)
506 return SelectionTree::Unselected;
507 // Find the token, if it exists.
508 auto It = llvm::partition_point(
509 SelectedSpelled, [&](const Tok &T) { return T.Offset < Offset; });
510 if (It != SelectedSpelled.end() && It->Offset == Offset)
511 return It->Selected;
512 return NoTokens;
515 // Decomposes Loc and returns the offset if the file ID is SelFile.
516 std::optional<unsigned> offsetInSelFile(SourceLocation Loc) const {
517 // Decoding Loc with SM.getDecomposedLoc is relatively expensive.
518 // But SourceLocations for a file are numerically contiguous, so we
519 // can use cheap integer operations instead.
520 if (Loc < SelFileBounds.getBegin() || Loc >= SelFileBounds.getEnd())
521 return std::nullopt;
522 // FIXME: subtracting getRawEncoding() is dubious, move this logic into SM.
523 return Loc.getRawEncoding() - SelFileBounds.getBegin().getRawEncoding();
526 SourceLocation getExpansionStart(SourceLocation Loc) const {
527 while (Loc.isMacroID())
528 Loc = SM.getImmediateExpansionRange(Loc).getBegin();
529 return Loc;
532 struct Tok {
533 unsigned Offset;
534 SelectionTree::Selection Selected;
536 std::vector<Tok> SelectedSpelled;
537 llvm::ArrayRef<syntax::Token> MaybeSelectedExpanded;
538 FileID SelFile;
539 SourceRange SelFileBounds;
540 const SourceManager &SM;
543 // Show the type of a node for debugging.
544 void printNodeKind(llvm::raw_ostream &OS, const DynTypedNode &N) {
545 if (const TypeLoc *TL = N.get<TypeLoc>()) {
546 // TypeLoc is a hierarchy, but has only a single ASTNodeKind.
547 // Synthesize the name from the Type subclass (except for QualifiedTypeLoc).
548 if (TL->getTypeLocClass() == TypeLoc::Qualified)
549 OS << "QualifiedTypeLoc";
550 else
551 OS << TL->getType()->getTypeClassName() << "TypeLoc";
552 } else {
553 OS << N.getNodeKind().asStringRef();
557 #ifndef NDEBUG
558 std::string printNodeToString(const DynTypedNode &N, const PrintingPolicy &PP) {
559 std::string S;
560 llvm::raw_string_ostream OS(S);
561 printNodeKind(OS, N);
562 return std::move(OS.str());
564 #endif
566 bool isImplicit(const Stmt *S) {
567 // Some Stmts are implicit and shouldn't be traversed, but there's no
568 // "implicit" attribute on Stmt/Expr.
569 // Unwrap implicit casts first if present (other nodes too?).
570 if (auto *ICE = llvm::dyn_cast<ImplicitCastExpr>(S))
571 S = ICE->getSubExprAsWritten();
572 // Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor.
573 // It would be nice if RAV handled this (!shouldTraverseImplicitCode()).
574 if (auto *CTI = llvm::dyn_cast<CXXThisExpr>(S))
575 if (CTI->isImplicit())
576 return true;
577 // Make sure implicit access of anonymous structs don't end up owning tokens.
578 if (auto *ME = llvm::dyn_cast<MemberExpr>(S)) {
579 if (auto *FD = llvm::dyn_cast<FieldDecl>(ME->getMemberDecl()))
580 if (FD->isAnonymousStructOrUnion())
581 // If Base is an implicit CXXThis, then the whole MemberExpr has no
582 // tokens. If it's a normal e.g. DeclRef, we treat the MemberExpr like
583 // an implicit cast.
584 return isImplicit(ME->getBase());
586 // Refs to operator() and [] are (almost?) always implicit as part of calls.
587 if (auto *DRE = llvm::dyn_cast<DeclRefExpr>(S)) {
588 if (auto *FD = llvm::dyn_cast<FunctionDecl>(DRE->getDecl())) {
589 switch (FD->getOverloadedOperator()) {
590 case OO_Call:
591 case OO_Subscript:
592 return true;
593 default:
594 break;
598 return false;
601 // We find the selection by visiting written nodes in the AST, looking for nodes
602 // that intersect with the selected character range.
604 // While traversing, we maintain a parent stack. As nodes pop off the stack,
605 // we decide whether to keep them or not. To be kept, they must either be
606 // selected or contain some nodes that are.
608 // For simple cases (not inside macros) we prune subtrees that don't intersect.
609 class SelectionVisitor : public RecursiveASTVisitor<SelectionVisitor> {
610 public:
611 // Runs the visitor to gather selected nodes and their ancestors.
612 // If there is any selection, the root (TUDecl) is the first node.
613 static std::deque<Node> collect(ASTContext &AST,
614 const syntax::TokenBuffer &Tokens,
615 const PrintingPolicy &PP, unsigned Begin,
616 unsigned End, FileID File) {
617 SelectionVisitor V(AST, Tokens, PP, Begin, End, File);
618 V.TraverseAST(AST);
619 assert(V.Stack.size() == 1 && "Unpaired push/pop?");
620 assert(V.Stack.top() == &V.Nodes.front());
621 return std::move(V.Nodes);
624 // We traverse all "well-behaved" nodes the same way:
625 // - push the node onto the stack
626 // - traverse its children recursively
627 // - pop it from the stack
628 // - hit testing: is intersection(node, selection) - union(children) empty?
629 // - attach it to the tree if it or any children hit the selection
631 // Two categories of nodes are not "well-behaved":
632 // - those without source range information, we don't record those
633 // - those that can't be stored in DynTypedNode.
634 bool TraverseDecl(Decl *X) {
635 if (llvm::isa_and_nonnull<TranslationUnitDecl>(X))
636 return Base::TraverseDecl(X); // Already pushed by constructor.
637 // Base::TraverseDecl will suppress children, but not this node itself.
638 if (X && X->isImplicit()) {
639 // Most implicit nodes have only implicit children and can be skipped.
640 // However there are exceptions (`void foo(Concept auto x)`), and
641 // the base implementation knows how to find them.
642 return Base::TraverseDecl(X);
644 return traverseNode(X, [&] { return Base::TraverseDecl(X); });
646 bool TraverseTypeLoc(TypeLoc X) {
647 return traverseNode(&X, [&] { return Base::TraverseTypeLoc(X); });
649 bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &X) {
650 return traverseNode(&X,
651 [&] { return Base::TraverseTemplateArgumentLoc(X); });
653 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) {
654 return traverseNode(
655 &X, [&] { return Base::TraverseNestedNameSpecifierLoc(X); });
657 bool TraverseConstructorInitializer(CXXCtorInitializer *X) {
658 return traverseNode(
659 X, [&] { return Base::TraverseConstructorInitializer(X); });
661 bool TraverseCXXBaseSpecifier(const CXXBaseSpecifier &X) {
662 return traverseNode(&X, [&] { return Base::TraverseCXXBaseSpecifier(X); });
664 bool TraverseAttr(Attr *X) {
665 return traverseNode(X, [&] { return Base::TraverseAttr(X); });
667 bool TraverseConceptReference(ConceptReference *X) {
668 return traverseNode(X, [&] { return Base::TraverseConceptReference(X); });
670 // Stmt is the same, but this form allows the data recursion optimization.
671 bool dataTraverseStmtPre(Stmt *X) {
672 if (!X || isImplicit(X))
673 return false;
674 auto N = DynTypedNode::create(*X);
675 if (canSafelySkipNode(N))
676 return false;
677 push(std::move(N));
678 if (shouldSkipChildren(X)) {
679 pop();
680 return false;
682 return true;
684 bool dataTraverseStmtPost(Stmt *X) {
685 pop();
686 return true;
688 // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
689 // TraverseTypeLoc is not called for the inner UnqualTypeLoc.
690 // This means we'd never see 'int' in 'const int'! Work around that here.
691 // (The reason for the behavior is to avoid traversing the nested Type twice,
692 // but we ignore TraverseType anyway).
693 bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX) {
694 return traverseNode<TypeLoc>(
695 &QX, [&] { return TraverseTypeLoc(QX.getUnqualifiedLoc()); });
697 bool TraverseObjCProtocolLoc(ObjCProtocolLoc PL) {
698 return traverseNode(&PL, [&] { return Base::TraverseObjCProtocolLoc(PL); });
700 // Uninteresting parts of the AST that don't have locations within them.
701 bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; }
702 bool TraverseType(QualType) { return true; }
704 // The DeclStmt for the loop variable claims to cover the whole range
705 // inside the parens, this causes the range-init expression to not be hit.
706 // Traverse the loop VarDecl instead, which has the right source range.
707 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
708 return traverseNode(S, [&] {
709 return TraverseStmt(S->getInit()) && TraverseDecl(S->getLoopVariable()) &&
710 TraverseStmt(S->getRangeInit()) && TraverseStmt(S->getBody());
713 // OpaqueValueExpr blocks traversal, we must explicitly traverse it.
714 bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) {
715 return traverseNode(E, [&] { return TraverseStmt(E->getSourceExpr()); });
717 // We only want to traverse the *syntactic form* to understand the selection.
718 bool TraversePseudoObjectExpr(PseudoObjectExpr *E) {
719 return traverseNode(E, [&] { return TraverseStmt(E->getSyntacticForm()); });
721 bool TraverseTypeConstraint(const TypeConstraint *C) {
722 if (auto *E = C->getImmediatelyDeclaredConstraint()) {
723 // Technically this expression is 'implicit' and not traversed by the RAV.
724 // However, the range is correct, so we visit expression to avoid adding
725 // an extra kind to 'DynTypeNode' that hold 'TypeConstraint'.
726 return TraverseStmt(E);
728 return Base::TraverseTypeConstraint(C);
731 // Override child traversal for certain node types.
732 using RecursiveASTVisitor::getStmtChildren;
733 // PredefinedExpr like __func__ has a StringLiteral child for its value.
734 // It's not written, so don't traverse it.
735 Stmt::child_range getStmtChildren(PredefinedExpr *) {
736 return {StmtIterator{}, StmtIterator{}};
739 private:
740 using Base = RecursiveASTVisitor<SelectionVisitor>;
742 SelectionVisitor(ASTContext &AST, const syntax::TokenBuffer &Tokens,
743 const PrintingPolicy &PP, unsigned SelBegin, unsigned SelEnd,
744 FileID SelFile)
745 : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()),
746 #ifndef NDEBUG
747 PrintPolicy(PP),
748 #endif
749 TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM),
750 UnclaimedExpandedTokens(Tokens.expandedTokens()) {
751 // Ensure we have a node for the TU decl, regardless of traversal scope.
752 Nodes.emplace_back();
753 Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl());
754 Nodes.back().Parent = nullptr;
755 Nodes.back().Selected = SelectionTree::Unselected;
756 Stack.push(&Nodes.back());
759 // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo.
760 // Node is always a pointer so the generic code can handle any null checks.
761 template <typename T, typename Func>
762 bool traverseNode(T *Node, const Func &Body) {
763 if (Node == nullptr)
764 return true;
765 auto N = DynTypedNode::create(*Node);
766 if (canSafelySkipNode(N))
767 return true;
768 push(DynTypedNode::create(*Node));
769 bool Ret = Body();
770 pop();
771 return Ret;
774 // HIT TESTING
776 // We do rough hit testing on the way down the tree to avoid traversing
777 // subtrees that don't touch the selection (canSafelySkipNode), but
778 // fine-grained hit-testing is mostly done on the way back up (in pop()).
779 // This means children get to claim parts of the selection first, and parents
780 // are only selected if they own tokens that no child owned.
782 // Nodes *usually* nest nicely: a child's getSourceRange() lies within the
783 // parent's, and a node (transitively) owns all tokens in its range.
785 // Exception 1: when declarators nest, *inner* declarator is the *outer* type.
786 // e.g. void foo[5](int) is an array of functions.
787 // To handle this case, declarators are careful to only claim the tokens they
788 // own, rather than claim a range and rely on claim ordering.
790 // Exception 2: siblings both claim the same node.
791 // e.g. `int x, y;` produces two sibling VarDecls.
792 // ~~~~~ x
793 // ~~~~~~~~ y
794 // Here the first ("leftmost") sibling claims the tokens it wants, and the
795 // other sibling gets what's left. So selecting "int" only includes the left
796 // VarDecl in the selection tree.
798 // An optimization for a common case: nodes outside macro expansions that
799 // don't intersect the selection may be recursively skipped.
800 bool canSafelySkipNode(const DynTypedNode &N) {
801 SourceRange S = getSourceRange(N);
802 if (auto *TL = N.get<TypeLoc>()) {
803 // FIXME: TypeLoc::getBeginLoc()/getEndLoc() are pretty fragile
804 // heuristics. We should consider only pruning critical TypeLoc nodes, to
805 // be more robust.
807 // AttributedTypeLoc may point to the attribute's range, NOT the modified
808 // type's range.
809 if (auto AT = TL->getAs<AttributedTypeLoc>())
810 S = AT.getModifiedLoc().getSourceRange();
812 // SourceRange often doesn't manage to accurately cover attributes.
813 // Fortunately, attributes are rare.
814 if (llvm::any_of(getAttributes(N),
815 [](const Attr *A) { return !A->isImplicit(); }))
816 return false;
817 if (!SelChecker.mayHit(S)) {
818 dlog("{2}skip: {0} {1}", printNodeToString(N, PrintPolicy),
819 S.printToString(SM), indent());
820 return true;
822 return false;
825 // There are certain nodes we want to treat as leaves in the SelectionTree,
826 // although they do have children.
827 bool shouldSkipChildren(const Stmt *X) const {
828 // UserDefinedLiteral (e.g. 12_i) has two children (12 and _i).
829 // Unfortunately TokenBuffer sees 12_i as one token and can't split it.
830 // So we treat UserDefinedLiteral as a leaf node, owning the token.
831 return llvm::isa<UserDefinedLiteral>(X);
834 // Pushes a node onto the ancestor stack. Pairs with pop().
835 // Performs early hit detection for some nodes (on the earlySourceRange).
836 void push(DynTypedNode Node) {
837 SourceRange Early = earlySourceRange(Node);
838 dlog("{2}push: {0} {1}", printNodeToString(Node, PrintPolicy),
839 Node.getSourceRange().printToString(SM), indent());
840 Nodes.emplace_back();
841 Nodes.back().ASTNode = std::move(Node);
842 Nodes.back().Parent = Stack.top();
843 Nodes.back().Selected = NoTokens;
844 Stack.push(&Nodes.back());
845 claimRange(Early, Nodes.back().Selected);
848 // Pops a node off the ancestor stack, and finalizes it. Pairs with push().
849 // Performs primary hit detection.
850 void pop() {
851 Node &N = *Stack.top();
852 dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1));
853 claimTokensFor(N.ASTNode, N.Selected);
854 if (N.Selected == NoTokens)
855 N.Selected = SelectionTree::Unselected;
856 if (N.Selected || !N.Children.empty()) {
857 // Attach to the tree.
858 N.Parent->Children.push_back(&N);
859 } else {
860 // Neither N any children are selected, it doesn't belong in the tree.
861 assert(&N == &Nodes.back());
862 Nodes.pop_back();
864 Stack.pop();
867 // Returns the range of tokens that this node will claim directly, and
868 // is not available to the node's children.
869 // Usually empty, but sometimes children cover tokens but shouldn't own them.
870 SourceRange earlySourceRange(const DynTypedNode &N) {
871 if (const Decl *VD = N.get<VarDecl>()) {
872 // We want the name in the var-decl to be claimed by the decl itself and
873 // not by any children. Ususally, we don't need this, because source
874 // ranges of children are not overlapped with their parent's.
875 // An exception is lambda captured var decl, where AutoTypeLoc is
876 // overlapped with the name loc.
877 // auto fun = [bar = foo]() { ... }
878 // ~~~~~~~~~ VarDecl
879 // ~~~ |- AutoTypeLoc
880 return VD->getLocation();
883 // When referring to a destructor ~Foo(), attribute Foo to the destructor
884 // rather than the TypeLoc nested inside it.
885 // We still traverse the TypeLoc, because it may contain other targeted
886 // things like the T in ~Foo<T>().
887 if (const auto *CDD = N.get<CXXDestructorDecl>())
888 return CDD->getNameInfo().getNamedTypeInfo()->getTypeLoc().getBeginLoc();
889 if (const auto *ME = N.get<MemberExpr>()) {
890 auto NameInfo = ME->getMemberNameInfo();
891 if (NameInfo.getName().getNameKind() ==
892 DeclarationName::CXXDestructorName)
893 return NameInfo.getNamedTypeInfo()->getTypeLoc().getBeginLoc();
896 return SourceRange();
899 // Claim tokens for N, after processing its children.
900 // By default this claims all unclaimed tokens in getSourceRange().
901 // We override this if we want to claim fewer tokens (e.g. there are gaps).
902 void claimTokensFor(const DynTypedNode &N, SelectionTree::Selection &Result) {
903 // CXXConstructExpr often shows implicit construction, like `string s;`.
904 // Don't associate any tokens with it unless there's some syntax like {}.
905 // This prevents it from claiming 's', its primary location.
906 if (const auto *CCE = N.get<CXXConstructExpr>()) {
907 claimRange(CCE->getParenOrBraceRange(), Result);
908 return;
910 // ExprWithCleanups is always implicit. It often wraps CXXConstructExpr.
911 // Prevent it claiming 's' in the case above.
912 if (N.get<ExprWithCleanups>())
913 return;
915 // Declarators nest "inside out", with parent types inside child ones.
916 // Instead of claiming the whole range (clobbering parent tokens), carefully
917 // claim the tokens owned by this node and non-declarator children.
918 // (We could manipulate traversal order instead, but this is easier).
920 // Non-declarator types nest normally, and are handled like other nodes.
922 // Example:
923 // Vec<R<int>(*[2])(A<char>)> is a Vec of arrays of pointers to functions,
924 // which accept A<char> and return R<int>.
925 // The TypeLoc hierarchy:
926 // Vec<R<int>(*[2])(A<char>)> m;
927 // Vec<#####################> TemplateSpecialization Vec
928 // --------[2]---------- `-Array
929 // -------*------------- `-Pointer
930 // ------(----)--------- `-Paren
931 // ------------(#######) `-Function
932 // R<###> |-TemplateSpecialization R
933 // int | `-Builtin int
934 // A<####> `-TemplateSpecialization A
935 // char `-Builtin char
937 // In each row
938 // --- represents unclaimed parts of the SourceRange.
939 // ### represents parts that children already claimed.
940 if (const auto *TL = N.get<TypeLoc>()) {
941 if (auto PTL = TL->getAs<ParenTypeLoc>()) {
942 claimRange(PTL.getLParenLoc(), Result);
943 claimRange(PTL.getRParenLoc(), Result);
944 return;
946 if (auto ATL = TL->getAs<ArrayTypeLoc>()) {
947 claimRange(ATL.getBracketsRange(), Result);
948 return;
950 if (auto PTL = TL->getAs<PointerTypeLoc>()) {
951 claimRange(PTL.getStarLoc(), Result);
952 return;
954 if (auto FTL = TL->getAs<FunctionTypeLoc>()) {
955 claimRange(SourceRange(FTL.getLParenLoc(), FTL.getEndLoc()), Result);
956 return;
959 claimRange(getSourceRange(N), Result);
962 // Perform hit-testing of a complete Node against the selection.
963 // This runs for every node in the AST, and must be fast in common cases.
964 // This is usually called from pop(), so we can take children into account.
965 // The existing state of Result is relevant.
966 void claimRange(SourceRange S, SelectionTree::Selection &Result) {
967 for (const auto &ClaimedRange :
968 UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S)))
969 update(Result, SelChecker.test(ClaimedRange));
971 if (Result && Result != NoTokens)
972 dlog("{1}hit selection: {0}", S.printToString(SM), indent());
975 std::string indent(int Offset = 0) {
976 // Cast for signed arithmetic.
977 int Amount = int(Stack.size()) + Offset;
978 assert(Amount >= 0);
979 return std::string(Amount, ' ');
982 SourceManager &SM;
983 const LangOptions &LangOpts;
984 #ifndef NDEBUG
985 const PrintingPolicy &PrintPolicy;
986 #endif
987 const syntax::TokenBuffer &TokenBuf;
988 std::stack<Node *> Stack;
989 SelectionTester SelChecker;
990 IntervalSet<syntax::Token> UnclaimedExpandedTokens;
991 std::deque<Node> Nodes; // Stable pointers as we add more nodes.
994 } // namespace
996 llvm::SmallString<256> abbreviatedString(DynTypedNode N,
997 const PrintingPolicy &PP) {
998 llvm::SmallString<256> Result;
1000 llvm::raw_svector_ostream OS(Result);
1001 N.print(OS, PP);
1003 auto Pos = Result.find('\n');
1004 if (Pos != llvm::StringRef::npos) {
1005 bool MoreText = !llvm::all_of(Result.str().drop_front(Pos), llvm::isSpace);
1006 Result.resize(Pos);
1007 if (MoreText)
1008 Result.append(" …");
1010 return Result;
1013 void SelectionTree::print(llvm::raw_ostream &OS, const SelectionTree::Node &N,
1014 int Indent) const {
1015 if (N.Selected)
1016 OS.indent(Indent - 1) << (N.Selected == SelectionTree::Complete ? '*'
1017 : '.');
1018 else
1019 OS.indent(Indent);
1020 printNodeKind(OS, N.ASTNode);
1021 OS << ' ' << abbreviatedString(N.ASTNode, PrintPolicy) << "\n";
1022 for (const Node *Child : N.Children)
1023 print(OS, *Child, Indent + 2);
1026 std::string SelectionTree::Node::kind() const {
1027 std::string S;
1028 llvm::raw_string_ostream OS(S);
1029 printNodeKind(OS, ASTNode);
1030 return std::move(OS.str());
1033 // Decide which selections emulate a "point" query in between characters.
1034 // If it's ambiguous (the neighboring characters are selectable tokens), returns
1035 // both possibilities in preference order.
1036 // Always returns at least one range - if no tokens touched, and empty range.
1037 static llvm::SmallVector<std::pair<unsigned, unsigned>, 2>
1038 pointBounds(unsigned Offset, const syntax::TokenBuffer &Tokens) {
1039 const auto &SM = Tokens.sourceManager();
1040 SourceLocation Loc = SM.getComposedLoc(SM.getMainFileID(), Offset);
1041 llvm::SmallVector<std::pair<unsigned, unsigned>, 2> Result;
1042 // Prefer right token over left.
1043 for (const syntax::Token &Tok :
1044 llvm::reverse(spelledTokensTouching(Loc, Tokens))) {
1045 if (shouldIgnore(Tok))
1046 continue;
1047 unsigned Offset = Tokens.sourceManager().getFileOffset(Tok.location());
1048 Result.emplace_back(Offset, Offset + Tok.length());
1050 if (Result.empty())
1051 Result.emplace_back(Offset, Offset);
1052 return Result;
1055 bool SelectionTree::createEach(ASTContext &AST,
1056 const syntax::TokenBuffer &Tokens,
1057 unsigned Begin, unsigned End,
1058 llvm::function_ref<bool(SelectionTree)> Func) {
1059 if (Begin != End)
1060 return Func(SelectionTree(AST, Tokens, Begin, End));
1061 for (std::pair<unsigned, unsigned> Bounds : pointBounds(Begin, Tokens))
1062 if (Func(SelectionTree(AST, Tokens, Bounds.first, Bounds.second)))
1063 return true;
1064 return false;
1067 SelectionTree SelectionTree::createRight(ASTContext &AST,
1068 const syntax::TokenBuffer &Tokens,
1069 unsigned int Begin, unsigned int End) {
1070 std::optional<SelectionTree> Result;
1071 createEach(AST, Tokens, Begin, End, [&](SelectionTree T) {
1072 Result = std::move(T);
1073 return true;
1075 return std::move(*Result);
1078 SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
1079 unsigned Begin, unsigned End)
1080 : PrintPolicy(AST.getLangOpts()) {
1081 // No fundamental reason the selection needs to be in the main file,
1082 // but that's all clangd has needed so far.
1083 const SourceManager &SM = AST.getSourceManager();
1084 FileID FID = SM.getMainFileID();
1085 PrintPolicy.TerseOutput = true;
1086 PrintPolicy.IncludeNewlines = false;
1088 dlog("Computing selection for {0}",
1089 SourceRange(SM.getComposedLoc(FID, Begin), SM.getComposedLoc(FID, End))
1090 .printToString(SM));
1091 Nodes = SelectionVisitor::collect(AST, Tokens, PrintPolicy, Begin, End, FID);
1092 Root = Nodes.empty() ? nullptr : &Nodes.front();
1093 recordMetrics(*this, AST.getLangOpts());
1094 dlog("Built selection tree\n{0}", *this);
1097 const Node *SelectionTree::commonAncestor() const {
1098 const Node *Ancestor = Root;
1099 while (Ancestor->Children.size() == 1 && !Ancestor->Selected)
1100 Ancestor = Ancestor->Children.front();
1101 // Returning nullptr here is a bit unprincipled, but it makes the API safer:
1102 // the TranslationUnitDecl contains all of the preamble, so traversing it is a
1103 // performance cliff. Callers can check for null and use root() if they want.
1104 return Ancestor != Root ? Ancestor : nullptr;
1107 const DeclContext &SelectionTree::Node::getDeclContext() const {
1108 for (const Node *CurrentNode = this; CurrentNode != nullptr;
1109 CurrentNode = CurrentNode->Parent) {
1110 if (const Decl *Current = CurrentNode->ASTNode.get<Decl>()) {
1111 if (CurrentNode != this)
1112 if (auto *DC = dyn_cast<DeclContext>(Current))
1113 return *DC;
1114 return *Current->getLexicalDeclContext();
1117 llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
1120 const SelectionTree::Node &SelectionTree::Node::ignoreImplicit() const {
1121 if (Children.size() == 1 &&
1122 getSourceRange(Children.front()->ASTNode) == getSourceRange(ASTNode))
1123 return Children.front()->ignoreImplicit();
1124 return *this;
1127 const SelectionTree::Node &SelectionTree::Node::outerImplicit() const {
1128 if (Parent && getSourceRange(Parent->ASTNode) == getSourceRange(ASTNode))
1129 return Parent->outerImplicit();
1130 return *this;
1133 } // namespace clangd
1134 } // namespace clang