1 //===--- Selection.cpp ----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 #include "support/Logger.h"
12 #include "support/Trace.h"
13 #include "clang/AST/ASTConcept.h"
14 #include "clang/AST/ASTTypeTraits.h"
15 #include "clang/AST/Decl.h"
16 #include "clang/AST/DeclCXX.h"
17 #include "clang/AST/Expr.h"
18 #include "clang/AST/ExprCXX.h"
19 #include "clang/AST/PrettyPrinter.h"
20 #include "clang/AST/RecursiveASTVisitor.h"
21 #include "clang/AST/TypeLoc.h"
22 #include "clang/Basic/OperatorKinds.h"
23 #include "clang/Basic/SourceLocation.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Basic/TokenKinds.h"
26 #include "clang/Lex/Lexer.h"
27 #include "clang/Tooling/Syntax/Tokens.h"
28 #include "llvm/ADT/BitVector.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/raw_ostream.h"
41 using Node
= SelectionTree::Node
;
43 // Measure the fraction of selections that were enabled by recovery AST.
44 void recordMetrics(const SelectionTree
&S
, const LangOptions
&Lang
) {
45 if (!trace::enabled())
47 const char *LanguageLabel
= Lang
.CPlusPlus
? "C++" : Lang
.ObjC
? "ObjC" : "C";
48 static constexpr trace::Metric
SelectionUsedRecovery(
49 "selection_recovery", trace::Metric::Distribution
, "language");
50 static constexpr trace::Metric
RecoveryType(
51 "selection_recovery_type", trace::Metric::Distribution
, "language");
52 const auto *Common
= S
.commonAncestor();
53 for (const auto *N
= Common
; N
; N
= N
->Parent
) {
54 if (const auto *RE
= N
->ASTNode
.get
<RecoveryExpr
>()) {
55 SelectionUsedRecovery
.record(1, LanguageLabel
); // used recovery ast.
56 RecoveryType
.record(RE
->isTypeDependent() ? 0 : 1, LanguageLabel
);
61 SelectionUsedRecovery
.record(0, LanguageLabel
); // unused.
64 // Return the range covering a node and all its children.
65 SourceRange
getSourceRange(const DynTypedNode
&N
) {
66 // MemberExprs to implicitly access anonymous fields should not claim any
67 // tokens for themselves. Given:
68 // struct A { struct { int b; }; };
69 // The clang AST reports the following nodes for an access to b:
71 // [----] MemberExpr, base = A().<anonymous>, member = b
72 // [----] MemberExpr: base = A(), member = <anonymous>
73 // [-] CXXConstructExpr
74 // For our purposes, we don't want the second MemberExpr to own any tokens,
75 // so we reduce its range to match the CXXConstructExpr.
76 // (It's not clear that changing the clang AST would be correct in general).
77 if (const auto *ME
= N
.get
<MemberExpr
>()) {
78 if (!ME
->getMemberDecl()->getDeclName())
80 ? getSourceRange(DynTypedNode::create(*ME
->getBase()))
83 return N
.getSourceRange();
86 // An IntervalSet maintains a set of disjoint subranges of an array.
88 // Initially, it contains the entire array.
89 // [-----------------------------------------------------------]
91 // When a range is erased(), it will typically split the array in two.
92 // Claim: [--------------------]
93 // after: [----------------] [-------------------]
95 // erase() returns the segments actually erased. Given the state above:
96 // Claim: [---------------------------------------]
97 // Out: [---------] [------]
98 // After: [-----] [-----------]
100 // It is used to track (expanded) tokens not yet associated with an AST node.
101 // On traversing an AST node, its token range is erased from the unclaimed set.
102 // The tokens actually removed are associated with that node, and hit-tested
103 // against the selection to determine whether the node is selected.
104 template <typename T
> class IntervalSet
{
106 IntervalSet(llvm::ArrayRef
<T
> Range
) { UnclaimedRanges
.insert(Range
); }
108 // Removes the elements of Claim from the set, modifying or removing ranges
110 // Returns the continuous subranges of Claim that were actually removed.
111 llvm::SmallVector
<llvm::ArrayRef
<T
>> erase(llvm::ArrayRef
<T
> Claim
) {
112 llvm::SmallVector
<llvm::ArrayRef
<T
>> Out
;
117 // Claim: [-----------------]
118 // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-]
119 // Overlap: ^first ^second
120 // Ranges C and D are fully included. Ranges B and E must be trimmed.
121 auto Overlap
= std::make_pair(
122 UnclaimedRanges
.lower_bound({Claim
.begin(), Claim
.begin()}), // C
123 UnclaimedRanges
.lower_bound({Claim
.end(), Claim
.end()})); // F
124 // Rewind to cover B.
125 if (Overlap
.first
!= UnclaimedRanges
.begin()) {
127 // ...unless B isn't selected at all.
128 if (Overlap
.first
->end() <= Claim
.begin())
131 if (Overlap
.first
== Overlap
.second
)
134 // First, copy all overlapping ranges into the output.
135 auto OutFirst
= Out
.insert(Out
.end(), Overlap
.first
, Overlap
.second
);
136 // If any of the overlapping ranges were sliced by the claim, split them:
137 // - restrict the returned range to the claimed part
138 // - save the unclaimed part so it can be reinserted
139 llvm::ArrayRef
<T
> RemainingHead
, RemainingTail
;
140 if (Claim
.begin() > OutFirst
->begin()) {
141 RemainingHead
= {OutFirst
->begin(), Claim
.begin()};
142 *OutFirst
= {Claim
.begin(), OutFirst
->end()};
144 if (Claim
.end() < Out
.back().end()) {
145 RemainingTail
= {Claim
.end(), Out
.back().end()};
146 Out
.back() = {Out
.back().begin(), Claim
.end()};
149 // Erase all the overlapping ranges (invalidating all iterators).
150 UnclaimedRanges
.erase(Overlap
.first
, Overlap
.second
);
151 // Reinsert ranges that were merely trimmed.
152 if (!RemainingHead
.empty())
153 UnclaimedRanges
.insert(RemainingHead
);
154 if (!RemainingTail
.empty())
155 UnclaimedRanges
.insert(RemainingTail
);
161 using TokenRange
= llvm::ArrayRef
<T
>;
163 bool operator()(llvm::ArrayRef
<T
> L
, llvm::ArrayRef
<T
> R
) const {
164 return L
.begin() < R
.begin();
168 // Disjoint sorted unclaimed ranges of expanded tokens.
169 std::set
<llvm::ArrayRef
<T
>, RangeLess
> UnclaimedRanges
;
172 // Sentinel value for the selectedness of a node where we've seen no tokens yet.
173 // This resolves to Unselected if no tokens are ever seen.
174 // But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete.
175 // This value is never exposed publicly.
176 constexpr SelectionTree::Selection NoTokens
=
177 static_cast<SelectionTree::Selection
>(
178 static_cast<unsigned char>(SelectionTree::Complete
+ 1));
180 // Nodes start with NoTokens, and then use this function to aggregate the
181 // selectedness as more tokens are found.
182 void update(SelectionTree::Selection
&Result
, SelectionTree::Selection New
) {
185 if (Result
== NoTokens
)
187 else if (Result
!= New
)
188 // Can only be completely selected (or unselected) if all tokens are.
189 Result
= SelectionTree::Partial
;
192 // As well as comments, don't count semicolons as real tokens.
193 // They're not properly claimed as expr-statement is missing from the AST.
194 bool shouldIgnore(const syntax::Token
&Tok
) {
195 switch (Tok
.kind()) {
196 // Even "attached" comments are not considered part of a node's range.
198 // The AST doesn't directly store locations for terminating semicolons.
200 // We don't have locations for cvr-qualifiers: see QualifiedTypeLoc.
202 case tok::kw_volatile
:
203 case tok::kw_restrict
:
210 // Determine whether 'Target' is the first expansion of the macro
211 // argument whose top-level spelling location is 'SpellingLoc'.
212 bool isFirstExpansion(FileID Target
, SourceLocation SpellingLoc
,
213 const SourceManager
&SM
) {
214 SourceLocation Prev
= SpellingLoc
;
216 // If the arg is expanded multiple times, getMacroArgExpandedLocation()
217 // returns the first expansion.
218 SourceLocation Next
= SM
.getMacroArgExpandedLocation(Prev
);
219 // So if we reach the target, target is the first-expansion of the
220 // first-expansion ...
221 if (SM
.getFileID(Next
) == Target
)
224 // Otherwise, if the FileID stops changing, we've reached the innermost
225 // macro expansion, and Target was on a different branch.
226 if (SM
.getFileID(Next
) == SM
.getFileID(Prev
))
234 // SelectionTester can determine whether a range of tokens from the PP-expanded
235 // stream (corresponding to an AST node) is considered selected.
237 // When the tokens result from macro expansions, the appropriate tokens in the
238 // main file are examined (macro invocation or args). Similarly for #includes.
239 // However, only the first expansion of a given spelled token is considered
242 // It tests each token in the range (not just the endpoints) as contiguous
243 // expanded tokens may not have contiguous spellings (with macros).
245 // Non-token text, and tokens not modeled in the AST (comments, semicolons)
246 // are ignored when determining selectedness.
247 class SelectionTester
{
249 // The selection is offsets [SelBegin, SelEnd) in SelFile.
250 SelectionTester(const syntax::TokenBuffer
&Buf
, FileID SelFile
,
251 unsigned SelBegin
, unsigned SelEnd
, const SourceManager
&SM
)
252 : SelFile(SelFile
), SelFileBounds(SM
.getLocForStartOfFile(SelFile
),
253 SM
.getLocForEndOfFile(SelFile
)),
255 // Find all tokens (partially) selected in the file.
256 auto AllSpelledTokens
= Buf
.spelledTokens(SelFile
);
257 const syntax::Token
*SelFirst
=
258 llvm::partition_point(AllSpelledTokens
, [&](const syntax::Token
&Tok
) {
259 return SM
.getFileOffset(Tok
.endLocation()) <= SelBegin
;
261 const syntax::Token
*SelLimit
= std::partition_point(
262 SelFirst
, AllSpelledTokens
.end(), [&](const syntax::Token
&Tok
) {
263 return SM
.getFileOffset(Tok
.location()) < SelEnd
;
265 auto Sel
= llvm::ArrayRef(SelFirst
, SelLimit
);
266 // Find which of these are preprocessed to nothing and should be ignored.
267 llvm::BitVector
PPIgnored(Sel
.size(), false);
268 for (const syntax::TokenBuffer::Expansion
&X
:
269 Buf
.expansionsOverlapping(Sel
)) {
270 if (X
.Expanded
.empty()) {
271 for (const syntax::Token
&Tok
: X
.Spelled
) {
272 if (&Tok
>= SelFirst
&& &Tok
< SelLimit
)
273 PPIgnored
[&Tok
- SelFirst
] = true;
277 // Precompute selectedness and offset for selected spelled tokens.
278 for (unsigned I
= 0; I
< Sel
.size(); ++I
) {
279 if (shouldIgnore(Sel
[I
]) || PPIgnored
[I
])
281 SelectedSpelled
.emplace_back();
282 Tok
&S
= SelectedSpelled
.back();
283 S
.Offset
= SM
.getFileOffset(Sel
[I
].location());
284 if (S
.Offset
>= SelBegin
&& S
.Offset
+ Sel
[I
].length() <= SelEnd
)
285 S
.Selected
= SelectionTree::Complete
;
287 S
.Selected
= SelectionTree::Partial
;
289 MaybeSelectedExpanded
= computeMaybeSelectedExpandedTokens(Buf
);
292 // Test whether a consecutive range of tokens is selected.
293 // The tokens are taken from the expanded token stream.
294 SelectionTree::Selection
295 test(llvm::ArrayRef
<syntax::Token
> ExpandedTokens
) const {
296 if (ExpandedTokens
.empty())
298 if (SelectedSpelled
.empty())
299 return SelectionTree::Unselected
;
300 // Cheap (pointer) check whether any of the tokens could touch selection.
301 // In most cases, the node's overall source range touches ExpandedTokens,
302 // or we would have failed mayHit(). However now we're only considering
303 // the *unclaimed* spans of expanded tokens.
304 // This is a significant performance improvement when a lot of nodes
305 // surround the selection, including when generated by macros.
306 if (MaybeSelectedExpanded
.empty() ||
307 &ExpandedTokens
.front() > &MaybeSelectedExpanded
.back() ||
308 &ExpandedTokens
.back() < &MaybeSelectedExpanded
.front()) {
309 return SelectionTree::Unselected
;
312 // The eof token is used as a sentinel.
313 // In general, source range from an AST node should not claim the eof token,
314 // but it could occur for unmatched-bracket cases.
315 // FIXME: fix it in TokenBuffer, expandedTokens(SourceRange) should not
316 // return the eof token.
317 if (ExpandedTokens
.back().kind() == tok::eof
)
318 ExpandedTokens
= ExpandedTokens
.drop_back();
320 SelectionTree::Selection Result
= NoTokens
;
321 while (!ExpandedTokens
.empty()) {
322 // Take consecutive tokens from the same context together for efficiency.
323 SourceLocation Start
= ExpandedTokens
.front().location();
324 FileID FID
= SM
.getFileID(Start
);
325 // Comparing SourceLocations against bounds is cheaper than getFileID().
326 SourceLocation Limit
= SM
.getComposedLoc(FID
, SM
.getFileIDSize(FID
));
327 auto Batch
= ExpandedTokens
.take_while([&](const syntax::Token
&T
) {
328 return T
.location() >= Start
&& T
.location() < Limit
;
330 assert(!Batch
.empty());
331 ExpandedTokens
= ExpandedTokens
.drop_front(Batch
.size());
333 update(Result
, testChunk(FID
, Batch
));
338 // Cheap check whether any of the tokens in R might be selected.
339 // If it returns false, test() will return NoTokens or Unselected.
340 // If it returns true, test() may return any value.
341 bool mayHit(SourceRange R
) const {
342 if (SelectedSpelled
.empty() || MaybeSelectedExpanded
.empty())
344 // If the node starts after the selection ends, it is not selected.
345 // Tokens a macro location might claim are >= its expansion start.
346 // So if the expansion start > last selected token, we can prune it.
347 // (This is particularly helpful for GTest's TEST macro).
348 if (auto B
= offsetInSelFile(getExpansionStart(R
.getBegin())))
349 if (*B
> SelectedSpelled
.back().Offset
)
351 // If the node ends before the selection begins, it is not selected.
352 SourceLocation EndLoc
= R
.getEnd();
353 while (EndLoc
.isMacroID())
354 EndLoc
= SM
.getImmediateExpansionRange(EndLoc
).getEnd();
355 // In the rare case that the expansion range is a char range, EndLoc is
356 // ~one token too far to the right. We may fail to prune, that's OK.
357 if (auto E
= offsetInSelFile(EndLoc
))
358 if (*E
< SelectedSpelled
.front().Offset
)
364 // Plausible expanded tokens that might be affected by the selection.
365 // This is an overestimate, it may contain tokens that are not selected.
366 // The point is to allow cheap pruning in test()
367 llvm::ArrayRef
<syntax::Token
>
368 computeMaybeSelectedExpandedTokens(const syntax::TokenBuffer
&Toks
) {
369 if (SelectedSpelled
.empty())
372 auto LastAffectedToken
= [&](SourceLocation Loc
) {
373 auto Offset
= offsetInSelFile(Loc
);
374 while (Loc
.isValid() && !Offset
) {
375 Loc
= Loc
.isMacroID() ? SM
.getImmediateExpansionRange(Loc
).getEnd()
376 : SM
.getIncludeLoc(SM
.getFileID(Loc
));
377 Offset
= offsetInSelFile(Loc
);
381 auto FirstAffectedToken
= [&](SourceLocation Loc
) {
382 auto Offset
= offsetInSelFile(Loc
);
383 while (Loc
.isValid() && !Offset
) {
384 Loc
= Loc
.isMacroID() ? SM
.getImmediateExpansionRange(Loc
).getBegin()
385 : SM
.getIncludeLoc(SM
.getFileID(Loc
));
386 Offset
= offsetInSelFile(Loc
);
391 const syntax::Token
*Start
= llvm::partition_point(
392 Toks
.expandedTokens(),
393 [&, First
= SelectedSpelled
.front().Offset
](const syntax::Token
&Tok
) {
394 if (Tok
.kind() == tok::eof
)
396 // Implausible if upperbound(Tok) < First.
397 if (auto Offset
= LastAffectedToken(Tok
.location()))
398 return *Offset
< First
;
399 // A prefix of the expanded tokens may be from an implicit
400 // inclusion (e.g. preamble patch, or command-line -include).
404 bool EndInvalid
= false;
405 const syntax::Token
*End
= std::partition_point(
406 Start
, Toks
.expandedTokens().end(),
407 [&, Last
= SelectedSpelled
.back().Offset
](const syntax::Token
&Tok
) {
408 if (Tok
.kind() == tok::eof
)
410 // Plausible if lowerbound(Tok) <= Last.
411 if (auto Offset
= FirstAffectedToken(Tok
.location()))
412 return *Offset
<= Last
;
413 // Shouldn't happen: once we've seen tokens traceable to the main
414 // file, there shouldn't be any more implicit inclusions.
415 assert(false && "Expanded token could not be resolved to main file!");
417 return true; // conservatively assume this token can overlap
420 End
= Toks
.expandedTokens().end();
422 return llvm::ArrayRef(Start
, End
);
425 // Hit-test a consecutive range of tokens from a single file ID.
426 SelectionTree::Selection
427 testChunk(FileID FID
, llvm::ArrayRef
<syntax::Token
> Batch
) const {
428 assert(!Batch
.empty());
429 SourceLocation StartLoc
= Batch
.front().location();
430 // There are several possible categories of FileID depending on how the
431 // preprocessor was used to generate these tokens:
432 // main file, #included file, macro args, macro bodies.
433 // We need to identify the main-file tokens that represent Batch, and
434 // determine whether we want to exclusively claim them. Regular tokens
435 // represent one AST construct, but a macro invocation can represent many.
437 // Handle tokens written directly in the main file.
438 if (FID
== SelFile
) {
439 return testTokenRange(*offsetInSelFile(Batch
.front().location()),
440 *offsetInSelFile(Batch
.back().location()));
443 // Handle tokens in another file #included into the main file.
444 // Check if the #include is selected, but don't claim it exclusively.
445 if (StartLoc
.isFileID()) {
446 for (SourceLocation Loc
= Batch
.front().location(); Loc
.isValid();
447 Loc
= SM
.getIncludeLoc(SM
.getFileID(Loc
))) {
448 if (auto Offset
= offsetInSelFile(Loc
))
449 // FIXME: use whole #include directive, not just the filename string.
450 return testToken(*Offset
);
455 assert(StartLoc
.isMacroID());
456 // Handle tokens that were passed as a macro argument.
457 SourceLocation ArgStart
= SM
.getTopMacroCallerLoc(StartLoc
);
458 if (auto ArgOffset
= offsetInSelFile(ArgStart
)) {
459 if (isFirstExpansion(FID
, ArgStart
, SM
)) {
460 SourceLocation ArgEnd
=
461 SM
.getTopMacroCallerLoc(Batch
.back().location());
462 return testTokenRange(*ArgOffset
, *offsetInSelFile(ArgEnd
));
463 } else { // NOLINT(llvm-else-after-return)
464 /* fall through and treat as part of the macro body */
468 // Handle tokens produced by non-argument macro expansion.
469 // Check if the macro name is selected, don't claim it exclusively.
470 if (auto ExpansionOffset
= offsetInSelFile(getExpansionStart(StartLoc
)))
471 // FIXME: also check ( and ) for function-like macros?
472 return testToken(*ExpansionOffset
);
476 // Is the closed token range [Begin, End] selected?
477 SelectionTree::Selection
testTokenRange(unsigned Begin
, unsigned End
) const {
478 assert(Begin
<= End
);
479 // Outside the selection entirely?
480 if (End
< SelectedSpelled
.front().Offset
||
481 Begin
> SelectedSpelled
.back().Offset
)
482 return SelectionTree::Unselected
;
484 // Compute range of tokens.
485 auto B
= llvm::partition_point(
486 SelectedSpelled
, [&](const Tok
&T
) { return T
.Offset
< Begin
; });
487 auto E
= std::partition_point(B
, SelectedSpelled
.end(), [&](const Tok
&T
) {
488 return T
.Offset
<= End
;
491 // Aggregate selectedness of tokens in range.
492 bool ExtendsOutsideSelection
= Begin
< SelectedSpelled
.front().Offset
||
493 End
> SelectedSpelled
.back().Offset
;
494 SelectionTree::Selection Result
=
495 ExtendsOutsideSelection
? SelectionTree::Unselected
: NoTokens
;
496 for (auto It
= B
; It
!= E
; ++It
)
497 update(Result
, It
->Selected
);
501 // Is the token at `Offset` selected?
502 SelectionTree::Selection
testToken(unsigned Offset
) const {
503 // Outside the selection entirely?
504 if (Offset
< SelectedSpelled
.front().Offset
||
505 Offset
> SelectedSpelled
.back().Offset
)
506 return SelectionTree::Unselected
;
507 // Find the token, if it exists.
508 auto It
= llvm::partition_point(
509 SelectedSpelled
, [&](const Tok
&T
) { return T
.Offset
< Offset
; });
510 if (It
!= SelectedSpelled
.end() && It
->Offset
== Offset
)
515 // Decomposes Loc and returns the offset if the file ID is SelFile.
516 std::optional
<unsigned> offsetInSelFile(SourceLocation Loc
) const {
517 // Decoding Loc with SM.getDecomposedLoc is relatively expensive.
518 // But SourceLocations for a file are numerically contiguous, so we
519 // can use cheap integer operations instead.
520 if (Loc
< SelFileBounds
.getBegin() || Loc
>= SelFileBounds
.getEnd())
522 // FIXME: subtracting getRawEncoding() is dubious, move this logic into SM.
523 return Loc
.getRawEncoding() - SelFileBounds
.getBegin().getRawEncoding();
526 SourceLocation
getExpansionStart(SourceLocation Loc
) const {
527 while (Loc
.isMacroID())
528 Loc
= SM
.getImmediateExpansionRange(Loc
).getBegin();
534 SelectionTree::Selection Selected
;
536 std::vector
<Tok
> SelectedSpelled
;
537 llvm::ArrayRef
<syntax::Token
> MaybeSelectedExpanded
;
539 SourceRange SelFileBounds
;
540 const SourceManager
&SM
;
543 // Show the type of a node for debugging.
544 void printNodeKind(llvm::raw_ostream
&OS
, const DynTypedNode
&N
) {
545 if (const TypeLoc
*TL
= N
.get
<TypeLoc
>()) {
546 // TypeLoc is a hierarchy, but has only a single ASTNodeKind.
547 // Synthesize the name from the Type subclass (except for QualifiedTypeLoc).
548 if (TL
->getTypeLocClass() == TypeLoc::Qualified
)
549 OS
<< "QualifiedTypeLoc";
551 OS
<< TL
->getType()->getTypeClassName() << "TypeLoc";
553 OS
<< N
.getNodeKind().asStringRef();
558 std::string
printNodeToString(const DynTypedNode
&N
, const PrintingPolicy
&PP
) {
560 llvm::raw_string_ostream
OS(S
);
561 printNodeKind(OS
, N
);
562 return std::move(OS
.str());
566 bool isImplicit(const Stmt
*S
) {
567 // Some Stmts are implicit and shouldn't be traversed, but there's no
568 // "implicit" attribute on Stmt/Expr.
569 // Unwrap implicit casts first if present (other nodes too?).
570 if (auto *ICE
= llvm::dyn_cast
<ImplicitCastExpr
>(S
))
571 S
= ICE
->getSubExprAsWritten();
572 // Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor.
573 // It would be nice if RAV handled this (!shouldTraverseImplicitCode()).
574 if (auto *CTI
= llvm::dyn_cast
<CXXThisExpr
>(S
))
575 if (CTI
->isImplicit())
577 // Make sure implicit access of anonymous structs don't end up owning tokens.
578 if (auto *ME
= llvm::dyn_cast
<MemberExpr
>(S
)) {
579 if (auto *FD
= llvm::dyn_cast
<FieldDecl
>(ME
->getMemberDecl()))
580 if (FD
->isAnonymousStructOrUnion())
581 // If Base is an implicit CXXThis, then the whole MemberExpr has no
582 // tokens. If it's a normal e.g. DeclRef, we treat the MemberExpr like
584 return isImplicit(ME
->getBase());
586 // Refs to operator() and [] are (almost?) always implicit as part of calls.
587 if (auto *DRE
= llvm::dyn_cast
<DeclRefExpr
>(S
)) {
588 if (auto *FD
= llvm::dyn_cast
<FunctionDecl
>(DRE
->getDecl())) {
589 switch (FD
->getOverloadedOperator()) {
601 // We find the selection by visiting written nodes in the AST, looking for nodes
602 // that intersect with the selected character range.
604 // While traversing, we maintain a parent stack. As nodes pop off the stack,
605 // we decide whether to keep them or not. To be kept, they must either be
606 // selected or contain some nodes that are.
608 // For simple cases (not inside macros) we prune subtrees that don't intersect.
609 class SelectionVisitor
: public RecursiveASTVisitor
<SelectionVisitor
> {
611 // Runs the visitor to gather selected nodes and their ancestors.
612 // If there is any selection, the root (TUDecl) is the first node.
613 static std::deque
<Node
> collect(ASTContext
&AST
,
614 const syntax::TokenBuffer
&Tokens
,
615 const PrintingPolicy
&PP
, unsigned Begin
,
616 unsigned End
, FileID File
) {
617 SelectionVisitor
V(AST
, Tokens
, PP
, Begin
, End
, File
);
619 assert(V
.Stack
.size() == 1 && "Unpaired push/pop?");
620 assert(V
.Stack
.top() == &V
.Nodes
.front());
621 return std::move(V
.Nodes
);
624 // We traverse all "well-behaved" nodes the same way:
625 // - push the node onto the stack
626 // - traverse its children recursively
627 // - pop it from the stack
628 // - hit testing: is intersection(node, selection) - union(children) empty?
629 // - attach it to the tree if it or any children hit the selection
631 // Two categories of nodes are not "well-behaved":
632 // - those without source range information, we don't record those
633 // - those that can't be stored in DynTypedNode.
634 bool TraverseDecl(Decl
*X
) {
635 if (llvm::isa_and_nonnull
<TranslationUnitDecl
>(X
))
636 return Base::TraverseDecl(X
); // Already pushed by constructor.
637 // Base::TraverseDecl will suppress children, but not this node itself.
638 if (X
&& X
->isImplicit()) {
639 // Most implicit nodes have only implicit children and can be skipped.
640 // However there are exceptions (`void foo(Concept auto x)`), and
641 // the base implementation knows how to find them.
642 return Base::TraverseDecl(X
);
644 return traverseNode(X
, [&] { return Base::TraverseDecl(X
); });
646 bool TraverseTypeLoc(TypeLoc X
) {
647 return traverseNode(&X
, [&] { return Base::TraverseTypeLoc(X
); });
649 bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc
&X
) {
650 return traverseNode(&X
,
651 [&] { return Base::TraverseTemplateArgumentLoc(X
); });
653 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X
) {
655 &X
, [&] { return Base::TraverseNestedNameSpecifierLoc(X
); });
657 bool TraverseConstructorInitializer(CXXCtorInitializer
*X
) {
659 X
, [&] { return Base::TraverseConstructorInitializer(X
); });
661 bool TraverseCXXBaseSpecifier(const CXXBaseSpecifier
&X
) {
662 return traverseNode(&X
, [&] { return Base::TraverseCXXBaseSpecifier(X
); });
664 bool TraverseAttr(Attr
*X
) {
665 return traverseNode(X
, [&] { return Base::TraverseAttr(X
); });
667 bool TraverseConceptReference(ConceptReference
*X
) {
668 return traverseNode(X
, [&] { return Base::TraverseConceptReference(X
); });
670 // Stmt is the same, but this form allows the data recursion optimization.
671 bool dataTraverseStmtPre(Stmt
*X
) {
672 if (!X
|| isImplicit(X
))
674 auto N
= DynTypedNode::create(*X
);
675 if (canSafelySkipNode(N
))
678 if (shouldSkipChildren(X
)) {
684 bool dataTraverseStmtPost(Stmt
*X
) {
688 // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
689 // TraverseTypeLoc is not called for the inner UnqualTypeLoc.
690 // This means we'd never see 'int' in 'const int'! Work around that here.
691 // (The reason for the behavior is to avoid traversing the nested Type twice,
692 // but we ignore TraverseType anyway).
693 bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX
) {
694 return traverseNode
<TypeLoc
>(
695 &QX
, [&] { return TraverseTypeLoc(QX
.getUnqualifiedLoc()); });
697 bool TraverseObjCProtocolLoc(ObjCProtocolLoc PL
) {
698 return traverseNode(&PL
, [&] { return Base::TraverseObjCProtocolLoc(PL
); });
700 // Uninteresting parts of the AST that don't have locations within them.
701 bool TraverseNestedNameSpecifier(NestedNameSpecifier
*) { return true; }
702 bool TraverseType(QualType
) { return true; }
704 // The DeclStmt for the loop variable claims to cover the whole range
705 // inside the parens, this causes the range-init expression to not be hit.
706 // Traverse the loop VarDecl instead, which has the right source range.
707 bool TraverseCXXForRangeStmt(CXXForRangeStmt
*S
) {
708 return traverseNode(S
, [&] {
709 return TraverseStmt(S
->getInit()) && TraverseDecl(S
->getLoopVariable()) &&
710 TraverseStmt(S
->getRangeInit()) && TraverseStmt(S
->getBody());
713 // OpaqueValueExpr blocks traversal, we must explicitly traverse it.
714 bool TraverseOpaqueValueExpr(OpaqueValueExpr
*E
) {
715 return traverseNode(E
, [&] { return TraverseStmt(E
->getSourceExpr()); });
717 // We only want to traverse the *syntactic form* to understand the selection.
718 bool TraversePseudoObjectExpr(PseudoObjectExpr
*E
) {
719 return traverseNode(E
, [&] { return TraverseStmt(E
->getSyntacticForm()); });
721 bool TraverseTypeConstraint(const TypeConstraint
*C
) {
722 if (auto *E
= C
->getImmediatelyDeclaredConstraint()) {
723 // Technically this expression is 'implicit' and not traversed by the RAV.
724 // However, the range is correct, so we visit expression to avoid adding
725 // an extra kind to 'DynTypeNode' that hold 'TypeConstraint'.
726 return TraverseStmt(E
);
728 return Base::TraverseTypeConstraint(C
);
731 // Override child traversal for certain node types.
732 using RecursiveASTVisitor::getStmtChildren
;
733 // PredefinedExpr like __func__ has a StringLiteral child for its value.
734 // It's not written, so don't traverse it.
735 Stmt::child_range
getStmtChildren(PredefinedExpr
*) {
736 return {StmtIterator
{}, StmtIterator
{}};
740 using Base
= RecursiveASTVisitor
<SelectionVisitor
>;
742 SelectionVisitor(ASTContext
&AST
, const syntax::TokenBuffer
&Tokens
,
743 const PrintingPolicy
&PP
, unsigned SelBegin
, unsigned SelEnd
,
745 : SM(AST
.getSourceManager()), LangOpts(AST
.getLangOpts()),
749 TokenBuf(Tokens
), SelChecker(Tokens
, SelFile
, SelBegin
, SelEnd
, SM
),
750 UnclaimedExpandedTokens(Tokens
.expandedTokens()) {
751 // Ensure we have a node for the TU decl, regardless of traversal scope.
752 Nodes
.emplace_back();
753 Nodes
.back().ASTNode
= DynTypedNode::create(*AST
.getTranslationUnitDecl());
754 Nodes
.back().Parent
= nullptr;
755 Nodes
.back().Selected
= SelectionTree::Unselected
;
756 Stack
.push(&Nodes
.back());
759 // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo.
760 // Node is always a pointer so the generic code can handle any null checks.
761 template <typename T
, typename Func
>
762 bool traverseNode(T
*Node
, const Func
&Body
) {
765 auto N
= DynTypedNode::create(*Node
);
766 if (canSafelySkipNode(N
))
768 push(DynTypedNode::create(*Node
));
776 // We do rough hit testing on the way down the tree to avoid traversing
777 // subtrees that don't touch the selection (canSafelySkipNode), but
778 // fine-grained hit-testing is mostly done on the way back up (in pop()).
779 // This means children get to claim parts of the selection first, and parents
780 // are only selected if they own tokens that no child owned.
782 // Nodes *usually* nest nicely: a child's getSourceRange() lies within the
783 // parent's, and a node (transitively) owns all tokens in its range.
785 // Exception 1: when declarators nest, *inner* declarator is the *outer* type.
786 // e.g. void foo[5](int) is an array of functions.
787 // To handle this case, declarators are careful to only claim the tokens they
788 // own, rather than claim a range and rely on claim ordering.
790 // Exception 2: siblings both claim the same node.
791 // e.g. `int x, y;` produces two sibling VarDecls.
794 // Here the first ("leftmost") sibling claims the tokens it wants, and the
795 // other sibling gets what's left. So selecting "int" only includes the left
796 // VarDecl in the selection tree.
798 // An optimization for a common case: nodes outside macro expansions that
799 // don't intersect the selection may be recursively skipped.
800 bool canSafelySkipNode(const DynTypedNode
&N
) {
801 SourceRange S
= getSourceRange(N
);
802 if (auto *TL
= N
.get
<TypeLoc
>()) {
803 // FIXME: TypeLoc::getBeginLoc()/getEndLoc() are pretty fragile
804 // heuristics. We should consider only pruning critical TypeLoc nodes, to
807 // AttributedTypeLoc may point to the attribute's range, NOT the modified
809 if (auto AT
= TL
->getAs
<AttributedTypeLoc
>())
810 S
= AT
.getModifiedLoc().getSourceRange();
812 // SourceRange often doesn't manage to accurately cover attributes.
813 // Fortunately, attributes are rare.
814 if (llvm::any_of(getAttributes(N
),
815 [](const Attr
*A
) { return !A
->isImplicit(); }))
817 if (!SelChecker
.mayHit(S
)) {
818 dlog("{2}skip: {0} {1}", printNodeToString(N
, PrintPolicy
),
819 S
.printToString(SM
), indent());
825 // There are certain nodes we want to treat as leaves in the SelectionTree,
826 // although they do have children.
827 bool shouldSkipChildren(const Stmt
*X
) const {
828 // UserDefinedLiteral (e.g. 12_i) has two children (12 and _i).
829 // Unfortunately TokenBuffer sees 12_i as one token and can't split it.
830 // So we treat UserDefinedLiteral as a leaf node, owning the token.
831 return llvm::isa
<UserDefinedLiteral
>(X
);
834 // Pushes a node onto the ancestor stack. Pairs with pop().
835 // Performs early hit detection for some nodes (on the earlySourceRange).
836 void push(DynTypedNode Node
) {
837 SourceRange Early
= earlySourceRange(Node
);
838 dlog("{2}push: {0} {1}", printNodeToString(Node
, PrintPolicy
),
839 Node
.getSourceRange().printToString(SM
), indent());
840 Nodes
.emplace_back();
841 Nodes
.back().ASTNode
= std::move(Node
);
842 Nodes
.back().Parent
= Stack
.top();
843 Nodes
.back().Selected
= NoTokens
;
844 Stack
.push(&Nodes
.back());
845 claimRange(Early
, Nodes
.back().Selected
);
848 // Pops a node off the ancestor stack, and finalizes it. Pairs with push().
849 // Performs primary hit detection.
851 Node
&N
= *Stack
.top();
852 dlog("{1}pop: {0}", printNodeToString(N
.ASTNode
, PrintPolicy
), indent(-1));
853 claimTokensFor(N
.ASTNode
, N
.Selected
);
854 if (N
.Selected
== NoTokens
)
855 N
.Selected
= SelectionTree::Unselected
;
856 if (N
.Selected
|| !N
.Children
.empty()) {
857 // Attach to the tree.
858 N
.Parent
->Children
.push_back(&N
);
860 // Neither N any children are selected, it doesn't belong in the tree.
861 assert(&N
== &Nodes
.back());
867 // Returns the range of tokens that this node will claim directly, and
868 // is not available to the node's children.
869 // Usually empty, but sometimes children cover tokens but shouldn't own them.
870 SourceRange
earlySourceRange(const DynTypedNode
&N
) {
871 if (const Decl
*VD
= N
.get
<VarDecl
>()) {
872 // We want the name in the var-decl to be claimed by the decl itself and
873 // not by any children. Ususally, we don't need this, because source
874 // ranges of children are not overlapped with their parent's.
875 // An exception is lambda captured var decl, where AutoTypeLoc is
876 // overlapped with the name loc.
877 // auto fun = [bar = foo]() { ... }
879 // ~~~ |- AutoTypeLoc
880 return VD
->getLocation();
883 // When referring to a destructor ~Foo(), attribute Foo to the destructor
884 // rather than the TypeLoc nested inside it.
885 // We still traverse the TypeLoc, because it may contain other targeted
886 // things like the T in ~Foo<T>().
887 if (const auto *CDD
= N
.get
<CXXDestructorDecl
>())
888 return CDD
->getNameInfo().getNamedTypeInfo()->getTypeLoc().getBeginLoc();
889 if (const auto *ME
= N
.get
<MemberExpr
>()) {
890 auto NameInfo
= ME
->getMemberNameInfo();
891 if (NameInfo
.getName().getNameKind() ==
892 DeclarationName::CXXDestructorName
)
893 return NameInfo
.getNamedTypeInfo()->getTypeLoc().getBeginLoc();
896 return SourceRange();
899 // Claim tokens for N, after processing its children.
900 // By default this claims all unclaimed tokens in getSourceRange().
901 // We override this if we want to claim fewer tokens (e.g. there are gaps).
902 void claimTokensFor(const DynTypedNode
&N
, SelectionTree::Selection
&Result
) {
903 // CXXConstructExpr often shows implicit construction, like `string s;`.
904 // Don't associate any tokens with it unless there's some syntax like {}.
905 // This prevents it from claiming 's', its primary location.
906 if (const auto *CCE
= N
.get
<CXXConstructExpr
>()) {
907 claimRange(CCE
->getParenOrBraceRange(), Result
);
910 // ExprWithCleanups is always implicit. It often wraps CXXConstructExpr.
911 // Prevent it claiming 's' in the case above.
912 if (N
.get
<ExprWithCleanups
>())
915 // Declarators nest "inside out", with parent types inside child ones.
916 // Instead of claiming the whole range (clobbering parent tokens), carefully
917 // claim the tokens owned by this node and non-declarator children.
918 // (We could manipulate traversal order instead, but this is easier).
920 // Non-declarator types nest normally, and are handled like other nodes.
923 // Vec<R<int>(*[2])(A<char>)> is a Vec of arrays of pointers to functions,
924 // which accept A<char> and return R<int>.
925 // The TypeLoc hierarchy:
926 // Vec<R<int>(*[2])(A<char>)> m;
927 // Vec<#####################> TemplateSpecialization Vec
928 // --------[2]---------- `-Array
929 // -------*------------- `-Pointer
930 // ------(----)--------- `-Paren
931 // ------------(#######) `-Function
932 // R<###> |-TemplateSpecialization R
933 // int | `-Builtin int
934 // A<####> `-TemplateSpecialization A
935 // char `-Builtin char
938 // --- represents unclaimed parts of the SourceRange.
939 // ### represents parts that children already claimed.
940 if (const auto *TL
= N
.get
<TypeLoc
>()) {
941 if (auto PTL
= TL
->getAs
<ParenTypeLoc
>()) {
942 claimRange(PTL
.getLParenLoc(), Result
);
943 claimRange(PTL
.getRParenLoc(), Result
);
946 if (auto ATL
= TL
->getAs
<ArrayTypeLoc
>()) {
947 claimRange(ATL
.getBracketsRange(), Result
);
950 if (auto PTL
= TL
->getAs
<PointerTypeLoc
>()) {
951 claimRange(PTL
.getStarLoc(), Result
);
954 if (auto FTL
= TL
->getAs
<FunctionTypeLoc
>()) {
955 claimRange(SourceRange(FTL
.getLParenLoc(), FTL
.getEndLoc()), Result
);
959 claimRange(getSourceRange(N
), Result
);
962 // Perform hit-testing of a complete Node against the selection.
963 // This runs for every node in the AST, and must be fast in common cases.
964 // This is usually called from pop(), so we can take children into account.
965 // The existing state of Result is relevant.
966 void claimRange(SourceRange S
, SelectionTree::Selection
&Result
) {
967 for (const auto &ClaimedRange
:
968 UnclaimedExpandedTokens
.erase(TokenBuf
.expandedTokens(S
)))
969 update(Result
, SelChecker
.test(ClaimedRange
));
971 if (Result
&& Result
!= NoTokens
)
972 dlog("{1}hit selection: {0}", S
.printToString(SM
), indent());
975 std::string
indent(int Offset
= 0) {
976 // Cast for signed arithmetic.
977 int Amount
= int(Stack
.size()) + Offset
;
979 return std::string(Amount
, ' ');
983 const LangOptions
&LangOpts
;
985 const PrintingPolicy
&PrintPolicy
;
987 const syntax::TokenBuffer
&TokenBuf
;
988 std::stack
<Node
*> Stack
;
989 SelectionTester SelChecker
;
990 IntervalSet
<syntax::Token
> UnclaimedExpandedTokens
;
991 std::deque
<Node
> Nodes
; // Stable pointers as we add more nodes.
996 llvm::SmallString
<256> abbreviatedString(DynTypedNode N
,
997 const PrintingPolicy
&PP
) {
998 llvm::SmallString
<256> Result
;
1000 llvm::raw_svector_ostream
OS(Result
);
1003 auto Pos
= Result
.find('\n');
1004 if (Pos
!= llvm::StringRef::npos
) {
1005 bool MoreText
= !llvm::all_of(Result
.str().drop_front(Pos
), llvm::isSpace
);
1008 Result
.append(" …");
1013 void SelectionTree::print(llvm::raw_ostream
&OS
, const SelectionTree::Node
&N
,
1016 OS
.indent(Indent
- 1) << (N
.Selected
== SelectionTree::Complete
? '*'
1020 printNodeKind(OS
, N
.ASTNode
);
1021 OS
<< ' ' << abbreviatedString(N
.ASTNode
, PrintPolicy
) << "\n";
1022 for (const Node
*Child
: N
.Children
)
1023 print(OS
, *Child
, Indent
+ 2);
1026 std::string
SelectionTree::Node::kind() const {
1028 llvm::raw_string_ostream
OS(S
);
1029 printNodeKind(OS
, ASTNode
);
1030 return std::move(OS
.str());
1033 // Decide which selections emulate a "point" query in between characters.
1034 // If it's ambiguous (the neighboring characters are selectable tokens), returns
1035 // both possibilities in preference order.
1036 // Always returns at least one range - if no tokens touched, and empty range.
1037 static llvm::SmallVector
<std::pair
<unsigned, unsigned>, 2>
1038 pointBounds(unsigned Offset
, const syntax::TokenBuffer
&Tokens
) {
1039 const auto &SM
= Tokens
.sourceManager();
1040 SourceLocation Loc
= SM
.getComposedLoc(SM
.getMainFileID(), Offset
);
1041 llvm::SmallVector
<std::pair
<unsigned, unsigned>, 2> Result
;
1042 // Prefer right token over left.
1043 for (const syntax::Token
&Tok
:
1044 llvm::reverse(spelledTokensTouching(Loc
, Tokens
))) {
1045 if (shouldIgnore(Tok
))
1047 unsigned Offset
= Tokens
.sourceManager().getFileOffset(Tok
.location());
1048 Result
.emplace_back(Offset
, Offset
+ Tok
.length());
1051 Result
.emplace_back(Offset
, Offset
);
1055 bool SelectionTree::createEach(ASTContext
&AST
,
1056 const syntax::TokenBuffer
&Tokens
,
1057 unsigned Begin
, unsigned End
,
1058 llvm::function_ref
<bool(SelectionTree
)> Func
) {
1060 return Func(SelectionTree(AST
, Tokens
, Begin
, End
));
1061 for (std::pair
<unsigned, unsigned> Bounds
: pointBounds(Begin
, Tokens
))
1062 if (Func(SelectionTree(AST
, Tokens
, Bounds
.first
, Bounds
.second
)))
1067 SelectionTree
SelectionTree::createRight(ASTContext
&AST
,
1068 const syntax::TokenBuffer
&Tokens
,
1069 unsigned int Begin
, unsigned int End
) {
1070 std::optional
<SelectionTree
> Result
;
1071 createEach(AST
, Tokens
, Begin
, End
, [&](SelectionTree T
) {
1072 Result
= std::move(T
);
1075 return std::move(*Result
);
1078 SelectionTree::SelectionTree(ASTContext
&AST
, const syntax::TokenBuffer
&Tokens
,
1079 unsigned Begin
, unsigned End
)
1080 : PrintPolicy(AST
.getLangOpts()) {
1081 // No fundamental reason the selection needs to be in the main file,
1082 // but that's all clangd has needed so far.
1083 const SourceManager
&SM
= AST
.getSourceManager();
1084 FileID FID
= SM
.getMainFileID();
1085 PrintPolicy
.TerseOutput
= true;
1086 PrintPolicy
.IncludeNewlines
= false;
1088 dlog("Computing selection for {0}",
1089 SourceRange(SM
.getComposedLoc(FID
, Begin
), SM
.getComposedLoc(FID
, End
))
1090 .printToString(SM
));
1091 Nodes
= SelectionVisitor::collect(AST
, Tokens
, PrintPolicy
, Begin
, End
, FID
);
1092 Root
= Nodes
.empty() ? nullptr : &Nodes
.front();
1093 recordMetrics(*this, AST
.getLangOpts());
1094 dlog("Built selection tree\n{0}", *this);
1097 const Node
*SelectionTree::commonAncestor() const {
1098 const Node
*Ancestor
= Root
;
1099 while (Ancestor
->Children
.size() == 1 && !Ancestor
->Selected
)
1100 Ancestor
= Ancestor
->Children
.front();
1101 // Returning nullptr here is a bit unprincipled, but it makes the API safer:
1102 // the TranslationUnitDecl contains all of the preamble, so traversing it is a
1103 // performance cliff. Callers can check for null and use root() if they want.
1104 return Ancestor
!= Root
? Ancestor
: nullptr;
1107 const DeclContext
&SelectionTree::Node::getDeclContext() const {
1108 for (const Node
*CurrentNode
= this; CurrentNode
!= nullptr;
1109 CurrentNode
= CurrentNode
->Parent
) {
1110 if (const Decl
*Current
= CurrentNode
->ASTNode
.get
<Decl
>()) {
1111 if (CurrentNode
!= this)
1112 if (auto *DC
= dyn_cast
<DeclContext
>(Current
))
1114 return *Current
->getLexicalDeclContext();
1117 llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
1120 const SelectionTree::Node
&SelectionTree::Node::ignoreImplicit() const {
1121 if (Children
.size() == 1 &&
1122 getSourceRange(Children
.front()->ASTNode
) == getSourceRange(ASTNode
))
1123 return Children
.front()->ignoreImplicit();
1127 const SelectionTree::Node
&SelectionTree::Node::outerImplicit() const {
1128 if (Parent
&& getSourceRange(Parent
->ASTNode
) == getSourceRange(ASTNode
))
1129 return Parent
->outerImplicit();
1133 } // namespace clangd
1134 } // namespace clang