1 //===--- ExtractVariable.cpp ------------------------------------*- C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 #include "Selection.h"
12 #include "SourceCode.h"
13 #include "refactor/Tweak.h"
14 #include "clang/AST/ASTContext.h"
15 #include "clang/AST/Decl.h"
16 #include "clang/AST/DeclCXX.h"
17 #include "clang/AST/Expr.h"
18 #include "clang/AST/ExprCXX.h"
19 #include "clang/AST/LambdaCapture.h"
20 #include "clang/AST/OperationKinds.h"
21 #include "clang/AST/RecursiveASTVisitor.h"
22 #include "clang/AST/Stmt.h"
23 #include "clang/AST/StmtCXX.h"
24 #include "clang/Basic/LangOptions.h"
25 #include "clang/Basic/SourceLocation.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/Tooling/Core/Replacement.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringRef.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/Error.h"
32 #include "llvm/Support/raw_ostream.h"
37 // information regarding the Expr that is being extracted
// Groups the selected expression, its SelectionTree node, the SourceManager,
// the ASTContext, the Decls the expression references and the statement
// before which the new variable declaration is inserted.
// NOTE(review): some lines of this class (e.g. access specifiers) are not
// visible in this view.
38 class ExtractionContext
{
// Built from the SelectionTree node of the expression to extract.
40 ExtractionContext(const SelectionTree::Node
*Node
, const SourceManager
&SM
,
41 const ASTContext
&Ctx
);
// Plain accessors for the stored expression, its node and the Extractable flag.
42 const clang::Expr
*getExpr() const { return Expr
; }
43 const SelectionTree::Node
*getExprNode() const { return ExprNode
; }
44 bool isExtractable() const { return Extractable
; }
45 // The half-open range for the expression to be extracted.
46 SourceRange
getExtractionChars() const;
47 // Generate Replacement for replacing selected expression with given VarName
48 tooling::Replacement
replaceWithVar(SourceRange Chars
,
49 llvm::StringRef VarName
) const;
50 // Generate Replacement for declaring the selected Expr as a new variable
51 tooling::Replacement
insertDeclaration(llvm::StringRef VarName
,
52 SourceRange InitChars
) const;
// Starts out false. NOTE(review): the place where it is set to true is not
// visible in this view.
55 bool Extractable
= false;
// The expression being extracted and the SelectionTree node it came from.
56 const clang::Expr
*Expr
;
58 const SelectionTree::Node
*ExprNode
;
59 // Stmt before which we will extract
60 const clang::Stmt
*InsertionPoint
= nullptr;
// Held by reference; the referenced objects are owned elsewhere.
61 const SourceManager
&SM
;
62 const ASTContext
&Ctx
;
63 // Decls referenced in the Expr
64 std::vector
<clang::Decl
*> ReferencedDecls
;
65 // returns true if the Expr doesn't reference any variable declared in scope
66 bool exprIsValidOutside(const clang::Stmt
*Scope
) const;
67 // computes the Stmt before which we will extract out Expr
68 const clang::Stmt
*computeInsertionPoint() const;
71 // Returns all the Decls referenced inside the given Expr
// The Decls are gathered by a local RecursiveASTVisitor subclass that is run
// over Expr; its ReferencedDecls vector is the return value.
// NOTE(review): several lines of this function are not visible in this view.
72 static std::vector
<clang::Decl
*>
73 computeReferencedDecls(const clang::Expr
*Expr
) {
74 // RAV subclass to find all DeclRefs in a given Stmt
75 class FindDeclRefsVisitor
76 : public clang::RecursiveASTVisitor
<FindDeclRefsVisitor
> {
// Accumulates the Decl of each visited DeclRefExpr.
78 std::vector
<Decl
*> ReferencedDecls
;
79 bool VisitDeclRefExpr(DeclRefExpr
*DeclRef
) { // NOLINT
80 // Stop the call operator of lambdas from being marked as a referenced
81 // DeclRefExpr in immediately invoked lambdas.
82 if (const auto *const Method
=
83 llvm::dyn_cast
<CXXMethodDecl
>(DeclRef
->getDecl());
84 Method
!= nullptr && Method
->getParent()->isLambda()) {
// Record the declaration named by this DeclRefExpr.
87 ReferencedDecls
.push_back(DeclRef
->getDecl());
91 // Local variables declared inside of the selected lambda cannot go out of
92 // scope. The DeclRefExprs that are important are the variables captured,
93 // the DeclRefExprs inside the initializers of init-capture variables,
94 // variables mentioned in trailing return types, constraints and explicit
95 // defaulted template parameters.
96 bool TraverseLambdaExpr(LambdaExpr
*LExpr
) {
// Walk each capture together with its initializer expression.
97 for (const auto &[Capture
, Initializer
] :
98 llvm::zip(LExpr
->captures(), LExpr
->capture_inits())) {
99 TraverseLambdaCapture(LExpr
, &Capture
, Initializer
);
// Walk the trailing requires-clause, when there is one.
102 if (clang::Expr
*const RequiresClause
=
103 LExpr
->getTrailingRequiresClause()) {
104 TraverseStmt(RequiresClause
);
// Walk the explicitly written template parameters.
107 for (auto *const TemplateParam
: LExpr
->getExplicitTemplateParameters())
108 TraverseDecl(TemplateParam
);
// Walk the call operator's declared return type and its parameters.
110 if (auto *const CallOperator
= LExpr
->getCallOperator()) {
111 TraverseType(CallOperator
->getDeclaredReturnType());
113 for (auto *const Param
: CallOperator
->parameters()) {
114 TraverseParmVarDecl(Param
);
// NOTE(review): the body of this attribute loop is not visible in this view.
117 for (auto *const Attr
: CallOperator
->attrs()) {
// Run the visitor over Expr; the const_cast is needed because TraverseStmt
// is called with a non-const Stmt pointer.
126 FindDeclRefsVisitor Visitor
;
127 Visitor
.TraverseStmt(const_cast<Stmt
*>(cast
<Stmt
>(Expr
)));
128 return Visitor
.ReferencedDecls
;
// Picks the type used for the extracted variable.
// - When the language options have CPlusPlus11 set, this is the auto
//   deduction type from the ASTContext.
// - Otherwise, for an expression with the PseudoObject placeholder type that
//   is an ObjCPropertyRefExpr using a getter, it is the explicit property's
//   type or the implicit getter's return type.
// - Otherwise it is the expression's own type.
// NOTE(review): the results of the setter branch and of other pseudo-object
// expressions are on lines not visible in this view.
131 static QualType
computeVariableType(const Expr
*Expr
, const ASTContext
&Ctx
) {
132 if (Ctx
.getLangOpts().CPlusPlus11
)
133 return Ctx
.getAutoDeductType();
135 if (Expr
->hasPlaceholderType(BuiltinType::PseudoObject
)) {
136 if (const auto *PR
= dyn_cast
<ObjCPropertyRefExpr
>(Expr
)) {
137 if (PR
->isMessagingSetter()) {
138 // Don't support extracting a compound reference like `self.prop += 1`
139 // since the meaning changes after extraction since we'll no longer call
140 // the setter. Non compound access like `self.prop = 1` is invalid since
141 // it returns nil (setter method must have a void return type).
143 } else if (PR
->isMessagingGetter()) {
// Explicit properties carry their own type; implicit ones are typed by the
// getter method's return type.
144 if (PR
->isExplicitProperty())
145 return PR
->getExplicitProperty()->getType();
147 return PR
->getImplicitPropertyGetter()->getReturnType();
153 return Expr
->getType();
// Stores the node and the SourceManager/ASTContext references, then derives
// the expression, the Decls it references, the insertion point and the
// variable type, in that order.
// NOTE(review): the lines that act on a null VarType and that set Extractable
// are not visible in this view.
156 ExtractionContext::ExtractionContext(const SelectionTree::Node
*Node
,
157 const SourceManager
&SM
,
158 const ASTContext
&Ctx
)
159 : ExprNode(Node
), SM(SM
), Ctx(Ctx
) {
160 Expr
= Node
->ASTNode
.get
<clang::Expr
>();
// computeInsertionPoint() reads ReferencedDecls through exprIsValidOutside(),
// so ReferencedDecls must be filled in first.
161 ReferencedDecls
= computeReferencedDecls(Expr
);
162 InsertionPoint
= computeInsertionPoint();
165 VarType
= computeVariableType(Expr
, Ctx
);
166 if (VarType
.isNull())
169 // Strip the outer nullability since it's not common for local variables.
170 AttributedType::stripOuterNullability(VarType
);
173 // checks whether extracting before InsertionPoint will take a
174 // variable reference out of scope
// For every Decl referenced by the expression, tests whether both the Decl's
// begin and end locations lie within Scope's begin and end locations.
// NOTE(review): the statements executed when that test succeeds, and the
// final return, are not visible in this view.
175 bool ExtractionContext::exprIsValidOutside(const clang::Stmt
*Scope
) const {
176 SourceLocation ScopeBegin
= Scope
->getBeginLoc();
177 SourceLocation ScopeEnd
= Scope
->getEndLoc();
178 for (const Decl
*ReferencedDecl
: ReferencedDecls
) {
179 if (SM
.isPointWithin(ReferencedDecl
->getBeginLoc(), ScopeBegin
, ScopeEnd
) &&
180 SM
.isPointWithin(ReferencedDecl
->getEndLoc(), ScopeBegin
, ScopeEnd
))
186 // Return the Stmt before which we need to insert the extraction.
187 // To find the Stmt, we go up the AST Tree and if the Parent of the current
188 // Stmt is a CompoundStmt, we can extract inside this CompoundStmt just before
189 // the current Stmt. We ALWAYS insert before a Stmt whose parent is a
192 // FIXME: Extraction from label, switch and case statements
193 // FIXME: Doesn't work for FoldExpr
194 // FIXME: Ensure extraction from loops doesn't change semantics.
// Walks from the expression's node up through its parents, for as long as a
// parent exists and CanExtractOutside accepts the current node. The current
// node's Stmt is returned once the parent is a CompoundStmt.
// NOTE(review): the early-exit statements (scope check failure, macro
// location) and the fall-through return are not visible in this view.
195 const clang::Stmt
*ExtractionContext::computeInsertionPoint() const {
196 // returns true if we can extract before InsertionPoint
197 auto CanExtractOutside
=
198 [](const SelectionTree::Node
*InsertionPoint
) -> bool {
199 if (const clang::Stmt
*Stmt
= InsertionPoint
->ASTNode
.get
<clang::Stmt
>()) {
200 if (isa
<clang::Expr
>(Stmt
)) {
201 // Do not allow extraction from the initializer of a defaulted parameter
202 // to a local variable (e.g. a function-local lambda).
203 if (InsertionPoint
->Parent
->ASTNode
.get
<ParmVarDecl
>() != nullptr) {
210 // We don't yet allow extraction from switch/case stmt as we would need to
211 // jump over the switch stmt even if there is a CompoundStmt inside the
212 // switch. And there are other Stmts which we don't care about (e.g.
213 // continue and break) as there can never be anything to extract from
// Non-expression statements are accepted only if they are one of these kinds.
215 return isa
<AttributedStmt
>(Stmt
) || isa
<CompoundStmt
>(Stmt
) ||
216 isa
<CXXForRangeStmt
>(Stmt
) || isa
<DeclStmt
>(Stmt
) ||
217 isa
<DoStmt
>(Stmt
) || isa
<ForStmt
>(Stmt
) || isa
<IfStmt
>(Stmt
) ||
218 isa
<ReturnStmt
>(Stmt
) || isa
<WhileStmt
>(Stmt
);
// Nodes that are not Stmts: VarDecl nodes are checked here.
// NOTE(review): the result for this case is not visible in this view.
220 if (InsertionPoint
->ASTNode
.get
<VarDecl
>())
// Climb from the expression's node towards the root.
224 for (const SelectionTree::Node
*CurNode
= getExprNode();
225 CurNode
->Parent
&& CanExtractOutside(CurNode
);
226 CurNode
= CurNode
->Parent
) {
// Null when the current node is not a Stmt (e.g. a declaration node).
227 const clang::Stmt
*CurInsertionPoint
= CurNode
->ASTNode
.get
<Stmt
>();
228 // give up if extraction will take a variable out of scope
229 if (CurInsertionPoint
&& !exprIsValidOutside(CurInsertionPoint
))
231 if (const clang::Stmt
*CurParent
= CurNode
->Parent
->ASTNode
.get
<Stmt
>()) {
232 if (isa
<CompoundStmt
>(CurParent
)) {
233 // Ensure we don't write inside a macro.
234 if (CurParent
->getBeginLoc().isMacroID())
236 return CurInsertionPoint
;
243 // returns the replacement for substituting the extraction with VarName
// The replaced span starts at Chars.getBegin() and its length is the
// difference between the file offsets of Chars' end and begin.
// NOTE(review): the return-type line of this definition is not visible in
// this view.
245 ExtractionContext::replaceWithVar(SourceRange Chars
,
246 llvm::StringRef VarName
) const {
247 unsigned ExtractionLength
=
248 SM
.getFileOffset(Chars
.getEnd()) - SM
.getFileOffset(Chars
.getBegin());
249 return tooling::Replacement(SM
, Chars
.getBegin(), ExtractionLength
, VarName
);
251 // returns the Replacement for declaring a new variable storing the extraction
// The inserted text is the printed type and name, then " = ", then the source
// text of InitializerChars, then "; ". It is a zero-length replacement at
// InsertionLoc, i.e. a pure insertion.
// NOTE(review): the return-type line and the end of the InsertionLoc
// initializer are not visible in this view.
253 ExtractionContext::insertDeclaration(llvm::StringRef VarName
,
254 SourceRange InitializerChars
) const {
// Source text of the initializer, taken verbatim from the file.
255 llvm::StringRef ExtractionCode
= toSourceCode(SM
, InitializerChars
);
// Derived from the file range of the statement we insert before.
256 const SourceLocation InsertionLoc
=
257 toHalfOpenFileRange(SM
, Ctx
.getLangOpts(),
258 InsertionPoint
->getSourceRange())
// presumably printType spells VarType together with VarName relative to the
// expression's DeclContext - confirm against its declaration.
260 std::string ExtractedVarDecl
=
261 printType(VarType
, ExprNode
->getDeclContext(), VarName
) + " = " +
262 ExtractionCode
.str() + "; ";
263 return tooling::Replacement(SM
, InsertionLoc
, 0, ExtractedVarDecl
);
266 // Helpers for handling "binary subexpressions" like a + [[b + c]] + d.
268 // These are special, because the formal AST doesn't match what users expect:
269 // - the AST is ((a + b) + c) + d, so the ancestor expression is `a + b + c`.
270 // - but extracting `b + c` is reasonable, as + is (mathematically) associative.
272 // So we try to support these cases with some restrictions:
273 // - the operator must be associative
274 // - no mixing of operators is allowed
275 // - we don't look inside macro expansions in the subexpressions
276 // - we only adjust the extracted range, so references in the unselected parts
277 // of the AST expression (e.g. `a`) are still considered referenced for
278 // the purposes of calculating the insertion point.
279 // FIXME: it would be nice to exclude these references, by micromanaging
280 // the computeReferencedDecls() calls around the binary operator tree.
282 // Information extracted about a binary operator encountered in a SelectionTree.
283 // It can represent either an overloaded or built-in operator.
// Describes one binary operator found at a SelectionTree node: its opcode,
// the operator's expression location, and the operand nodes present in the
// selection tree.
// NOTE(review): several lines (returns, closing braces, the body of
// associative()) are not visible in this view.
284 struct ParsedBinaryOperator
{
285 BinaryOperatorKind Kind
;
286 SourceLocation ExprLoc
;
287 llvm::SmallVector
<const SelectionTree::Node
*> SelectedOperands
;
289 // If N is a binary operator, populate this and return true.
290 bool parse(const SelectionTree::Node
&N
) {
// Discard operands left over from an earlier parse() on this object.
291 SelectedOperands
.clear();
// Built-in operator: every child of N is taken as an operand.
293 if (const BinaryOperator
*Op
=
294 llvm::dyn_cast_or_null
<BinaryOperator
>(N
.ASTNode
.get
<Expr
>())) {
295 Kind
= Op
->getOpcode();
296 ExprLoc
= Op
->getExprLoc();
297 SelectedOperands
= N
.Children
;
// Overloaded operator call: checked for infix binary form, then mapped to the
// equivalent built-in opcode.
300 if (const CXXOperatorCallExpr
*Op
=
301 llvm::dyn_cast_or_null
<CXXOperatorCallExpr
>(
302 N
.ASTNode
.get
<Expr
>())) {
303 if (!Op
->isInfixBinaryOp())
306 Kind
= BinaryOperator::getOverloadedOpcode(Op
->getOperator());
307 ExprLoc
= Op
->getExprLoc();
308 // Not all children are args, there's also the callee (operator).
309 for (const auto *Child
: N
.Children
) {
310 const Expr
*E
= Child
->ASTNode
.get
<Expr
>();
311 assert(E
&& "callee and args should be Exprs!");
// Keep only the children that are the call's first or second argument.
312 if (E
== Op
->getArg(0) || E
== Op
->getArg(1))
313 SelectedOperands
.push_back(Child
);
320 bool associative() const {
321 // Must also be left-associative, or update getBinaryOperatorRange()!
// Compares the FileID of each selected operand's expression location with the
// FileID of the operator's location.
// NOTE(review): the statements returning the result are not visible here.
336 bool crossesMacroBoundary(const SourceManager
&SM
) {
337 FileID F
= SM
.getFileID(ExprLoc
);
338 for (const SelectionTree::Node
*Child
: SelectedOperands
)
339 if (SM
.getFileID(Child
->ASTNode
.get
<Expr
>()->getExprLoc()) != F
)
345 // If we have an associative operator at the top level, then we must find
346 // the start point (rightmost in LHS) and end point (leftmost in RHS).
347 // We can only descend into subtrees where the operator matches.
349 // e.g. for a + [[b + c]] + d
// Computes the source range to extract when N is an associative binary
// operator with both operands selected. Returns an invalid (default)
// SourceRange when N does not qualify.
// NOTE(review): the statement that combines the Start and End file ranges
// into the result is only partly visible in this view.
357 const SourceRange
getBinaryOperatorRange(const SelectionTree::Node
&N
,
358 const SourceManager
&SM
,
359 const LangOptions
&LangOpts
) {
360 // If N is not a suitable binary operator, bail out.
361 ParsedBinaryOperator Op
;
362 if (!Op
.parse(N
.ignoreImplicit()) || !Op
.associative() ||
363 Op
.crossesMacroBoundary(SM
) || Op
.SelectedOperands
.size() != 2)
364 return SourceRange();
// Saved now because Op is reused (and overwritten) by parse() in the loop.
365 BinaryOperatorKind OuterOp
= Op
.Kind
;
367 // Because the tree we're interested in contains only one operator type, and
368 // all eligible operators are left-associative, the shape of the tree is
369 // very restricted: it's a linked list along the left edges.
370 // This simplifies our implementation.
371 const SelectionTree::Node
*Start
= Op
.SelectedOperands
.front(); // LHS
372 const SelectionTree::Node
*End
= Op
.SelectedOperands
.back(); // RHS
373 // End is already correct: it can't be an OuterOp (as it's left-associative).
374 // Start needs to be pushed down into the subtree to the right spot.
// Keep descending while Start is the same kind of operator and stays within
// one FileID.
375 while (Op
.parse(Start
->ignoreImplicit()) && Op
.Kind
== OuterOp
&&
376 !Op
.crossesMacroBoundary(SM
)) {
377 assert(!Op
.SelectedOperands
.empty() && "got only operator on one side!");
378 if (Op
.SelectedOperands
.size() == 1) { // Only Op.RHS selected
379 Start
= Op
.SelectedOperands
.back();
382 // Op.LHS is (at least partially) selected, so descend into it.
383 Start
= Op
.SelectedOperands
.front();
// File ranges of the first and last selected operands.
387 toHalfOpenFileRange(SM
, LangOpts
, Start
->ASTNode
.getSourceRange())
389 toHalfOpenFileRange(SM
, LangOpts
, End
->ASTNode
.getSourceRange())
// Returns the range of characters to extract: the adjusted binary-operator
// range when getBinaryOperatorRange() yields a valid one, otherwise the file
// range of the whole expression.
393 SourceRange
ExtractionContext::getExtractionChars() const {
394 // Special case: we're extracting an associative binary subexpression.
395 SourceRange BinaryOperatorRange
=
396 getBinaryOperatorRange(*ExprNode
, SM
, Ctx
.getLangOpts());
397 if (BinaryOperatorRange
.isValid())
398 return BinaryOperatorRange
;
400 // Usual case: we're extracting the whole expression.
// The result of toHalfOpenFileRange is dereferenced without a check here.
401 return *toHalfOpenFileRange(SM
, Ctx
.getLangOpts(), Expr
->getSourceRange());
404 // Find the CallExpr whose callee is the (possibly wrapped) DeclRef
// Looks at the parent of DeclRef's outermost implicit wrapper, casts it to a
// CallExpr, and compares that call's callee with the wrapper expression.
// NOTE(review): the null checks, the declaration of CE and the return
// statements are not visible in this view.
405 const SelectionTree::Node
*getCallExpr(const SelectionTree::Node
*DeclRef
) {
406 const SelectionTree::Node
&MaybeCallee
= DeclRef
->outerImplicit();
407 const SelectionTree::Node
*MaybeCall
= MaybeCallee
.Parent
;
411 llvm::dyn_cast_or_null
<CallExpr
>(MaybeCall
->ASTNode
.get
<Expr
>());
// The parent call only counts if our node is its callee, not an argument.
414 if (CE
->getCallee() != MaybeCallee
.ASTNode
.get
<Expr
>())
419 // Returns true if Inner (which is a direct child of Outer) is appearing as
420 // a statement rather than an expression whose value can be used.
// For loop and if statements, Inner counts as a statement exactly when it is
// the body (or the then/else branch) of Outer.
// NOTE(review): the results for the null, CompoundStmt, SwitchCase and
// fall-through cases are on lines not visible in this view.
421 bool childExprIsStmt(const Stmt
*Outer
, const Expr
*Inner
) {
422 if (!Outer
|| !Inner
)
424 // Exclude the most common places where an expr can appear but be unused.
425 if (llvm::isa
<CompoundStmt
>(Outer
))
427 if (llvm::isa
<SwitchCase
>(Outer
))
429 // Control flow statements use condition etc, but not the body.
430 if (const auto *WS
= llvm::dyn_cast
<WhileStmt
>(Outer
))
431 return Inner
== WS
->getBody();
432 if (const auto *DS
= llvm::dyn_cast
<DoStmt
>(Outer
))
433 return Inner
== DS
->getBody();
434 if (const auto *FS
= llvm::dyn_cast
<ForStmt
>(Outer
))
435 return Inner
== FS
->getBody();
436 if (const auto *FS
= llvm::dyn_cast
<CXXForRangeStmt
>(Outer
))
437 return Inner
== FS
->getBody();
438 if (const auto *IS
= llvm::dyn_cast
<IfStmt
>(Outer
))
439 return Inner
== IS
->getThen() || Inner
== IS
->getElse();
440 // Assume all other cases may be actual expressions.
441 // This includes the important case of subexpressions (where Outer is Expr).
445 // check if N can and should be extracted (e.g. is not void-typed).
// Applies a series of checks to N's expression: its type, whether it is a
// bare DeclRefExpr or an implicit-this member access, whether it is an
// assignment, whether it is used as a statement, and whether it is the full
// right-hand side of an assignment.
// NOTE(review): the return statement of each check is not visible in this
// view.
446 bool eligibleForExtraction(const SelectionTree::Node
*N
) {
447 const Expr
*E
= N
->ASTNode
.get
<Expr
>();
451 // Void expressions can't be assigned to variables.
452 const Type
*ExprType
= E
->getType().getTypePtrOrNull();
453 if (!ExprType
|| ExprType
->isVoidType())
456 // A plain reference to a name (e.g. variable) isn't worth extracting.
457 // FIXME: really? What if it's e.g. `std::is_same<void, void>::value`?
458 if (llvm::isa
<DeclRefExpr
>(E
))
461 // Similarly disallow extraction for member exprs with an implicit `this`.
462 if (const auto *ME
= dyn_cast
<MemberExpr
>(E
))
463 if (const auto *TE
= dyn_cast
<CXXThisExpr
>(ME
->getBase()->IgnoreImpCasts()))
464 if (TE
->isImplicit())
467 // Extracting Exprs like a = 1 gives placeholder = a = 1 which isn't useful.
468 // FIXME: we could still hoist the assignment, and leave the variable there?
469 ParsedBinaryOperator BinOp
;
470 if (BinOp
.parse(*N
) && BinaryOperator::isAssignmentOp(BinOp
.Kind
))
// The remaining checks look at the parent of the outermost implicit wrapper
// around N.
473 const SelectionTree::Node
&OuterImplicit
= N
->outerImplicit();
474 const auto *Parent
= OuterImplicit
.Parent
;
477 // We don't want to extract expressions used as statements, that would leave
478 // a `placeholder;` around that has no effect.
479 // Unfortunately because the AST doesn't have ExprStmt, we have to check in
480 // this roundabout way.
481 if (childExprIsStmt(Parent
->ASTNode
.get
<Stmt
>(),
482 OuterImplicit
.ASTNode
.get
<Expr
>()))
485 // Disable extraction of full RHS on assignment operations, e.g:
486 // auto x = [[RHS_EXPR]];
487 // This would just result in duplicating the code.
488 if (const auto *BO
= Parent
->ASTNode
.get
<BinaryOperator
>()) {
489 if (BO
->isAssignmentOp() &&
490 BO
->getRHS() == OuterImplicit
.ASTNode
.get
<Expr
>())
497 // Find the Expr node that we're going to extract.
498 // We don't want to trigger for assignment expressions and variable/field
499 // DeclRefs. For function/member function, we want to extract the entire
// NOTE(review): the sentence above is cut off in this view; the comment in
// the body below says the whole call is extracted.
// Starts with N as the target; for DeclRefExpr and MemberExpr selections it
// looks up the enclosing call via getCallExpr(). The final target must pass
// eligibleForExtraction().
// NOTE(review): the null checks, assignments to TargetNode and return
// statements are not visible in this view.
501 const SelectionTree::Node
*computeExtractedExpr(const SelectionTree::Node
*N
) {
504 const SelectionTree::Node
*TargetNode
= N
;
505 const clang::Expr
*SelectedExpr
= N
->ASTNode
.get
<clang::Expr
>();
508 // For function and member function DeclRefs, extract the whole call.
509 if (llvm::isa
<DeclRefExpr
>(SelectedExpr
) ||
510 llvm::isa
<MemberExpr
>(SelectedExpr
))
511 if (const SelectionTree::Node
*Call
= getCallExpr(N
))
513 // Extracting Exprs like a = 1 gives placeholder = a = 1 which isn't useful.
514 if (const BinaryOperator
*BinOpExpr
=
515 dyn_cast_or_null
<BinaryOperator
>(SelectedExpr
)) {
516 if (BinOpExpr
->getOpcode() == BinaryOperatorKind::BO_Assign
)
519 if (!TargetNode
|| !eligibleForExtraction(TargetNode
))
524 /// Extracts an expression to the variable placeholder
526 /// int x = 5 + 4 * 3;
529 /// auto placeholder = 5 + 4;
530 /// int x = placeholder * 3;
// Tweak subclass: prepare() decides whether the tweak applies and fills in
// Target; apply() produces the edits.
// NOTE(review): parts of the doc comment above and the access specifiers are
// not visible in this view.
531 class ExtractVariable
: public Tweak
{
533 const char *id() const final
;
534 bool prepare(const Selection
&Inputs
) override
;
535 Expected
<Effect
> apply(const Selection
&Inputs
) override
;
// Fixed title string for this tweak.
536 std::string
title() const override
{
537 return "Extract subexpression to variable";
// Reported as a refactoring code action.
539 llvm::StringLiteral
kind() const override
{
540 return CodeAction::REFACTOR_KIND
;
544 // the expression to extract
// Assigned in prepare() and read in apply().
545 std::unique_ptr
<ExtractionContext
> Target
;
547 REGISTER_TWEAK(ExtractVariable
)
// Builds an ExtractionContext for the node chosen by computeExtractedExpr()
// from the selection's common ancestor. Returns true only when a Target was
// created and it reports itself as extractable.
// NOTE(review): the statement executed for an empty selection is not visible
// in this view.
548 bool ExtractVariable::prepare(const Selection
&Inputs
) {
549 // we don't trigger on empty selections for now
550 if (Inputs
.SelectionBegin
== Inputs
.SelectionEnd
)
552 const ASTContext
&Ctx
= Inputs
.AST
->getASTContext();
553 const SourceManager
&SM
= Inputs
.AST
->getSourceManager();
// Target is only assigned when a suitable node was found.
554 if (const SelectionTree::Node
*N
=
555 computeExtractedExpr(Inputs
.ASTSelection
.commonAncestor()))
556 Target
= std::make_unique
<ExtractionContext
>(N
, SM
, Ctx
);
557 return Target
&& Target
->isExtractable();
// Produces two replacements over the same extraction range: one inserting the
// declaration of the new variable and one replacing the expression with the
// variable's name. An error from adding either replacement is returned to the
// caller.
// Dereferences Target without a check, so it relies on Target having been
// set (prepare() is the only visible place that assigns it).
// NOTE(review): the remaining arguments of the final mainFileEdit call are
// not visible in this view.
560 Expected
<Tweak::Effect
> ExtractVariable::apply(const Selection
&Inputs
) {
561 tooling::Replacements Result
;
562 // FIXME: get variable name from user or suggest based on type
563 std::string VarName
= "placeholder";
564 SourceRange Range
= Target
->getExtractionChars();
565 // insert new variable declaration
566 if (auto Err
= Result
.add(Target
->insertDeclaration(VarName
, Range
)))
567 return std::move(Err
);
568 // replace expression with variable name
569 if (auto Err
= Result
.add(Target
->replaceWithVar(Range
, VarName
)))
570 return std::move(Err
);
571 return Effect::mainFileEdit(Inputs
.AST
->getSourceManager(),
576 } // namespace clangd