1 //===--- ExtractFunction.cpp -------------------------------------*- C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Extracts statements to a new function and replaces the statements with a
10 // call to the new function.
17 // void extracted(int &a) {
25 // - Only extract statements
26 // - Extracts from non-templated free functions only.
27 // - Parameters are const only if the declaration was const
28 // - Always passed by l-value reference
30 // - Cannot extract declarations that will be needed in the original function
32 // - Checks for broken control flow (break/continue without loop/switch)
34 // 1. ExtractFunction is the tweak subclass
35 // - Prepare does basic analysis of the selection and is therefore fast.
36 // Successful prepare doesn't always mean we can apply the tweak.
37 // - Apply does a more detailed analysis and can be slower. In case of
38 // failure, we let the user know that we are unable to perform extraction.
39 // 2. ExtractionZone stores information about the range being extracted and the
40 // enclosing function.
41 // 3. NewFunction stores properties of the extracted function and provides
42 // methods for rendering it.
43 // 4. CapturedZoneInfo uses a RecursiveASTVisitor to capture information about
44 // the extraction like declarations, existing return statements, etc.
45 // 5. getExtractedFunction is responsible for analyzing the CapturedZoneInfo and
46 // creating a NewFunction.
47 //===----------------------------------------------------------------------===//
50 #include "FindTarget.h"
51 #include "ParsedAST.h"
52 #include "Selection.h"
53 #include "SourceCode.h"
54 #include "refactor/Tweak.h"
55 #include "support/Logger.h"
56 #include "clang/AST/ASTContext.h"
57 #include "clang/AST/Decl.h"
58 #include "clang/AST/DeclBase.h"
59 #include "clang/AST/NestedNameSpecifier.h"
60 #include "clang/AST/RecursiveASTVisitor.h"
61 #include "clang/AST/Stmt.h"
62 #include "clang/Basic/LangOptions.h"
63 #include "clang/Basic/SourceLocation.h"
64 #include "clang/Basic/SourceManager.h"
65 #include "clang/Tooling/Core/Replacement.h"
66 #include "clang/Tooling/Refactoring/Extract/SourceExtraction.h"
67 #include "llvm/ADT/STLExtras.h"
68 #include "llvm/ADT/SmallSet.h"
69 #include "llvm/ADT/SmallVector.h"
70 #include "llvm/ADT/StringRef.h"
71 #include "llvm/Support/Casting.h"
72 #include "llvm/Support/Error.h"
73 #include "llvm/Support/raw_os_ostream.h"
// Shorthand for the selection-tree node type used throughout this file.
80 using Node
= SelectionTree::Node
;
82 // ExtractionZone is the part of code that is being extracted.
83 // EnclosingFunction is the function/method inside which the zone lies.
84 // We split the file into 4 parts relative to the extraction zone.
// ZoneRelative names which of those four parts a declaration or a reference
// falls into.
85 enum class ZoneRelative
{
86 Before
, // Before Zone and inside EnclosingFunction.
87 Inside
, // Inside Zone.
88 After
, // After Zone and inside EnclosingFunction.
89 OutsideFunc
// Outside EnclosingFunction.
// Selects which form of the extracted function is rendered. The enumerators
// are not visible in this excerpt; code further down refers to
// InlineDefinition, OutOfLineDefinition and ForwardDeclaration.
92 enum FunctionDeclKind
{
98 // A RootStmt is a statement that's fully selected including all its children
99 // and its parent is unselected.
100 // Check if a node is a root statement.
// The checks below run in order: N must wrap a Stmt, must not be partially
// selected, and may be unselected only when it is a DeclStmt. The value
// returned after each check is not visible in this excerpt.
101 bool isRootStmt(const Node
*N
) {
102 if (!N
->ASTNode
.get
<Stmt
>())
104 // Root statement cannot be partially selected.
105 if (N
->Selected
== SelectionTree::Partial
)
107 // Only DeclStmt can be an unselected RootStmt since VarDecls claim the entire
108 // selection range in selectionTree.
109 if (N
->Selected
== SelectionTree::Unselected
&& !N
->ASTNode
.get
<DeclStmt
>())
114 // Returns the (unselected) parent of all RootStmts given the commonAncestor.
// NOTE(review): the heading of the numbered list below is missing from this
// excerpt; the items read as the cases in which null is returned.
116 // 1. any node is partially selected
117 // 2. If all completely selected nodes don't have the same common parent
118 // 3. Any child of Parent isn't a RootStmt.
119 // Returns null if any child is not a RootStmt.
120 // We only support extraction of RootStmts since it allows us to extract without
121 // having to change the selection range. Also, this means that any scope that
122 // begins in selection range, ends in selection range and any scope that begins
123 // outside the selection range, ends outside as well.
124 const Node
*getParentOfRootStmts(const Node
*CommonAnc
) {
127 const Node
*Parent
= nullptr;
// Pick the candidate parent according to how the common ancestor itself is
// selected.
128 switch (CommonAnc
->Selected
) {
129 case SelectionTree::Selection::Unselected
:
130 // Typically a block, with the { and } unselected, could also be ForStmt etc
131 // Ensure all Children are RootStmts.
134 case SelectionTree::Selection::Partial
:
135 // Only a fully-selected single statement can be selected.
137 case SelectionTree::Selection::Complete
:
138 // If the Common Ancestor is completely selected, then it's a root statement
139 // and its parent will be unselected.
140 Parent
= CommonAnc
->Parent
;
141 // If parent is a DeclStmt, even though it's unselected, we consider it a
142 // root statement and return its parent. This is done because the VarDecls
143 // claim the entire selection range of the Declaration and DeclStmt is
144 // always unselected.
145 if (Parent
->ASTNode
.get
<DeclStmt
>())
146 Parent
= Parent
->Parent
;
149 // Ensure all Children are RootStmts.
// Parent is returned only when every one of its children passes isRootStmt.
150 return llvm::all_of(Parent
->Children
, isRootStmt
) ? Parent
: nullptr;
153 // The ExtractionZone class forms a view of the code wrt Zone.
// It records the parent node of the extracted statements, the source ranges
// of the zone and of the enclosing function, and the set of root statements.
154 struct ExtractionZone
{
155 // Parent of RootStatements being extracted.
156 const Node
*Parent
= nullptr;
157 // The half-open file range of the code being extracted.
158 SourceRange ZoneRange
;
159 // The function inside which our zone resides.
160 const FunctionDecl
*EnclosingFunction
= nullptr;
161 // The half-open file range of the enclosing function.
162 SourceRange EnclosingFuncRange
;
163 // Set of statements that form the ExtractionZone.
164 llvm::DenseSet
<const Stmt
*> RootStmts
;
// Returns the beginning of EnclosingFuncRange.
166 SourceLocation
getInsertionPoint() const {
167 return EnclosingFuncRange
.getBegin();
// Membership test against RootStmts; defined out of line below.
169 bool isRootStmt(const Stmt
*S
) const;
170 // The last root statement is important to decide where we need to insert a
171 // semicolon after the extraction.
// Returns the last child of Parent; Parent must be non-null and have at
// least one child, since neither is checked here.
172 const Node
*getLastRootStmt() const { return Parent
->Children
.back(); }
174 // Checks if declarations inside extraction zone are accessed afterwards.
176 // This performs a partial AST traversal proportional to the size of the
177 // enclosing function, so it is possibly expensive.
178 bool requiresHoisting(const SourceManager
&SM
,
179 const HeuristicResolver
*Resolver
) const {
180 // First find all the declarations that happened inside extraction zone.
181 llvm::SmallSet
<const Decl
*, 1> DeclsInExtZone
;
182 for (auto *RootStmt
: RootStmts
) {
183 findExplicitReferences(
185 [&DeclsInExtZone
](const ReferenceLoc
&Loc
) {
// Records the first target of the reference. NOTE(review): the filter that
// precedes this insertion is not visible in this excerpt.
188 DeclsInExtZone
.insert(Loc
.Targets
.front());
192 // Early exit without performing expensive traversal below.
193 if (DeclsInExtZone
.empty())
195 // Then make sure they are not used outside the zone.
196 for (const auto *S
: EnclosingFunction
->getBody()->children()) {
197 if (SM
.isBeforeInTranslationUnit(S
->getSourceRange().getEnd(),
200 bool HasPostUse
= false;
201 findExplicitReferences(
203 [&](const ReferenceLoc
&Loc
) {
205 SM
.isBeforeInTranslationUnit(Loc
.NameLoc
, ZoneRange
.getEnd()))
// A post-zone use exists when any target of the reference was declared
// inside the zone.
207 HasPostUse
= llvm::any_of(Loc
.Targets
,
208 [&DeclsInExtZone
](const Decl
*Target
) {
209 return DeclsInExtZone
.contains(Target
);
220 // Whether the code in the extraction zone is guaranteed to return, assuming
221 // no broken control flow (unbound break/continue).
222 // This is a very naive check (does it end with a return stmt).
223 // Doing some rudimentary control flow analysis would cover more cases.
// Only the last root statement of EZ is inspected.
224 bool alwaysReturns(const ExtractionZone
&EZ
) {
225 const Stmt
*Last
= EZ
.getLastRootStmt()->ASTNode
.get
<Stmt
>();
226 // Unwrap enclosing (unconditional) compound statement.
// Each iteration descends into the final statement of the compound body; the
// handling of an empty body is not visible in this excerpt.
227 while (const auto *CS
= llvm::dyn_cast
<CompoundStmt
>(Last
)) {
228 if (CS
->body_empty())
230 Last
= CS
->body_back();
232 return llvm::isa
<ReturnStmt
>(Last
);
// Returns true if S is one of the statements stored in RootStmts.
235 bool ExtractionZone::isRootStmt(const Stmt
*S
) const {
236 return RootStmts
.contains(S
);
239 // Finds the function in which the zone lies.
// Starts at CommonAnc and follows Parent links. Several conditions below
// (lambda, templated function, missing body, invalid body statements) guard
// branches whose results are not visible in this excerpt.
240 const FunctionDecl
*findEnclosingFunction(const Node
*CommonAnc
) {
241 // Walk up the SelectionTree until we find a function Decl
242 for (const Node
*CurNode
= CommonAnc
; CurNode
; CurNode
= CurNode
->Parent
) {
243 // Don't extract from lambdas
244 if (CurNode
->ASTNode
.get
<LambdaExpr
>())
246 if (const FunctionDecl
*Func
= CurNode
->ASTNode
.get
<FunctionDecl
>()) {
247 // FIXME: Support extraction from templated functions.
248 if (Func
->isTemplated())
250 if (!Func
->getBody())
252 for (const auto *S
: Func
->getBody()->children()) {
253 // During apply phase, we perform semantic analysis (e.g. figure out
254 // what variables requires hoisting). We cannot perform those when the
255 // body has invalid statements, so fail up front.
265 // Zone Range is the union of SourceRanges of all child Nodes in Parent since
266 // all child Nodes are RootStmts
// The begin comes from the half-open file range of the first child and the
// end from that of the last child. Parent must have at least one child, since
// front()/back() are called without a check here.
267 std::optional
<SourceRange
> findZoneRange(const Node
*Parent
,
268 const SourceManager
&SM
,
269 const LangOptions
&LangOpts
) {
271 if (auto BeginFileRange
= toHalfOpenFileRange(
272 SM
, LangOpts
, Parent
->Children
.front()->ASTNode
.getSourceRange()))
273 SR
.setBegin(BeginFileRange
->getBegin());
276 if (auto EndFileRange
= toHalfOpenFileRange(
277 SM
, LangOpts
, Parent
->Children
.back()->ASTNode
.getSourceRange()))
278 SR
.setEnd(EndFileRange
->getEnd());
284 // Compute the range spanned by the enclosing function.
285 // FIXME: check if EnclosingFunction has any attributes as the AST doesn't
286 // always store the source range of the attributes and thus we end up extracting
287 // between the attributes and the EnclosingFunction.
// Forwards the function's source range to toHalfOpenFileRange and returns its
// result unchanged.
288 std::optional
<SourceRange
>
289 computeEnclosingFuncRange(const FunctionDecl
*EnclosingFunction
,
290 const SourceManager
&SM
,
291 const LangOptions
&LangOpts
) {
292 return toHalfOpenFileRange(SM
, LangOpts
, EnclosingFunction
->getSourceRange());
295 // returns true if Child can be a single RootStmt being extracted from
// EnclosingFunc. Two cases are tested below: Child being an expression, and
// Child being the body of EnclosingFunc itself.
297 bool validSingleChild(const Node
*Child
, const FunctionDecl
*EnclosingFunc
) {
298 // Don't extract expressions.
299 // FIXME: We should extract expressions that are "statements" i.e. not
301 if (Child
->ASTNode
.get
<Expr
>())
303 // Extracting the body of EnclosingFunc would remove its definition.
304 assert(EnclosingFunc
->hasBody() &&
305 "We should always be extracting from a function body.");
306 if (Child
->ASTNode
.get
<Stmt
>() == EnclosingFunc
->getBody())
311 // FIXME: Check we're not extracting from the initializer/condition of a control
// Builds an ExtractionZone for the selection whose common ancestor is
// CommonAnc. It fills in Parent, EnclosingFunction, both source ranges and
// RootStmts, in that order. The failure results of the checks below are not
// visible in this excerpt.
313 std::optional
<ExtractionZone
> findExtractionZone(const Node
*CommonAnc
,
314 const SourceManager
&SM
,
315 const LangOptions
&LangOpts
) {
316 ExtractionZone ExtZone
;
317 ExtZone
.Parent
= getParentOfRootStmts(CommonAnc
);
318 if (!ExtZone
.Parent
|| ExtZone
.Parent
->Children
.empty())
320 ExtZone
.EnclosingFunction
= findEnclosingFunction(ExtZone
.Parent
);
321 if (!ExtZone
.EnclosingFunction
)
323 // When there is a single RootStmt, we must check if it's valid for
325 if (ExtZone
.Parent
->Children
.size() == 1 &&
326 !validSingleChild(ExtZone
.getLastRootStmt(), ExtZone
.EnclosingFunction
))
329 computeEnclosingFuncRange(ExtZone
.EnclosingFunction
, SM
, LangOpts
))
330 ExtZone
.EnclosingFuncRange
= *FuncRange
;
331 if (auto ZoneRange
= findZoneRange(ExtZone
.Parent
, SM
, LangOpts
))
332 ExtZone
.ZoneRange
= *ZoneRange
;
// Either range stays invalid when its computation above yielded no value.
333 if (ExtZone
.EnclosingFuncRange
.isInvalid() || ExtZone
.ZoneRange
.isInvalid())
// Every child of Parent is a root statement of the zone.
336 for (const Node
*Child
: ExtZone
.Parent
->Children
)
337 ExtZone
.RootStmts
.insert(Child
->ASTNode
.get
<Stmt
>());
342 // Stores information about the extracted function and provides methods for
// rendering it.
// NOTE(review): the struct headers of NewFunction and its nested Parameter,
// and several members used elsewhere in this file (Parameter::Name,
// Parameter::TypeInfo, ReturnType, Static, Const), are not visible in this
// excerpt.
348 bool PassByReference
;
349 unsigned OrderPriority
; // Lower value parameters are preferred first.
// Renders this parameter as source text; defined out of line below.
350 std::string
render(const DeclContext
*Context
) const;
// Orders parameters by OrderPriority so they can be sorted.
351 bool operator<(const Parameter
&Other
) const {
352 return OrderPriority
< Other
.OrderPriority
;
// Default name given to the extracted function.
355 std::string Name
= "extracted";
357 std::vector
<Parameter
> Parameters
;
// Source range that becomes the body of the extracted function and that is
// replaced by the call.
358 SourceRange BodyRange
;
// Location at which the new definition is inserted.
359 SourceLocation DefinitionPoint
;
// Set only when a separate forward declaration has to be emitted.
360 std::optional
<SourceLocation
> ForwardDeclarationPoint
;
361 const CXXRecordDecl
*EnclosingClass
= nullptr;
362 const NestedNameSpecifier
*DefinitionQualifier
= nullptr;
363 const DeclContext
*SemanticDC
= nullptr;
364 const DeclContext
*SyntacticDC
= nullptr;
365 const DeclContext
*ForwardDeclarationSyntacticDC
= nullptr;
// When true, the rendered call is prefixed with "return ".
366 bool CallerReturnsValue
= false;
368 ConstexprSpecKind Constexpr
= ConstexprSpecKind::Unspecified
;
371 // Decides whether the extracted function body and the function call need a
372 // semicolon after extraction.
373 tooling::ExtractionSemicolonPolicy SemicolonPolicy
;
374 const LangOptions
*LangOpts
;
375 NewFunction(tooling::ExtractionSemicolonPolicy SemicolonPolicy
,
376 const LangOptions
*LangOpts
)
377 : SemicolonPolicy(SemicolonPolicy
), LangOpts(LangOpts
) {}
378 // Render the call for this function.
379 std::string
renderCall() const;
380 // Render the definition for this function.
381 std::string
renderDeclaration(FunctionDeclKind K
,
382 const DeclContext
&SemanticDC
,
383 const DeclContext
&SyntacticDC
,
384 const SourceManager
&SM
) const;
// Helpers used by the two render functions above.
388 renderParametersForDeclaration(const DeclContext
&Enclosing
) const;
389 std::string
renderParametersForCall() const;
390 std::string
renderSpecifiers(FunctionDeclKind K
) const;
391 std::string
renderQualifiers() const;
392 std::string
renderDeclarationName(FunctionDeclKind K
) const;
393 // Generate the function body.
394 std::string
getFuncBody(const SourceManager
&SM
) const;
// Builds the parameter list of the declaration by appending the rendering of
// each Parameter. NeedCommaBefore is set once a parameter has been visited;
// the code that emits the separator is not visible in this excerpt.
397 std::string
NewFunction::renderParametersForDeclaration(
398 const DeclContext
&Enclosing
) const {
400 bool NeedCommaBefore
= false;
401 for (const Parameter
&P
: Parameters
) {
404 NeedCommaBefore
= true;
405 Result
+= P
.render(&Enclosing
);
// Builds the argument list used at the call site. Only the loop over
// Parameters and the NeedCommaBefore flag are visible in this excerpt; the
// text appended per parameter is not.
410 std::string
NewFunction::renderParametersForCall() const {
412 bool NeedCommaBefore
= false;
413 for (const Parameter
&P
: Parameters
) {
416 NeedCommaBefore
= true;
// Renders the specifiers that precede the return type. "static " is added for
// a static function unless K is OutOfLineDefinition; "constexpr " and
// "consteval " are added in the matching ConstexprSpecKind cases.
422 std::string
NewFunction::renderSpecifiers(FunctionDeclKind K
) const {
423 std::string Attributes
;
425 if (Static
&& K
!= FunctionDeclKind::OutOfLineDefinition
) {
426 Attributes
+= "static ";
// NOTE(review): the switch header is not visible in this excerpt; presumably
// it switches on the Constexpr member - confirm.
430 case ConstexprSpecKind::Unspecified
:
431 case ConstexprSpecKind::Constinit
:
433 case ConstexprSpecKind::Constexpr
:
434 Attributes
+= "constexpr ";
436 case ConstexprSpecKind::Consteval
:
437 Attributes
+= "consteval ";
// Renders the qualifiers that follow the parameter list.
// NOTE(review): the condition guarding the " const" append is not visible in
// this excerpt; presumably it is the Const flag set in captureMethodInfo -
// confirm.
444 std::string
NewFunction::renderQualifiers() const {
445 std::string Attributes
;
448 Attributes
+= " const";
// Renders the function name as it appears in a declaration of kind K.
// For an out-of-line definition with a DefinitionQualifier, the printed
// qualifier is placed in front of Name. The result of the other branch is not
// visible in this excerpt.
454 std::string
NewFunction::renderDeclarationName(FunctionDeclKind K
) const {
455 if (DefinitionQualifier
== nullptr || K
!= OutOfLineDefinition
) {
459 std::string QualifierName
;
460 llvm::raw_string_ostream
Oss(QualifierName
);
461 DefinitionQualifier
->print(Oss
, *LangOpts
);
462 return llvm::formatv("{0}{1}", QualifierName
, Name
);
// Renders the call that replaces the extracted code: an optional "return "
// (when CallerReturnsValue), the function name, the parenthesised arguments,
// and a trailing ";" when the semicolon policy requires one in the original
// function.
465 std::string
NewFunction::renderCall() const {
467 llvm::formatv("{0}{1}({2}){3}", CallerReturnsValue
? "return " : "", Name
,
468 renderParametersForCall(),
469 (SemicolonPolicy
.isNeededInOriginalFunction() ? ";" : "")));
// Renders the function as a declaration of kind K.
// The signature is "<specifiers><return type> <name>(<params>)<qualifiers>".
// A ForwardDeclaration is the signature followed by ";\n"; both definition
// kinds append the body wrapped in braces.
472 std::string
NewFunction::renderDeclaration(FunctionDeclKind K
,
473 const DeclContext
&SemanticDC
,
474 const DeclContext
&SyntacticDC
,
475 const SourceManager
&SM
) const {
476 std::string Declaration
= std::string(llvm::formatv(
477 "{0}{1} {2}({3}){4}", renderSpecifiers(K
),
478 printType(ReturnType
, SyntacticDC
), renderDeclarationName(K
),
479 renderParametersForDeclaration(SemanticDC
), renderQualifiers()));
482 case ForwardDeclaration
:
483 return std::string(llvm::formatv("{0};\n", Declaration
));
484 case OutOfLineDefinition
:
485 case InlineDefinition
:
487 llvm::formatv("{0} {\n{1}\n}\n", Declaration
, getFuncBody(SM
)));
490 llvm_unreachable("Unsupported FunctionDeclKind enum");
// Returns the source text of BodyRange, followed by ";" when the semicolon
// policy requires one in the extracted function.
493 std::string
NewFunction::getFuncBody(const SourceManager
&SM
) const {
494 // FIXME: Generate tooling::Replacements instead of std::string to
496 // - add return statement
498 return toSourceCode(SM
, BodyRange
).str() +
499 (SemicolonPolicy
.isNeededInExtractedFunction() ? ";" : "");
// Renders the parameter as "<type> &<name>" when it is passed by reference
// and as "<type> <name>" otherwise. Context is dereferenced unconditionally,
// so it must not be null.
502 std::string
NewFunction::Parameter::render(const DeclContext
*Context
) const {
503 return printType(TypeInfo
, *Context
) + (PassByReference
? " &" : " ") + Name
;
506 // Stores captured information about Extraction Zone.
507 struct CapturedZoneInfo
{
// Per-declaration record: where the declaration lives relative to the zone
// and where it is referenced. NOTE(review): the TheDecl and DeclIndex members
// initialised by the constructor are not visible in this excerpt.
508 struct DeclInformation
{
510 ZoneRelative DeclaredIn
;
511 // index of the declaration or first reference.
513 bool IsReferencedInZone
= false;
514 bool IsReferencedInPostZone
= false;
515 // FIXME: Capture mutation information
516 DeclInformation(const Decl
*TheDecl
, ZoneRelative DeclaredIn
,
518 : TheDecl(TheDecl
), DeclaredIn(DeclaredIn
), DeclIndex(DeclIndex
){};
519 // Marks the occurrence of a reference for this declaration
520 void markOccurence(ZoneRelative ReferenceLoc
);
522 // Maps Decls to their DeclInfo
523 llvm::DenseMap
<const Decl
*, DeclInformation
> DeclInfoMap
;
524 bool HasReturnStmt
= false; // Are there any return statements in the zone?
525 bool AlwaysReturns
= false; // Does the zone always return?
526 // Control flow is broken if we are extracting a break/continue without a
527 // corresponding parent loop/switch
528 bool BrokenControlFlow
= false;
529 // FIXME: capture TypeAliasDecl and UsingDirectiveDecl
530 // FIXME: Capture type information as well.
// Inserts a record for D and returns a pointer to it.
531 DeclInformation
*createDeclInfo(const Decl
*D
, ZoneRelative RelativeLoc
);
// Looks up the record for D in DeclInfoMap.
532 DeclInformation
*getDeclInfoFor(const Decl
*D
);
// Inserts a DeclInformation for D, declared in RelativeLoc, into DeclInfoMap
// and returns a pointer to the mapped value of the entry that insert() yields.
535 CapturedZoneInfo::DeclInformation
*
536 CapturedZoneInfo::createDeclInfo(const Decl
*D
, ZoneRelative RelativeLoc
) {
537 // The new Decl's index is the size of the map so far.
538 auto InsertionResult
= DeclInfoMap
.insert(
539 {D
, DeclInformation(D
, RelativeLoc
, DeclInfoMap
.size())});
540 // Return the newly created DeclInfo
541 return &InsertionResult
.first
->second
;
// Returns a pointer to the DeclInformation stored for D.
544 CapturedZoneInfo::DeclInformation
*
545 CapturedZoneInfo::getDeclInfoFor(const Decl
*D
) {
546 // If the Decl doesn't exist, we take the branch below.
// NOTE(review): the result of that branch is not visible in this excerpt;
// presumably it is a null pointer - confirm.
547 auto Iter
= DeclInfoMap
.find(D
);
548 if (Iter
== DeclInfoMap
.end())
550 return &Iter
->second
;
// Records where a reference to this declaration occurred: a reference inside
// the zone sets IsReferencedInZone, one after the zone sets
// IsReferencedInPostZone. Handling of the other locations is not visible in
// this excerpt.
553 void CapturedZoneInfo::DeclInformation::markOccurence(
554 ZoneRelative ReferenceLoc
) {
555 switch (ReferenceLoc
) {
556 case ZoneRelative::Inside
:
557 IsReferencedInZone
= true;
559 case ZoneRelative::After
:
560 IsReferencedInPostZone
= true;
// Returns true if S is a for, do, while or range-based for statement.
567 bool isLoop(const Stmt
*S
) {
568 return isa
<ForStmt
>(S
) || isa
<DoStmt
>(S
) || isa
<WhileStmt
>(S
) ||
569 isa
<CXXForRangeStmt
>(S
);
572 // Captures information from Extraction Zone
// Traverses the whole enclosing function with a local RecursiveASTVisitor and
// collects declarations, references, return statements and unbound
// break/continue statements into a CapturedZoneInfo. AlwaysReturns is filled
// in afterwards from alwaysReturns().
573 CapturedZoneInfo
captureZoneInfo(const ExtractionZone
&ExtZone
) {
574 // We use the ASTVisitor instead of using the selection tree since we need to
575 // find references in the PostZone as well.
576 // FIXME: Check which statements we don't allow to extract.
577 class ExtractionZoneVisitor
578 : public clang::RecursiveASTVisitor
<ExtractionZoneVisitor
> {
// The traversal runs from the constructor, so Info is fully populated once
// the visitor has been constructed.
580 ExtractionZoneVisitor(const ExtractionZone
&ExtZone
) : ExtZone(ExtZone
) {
581 TraverseDecl(const_cast<FunctionDecl
*>(ExtZone
.EnclosingFunction
));
// Tracks CurrentLocation while descending: it becomes Inside when traversal
// of a root statement starts and After once it finishes. The conditions
// guarding those assignments are not visible in this excerpt.
584 bool TraverseStmt(Stmt
*S
) {
587 bool IsRootStmt
= ExtZone
.isRootStmt(const_cast<const Stmt
*>(S
));
588 // If we are starting traversal of a RootStmt, we are somewhere inside
591 CurrentLocation
= ZoneRelative::Inside
;
592 addToLoopSwitchCounters(S
, 1);
593 // Traverse using base class's TraverseStmt
594 RecursiveASTVisitor::TraverseStmt(S
);
595 addToLoopSwitchCounters(S
, -1);
596 // We set the current location as after since next stmt will either be a
597 // RootStmt (handled at the beginning) or after extractionZone
599 CurrentLocation
= ZoneRelative::After
;
603 // Add Increment to CurNumberOf{Loops,Switch} if statement is
604 // {Loop,Switch} and inside Extraction Zone.
605 void addToLoopSwitchCounters(Stmt
*S
, int Increment
) {
606 if (CurrentLocation
!= ZoneRelative::Inside
)
609 CurNumberOfNestedLoops
+= Increment
;
610 else if (isa
<SwitchStmt
>(S
))
611 CurNumberOfSwitch
+= Increment
;
// Every visited declaration is recorded with the location current at the time
// of the visit.
614 bool VisitDecl(Decl
*D
) {
615 Info
.createDeclInfo(D
, CurrentLocation
);
619 bool VisitDeclRefExpr(DeclRefExpr
*DRE
) {
620 // Find the corresponding Decl and mark its occurrence.
621 const Decl
*D
= DRE
->getDecl();
622 auto *DeclInfo
= Info
.getDeclInfoFor(D
);
623 // If no Decl was found, the Decl must be outside the enclosingFunc.
625 DeclInfo
= Info
.createDeclInfo(D
, ZoneRelative::OutsideFunc
);
626 DeclInfo
->markOccurence(CurrentLocation
);
627 // FIXME: check if reference mutates the Decl being referred.
// Only return statements inside the zone are recorded.
631 bool VisitReturnStmt(ReturnStmt
*Return
) {
632 if (CurrentLocation
== ZoneRelative::Inside
)
633 Info
.HasReturnStmt
= true;
637 bool VisitBreakStmt(BreakStmt
*Break
) {
638 // Control flow is broken if break statement is selected without any
639 // parent loop or switch statement.
640 if (CurrentLocation
== ZoneRelative::Inside
&&
641 !(CurNumberOfNestedLoops
|| CurNumberOfSwitch
))
642 Info
.BrokenControlFlow
= true;
646 bool VisitContinueStmt(ContinueStmt
*Continue
) {
647 // Control flow is broken if Continue statement is selected without any
// enclosing loop inside the zone (a switch does not bind a continue).
649 if (CurrentLocation
== ZoneRelative::Inside
&& !CurNumberOfNestedLoops
)
650 Info
.BrokenControlFlow
= true;
653 CapturedZoneInfo Info
;
654 const ExtractionZone
&ExtZone
;
655 ZoneRelative CurrentLocation
= ZoneRelative::Before
;
656 // Number of {loop,switch} statements that are currently in the traversal
657 // stack inside Extraction Zone. Used to check for broken control flow.
658 unsigned CurNumberOfNestedLoops
= 0;
659 unsigned CurNumberOfSwitch
= 0;
661 ExtractionZoneVisitor
Visitor(ExtZone
);
662 CapturedZoneInfo Result
= std::move(Visitor
.Info
);
663 Result
.AlwaysReturns
= alwaysReturns(ExtZone
);
667 // Adds parameters to ExtractedFunc.
668 // Returns true if able to find the parameters successfully and no hoisting
670 // FIXME: Check if the declaration has a local/anonymous type
// A declaration becomes a parameter only when it is referenced inside the
// zone and declared neither inside the zone nor outside the enclosing
// function. Parameters are finally sorted by their declaration index.
671 bool createParameters(NewFunction
&ExtractedFunc
,
672 const CapturedZoneInfo
&CapturedInfo
) {
673 for (const auto &KeyVal
: CapturedInfo
.DeclInfoMap
) {
674 const auto &DeclInfo
= KeyVal
.second
;
675 // If a Decl was Declared in zone and referenced in post zone, it
676 // needs to be hoisted (we bail out in that case).
677 // FIXME: Support Decl Hoisting.
678 if (DeclInfo
.DeclaredIn
== ZoneRelative::Inside
&&
679 DeclInfo
.IsReferencedInPostZone
)
681 if (!DeclInfo
.IsReferencedInZone
)
682 continue; // no need to pass as parameter, not referenced
683 if (DeclInfo
.DeclaredIn
== ZoneRelative::Inside
||
684 DeclInfo
.DeclaredIn
== ZoneRelative::OutsideFunc
)
685 continue; // no need to pass as parameter, still accessible.
686 // Parameter specific checks.
687 const ValueDecl
*VD
= dyn_cast_or_null
<ValueDecl
>(DeclInfo
.TheDecl
);
688 // Can't parameterise if the Decl isn't a ValueDecl or is a FunctionDecl
689 // (this includes the case of recursive call to EnclosingFunc in Zone).
690 if (!VD
|| isa
<FunctionDecl
>(DeclInfo
.TheDecl
))
692 // Parameter qualifiers are same as the Decl's qualifiers.
// The reference is stripped from the type because render() adds "&" itself
// for by-reference parameters.
693 QualType TypeInfo
= VD
->getType().getNonReferenceType();
694 // FIXME: Need better qualifier checks: check mutated status for
695 // Decl(e.g. was it assigned, passed as nonconst argument, etc)
696 // FIXME: check if parameter will be a non l-value reference.
697 // FIXME: We don't want to always pass variables of types like int,
698 // pointers, etc by reference.
699 bool IsPassedByReference
= true;
700 // We use the index of declaration as the ordering priority for parameters.
701 ExtractedFunc
.Parameters
.push_back({std::string(VD
->getName()), TypeInfo
,
703 DeclInfo
.DeclIndex
});
// Sorting uses Parameter::operator<, i.e. ascending OrderPriority.
705 llvm::sort(ExtractedFunc
.Parameters
);
709 // Clangd uses open ranges while ExtractionSemicolonPolicy (in Clang Tooling)
710 // uses closed ranges. Generates the semicolon policy for the extraction and
711 // extends the ZoneRange if necessary.
// Side effect: ExtZone.ZoneRange's end is overwritten with one past the end
// of FuncBodyRange after compute() has run.
// NOTE(review): this presumes compute() may move FuncBodyRange's end -
// confirm against ExtractionSemicolonPolicy::compute.
712 tooling::ExtractionSemicolonPolicy
713 getSemicolonPolicy(ExtractionZone
&ExtZone
, const SourceManager
&SM
,
714 const LangOptions
&LangOpts
) {
715 // Get closed ZoneRange.
716 SourceRange FuncBodyRange
= {ExtZone
.ZoneRange
.getBegin(),
717 ExtZone
.ZoneRange
.getEnd().getLocWithOffset(-1)};
718 auto SemicolonPolicy
= tooling::ExtractionSemicolonPolicy::compute(
719 ExtZone
.getLastRootStmt()->ASTNode
.get
<Stmt
>(), FuncBodyRange
, SM
,
722 ExtZone
.ZoneRange
.setEnd(FuncBodyRange
.getEnd().getLocWithOffset(1));
723 return SemicolonPolicy
;
726 // Generate return type for ExtractedFunc. Return false if unable to do so.
// When the zone contains a return statement, the enclosing function's return
// type is reused; otherwise the return type is void. The results of the
// failure branches are not visible in this excerpt.
727 bool generateReturnProperties(NewFunction
&ExtractedFunc
,
728 const FunctionDecl
&EnclosingFunc
,
729 const CapturedZoneInfo
&CapturedInfo
) {
730 // If the selected code always returns, we preserve those return statements.
731 // The return type should be the same as the enclosing function.
732 // (Others are possible if there are conversions, but this seems clearest).
733 if (CapturedInfo
.HasReturnStmt
) {
734 // If the return is conditional, neither replacing the code with
735 // `extracted()` nor `return extracted()` is correct.
736 if (!CapturedInfo
.AlwaysReturns
)
738 QualType Ret
= EnclosingFunc
.getReturnType();
739 // Once we support members, it'd be nice to support e.g. extracting a method
740 // of Foo<T> that returns T. But it's not clear when that's safe.
741 if (Ret
->isDependentType())
743 ExtractedFunc
.ReturnType
= Ret
;
746 // FIXME: Generate new return statement if needed.
747 ExtractedFunc
.ReturnType
= EnclosingFunc
.getParentASTContext().VoidTy
;
// Copies method-specific properties of Method into ExtractedFunc: whether it
// is static, whether it is const, and its parent class.
751 void captureMethodInfo(NewFunction
&ExtractedFunc
,
752 const CXXMethodDecl
*Method
) {
753 ExtractedFunc
.Static
= Method
->isStatic();
754 ExtractedFunc
.Const
= Method
->isConst();
755 ExtractedFunc
.EnclosingClass
= Method
->getParent();
758 // FIXME: add support for adding other function return types besides void.
759 // FIXME: assign the value returned by non void extracted function.
// Analyses ExtZone and builds the NewFunction describing the extraction.
// Returns an error for unbound break/continue, for a first declaration whose
// range cannot be mapped to a file range, and when parameters or return
// properties cannot be generated. ExtZone is taken by non-const reference
// because getSemicolonPolicy may adjust its ZoneRange.
760 llvm::Expected
<NewFunction
> getExtractedFunction(ExtractionZone
&ExtZone
,
761 const SourceManager
&SM
,
762 const LangOptions
&LangOpts
) {
763 CapturedZoneInfo CapturedInfo
= captureZoneInfo(ExtZone
);
764 // Bail out if any break or continue exists
765 if (CapturedInfo
.BrokenControlFlow
)
766 return error("Cannot extract break/continue without corresponding "
767 "loop/switch statement.");
768 NewFunction
ExtractedFunc(getSemicolonPolicy(ExtZone
, SM
, LangOpts
),
771 ExtractedFunc
.SyntacticDC
=
772 ExtZone
.EnclosingFunction
->getLexicalDeclContext();
773 ExtractedFunc
.SemanticDC
= ExtZone
.EnclosingFunction
->getDeclContext();
774 ExtractedFunc
.DefinitionQualifier
= ExtZone
.EnclosingFunction
->getQualifier();
775 ExtractedFunc
.Constexpr
= ExtZone
.EnclosingFunction
->getConstexprKind();
777 if (const auto *Method
=
778 llvm::dyn_cast
<CXXMethodDecl
>(ExtZone
.EnclosingFunction
))
779 captureMethodInfo(ExtractedFunc
, Method
);
// An out-of-line definition also needs a forward declaration, placed at the
// beginning of the canonical declaration's file range.
781 if (ExtZone
.EnclosingFunction
->isOutOfLine()) {
782 // FIXME: Put the extracted method in a private section if it's a class or
783 // maybe in an anonymous namespace
784 const auto *FirstOriginalDecl
=
785 ExtZone
.EnclosingFunction
->getCanonicalDecl();
787 toHalfOpenFileRange(SM
, LangOpts
, FirstOriginalDecl
->getSourceRange());
789 return error("Declaration is inside a macro");
790 ExtractedFunc
.ForwardDeclarationPoint
= DeclPos
->getBegin();
791 ExtractedFunc
.ForwardDeclarationSyntacticDC
= ExtractedFunc
.SemanticDC
;
794 ExtractedFunc
.BodyRange
= ExtZone
.ZoneRange
;
795 ExtractedFunc
.DefinitionPoint
= ExtZone
.getInsertionPoint();
797 ExtractedFunc
.CallerReturnsValue
= CapturedInfo
.AlwaysReturns
;
798 if (!createParameters(ExtractedFunc
, CapturedInfo
) ||
799 !generateReturnProperties(ExtractedFunc
, *ExtZone
.EnclosingFunction
,
801 return error("Too complex to extract.");
802 return ExtractedFunc
;
// The tweak itself: prepare() analyses the selection and stores the resulting
// ExtractionZone in ExtZone, which apply() then uses to build the edits.
805 class ExtractFunction
: public Tweak
{
807 const char *id() const final
;
808 bool prepare(const Selection
&Inputs
) override
;
809 Expected
<Effect
> apply(const Selection
&Inputs
) override
;
810 std::string
title() const override
{ return "Extract to function"; }
811 llvm::StringLiteral
kind() const override
{
812 return CodeAction::REFACTOR_KIND
;
// Zone computed by prepare() and consumed by apply().
816 ExtractionZone ExtZone
;
819 REGISTER_TWEAK(ExtractFunction
)
// Creates the replacement that swaps the extracted code (BodyRange, treated
// as a character range) for the rendered call to the new function.
820 tooling::Replacement
replaceWithFuncCall(const NewFunction
&ExtractedFunc
,
821 const SourceManager
&SM
,
822 const LangOptions
&LangOpts
) {
823 std::string FuncCall
= ExtractedFunc
.renderCall();
824 return tooling::Replacement(
825 SM
, CharSourceRange(ExtractedFunc
.BodyRange
, false), FuncCall
, LangOpts
);
// Creates the zero-length replacement that inserts the new function's
// definition at DefinitionPoint. The definition is rendered out of line when
// a forward declaration point exists and inline otherwise. SemanticDC and
// SyntacticDC are dereferenced, so both must be set.
828 tooling::Replacement
createFunctionDefinition(const NewFunction
&ExtractedFunc
,
829 const SourceManager
&SM
) {
830 FunctionDeclKind DeclKind
= InlineDefinition
;
831 if (ExtractedFunc
.ForwardDeclarationPoint
)
832 DeclKind
= OutOfLineDefinition
;
833 std::string FunctionDef
= ExtractedFunc
.renderDeclaration(
834 DeclKind
, *ExtractedFunc
.SemanticDC
, *ExtractedFunc
.SyntacticDC
, SM
);
836 return tooling::Replacement(SM
, ExtractedFunc
.DefinitionPoint
, 0,
// Creates the zero-length replacement that inserts a forward declaration of
// the new function at ForwardDeclarationPoint. The optional is dereferenced
// without a check, so callers must ensure it holds a value.
840 tooling::Replacement
createForwardDeclaration(const NewFunction
&ExtractedFunc
,
841 const SourceManager
&SM
) {
842 std::string FunctionDecl
= ExtractedFunc
.renderDeclaration(
843 ForwardDeclaration
, *ExtractedFunc
.SemanticDC
,
844 *ExtractedFunc
.ForwardDeclarationSyntacticDC
, SM
);
845 SourceLocation DeclPoint
= *ExtractedFunc
.ForwardDeclarationPoint
;
847 return tooling::Replacement(SM
, DeclPoint
, 0, FunctionDecl
);
850 // Returns true if ExtZone contains any ReturnStmts.
// Each root statement of the zone is traversed with a local visitor.
851 bool hasReturnStmt(const ExtractionZone
&ExtZone
) {
852 class ReturnStmtVisitor
853 : public clang::RecursiveASTVisitor
<ReturnStmtVisitor
> {
855 bool VisitReturnStmt(ReturnStmt
*Return
) {
857 return false; // We found the answer, abort the scan.
863 for (const Stmt
*RootStmt
: ExtZone
.RootStmts
) {
// const_cast is needed because TraverseStmt takes a non-const Stmt pointer.
864 V
.TraverseStmt(const_cast<Stmt
*>(RootStmt
));
// Analyses the selection and, on success, stores the extraction zone in
// ExtZone. The checks cover the language being C++, a zone being found, a
// conditional return inside the zone, and declarations needing hoisting. The
// results of the failing branches are not visible in this excerpt.
871 bool ExtractFunction::prepare(const Selection
&Inputs
) {
872 const LangOptions
&LangOpts
= Inputs
.AST
->getLangOpts();
873 if (!LangOpts
.CPlusPlus
)
875 const Node
*CommonAnc
= Inputs
.ASTSelection
.commonAncestor();
876 const SourceManager
&SM
= Inputs
.AST
->getSourceManager();
877 auto MaybeExtZone
= findExtractionZone(CommonAnc
, SM
, LangOpts
);
879 (hasReturnStmt(*MaybeExtZone
) && !alwaysReturns(*MaybeExtZone
)))
882 // FIXME: Get rid of this check once we support hoisting.
883 if (MaybeExtZone
->requiresHoisting(SM
, Inputs
.AST
->getHeuristicResolver()))
886 ExtZone
= std::move(*MaybeExtZone
);
// Produces the edits for the extraction: the new definition inserted at
// DefinitionPoint, the zone replaced by a call, and - when a forward
// declaration point exists - a forward declaration. The forward declaration
// is merged into the same Replacements if it is in the same file as the
// definition and emitted as a separate file edit otherwise. Any failure is
// returned as an error.
890 Expected
<Tweak::Effect
> ExtractFunction::apply(const Selection
&Inputs
) {
891 const SourceManager
&SM
= Inputs
.AST
->getSourceManager();
892 const LangOptions
&LangOpts
= Inputs
.AST
->getLangOpts();
893 auto ExtractedFunc
= getExtractedFunction(ExtZone
, SM
, LangOpts
);
894 // FIXME: Add more types of errors.
896 return ExtractedFunc
.takeError();
897 tooling::Replacements Edit
;
898 if (auto Err
= Edit
.add(createFunctionDefinition(*ExtractedFunc
, SM
)))
899 return std::move(Err
);
900 if (auto Err
= Edit
.add(replaceWithFuncCall(*ExtractedFunc
, SM
, LangOpts
)))
901 return std::move(Err
);
903 if (auto FwdLoc
= ExtractedFunc
->ForwardDeclarationPoint
) {
904 // If the fwd-declaration goes in the same file, merge into Replacements.
905 // Otherwise it needs to be a separate file edit.
906 if (SM
.isWrittenInSameFile(ExtractedFunc
->DefinitionPoint
, *FwdLoc
)) {
907 if (auto Err
= Edit
.add(createForwardDeclaration(*ExtractedFunc
, SM
)))
908 return std::move(Err
);
// Different file: build the main-file effect first, then attach the edit for
// the file containing the forward declaration.
910 auto MultiFileEffect
= Effect::mainFileEdit(SM
, std::move(Edit
));
911 if (!MultiFileEffect
)
912 return MultiFileEffect
.takeError();
914 tooling::Replacements
OtherEdit(
915 createForwardDeclaration(*ExtractedFunc
, SM
));
916 if (auto PathAndEdit
= Tweak::Effect::fileEdit(SM
, SM
.getFileID(*FwdLoc
),
918 MultiFileEffect
->ApplyEdits
.try_emplace(PathAndEdit
->first
,
919 PathAndEdit
->second
);
921 return PathAndEdit
.takeError();
922 return MultiFileEffect
;
// No separate-file forward declaration: everything is a main-file edit.
925 return Effect::mainFileEdit(SM
, std::move(Edit
));
929 } // namespace clangd