1 //===- CodeExtractor.cpp - Pull code region into a new function -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the interface to tear out a code region, such as an
10 // individual loop or a parallel section, into a new function, replacing it with
11 // a call to the new function.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Transforms/Utils/CodeExtractor.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/Analysis/AssumptionCache.h"
23 #include "llvm/Analysis/BlockFrequencyInfo.h"
24 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
25 #include "llvm/Analysis/BranchProbabilityInfo.h"
26 #include "llvm/IR/Argument.h"
27 #include "llvm/IR/Attributes.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/Constant.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/DIBuilder.h"
33 #include "llvm/IR/DataLayout.h"
34 #include "llvm/IR/DebugInfo.h"
35 #include "llvm/IR/DebugInfoMetadata.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/Dominators.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/GlobalValue.h"
40 #include "llvm/IR/InstIterator.h"
41 #include "llvm/IR/InstrTypes.h"
42 #include "llvm/IR/Instruction.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicInst.h"
45 #include "llvm/IR/Intrinsics.h"
46 #include "llvm/IR/LLVMContext.h"
47 #include "llvm/IR/MDBuilder.h"
48 #include "llvm/IR/Module.h"
49 #include "llvm/IR/PatternMatch.h"
50 #include "llvm/IR/Type.h"
51 #include "llvm/IR/User.h"
52 #include "llvm/IR/Value.h"
53 #include "llvm/IR/Verifier.h"
54 #include "llvm/Support/BlockFrequency.h"
55 #include "llvm/Support/BranchProbability.h"
56 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Debug.h"
59 #include "llvm/Support/ErrorHandling.h"
60 #include "llvm/Support/raw_ostream.h"
61 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
70 using namespace llvm::PatternMatch
;
71 using ProfileCount
= Function::ProfileCount
;
73 #define DEBUG_TYPE "code-extractor"
75 // Provide a command-line option to aggregate function arguments into a struct
76 // for functions produced by the code extractor. This is useful when converting
77 // extracted functions to pthread-based code, as only one argument (void*) can
78 // be passed in to pthread_create().
80 AggregateArgsOpt("aggregate-extracted-args", cl::Hidden
,
81 cl::desc("Aggregate arguments to code-extracted functions"));
83 /// Test whether a block is valid for extraction.
84 static bool isBlockValidForExtraction(const BasicBlock
&BB
,
85 const SetVector
<BasicBlock
*> &Result
,
86 bool AllowVarArgs
, bool AllowAlloca
) {
87 // taking the address of a basic block moved to another function is illegal
88 if (BB
.hasAddressTaken())
91 // don't hoist code that uses another basicblock address, as it's likely to
92 // lead to unexpected behavior, like cross-function jumps
93 SmallPtrSet
<User
const *, 16> Visited
;
94 SmallVector
<User
const *, 16> ToVisit
;
96 for (Instruction
const &Inst
: BB
)
97 ToVisit
.push_back(&Inst
);
99 while (!ToVisit
.empty()) {
100 User
const *Curr
= ToVisit
.pop_back_val();
101 if (!Visited
.insert(Curr
).second
)
103 if (isa
<BlockAddress
const>(Curr
))
104 return false; // even a reference to self is likely to be not compatible
106 if (isa
<Instruction
>(Curr
) && cast
<Instruction
>(Curr
)->getParent() != &BB
)
109 for (auto const &U
: Curr
->operands()) {
110 if (auto *UU
= dyn_cast
<User
>(U
))
111 ToVisit
.push_back(UU
);
115 // If explicitly requested, allow vastart and alloca. For invoke instructions
116 // verify that extraction is valid.
117 for (BasicBlock::const_iterator I
= BB
.begin(), E
= BB
.end(); I
!= E
; ++I
) {
118 if (isa
<AllocaInst
>(I
)) {
124 if (const auto *II
= dyn_cast
<InvokeInst
>(I
)) {
125 // Unwind destination (either a landingpad, catchswitch, or cleanuppad)
126 // must be a part of the subgraph which is being extracted.
127 if (auto *UBB
= II
->getUnwindDest())
128 if (!Result
.count(UBB
))
133 // All catch handlers of a catchswitch instruction as well as the unwind
134 // destination must be in the subgraph.
135 if (const auto *CSI
= dyn_cast
<CatchSwitchInst
>(I
)) {
136 if (auto *UBB
= CSI
->getUnwindDest())
137 if (!Result
.count(UBB
))
139 for (const auto *HBB
: CSI
->handlers())
140 if (!Result
.count(const_cast<BasicBlock
*>(HBB
)))
145 // Make sure that entire catch handler is within subgraph. It is sufficient
146 // to check that catch return's block is in the list.
147 if (const auto *CPI
= dyn_cast
<CatchPadInst
>(I
)) {
148 for (const auto *U
: CPI
->users())
149 if (const auto *CRI
= dyn_cast
<CatchReturnInst
>(U
))
150 if (!Result
.count(const_cast<BasicBlock
*>(CRI
->getParent())))
155 // And do similar checks for cleanup handler - the entire handler must be
156 // in subgraph which is going to be extracted. For cleanup return should
157 // additionally check that the unwind destination is also in the subgraph.
158 if (const auto *CPI
= dyn_cast
<CleanupPadInst
>(I
)) {
159 for (const auto *U
: CPI
->users())
160 if (const auto *CRI
= dyn_cast
<CleanupReturnInst
>(U
))
161 if (!Result
.count(const_cast<BasicBlock
*>(CRI
->getParent())))
165 if (const auto *CRI
= dyn_cast
<CleanupReturnInst
>(I
)) {
166 if (auto *UBB
= CRI
->getUnwindDest())
167 if (!Result
.count(UBB
))
172 if (const CallInst
*CI
= dyn_cast
<CallInst
>(I
)) {
173 if (const Function
*F
= CI
->getCalledFunction()) {
174 auto IID
= F
->getIntrinsicID();
175 if (IID
== Intrinsic::vastart
) {
182 // Currently, we miscompile outlined copies of eh_typid_for. There are
183 // proposals for fixing this in llvm.org/PR39545.
184 if (IID
== Intrinsic::eh_typeid_for
)
193 /// Build a set of blocks to extract if the input blocks are viable.
194 static SetVector
<BasicBlock
*>
195 buildExtractionBlockSet(ArrayRef
<BasicBlock
*> BBs
, DominatorTree
*DT
,
196 bool AllowVarArgs
, bool AllowAlloca
) {
197 assert(!BBs
.empty() && "The set of blocks to extract must be non-empty");
198 SetVector
<BasicBlock
*> Result
;
200 // Loop over the blocks, adding them to our set-vector, and aborting with an
201 // empty set if we encounter invalid blocks.
202 for (BasicBlock
*BB
: BBs
) {
203 // If this block is dead, don't process it.
204 if (DT
&& !DT
->isReachableFromEntry(BB
))
207 if (!Result
.insert(BB
))
208 llvm_unreachable("Repeated basic blocks in extraction input");
211 LLVM_DEBUG(dbgs() << "Region front block: " << Result
.front()->getName()
214 for (auto *BB
: Result
) {
215 if (!isBlockValidForExtraction(*BB
, Result
, AllowVarArgs
, AllowAlloca
))
218 // Make sure that the first block is not a landing pad.
219 if (BB
== Result
.front()) {
221 LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n");
227 // All blocks other than the first must not have predecessors outside of
228 // the subgraph which is being extracted.
229 for (auto *PBB
: predecessors(BB
))
230 if (!Result
.count(PBB
)) {
231 LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from "
232 "outside the region except for the first block!\n"
233 << "Problematic source BB: " << BB
->getName() << "\n"
234 << "Problematic destination BB: " << PBB
->getName()
243 CodeExtractor::CodeExtractor(ArrayRef
<BasicBlock
*> BBs
, DominatorTree
*DT
,
244 bool AggregateArgs
, BlockFrequencyInfo
*BFI
,
245 BranchProbabilityInfo
*BPI
, AssumptionCache
*AC
,
246 bool AllowVarArgs
, bool AllowAlloca
,
247 BasicBlock
*AllocationBlock
, std::string Suffix
,
248 bool ArgsInZeroAddressSpace
)
249 : DT(DT
), AggregateArgs(AggregateArgs
|| AggregateArgsOpt
), BFI(BFI
),
250 BPI(BPI
), AC(AC
), AllocationBlock(AllocationBlock
),
251 AllowVarArgs(AllowVarArgs
),
252 Blocks(buildExtractionBlockSet(BBs
, DT
, AllowVarArgs
, AllowAlloca
)),
253 Suffix(Suffix
), ArgsInZeroAddressSpace(ArgsInZeroAddressSpace
) {}
255 /// definedInRegion - Return true if the specified value is defined in the
256 /// extracted region.
257 static bool definedInRegion(const SetVector
<BasicBlock
*> &Blocks
, Value
*V
) {
258 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
259 if (Blocks
.count(I
->getParent()))
264 /// definedInCaller - Return true if the specified value is defined in the
265 /// function being code extracted, but not in the region being extracted.
266 /// These values must be passed in as live-ins to the function.
267 static bool definedInCaller(const SetVector
<BasicBlock
*> &Blocks
, Value
*V
) {
268 if (isa
<Argument
>(V
)) return true;
269 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
270 if (!Blocks
.count(I
->getParent()))
275 static BasicBlock
*getCommonExitBlock(const SetVector
<BasicBlock
*> &Blocks
) {
276 BasicBlock
*CommonExitBlock
= nullptr;
277 auto hasNonCommonExitSucc
= [&](BasicBlock
*Block
) {
278 for (auto *Succ
: successors(Block
)) {
279 // Internal edges, ok.
280 if (Blocks
.count(Succ
))
282 if (!CommonExitBlock
) {
283 CommonExitBlock
= Succ
;
286 if (CommonExitBlock
!= Succ
)
292 if (any_of(Blocks
, hasNonCommonExitSucc
))
295 return CommonExitBlock
;
298 CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function
&F
) {
299 for (BasicBlock
&BB
: F
) {
300 for (Instruction
&II
: BB
.instructionsWithoutDebug())
301 if (auto *AI
= dyn_cast
<AllocaInst
>(&II
))
302 Allocas
.push_back(AI
);
304 findSideEffectInfoForBlock(BB
);
308 void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock
&BB
) {
309 for (Instruction
&II
: BB
.instructionsWithoutDebug()) {
310 unsigned Opcode
= II
.getOpcode();
311 Value
*MemAddr
= nullptr;
313 case Instruction::Store
:
314 case Instruction::Load
: {
315 if (Opcode
== Instruction::Store
) {
316 StoreInst
*SI
= cast
<StoreInst
>(&II
);
317 MemAddr
= SI
->getPointerOperand();
319 LoadInst
*LI
= cast
<LoadInst
>(&II
);
320 MemAddr
= LI
->getPointerOperand();
322 // Global variable can not be aliased with locals.
323 if (isa
<Constant
>(MemAddr
))
325 Value
*Base
= MemAddr
->stripInBoundsConstantOffsets();
326 if (!isa
<AllocaInst
>(Base
)) {
327 SideEffectingBlocks
.insert(&BB
);
330 BaseMemAddrs
[&BB
].insert(Base
);
334 IntrinsicInst
*IntrInst
= dyn_cast
<IntrinsicInst
>(&II
);
336 if (IntrInst
->isLifetimeStartOrEnd())
338 SideEffectingBlocks
.insert(&BB
);
341 // Treat all the other cases conservatively if it has side effects.
342 if (II
.mayHaveSideEffects()) {
343 SideEffectingBlocks
.insert(&BB
);
351 bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr(
352 BasicBlock
&BB
, AllocaInst
*Addr
) const {
353 if (SideEffectingBlocks
.count(&BB
))
355 auto It
= BaseMemAddrs
.find(&BB
);
356 if (It
!= BaseMemAddrs
.end())
357 return It
->second
.count(Addr
);
361 bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
362 const CodeExtractorAnalysisCache
&CEAC
, Instruction
*Addr
) const {
363 AllocaInst
*AI
= cast
<AllocaInst
>(Addr
->stripInBoundsConstantOffsets());
364 Function
*Func
= (*Blocks
.begin())->getParent();
365 for (BasicBlock
&BB
: *Func
) {
366 if (Blocks
.count(&BB
))
368 if (CEAC
.doesBlockContainClobberOfAddr(BB
, AI
))
375 CodeExtractor::findOrCreateBlockForHoisting(BasicBlock
*CommonExitBlock
) {
376 BasicBlock
*SinglePredFromOutlineRegion
= nullptr;
377 assert(!Blocks
.count(CommonExitBlock
) &&
378 "Expect a block outside the region!");
379 for (auto *Pred
: predecessors(CommonExitBlock
)) {
380 if (!Blocks
.count(Pred
))
382 if (!SinglePredFromOutlineRegion
) {
383 SinglePredFromOutlineRegion
= Pred
;
384 } else if (SinglePredFromOutlineRegion
!= Pred
) {
385 SinglePredFromOutlineRegion
= nullptr;
390 if (SinglePredFromOutlineRegion
)
391 return SinglePredFromOutlineRegion
;
394 auto getFirstPHI
= [](BasicBlock
*BB
) {
395 BasicBlock::iterator I
= BB
->begin();
396 PHINode
*FirstPhi
= nullptr;
397 while (I
!= BB
->end()) {
398 PHINode
*Phi
= dyn_cast
<PHINode
>(I
);
408 // If there are any phi nodes, the single pred either exists or has already
409 // be created before code extraction.
410 assert(!getFirstPHI(CommonExitBlock
) && "Phi not expected");
413 BasicBlock
*NewExitBlock
= CommonExitBlock
->splitBasicBlock(
414 CommonExitBlock
->getFirstNonPHI()->getIterator());
416 for (BasicBlock
*Pred
:
417 llvm::make_early_inc_range(predecessors(CommonExitBlock
))) {
418 if (Blocks
.count(Pred
))
420 Pred
->getTerminator()->replaceUsesOfWith(CommonExitBlock
, NewExitBlock
);
422 // Now add the old exit block to the outline region.
423 Blocks
.insert(CommonExitBlock
);
424 return CommonExitBlock
;
427 // Find the pair of life time markers for address 'Addr' that are either
428 // defined inside the outline region or can legally be shrinkwrapped into the
429 // outline region. If there are not other untracked uses of the address, return
430 // the pair of markers if found; otherwise return a pair of nullptr.
431 CodeExtractor::LifetimeMarkerInfo
432 CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache
&CEAC
,
434 BasicBlock
*ExitBlock
) const {
435 LifetimeMarkerInfo Info
;
437 for (User
*U
: Addr
->users()) {
438 IntrinsicInst
*IntrInst
= dyn_cast
<IntrinsicInst
>(U
);
440 // We don't model addresses with multiple start/end markers, but the
441 // markers do not need to be in the region.
442 if (IntrInst
->getIntrinsicID() == Intrinsic::lifetime_start
) {
445 Info
.LifeStart
= IntrInst
;
448 if (IntrInst
->getIntrinsicID() == Intrinsic::lifetime_end
) {
451 Info
.LifeEnd
= IntrInst
;
454 // At this point, permit debug uses outside of the region.
455 // This is fixed in a later call to fixupDebugInfoPostExtraction().
456 if (isa
<DbgInfoIntrinsic
>(IntrInst
))
459 // Find untracked uses of the address, bail.
460 if (!definedInRegion(Blocks
, U
))
464 if (!Info
.LifeStart
|| !Info
.LifeEnd
)
467 Info
.SinkLifeStart
= !definedInRegion(Blocks
, Info
.LifeStart
);
468 Info
.HoistLifeEnd
= !definedInRegion(Blocks
, Info
.LifeEnd
);
469 // Do legality check.
470 if ((Info
.SinkLifeStart
|| Info
.HoistLifeEnd
) &&
471 !isLegalToShrinkwrapLifetimeMarkers(CEAC
, Addr
))
474 // Check to see if we have a place to do hoisting, if not, bail.
475 if (Info
.HoistLifeEnd
&& !ExitBlock
)
481 void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache
&CEAC
,
482 ValueSet
&SinkCands
, ValueSet
&HoistCands
,
483 BasicBlock
*&ExitBlock
) const {
484 Function
*Func
= (*Blocks
.begin())->getParent();
485 ExitBlock
= getCommonExitBlock(Blocks
);
487 auto moveOrIgnoreLifetimeMarkers
=
488 [&](const LifetimeMarkerInfo
&LMI
) -> bool {
491 if (LMI
.SinkLifeStart
) {
492 LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI
.LifeStart
494 SinkCands
.insert(LMI
.LifeStart
);
496 if (LMI
.HoistLifeEnd
) {
497 LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI
.LifeEnd
<< "\n");
498 HoistCands
.insert(LMI
.LifeEnd
);
503 // Look up allocas in the original function in CodeExtractorAnalysisCache, as
504 // this is much faster than walking all the instructions.
505 for (AllocaInst
*AI
: CEAC
.getAllocas()) {
506 BasicBlock
*BB
= AI
->getParent();
507 if (Blocks
.count(BB
))
510 // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca,
511 // check whether it is actually still in the original function.
512 Function
*AIFunc
= BB
->getParent();
516 LifetimeMarkerInfo MarkerInfo
= getLifetimeMarkers(CEAC
, AI
, ExitBlock
);
517 bool Moved
= moveOrIgnoreLifetimeMarkers(MarkerInfo
);
519 LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI
<< "\n");
520 SinkCands
.insert(AI
);
524 // Find bitcasts in the outlined region that have lifetime marker users
525 // outside that region. Replace the lifetime marker use with an
526 // outside region bitcast to avoid unnecessary alloca/reload instructions
527 // and extra lifetime markers.
528 SmallVector
<Instruction
*, 2> LifetimeBitcastUsers
;
529 for (User
*U
: AI
->users()) {
530 if (!definedInRegion(Blocks
, U
))
533 if (U
->stripInBoundsConstantOffsets() != AI
)
536 Instruction
*Bitcast
= cast
<Instruction
>(U
);
537 for (User
*BU
: Bitcast
->users()) {
538 IntrinsicInst
*IntrInst
= dyn_cast
<IntrinsicInst
>(BU
);
542 if (!IntrInst
->isLifetimeStartOrEnd())
545 if (definedInRegion(Blocks
, IntrInst
))
548 LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast"
549 << *Bitcast
<< " in out-of-region lifetime marker "
550 << *IntrInst
<< "\n");
551 LifetimeBitcastUsers
.push_back(IntrInst
);
555 for (Instruction
*I
: LifetimeBitcastUsers
) {
556 Module
*M
= AIFunc
->getParent();
557 LLVMContext
&Ctx
= M
->getContext();
558 auto *Int8PtrTy
= PointerType::getUnqual(Ctx
);
560 CastInst::CreatePointerCast(AI
, Int8PtrTy
, "lt.cast", I
->getIterator());
561 I
->replaceUsesOfWith(I
->getOperand(1), CastI
);
564 // Follow any bitcasts.
565 SmallVector
<Instruction
*, 2> Bitcasts
;
566 SmallVector
<LifetimeMarkerInfo
, 2> BitcastLifetimeInfo
;
567 for (User
*U
: AI
->users()) {
568 if (U
->stripInBoundsConstantOffsets() == AI
) {
569 Instruction
*Bitcast
= cast
<Instruction
>(U
);
570 LifetimeMarkerInfo LMI
= getLifetimeMarkers(CEAC
, Bitcast
, ExitBlock
);
572 Bitcasts
.push_back(Bitcast
);
573 BitcastLifetimeInfo
.push_back(LMI
);
578 // Found unknown use of AI.
579 if (!definedInRegion(Blocks
, U
)) {
585 // Either no bitcasts reference the alloca or there are unknown uses.
586 if (Bitcasts
.empty())
589 LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI
<< "\n");
590 SinkCands
.insert(AI
);
591 for (unsigned I
= 0, E
= Bitcasts
.size(); I
!= E
; ++I
) {
592 Instruction
*BitcastAddr
= Bitcasts
[I
];
593 const LifetimeMarkerInfo
&LMI
= BitcastLifetimeInfo
[I
];
594 assert(LMI
.LifeStart
&&
595 "Unsafe to sink bitcast without lifetime markers");
596 moveOrIgnoreLifetimeMarkers(LMI
);
597 if (!definedInRegion(Blocks
, BitcastAddr
)) {
598 LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
600 SinkCands
.insert(BitcastAddr
);
606 bool CodeExtractor::isEligible() const {
609 BasicBlock
*Header
= *Blocks
.begin();
610 Function
*F
= Header
->getParent();
612 // For functions with varargs, check that varargs handling is only done in the
613 // outlined function, i.e vastart and vaend are only used in outlined blocks.
614 if (AllowVarArgs
&& F
->getFunctionType()->isVarArg()) {
615 auto containsVarArgIntrinsic
= [](const Instruction
&I
) {
616 if (const CallInst
*CI
= dyn_cast
<CallInst
>(&I
))
617 if (const Function
*Callee
= CI
->getCalledFunction())
618 return Callee
->getIntrinsicID() == Intrinsic::vastart
||
619 Callee
->getIntrinsicID() == Intrinsic::vaend
;
623 for (auto &BB
: *F
) {
624 if (Blocks
.count(&BB
))
626 if (llvm::any_of(BB
, containsVarArgIntrinsic
))
630 // stacksave as input implies stackrestore in the outlined function.
631 // This can confuse prolog epilog insertion phase.
632 // stacksave's uses must not cross outlined function.
633 for (BasicBlock
*BB
: Blocks
) {
634 for (Instruction
&I
: *BB
) {
635 IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(&I
);
638 bool IsSave
= II
->getIntrinsicID() == Intrinsic::stacksave
;
639 bool IsRestore
= II
->getIntrinsicID() == Intrinsic::stackrestore
;
640 if (IsSave
&& any_of(II
->users(), [&Blks
= this->Blocks
](User
*U
) {
641 return !definedInRegion(Blks
, U
);
644 if (IsRestore
&& !definedInRegion(Blocks
, II
->getArgOperand(0)))
651 void CodeExtractor::findInputsOutputs(ValueSet
&Inputs
, ValueSet
&Outputs
,
652 const ValueSet
&SinkCands
,
653 bool CollectGlobalInputs
) const {
654 for (BasicBlock
*BB
: Blocks
) {
655 // If a used value is defined outside the region, it's an input. If an
656 // instruction is used outside the region, it's an output.
657 for (Instruction
&II
: *BB
) {
658 for (auto &OI
: II
.operands()) {
660 if (!SinkCands
.count(V
) &&
661 (definedInCaller(Blocks
, V
) ||
662 (CollectGlobalInputs
&& llvm::isa
<llvm::GlobalVariable
>(V
))))
666 for (User
*U
: II
.users())
667 if (!definedInRegion(Blocks
, U
)) {
675 /// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside
676 /// of the region, we need to split the entry block of the region so that the
677 /// PHI node is easier to deal with.
678 void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock
*&Header
) {
679 unsigned NumPredsFromRegion
= 0;
680 unsigned NumPredsOutsideRegion
= 0;
682 if (Header
!= &Header
->getParent()->getEntryBlock()) {
683 PHINode
*PN
= dyn_cast
<PHINode
>(Header
->begin());
684 if (!PN
) return; // No PHI nodes.
686 // If the header node contains any PHI nodes, check to see if there is more
687 // than one entry from outside the region. If so, we need to sever the
688 // header block into two.
689 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
690 if (Blocks
.count(PN
->getIncomingBlock(i
)))
691 ++NumPredsFromRegion
;
693 ++NumPredsOutsideRegion
;
695 // If there is one (or fewer) predecessor from outside the region, we don't
696 // need to do anything special.
697 if (NumPredsOutsideRegion
<= 1) return;
700 // Otherwise, we need to split the header block into two pieces: one
701 // containing PHI nodes merging values from outside of the region, and a
702 // second that contains all of the code for the block and merges back any
703 // incoming values from inside of the region.
704 BasicBlock
*NewBB
= SplitBlock(Header
, Header
->getFirstNonPHI(), DT
);
706 // We only want to code extract the second block now, and it becomes the new
707 // header of the region.
708 BasicBlock
*OldPred
= Header
;
709 Blocks
.remove(OldPred
);
710 Blocks
.insert(NewBB
);
713 // Okay, now we need to adjust the PHI nodes and any branches from within the
714 // region to go to the new header block instead of the old header block.
715 if (NumPredsFromRegion
) {
716 PHINode
*PN
= cast
<PHINode
>(OldPred
->begin());
717 // Loop over all of the predecessors of OldPred that are in the region,
718 // changing them to branch to NewBB instead.
719 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
720 if (Blocks
.count(PN
->getIncomingBlock(i
))) {
721 Instruction
*TI
= PN
->getIncomingBlock(i
)->getTerminator();
722 TI
->replaceUsesOfWith(OldPred
, NewBB
);
725 // Okay, everything within the region is now branching to the right block, we
726 // just have to update the PHI nodes now, inserting PHI nodes into NewBB.
727 BasicBlock::iterator AfterPHIs
;
728 for (AfterPHIs
= OldPred
->begin(); isa
<PHINode
>(AfterPHIs
); ++AfterPHIs
) {
729 PHINode
*PN
= cast
<PHINode
>(AfterPHIs
);
730 // Create a new PHI node in the new region, which has an incoming value
731 // from OldPred of PN.
732 PHINode
*NewPN
= PHINode::Create(PN
->getType(), 1 + NumPredsFromRegion
,
733 PN
->getName() + ".ce");
734 NewPN
->insertBefore(NewBB
->begin());
735 PN
->replaceAllUsesWith(NewPN
);
736 NewPN
->addIncoming(PN
, OldPred
);
738 // Loop over all of the incoming value in PN, moving them to NewPN if they
739 // are from the extracted region.
740 for (unsigned i
= 0; i
!= PN
->getNumIncomingValues(); ++i
) {
741 if (Blocks
.count(PN
->getIncomingBlock(i
))) {
742 NewPN
->addIncoming(PN
->getIncomingValue(i
), PN
->getIncomingBlock(i
));
743 PN
->removeIncomingValue(i
);
751 /// severSplitPHINodesOfExits - if PHI nodes in exit blocks have inputs from
752 /// outlined region, we split these PHIs on two: one with inputs from region
753 /// and other with remaining incoming blocks; then first PHIs are placed in
755 void CodeExtractor::severSplitPHINodesOfExits() {
756 for (BasicBlock
*ExitBB
: ExtractedFuncRetVals
) {
757 BasicBlock
*NewBB
= nullptr;
759 for (PHINode
&PN
: ExitBB
->phis()) {
760 // Find all incoming values from the outlining region.
761 SmallVector
<unsigned, 2> IncomingVals
;
762 for (unsigned i
= 0; i
< PN
.getNumIncomingValues(); ++i
)
763 if (Blocks
.count(PN
.getIncomingBlock(i
)))
764 IncomingVals
.push_back(i
);
766 // Do not process PHI if there is one (or fewer) predecessor from region.
767 // If PHI has exactly one predecessor from region, only this one incoming
768 // will be replaced on codeRepl block, so it should be safe to skip PHI.
769 if (IncomingVals
.size() <= 1)
772 // Create block for new PHIs and add it to the list of outlined if it
773 // wasn't done before.
775 NewBB
= BasicBlock::Create(ExitBB
->getContext(),
776 ExitBB
->getName() + ".split",
777 ExitBB
->getParent(), ExitBB
);
778 NewBB
->IsNewDbgInfoFormat
= ExitBB
->IsNewDbgInfoFormat
;
779 SmallVector
<BasicBlock
*, 4> Preds(predecessors(ExitBB
));
780 for (BasicBlock
*PredBB
: Preds
)
781 if (Blocks
.count(PredBB
))
782 PredBB
->getTerminator()->replaceUsesOfWith(ExitBB
, NewBB
);
783 BranchInst::Create(ExitBB
, NewBB
);
784 Blocks
.insert(NewBB
);
788 PHINode
*NewPN
= PHINode::Create(PN
.getType(), IncomingVals
.size(),
789 PN
.getName() + ".ce");
790 NewPN
->insertBefore(NewBB
->getFirstNonPHIIt());
791 for (unsigned i
: IncomingVals
)
792 NewPN
->addIncoming(PN
.getIncomingValue(i
), PN
.getIncomingBlock(i
));
793 for (unsigned i
: reverse(IncomingVals
))
794 PN
.removeIncomingValue(i
, false);
795 PN
.addIncoming(NewPN
, NewBB
);
800 void CodeExtractor::splitReturnBlocks() {
801 for (BasicBlock
*Block
: Blocks
)
802 if (ReturnInst
*RI
= dyn_cast
<ReturnInst
>(Block
->getTerminator())) {
804 Block
->splitBasicBlock(RI
->getIterator(), Block
->getName() + ".ret");
806 // Old dominates New. New node dominates all other nodes dominated
808 DomTreeNode
*OldNode
= DT
->getNode(Block
);
809 SmallVector
<DomTreeNode
*, 8> Children(OldNode
->begin(),
812 DomTreeNode
*NewNode
= DT
->addNewBlock(New
, Block
);
814 for (DomTreeNode
*I
: Children
)
815 DT
->changeImmediateDominator(I
, NewNode
);
820 Function
*CodeExtractor::constructFunctionDeclaration(
821 const ValueSet
&inputs
, const ValueSet
&outputs
, BlockFrequency EntryFreq
,
822 const Twine
&Name
, ValueSet
&StructValues
, StructType
*&StructTy
) {
823 LLVM_DEBUG(dbgs() << "inputs: " << inputs
.size() << "\n");
824 LLVM_DEBUG(dbgs() << "outputs: " << outputs
.size() << "\n");
826 Function
*oldFunction
= Blocks
.front()->getParent();
827 Module
*M
= Blocks
.front()->getModule();
829 // Assemble the function's parameter lists.
830 std::vector
<Type
*> ParamTy
;
831 std::vector
<Type
*> AggParamTy
;
832 const DataLayout
&DL
= M
->getDataLayout();
834 // Add the types of the input values to the function's argument list
835 for (Value
*value
: inputs
) {
836 LLVM_DEBUG(dbgs() << "value used in func: " << *value
<< "\n");
837 if (AggregateArgs
&& !ExcludeArgsFromAggregate
.contains(value
)) {
838 AggParamTy
.push_back(value
->getType());
839 StructValues
.insert(value
);
841 ParamTy
.push_back(value
->getType());
844 // Add the types of the output values to the function's argument list.
845 for (Value
*output
: outputs
) {
846 LLVM_DEBUG(dbgs() << "instr used in func: " << *output
<< "\n");
847 if (AggregateArgs
&& !ExcludeArgsFromAggregate
.contains(output
)) {
848 AggParamTy
.push_back(output
->getType());
849 StructValues
.insert(output
);
852 PointerType::get(output
->getType(), DL
.getAllocaAddrSpace()));
856 (ParamTy
.size() + AggParamTy
.size()) ==
857 (inputs
.size() + outputs
.size()) &&
858 "Number of scalar and aggregate params does not match inputs, outputs");
859 assert((StructValues
.empty() || AggregateArgs
) &&
860 "Expeced StructValues only with AggregateArgs set");
862 // Concatenate scalar and aggregate params in ParamTy.
863 if (!AggParamTy
.empty()) {
864 StructTy
= StructType::get(M
->getContext(), AggParamTy
);
865 ParamTy
.push_back(PointerType::get(
866 StructTy
, ArgsInZeroAddressSpace
? 0 : DL
.getAllocaAddrSpace()));
869 Type
*RetTy
= getSwitchType();
871 dbgs() << "Function type: " << *RetTy
<< " f(";
872 for (Type
*i
: ParamTy
)
873 dbgs() << *i
<< ", ";
877 FunctionType
*funcType
= FunctionType::get(
878 RetTy
, ParamTy
, AllowVarArgs
&& oldFunction
->isVarArg());
880 // Create the new function
881 Function
*newFunction
=
882 Function::Create(funcType
, GlobalValue::InternalLinkage
,
883 oldFunction
->getAddressSpace(), Name
, M
);
885 // Propagate personality info to the new function if there is one.
886 if (oldFunction
->hasPersonalityFn())
887 newFunction
->setPersonalityFn(oldFunction
->getPersonalityFn());
889 // Inherit all of the target dependent attributes and white-listed
890 // target independent attributes.
891 // (e.g. If the extracted region contains a call to an x86.sse
892 // instruction we need to make sure that the extracted region has the
893 // "target-features" attribute allowing it to be lowered.
894 // FIXME: This should be changed to check to see if a specific
895 // attribute can not be inherited.
896 for (const auto &Attr
: oldFunction
->getAttributes().getFnAttrs()) {
897 if (Attr
.isStringAttribute()) {
898 if (Attr
.getKindAsString() == "thunk")
901 switch (Attr
.getKindAsEnum()) {
902 // Those attributes cannot be propagated safely. Explicitly list them
903 // here so we get a warning if new attributes are added.
904 case Attribute::AllocSize
:
905 case Attribute::Builtin
:
906 case Attribute::Convergent
:
907 case Attribute::JumpTable
:
908 case Attribute::Naked
:
909 case Attribute::NoBuiltin
:
910 case Attribute::NoMerge
:
911 case Attribute::NoReturn
:
912 case Attribute::NoSync
:
913 case Attribute::ReturnsTwice
:
914 case Attribute::Speculatable
:
915 case Attribute::StackAlignment
:
916 case Attribute::WillReturn
:
917 case Attribute::AllocKind
:
918 case Attribute::PresplitCoroutine
:
919 case Attribute::Memory
:
920 case Attribute::NoFPClass
:
921 case Attribute::CoroDestroyOnlyWhenComplete
:
922 case Attribute::CoroElideSafe
:
923 case Attribute::NoDivergenceSource
:
925 // Those attributes should be safe to propagate to the extracted function.
926 case Attribute::AlwaysInline
:
927 case Attribute::Cold
:
928 case Attribute::DisableSanitizerInstrumentation
:
929 case Attribute::FnRetThunkExtern
:
931 case Attribute::HybridPatchable
:
932 case Attribute::NoRecurse
:
933 case Attribute::InlineHint
:
934 case Attribute::MinSize
:
935 case Attribute::NoCallback
:
936 case Attribute::NoDuplicate
:
937 case Attribute::NoFree
:
938 case Attribute::NoImplicitFloat
:
939 case Attribute::NoInline
:
940 case Attribute::NonLazyBind
:
941 case Attribute::NoRedZone
:
942 case Attribute::NoUnwind
:
943 case Attribute::NoSanitizeBounds
:
944 case Attribute::NoSanitizeCoverage
:
945 case Attribute::NullPointerIsValid
:
946 case Attribute::OptimizeForDebugging
:
947 case Attribute::OptForFuzzing
:
948 case Attribute::OptimizeNone
:
949 case Attribute::OptimizeForSize
:
950 case Attribute::SafeStack
:
951 case Attribute::ShadowCallStack
:
952 case Attribute::SanitizeAddress
:
953 case Attribute::SanitizeMemory
:
954 case Attribute::SanitizeNumericalStability
:
955 case Attribute::SanitizeThread
:
956 case Attribute::SanitizeType
:
957 case Attribute::SanitizeHWAddress
:
958 case Attribute::SanitizeMemTag
:
959 case Attribute::SanitizeRealtime
:
960 case Attribute::SanitizeRealtimeBlocking
:
961 case Attribute::SpeculativeLoadHardening
:
962 case Attribute::StackProtect
:
963 case Attribute::StackProtectReq
:
964 case Attribute::StackProtectStrong
:
965 case Attribute::StrictFP
:
966 case Attribute::UWTable
:
967 case Attribute::VScaleRange
:
968 case Attribute::NoCfCheck
:
969 case Attribute::MustProgress
:
970 case Attribute::NoProfile
:
971 case Attribute::SkipProfile
:
973 // These attributes cannot be applied to functions.
974 case Attribute::Alignment
:
975 case Attribute::AllocatedPointer
:
976 case Attribute::AllocAlign
:
977 case Attribute::ByVal
:
978 case Attribute::Dereferenceable
:
979 case Attribute::DereferenceableOrNull
:
980 case Attribute::ElementType
:
981 case Attribute::InAlloca
:
982 case Attribute::InReg
:
983 case Attribute::Nest
:
984 case Attribute::NoAlias
:
985 case Attribute::NoCapture
:
986 case Attribute::NoUndef
:
987 case Attribute::NonNull
:
988 case Attribute::Preallocated
:
989 case Attribute::ReadNone
:
990 case Attribute::ReadOnly
:
991 case Attribute::Returned
:
992 case Attribute::SExt
:
993 case Attribute::StructRet
:
994 case Attribute::SwiftError
:
995 case Attribute::SwiftSelf
:
996 case Attribute::SwiftAsync
:
997 case Attribute::ZExt
:
998 case Attribute::ImmArg
:
999 case Attribute::ByRef
:
1000 case Attribute::WriteOnly
:
1001 case Attribute::Writable
:
1002 case Attribute::DeadOnUnwind
:
1003 case Attribute::Range
:
1004 case Attribute::Initializes
:
1005 case Attribute::NoExt
:
1006 // These are not really attributes.
1007 case Attribute::None
:
1008 case Attribute::EndAttrKinds
:
1009 case Attribute::EmptyKey
:
1010 case Attribute::TombstoneKey
:
1011 llvm_unreachable("Not a function attribute");
1014 newFunction
->addFnAttr(Attr
);
1017 // Create scalar and aggregate iterators to name all of the arguments we
1019 Function::arg_iterator ScalarAI
= newFunction
->arg_begin();
1021 // Set names and attributes for input and output arguments.
1022 ScalarAI
= newFunction
->arg_begin();
1023 for (Value
*input
: inputs
) {
1024 if (StructValues
.contains(input
))
1027 ScalarAI
->setName(input
->getName());
1028 if (input
->isSwiftError())
1029 newFunction
->addParamAttr(ScalarAI
- newFunction
->arg_begin(),
1030 Attribute::SwiftError
);
1033 for (Value
*output
: outputs
) {
1034 if (StructValues
.contains(output
))
1037 ScalarAI
->setName(output
->getName() + ".out");
1041 // Update the entry count of the function.
1043 auto Count
= BFI
->getProfileCountFromFreq(EntryFreq
);
1044 if (Count
.has_value())
1045 newFunction
->setEntryCount(
1046 ProfileCount(*Count
, Function::PCT_Real
)); // FIXME
1052 /// If the original function has debug info, we have to add a debug location
1053 /// to the new branch instruction from the artificial entry block.
1054 /// We use the debug location of the first instruction in the extracted
1055 /// blocks, as there is no other equivalent line in the source code.
1056 static void applyFirstDebugLoc(Function
*oldFunction
,
1057 ArrayRef
<BasicBlock
*> Blocks
,
1058 Instruction
*BranchI
) {
1059 if (oldFunction
->getSubprogram()) {
1060 any_of(Blocks
, [&BranchI
](const BasicBlock
*BB
) {
1061 return any_of(*BB
, [&BranchI
](const Instruction
&I
) {
1062 if (!I
.getDebugLoc())
1064 // Don't use source locations attached to debug-intrinsics: they could
1065 // be from completely unrelated scopes.
1066 if (isa
<DbgInfoIntrinsic
>(I
))
1068 BranchI
->setDebugLoc(I
.getDebugLoc());
/// Erase lifetime.start markers which reference inputs to the extraction
/// region, and insert the referenced memory into \p LifetimesStart.
///
/// The extraction region is defined by a set of blocks (\p Blocks), and a set
/// of allocas which will be moved from the caller function into the extracted
/// function (\p SunkAllocas).
static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
                                         const SetVector<Value *> &SunkAllocas,
                                         SetVector<Value *> &LifetimesStart) {
  for (BasicBlock *BB : Blocks) {
    // early_inc_range: we erase instructions while iterating over the block.
    for (Instruction &I : llvm::make_early_inc_range(*BB)) {
      auto *II = dyn_cast<IntrinsicInst>(&I);
      // Only lifetime.start / lifetime.end intrinsics are of interest.
      if (!II || !II->isLifetimeStartOrEnd())
        continue;

      // Get the memory operand of the lifetime marker. If the underlying
      // object is a sunk alloca, or is otherwise defined in the extraction
      // region, the lifetime marker must not be erased.
      Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
      if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
        continue;

      // Record objects whose lifetime.start we drop so the caller can
      // replicate the marker around the call to the extracted function.
      if (II->getIntrinsicID() == Intrinsic::lifetime_start)
        LifetimesStart.insert(Mem);
      II->eraseFromParent();
    }
  }
}
/// Insert lifetime start/end markers surrounding the call to the new function
/// for objects defined in the caller.
static void insertLifetimeMarkersSurroundingCall(
    Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd,
    CallInst *TheCall) {
  LLVMContext &Ctx = M->getContext();
  // Lifetime intrinsics take a size argument; -1 means "unknown size".
  auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
  Instruction *Term = TheCall->getParent()->getTerminator();

  // Emit lifetime markers for the pointers given in \p Objects. Insert the
  // markers before the call if \p InsertBefore, and after the call otherwise.
  auto insertMarkers = [&](Intrinsic::ID MarkerFunc, ArrayRef<Value *> Objects,
                           bool InsertBefore) {
    for (Value *Mem : Objects) {
      assert((!isa<Instruction>(Mem) || cast<Instruction>(Mem)->getFunction() ==
                                            TheCall->getFunction()) &&
             "Input memory not defined in original function");

      // Declaration is overloaded on the pointer type of the memory operand.
      Function *Func =
          Intrinsic::getOrInsertDeclaration(M, MarkerFunc, Mem->getType());
      auto Marker = CallInst::Create(Func, {NegativeOne, Mem});
      if (InsertBefore)
        Marker->insertBefore(TheCall);
      else
        // "After the call": before the block terminator, i.e. past the call.
        Marker->insertBefore(Term);
    }
  };

  if (!LifetimesStart.empty()) {
    insertMarkers(Intrinsic::lifetime_start, LifetimesStart,
                  /*InsertBefore=*/true);
  }

  if (!LifetimesEnd.empty()) {
    insertMarkers(Intrinsic::lifetime_end, LifetimesEnd,
                  /*InsertBefore=*/false);
  }
}
1143 void CodeExtractor::moveCodeToFunction(Function
*newFunction
) {
1144 auto newFuncIt
= newFunction
->begin();
1145 for (BasicBlock
*Block
: Blocks
) {
1146 // Delete the basic block from the old function, and the list of blocks
1147 Block
->removeFromParent();
1149 // Insert this basic block into the new function
1150 // Insert the original blocks after the entry block created
1151 // for the new function. The entry block may be followed
1152 // by a set of exit blocks at this point, but these exit
1153 // blocks better be placed at the end of the new function.
1154 newFuncIt
= newFunction
->insert(std::next(newFuncIt
), Block
);
/// Recompute branch weights / edge probabilities on the terminator of
/// \p CodeReplacer (the switch dispatching on the extracted function's return
/// value), based on the exit-block frequencies collected before extraction.
void CodeExtractor::calculateNewCallTerminatorWeights(
    BasicBlock *CodeReplacer,
    const DenseMap<BasicBlock *, BlockFrequency> &ExitWeights,
    BranchProbabilityInfo *BPI) {
  using Distribution = BlockFrequencyInfoImplBase::Distribution;
  using BlockNode = BlockFrequencyInfoImplBase::BlockNode;

  // Update the branch weights for the exit block.
  Instruction *TI = CodeReplacer->getTerminator();
  SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0);

  // Block Frequency distribution with dummy node.
  Distribution BranchDist;

  // Start with every edge unknown; edges with zero frequency are pinned to
  // zero below, the rest get probabilities from the normalized distribution.
  SmallVector<BranchProbability, 4> EdgeProbabilities(
      TI->getNumSuccessors(), BranchProbability::getUnknown());

  // Add each of the frequencies of the successors.
  for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
    BlockNode ExitNode(i);
    uint64_t ExitFreq = ExitWeights.lookup(TI->getSuccessor(i)).getFrequency();
    if (ExitFreq != 0)
      BranchDist.addExit(ExitNode, ExitFreq);
    else
      EdgeProbabilities[i] = BranchProbability::getZero();
  }

  // Check for no total weight.
  if (BranchDist.Total == 0) {
    // No profile data at all — record the zero/unknown probabilities and
    // skip emitting branch-weight metadata.
    BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities);
    return;
  }

  // Normalize the distribution so that they can fit in unsigned.
  BranchDist.normalize();

  // Create normalized branch weights and set the metadata.
  for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) {
    const auto &Weight = BranchDist.Weights[I];

    // Get the weight and update the current BFI.
    BranchWeights[Weight.TargetNode.Index] = Weight.Amount;
    BranchProbability BP(Weight.Amount, BranchDist.Total);
    EdgeProbabilities[Weight.TargetNode.Index] = BP;
  }
  BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities);
  // Attach !prof metadata so later passes see the recomputed weights.
  TI->setMetadata(
      LLVMContext::MD_prof,
      MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
}
1209 /// Erase debug info intrinsics which refer to values in \p F but aren't in
1211 static void eraseDebugIntrinsicsWithNonLocalRefs(Function
&F
) {
1212 for (Instruction
&I
: instructions(F
)) {
1213 SmallVector
<DbgVariableIntrinsic
*, 4> DbgUsers
;
1214 SmallVector
<DbgVariableRecord
*, 4> DbgVariableRecords
;
1215 findDbgUsers(DbgUsers
, &I
, &DbgVariableRecords
);
1216 for (DbgVariableIntrinsic
*DVI
: DbgUsers
)
1217 if (DVI
->getFunction() != &F
)
1218 DVI
->eraseFromParent();
1219 for (DbgVariableRecord
*DVR
: DbgVariableRecords
)
1220 if (DVR
->getFunction() != &F
)
1221 DVR
->eraseFromParent();
/// Fix up the debug info in the old and new functions by pointing line
/// locations and debug intrinsics to the new subprogram scope, and by deleting
/// intrinsics which point to values outside of the new function.
static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
                                         CallInst &TheCall) {
  DISubprogram *OldSP = OldFunc.getSubprogram();
  LLVMContext &Ctx = OldFunc.getContext();

  if (!OldSP) {
    // Erase any debug info the new function contains.
    stripDebugInfo(NewFunc);
    // Make sure the old function doesn't contain any non-local metadata refs.
    eraseDebugIntrinsicsWithNonLocalRefs(NewFunc);
    return;
  }

  // Create a subprogram for the new function. Leave out a description of the
  // function arguments, as the parameters don't correspond to anything at the
  // source level.
  assert(OldSP->getUnit() && "Missing compile unit for subprogram");
  DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false,
                OldSP->getUnit());
  auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray({}));
  DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition |
                                    DISubprogram::SPFlagOptimized |
                                    DISubprogram::SPFlagLocalToUnit;
  auto NewSP = DIB.createFunction(
      OldSP->getUnit(), NewFunc.getName(), NewFunc.getName(), OldSP->getFile(),
      /*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags);
  NewFunc.setSubprogram(NewSP);

  auto IsInvalidLocation = [&NewFunc](Value *Location) {
    // Location is invalid if it isn't a constant or an instruction, or is an
    // instruction but isn't in the new function.
    if (!Location ||
        (!isa<Constant>(Location) && !isa<Instruction>(Location)))
      return true;
    Instruction *LocationInst = dyn_cast<Instruction>(Location);
    return LocationInst && LocationInst->getFunction() != &NewFunc;
  };

  // Debug intrinsics in the new function need to be updated in one of two
  // ways:
  //  1) They need to be deleted, because they describe a value in the old
  //     function.
  //  2) They need to point to fresh metadata, e.g. because they currently
  //     point to a variable in the wrong scope.
  SmallDenseMap<DINode *, DINode *> RemappedMetadata;
  SmallVector<Instruction *, 4> DebugIntrinsicsToDelete;
  SmallVector<DbgVariableRecord *, 4> DVRsToDelete;
  DenseMap<const MDNode *, MDNode *> Cache;

  // Lazily create (and memoize) a replacement variable rooted at NewSP for
  // each old local variable encountered.
  auto GetUpdatedDIVariable = [&](DILocalVariable *OldVar) {
    DINode *&NewVar = RemappedMetadata[OldVar];
    if (!NewVar) {
      DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram(
          *OldVar->getScope(), *NewSP, Ctx, Cache);
      NewVar = DIB.createAutoVariable(
          NewScope, OldVar->getName(), OldVar->getFile(), OldVar->getLine(),
          OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero,
          OldVar->getAlignInBits());
    }
    return cast<DILocalVariable>(NewVar);
  };

  // Works for both DbgLabelInst and DbgLabelRecord (hence the auto param).
  auto UpdateDbgLabel = [&](auto *LabelRecord) {
    // Point the label record to a fresh label within the new function if
    // the record was not inlined from some other function.
    if (LabelRecord->getDebugLoc().getInlinedAt())
      return;
    DILabel *OldLabel = LabelRecord->getLabel();
    DINode *&NewLabel = RemappedMetadata[OldLabel];
    if (!NewLabel) {
      DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram(
          *OldLabel->getScope(), *NewSP, Ctx, Cache);
      NewLabel = DILabel::get(Ctx, NewScope, OldLabel->getName(),
                              OldLabel->getFile(), OldLabel->getLine());
    }
    LabelRecord->setLabel(cast<DILabel>(NewLabel));
  };

  // Same fixups as for intrinsics below, but for non-instruction DbgRecords
  // attached to an instruction.
  auto UpdateDbgRecordsOnInst = [&](Instruction &I) -> void {
    for (DbgRecord &DR : I.getDbgRecordRange()) {
      if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(&DR)) {
        UpdateDbgLabel(DLR);
        continue;
      }

      DbgVariableRecord &DVR = cast<DbgVariableRecord>(DR);
      // Apply the two updates that dbg.values get: invalid operands, and
      // variable metadata fixup.
      if (any_of(DVR.location_ops(), IsInvalidLocation)) {
        DVRsToDelete.push_back(&DVR);
        continue;
      }
      if (DVR.isDbgAssign() && IsInvalidLocation(DVR.getAddress())) {
        DVRsToDelete.push_back(&DVR);
        continue;
      }
      if (!DVR.getDebugLoc().getInlinedAt())
        DVR.setVariable(GetUpdatedDIVariable(DVR.getVariable()));
    }
  };

  for (Instruction &I : instructions(NewFunc)) {
    UpdateDbgRecordsOnInst(I);

    auto *DII = dyn_cast<DbgInfoIntrinsic>(&I);
    if (!DII)
      continue;

    // Point the intrinsic to a fresh label within the new function if the
    // intrinsic was not inlined from some other function.
    if (auto *DLI = dyn_cast<DbgLabelInst>(&I)) {
      UpdateDbgLabel(DLI);
      continue;
    }

    auto *DVI = cast<DbgVariableIntrinsic>(DII);
    // If any of the used locations are invalid, delete the intrinsic.
    if (any_of(DVI->location_ops(), IsInvalidLocation)) {
      DebugIntrinsicsToDelete.push_back(DVI);
      continue;
    }
    // DbgAssign intrinsics have an extra Value argument:
    if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI);
        DAI && IsInvalidLocation(DAI->getAddress())) {
      DebugIntrinsicsToDelete.push_back(DVI);
      continue;
    }
    // If the variable was in the scope of the old function, i.e. it was not
    // inlined, point the intrinsic to a fresh variable within the new function.
    if (!DVI->getDebugLoc().getInlinedAt())
      DVI->setVariable(GetUpdatedDIVariable(DVI->getVariable()));
  }

  // Deletions deferred until now: erasing while iterating the instruction /
  // record lists above would invalidate the iteration.
  for (auto *DII : DebugIntrinsicsToDelete)
    DII->eraseFromParent();
  for (auto *DVR : DVRsToDelete)
    DVR->getMarker()->MarkedInstr->dropOneDbgRecord(DVR);
  DIB.finalizeSubprogram(NewSP);

  // Fix up the scope information attached to the line locations and the
  // debug assignment metadata in the new function.
  DenseMap<DIAssignID *, DIAssignID *> AssignmentIDMap;
  for (Instruction &I : instructions(NewFunc)) {
    if (const DebugLoc &DL = I.getDebugLoc())
      I.setDebugLoc(
          DebugLoc::replaceInlinedAtSubprogram(DL, *NewSP, Ctx, Cache));
    for (DbgRecord &DR : I.getDbgRecordRange())
      DR.setDebugLoc(DebugLoc::replaceInlinedAtSubprogram(DR.getDebugLoc(),
                                                          *NewSP, Ctx, Cache));

    // Loop info metadata may contain line locations. Fix them up.
    auto updateLoopInfoLoc = [&Ctx, &Cache, NewSP](Metadata *MD) -> Metadata * {
      if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
        return DebugLoc::replaceInlinedAtSubprogram(Loc, *NewSP, Ctx, Cache);
      return MD;
    };
    updateLoopMetadataDebugLocations(I, updateLoopInfoLoc);
    at::remapAssignID(AssignmentIDMap, I);
  }
  // Give the call site in the old function a (synthetic) location so the
  // inliner and verifier are satisfied when OldFunc has a subprogram.
  if (!TheCall.getDebugLoc())
    TheCall.setDebugLoc(DILocation::get(Ctx, 0, 0, OldSP));

  eraseDebugIntrinsicsWithNonLocalRefs(NewFunc);
}
1394 CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache
&CEAC
) {
1395 ValueSet Inputs
, Outputs
;
1396 return extractCodeRegion(CEAC
, Inputs
, Outputs
);
/// Extract the region described by \c Blocks into a new function. Returns the
/// new function, or nullptr if the region is not eligible for extraction.
/// The computed region inputs/outputs are reported through \p inputs and
/// \p outputs.
Function *
CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
                                 ValueSet &inputs, ValueSet &outputs) {
  if (!isEligible())
    return nullptr;

  // Assumption: this is a single-entry code region, and the header is the first
  // block in the region.
  BasicBlock *header = *Blocks.begin();
  Function *oldFunction = header->getParent();

  normalizeCFGForExtraction(header);

  // Remove @llvm.assume calls that will be moved to the new function from the
  // old function's assumption cache.
  for (BasicBlock *Block : Blocks) {
    for (Instruction &I : llvm::make_early_inc_range(*Block)) {
      if (auto *AI = dyn_cast<AssumeInst>(&I)) {
        if (AC)
          AC->unregisterAssumption(AI);
        AI->eraseFromParent();
      }
    }
  }

  ValueSet SinkingCands, HoistingCands;
  BasicBlock *CommonExit = nullptr;
  findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
  assert(HoistingCands.empty() || CommonExit);

  // Find inputs to, outputs from the code region.
  findInputsOutputs(inputs, outputs, SinkingCands);

  // Collect objects which are inputs to the extraction region and also
  // referenced by lifetime start markers within it. The effects of these
  // markers must be replicated in the calling function to prevent the stack
  // coloring pass from merging slots which store input objects.
  ValueSet LifetimesStart;
  eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart);

  if (!HoistingCands.empty()) {
    auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit);
    Instruction *TI = HoistToBlock->getTerminator();
    for (auto *II : HoistingCands)
      cast<Instruction>(II)->moveBefore(TI);
    // Hoisting may have changed the region's exits; recompute them.
    computeExtractedFuncRetVals();
  }

  // CFG/ExitBlocks must not change hereafter

  // Calculate the entry frequency of the new function before we change the root
  // block.
  BlockFrequency EntryFreq;
  DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
  if (BFI) {
    assert(BPI && "Both BPI and BFI are required to preserve profile info");
    // Entry frequency: sum over edges entering the region from outside.
    for (BasicBlock *Pred : predecessors(header)) {
      if (Blocks.count(Pred))
        continue;
      EntryFreq +=
          BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header);
    }

    // Exit weights: sum over edges leaving the region into each exit block.
    for (BasicBlock *Succ : ExtractedFuncRetVals) {
      for (BasicBlock *Block : predecessors(Succ)) {
        if (!Blocks.count(Block))
          continue;

        // Update the branch weight for this successor.
        BlockFrequency &BF = ExitWeights[Succ];
        BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ);
      }
    }
  }

  // Determine position for the replacement code. Do so before header is moved
  // to the new function.
  BasicBlock *ReplIP = header;
  while (ReplIP && Blocks.count(ReplIP))
    ReplIP = ReplIP->getNextNode();

  // Construct new function based on inputs/outputs & add allocas for all defs.
  std::string SuffixToUse =
      Suffix.empty()
          ? (header->getName().empty() ? "extracted" : header->getName().str())
          : Suffix;

  ValueSet StructValues;
  StructType *StructTy = nullptr;
  Function *newFunction = constructFunctionDeclaration(
      inputs, outputs, EntryFreq, oldFunction->getName() + "." + SuffixToUse,
      StructValues, StructTy);
  newFunction->IsNewDbgInfoFormat = oldFunction->IsNewDbgInfoFormat;

  emitFunctionBody(inputs, outputs, StructValues, newFunction, StructTy, header,
                   SinkingCands);

  std::vector<Value *> Reloads;
  CallInst *TheCall = emitReplacerCall(
      inputs, outputs, StructValues, newFunction, StructTy, oldFunction, ReplIP,
      EntryFreq, LifetimesStart.getArrayRef(), Reloads);

  insertReplacerCall(oldFunction, header, TheCall->getParent(), outputs,
                     Reloads, ExitWeights);

  fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall);

  // Debug-build sanity checks: both functions must verify, and the old
  // function's assumption cache must not reference moved instructions.
  LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
    newFunction->dump();
    report_fatal_error("verification of newFunction failed!");
  });
  LLVM_DEBUG(if (verifyFunction(*oldFunction))
                 report_fatal_error("verification of oldFunction failed!"));
  LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC))
                 report_fatal_error("Stale Asumption cache for old Function!"));
  return newFunction;
}
/// Massage the CFG so the region has clean single-entry / well-formed exit
/// structure before any code is moved.
void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&header) {
  // If we have any return instructions in the region, split those blocks so
  // that the return is not in the region.
  splitReturnBlocks();

  // If we have to split PHI nodes of the entry or exit blocks, do so now.
  severSplitPHINodesOfEntry(header);

  // If a PHI in an exit block has multiple incoming values from the outlined
  // region, create a new PHI for those values within the region such that only
  // PHI itself becomes an output value, not each of its incoming values
  // individually.
  computeExtractedFuncRetVals();
  severSplitPHINodesOfExits();
}
1533 void CodeExtractor::computeExtractedFuncRetVals() {
1534 ExtractedFuncRetVals
.clear();
1536 SmallPtrSet
<BasicBlock
*, 2> ExitBlocks
;
1537 for (BasicBlock
*Block
: Blocks
) {
1538 for (BasicBlock
*Succ
: successors(Block
)) {
1539 if (Blocks
.count(Succ
))
1542 bool IsNew
= ExitBlocks
.insert(Succ
).second
;
1544 ExtractedFuncRetVals
.push_back(Succ
);
1549 Type
*CodeExtractor::getSwitchType() {
1550 LLVMContext
&Context
= Blocks
.front()->getContext();
1552 assert(ExtractedFuncRetVals
.size() < 0xffff &&
1553 "too many exit blocks for switch");
1554 switch (ExtractedFuncRetVals
.size()) {
1557 return Type::getVoidTy(Context
);
1559 // Conditional branch, return a bool
1560 return Type::getInt1Ty(Context
);
1562 return Type::getInt16Ty(Context
);
/// Populate \p newFunction: move the region blocks in, rewrite input uses to
/// arguments (scalar or via the aggregate struct), create exit stubs that
/// return the exit selector, and store output values back to the caller.
void CodeExtractor::emitFunctionBody(
    const ValueSet &inputs, const ValueSet &outputs,
    const ValueSet &StructValues, Function *newFunction,
    StructType *StructArgTy, BasicBlock *header, const ValueSet &SinkingCands) {
  Function *oldFunction = header->getParent();
  LLVMContext &Context = oldFunction->getContext();

  // The new function needs a root node because other nodes can branch to the
  // head of the region, but the entry node of a function cannot have preds.
  BasicBlock *newFuncRoot =
      BasicBlock::Create(Context, "newFuncRoot", newFunction);
  newFuncRoot->IsNewDbgInfoFormat = oldFunction->IsNewDbgInfoFormat;

  // Now sink all instructions which only have non-phi uses inside the region.
  // Group the allocas at the start of the block, so that any bitcast uses of
  // the allocas are well-defined.
  for (auto *II : SinkingCands) {
    if (!isa<AllocaInst>(II)) {
      cast<Instruction>(II)->moveBefore(*newFuncRoot,
                                        newFuncRoot->getFirstInsertionPt());
    }
  }
  // Second pass: allocas are inserted last so they end up at the block front.
  for (auto *II : SinkingCands) {
    if (auto *AI = dyn_cast<AllocaInst>(II)) {
      AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt());
    }
  }

  // Scalar inputs occupy the leading arguments; the aggregate struct pointer,
  // if any, is the trailing argument.
  Function::arg_iterator ScalarAI = newFunction->arg_begin();
  Argument *AggArg = StructValues.empty()
                         ? nullptr
                         : newFunction->getArg(newFunction->arg_size() - 1);

  // Rewrite all users of the inputs in the extracted region to use the
  // arguments (or appropriate addressing into struct) instead.
  SmallVector<Value *> NewValues;
  for (unsigned i = 0, e = inputs.size(), aggIdx = 0; i != e; ++i) {
    Value *RewriteVal;
    if (StructValues.contains(inputs[i])) {
      // Struct-passed input: load it out of the aggregate in newFuncRoot.
      Value *Idx[2];
      Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
      Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx);
      GetElementPtrInst *GEP = GetElementPtrInst::Create(
          StructArgTy, AggArg, Idx, "gep_" + inputs[i]->getName(), newFuncRoot);
      RewriteVal = new LoadInst(StructArgTy->getElementType(aggIdx), GEP,
                                "loadgep_" + inputs[i]->getName(), newFuncRoot);
      ++aggIdx;
    } else
      RewriteVal = &*ScalarAI++;

    NewValues.push_back(RewriteVal);
  }

  moveCodeToFunction(newFunction);

  // Replace uses of the inputs inside the (now moved) region blocks.
  for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
    Value *RewriteVal = NewValues[i];

    // Snapshot the user list: replaceUsesOfWith mutates it.
    std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end());
    for (User *use : Users)
      if (Instruction *inst = dyn_cast<Instruction>(use))
        if (Blocks.count(inst->getParent()))
          inst->replaceUsesOfWith(inputs[i], RewriteVal);
  }

  // Since there may be multiple exits from the original region, make the new
  // function return an unsigned, switch on that number. This loop iterates
  // over all of the blocks in the extracted region, updating any terminator
  // instructions in the to-be-extracted region that branch to blocks that are
  // not in the region to be extracted.
  std::map<BasicBlock *, BasicBlock *> ExitBlockMap;

  // Iterate over the previously collected targets, and create new blocks inside
  // the function to branch to.
  for (auto P : enumerate(ExtractedFuncRetVals)) {
    BasicBlock *OldTarget = P.value();
    size_t SuccNum = P.index();

    BasicBlock *NewTarget = BasicBlock::Create(
        Context, OldTarget->getName() + ".exitStub", newFunction);
    ExitBlockMap[OldTarget] = NewTarget;

    Value *brVal = nullptr;
    Type *RetTy = getSwitchType();
    assert(ExtractedFuncRetVals.size() < 0xffff &&
           "too many exit blocks for switch");
    switch (ExtractedFuncRetVals.size()) {
    case 0:
    case 1:
      // No selector value needed; the stub returns void.
      break;
    case 2: // Conditional branch, return a bool
      brVal = ConstantInt::get(RetTy, !SuccNum);
      break;
    default:
      brVal = ConstantInt::get(RetTy, SuccNum);
      break;
    }

    ReturnInst::Create(Context, brVal, NewTarget);
  }

  // Redirect region-internal terminators that branched out of the region to
  // the corresponding exit stubs.
  for (BasicBlock *Block : Blocks) {
    Instruction *TI = Block->getTerminator();
    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
      if (Blocks.count(TI->getSuccessor(i)))
        continue;
      BasicBlock *OldTarget = TI->getSuccessor(i);
      // add a new basic block which returns the appropriate value
      BasicBlock *NewTarget = ExitBlockMap[OldTarget];
      assert(NewTarget && "Unknown target block!");

      // rewrite the original branch instruction with this new target
      TI->setSuccessor(i, NewTarget);
    }
  }

  // Loop over all of the PHI nodes in the header and exit blocks, and change
  // any references to the old incoming edge to be the new incoming edge.
  for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!Blocks.count(PN->getIncomingBlock(i)))
        PN->setIncomingBlock(i, newFuncRoot);
  }

  // Connect newFunction entry block to new header.
  BranchInst *BranchI = BranchInst::Create(header, newFuncRoot);
  applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI);

  // Store the arguments right after the definition of output value.
  // This should be proceeded after creating exit stubs to be ensure that invoke
  // result restore will be placed in the outlined function.
  ScalarAI = newFunction->arg_begin();
  unsigned AggIdx = 0;

  // Skip past the input arguments to reach the first output argument slot.
  for (Value *Input : inputs) {
    if (StructValues.contains(Input))
      ++AggIdx;
    else
      ++ScalarAI;
  }

  for (Value *Output : outputs) {
    // Find proper insertion point.
    // In case Output is an invoke, we insert the store at the beginning in the
    // 'normal destination' BB. Otherwise we insert the store right after
    // Output.
    BasicBlock::iterator InsertPt;
    if (auto *InvokeI = dyn_cast<InvokeInst>(Output))
      InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
    else if (auto *Phi = dyn_cast<PHINode>(Output))
      InsertPt = Phi->getParent()->getFirstInsertionPt();
    else if (auto *OutI = dyn_cast<Instruction>(Output))
      InsertPt = std::next(OutI->getIterator());
    else {
      // Globals don't need to be updated, just advance to the next argument.
      if (StructValues.contains(Output))
        ++AggIdx;
      else
        ++ScalarAI;
      continue;
    }

    assert((InsertPt->getFunction() == newFunction ||
            Blocks.count(InsertPt->getParent())) &&
           "InsertPt should be in new function");

    if (StructValues.contains(Output)) {
      // Aggregate-passed output: store through a GEP into the struct arg.
      assert(AggArg && "Number of aggregate output arguments should match "
                       "the number of defined values");
      Value *Idx[2];
      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), AggIdx);
      GetElementPtrInst *GEP = GetElementPtrInst::Create(
          StructArgTy, AggArg, Idx, "gep_" + Output->getName(), InsertPt);
      new StoreInst(Output, GEP, InsertPt);
      ++AggIdx;
    } else {
      // Scalar output: store through the dedicated output pointer argument.
      assert(ScalarAI != newFunction->arg_end() &&
             "Number of scalar output arguments should match "
             "the number of defined values");
      new StoreInst(Output, &*ScalarAI, InsertPt);
      ++ScalarAI;
    }
  }

  if (ExtractedFuncRetVals.empty()) {
    // Mark the new function `noreturn` if applicable. Terminators which resume
    // exception propagation are treated as returning instructions. This is to
    // avoid inserting traps after calls to outlined functions which unwind.
    if (none_of(Blocks, [](const BasicBlock *BB) {
          const Instruction *Term = BB->getTerminator();
          return isa<ReturnInst>(Term) || isa<ResumeInst>(Term);
        }))
      newFunction->setDoesNotReturn();
  }
}
1765 CallInst
*CodeExtractor::emitReplacerCall(
1766 const ValueSet
&inputs
, const ValueSet
&outputs
,
1767 const ValueSet
&StructValues
, Function
*newFunction
,
1768 StructType
*StructArgTy
, Function
*oldFunction
, BasicBlock
*ReplIP
,
1769 BlockFrequency EntryFreq
, ArrayRef
<Value
*> LifetimesStart
,
1770 std::vector
<Value
*> &Reloads
) {
1771 LLVMContext
&Context
= oldFunction
->getContext();
1772 Module
*M
= oldFunction
->getParent();
1773 const DataLayout
&DL
= M
->getDataLayout();
1775 // This takes place of the original loop
1776 BasicBlock
*codeReplacer
=
1777 BasicBlock::Create(Context
, "codeRepl", oldFunction
, ReplIP
);
1778 codeReplacer
->IsNewDbgInfoFormat
= oldFunction
->IsNewDbgInfoFormat
;
1779 BasicBlock
*AllocaBlock
=
1780 AllocationBlock
? AllocationBlock
: &oldFunction
->getEntryBlock();
1781 AllocaBlock
->IsNewDbgInfoFormat
= oldFunction
->IsNewDbgInfoFormat
;
1783 // Update the entry count of the function.
1785 BFI
->setBlockFreq(codeReplacer
, EntryFreq
);
1787 std::vector
<Value
*> params
;
1789 // Add inputs as params, or to be filled into the struct
1790 for (Value
*input
: inputs
) {
1791 if (StructValues
.contains(input
))
1794 params
.push_back(input
);
1797 // Create allocas for the outputs
1798 std::vector
<Value
*> ReloadOutputs
;
1799 for (Value
*output
: outputs
) {
1800 if (StructValues
.contains(output
))
1803 AllocaInst
*alloca
= new AllocaInst(
1804 output
->getType(), DL
.getAllocaAddrSpace(), nullptr,
1805 output
->getName() + ".loc", AllocaBlock
->getFirstInsertionPt());
1806 params
.push_back(alloca
);
1807 ReloadOutputs
.push_back(alloca
);
1810 AllocaInst
*Struct
= nullptr;
1811 if (!StructValues
.empty()) {
1812 Struct
= new AllocaInst(StructArgTy
, DL
.getAllocaAddrSpace(), nullptr,
1813 "structArg", AllocaBlock
->getFirstInsertionPt());
1814 if (ArgsInZeroAddressSpace
&& DL
.getAllocaAddrSpace() != 0) {
1815 auto *StructSpaceCast
= new AddrSpaceCastInst(
1816 Struct
, PointerType ::get(Context
, 0), "structArg.ascast");
1817 StructSpaceCast
->insertAfter(Struct
);
1818 params
.push_back(StructSpaceCast
);
1820 params
.push_back(Struct
);
1823 unsigned AggIdx
= 0;
1824 for (Value
*input
: inputs
) {
1825 if (!StructValues
.contains(input
))
1829 Idx
[0] = Constant::getNullValue(Type::getInt32Ty(Context
));
1830 Idx
[1] = ConstantInt::get(Type::getInt32Ty(Context
), AggIdx
);
1831 GetElementPtrInst
*GEP
= GetElementPtrInst::Create(
1832 StructArgTy
, Struct
, Idx
, "gep_" + input
->getName());
1833 GEP
->insertInto(codeReplacer
, codeReplacer
->end());
1834 new StoreInst(input
, GEP
, codeReplacer
);
1840 // Emit the call to the function
1841 CallInst
*call
= CallInst::Create(
1842 newFunction
, params
, ExtractedFuncRetVals
.size() > 1 ? "targetBlock" : "",
1845 // Set swifterror parameter attributes.
1846 unsigned ParamIdx
= 0;
1847 unsigned AggIdx
= 0;
1848 for (auto input
: inputs
) {
1849 if (StructValues
.contains(input
)) {
1852 if (input
->isSwiftError())
1853 call
->addParamAttr(ParamIdx
, Attribute::SwiftError
);
1858 // Add debug location to the new call, if the original function has debug
1859 // info. In that case, the terminator of the entry block of the extracted
1860 // function contains the first debug location of the extracted function,
1861 // set in extractCodeRegion.
1862 if (codeReplacer
->getParent()->getSubprogram()) {
1863 if (auto DL
= newFunction
->getEntryBlock().getTerminator()->getDebugLoc())
1864 call
->setDebugLoc(DL
);
1867 // Reload the outputs passed in by reference, use the struct if output is in
1868 // the aggregate or reload from the scalar argument.
1869 for (unsigned i
= 0, e
= outputs
.size(), scalarIdx
= 0; i
!= e
; ++i
) {
1870 Value
*Output
= nullptr;
1871 if (StructValues
.contains(outputs
[i
])) {
1873 Idx
[0] = Constant::getNullValue(Type::getInt32Ty(Context
));
1874 Idx
[1] = ConstantInt::get(Type::getInt32Ty(Context
), AggIdx
);
1875 GetElementPtrInst
*GEP
= GetElementPtrInst::Create(
1876 StructArgTy
, Struct
, Idx
, "gep_reload_" + outputs
[i
]->getName());
1877 GEP
->insertInto(codeReplacer
, codeReplacer
->end());
1881 Output
= ReloadOutputs
[scalarIdx
];
1885 new LoadInst(outputs
[i
]->getType(), Output
,
1886 outputs
[i
]->getName() + ".reload", codeReplacer
);
1887 Reloads
.push_back(load
);
1890 // Now we can emit a switch statement using the call as a value.
1891 SwitchInst
*TheSwitch
=
1892 SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context
)),
1893 codeReplacer
, 0, codeReplacer
);
1894 for (auto P
: enumerate(ExtractedFuncRetVals
)) {
1895 BasicBlock
*OldTarget
= P
.value();
1896 size_t SuccNum
= P
.index();
1898 TheSwitch
->addCase(ConstantInt::get(Type::getInt16Ty(Context
), SuccNum
),
1902 // Now that we've done the deed, simplify the switch instruction.
1903 Type
*OldFnRetTy
= TheSwitch
->getParent()->getParent()->getReturnType();
1904 switch (ExtractedFuncRetVals
.size()) {
1906 // There are no successors (the block containing the switch itself), which
1907 // means that previously this was the last part of the function, and hence
1908 // this should be rewritten as a `ret` or `unreachable`.
1909 if (newFunction
->doesNotReturn()) {
1910 // If fn is no return, end with an unreachable terminator.
1911 (void)new UnreachableInst(Context
, TheSwitch
->getIterator());
1912 } else if (OldFnRetTy
->isVoidTy()) {
1913 // We have no return value.
1914 ReturnInst::Create(Context
, nullptr,
1915 TheSwitch
->getIterator()); // Return void
1916 } else if (OldFnRetTy
== TheSwitch
->getCondition()->getType()) {
1917 // return what we have
1918 ReturnInst::Create(Context
, TheSwitch
->getCondition(),
1919 TheSwitch
->getIterator());
1921 // Otherwise we must have code extracted an unwind or something, just
1922 // return whatever we want.
1923 ReturnInst::Create(Context
, Constant::getNullValue(OldFnRetTy
),
1924 TheSwitch
->getIterator());
1927 TheSwitch
->eraseFromParent();
1930 // Only a single destination, change the switch into an unconditional
1932 BranchInst::Create(TheSwitch
->getSuccessor(1), TheSwitch
->getIterator());
1933 TheSwitch
->eraseFromParent();
1936 // Only two destinations, convert to a condition branch.
1937 // Remark: This also swaps the target branches:
1938 // 0 -> false -> getSuccessor(2); 1 -> true -> getSuccessor(1)
1939 BranchInst::Create(TheSwitch
->getSuccessor(1), TheSwitch
->getSuccessor(2),
1940 call
, TheSwitch
->getIterator());
1941 TheSwitch
->eraseFromParent();
1944 // Otherwise, make the default destination of the switch instruction be one
1945 // of the other successors.
1946 TheSwitch
->setCondition(call
);
1947 TheSwitch
->setDefaultDest(
1948 TheSwitch
->getSuccessor(ExtractedFuncRetVals
.size()));
1949 // Remove redundant case
1950 TheSwitch
->removeCase(
1951 SwitchInst::CaseIt(TheSwitch
, ExtractedFuncRetVals
.size() - 1));
1955 // Insert lifetime markers around the reloads of any output values. The
1956 // allocas output values are stored in are only in-use in the codeRepl block.
1957 insertLifetimeMarkersSurroundingCall(M
, ReloadOutputs
, ReloadOutputs
, call
);
1959 // Replicate the effects of any lifetime start/end markers which referenced
1960 // input objects in the extraction region by placing markers around the call.
1961 insertLifetimeMarkersSurroundingCall(oldFunction
->getParent(), LifetimesStart
,
// Wires the already-emitted replacer block (`codeReplacer`) into the old
// function: redirects external branches from `header` to `codeReplacer`,
// fixes up PHIs in the exit blocks, replaces uses of extracted output values
// with their reloads, and updates branch weights.
//
// NOTE(review): this chunk is a lossy extraction — several original source
// lines are missing (e.g. a `continue;` after the region-membership check,
// closing braces, and the function's final `}`), so the visible text is not
// compilable as-is. Comments below describe only what the visible lines show.
1967 void CodeExtractor::insertReplacerCall(
1968 Function
*oldFunction
, BasicBlock
*header
, BasicBlock
*codeReplacer
,
1969 const ValueSet
&outputs
, ArrayRef
<Value
*> Reloads
,
1970 const DenseMap
<BasicBlock
*, BlockFrequency
> &ExitWeights
) {
1972 // Rewrite branches to basic blocks outside of the loop to new dummy blocks
1973 // within the new function. This must be done before we lose track of which
1974 // blocks were originally in the code region.
// Snapshot the users first: replaceUsesOfWith below mutates the use list,
// so iterating header->users() directly would be unsafe.
1975 std::vector
<User
*> Users(header
->user_begin(), header
->user_end());
1976 for (auto &U
: Users
)
1977 // The BasicBlock which contains the branch is not in the region
1978 // modify the branch target to a new block
1979 if (Instruction
*I
= dyn_cast
<Instruction
>(U
))
// Only terminators in the old function, outside the extracted region
// (`Blocks`), are retargeted from `header` to `codeReplacer`.
1980 if (I
->isTerminator() && I
->getFunction() == oldFunction
&&
1981 !Blocks
.count(I
->getParent()))
1982 I
->replaceUsesOfWith(header
, codeReplacer
);
1984 // When moving the code region it is sufficient to replace all uses to the
1985 // extracted function values. Since the original definition's block
1986 // dominated its use, it will also be dominated by codeReplacer's switch
1987 // which joined multiple exit blocks.
// For each exit block's PHI, incoming edges that used to come from inside
// the region now come from codeReplacer; rewrite the first such edge and
// assert all others agree on the value.
1988 for (BasicBlock
*ExitBB
: ExtractedFuncRetVals
)
1989 for (PHINode
&PN
: ExitBB
->phis()) {
1990 Value
*IncomingCodeReplacerVal
= nullptr;
1991 for (unsigned i
= 0, e
= PN
.getNumIncomingValues(); i
!= e
; ++i
) {
1992 // Ignore incoming values from outside of the extracted region.
// NOTE(review): the skip statement for the check below (original line
// 1994/1995) was lost in extraction — presumably a `continue;`.
1993 if (!Blocks
.count(PN
.getIncomingBlock(i
)))
1996 // Ensure that there is only one incoming value from codeReplacer.
1997 if (!IncomingCodeReplacerVal
) {
1998 PN
.setIncomingBlock(i
, codeReplacer
);
1999 IncomingCodeReplacerVal
= PN
.getIncomingValue(i
);
// NOTE(review): "incompatbile" in the assert message below is a typo for
// "incompatible"; it is a runtime string, so it is left untouched here.
2001 assert(IncomingCodeReplacerVal
== PN
.getIncomingValue(i
) &&
2002 "PHI has two incompatbile incoming values from codeRepl");
// Replace each use (within the old function) of an extracted output value
// with its reload emitted in codeReplacer; Reloads[i] pairs with outputs[i].
2006 for (unsigned i
= 0, e
= outputs
.size(); i
!= e
; ++i
) {
2007 Value
*load
= Reloads
[i
];
// Again snapshot users before mutating the use list.
2008 std::vector
<User
*> Users(outputs
[i
]->user_begin(), outputs
[i
]->user_end());
2009 for (User
*U
: Users
) {
2010 Instruction
*inst
= cast
<Instruction
>(U
);
// Uses inside the extracted (new) function keep the original value; only
// uses still in the old function switch to the reload.
2011 if (inst
->getParent()->getParent() == oldFunction
)
2012 inst
->replaceUsesOfWith(outputs
[i
], load
);
2016 // Update the branch weights for the exit block.
// Only meaningful when profile info (BFI) exists and the replacer ends in a
// multi-way switch (more than one extracted return value / exit block).
2017 if (BFI
&& ExtractedFuncRetVals
.size() > 1)
2018 calculateNewCallTerminatorWeights(codeReplacer
, ExitWeights
, BPI
);
// Debug-time validator: checks that after extraction, the assumption cache
// (AC) holds no llvm.assume calls or affected values that have migrated into
// the new function. Returns true when a stale entry is found — TODO confirm
// against the (truncated) return statements; the visible lines only show the
// guard conditions, their consequents were lost in extraction.
2021 bool CodeExtractor::verifyAssumptionCache(const Function
&OldFunc
,
2022 const Function
&NewFunc
,
2023 AssumptionCache
*AC
) {
// Walk every cached assumption; the weak handle may be null or not a call,
// hence dyn_cast_or_null. NOTE(review): the null-check/skip for `I`
// (original lines 2026-2028) is missing from this chunk.
2024 for (auto AssumeVH
: AC
->assumptions()) {
2025 auto *I
= dyn_cast_or_null
<CallInst
>(AssumeVH
);
2029 // There shouldn't be any llvm.assume intrinsics in the new function.
// NOTE(review): consequent of this guard (original 2031-2032) truncated —
// presumably reports/returns when the assume no longer lives in OldFunc.
2030 if (I
->getFunction() != &OldFunc
)
2033 // There shouldn't be any stale affected values in the assumption cache
2034 // that were previously in the old function, but that have now been moved
2035 // to the new function.
2036 for (auto AffectedValVH
: AC
->assumptionsFor(I
->getOperand(0))) {
2037 auto *AffectedCI
= dyn_cast_or_null
<CallInst
>(AffectedValVH
);
// NOTE(review): consequents of the two guards below (original 2038-2039,
// 2041, 2044-2049, including the function's closing braces and final
// return) are missing from this chunk.
2040 if (AffectedCI
->getFunction() != &OldFunc
)
2042 auto *AssumedInst
= cast
<Instruction
>(AffectedCI
->getOperand(0));
2043 if (AssumedInst
->getFunction() != &OldFunc
)
2050 void CodeExtractor::excludeArgFromAggregate(Value
*Arg
) {
2051 ExcludeArgsFromAggregate
.insert(Arg
);