1 //===- CodeExtractor.cpp - Pull code region into a new function -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the interface to tear out a code region, such as an
10 // individual loop or a parallel section, into a new function, replacing it with
11 // a call to the new function.
13 //===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <cassert>
#include <string>
#include <vector>
71 using namespace llvm::PatternMatch
;
72 using ProfileCount
= Function::ProfileCount
;
74 #define DEBUG_TYPE "code-extractor"
76 // Provide a command-line option to aggregate function arguments into a struct
77 // for functions produced by the code extractor. This is useful when converting
78 // extracted functions to pthread-based code, as only one argument (void*) can
79 // be passed in to pthread_create().
81 AggregateArgsOpt("aggregate-extracted-args", cl::Hidden
,
82 cl::desc("Aggregate arguments to code-extracted functions"));
84 /// Test whether a block is valid for extraction.
85 static bool isBlockValidForExtraction(const BasicBlock
&BB
,
86 const SetVector
<BasicBlock
*> &Result
,
87 bool AllowVarArgs
, bool AllowAlloca
) {
88 // taking the address of a basic block moved to another function is illegal
89 if (BB
.hasAddressTaken())
92 // don't hoist code that uses another basicblock address, as it's likely to
93 // lead to unexpected behavior, like cross-function jumps
94 SmallPtrSet
<User
const *, 16> Visited
;
95 SmallVector
<User
const *, 16> ToVisit
;
97 for (Instruction
const &Inst
: BB
)
98 ToVisit
.push_back(&Inst
);
100 while (!ToVisit
.empty()) {
101 User
const *Curr
= ToVisit
.pop_back_val();
102 if (!Visited
.insert(Curr
).second
)
104 if (isa
<BlockAddress
const>(Curr
))
105 return false; // even a reference to self is likely to be not compatible
107 if (isa
<Instruction
>(Curr
) && cast
<Instruction
>(Curr
)->getParent() != &BB
)
110 for (auto const &U
: Curr
->operands()) {
111 if (auto *UU
= dyn_cast
<User
>(U
))
112 ToVisit
.push_back(UU
);
116 // If explicitly requested, allow vastart and alloca. For invoke instructions
117 // verify that extraction is valid.
118 for (BasicBlock::const_iterator I
= BB
.begin(), E
= BB
.end(); I
!= E
; ++I
) {
119 if (isa
<AllocaInst
>(I
)) {
125 if (const auto *II
= dyn_cast
<InvokeInst
>(I
)) {
126 // Unwind destination (either a landingpad, catchswitch, or cleanuppad)
127 // must be a part of the subgraph which is being extracted.
128 if (auto *UBB
= II
->getUnwindDest())
129 if (!Result
.count(UBB
))
134 // All catch handlers of a catchswitch instruction as well as the unwind
135 // destination must be in the subgraph.
136 if (const auto *CSI
= dyn_cast
<CatchSwitchInst
>(I
)) {
137 if (auto *UBB
= CSI
->getUnwindDest())
138 if (!Result
.count(UBB
))
140 for (const auto *HBB
: CSI
->handlers())
141 if (!Result
.count(const_cast<BasicBlock
*>(HBB
)))
146 // Make sure that entire catch handler is within subgraph. It is sufficient
147 // to check that catch return's block is in the list.
148 if (const auto *CPI
= dyn_cast
<CatchPadInst
>(I
)) {
149 for (const auto *U
: CPI
->users())
150 if (const auto *CRI
= dyn_cast
<CatchReturnInst
>(U
))
151 if (!Result
.count(const_cast<BasicBlock
*>(CRI
->getParent())))
156 // And do similar checks for cleanup handler - the entire handler must be
157 // in subgraph which is going to be extracted. For cleanup return should
158 // additionally check that the unwind destination is also in the subgraph.
159 if (const auto *CPI
= dyn_cast
<CleanupPadInst
>(I
)) {
160 for (const auto *U
: CPI
->users())
161 if (const auto *CRI
= dyn_cast
<CleanupReturnInst
>(U
))
162 if (!Result
.count(const_cast<BasicBlock
*>(CRI
->getParent())))
166 if (const auto *CRI
= dyn_cast
<CleanupReturnInst
>(I
)) {
167 if (auto *UBB
= CRI
->getUnwindDest())
168 if (!Result
.count(UBB
))
173 if (const CallInst
*CI
= dyn_cast
<CallInst
>(I
)) {
174 if (const Function
*F
= CI
->getCalledFunction()) {
175 auto IID
= F
->getIntrinsicID();
176 if (IID
== Intrinsic::vastart
) {
183 // Currently, we miscompile outlined copies of eh_typid_for. There are
184 // proposals for fixing this in llvm.org/PR39545.
185 if (IID
== Intrinsic::eh_typeid_for
)
194 /// Build a set of blocks to extract if the input blocks are viable.
195 static SetVector
<BasicBlock
*>
196 buildExtractionBlockSet(ArrayRef
<BasicBlock
*> BBs
, DominatorTree
*DT
,
197 bool AllowVarArgs
, bool AllowAlloca
) {
198 assert(!BBs
.empty() && "The set of blocks to extract must be non-empty");
199 SetVector
<BasicBlock
*> Result
;
201 // Loop over the blocks, adding them to our set-vector, and aborting with an
202 // empty set if we encounter invalid blocks.
203 for (BasicBlock
*BB
: BBs
) {
204 // If this block is dead, don't process it.
205 if (DT
&& !DT
->isReachableFromEntry(BB
))
208 if (!Result
.insert(BB
))
209 llvm_unreachable("Repeated basic blocks in extraction input");
212 LLVM_DEBUG(dbgs() << "Region front block: " << Result
.front()->getName()
215 for (auto *BB
: Result
) {
216 if (!isBlockValidForExtraction(*BB
, Result
, AllowVarArgs
, AllowAlloca
))
219 // Make sure that the first block is not a landing pad.
220 if (BB
== Result
.front()) {
222 LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n");
228 // All blocks other than the first must not have predecessors outside of
229 // the subgraph which is being extracted.
230 for (auto *PBB
: predecessors(BB
))
231 if (!Result
.count(PBB
)) {
232 LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from "
233 "outside the region except for the first block!\n"
234 << "Problematic source BB: " << BB
->getName() << "\n"
235 << "Problematic destination BB: " << PBB
->getName()
244 CodeExtractor::CodeExtractor(ArrayRef
<BasicBlock
*> BBs
, DominatorTree
*DT
,
245 bool AggregateArgs
, BlockFrequencyInfo
*BFI
,
246 BranchProbabilityInfo
*BPI
, AssumptionCache
*AC
,
247 bool AllowVarArgs
, bool AllowAlloca
,
248 BasicBlock
*AllocationBlock
, std::string Suffix
)
249 : DT(DT
), AggregateArgs(AggregateArgs
|| AggregateArgsOpt
), BFI(BFI
),
250 BPI(BPI
), AC(AC
), AllocationBlock(AllocationBlock
),
251 AllowVarArgs(AllowVarArgs
),
252 Blocks(buildExtractionBlockSet(BBs
, DT
, AllowVarArgs
, AllowAlloca
)),
255 CodeExtractor::CodeExtractor(DominatorTree
&DT
, Loop
&L
, bool AggregateArgs
,
256 BlockFrequencyInfo
*BFI
,
257 BranchProbabilityInfo
*BPI
, AssumptionCache
*AC
,
259 : DT(&DT
), AggregateArgs(AggregateArgs
|| AggregateArgsOpt
), BFI(BFI
),
260 BPI(BPI
), AC(AC
), AllocationBlock(nullptr), AllowVarArgs(false),
261 Blocks(buildExtractionBlockSet(L
.getBlocks(), &DT
,
262 /* AllowVarArgs */ false,
263 /* AllowAlloca */ false)),
266 /// definedInRegion - Return true if the specified value is defined in the
267 /// extracted region.
268 static bool definedInRegion(const SetVector
<BasicBlock
*> &Blocks
, Value
*V
) {
269 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
270 if (Blocks
.count(I
->getParent()))
275 /// definedInCaller - Return true if the specified value is defined in the
276 /// function being code extracted, but not in the region being extracted.
277 /// These values must be passed in as live-ins to the function.
278 static bool definedInCaller(const SetVector
<BasicBlock
*> &Blocks
, Value
*V
) {
279 if (isa
<Argument
>(V
)) return true;
280 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
281 if (!Blocks
.count(I
->getParent()))
286 static BasicBlock
*getCommonExitBlock(const SetVector
<BasicBlock
*> &Blocks
) {
287 BasicBlock
*CommonExitBlock
= nullptr;
288 auto hasNonCommonExitSucc
= [&](BasicBlock
*Block
) {
289 for (auto *Succ
: successors(Block
)) {
290 // Internal edges, ok.
291 if (Blocks
.count(Succ
))
293 if (!CommonExitBlock
) {
294 CommonExitBlock
= Succ
;
297 if (CommonExitBlock
!= Succ
)
303 if (any_of(Blocks
, hasNonCommonExitSucc
))
306 return CommonExitBlock
;
309 CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function
&F
) {
310 for (BasicBlock
&BB
: F
) {
311 for (Instruction
&II
: BB
.instructionsWithoutDebug())
312 if (auto *AI
= dyn_cast
<AllocaInst
>(&II
))
313 Allocas
.push_back(AI
);
315 findSideEffectInfoForBlock(BB
);
319 void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock
&BB
) {
320 for (Instruction
&II
: BB
.instructionsWithoutDebug()) {
321 unsigned Opcode
= II
.getOpcode();
322 Value
*MemAddr
= nullptr;
324 case Instruction::Store
:
325 case Instruction::Load
: {
326 if (Opcode
== Instruction::Store
) {
327 StoreInst
*SI
= cast
<StoreInst
>(&II
);
328 MemAddr
= SI
->getPointerOperand();
330 LoadInst
*LI
= cast
<LoadInst
>(&II
);
331 MemAddr
= LI
->getPointerOperand();
333 // Global variable can not be aliased with locals.
334 if (isa
<Constant
>(MemAddr
))
336 Value
*Base
= MemAddr
->stripInBoundsConstantOffsets();
337 if (!isa
<AllocaInst
>(Base
)) {
338 SideEffectingBlocks
.insert(&BB
);
341 BaseMemAddrs
[&BB
].insert(Base
);
345 IntrinsicInst
*IntrInst
= dyn_cast
<IntrinsicInst
>(&II
);
347 if (IntrInst
->isLifetimeStartOrEnd())
349 SideEffectingBlocks
.insert(&BB
);
352 // Treat all the other cases conservatively if it has side effects.
353 if (II
.mayHaveSideEffects()) {
354 SideEffectingBlocks
.insert(&BB
);
362 bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr(
363 BasicBlock
&BB
, AllocaInst
*Addr
) const {
364 if (SideEffectingBlocks
.count(&BB
))
366 auto It
= BaseMemAddrs
.find(&BB
);
367 if (It
!= BaseMemAddrs
.end())
368 return It
->second
.count(Addr
);
372 bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
373 const CodeExtractorAnalysisCache
&CEAC
, Instruction
*Addr
) const {
374 AllocaInst
*AI
= cast
<AllocaInst
>(Addr
->stripInBoundsConstantOffsets());
375 Function
*Func
= (*Blocks
.begin())->getParent();
376 for (BasicBlock
&BB
: *Func
) {
377 if (Blocks
.count(&BB
))
379 if (CEAC
.doesBlockContainClobberOfAddr(BB
, AI
))
386 CodeExtractor::findOrCreateBlockForHoisting(BasicBlock
*CommonExitBlock
) {
387 BasicBlock
*SinglePredFromOutlineRegion
= nullptr;
388 assert(!Blocks
.count(CommonExitBlock
) &&
389 "Expect a block outside the region!");
390 for (auto *Pred
: predecessors(CommonExitBlock
)) {
391 if (!Blocks
.count(Pred
))
393 if (!SinglePredFromOutlineRegion
) {
394 SinglePredFromOutlineRegion
= Pred
;
395 } else if (SinglePredFromOutlineRegion
!= Pred
) {
396 SinglePredFromOutlineRegion
= nullptr;
401 if (SinglePredFromOutlineRegion
)
402 return SinglePredFromOutlineRegion
;
405 auto getFirstPHI
= [](BasicBlock
*BB
) {
406 BasicBlock::iterator I
= BB
->begin();
407 PHINode
*FirstPhi
= nullptr;
408 while (I
!= BB
->end()) {
409 PHINode
*Phi
= dyn_cast
<PHINode
>(I
);
419 // If there are any phi nodes, the single pred either exists or has already
420 // be created before code extraction.
421 assert(!getFirstPHI(CommonExitBlock
) && "Phi not expected");
424 BasicBlock
*NewExitBlock
= CommonExitBlock
->splitBasicBlock(
425 CommonExitBlock
->getFirstNonPHI()->getIterator());
427 for (BasicBlock
*Pred
:
428 llvm::make_early_inc_range(predecessors(CommonExitBlock
))) {
429 if (Blocks
.count(Pred
))
431 Pred
->getTerminator()->replaceUsesOfWith(CommonExitBlock
, NewExitBlock
);
433 // Now add the old exit block to the outline region.
434 Blocks
.insert(CommonExitBlock
);
435 OldTargets
.push_back(NewExitBlock
);
436 return CommonExitBlock
;
439 // Find the pair of life time markers for address 'Addr' that are either
440 // defined inside the outline region or can legally be shrinkwrapped into the
441 // outline region. If there are not other untracked uses of the address, return
442 // the pair of markers if found; otherwise return a pair of nullptr.
443 CodeExtractor::LifetimeMarkerInfo
444 CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache
&CEAC
,
446 BasicBlock
*ExitBlock
) const {
447 LifetimeMarkerInfo Info
;
449 for (User
*U
: Addr
->users()) {
450 IntrinsicInst
*IntrInst
= dyn_cast
<IntrinsicInst
>(U
);
452 // We don't model addresses with multiple start/end markers, but the
453 // markers do not need to be in the region.
454 if (IntrInst
->getIntrinsicID() == Intrinsic::lifetime_start
) {
457 Info
.LifeStart
= IntrInst
;
460 if (IntrInst
->getIntrinsicID() == Intrinsic::lifetime_end
) {
463 Info
.LifeEnd
= IntrInst
;
466 // At this point, permit debug uses outside of the region.
467 // This is fixed in a later call to fixupDebugInfoPostExtraction().
468 if (isa
<DbgInfoIntrinsic
>(IntrInst
))
471 // Find untracked uses of the address, bail.
472 if (!definedInRegion(Blocks
, U
))
476 if (!Info
.LifeStart
|| !Info
.LifeEnd
)
479 Info
.SinkLifeStart
= !definedInRegion(Blocks
, Info
.LifeStart
);
480 Info
.HoistLifeEnd
= !definedInRegion(Blocks
, Info
.LifeEnd
);
481 // Do legality check.
482 if ((Info
.SinkLifeStart
|| Info
.HoistLifeEnd
) &&
483 !isLegalToShrinkwrapLifetimeMarkers(CEAC
, Addr
))
486 // Check to see if we have a place to do hoisting, if not, bail.
487 if (Info
.HoistLifeEnd
&& !ExitBlock
)
493 void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache
&CEAC
,
494 ValueSet
&SinkCands
, ValueSet
&HoistCands
,
495 BasicBlock
*&ExitBlock
) const {
496 Function
*Func
= (*Blocks
.begin())->getParent();
497 ExitBlock
= getCommonExitBlock(Blocks
);
499 auto moveOrIgnoreLifetimeMarkers
=
500 [&](const LifetimeMarkerInfo
&LMI
) -> bool {
503 if (LMI
.SinkLifeStart
) {
504 LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI
.LifeStart
506 SinkCands
.insert(LMI
.LifeStart
);
508 if (LMI
.HoistLifeEnd
) {
509 LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI
.LifeEnd
<< "\n");
510 HoistCands
.insert(LMI
.LifeEnd
);
515 // Look up allocas in the original function in CodeExtractorAnalysisCache, as
516 // this is much faster than walking all the instructions.
517 for (AllocaInst
*AI
: CEAC
.getAllocas()) {
518 BasicBlock
*BB
= AI
->getParent();
519 if (Blocks
.count(BB
))
522 // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca,
523 // check whether it is actually still in the original function.
524 Function
*AIFunc
= BB
->getParent();
528 LifetimeMarkerInfo MarkerInfo
= getLifetimeMarkers(CEAC
, AI
, ExitBlock
);
529 bool Moved
= moveOrIgnoreLifetimeMarkers(MarkerInfo
);
531 LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI
<< "\n");
532 SinkCands
.insert(AI
);
536 // Find bitcasts in the outlined region that have lifetime marker users
537 // outside that region. Replace the lifetime marker use with an
538 // outside region bitcast to avoid unnecessary alloca/reload instructions
539 // and extra lifetime markers.
540 SmallVector
<Instruction
*, 2> LifetimeBitcastUsers
;
541 for (User
*U
: AI
->users()) {
542 if (!definedInRegion(Blocks
, U
))
545 if (U
->stripInBoundsConstantOffsets() != AI
)
548 Instruction
*Bitcast
= cast
<Instruction
>(U
);
549 for (User
*BU
: Bitcast
->users()) {
550 IntrinsicInst
*IntrInst
= dyn_cast
<IntrinsicInst
>(BU
);
554 if (!IntrInst
->isLifetimeStartOrEnd())
557 if (definedInRegion(Blocks
, IntrInst
))
560 LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast"
561 << *Bitcast
<< " in out-of-region lifetime marker "
562 << *IntrInst
<< "\n");
563 LifetimeBitcastUsers
.push_back(IntrInst
);
567 for (Instruction
*I
: LifetimeBitcastUsers
) {
568 Module
*M
= AIFunc
->getParent();
569 LLVMContext
&Ctx
= M
->getContext();
570 auto *Int8PtrTy
= Type::getInt8PtrTy(Ctx
);
572 CastInst::CreatePointerCast(AI
, Int8PtrTy
, "lt.cast", I
);
573 I
->replaceUsesOfWith(I
->getOperand(1), CastI
);
576 // Follow any bitcasts.
577 SmallVector
<Instruction
*, 2> Bitcasts
;
578 SmallVector
<LifetimeMarkerInfo
, 2> BitcastLifetimeInfo
;
579 for (User
*U
: AI
->users()) {
580 if (U
->stripInBoundsConstantOffsets() == AI
) {
581 Instruction
*Bitcast
= cast
<Instruction
>(U
);
582 LifetimeMarkerInfo LMI
= getLifetimeMarkers(CEAC
, Bitcast
, ExitBlock
);
584 Bitcasts
.push_back(Bitcast
);
585 BitcastLifetimeInfo
.push_back(LMI
);
590 // Found unknown use of AI.
591 if (!definedInRegion(Blocks
, U
)) {
597 // Either no bitcasts reference the alloca or there are unknown uses.
598 if (Bitcasts
.empty())
601 LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI
<< "\n");
602 SinkCands
.insert(AI
);
603 for (unsigned I
= 0, E
= Bitcasts
.size(); I
!= E
; ++I
) {
604 Instruction
*BitcastAddr
= Bitcasts
[I
];
605 const LifetimeMarkerInfo
&LMI
= BitcastLifetimeInfo
[I
];
606 assert(LMI
.LifeStart
&&
607 "Unsafe to sink bitcast without lifetime markers");
608 moveOrIgnoreLifetimeMarkers(LMI
);
609 if (!definedInRegion(Blocks
, BitcastAddr
)) {
610 LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
612 SinkCands
.insert(BitcastAddr
);
618 bool CodeExtractor::isEligible() const {
621 BasicBlock
*Header
= *Blocks
.begin();
622 Function
*F
= Header
->getParent();
624 // For functions with varargs, check that varargs handling is only done in the
625 // outlined function, i.e vastart and vaend are only used in outlined blocks.
626 if (AllowVarArgs
&& F
->getFunctionType()->isVarArg()) {
627 auto containsVarArgIntrinsic
= [](const Instruction
&I
) {
628 if (const CallInst
*CI
= dyn_cast
<CallInst
>(&I
))
629 if (const Function
*Callee
= CI
->getCalledFunction())
630 return Callee
->getIntrinsicID() == Intrinsic::vastart
||
631 Callee
->getIntrinsicID() == Intrinsic::vaend
;
635 for (auto &BB
: *F
) {
636 if (Blocks
.count(&BB
))
638 if (llvm::any_of(BB
, containsVarArgIntrinsic
))
645 void CodeExtractor::findInputsOutputs(ValueSet
&Inputs
, ValueSet
&Outputs
,
646 const ValueSet
&SinkCands
) const {
647 for (BasicBlock
*BB
: Blocks
) {
648 // If a used value is defined outside the region, it's an input. If an
649 // instruction is used outside the region, it's an output.
650 for (Instruction
&II
: *BB
) {
651 for (auto &OI
: II
.operands()) {
653 if (!SinkCands
.count(V
) && definedInCaller(Blocks
, V
))
657 for (User
*U
: II
.users())
658 if (!definedInRegion(Blocks
, U
)) {
666 /// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside
667 /// of the region, we need to split the entry block of the region so that the
668 /// PHI node is easier to deal with.
669 void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock
*&Header
) {
670 unsigned NumPredsFromRegion
= 0;
671 unsigned NumPredsOutsideRegion
= 0;
673 if (Header
!= &Header
->getParent()->getEntryBlock()) {
674 PHINode
*PN
= dyn_cast
<PHINode
>(Header
->begin());
675 if (!PN
) return; // No PHI nodes.
677 // If the header node contains any PHI nodes, check to see if there is more
678 // than one entry from outside the region. If so, we need to sever the
679 // header block into two.
680 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
681 if (Blocks
.count(PN
->getIncomingBlock(i
)))
682 ++NumPredsFromRegion
;
684 ++NumPredsOutsideRegion
;
686 // If there is one (or fewer) predecessor from outside the region, we don't
687 // need to do anything special.
688 if (NumPredsOutsideRegion
<= 1) return;
691 // Otherwise, we need to split the header block into two pieces: one
692 // containing PHI nodes merging values from outside of the region, and a
693 // second that contains all of the code for the block and merges back any
694 // incoming values from inside of the region.
695 BasicBlock
*NewBB
= SplitBlock(Header
, Header
->getFirstNonPHI(), DT
);
697 // We only want to code extract the second block now, and it becomes the new
698 // header of the region.
699 BasicBlock
*OldPred
= Header
;
700 Blocks
.remove(OldPred
);
701 Blocks
.insert(NewBB
);
704 // Okay, now we need to adjust the PHI nodes and any branches from within the
705 // region to go to the new header block instead of the old header block.
706 if (NumPredsFromRegion
) {
707 PHINode
*PN
= cast
<PHINode
>(OldPred
->begin());
708 // Loop over all of the predecessors of OldPred that are in the region,
709 // changing them to branch to NewBB instead.
710 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
711 if (Blocks
.count(PN
->getIncomingBlock(i
))) {
712 Instruction
*TI
= PN
->getIncomingBlock(i
)->getTerminator();
713 TI
->replaceUsesOfWith(OldPred
, NewBB
);
716 // Okay, everything within the region is now branching to the right block, we
717 // just have to update the PHI nodes now, inserting PHI nodes into NewBB.
718 BasicBlock::iterator AfterPHIs
;
719 for (AfterPHIs
= OldPred
->begin(); isa
<PHINode
>(AfterPHIs
); ++AfterPHIs
) {
720 PHINode
*PN
= cast
<PHINode
>(AfterPHIs
);
721 // Create a new PHI node in the new region, which has an incoming value
722 // from OldPred of PN.
723 PHINode
*NewPN
= PHINode::Create(PN
->getType(), 1 + NumPredsFromRegion
,
724 PN
->getName() + ".ce");
725 NewPN
->insertBefore(NewBB
->begin());
726 PN
->replaceAllUsesWith(NewPN
);
727 NewPN
->addIncoming(PN
, OldPred
);
729 // Loop over all of the incoming value in PN, moving them to NewPN if they
730 // are from the extracted region.
731 for (unsigned i
= 0; i
!= PN
->getNumIncomingValues(); ++i
) {
732 if (Blocks
.count(PN
->getIncomingBlock(i
))) {
733 NewPN
->addIncoming(PN
->getIncomingValue(i
), PN
->getIncomingBlock(i
));
734 PN
->removeIncomingValue(i
);
742 /// severSplitPHINodesOfExits - if PHI nodes in exit blocks have inputs from
743 /// outlined region, we split these PHIs on two: one with inputs from region
744 /// and other with remaining incoming blocks; then first PHIs are placed in
746 void CodeExtractor::severSplitPHINodesOfExits(
747 const SmallPtrSetImpl
<BasicBlock
*> &Exits
) {
748 for (BasicBlock
*ExitBB
: Exits
) {
749 BasicBlock
*NewBB
= nullptr;
751 for (PHINode
&PN
: ExitBB
->phis()) {
752 // Find all incoming values from the outlining region.
753 SmallVector
<unsigned, 2> IncomingVals
;
754 for (unsigned i
= 0; i
< PN
.getNumIncomingValues(); ++i
)
755 if (Blocks
.count(PN
.getIncomingBlock(i
)))
756 IncomingVals
.push_back(i
);
758 // Do not process PHI if there is one (or fewer) predecessor from region.
759 // If PHI has exactly one predecessor from region, only this one incoming
760 // will be replaced on codeRepl block, so it should be safe to skip PHI.
761 if (IncomingVals
.size() <= 1)
764 // Create block for new PHIs and add it to the list of outlined if it
765 // wasn't done before.
767 NewBB
= BasicBlock::Create(ExitBB
->getContext(),
768 ExitBB
->getName() + ".split",
769 ExitBB
->getParent(), ExitBB
);
770 SmallVector
<BasicBlock
*, 4> Preds(predecessors(ExitBB
));
771 for (BasicBlock
*PredBB
: Preds
)
772 if (Blocks
.count(PredBB
))
773 PredBB
->getTerminator()->replaceUsesOfWith(ExitBB
, NewBB
);
774 BranchInst::Create(ExitBB
, NewBB
);
775 Blocks
.insert(NewBB
);
779 PHINode
*NewPN
= PHINode::Create(PN
.getType(), IncomingVals
.size(),
780 PN
.getName() + ".ce");
781 NewPN
->insertBefore(NewBB
->getFirstNonPHIIt());
782 for (unsigned i
: IncomingVals
)
783 NewPN
->addIncoming(PN
.getIncomingValue(i
), PN
.getIncomingBlock(i
));
784 for (unsigned i
: reverse(IncomingVals
))
785 PN
.removeIncomingValue(i
, false);
786 PN
.addIncoming(NewPN
, NewBB
);
791 void CodeExtractor::splitReturnBlocks() {
792 for (BasicBlock
*Block
: Blocks
)
793 if (ReturnInst
*RI
= dyn_cast
<ReturnInst
>(Block
->getTerminator())) {
795 Block
->splitBasicBlock(RI
->getIterator(), Block
->getName() + ".ret");
797 // Old dominates New. New node dominates all other nodes dominated
799 DomTreeNode
*OldNode
= DT
->getNode(Block
);
800 SmallVector
<DomTreeNode
*, 8> Children(OldNode
->begin(),
803 DomTreeNode
*NewNode
= DT
->addNewBlock(New
, Block
);
805 for (DomTreeNode
*I
: Children
)
806 DT
->changeImmediateDominator(I
, NewNode
);
811 /// constructFunction - make a function based on inputs and outputs, as follows:
812 /// f(in0, ..., inN, out0, ..., outN)
813 Function
*CodeExtractor::constructFunction(const ValueSet
&inputs
,
814 const ValueSet
&outputs
,
816 BasicBlock
*newRootNode
,
817 BasicBlock
*newHeader
,
818 Function
*oldFunction
,
820 LLVM_DEBUG(dbgs() << "inputs: " << inputs
.size() << "\n");
821 LLVM_DEBUG(dbgs() << "outputs: " << outputs
.size() << "\n");
823 // This function returns unsigned, outputs will go back by reference.
824 switch (NumExitBlocks
) {
826 case 1: RetTy
= Type::getVoidTy(header
->getContext()); break;
827 case 2: RetTy
= Type::getInt1Ty(header
->getContext()); break;
828 default: RetTy
= Type::getInt16Ty(header
->getContext()); break;
831 std::vector
<Type
*> ParamTy
;
832 std::vector
<Type
*> AggParamTy
;
833 ValueSet StructValues
;
834 const DataLayout
&DL
= M
->getDataLayout();
836 // Add the types of the input values to the function's argument list
837 for (Value
*value
: inputs
) {
838 LLVM_DEBUG(dbgs() << "value used in func: " << *value
<< "\n");
839 if (AggregateArgs
&& !ExcludeArgsFromAggregate
.contains(value
)) {
840 AggParamTy
.push_back(value
->getType());
841 StructValues
.insert(value
);
843 ParamTy
.push_back(value
->getType());
846 // Add the types of the output values to the function's argument list.
847 for (Value
*output
: outputs
) {
848 LLVM_DEBUG(dbgs() << "instr used in func: " << *output
<< "\n");
849 if (AggregateArgs
&& !ExcludeArgsFromAggregate
.contains(output
)) {
850 AggParamTy
.push_back(output
->getType());
851 StructValues
.insert(output
);
854 PointerType::get(output
->getType(), DL
.getAllocaAddrSpace()));
858 (ParamTy
.size() + AggParamTy
.size()) ==
859 (inputs
.size() + outputs
.size()) &&
860 "Number of scalar and aggregate params does not match inputs, outputs");
861 assert((StructValues
.empty() || AggregateArgs
) &&
862 "Expeced StructValues only with AggregateArgs set");
864 // Concatenate scalar and aggregate params in ParamTy.
865 size_t NumScalarParams
= ParamTy
.size();
866 StructType
*StructTy
= nullptr;
867 if (AggregateArgs
&& !AggParamTy
.empty()) {
868 StructTy
= StructType::get(M
->getContext(), AggParamTy
);
869 ParamTy
.push_back(PointerType::get(StructTy
, DL
.getAllocaAddrSpace()));
873 dbgs() << "Function type: " << *RetTy
<< " f(";
874 for (Type
*i
: ParamTy
)
875 dbgs() << *i
<< ", ";
879 FunctionType
*funcType
= FunctionType::get(
880 RetTy
, ParamTy
, AllowVarArgs
&& oldFunction
->isVarArg());
882 std::string SuffixToUse
=
884 ? (header
->getName().empty() ? "extracted" : header
->getName().str())
886 // Create the new function
887 Function
*newFunction
= Function::Create(
888 funcType
, GlobalValue::InternalLinkage
, oldFunction
->getAddressSpace(),
889 oldFunction
->getName() + "." + SuffixToUse
, M
);
891 // Inherit all of the target dependent attributes and white-listed
892 // target independent attributes.
893 // (e.g. If the extracted region contains a call to an x86.sse
894 // instruction we need to make sure that the extracted region has the
895 // "target-features" attribute allowing it to be lowered.
896 // FIXME: This should be changed to check to see if a specific
897 // attribute can not be inherited.
898 for (const auto &Attr
: oldFunction
->getAttributes().getFnAttrs()) {
899 if (Attr
.isStringAttribute()) {
900 if (Attr
.getKindAsString() == "thunk")
903 switch (Attr
.getKindAsEnum()) {
904 // Those attributes cannot be propagated safely. Explicitly list them
905 // here so we get a warning if new attributes are added.
906 case Attribute::AllocSize
:
907 case Attribute::Builtin
:
908 case Attribute::Convergent
:
909 case Attribute::JumpTable
:
910 case Attribute::Naked
:
911 case Attribute::NoBuiltin
:
912 case Attribute::NoMerge
:
913 case Attribute::NoReturn
:
914 case Attribute::NoSync
:
915 case Attribute::ReturnsTwice
:
916 case Attribute::Speculatable
:
917 case Attribute::StackAlignment
:
918 case Attribute::WillReturn
:
919 case Attribute::AllocKind
:
920 case Attribute::PresplitCoroutine
:
921 case Attribute::Memory
:
922 case Attribute::NoFPClass
:
924 // Those attributes should be safe to propagate to the extracted function.
925 case Attribute::AlwaysInline
:
926 case Attribute::Cold
:
927 case Attribute::DisableSanitizerInstrumentation
:
928 case Attribute::FnRetThunkExtern
:
930 case Attribute::NoRecurse
:
931 case Attribute::InlineHint
:
932 case Attribute::MinSize
:
933 case Attribute::NoCallback
:
934 case Attribute::NoDuplicate
:
935 case Attribute::NoFree
:
936 case Attribute::NoImplicitFloat
:
937 case Attribute::NoInline
:
938 case Attribute::NonLazyBind
:
939 case Attribute::NoRedZone
:
940 case Attribute::NoUnwind
:
941 case Attribute::NoSanitizeBounds
:
942 case Attribute::NoSanitizeCoverage
:
943 case Attribute::NullPointerIsValid
:
944 case Attribute::OptForFuzzing
:
945 case Attribute::OptimizeNone
:
946 case Attribute::OptimizeForSize
:
947 case Attribute::SafeStack
:
948 case Attribute::ShadowCallStack
:
949 case Attribute::SanitizeAddress
:
950 case Attribute::SanitizeMemory
:
951 case Attribute::SanitizeThread
:
952 case Attribute::SanitizeHWAddress
:
953 case Attribute::SanitizeMemTag
:
954 case Attribute::SpeculativeLoadHardening
:
955 case Attribute::StackProtect
:
956 case Attribute::StackProtectReq
:
957 case Attribute::StackProtectStrong
:
958 case Attribute::StrictFP
:
959 case Attribute::UWTable
:
960 case Attribute::VScaleRange
:
961 case Attribute::NoCfCheck
:
962 case Attribute::MustProgress
:
963 case Attribute::NoProfile
:
964 case Attribute::SkipProfile
:
966 // These attributes cannot be applied to functions.
967 case Attribute::Alignment
:
968 case Attribute::AllocatedPointer
:
969 case Attribute::AllocAlign
:
970 case Attribute::ByVal
:
971 case Attribute::Dereferenceable
:
972 case Attribute::DereferenceableOrNull
:
973 case Attribute::ElementType
:
974 case Attribute::InAlloca
:
975 case Attribute::InReg
:
976 case Attribute::Nest
:
977 case Attribute::NoAlias
:
978 case Attribute::NoCapture
:
979 case Attribute::NoUndef
:
980 case Attribute::NonNull
:
981 case Attribute::Preallocated
:
982 case Attribute::ReadNone
:
983 case Attribute::ReadOnly
:
984 case Attribute::Returned
:
985 case Attribute::SExt
:
986 case Attribute::StructRet
:
987 case Attribute::SwiftError
:
988 case Attribute::SwiftSelf
:
989 case Attribute::SwiftAsync
:
990 case Attribute::ZExt
:
991 case Attribute::ImmArg
:
992 case Attribute::ByRef
:
993 case Attribute::WriteOnly
:
994 // These are not really attributes.
995 case Attribute::None
:
996 case Attribute::EndAttrKinds
:
997 case Attribute::EmptyKey
:
998 case Attribute::TombstoneKey
:
999 llvm_unreachable("Not a function attribute");
1002 newFunction
->addFnAttr(Attr
);
1004 newFunction
->insert(newFunction
->end(), newRootNode
);
1006 // Create scalar and aggregate iterators to name all of the arguments we
1008 Function::arg_iterator ScalarAI
= newFunction
->arg_begin();
1009 Function::arg_iterator AggAI
= std::next(ScalarAI
, NumScalarParams
);
1011 // Rewrite all users of the inputs in the extracted region to use the
1012 // arguments (or appropriate addressing into struct) instead.
1013 for (unsigned i
= 0, e
= inputs
.size(), aggIdx
= 0; i
!= e
; ++i
) {
1015 if (AggregateArgs
&& StructValues
.contains(inputs
[i
])) {
1017 Idx
[0] = Constant::getNullValue(Type::getInt32Ty(header
->getContext()));
1018 Idx
[1] = ConstantInt::get(Type::getInt32Ty(header
->getContext()), aggIdx
);
1019 Instruction
*TI
= newFunction
->begin()->getTerminator();
1020 GetElementPtrInst
*GEP
= GetElementPtrInst::Create(
1021 StructTy
, &*AggAI
, Idx
, "gep_" + inputs
[i
]->getName(), TI
);
1022 RewriteVal
= new LoadInst(StructTy
->getElementType(aggIdx
), GEP
,
1023 "loadgep_" + inputs
[i
]->getName(), TI
);
1026 RewriteVal
= &*ScalarAI
++;
1028 std::vector
<User
*> Users(inputs
[i
]->user_begin(), inputs
[i
]->user_end());
1029 for (User
*use
: Users
)
1030 if (Instruction
*inst
= dyn_cast
<Instruction
>(use
))
1031 if (Blocks
.count(inst
->getParent()))
1032 inst
->replaceUsesOfWith(inputs
[i
], RewriteVal
);
1035 // Set names for input and output arguments.
1036 if (NumScalarParams
) {
1037 ScalarAI
= newFunction
->arg_begin();
1038 for (unsigned i
= 0, e
= inputs
.size(); i
!= e
; ++i
, ++ScalarAI
)
1039 if (!StructValues
.contains(inputs
[i
]))
1040 ScalarAI
->setName(inputs
[i
]->getName());
1041 for (unsigned i
= 0, e
= outputs
.size(); i
!= e
; ++i
, ++ScalarAI
)
1042 if (!StructValues
.contains(outputs
[i
]))
1043 ScalarAI
->setName(outputs
[i
]->getName() + ".out");
1046 // Rewrite branches to basic blocks outside of the loop to new dummy blocks
1047 // within the new function. This must be done before we lose track of which
1048 // blocks were originally in the code region.
1049 std::vector
<User
*> Users(header
->user_begin(), header
->user_end());
1050 for (auto &U
: Users
)
1051 // The BasicBlock which contains the branch is not in the region
1052 // modify the branch target to a new block
1053 if (Instruction
*I
= dyn_cast
<Instruction
>(U
))
1054 if (I
->isTerminator() && I
->getFunction() == oldFunction
&&
1055 !Blocks
.count(I
->getParent()))
1056 I
->replaceUsesOfWith(header
, newHeader
);
1061 /// Erase lifetime.start markers which reference inputs to the extraction
1062 /// region, and insert the referenced memory into \p LifetimesStart.
1064 /// The extraction region is defined by a set of blocks (\p Blocks), and a set
1065 /// of allocas which will be moved from the caller function into the extracted
1066 /// function (\p SunkAllocas).
1067 static void eraseLifetimeMarkersOnInputs(const SetVector
<BasicBlock
*> &Blocks
,
1068 const SetVector
<Value
*> &SunkAllocas
,
1069 SetVector
<Value
*> &LifetimesStart
) {
1070 for (BasicBlock
*BB
: Blocks
) {
1071 for (Instruction
&I
: llvm::make_early_inc_range(*BB
)) {
1072 auto *II
= dyn_cast
<IntrinsicInst
>(&I
);
1073 if (!II
|| !II
->isLifetimeStartOrEnd())
1076 // Get the memory operand of the lifetime marker. If the underlying
1077 // object is a sunk alloca, or is otherwise defined in the extraction
1078 // region, the lifetime marker must not be erased.
1079 Value
*Mem
= II
->getOperand(1)->stripInBoundsOffsets();
1080 if (SunkAllocas
.count(Mem
) || definedInRegion(Blocks
, Mem
))
1083 if (II
->getIntrinsicID() == Intrinsic::lifetime_start
)
1084 LifetimesStart
.insert(Mem
);
1085 II
->eraseFromParent();
1090 /// Insert lifetime start/end markers surrounding the call to the new function
1091 /// for objects defined in the caller.
1092 static void insertLifetimeMarkersSurroundingCall(
1093 Module
*M
, ArrayRef
<Value
*> LifetimesStart
, ArrayRef
<Value
*> LifetimesEnd
,
1094 CallInst
*TheCall
) {
1095 LLVMContext
&Ctx
= M
->getContext();
1096 auto NegativeOne
= ConstantInt::getSigned(Type::getInt64Ty(Ctx
), -1);
1097 Instruction
*Term
= TheCall
->getParent()->getTerminator();
1099 // Emit lifetime markers for the pointers given in \p Objects. Insert the
1100 // markers before the call if \p InsertBefore, and after the call otherwise.
1101 auto insertMarkers
= [&](Intrinsic::ID MarkerFunc
, ArrayRef
<Value
*> Objects
,
1102 bool InsertBefore
) {
1103 for (Value
*Mem
: Objects
) {
1104 assert((!isa
<Instruction
>(Mem
) || cast
<Instruction
>(Mem
)->getFunction() ==
1105 TheCall
->getFunction()) &&
1106 "Input memory not defined in original function");
1108 Function
*Func
= Intrinsic::getDeclaration(M
, MarkerFunc
, Mem
->getType());
1109 auto Marker
= CallInst::Create(Func
, {NegativeOne
, Mem
});
1111 Marker
->insertBefore(TheCall
);
1113 Marker
->insertBefore(Term
);
1117 if (!LifetimesStart
.empty()) {
1118 insertMarkers(Intrinsic::lifetime_start
, LifetimesStart
,
1119 /*InsertBefore=*/true);
1122 if (!LifetimesEnd
.empty()) {
1123 insertMarkers(Intrinsic::lifetime_end
, LifetimesEnd
,
1124 /*InsertBefore=*/false);
1128 /// emitCallAndSwitchStatement - This method sets up the caller side by adding
1129 /// the call instruction, splitting any PHI nodes in the header block as
1131 CallInst
*CodeExtractor::emitCallAndSwitchStatement(Function
*newFunction
,
1132 BasicBlock
*codeReplacer
,
1134 ValueSet
&outputs
) {
1135 // Emit a call to the new function, passing in: *pointer to struct (if
1136 // aggregating parameters), or plan inputs and allocated memory for outputs
1137 std::vector
<Value
*> params
, ReloadOutputs
, Reloads
;
1138 ValueSet StructValues
;
1140 Module
*M
= newFunction
->getParent();
1141 LLVMContext
&Context
= M
->getContext();
1142 const DataLayout
&DL
= M
->getDataLayout();
1143 CallInst
*call
= nullptr;
1145 // Add inputs as params, or to be filled into the struct
1146 unsigned ScalarInputArgNo
= 0;
1147 SmallVector
<unsigned, 1> SwiftErrorArgs
;
1148 for (Value
*input
: inputs
) {
1149 if (AggregateArgs
&& !ExcludeArgsFromAggregate
.contains(input
))
1150 StructValues
.insert(input
);
1152 params
.push_back(input
);
1153 if (input
->isSwiftError())
1154 SwiftErrorArgs
.push_back(ScalarInputArgNo
);
1159 // Create allocas for the outputs
1160 unsigned ScalarOutputArgNo
= 0;
1161 for (Value
*output
: outputs
) {
1162 if (AggregateArgs
&& !ExcludeArgsFromAggregate
.contains(output
)) {
1163 StructValues
.insert(output
);
1165 AllocaInst
*alloca
=
1166 new AllocaInst(output
->getType(), DL
.getAllocaAddrSpace(),
1167 nullptr, output
->getName() + ".loc",
1168 &codeReplacer
->getParent()->front().front());
1169 ReloadOutputs
.push_back(alloca
);
1170 params
.push_back(alloca
);
1171 ++ScalarOutputArgNo
;
1175 StructType
*StructArgTy
= nullptr;
1176 AllocaInst
*Struct
= nullptr;
1177 unsigned NumAggregatedInputs
= 0;
1178 if (AggregateArgs
&& !StructValues
.empty()) {
1179 std::vector
<Type
*> ArgTypes
;
1180 for (Value
*V
: StructValues
)
1181 ArgTypes
.push_back(V
->getType());
1183 // Allocate a struct at the beginning of this function
1184 StructArgTy
= StructType::get(newFunction
->getContext(), ArgTypes
);
1185 Struct
= new AllocaInst(
1186 StructArgTy
, DL
.getAllocaAddrSpace(), nullptr, "structArg",
1187 AllocationBlock
? &*AllocationBlock
->getFirstInsertionPt()
1188 : &codeReplacer
->getParent()->front().front());
1189 params
.push_back(Struct
);
1191 // Store aggregated inputs in the struct.
1192 for (unsigned i
= 0, e
= StructValues
.size(); i
!= e
; ++i
) {
1193 if (inputs
.contains(StructValues
[i
])) {
1195 Idx
[0] = Constant::getNullValue(Type::getInt32Ty(Context
));
1196 Idx
[1] = ConstantInt::get(Type::getInt32Ty(Context
), i
);
1197 GetElementPtrInst
*GEP
= GetElementPtrInst::Create(
1198 StructArgTy
, Struct
, Idx
, "gep_" + StructValues
[i
]->getName());
1199 GEP
->insertInto(codeReplacer
, codeReplacer
->end());
1200 new StoreInst(StructValues
[i
], GEP
, codeReplacer
);
1201 NumAggregatedInputs
++;
1206 // Emit the call to the function
1207 call
= CallInst::Create(newFunction
, params
,
1208 NumExitBlocks
> 1 ? "targetBlock" : "");
1209 // Add debug location to the new call, if the original function has debug
1210 // info. In that case, the terminator of the entry block of the extracted
1211 // function contains the first debug location of the extracted function,
1212 // set in extractCodeRegion.
1213 if (codeReplacer
->getParent()->getSubprogram()) {
1214 if (auto DL
= newFunction
->getEntryBlock().getTerminator()->getDebugLoc())
1215 call
->setDebugLoc(DL
);
1217 call
->insertInto(codeReplacer
, codeReplacer
->end());
1219 // Set swifterror parameter attributes.
1220 for (unsigned SwiftErrArgNo
: SwiftErrorArgs
) {
1221 call
->addParamAttr(SwiftErrArgNo
, Attribute::SwiftError
);
1222 newFunction
->addParamAttr(SwiftErrArgNo
, Attribute::SwiftError
);
1225 // Reload the outputs passed in by reference, use the struct if output is in
1226 // the aggregate or reload from the scalar argument.
1227 for (unsigned i
= 0, e
= outputs
.size(), scalarIdx
= 0,
1228 aggIdx
= NumAggregatedInputs
;
1230 Value
*Output
= nullptr;
1231 if (AggregateArgs
&& StructValues
.contains(outputs
[i
])) {
1233 Idx
[0] = Constant::getNullValue(Type::getInt32Ty(Context
));
1234 Idx
[1] = ConstantInt::get(Type::getInt32Ty(Context
), aggIdx
);
1235 GetElementPtrInst
*GEP
= GetElementPtrInst::Create(
1236 StructArgTy
, Struct
, Idx
, "gep_reload_" + outputs
[i
]->getName());
1237 GEP
->insertInto(codeReplacer
, codeReplacer
->end());
1241 Output
= ReloadOutputs
[scalarIdx
];
1244 LoadInst
*load
= new LoadInst(outputs
[i
]->getType(), Output
,
1245 outputs
[i
]->getName() + ".reload",
1247 Reloads
.push_back(load
);
1248 std::vector
<User
*> Users(outputs
[i
]->user_begin(), outputs
[i
]->user_end());
1249 for (User
*U
: Users
) {
1250 Instruction
*inst
= cast
<Instruction
>(U
);
1251 if (!Blocks
.count(inst
->getParent()))
1252 inst
->replaceUsesOfWith(outputs
[i
], load
);
1256 // Now we can emit a switch statement using the call as a value.
1257 SwitchInst
*TheSwitch
=
1258 SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context
)),
1259 codeReplacer
, 0, codeReplacer
);
1261 // Since there may be multiple exits from the original region, make the new
1262 // function return an unsigned, switch on that number. This loop iterates
1263 // over all of the blocks in the extracted region, updating any terminator
1264 // instructions in the to-be-extracted region that branch to blocks that are
1265 // not in the region to be extracted.
1266 std::map
<BasicBlock
*, BasicBlock
*> ExitBlockMap
;
1268 // Iterate over the previously collected targets, and create new blocks inside
1269 // the function to branch to.
1270 unsigned switchVal
= 0;
1271 for (BasicBlock
*OldTarget
: OldTargets
) {
1272 if (Blocks
.count(OldTarget
))
1274 BasicBlock
*&NewTarget
= ExitBlockMap
[OldTarget
];
1278 // If we don't already have an exit stub for this non-extracted
1279 // destination, create one now!
1280 NewTarget
= BasicBlock::Create(Context
,
1281 OldTarget
->getName() + ".exitStub",
1283 unsigned SuccNum
= switchVal
++;
1285 Value
*brVal
= nullptr;
1286 assert(NumExitBlocks
< 0xffff && "too many exit blocks for switch");
1287 switch (NumExitBlocks
) {
1289 case 1: break; // No value needed.
1290 case 2: // Conditional branch, return a bool
1291 brVal
= ConstantInt::get(Type::getInt1Ty(Context
), !SuccNum
);
1294 brVal
= ConstantInt::get(Type::getInt16Ty(Context
), SuccNum
);
1298 ReturnInst::Create(Context
, brVal
, NewTarget
);
1300 // Update the switch instruction.
1301 TheSwitch
->addCase(ConstantInt::get(Type::getInt16Ty(Context
),
1306 for (BasicBlock
*Block
: Blocks
) {
1307 Instruction
*TI
= Block
->getTerminator();
1308 for (unsigned i
= 0, e
= TI
->getNumSuccessors(); i
!= e
; ++i
) {
1309 if (Blocks
.count(TI
->getSuccessor(i
)))
1311 BasicBlock
*OldTarget
= TI
->getSuccessor(i
);
1312 // add a new basic block which returns the appropriate value
1313 BasicBlock
*NewTarget
= ExitBlockMap
[OldTarget
];
1314 assert(NewTarget
&& "Unknown target block!");
1316 // rewrite the original branch instruction with this new target
1317 TI
->setSuccessor(i
, NewTarget
);
1321 // Store the arguments right after the definition of output value.
1322 // This should be proceeded after creating exit stubs to be ensure that invoke
1323 // result restore will be placed in the outlined function.
1324 Function::arg_iterator ScalarOutputArgBegin
= newFunction
->arg_begin();
1325 std::advance(ScalarOutputArgBegin
, ScalarInputArgNo
);
1326 Function::arg_iterator AggOutputArgBegin
= newFunction
->arg_begin();
1327 std::advance(AggOutputArgBegin
, ScalarInputArgNo
+ ScalarOutputArgNo
);
1329 for (unsigned i
= 0, e
= outputs
.size(), aggIdx
= NumAggregatedInputs
; i
!= e
;
1331 auto *OutI
= dyn_cast
<Instruction
>(outputs
[i
]);
1335 // Find proper insertion point.
1336 BasicBlock::iterator InsertPt
;
1337 // In case OutI is an invoke, we insert the store at the beginning in the
1338 // 'normal destination' BB. Otherwise we insert the store right after OutI.
1339 if (auto *InvokeI
= dyn_cast
<InvokeInst
>(OutI
))
1340 InsertPt
= InvokeI
->getNormalDest()->getFirstInsertionPt();
1341 else if (auto *Phi
= dyn_cast
<PHINode
>(OutI
))
1342 InsertPt
= Phi
->getParent()->getFirstInsertionPt();
1344 InsertPt
= std::next(OutI
->getIterator());
1346 Instruction
*InsertBefore
= &*InsertPt
;
1347 assert((InsertBefore
->getFunction() == newFunction
||
1348 Blocks
.count(InsertBefore
->getParent())) &&
1349 "InsertPt should be in new function");
1350 if (AggregateArgs
&& StructValues
.contains(outputs
[i
])) {
1351 assert(AggOutputArgBegin
!= newFunction
->arg_end() &&
1352 "Number of aggregate output arguments should match "
1353 "the number of defined values");
1355 Idx
[0] = Constant::getNullValue(Type::getInt32Ty(Context
));
1356 Idx
[1] = ConstantInt::get(Type::getInt32Ty(Context
), aggIdx
);
1357 GetElementPtrInst
*GEP
= GetElementPtrInst::Create(
1358 StructArgTy
, &*AggOutputArgBegin
, Idx
, "gep_" + outputs
[i
]->getName(),
1360 new StoreInst(outputs
[i
], GEP
, InsertBefore
);
1362 // Since there should be only one struct argument aggregating
1363 // all the output values, we shouldn't increment AggOutputArgBegin, which
1364 // always points to the struct argument, in this case.
1366 assert(ScalarOutputArgBegin
!= newFunction
->arg_end() &&
1367 "Number of scalar output arguments should match "
1368 "the number of defined values");
1369 new StoreInst(outputs
[i
], &*ScalarOutputArgBegin
, InsertBefore
);
1370 ++ScalarOutputArgBegin
;
1374 // Now that we've done the deed, simplify the switch instruction.
1375 Type
*OldFnRetTy
= TheSwitch
->getParent()->getParent()->getReturnType();
1376 switch (NumExitBlocks
) {
1378 // There are no successors (the block containing the switch itself), which
1379 // means that previously this was the last part of the function, and hence
1380 // this should be rewritten as a `ret'
1382 // Check if the function should return a value
1383 if (OldFnRetTy
->isVoidTy()) {
1384 ReturnInst::Create(Context
, nullptr, TheSwitch
); // Return void
1385 } else if (OldFnRetTy
== TheSwitch
->getCondition()->getType()) {
1386 // return what we have
1387 ReturnInst::Create(Context
, TheSwitch
->getCondition(), TheSwitch
);
1389 // Otherwise we must have code extracted an unwind or something, just
1390 // return whatever we want.
1391 ReturnInst::Create(Context
,
1392 Constant::getNullValue(OldFnRetTy
), TheSwitch
);
1395 TheSwitch
->eraseFromParent();
1398 // Only a single destination, change the switch into an unconditional
1400 BranchInst::Create(TheSwitch
->getSuccessor(1), TheSwitch
);
1401 TheSwitch
->eraseFromParent();
1404 BranchInst::Create(TheSwitch
->getSuccessor(1), TheSwitch
->getSuccessor(2),
1406 TheSwitch
->eraseFromParent();
1409 // Otherwise, make the default destination of the switch instruction be one
1410 // of the other successors.
1411 TheSwitch
->setCondition(call
);
1412 TheSwitch
->setDefaultDest(TheSwitch
->getSuccessor(NumExitBlocks
));
1413 // Remove redundant case
1414 TheSwitch
->removeCase(SwitchInst::CaseIt(TheSwitch
, NumExitBlocks
-1));
1418 // Insert lifetime markers around the reloads of any output values. The
1419 // allocas output values are stored in are only in-use in the codeRepl block.
1420 insertLifetimeMarkersSurroundingCall(M
, ReloadOutputs
, ReloadOutputs
, call
);
1425 void CodeExtractor::moveCodeToFunction(Function
*newFunction
) {
1426 auto newFuncIt
= newFunction
->front().getIterator();
1427 for (BasicBlock
*Block
: Blocks
) {
1428 // Delete the basic block from the old function, and the list of blocks
1429 Block
->removeFromParent();
1431 // Insert this basic block into the new function
1432 // Insert the original blocks after the entry block created
1433 // for the new function. The entry block may be followed
1434 // by a set of exit blocks at this point, but these exit
1435 // blocks better be placed at the end of the new function.
1436 newFuncIt
= newFunction
->insert(std::next(newFuncIt
), Block
);
1440 void CodeExtractor::calculateNewCallTerminatorWeights(
1441 BasicBlock
*CodeReplacer
,
1442 DenseMap
<BasicBlock
*, BlockFrequency
> &ExitWeights
,
1443 BranchProbabilityInfo
*BPI
) {
1444 using Distribution
= BlockFrequencyInfoImplBase::Distribution
;
1445 using BlockNode
= BlockFrequencyInfoImplBase::BlockNode
;
1447 // Update the branch weights for the exit block.
1448 Instruction
*TI
= CodeReplacer
->getTerminator();
1449 SmallVector
<unsigned, 8> BranchWeights(TI
->getNumSuccessors(), 0);
1451 // Block Frequency distribution with dummy node.
1452 Distribution BranchDist
;
1454 SmallVector
<BranchProbability
, 4> EdgeProbabilities(
1455 TI
->getNumSuccessors(), BranchProbability::getUnknown());
1457 // Add each of the frequencies of the successors.
1458 for (unsigned i
= 0, e
= TI
->getNumSuccessors(); i
< e
; ++i
) {
1459 BlockNode
ExitNode(i
);
1460 uint64_t ExitFreq
= ExitWeights
[TI
->getSuccessor(i
)].getFrequency();
1462 BranchDist
.addExit(ExitNode
, ExitFreq
);
1464 EdgeProbabilities
[i
] = BranchProbability::getZero();
1467 // Check for no total weight.
1468 if (BranchDist
.Total
== 0) {
1469 BPI
->setEdgeProbability(CodeReplacer
, EdgeProbabilities
);
1473 // Normalize the distribution so that they can fit in unsigned.
1474 BranchDist
.normalize();
1476 // Create normalized branch weights and set the metadata.
1477 for (unsigned I
= 0, E
= BranchDist
.Weights
.size(); I
< E
; ++I
) {
1478 const auto &Weight
= BranchDist
.Weights
[I
];
1480 // Get the weight and update the current BFI.
1481 BranchWeights
[Weight
.TargetNode
.Index
] = Weight
.Amount
;
1482 BranchProbability
BP(Weight
.Amount
, BranchDist
.Total
);
1483 EdgeProbabilities
[Weight
.TargetNode
.Index
] = BP
;
1485 BPI
->setEdgeProbability(CodeReplacer
, EdgeProbabilities
);
1487 LLVMContext::MD_prof
,
1488 MDBuilder(TI
->getContext()).createBranchWeights(BranchWeights
));
1491 /// Erase debug info intrinsics which refer to values in \p F but aren't in
1493 static void eraseDebugIntrinsicsWithNonLocalRefs(Function
&F
) {
1494 for (Instruction
&I
: instructions(F
)) {
1495 SmallVector
<DbgVariableIntrinsic
*, 4> DbgUsers
;
1496 findDbgUsers(DbgUsers
, &I
);
1497 for (DbgVariableIntrinsic
*DVI
: DbgUsers
)
1498 if (DVI
->getFunction() != &F
)
1499 DVI
->eraseFromParent();
1503 /// Fix up the debug info in the old and new functions by pointing line
1504 /// locations and debug intrinsics to the new subprogram scope, and by deleting
1505 /// intrinsics which point to values outside of the new function.
1506 static void fixupDebugInfoPostExtraction(Function
&OldFunc
, Function
&NewFunc
,
1507 CallInst
&TheCall
) {
1508 DISubprogram
*OldSP
= OldFunc
.getSubprogram();
1509 LLVMContext
&Ctx
= OldFunc
.getContext();
1512 // Erase any debug info the new function contains.
1513 stripDebugInfo(NewFunc
);
1514 // Make sure the old function doesn't contain any non-local metadata refs.
1515 eraseDebugIntrinsicsWithNonLocalRefs(NewFunc
);
1519 // Create a subprogram for the new function. Leave out a description of the
1520 // function arguments, as the parameters don't correspond to anything at the
1522 assert(OldSP
->getUnit() && "Missing compile unit for subprogram");
1523 DIBuilder
DIB(*OldFunc
.getParent(), /*AllowUnresolved=*/false,
1526 DIB
.createSubroutineType(DIB
.getOrCreateTypeArray(std::nullopt
));
1527 DISubprogram::DISPFlags SPFlags
= DISubprogram::SPFlagDefinition
|
1528 DISubprogram::SPFlagOptimized
|
1529 DISubprogram::SPFlagLocalToUnit
;
1530 auto NewSP
= DIB
.createFunction(
1531 OldSP
->getUnit(), NewFunc
.getName(), NewFunc
.getName(), OldSP
->getFile(),
1532 /*LineNo=*/0, SPType
, /*ScopeLine=*/0, DINode::FlagZero
, SPFlags
);
1533 NewFunc
.setSubprogram(NewSP
);
1535 // Debug intrinsics in the new function need to be updated in one of two
1537 // 1) They need to be deleted, because they describe a value in the old
1539 // 2) They need to point to fresh metadata, e.g. because they currently
1540 // point to a variable in the wrong scope.
1541 SmallDenseMap
<DINode
*, DINode
*> RemappedMetadata
;
1542 SmallVector
<Instruction
*, 4> DebugIntrinsicsToDelete
;
1543 DenseMap
<const MDNode
*, MDNode
*> Cache
;
1544 for (Instruction
&I
: instructions(NewFunc
)) {
1545 auto *DII
= dyn_cast
<DbgInfoIntrinsic
>(&I
);
1549 // Point the intrinsic to a fresh label within the new function if the
1550 // intrinsic was not inlined from some other function.
1551 if (auto *DLI
= dyn_cast
<DbgLabelInst
>(&I
)) {
1552 if (DLI
->getDebugLoc().getInlinedAt())
1554 DILabel
*OldLabel
= DLI
->getLabel();
1555 DINode
*&NewLabel
= RemappedMetadata
[OldLabel
];
1557 DILocalScope
*NewScope
= DILocalScope::cloneScopeForSubprogram(
1558 *OldLabel
->getScope(), *NewSP
, Ctx
, Cache
);
1559 NewLabel
= DILabel::get(Ctx
, NewScope
, OldLabel
->getName(),
1560 OldLabel
->getFile(), OldLabel
->getLine());
1562 DLI
->setArgOperand(0, MetadataAsValue::get(Ctx
, NewLabel
));
1566 auto IsInvalidLocation
= [&NewFunc
](Value
*Location
) {
1567 // Location is invalid if it isn't a constant or an instruction, or is an
1568 // instruction but isn't in the new function.
1570 (!isa
<Constant
>(Location
) && !isa
<Instruction
>(Location
)))
1572 Instruction
*LocationInst
= dyn_cast
<Instruction
>(Location
);
1573 return LocationInst
&& LocationInst
->getFunction() != &NewFunc
;
1576 auto *DVI
= cast
<DbgVariableIntrinsic
>(DII
);
1577 // If any of the used locations are invalid, delete the intrinsic.
1578 if (any_of(DVI
->location_ops(), IsInvalidLocation
)) {
1579 DebugIntrinsicsToDelete
.push_back(DVI
);
1582 // If the variable was in the scope of the old function, i.e. it was not
1583 // inlined, point the intrinsic to a fresh variable within the new function.
1584 if (!DVI
->getDebugLoc().getInlinedAt()) {
1585 DILocalVariable
*OldVar
= DVI
->getVariable();
1586 DINode
*&NewVar
= RemappedMetadata
[OldVar
];
1588 DILocalScope
*NewScope
= DILocalScope::cloneScopeForSubprogram(
1589 *OldVar
->getScope(), *NewSP
, Ctx
, Cache
);
1590 NewVar
= DIB
.createAutoVariable(
1591 NewScope
, OldVar
->getName(), OldVar
->getFile(), OldVar
->getLine(),
1592 OldVar
->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero
,
1593 OldVar
->getAlignInBits());
1595 DVI
->setVariable(cast
<DILocalVariable
>(NewVar
));
1599 for (auto *DII
: DebugIntrinsicsToDelete
)
1600 DII
->eraseFromParent();
1601 DIB
.finalizeSubprogram(NewSP
);
1603 // Fix up the scope information attached to the line locations in the new
1605 for (Instruction
&I
: instructions(NewFunc
)) {
1606 if (const DebugLoc
&DL
= I
.getDebugLoc())
1608 DebugLoc::replaceInlinedAtSubprogram(DL
, *NewSP
, Ctx
, Cache
));
1610 // Loop info metadata may contain line locations. Fix them up.
1611 auto updateLoopInfoLoc
= [&Ctx
, &Cache
, NewSP
](Metadata
*MD
) -> Metadata
* {
1612 if (auto *Loc
= dyn_cast_or_null
<DILocation
>(MD
))
1613 return DebugLoc::replaceInlinedAtSubprogram(Loc
, *NewSP
, Ctx
, Cache
);
1616 updateLoopMetadataDebugLocations(I
, updateLoopInfoLoc
);
1618 if (!TheCall
.getDebugLoc())
1619 TheCall
.setDebugLoc(DILocation::get(Ctx
, 0, 0, OldSP
));
1621 eraseDebugIntrinsicsWithNonLocalRefs(NewFunc
);
1625 CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache
&CEAC
) {
1626 ValueSet Inputs
, Outputs
;
1627 return extractCodeRegion(CEAC
, Inputs
, Outputs
);
1631 CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache
&CEAC
,
1632 ValueSet
&inputs
, ValueSet
&outputs
) {
1636 // Assumption: this is a single-entry code region, and the header is the first
1637 // block in the region.
1638 BasicBlock
*header
= *Blocks
.begin();
1639 Function
*oldFunction
= header
->getParent();
1641 // Calculate the entry frequency of the new function before we change the root
1643 BlockFrequency EntryFreq
;
1645 assert(BPI
&& "Both BPI and BFI are required to preserve profile info");
1646 for (BasicBlock
*Pred
: predecessors(header
)) {
1647 if (Blocks
.count(Pred
))
1650 BFI
->getBlockFreq(Pred
) * BPI
->getEdgeProbability(Pred
, header
);
1654 // Remove @llvm.assume calls that will be moved to the new function from the
1655 // old function's assumption cache.
1656 for (BasicBlock
*Block
: Blocks
) {
1657 for (Instruction
&I
: llvm::make_early_inc_range(*Block
)) {
1658 if (auto *AI
= dyn_cast
<AssumeInst
>(&I
)) {
1660 AC
->unregisterAssumption(AI
);
1661 AI
->eraseFromParent();
1666 // If we have any return instructions in the region, split those blocks so
1667 // that the return is not in the region.
1668 splitReturnBlocks();
1670 // Calculate the exit blocks for the extracted region and the total exit
1671 // weights for each of those blocks.
1672 DenseMap
<BasicBlock
*, BlockFrequency
> ExitWeights
;
1673 SmallPtrSet
<BasicBlock
*, 1> ExitBlocks
;
1674 for (BasicBlock
*Block
: Blocks
) {
1675 for (BasicBlock
*Succ
: successors(Block
)) {
1676 if (!Blocks
.count(Succ
)) {
1677 // Update the branch weight for this successor.
1679 BlockFrequency
&BF
= ExitWeights
[Succ
];
1680 BF
+= BFI
->getBlockFreq(Block
) * BPI
->getEdgeProbability(Block
, Succ
);
1682 ExitBlocks
.insert(Succ
);
1686 NumExitBlocks
= ExitBlocks
.size();
1688 for (BasicBlock
*Block
: Blocks
) {
1689 Instruction
*TI
= Block
->getTerminator();
1690 for (unsigned i
= 0, e
= TI
->getNumSuccessors(); i
!= e
; ++i
) {
1691 if (Blocks
.count(TI
->getSuccessor(i
)))
1693 BasicBlock
*OldTarget
= TI
->getSuccessor(i
);
1694 OldTargets
.push_back(OldTarget
);
1698 // If we have to split PHI nodes of the entry or exit blocks, do so now.
1699 severSplitPHINodesOfEntry(header
);
1700 severSplitPHINodesOfExits(ExitBlocks
);
1702 // This takes place of the original loop
1703 BasicBlock
*codeReplacer
= BasicBlock::Create(header
->getContext(),
1704 "codeRepl", oldFunction
,
1707 // The new function needs a root node because other nodes can branch to the
1708 // head of the region, but the entry node of a function cannot have preds.
1709 BasicBlock
*newFuncRoot
= BasicBlock::Create(header
->getContext(),
1711 auto *BranchI
= BranchInst::Create(header
);
1712 // If the original function has debug info, we have to add a debug location
1713 // to the new branch instruction from the artificial entry block.
1714 // We use the debug location of the first instruction in the extracted
1715 // blocks, as there is no other equivalent line in the source code.
1716 if (oldFunction
->getSubprogram()) {
1717 any_of(Blocks
, [&BranchI
](const BasicBlock
*BB
) {
1718 return any_of(*BB
, [&BranchI
](const Instruction
&I
) {
1719 if (!I
.getDebugLoc())
1721 BranchI
->setDebugLoc(I
.getDebugLoc());
1726 BranchI
->insertInto(newFuncRoot
, newFuncRoot
->end());
1728 ValueSet SinkingCands
, HoistingCands
;
1729 BasicBlock
*CommonExit
= nullptr;
1730 findAllocas(CEAC
, SinkingCands
, HoistingCands
, CommonExit
);
1731 assert(HoistingCands
.empty() || CommonExit
);
1733 // Find inputs to, outputs from the code region.
1734 findInputsOutputs(inputs
, outputs
, SinkingCands
);
1736 // Now sink all instructions which only have non-phi uses inside the region.
1737 // Group the allocas at the start of the block, so that any bitcast uses of
1738 // the allocas are well-defined.
1739 AllocaInst
*FirstSunkAlloca
= nullptr;
1740 for (auto *II
: SinkingCands
) {
1741 if (auto *AI
= dyn_cast
<AllocaInst
>(II
)) {
1742 AI
->moveBefore(*newFuncRoot
, newFuncRoot
->getFirstInsertionPt());
1743 if (!FirstSunkAlloca
)
1744 FirstSunkAlloca
= AI
;
1747 assert((SinkingCands
.empty() || FirstSunkAlloca
) &&
1748 "Did not expect a sink candidate without any allocas");
1749 for (auto *II
: SinkingCands
) {
1750 if (!isa
<AllocaInst
>(II
)) {
1751 cast
<Instruction
>(II
)->moveAfter(FirstSunkAlloca
);
1755 if (!HoistingCands
.empty()) {
1756 auto *HoistToBlock
= findOrCreateBlockForHoisting(CommonExit
);
1757 Instruction
*TI
= HoistToBlock
->getTerminator();
1758 for (auto *II
: HoistingCands
)
1759 cast
<Instruction
>(II
)->moveBefore(TI
);
1762 // Collect objects which are inputs to the extraction region and also
1763 // referenced by lifetime start markers within it. The effects of these
1764 // markers must be replicated in the calling function to prevent the stack
1765 // coloring pass from merging slots which store input objects.
1766 ValueSet LifetimesStart
;
1767 eraseLifetimeMarkersOnInputs(Blocks
, SinkingCands
, LifetimesStart
);
1769 // Construct new function based on inputs/outputs & add allocas for all defs.
1770 Function
*newFunction
=
1771 constructFunction(inputs
, outputs
, header
, newFuncRoot
, codeReplacer
,
1772 oldFunction
, oldFunction
->getParent());
1774 // Update the entry count of the function.
1776 auto Count
= BFI
->getProfileCountFromFreq(EntryFreq
.getFrequency());
1778 newFunction
->setEntryCount(
1779 ProfileCount(*Count
, Function::PCT_Real
)); // FIXME
1780 BFI
->setBlockFreq(codeReplacer
, EntryFreq
.getFrequency());
1784 emitCallAndSwitchStatement(newFunction
, codeReplacer
, inputs
, outputs
);
1786 moveCodeToFunction(newFunction
);
1788 // Replicate the effects of any lifetime start/end markers which referenced
1789 // input objects in the extraction region by placing markers around the call.
1790 insertLifetimeMarkersSurroundingCall(
1791 oldFunction
->getParent(), LifetimesStart
.getArrayRef(), {}, TheCall
);
1793 // Propagate personality info to the new function if there is one.
1794 if (oldFunction
->hasPersonalityFn())
1795 newFunction
->setPersonalityFn(oldFunction
->getPersonalityFn());
1797 // Update the branch weights for the exit block.
1798 if (BFI
&& NumExitBlocks
> 1)
1799 calculateNewCallTerminatorWeights(codeReplacer
, ExitWeights
, BPI
);
1801 // Loop over all of the PHI nodes in the header and exit blocks, and change
1802 // any references to the old incoming edge to be the new incoming edge.
1803 for (BasicBlock::iterator I
= header
->begin(); isa
<PHINode
>(I
); ++I
) {
1804 PHINode
*PN
= cast
<PHINode
>(I
);
1805 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
1806 if (!Blocks
.count(PN
->getIncomingBlock(i
)))
1807 PN
->setIncomingBlock(i
, newFuncRoot
);
1810 for (BasicBlock
*ExitBB
: ExitBlocks
)
1811 for (PHINode
&PN
: ExitBB
->phis()) {
1812 Value
*IncomingCodeReplacerVal
= nullptr;
1813 for (unsigned i
= 0, e
= PN
.getNumIncomingValues(); i
!= e
; ++i
) {
1814 // Ignore incoming values from outside of the extracted region.
1815 if (!Blocks
.count(PN
.getIncomingBlock(i
)))
1818 // Ensure that there is only one incoming value from codeReplacer.
1819 if (!IncomingCodeReplacerVal
) {
1820 PN
.setIncomingBlock(i
, codeReplacer
);
1821 IncomingCodeReplacerVal
= PN
.getIncomingValue(i
);
1823 assert(IncomingCodeReplacerVal
== PN
.getIncomingValue(i
) &&
1824 "PHI has two incompatbile incoming values from codeRepl");
1828 fixupDebugInfoPostExtraction(*oldFunction
, *newFunction
, *TheCall
);
1830 // Mark the new function `noreturn` if applicable. Terminators which resume
1831 // exception propagation are treated as returning instructions. This is to
1832 // avoid inserting traps after calls to outlined functions which unwind.
1833 bool doesNotReturn
= none_of(*newFunction
, [](const BasicBlock
&BB
) {
1834 const Instruction
*Term
= BB
.getTerminator();
1835 return isa
<ReturnInst
>(Term
) || isa
<ResumeInst
>(Term
);
1838 newFunction
->setDoesNotReturn();
1840 LLVM_DEBUG(if (verifyFunction(*newFunction
, &errs())) {
1841 newFunction
->dump();
1842 report_fatal_error("verification of newFunction failed!");
1844 LLVM_DEBUG(if (verifyFunction(*oldFunction
))
1845 report_fatal_error("verification of oldFunction failed!"));
1846 LLVM_DEBUG(if (AC
&& verifyAssumptionCache(*oldFunction
, *newFunction
, AC
))
1847 report_fatal_error("Stale Asumption cache for old Function!"));
1851 bool CodeExtractor::verifyAssumptionCache(const Function
&OldFunc
,
1852 const Function
&NewFunc
,
1853 AssumptionCache
*AC
) {
1854 for (auto AssumeVH
: AC
->assumptions()) {
1855 auto *I
= dyn_cast_or_null
<CallInst
>(AssumeVH
);
1859 // There shouldn't be any llvm.assume intrinsics in the new function.
1860 if (I
->getFunction() != &OldFunc
)
1863 // There shouldn't be any stale affected values in the assumption cache
1864 // that were previously in the old function, but that have now been moved
1865 // to the new function.
1866 for (auto AffectedValVH
: AC
->assumptionsFor(I
->getOperand(0))) {
1867 auto *AffectedCI
= dyn_cast_or_null
<CallInst
>(AffectedValVH
);
1870 if (AffectedCI
->getFunction() != &OldFunc
)
1872 auto *AssumedInst
= cast
<Instruction
>(AffectedCI
->getOperand(0));
1873 if (AssumedInst
->getFunction() != &OldFunc
)
1880 void CodeExtractor::excludeArgFromAggregate(Value
*Arg
) {
1881 ExcludeArgsFromAggregate
.insert(Arg
);