1 //===- CodeExtractor.cpp - Pull code region into a new function -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the interface to tear out a code region, such as an
10 // individual loop or a parallel section, into a new function, replacing it with
11 // a call to the new function.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Transforms/Utils/CodeExtractor.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/Analysis/AssumptionCache.h"
23 #include "llvm/Analysis/BlockFrequencyInfo.h"
24 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
25 #include "llvm/Analysis/BranchProbabilityInfo.h"
26 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/IR/Argument.h"
28 #include "llvm/IR/Attributes.h"
29 #include "llvm/IR/BasicBlock.h"
30 #include "llvm/IR/CFG.h"
31 #include "llvm/IR/Constant.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DIBuilder.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DebugInfo.h"
36 #include "llvm/IR/DebugInfoMetadata.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Dominators.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/GlobalValue.h"
41 #include "llvm/IR/InstIterator.h"
42 #include "llvm/IR/InstrTypes.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Intrinsics.h"
47 #include "llvm/IR/LLVMContext.h"
48 #include "llvm/IR/MDBuilder.h"
49 #include "llvm/IR/Module.h"
50 #include "llvm/IR/PatternMatch.h"
51 #include "llvm/IR/Type.h"
52 #include "llvm/IR/User.h"
53 #include "llvm/IR/Value.h"
54 #include "llvm/IR/Verifier.h"
55 #include "llvm/Support/BlockFrequency.h"
56 #include "llvm/Support/BranchProbability.h"
57 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/CommandLine.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/ErrorHandling.h"
61 #include "llvm/Support/raw_ostream.h"
62 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
71 using namespace llvm::PatternMatch
;
72 using ProfileCount
= Function::ProfileCount
;
74 #define DEBUG_TYPE "code-extractor"
76 // Provide a command-line option to aggregate function arguments into a struct
77 // for functions produced by the code extractor. This is useful when converting
78 // extracted functions to pthread-based code, as only one argument (void*) can
79 // be passed in to pthread_create().
81 AggregateArgsOpt("aggregate-extracted-args", cl::Hidden
,
82 cl::desc("Aggregate arguments to code-extracted functions"));
84 /// Test whether a block is valid for extraction.
85 static bool isBlockValidForExtraction(const BasicBlock
&BB
,
86 const SetVector
<BasicBlock
*> &Result
,
87 bool AllowVarArgs
, bool AllowAlloca
) {
88 // taking the address of a basic block moved to another function is illegal
89 if (BB
.hasAddressTaken())
92 // don't hoist code that uses another basicblock address, as it's likely to
93 // lead to unexpected behavior, like cross-function jumps
94 SmallPtrSet
<User
const *, 16> Visited
;
95 SmallVector
<User
const *, 16> ToVisit
;
97 for (Instruction
const &Inst
: BB
)
98 ToVisit
.push_back(&Inst
);
100 while (!ToVisit
.empty()) {
101 User
const *Curr
= ToVisit
.pop_back_val();
102 if (!Visited
.insert(Curr
).second
)
104 if (isa
<BlockAddress
const>(Curr
))
105 return false; // even a reference to self is likely to be not compatible
107 if (isa
<Instruction
>(Curr
) && cast
<Instruction
>(Curr
)->getParent() != &BB
)
110 for (auto const &U
: Curr
->operands()) {
111 if (auto *UU
= dyn_cast
<User
>(U
))
112 ToVisit
.push_back(UU
);
116 // If explicitly requested, allow vastart and alloca. For invoke instructions
117 // verify that extraction is valid.
118 for (BasicBlock::const_iterator I
= BB
.begin(), E
= BB
.end(); I
!= E
; ++I
) {
119 if (isa
<AllocaInst
>(I
)) {
125 if (const auto *II
= dyn_cast
<InvokeInst
>(I
)) {
126 // Unwind destination (either a landingpad, catchswitch, or cleanuppad)
127 // must be a part of the subgraph which is being extracted.
128 if (auto *UBB
= II
->getUnwindDest())
129 if (!Result
.count(UBB
))
134 // All catch handlers of a catchswitch instruction as well as the unwind
135 // destination must be in the subgraph.
136 if (const auto *CSI
= dyn_cast
<CatchSwitchInst
>(I
)) {
137 if (auto *UBB
= CSI
->getUnwindDest())
138 if (!Result
.count(UBB
))
140 for (const auto *HBB
: CSI
->handlers())
141 if (!Result
.count(const_cast<BasicBlock
*>(HBB
)))
146 // Make sure that entire catch handler is within subgraph. It is sufficient
147 // to check that catch return's block is in the list.
148 if (const auto *CPI
= dyn_cast
<CatchPadInst
>(I
)) {
149 for (const auto *U
: CPI
->users())
150 if (const auto *CRI
= dyn_cast
<CatchReturnInst
>(U
))
151 if (!Result
.count(const_cast<BasicBlock
*>(CRI
->getParent())))
156 // And do similar checks for cleanup handler - the entire handler must be
157 // in subgraph which is going to be extracted. For cleanup return should
158 // additionally check that the unwind destination is also in the subgraph.
159 if (const auto *CPI
= dyn_cast
<CleanupPadInst
>(I
)) {
160 for (const auto *U
: CPI
->users())
161 if (const auto *CRI
= dyn_cast
<CleanupReturnInst
>(U
))
162 if (!Result
.count(const_cast<BasicBlock
*>(CRI
->getParent())))
166 if (const auto *CRI
= dyn_cast
<CleanupReturnInst
>(I
)) {
167 if (auto *UBB
= CRI
->getUnwindDest())
168 if (!Result
.count(UBB
))
173 if (const CallInst
*CI
= dyn_cast
<CallInst
>(I
)) {
174 if (const Function
*F
= CI
->getCalledFunction()) {
175 auto IID
= F
->getIntrinsicID();
176 if (IID
== Intrinsic::vastart
) {
183 // Currently, we miscompile outlined copies of eh_typid_for. There are
184 // proposals for fixing this in llvm.org/PR39545.
185 if (IID
== Intrinsic::eh_typeid_for
)
194 /// Build a set of blocks to extract if the input blocks are viable.
195 static SetVector
<BasicBlock
*>
196 buildExtractionBlockSet(ArrayRef
<BasicBlock
*> BBs
, DominatorTree
*DT
,
197 bool AllowVarArgs
, bool AllowAlloca
) {
198 assert(!BBs
.empty() && "The set of blocks to extract must be non-empty");
199 SetVector
<BasicBlock
*> Result
;
201 // Loop over the blocks, adding them to our set-vector, and aborting with an
202 // empty set if we encounter invalid blocks.
203 for (BasicBlock
*BB
: BBs
) {
204 // If this block is dead, don't process it.
205 if (DT
&& !DT
->isReachableFromEntry(BB
))
208 if (!Result
.insert(BB
))
209 llvm_unreachable("Repeated basic blocks in extraction input");
212 LLVM_DEBUG(dbgs() << "Region front block: " << Result
.front()->getName()
215 for (auto *BB
: Result
) {
216 if (!isBlockValidForExtraction(*BB
, Result
, AllowVarArgs
, AllowAlloca
))
219 // Make sure that the first block is not a landing pad.
220 if (BB
== Result
.front()) {
222 LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n");
228 // All blocks other than the first must not have predecessors outside of
229 // the subgraph which is being extracted.
230 for (auto *PBB
: predecessors(BB
))
231 if (!Result
.count(PBB
)) {
232 LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from "
233 "outside the region except for the first block!\n"
234 << "Problematic source BB: " << BB
->getName() << "\n"
235 << "Problematic destination BB: " << PBB
->getName()
244 CodeExtractor::CodeExtractor(ArrayRef
<BasicBlock
*> BBs
, DominatorTree
*DT
,
245 bool AggregateArgs
, BlockFrequencyInfo
*BFI
,
246 BranchProbabilityInfo
*BPI
, AssumptionCache
*AC
,
247 bool AllowVarArgs
, bool AllowAlloca
,
248 BasicBlock
*AllocationBlock
, std::string Suffix
,
249 bool ArgsInZeroAddressSpace
)
250 : DT(DT
), AggregateArgs(AggregateArgs
|| AggregateArgsOpt
), BFI(BFI
),
251 BPI(BPI
), AC(AC
), AllocationBlock(AllocationBlock
),
252 AllowVarArgs(AllowVarArgs
),
253 Blocks(buildExtractionBlockSet(BBs
, DT
, AllowVarArgs
, AllowAlloca
)),
254 Suffix(Suffix
), ArgsInZeroAddressSpace(ArgsInZeroAddressSpace
) {}
256 CodeExtractor::CodeExtractor(DominatorTree
&DT
, Loop
&L
, bool AggregateArgs
,
257 BlockFrequencyInfo
*BFI
,
258 BranchProbabilityInfo
*BPI
, AssumptionCache
*AC
,
260 : DT(&DT
), AggregateArgs(AggregateArgs
|| AggregateArgsOpt
), BFI(BFI
),
261 BPI(BPI
), AC(AC
), AllocationBlock(nullptr), AllowVarArgs(false),
262 Blocks(buildExtractionBlockSet(L
.getBlocks(), &DT
,
263 /* AllowVarArgs */ false,
264 /* AllowAlloca */ false)),
267 /// definedInRegion - Return true if the specified value is defined in the
268 /// extracted region.
269 static bool definedInRegion(const SetVector
<BasicBlock
*> &Blocks
, Value
*V
) {
270 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
271 if (Blocks
.count(I
->getParent()))
276 /// definedInCaller - Return true if the specified value is defined in the
277 /// function being code extracted, but not in the region being extracted.
278 /// These values must be passed in as live-ins to the function.
279 static bool definedInCaller(const SetVector
<BasicBlock
*> &Blocks
, Value
*V
) {
280 if (isa
<Argument
>(V
)) return true;
281 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
282 if (!Blocks
.count(I
->getParent()))
287 static BasicBlock
*getCommonExitBlock(const SetVector
<BasicBlock
*> &Blocks
) {
288 BasicBlock
*CommonExitBlock
= nullptr;
289 auto hasNonCommonExitSucc
= [&](BasicBlock
*Block
) {
290 for (auto *Succ
: successors(Block
)) {
291 // Internal edges, ok.
292 if (Blocks
.count(Succ
))
294 if (!CommonExitBlock
) {
295 CommonExitBlock
= Succ
;
298 if (CommonExitBlock
!= Succ
)
304 if (any_of(Blocks
, hasNonCommonExitSucc
))
307 return CommonExitBlock
;
310 CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function
&F
) {
311 for (BasicBlock
&BB
: F
) {
312 for (Instruction
&II
: BB
.instructionsWithoutDebug())
313 if (auto *AI
= dyn_cast
<AllocaInst
>(&II
))
314 Allocas
.push_back(AI
);
316 findSideEffectInfoForBlock(BB
);
320 void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock
&BB
) {
321 for (Instruction
&II
: BB
.instructionsWithoutDebug()) {
322 unsigned Opcode
= II
.getOpcode();
323 Value
*MemAddr
= nullptr;
325 case Instruction::Store
:
326 case Instruction::Load
: {
327 if (Opcode
== Instruction::Store
) {
328 StoreInst
*SI
= cast
<StoreInst
>(&II
);
329 MemAddr
= SI
->getPointerOperand();
331 LoadInst
*LI
= cast
<LoadInst
>(&II
);
332 MemAddr
= LI
->getPointerOperand();
334 // Global variable can not be aliased with locals.
335 if (isa
<Constant
>(MemAddr
))
337 Value
*Base
= MemAddr
->stripInBoundsConstantOffsets();
338 if (!isa
<AllocaInst
>(Base
)) {
339 SideEffectingBlocks
.insert(&BB
);
342 BaseMemAddrs
[&BB
].insert(Base
);
346 IntrinsicInst
*IntrInst
= dyn_cast
<IntrinsicInst
>(&II
);
348 if (IntrInst
->isLifetimeStartOrEnd())
350 SideEffectingBlocks
.insert(&BB
);
353 // Treat all the other cases conservatively if it has side effects.
354 if (II
.mayHaveSideEffects()) {
355 SideEffectingBlocks
.insert(&BB
);
363 bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr(
364 BasicBlock
&BB
, AllocaInst
*Addr
) const {
365 if (SideEffectingBlocks
.count(&BB
))
367 auto It
= BaseMemAddrs
.find(&BB
);
368 if (It
!= BaseMemAddrs
.end())
369 return It
->second
.count(Addr
);
373 bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
374 const CodeExtractorAnalysisCache
&CEAC
, Instruction
*Addr
) const {
375 AllocaInst
*AI
= cast
<AllocaInst
>(Addr
->stripInBoundsConstantOffsets());
376 Function
*Func
= (*Blocks
.begin())->getParent();
377 for (BasicBlock
&BB
: *Func
) {
378 if (Blocks
.count(&BB
))
380 if (CEAC
.doesBlockContainClobberOfAddr(BB
, AI
))
387 CodeExtractor::findOrCreateBlockForHoisting(BasicBlock
*CommonExitBlock
) {
388 BasicBlock
*SinglePredFromOutlineRegion
= nullptr;
389 assert(!Blocks
.count(CommonExitBlock
) &&
390 "Expect a block outside the region!");
391 for (auto *Pred
: predecessors(CommonExitBlock
)) {
392 if (!Blocks
.count(Pred
))
394 if (!SinglePredFromOutlineRegion
) {
395 SinglePredFromOutlineRegion
= Pred
;
396 } else if (SinglePredFromOutlineRegion
!= Pred
) {
397 SinglePredFromOutlineRegion
= nullptr;
402 if (SinglePredFromOutlineRegion
)
403 return SinglePredFromOutlineRegion
;
406 auto getFirstPHI
= [](BasicBlock
*BB
) {
407 BasicBlock::iterator I
= BB
->begin();
408 PHINode
*FirstPhi
= nullptr;
409 while (I
!= BB
->end()) {
410 PHINode
*Phi
= dyn_cast
<PHINode
>(I
);
420 // If there are any phi nodes, the single pred either exists or has already
421 // be created before code extraction.
422 assert(!getFirstPHI(CommonExitBlock
) && "Phi not expected");
425 BasicBlock
*NewExitBlock
= CommonExitBlock
->splitBasicBlock(
426 CommonExitBlock
->getFirstNonPHI()->getIterator());
428 for (BasicBlock
*Pred
:
429 llvm::make_early_inc_range(predecessors(CommonExitBlock
))) {
430 if (Blocks
.count(Pred
))
432 Pred
->getTerminator()->replaceUsesOfWith(CommonExitBlock
, NewExitBlock
);
434 // Now add the old exit block to the outline region.
435 Blocks
.insert(CommonExitBlock
);
436 OldTargets
.push_back(NewExitBlock
);
437 return CommonExitBlock
;
440 // Find the pair of life time markers for address 'Addr' that are either
441 // defined inside the outline region or can legally be shrinkwrapped into the
442 // outline region. If there are not other untracked uses of the address, return
443 // the pair of markers if found; otherwise return a pair of nullptr.
444 CodeExtractor::LifetimeMarkerInfo
445 CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache
&CEAC
,
447 BasicBlock
*ExitBlock
) const {
448 LifetimeMarkerInfo Info
;
450 for (User
*U
: Addr
->users()) {
451 IntrinsicInst
*IntrInst
= dyn_cast
<IntrinsicInst
>(U
);
453 // We don't model addresses with multiple start/end markers, but the
454 // markers do not need to be in the region.
455 if (IntrInst
->getIntrinsicID() == Intrinsic::lifetime_start
) {
458 Info
.LifeStart
= IntrInst
;
461 if (IntrInst
->getIntrinsicID() == Intrinsic::lifetime_end
) {
464 Info
.LifeEnd
= IntrInst
;
467 // At this point, permit debug uses outside of the region.
468 // This is fixed in a later call to fixupDebugInfoPostExtraction().
469 if (isa
<DbgInfoIntrinsic
>(IntrInst
))
472 // Find untracked uses of the address, bail.
473 if (!definedInRegion(Blocks
, U
))
477 if (!Info
.LifeStart
|| !Info
.LifeEnd
)
480 Info
.SinkLifeStart
= !definedInRegion(Blocks
, Info
.LifeStart
);
481 Info
.HoistLifeEnd
= !definedInRegion(Blocks
, Info
.LifeEnd
);
482 // Do legality check.
483 if ((Info
.SinkLifeStart
|| Info
.HoistLifeEnd
) &&
484 !isLegalToShrinkwrapLifetimeMarkers(CEAC
, Addr
))
487 // Check to see if we have a place to do hoisting, if not, bail.
488 if (Info
.HoistLifeEnd
&& !ExitBlock
)
494 void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache
&CEAC
,
495 ValueSet
&SinkCands
, ValueSet
&HoistCands
,
496 BasicBlock
*&ExitBlock
) const {
497 Function
*Func
= (*Blocks
.begin())->getParent();
498 ExitBlock
= getCommonExitBlock(Blocks
);
500 auto moveOrIgnoreLifetimeMarkers
=
501 [&](const LifetimeMarkerInfo
&LMI
) -> bool {
504 if (LMI
.SinkLifeStart
) {
505 LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI
.LifeStart
507 SinkCands
.insert(LMI
.LifeStart
);
509 if (LMI
.HoistLifeEnd
) {
510 LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI
.LifeEnd
<< "\n");
511 HoistCands
.insert(LMI
.LifeEnd
);
516 // Look up allocas in the original function in CodeExtractorAnalysisCache, as
517 // this is much faster than walking all the instructions.
518 for (AllocaInst
*AI
: CEAC
.getAllocas()) {
519 BasicBlock
*BB
= AI
->getParent();
520 if (Blocks
.count(BB
))
523 // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca,
524 // check whether it is actually still in the original function.
525 Function
*AIFunc
= BB
->getParent();
529 LifetimeMarkerInfo MarkerInfo
= getLifetimeMarkers(CEAC
, AI
, ExitBlock
);
530 bool Moved
= moveOrIgnoreLifetimeMarkers(MarkerInfo
);
532 LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI
<< "\n");
533 SinkCands
.insert(AI
);
537 // Find bitcasts in the outlined region that have lifetime marker users
538 // outside that region. Replace the lifetime marker use with an
539 // outside region bitcast to avoid unnecessary alloca/reload instructions
540 // and extra lifetime markers.
541 SmallVector
<Instruction
*, 2> LifetimeBitcastUsers
;
542 for (User
*U
: AI
->users()) {
543 if (!definedInRegion(Blocks
, U
))
546 if (U
->stripInBoundsConstantOffsets() != AI
)
549 Instruction
*Bitcast
= cast
<Instruction
>(U
);
550 for (User
*BU
: Bitcast
->users()) {
551 IntrinsicInst
*IntrInst
= dyn_cast
<IntrinsicInst
>(BU
);
555 if (!IntrInst
->isLifetimeStartOrEnd())
558 if (definedInRegion(Blocks
, IntrInst
))
561 LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast"
562 << *Bitcast
<< " in out-of-region lifetime marker "
563 << *IntrInst
<< "\n");
564 LifetimeBitcastUsers
.push_back(IntrInst
);
568 for (Instruction
*I
: LifetimeBitcastUsers
) {
569 Module
*M
= AIFunc
->getParent();
570 LLVMContext
&Ctx
= M
->getContext();
571 auto *Int8PtrTy
= PointerType::getUnqual(Ctx
);
573 CastInst::CreatePointerCast(AI
, Int8PtrTy
, "lt.cast", I
);
574 I
->replaceUsesOfWith(I
->getOperand(1), CastI
);
577 // Follow any bitcasts.
578 SmallVector
<Instruction
*, 2> Bitcasts
;
579 SmallVector
<LifetimeMarkerInfo
, 2> BitcastLifetimeInfo
;
580 for (User
*U
: AI
->users()) {
581 if (U
->stripInBoundsConstantOffsets() == AI
) {
582 Instruction
*Bitcast
= cast
<Instruction
>(U
);
583 LifetimeMarkerInfo LMI
= getLifetimeMarkers(CEAC
, Bitcast
, ExitBlock
);
585 Bitcasts
.push_back(Bitcast
);
586 BitcastLifetimeInfo
.push_back(LMI
);
591 // Found unknown use of AI.
592 if (!definedInRegion(Blocks
, U
)) {
598 // Either no bitcasts reference the alloca or there are unknown uses.
599 if (Bitcasts
.empty())
602 LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI
<< "\n");
603 SinkCands
.insert(AI
);
604 for (unsigned I
= 0, E
= Bitcasts
.size(); I
!= E
; ++I
) {
605 Instruction
*BitcastAddr
= Bitcasts
[I
];
606 const LifetimeMarkerInfo
&LMI
= BitcastLifetimeInfo
[I
];
607 assert(LMI
.LifeStart
&&
608 "Unsafe to sink bitcast without lifetime markers");
609 moveOrIgnoreLifetimeMarkers(LMI
);
610 if (!definedInRegion(Blocks
, BitcastAddr
)) {
611 LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
613 SinkCands
.insert(BitcastAddr
);
619 bool CodeExtractor::isEligible() const {
622 BasicBlock
*Header
= *Blocks
.begin();
623 Function
*F
= Header
->getParent();
625 // For functions with varargs, check that varargs handling is only done in the
626 // outlined function, i.e vastart and vaend are only used in outlined blocks.
627 if (AllowVarArgs
&& F
->getFunctionType()->isVarArg()) {
628 auto containsVarArgIntrinsic
= [](const Instruction
&I
) {
629 if (const CallInst
*CI
= dyn_cast
<CallInst
>(&I
))
630 if (const Function
*Callee
= CI
->getCalledFunction())
631 return Callee
->getIntrinsicID() == Intrinsic::vastart
||
632 Callee
->getIntrinsicID() == Intrinsic::vaend
;
636 for (auto &BB
: *F
) {
637 if (Blocks
.count(&BB
))
639 if (llvm::any_of(BB
, containsVarArgIntrinsic
))
646 void CodeExtractor::findInputsOutputs(ValueSet
&Inputs
, ValueSet
&Outputs
,
647 const ValueSet
&SinkCands
) const {
648 for (BasicBlock
*BB
: Blocks
) {
649 // If a used value is defined outside the region, it's an input. If an
650 // instruction is used outside the region, it's an output.
651 for (Instruction
&II
: *BB
) {
652 for (auto &OI
: II
.operands()) {
654 if (!SinkCands
.count(V
) && definedInCaller(Blocks
, V
))
658 for (User
*U
: II
.users())
659 if (!definedInRegion(Blocks
, U
)) {
667 /// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside
668 /// of the region, we need to split the entry block of the region so that the
669 /// PHI node is easier to deal with.
670 void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock
*&Header
) {
671 unsigned NumPredsFromRegion
= 0;
672 unsigned NumPredsOutsideRegion
= 0;
674 if (Header
!= &Header
->getParent()->getEntryBlock()) {
675 PHINode
*PN
= dyn_cast
<PHINode
>(Header
->begin());
676 if (!PN
) return; // No PHI nodes.
678 // If the header node contains any PHI nodes, check to see if there is more
679 // than one entry from outside the region. If so, we need to sever the
680 // header block into two.
681 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
682 if (Blocks
.count(PN
->getIncomingBlock(i
)))
683 ++NumPredsFromRegion
;
685 ++NumPredsOutsideRegion
;
687 // If there is one (or fewer) predecessor from outside the region, we don't
688 // need to do anything special.
689 if (NumPredsOutsideRegion
<= 1) return;
692 // Otherwise, we need to split the header block into two pieces: one
693 // containing PHI nodes merging values from outside of the region, and a
694 // second that contains all of the code for the block and merges back any
695 // incoming values from inside of the region.
696 BasicBlock
*NewBB
= SplitBlock(Header
, Header
->getFirstNonPHI(), DT
);
698 // We only want to code extract the second block now, and it becomes the new
699 // header of the region.
700 BasicBlock
*OldPred
= Header
;
701 Blocks
.remove(OldPred
);
702 Blocks
.insert(NewBB
);
705 // Okay, now we need to adjust the PHI nodes and any branches from within the
706 // region to go to the new header block instead of the old header block.
707 if (NumPredsFromRegion
) {
708 PHINode
*PN
= cast
<PHINode
>(OldPred
->begin());
709 // Loop over all of the predecessors of OldPred that are in the region,
710 // changing them to branch to NewBB instead.
711 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
712 if (Blocks
.count(PN
->getIncomingBlock(i
))) {
713 Instruction
*TI
= PN
->getIncomingBlock(i
)->getTerminator();
714 TI
->replaceUsesOfWith(OldPred
, NewBB
);
717 // Okay, everything within the region is now branching to the right block, we
718 // just have to update the PHI nodes now, inserting PHI nodes into NewBB.
719 BasicBlock::iterator AfterPHIs
;
720 for (AfterPHIs
= OldPred
->begin(); isa
<PHINode
>(AfterPHIs
); ++AfterPHIs
) {
721 PHINode
*PN
= cast
<PHINode
>(AfterPHIs
);
722 // Create a new PHI node in the new region, which has an incoming value
723 // from OldPred of PN.
724 PHINode
*NewPN
= PHINode::Create(PN
->getType(), 1 + NumPredsFromRegion
,
725 PN
->getName() + ".ce");
726 NewPN
->insertBefore(NewBB
->begin());
727 PN
->replaceAllUsesWith(NewPN
);
728 NewPN
->addIncoming(PN
, OldPred
);
730 // Loop over all of the incoming value in PN, moving them to NewPN if they
731 // are from the extracted region.
732 for (unsigned i
= 0; i
!= PN
->getNumIncomingValues(); ++i
) {
733 if (Blocks
.count(PN
->getIncomingBlock(i
))) {
734 NewPN
->addIncoming(PN
->getIncomingValue(i
), PN
->getIncomingBlock(i
));
735 PN
->removeIncomingValue(i
);
743 /// severSplitPHINodesOfExits - if PHI nodes in exit blocks have inputs from
744 /// outlined region, we split these PHIs on two: one with inputs from region
745 /// and other with remaining incoming blocks; then first PHIs are placed in
747 void CodeExtractor::severSplitPHINodesOfExits(
748 const SmallPtrSetImpl
<BasicBlock
*> &Exits
) {
749 for (BasicBlock
*ExitBB
: Exits
) {
750 BasicBlock
*NewBB
= nullptr;
752 for (PHINode
&PN
: ExitBB
->phis()) {
753 // Find all incoming values from the outlining region.
754 SmallVector
<unsigned, 2> IncomingVals
;
755 for (unsigned i
= 0; i
< PN
.getNumIncomingValues(); ++i
)
756 if (Blocks
.count(PN
.getIncomingBlock(i
)))
757 IncomingVals
.push_back(i
);
759 // Do not process PHI if there is one (or fewer) predecessor from region.
760 // If PHI has exactly one predecessor from region, only this one incoming
761 // will be replaced on codeRepl block, so it should be safe to skip PHI.
762 if (IncomingVals
.size() <= 1)
765 // Create block for new PHIs and add it to the list of outlined if it
766 // wasn't done before.
768 NewBB
= BasicBlock::Create(ExitBB
->getContext(),
769 ExitBB
->getName() + ".split",
770 ExitBB
->getParent(), ExitBB
);
771 NewBB
->IsNewDbgInfoFormat
= ExitBB
->IsNewDbgInfoFormat
;
772 SmallVector
<BasicBlock
*, 4> Preds(predecessors(ExitBB
));
773 for (BasicBlock
*PredBB
: Preds
)
774 if (Blocks
.count(PredBB
))
775 PredBB
->getTerminator()->replaceUsesOfWith(ExitBB
, NewBB
);
776 BranchInst::Create(ExitBB
, NewBB
);
777 Blocks
.insert(NewBB
);
781 PHINode
*NewPN
= PHINode::Create(PN
.getType(), IncomingVals
.size(),
782 PN
.getName() + ".ce");
783 NewPN
->insertBefore(NewBB
->getFirstNonPHIIt());
784 for (unsigned i
: IncomingVals
)
785 NewPN
->addIncoming(PN
.getIncomingValue(i
), PN
.getIncomingBlock(i
));
786 for (unsigned i
: reverse(IncomingVals
))
787 PN
.removeIncomingValue(i
, false);
788 PN
.addIncoming(NewPN
, NewBB
);
793 void CodeExtractor::splitReturnBlocks() {
794 for (BasicBlock
*Block
: Blocks
)
795 if (ReturnInst
*RI
= dyn_cast
<ReturnInst
>(Block
->getTerminator())) {
797 Block
->splitBasicBlock(RI
->getIterator(), Block
->getName() + ".ret");
799 // Old dominates New. New node dominates all other nodes dominated
801 DomTreeNode
*OldNode
= DT
->getNode(Block
);
802 SmallVector
<DomTreeNode
*, 8> Children(OldNode
->begin(),
805 DomTreeNode
*NewNode
= DT
->addNewBlock(New
, Block
);
807 for (DomTreeNode
*I
: Children
)
808 DT
->changeImmediateDominator(I
, NewNode
);
813 /// constructFunction - make a function based on inputs and outputs, as follows:
814 /// f(in0, ..., inN, out0, ..., outN)
815 Function
*CodeExtractor::constructFunction(const ValueSet
&inputs
,
816 const ValueSet
&outputs
,
818 BasicBlock
*newRootNode
,
819 BasicBlock
*newHeader
,
820 Function
*oldFunction
,
822 LLVM_DEBUG(dbgs() << "inputs: " << inputs
.size() << "\n");
823 LLVM_DEBUG(dbgs() << "outputs: " << outputs
.size() << "\n");
825 // This function returns unsigned, outputs will go back by reference.
826 switch (NumExitBlocks
) {
828 case 1: RetTy
= Type::getVoidTy(header
->getContext()); break;
829 case 2: RetTy
= Type::getInt1Ty(header
->getContext()); break;
830 default: RetTy
= Type::getInt16Ty(header
->getContext()); break;
833 std::vector
<Type
*> ParamTy
;
834 std::vector
<Type
*> AggParamTy
;
835 ValueSet StructValues
;
836 const DataLayout
&DL
= M
->getDataLayout();
838 // Add the types of the input values to the function's argument list
839 for (Value
*value
: inputs
) {
840 LLVM_DEBUG(dbgs() << "value used in func: " << *value
<< "\n");
841 if (AggregateArgs
&& !ExcludeArgsFromAggregate
.contains(value
)) {
842 AggParamTy
.push_back(value
->getType());
843 StructValues
.insert(value
);
845 ParamTy
.push_back(value
->getType());
848 // Add the types of the output values to the function's argument list.
849 for (Value
*output
: outputs
) {
850 LLVM_DEBUG(dbgs() << "instr used in func: " << *output
<< "\n");
851 if (AggregateArgs
&& !ExcludeArgsFromAggregate
.contains(output
)) {
852 AggParamTy
.push_back(output
->getType());
853 StructValues
.insert(output
);
856 PointerType::get(output
->getType(), DL
.getAllocaAddrSpace()));
860 (ParamTy
.size() + AggParamTy
.size()) ==
861 (inputs
.size() + outputs
.size()) &&
862 "Number of scalar and aggregate params does not match inputs, outputs");
863 assert((StructValues
.empty() || AggregateArgs
) &&
864 "Expeced StructValues only with AggregateArgs set");
866 // Concatenate scalar and aggregate params in ParamTy.
867 size_t NumScalarParams
= ParamTy
.size();
868 StructType
*StructTy
= nullptr;
869 if (AggregateArgs
&& !AggParamTy
.empty()) {
870 StructTy
= StructType::get(M
->getContext(), AggParamTy
);
871 ParamTy
.push_back(PointerType::get(
872 StructTy
, ArgsInZeroAddressSpace
? 0 : DL
.getAllocaAddrSpace()));
876 dbgs() << "Function type: " << *RetTy
<< " f(";
877 for (Type
*i
: ParamTy
)
878 dbgs() << *i
<< ", ";
882 FunctionType
*funcType
= FunctionType::get(
883 RetTy
, ParamTy
, AllowVarArgs
&& oldFunction
->isVarArg());
885 std::string SuffixToUse
=
887 ? (header
->getName().empty() ? "extracted" : header
->getName().str())
889 // Create the new function
890 Function
*newFunction
= Function::Create(
891 funcType
, GlobalValue::InternalLinkage
, oldFunction
->getAddressSpace(),
892 oldFunction
->getName() + "." + SuffixToUse
, M
);
893 newFunction
->IsNewDbgInfoFormat
= oldFunction
->IsNewDbgInfoFormat
;
895 // Inherit all of the target dependent attributes and white-listed
896 // target independent attributes.
897 // (e.g. If the extracted region contains a call to an x86.sse
898 // instruction we need to make sure that the extracted region has the
899 // "target-features" attribute allowing it to be lowered.
900 // FIXME: This should be changed to check to see if a specific
901 // attribute can not be inherited.
902 for (const auto &Attr
: oldFunction
->getAttributes().getFnAttrs()) {
903 if (Attr
.isStringAttribute()) {
904 if (Attr
.getKindAsString() == "thunk")
907 switch (Attr
.getKindAsEnum()) {
908 // Those attributes cannot be propagated safely. Explicitly list them
909 // here so we get a warning if new attributes are added.
910 case Attribute::AllocSize
:
911 case Attribute::Builtin
:
912 case Attribute::Convergent
:
913 case Attribute::JumpTable
:
914 case Attribute::Naked
:
915 case Attribute::NoBuiltin
:
916 case Attribute::NoMerge
:
917 case Attribute::NoReturn
:
918 case Attribute::NoSync
:
919 case Attribute::ReturnsTwice
:
920 case Attribute::Speculatable
:
921 case Attribute::StackAlignment
:
922 case Attribute::WillReturn
:
923 case Attribute::AllocKind
:
924 case Attribute::PresplitCoroutine
:
925 case Attribute::Memory
:
926 case Attribute::NoFPClass
:
927 case Attribute::CoroDestroyOnlyWhenComplete
:
929 // Those attributes should be safe to propagate to the extracted function.
930 case Attribute::AlwaysInline
:
931 case Attribute::Cold
:
932 case Attribute::DisableSanitizerInstrumentation
:
933 case Attribute::FnRetThunkExtern
:
935 case Attribute::NoRecurse
:
936 case Attribute::InlineHint
:
937 case Attribute::MinSize
:
938 case Attribute::NoCallback
:
939 case Attribute::NoDuplicate
:
940 case Attribute::NoFree
:
941 case Attribute::NoImplicitFloat
:
942 case Attribute::NoInline
:
943 case Attribute::NonLazyBind
:
944 case Attribute::NoRedZone
:
945 case Attribute::NoUnwind
:
946 case Attribute::NoSanitizeBounds
:
947 case Attribute::NoSanitizeCoverage
:
948 case Attribute::NullPointerIsValid
:
949 case Attribute::OptimizeForDebugging
:
950 case Attribute::OptForFuzzing
:
951 case Attribute::OptimizeNone
:
952 case Attribute::OptimizeForSize
:
953 case Attribute::SafeStack
:
954 case Attribute::ShadowCallStack
:
955 case Attribute::SanitizeAddress
:
956 case Attribute::SanitizeMemory
:
957 case Attribute::SanitizeThread
:
958 case Attribute::SanitizeHWAddress
:
959 case Attribute::SanitizeMemTag
:
960 case Attribute::SpeculativeLoadHardening
:
961 case Attribute::StackProtect
:
962 case Attribute::StackProtectReq
:
963 case Attribute::StackProtectStrong
:
964 case Attribute::StrictFP
:
965 case Attribute::UWTable
:
966 case Attribute::VScaleRange
:
967 case Attribute::NoCfCheck
:
968 case Attribute::MustProgress
:
969 case Attribute::NoProfile
:
970 case Attribute::SkipProfile
:
972 // These attributes cannot be applied to functions.
973 case Attribute::Alignment
:
974 case Attribute::AllocatedPointer
:
975 case Attribute::AllocAlign
:
976 case Attribute::ByVal
:
977 case Attribute::Dereferenceable
:
978 case Attribute::DereferenceableOrNull
:
979 case Attribute::ElementType
:
980 case Attribute::InAlloca
:
981 case Attribute::InReg
:
982 case Attribute::Nest
:
983 case Attribute::NoAlias
:
984 case Attribute::NoCapture
:
985 case Attribute::NoUndef
:
986 case Attribute::NonNull
:
987 case Attribute::Preallocated
:
988 case Attribute::ReadNone
:
989 case Attribute::ReadOnly
:
990 case Attribute::Returned
:
991 case Attribute::SExt
:
992 case Attribute::StructRet
:
993 case Attribute::SwiftError
:
994 case Attribute::SwiftSelf
:
995 case Attribute::SwiftAsync
:
996 case Attribute::ZExt
:
997 case Attribute::ImmArg
:
998 case Attribute::ByRef
:
999 case Attribute::WriteOnly
:
1000 case Attribute::Writable
:
1001 case Attribute::DeadOnUnwind
:
1002 // These are not really attributes.
1003 case Attribute::None
:
1004 case Attribute::EndAttrKinds
:
1005 case Attribute::EmptyKey
:
1006 case Attribute::TombstoneKey
:
1007 llvm_unreachable("Not a function attribute");
1010 newFunction
->addFnAttr(Attr
);
1012 newFunction
->insert(newFunction
->end(), newRootNode
);
1014 // Create scalar and aggregate iterators to name all of the arguments we
1016 Function::arg_iterator ScalarAI
= newFunction
->arg_begin();
1017 Function::arg_iterator AggAI
= std::next(ScalarAI
, NumScalarParams
);
1019 // Rewrite all users of the inputs in the extracted region to use the
1020 // arguments (or appropriate addressing into struct) instead.
1021 for (unsigned i
= 0, e
= inputs
.size(), aggIdx
= 0; i
!= e
; ++i
) {
1023 if (AggregateArgs
&& StructValues
.contains(inputs
[i
])) {
1025 Idx
[0] = Constant::getNullValue(Type::getInt32Ty(header
->getContext()));
1026 Idx
[1] = ConstantInt::get(Type::getInt32Ty(header
->getContext()), aggIdx
);
1027 Instruction
*TI
= newFunction
->begin()->getTerminator();
1028 GetElementPtrInst
*GEP
= GetElementPtrInst::Create(
1029 StructTy
, &*AggAI
, Idx
, "gep_" + inputs
[i
]->getName(), TI
);
1030 RewriteVal
= new LoadInst(StructTy
->getElementType(aggIdx
), GEP
,
1031 "loadgep_" + inputs
[i
]->getName(), TI
);
1034 RewriteVal
= &*ScalarAI
++;
1036 std::vector
<User
*> Users(inputs
[i
]->user_begin(), inputs
[i
]->user_end());
1037 for (User
*use
: Users
)
1038 if (Instruction
*inst
= dyn_cast
<Instruction
>(use
))
1039 if (Blocks
.count(inst
->getParent()))
1040 inst
->replaceUsesOfWith(inputs
[i
], RewriteVal
);
1043 // Set names for input and output arguments.
1044 if (NumScalarParams
) {
1045 ScalarAI
= newFunction
->arg_begin();
1046 for (unsigned i
= 0, e
= inputs
.size(); i
!= e
; ++i
, ++ScalarAI
)
1047 if (!StructValues
.contains(inputs
[i
]))
1048 ScalarAI
->setName(inputs
[i
]->getName());
1049 for (unsigned i
= 0, e
= outputs
.size(); i
!= e
; ++i
, ++ScalarAI
)
1050 if (!StructValues
.contains(outputs
[i
]))
1051 ScalarAI
->setName(outputs
[i
]->getName() + ".out");
1054 // Rewrite branches to basic blocks outside of the loop to new dummy blocks
1055 // within the new function. This must be done before we lose track of which
1056 // blocks were originally in the code region.
1057 std::vector
<User
*> Users(header
->user_begin(), header
->user_end());
1058 for (auto &U
: Users
)
1059 // The BasicBlock which contains the branch is not in the region
1060 // modify the branch target to a new block
1061 if (Instruction
*I
= dyn_cast
<Instruction
>(U
))
1062 if (I
->isTerminator() && I
->getFunction() == oldFunction
&&
1063 !Blocks
.count(I
->getParent()))
1064 I
->replaceUsesOfWith(header
, newHeader
);
1069 /// Erase lifetime.start markers which reference inputs to the extraction
1070 /// region, and insert the referenced memory into \p LifetimesStart.
1072 /// The extraction region is defined by a set of blocks (\p Blocks), and a set
1073 /// of allocas which will be moved from the caller function into the extracted
1074 /// function (\p SunkAllocas).
1075 static void eraseLifetimeMarkersOnInputs(const SetVector
<BasicBlock
*> &Blocks
,
1076 const SetVector
<Value
*> &SunkAllocas
,
1077 SetVector
<Value
*> &LifetimesStart
) {
1078 for (BasicBlock
*BB
: Blocks
) {
1079 for (Instruction
&I
: llvm::make_early_inc_range(*BB
)) {
1080 auto *II
= dyn_cast
<IntrinsicInst
>(&I
);
1081 if (!II
|| !II
->isLifetimeStartOrEnd())
1084 // Get the memory operand of the lifetime marker. If the underlying
1085 // object is a sunk alloca, or is otherwise defined in the extraction
1086 // region, the lifetime marker must not be erased.
1087 Value
*Mem
= II
->getOperand(1)->stripInBoundsOffsets();
1088 if (SunkAllocas
.count(Mem
) || definedInRegion(Blocks
, Mem
))
1091 if (II
->getIntrinsicID() == Intrinsic::lifetime_start
)
1092 LifetimesStart
.insert(Mem
);
1093 II
->eraseFromParent();
1098 /// Insert lifetime start/end markers surrounding the call to the new function
1099 /// for objects defined in the caller.
1100 static void insertLifetimeMarkersSurroundingCall(
1101 Module
*M
, ArrayRef
<Value
*> LifetimesStart
, ArrayRef
<Value
*> LifetimesEnd
,
1102 CallInst
*TheCall
) {
1103 LLVMContext
&Ctx
= M
->getContext();
1104 auto NegativeOne
= ConstantInt::getSigned(Type::getInt64Ty(Ctx
), -1);
1105 Instruction
*Term
= TheCall
->getParent()->getTerminator();
1107 // Emit lifetime markers for the pointers given in \p Objects. Insert the
1108 // markers before the call if \p InsertBefore, and after the call otherwise.
1109 auto insertMarkers
= [&](Intrinsic::ID MarkerFunc
, ArrayRef
<Value
*> Objects
,
1110 bool InsertBefore
) {
1111 for (Value
*Mem
: Objects
) {
1112 assert((!isa
<Instruction
>(Mem
) || cast
<Instruction
>(Mem
)->getFunction() ==
1113 TheCall
->getFunction()) &&
1114 "Input memory not defined in original function");
1116 Function
*Func
= Intrinsic::getDeclaration(M
, MarkerFunc
, Mem
->getType());
1117 auto Marker
= CallInst::Create(Func
, {NegativeOne
, Mem
});
1119 Marker
->insertBefore(TheCall
);
1121 Marker
->insertBefore(Term
);
1125 if (!LifetimesStart
.empty()) {
1126 insertMarkers(Intrinsic::lifetime_start
, LifetimesStart
,
1127 /*InsertBefore=*/true);
1130 if (!LifetimesEnd
.empty()) {
1131 insertMarkers(Intrinsic::lifetime_end
, LifetimesEnd
,
1132 /*InsertBefore=*/false);
1136 /// emitCallAndSwitchStatement - This method sets up the caller side by adding
1137 /// the call instruction, splitting any PHI nodes in the header block as
1139 CallInst
*CodeExtractor::emitCallAndSwitchStatement(Function
*newFunction
,
1140 BasicBlock
*codeReplacer
,
1142 ValueSet
&outputs
) {
1143 // Emit a call to the new function, passing in: *pointer to struct (if
1144 // aggregating parameters), or plan inputs and allocated memory for outputs
1145 std::vector
<Value
*> params
, ReloadOutputs
, Reloads
;
1146 ValueSet StructValues
;
1148 Module
*M
= newFunction
->getParent();
1149 LLVMContext
&Context
= M
->getContext();
1150 const DataLayout
&DL
= M
->getDataLayout();
1151 CallInst
*call
= nullptr;
1153 // Add inputs as params, or to be filled into the struct
1154 unsigned ScalarInputArgNo
= 0;
1155 SmallVector
<unsigned, 1> SwiftErrorArgs
;
1156 for (Value
*input
: inputs
) {
1157 if (AggregateArgs
&& !ExcludeArgsFromAggregate
.contains(input
))
1158 StructValues
.insert(input
);
1160 params
.push_back(input
);
1161 if (input
->isSwiftError())
1162 SwiftErrorArgs
.push_back(ScalarInputArgNo
);
1167 // Create allocas for the outputs
1168 unsigned ScalarOutputArgNo
= 0;
1169 for (Value
*output
: outputs
) {
1170 if (AggregateArgs
&& !ExcludeArgsFromAggregate
.contains(output
)) {
1171 StructValues
.insert(output
);
1173 AllocaInst
*alloca
=
1174 new AllocaInst(output
->getType(), DL
.getAllocaAddrSpace(),
1175 nullptr, output
->getName() + ".loc",
1176 &codeReplacer
->getParent()->front().front());
1177 ReloadOutputs
.push_back(alloca
);
1178 params
.push_back(alloca
);
1179 ++ScalarOutputArgNo
;
1183 StructType
*StructArgTy
= nullptr;
1184 AllocaInst
*Struct
= nullptr;
1185 unsigned NumAggregatedInputs
= 0;
1186 if (AggregateArgs
&& !StructValues
.empty()) {
1187 std::vector
<Type
*> ArgTypes
;
1188 for (Value
*V
: StructValues
)
1189 ArgTypes
.push_back(V
->getType());
1191 // Allocate a struct at the beginning of this function
1192 StructArgTy
= StructType::get(newFunction
->getContext(), ArgTypes
);
1193 Struct
= new AllocaInst(
1194 StructArgTy
, DL
.getAllocaAddrSpace(), nullptr, "structArg",
1195 AllocationBlock
? &*AllocationBlock
->getFirstInsertionPt()
1196 : &codeReplacer
->getParent()->front().front());
1198 if (ArgsInZeroAddressSpace
&& DL
.getAllocaAddrSpace() != 0) {
1199 auto *StructSpaceCast
= new AddrSpaceCastInst(
1200 Struct
, PointerType ::get(Context
, 0), "structArg.ascast");
1201 StructSpaceCast
->insertAfter(Struct
);
1202 params
.push_back(StructSpaceCast
);
1204 params
.push_back(Struct
);
1206 // Store aggregated inputs in the struct.
1207 for (unsigned i
= 0, e
= StructValues
.size(); i
!= e
; ++i
) {
1208 if (inputs
.contains(StructValues
[i
])) {
1210 Idx
[0] = Constant::getNullValue(Type::getInt32Ty(Context
));
1211 Idx
[1] = ConstantInt::get(Type::getInt32Ty(Context
), i
);
1212 GetElementPtrInst
*GEP
= GetElementPtrInst::Create(
1213 StructArgTy
, Struct
, Idx
, "gep_" + StructValues
[i
]->getName());
1214 GEP
->insertInto(codeReplacer
, codeReplacer
->end());
1215 new StoreInst(StructValues
[i
], GEP
, codeReplacer
);
1216 NumAggregatedInputs
++;
1221 // Emit the call to the function
1222 call
= CallInst::Create(newFunction
, params
,
1223 NumExitBlocks
> 1 ? "targetBlock" : "");
1224 // Add debug location to the new call, if the original function has debug
1225 // info. In that case, the terminator of the entry block of the extracted
1226 // function contains the first debug location of the extracted function,
1227 // set in extractCodeRegion.
1228 if (codeReplacer
->getParent()->getSubprogram()) {
1229 if (auto DL
= newFunction
->getEntryBlock().getTerminator()->getDebugLoc())
1230 call
->setDebugLoc(DL
);
1232 call
->insertInto(codeReplacer
, codeReplacer
->end());
1234 // Set swifterror parameter attributes.
1235 for (unsigned SwiftErrArgNo
: SwiftErrorArgs
) {
1236 call
->addParamAttr(SwiftErrArgNo
, Attribute::SwiftError
);
1237 newFunction
->addParamAttr(SwiftErrArgNo
, Attribute::SwiftError
);
1240 // Reload the outputs passed in by reference, use the struct if output is in
1241 // the aggregate or reload from the scalar argument.
1242 for (unsigned i
= 0, e
= outputs
.size(), scalarIdx
= 0,
1243 aggIdx
= NumAggregatedInputs
;
1245 Value
*Output
= nullptr;
1246 if (AggregateArgs
&& StructValues
.contains(outputs
[i
])) {
1248 Idx
[0] = Constant::getNullValue(Type::getInt32Ty(Context
));
1249 Idx
[1] = ConstantInt::get(Type::getInt32Ty(Context
), aggIdx
);
1250 GetElementPtrInst
*GEP
= GetElementPtrInst::Create(
1251 StructArgTy
, Struct
, Idx
, "gep_reload_" + outputs
[i
]->getName());
1252 GEP
->insertInto(codeReplacer
, codeReplacer
->end());
1256 Output
= ReloadOutputs
[scalarIdx
];
1259 LoadInst
*load
= new LoadInst(outputs
[i
]->getType(), Output
,
1260 outputs
[i
]->getName() + ".reload",
1262 Reloads
.push_back(load
);
1263 std::vector
<User
*> Users(outputs
[i
]->user_begin(), outputs
[i
]->user_end());
1264 for (User
*U
: Users
) {
1265 Instruction
*inst
= cast
<Instruction
>(U
);
1266 if (!Blocks
.count(inst
->getParent()))
1267 inst
->replaceUsesOfWith(outputs
[i
], load
);
1271 // Now we can emit a switch statement using the call as a value.
1272 SwitchInst
*TheSwitch
=
1273 SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context
)),
1274 codeReplacer
, 0, codeReplacer
);
1276 // Since there may be multiple exits from the original region, make the new
1277 // function return an unsigned, switch on that number. This loop iterates
1278 // over all of the blocks in the extracted region, updating any terminator
1279 // instructions in the to-be-extracted region that branch to blocks that are
1280 // not in the region to be extracted.
1281 std::map
<BasicBlock
*, BasicBlock
*> ExitBlockMap
;
1283 // Iterate over the previously collected targets, and create new blocks inside
1284 // the function to branch to.
1285 unsigned switchVal
= 0;
1286 for (BasicBlock
*OldTarget
: OldTargets
) {
1287 if (Blocks
.count(OldTarget
))
1289 BasicBlock
*&NewTarget
= ExitBlockMap
[OldTarget
];
1293 // If we don't already have an exit stub for this non-extracted
1294 // destination, create one now!
1295 NewTarget
= BasicBlock::Create(Context
,
1296 OldTarget
->getName() + ".exitStub",
1298 unsigned SuccNum
= switchVal
++;
1300 Value
*brVal
= nullptr;
1301 assert(NumExitBlocks
< 0xffff && "too many exit blocks for switch");
1302 switch (NumExitBlocks
) {
1304 case 1: break; // No value needed.
1305 case 2: // Conditional branch, return a bool
1306 brVal
= ConstantInt::get(Type::getInt1Ty(Context
), !SuccNum
);
1309 brVal
= ConstantInt::get(Type::getInt16Ty(Context
), SuccNum
);
1313 ReturnInst::Create(Context
, brVal
, NewTarget
);
1315 // Update the switch instruction.
1316 TheSwitch
->addCase(ConstantInt::get(Type::getInt16Ty(Context
),
1321 for (BasicBlock
*Block
: Blocks
) {
1322 Instruction
*TI
= Block
->getTerminator();
1323 for (unsigned i
= 0, e
= TI
->getNumSuccessors(); i
!= e
; ++i
) {
1324 if (Blocks
.count(TI
->getSuccessor(i
)))
1326 BasicBlock
*OldTarget
= TI
->getSuccessor(i
);
1327 // add a new basic block which returns the appropriate value
1328 BasicBlock
*NewTarget
= ExitBlockMap
[OldTarget
];
1329 assert(NewTarget
&& "Unknown target block!");
1331 // rewrite the original branch instruction with this new target
1332 TI
->setSuccessor(i
, NewTarget
);
1336 // Store the arguments right after the definition of output value.
1337 // This should be proceeded after creating exit stubs to be ensure that invoke
1338 // result restore will be placed in the outlined function.
1339 Function::arg_iterator ScalarOutputArgBegin
= newFunction
->arg_begin();
1340 std::advance(ScalarOutputArgBegin
, ScalarInputArgNo
);
1341 Function::arg_iterator AggOutputArgBegin
= newFunction
->arg_begin();
1342 std::advance(AggOutputArgBegin
, ScalarInputArgNo
+ ScalarOutputArgNo
);
1344 for (unsigned i
= 0, e
= outputs
.size(), aggIdx
= NumAggregatedInputs
; i
!= e
;
1346 auto *OutI
= dyn_cast
<Instruction
>(outputs
[i
]);
1350 // Find proper insertion point.
1351 BasicBlock::iterator InsertPt
;
1352 // In case OutI is an invoke, we insert the store at the beginning in the
1353 // 'normal destination' BB. Otherwise we insert the store right after OutI.
1354 if (auto *InvokeI
= dyn_cast
<InvokeInst
>(OutI
))
1355 InsertPt
= InvokeI
->getNormalDest()->getFirstInsertionPt();
1356 else if (auto *Phi
= dyn_cast
<PHINode
>(OutI
))
1357 InsertPt
= Phi
->getParent()->getFirstInsertionPt();
1359 InsertPt
= std::next(OutI
->getIterator());
1361 Instruction
*InsertBefore
= &*InsertPt
;
1362 assert((InsertBefore
->getFunction() == newFunction
||
1363 Blocks
.count(InsertBefore
->getParent())) &&
1364 "InsertPt should be in new function");
1365 if (AggregateArgs
&& StructValues
.contains(outputs
[i
])) {
1366 assert(AggOutputArgBegin
!= newFunction
->arg_end() &&
1367 "Number of aggregate output arguments should match "
1368 "the number of defined values");
1370 Idx
[0] = Constant::getNullValue(Type::getInt32Ty(Context
));
1371 Idx
[1] = ConstantInt::get(Type::getInt32Ty(Context
), aggIdx
);
1372 GetElementPtrInst
*GEP
= GetElementPtrInst::Create(
1373 StructArgTy
, &*AggOutputArgBegin
, Idx
, "gep_" + outputs
[i
]->getName(),
1375 new StoreInst(outputs
[i
], GEP
, InsertBefore
);
1377 // Since there should be only one struct argument aggregating
1378 // all the output values, we shouldn't increment AggOutputArgBegin, which
1379 // always points to the struct argument, in this case.
1381 assert(ScalarOutputArgBegin
!= newFunction
->arg_end() &&
1382 "Number of scalar output arguments should match "
1383 "the number of defined values");
1384 new StoreInst(outputs
[i
], &*ScalarOutputArgBegin
, InsertBefore
);
1385 ++ScalarOutputArgBegin
;
1389 // Now that we've done the deed, simplify the switch instruction.
1390 Type
*OldFnRetTy
= TheSwitch
->getParent()->getParent()->getReturnType();
1391 switch (NumExitBlocks
) {
1393 // There are no successors (the block containing the switch itself), which
1394 // means that previously this was the last part of the function, and hence
1395 // this should be rewritten as a `ret'
1397 // Check if the function should return a value
1398 if (OldFnRetTy
->isVoidTy()) {
1399 ReturnInst::Create(Context
, nullptr, TheSwitch
); // Return void
1400 } else if (OldFnRetTy
== TheSwitch
->getCondition()->getType()) {
1401 // return what we have
1402 ReturnInst::Create(Context
, TheSwitch
->getCondition(), TheSwitch
);
1404 // Otherwise we must have code extracted an unwind or something, just
1405 // return whatever we want.
1406 ReturnInst::Create(Context
,
1407 Constant::getNullValue(OldFnRetTy
), TheSwitch
);
1410 TheSwitch
->eraseFromParent();
1413 // Only a single destination, change the switch into an unconditional
1415 BranchInst::Create(TheSwitch
->getSuccessor(1), TheSwitch
);
1416 TheSwitch
->eraseFromParent();
1419 BranchInst::Create(TheSwitch
->getSuccessor(1), TheSwitch
->getSuccessor(2),
1421 TheSwitch
->eraseFromParent();
1424 // Otherwise, make the default destination of the switch instruction be one
1425 // of the other successors.
1426 TheSwitch
->setCondition(call
);
1427 TheSwitch
->setDefaultDest(TheSwitch
->getSuccessor(NumExitBlocks
));
1428 // Remove redundant case
1429 TheSwitch
->removeCase(SwitchInst::CaseIt(TheSwitch
, NumExitBlocks
-1));
1433 // Insert lifetime markers around the reloads of any output values. The
1434 // allocas output values are stored in are only in-use in the codeRepl block.
1435 insertLifetimeMarkersSurroundingCall(M
, ReloadOutputs
, ReloadOutputs
, call
);
1440 void CodeExtractor::moveCodeToFunction(Function
*newFunction
) {
1441 auto newFuncIt
= newFunction
->front().getIterator();
1442 for (BasicBlock
*Block
: Blocks
) {
1443 // Delete the basic block from the old function, and the list of blocks
1444 Block
->removeFromParent();
1446 // Insert this basic block into the new function
1447 // Insert the original blocks after the entry block created
1448 // for the new function. The entry block may be followed
1449 // by a set of exit blocks at this point, but these exit
1450 // blocks better be placed at the end of the new function.
1451 newFuncIt
= newFunction
->insert(std::next(newFuncIt
), Block
);
1455 void CodeExtractor::calculateNewCallTerminatorWeights(
1456 BasicBlock
*CodeReplacer
,
1457 DenseMap
<BasicBlock
*, BlockFrequency
> &ExitWeights
,
1458 BranchProbabilityInfo
*BPI
) {
1459 using Distribution
= BlockFrequencyInfoImplBase::Distribution
;
1460 using BlockNode
= BlockFrequencyInfoImplBase::BlockNode
;
1462 // Update the branch weights for the exit block.
1463 Instruction
*TI
= CodeReplacer
->getTerminator();
1464 SmallVector
<unsigned, 8> BranchWeights(TI
->getNumSuccessors(), 0);
1466 // Block Frequency distribution with dummy node.
1467 Distribution BranchDist
;
1469 SmallVector
<BranchProbability
, 4> EdgeProbabilities(
1470 TI
->getNumSuccessors(), BranchProbability::getUnknown());
1472 // Add each of the frequencies of the successors.
1473 for (unsigned i
= 0, e
= TI
->getNumSuccessors(); i
< e
; ++i
) {
1474 BlockNode
ExitNode(i
);
1475 uint64_t ExitFreq
= ExitWeights
[TI
->getSuccessor(i
)].getFrequency();
1477 BranchDist
.addExit(ExitNode
, ExitFreq
);
1479 EdgeProbabilities
[i
] = BranchProbability::getZero();
1482 // Check for no total weight.
1483 if (BranchDist
.Total
== 0) {
1484 BPI
->setEdgeProbability(CodeReplacer
, EdgeProbabilities
);
1488 // Normalize the distribution so that they can fit in unsigned.
1489 BranchDist
.normalize();
1491 // Create normalized branch weights and set the metadata.
1492 for (unsigned I
= 0, E
= BranchDist
.Weights
.size(); I
< E
; ++I
) {
1493 const auto &Weight
= BranchDist
.Weights
[I
];
1495 // Get the weight and update the current BFI.
1496 BranchWeights
[Weight
.TargetNode
.Index
] = Weight
.Amount
;
1497 BranchProbability
BP(Weight
.Amount
, BranchDist
.Total
);
1498 EdgeProbabilities
[Weight
.TargetNode
.Index
] = BP
;
1500 BPI
->setEdgeProbability(CodeReplacer
, EdgeProbabilities
);
1502 LLVMContext::MD_prof
,
1503 MDBuilder(TI
->getContext()).createBranchWeights(BranchWeights
));
1506 /// Erase debug info intrinsics which refer to values in \p F but aren't in
1508 static void eraseDebugIntrinsicsWithNonLocalRefs(Function
&F
) {
1509 for (Instruction
&I
: instructions(F
)) {
1510 SmallVector
<DbgVariableIntrinsic
*, 4> DbgUsers
;
1511 SmallVector
<DPValue
*, 4> DPValues
;
1512 findDbgUsers(DbgUsers
, &I
, &DPValues
);
1513 for (DbgVariableIntrinsic
*DVI
: DbgUsers
)
1514 if (DVI
->getFunction() != &F
)
1515 DVI
->eraseFromParent();
1516 for (DPValue
*DPV
: DPValues
)
1517 if (DPV
->getFunction() != &F
)
1518 DPV
->eraseFromParent();
1522 /// Fix up the debug info in the old and new functions by pointing line
1523 /// locations and debug intrinsics to the new subprogram scope, and by deleting
1524 /// intrinsics which point to values outside of the new function.
1525 static void fixupDebugInfoPostExtraction(Function
&OldFunc
, Function
&NewFunc
,
1526 CallInst
&TheCall
) {
1527 DISubprogram
*OldSP
= OldFunc
.getSubprogram();
1528 LLVMContext
&Ctx
= OldFunc
.getContext();
1531 // Erase any debug info the new function contains.
1532 stripDebugInfo(NewFunc
);
1533 // Make sure the old function doesn't contain any non-local metadata refs.
1534 eraseDebugIntrinsicsWithNonLocalRefs(NewFunc
);
1538 // Create a subprogram for the new function. Leave out a description of the
1539 // function arguments, as the parameters don't correspond to anything at the
1541 assert(OldSP
->getUnit() && "Missing compile unit for subprogram");
1542 DIBuilder
DIB(*OldFunc
.getParent(), /*AllowUnresolved=*/false,
1545 DIB
.createSubroutineType(DIB
.getOrCreateTypeArray(std::nullopt
));
1546 DISubprogram::DISPFlags SPFlags
= DISubprogram::SPFlagDefinition
|
1547 DISubprogram::SPFlagOptimized
|
1548 DISubprogram::SPFlagLocalToUnit
;
1549 auto NewSP
= DIB
.createFunction(
1550 OldSP
->getUnit(), NewFunc
.getName(), NewFunc
.getName(), OldSP
->getFile(),
1551 /*LineNo=*/0, SPType
, /*ScopeLine=*/0, DINode::FlagZero
, SPFlags
);
1552 NewFunc
.setSubprogram(NewSP
);
1554 auto IsInvalidLocation
= [&NewFunc
](Value
*Location
) {
1555 // Location is invalid if it isn't a constant or an instruction, or is an
1556 // instruction but isn't in the new function.
1558 (!isa
<Constant
>(Location
) && !isa
<Instruction
>(Location
)))
1560 Instruction
*LocationInst
= dyn_cast
<Instruction
>(Location
);
1561 return LocationInst
&& LocationInst
->getFunction() != &NewFunc
;
1564 // Debug intrinsics in the new function need to be updated in one of two
1566 // 1) They need to be deleted, because they describe a value in the old
1568 // 2) They need to point to fresh metadata, e.g. because they currently
1569 // point to a variable in the wrong scope.
1570 SmallDenseMap
<DINode
*, DINode
*> RemappedMetadata
;
1571 SmallVector
<Instruction
*, 4> DebugIntrinsicsToDelete
;
1572 SmallVector
<DPValue
*, 4> DPVsToDelete
;
1573 DenseMap
<const MDNode
*, MDNode
*> Cache
;
1575 auto GetUpdatedDIVariable
= [&](DILocalVariable
*OldVar
) {
1576 DINode
*&NewVar
= RemappedMetadata
[OldVar
];
1578 DILocalScope
*NewScope
= DILocalScope::cloneScopeForSubprogram(
1579 *OldVar
->getScope(), *NewSP
, Ctx
, Cache
);
1580 NewVar
= DIB
.createAutoVariable(
1581 NewScope
, OldVar
->getName(), OldVar
->getFile(), OldVar
->getLine(),
1582 OldVar
->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero
,
1583 OldVar
->getAlignInBits());
1585 return cast
<DILocalVariable
>(NewVar
);
1588 auto UpdateDPValuesOnInst
= [&](Instruction
&I
) -> void {
1589 for (auto &DPV
: I
.getDbgValueRange()) {
1590 // Apply the two updates that dbg.values get: invalid operands, and
1591 // variable metadata fixup.
1592 if (any_of(DPV
.location_ops(), IsInvalidLocation
)) {
1593 DPVsToDelete
.push_back(&DPV
);
1596 if (DPV
.isDbgAssign() && IsInvalidLocation(DPV
.getAddress())) {
1597 DPVsToDelete
.push_back(&DPV
);
1600 if (!DPV
.getDebugLoc().getInlinedAt())
1601 DPV
.setVariable(GetUpdatedDIVariable(DPV
.getVariable()));
1602 DPV
.setDebugLoc(DebugLoc::replaceInlinedAtSubprogram(DPV
.getDebugLoc(),
1603 *NewSP
, Ctx
, Cache
));
1607 for (Instruction
&I
: instructions(NewFunc
)) {
1608 UpdateDPValuesOnInst(I
);
1610 auto *DII
= dyn_cast
<DbgInfoIntrinsic
>(&I
);
1614 // Point the intrinsic to a fresh label within the new function if the
1615 // intrinsic was not inlined from some other function.
1616 if (auto *DLI
= dyn_cast
<DbgLabelInst
>(&I
)) {
1617 if (DLI
->getDebugLoc().getInlinedAt())
1619 DILabel
*OldLabel
= DLI
->getLabel();
1620 DINode
*&NewLabel
= RemappedMetadata
[OldLabel
];
1622 DILocalScope
*NewScope
= DILocalScope::cloneScopeForSubprogram(
1623 *OldLabel
->getScope(), *NewSP
, Ctx
, Cache
);
1624 NewLabel
= DILabel::get(Ctx
, NewScope
, OldLabel
->getName(),
1625 OldLabel
->getFile(), OldLabel
->getLine());
1627 DLI
->setArgOperand(0, MetadataAsValue::get(Ctx
, NewLabel
));
1631 auto *DVI
= cast
<DbgVariableIntrinsic
>(DII
);
1632 // If any of the used locations are invalid, delete the intrinsic.
1633 if (any_of(DVI
->location_ops(), IsInvalidLocation
)) {
1634 DebugIntrinsicsToDelete
.push_back(DVI
);
1637 // DbgAssign intrinsics have an extra Value argument:
1638 if (auto *DAI
= dyn_cast
<DbgAssignIntrinsic
>(DVI
);
1639 DAI
&& IsInvalidLocation(DAI
->getAddress())) {
1640 DebugIntrinsicsToDelete
.push_back(DVI
);
1643 // If the variable was in the scope of the old function, i.e. it was not
1644 // inlined, point the intrinsic to a fresh variable within the new function.
1645 if (!DVI
->getDebugLoc().getInlinedAt())
1646 DVI
->setVariable(GetUpdatedDIVariable(DVI
->getVariable()));
1649 for (auto *DII
: DebugIntrinsicsToDelete
)
1650 DII
->eraseFromParent();
1651 for (auto *DPV
: DPVsToDelete
)
1652 DPV
->getMarker()->MarkedInstr
->dropOneDbgValue(DPV
);
1653 DIB
.finalizeSubprogram(NewSP
);
1655 // Fix up the scope information attached to the line locations in the new
1657 for (Instruction
&I
: instructions(NewFunc
)) {
1658 if (const DebugLoc
&DL
= I
.getDebugLoc())
1660 DebugLoc::replaceInlinedAtSubprogram(DL
, *NewSP
, Ctx
, Cache
));
1662 // Loop info metadata may contain line locations. Fix them up.
1663 auto updateLoopInfoLoc
= [&Ctx
, &Cache
, NewSP
](Metadata
*MD
) -> Metadata
* {
1664 if (auto *Loc
= dyn_cast_or_null
<DILocation
>(MD
))
1665 return DebugLoc::replaceInlinedAtSubprogram(Loc
, *NewSP
, Ctx
, Cache
);
1668 updateLoopMetadataDebugLocations(I
, updateLoopInfoLoc
);
1670 if (!TheCall
.getDebugLoc())
1671 TheCall
.setDebugLoc(DILocation::get(Ctx
, 0, 0, OldSP
));
1673 eraseDebugIntrinsicsWithNonLocalRefs(NewFunc
);
1677 CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache
&CEAC
) {
1678 ValueSet Inputs
, Outputs
;
1679 return extractCodeRegion(CEAC
, Inputs
, Outputs
);
1683 CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache
&CEAC
,
1684 ValueSet
&inputs
, ValueSet
&outputs
) {
1688 // Assumption: this is a single-entry code region, and the header is the first
1689 // block in the region.
1690 BasicBlock
*header
= *Blocks
.begin();
1691 Function
*oldFunction
= header
->getParent();
1693 // Calculate the entry frequency of the new function before we change the root
1695 BlockFrequency EntryFreq
;
1697 assert(BPI
&& "Both BPI and BFI are required to preserve profile info");
1698 for (BasicBlock
*Pred
: predecessors(header
)) {
1699 if (Blocks
.count(Pred
))
1702 BFI
->getBlockFreq(Pred
) * BPI
->getEdgeProbability(Pred
, header
);
1706 // Remove @llvm.assume calls that will be moved to the new function from the
1707 // old function's assumption cache.
1708 for (BasicBlock
*Block
: Blocks
) {
1709 for (Instruction
&I
: llvm::make_early_inc_range(*Block
)) {
1710 if (auto *AI
= dyn_cast
<AssumeInst
>(&I
)) {
1712 AC
->unregisterAssumption(AI
);
1713 AI
->eraseFromParent();
1718 // If we have any return instructions in the region, split those blocks so
1719 // that the return is not in the region.
1720 splitReturnBlocks();
1722 // Calculate the exit blocks for the extracted region and the total exit
1723 // weights for each of those blocks.
1724 DenseMap
<BasicBlock
*, BlockFrequency
> ExitWeights
;
1725 SmallPtrSet
<BasicBlock
*, 1> ExitBlocks
;
1726 for (BasicBlock
*Block
: Blocks
) {
1727 for (BasicBlock
*Succ
: successors(Block
)) {
1728 if (!Blocks
.count(Succ
)) {
1729 // Update the branch weight for this successor.
1731 BlockFrequency
&BF
= ExitWeights
[Succ
];
1732 BF
+= BFI
->getBlockFreq(Block
) * BPI
->getEdgeProbability(Block
, Succ
);
1734 ExitBlocks
.insert(Succ
);
1738 NumExitBlocks
= ExitBlocks
.size();
1740 for (BasicBlock
*Block
: Blocks
) {
1741 for (BasicBlock
*OldTarget
: successors(Block
))
1742 if (!Blocks
.contains(OldTarget
))
1743 OldTargets
.push_back(OldTarget
);
1746 // If we have to split PHI nodes of the entry or exit blocks, do so now.
1747 severSplitPHINodesOfEntry(header
);
1748 severSplitPHINodesOfExits(ExitBlocks
);
1750 // This takes place of the original loop
1751 BasicBlock
*codeReplacer
= BasicBlock::Create(header
->getContext(),
1752 "codeRepl", oldFunction
,
1754 codeReplacer
->IsNewDbgInfoFormat
= oldFunction
->IsNewDbgInfoFormat
;
1756 // The new function needs a root node because other nodes can branch to the
1757 // head of the region, but the entry node of a function cannot have preds.
1758 BasicBlock
*newFuncRoot
= BasicBlock::Create(header
->getContext(),
1760 newFuncRoot
->IsNewDbgInfoFormat
= oldFunction
->IsNewDbgInfoFormat
;
1762 auto *BranchI
= BranchInst::Create(header
);
1763 // If the original function has debug info, we have to add a debug location
1764 // to the new branch instruction from the artificial entry block.
1765 // We use the debug location of the first instruction in the extracted
1766 // blocks, as there is no other equivalent line in the source code.
1767 if (oldFunction
->getSubprogram()) {
1768 any_of(Blocks
, [&BranchI
](const BasicBlock
*BB
) {
1769 return any_of(*BB
, [&BranchI
](const Instruction
&I
) {
1770 if (!I
.getDebugLoc())
1772 BranchI
->setDebugLoc(I
.getDebugLoc());
1777 BranchI
->insertInto(newFuncRoot
, newFuncRoot
->end());
1779 ValueSet SinkingCands
, HoistingCands
;
1780 BasicBlock
*CommonExit
= nullptr;
1781 findAllocas(CEAC
, SinkingCands
, HoistingCands
, CommonExit
);
1782 assert(HoistingCands
.empty() || CommonExit
);
1784 // Find inputs to, outputs from the code region.
1785 findInputsOutputs(inputs
, outputs
, SinkingCands
);
1787 // Now sink all instructions which only have non-phi uses inside the region.
1788 // Group the allocas at the start of the block, so that any bitcast uses of
1789 // the allocas are well-defined.
1790 AllocaInst
*FirstSunkAlloca
= nullptr;
1791 for (auto *II
: SinkingCands
) {
1792 if (auto *AI
= dyn_cast
<AllocaInst
>(II
)) {
1793 AI
->moveBefore(*newFuncRoot
, newFuncRoot
->getFirstInsertionPt());
1794 if (!FirstSunkAlloca
)
1795 FirstSunkAlloca
= AI
;
1798 assert((SinkingCands
.empty() || FirstSunkAlloca
) &&
1799 "Did not expect a sink candidate without any allocas");
1800 for (auto *II
: SinkingCands
) {
1801 if (!isa
<AllocaInst
>(II
)) {
1802 cast
<Instruction
>(II
)->moveAfter(FirstSunkAlloca
);
1806 if (!HoistingCands
.empty()) {
1807 auto *HoistToBlock
= findOrCreateBlockForHoisting(CommonExit
);
1808 Instruction
*TI
= HoistToBlock
->getTerminator();
1809 for (auto *II
: HoistingCands
)
1810 cast
<Instruction
>(II
)->moveBefore(TI
);
1813 // Collect objects which are inputs to the extraction region and also
1814 // referenced by lifetime start markers within it. The effects of these
1815 // markers must be replicated in the calling function to prevent the stack
1816 // coloring pass from merging slots which store input objects.
1817 ValueSet LifetimesStart
;
1818 eraseLifetimeMarkersOnInputs(Blocks
, SinkingCands
, LifetimesStart
);
1820 // Construct new function based on inputs/outputs & add allocas for all defs.
1821 Function
*newFunction
=
1822 constructFunction(inputs
, outputs
, header
, newFuncRoot
, codeReplacer
,
1823 oldFunction
, oldFunction
->getParent());
1825 // Update the entry count of the function.
1827 auto Count
= BFI
->getProfileCountFromFreq(EntryFreq
);
1829 newFunction
->setEntryCount(
1830 ProfileCount(*Count
, Function::PCT_Real
)); // FIXME
1831 BFI
->setBlockFreq(codeReplacer
, EntryFreq
);
1835 emitCallAndSwitchStatement(newFunction
, codeReplacer
, inputs
, outputs
);
1837 moveCodeToFunction(newFunction
);
1839 // Replicate the effects of any lifetime start/end markers which referenced
1840 // input objects in the extraction region by placing markers around the call.
1841 insertLifetimeMarkersSurroundingCall(
1842 oldFunction
->getParent(), LifetimesStart
.getArrayRef(), {}, TheCall
);
1844 // Propagate personality info to the new function if there is one.
1845 if (oldFunction
->hasPersonalityFn())
1846 newFunction
->setPersonalityFn(oldFunction
->getPersonalityFn());
1848 // Update the branch weights for the exit block.
1849 if (BFI
&& NumExitBlocks
> 1)
1850 calculateNewCallTerminatorWeights(codeReplacer
, ExitWeights
, BPI
);
1852 // Loop over all of the PHI nodes in the header and exit blocks, and change
1853 // any references to the old incoming edge to be the new incoming edge.
1854 for (BasicBlock::iterator I
= header
->begin(); isa
<PHINode
>(I
); ++I
) {
1855 PHINode
*PN
= cast
<PHINode
>(I
);
1856 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
1857 if (!Blocks
.count(PN
->getIncomingBlock(i
)))
1858 PN
->setIncomingBlock(i
, newFuncRoot
);
1861 for (BasicBlock
*ExitBB
: ExitBlocks
)
1862 for (PHINode
&PN
: ExitBB
->phis()) {
1863 Value
*IncomingCodeReplacerVal
= nullptr;
1864 for (unsigned i
= 0, e
= PN
.getNumIncomingValues(); i
!= e
; ++i
) {
1865 // Ignore incoming values from outside of the extracted region.
1866 if (!Blocks
.count(PN
.getIncomingBlock(i
)))
1869 // Ensure that there is only one incoming value from codeReplacer.
1870 if (!IncomingCodeReplacerVal
) {
1871 PN
.setIncomingBlock(i
, codeReplacer
);
1872 IncomingCodeReplacerVal
= PN
.getIncomingValue(i
);
1874 assert(IncomingCodeReplacerVal
== PN
.getIncomingValue(i
) &&
1875 "PHI has two incompatbile incoming values from codeRepl");
1879 fixupDebugInfoPostExtraction(*oldFunction
, *newFunction
, *TheCall
);
1881 // Mark the new function `noreturn` if applicable. Terminators which resume
1882 // exception propagation are treated as returning instructions. This is to
1883 // avoid inserting traps after calls to outlined functions which unwind.
1884 bool doesNotReturn
= none_of(*newFunction
, [](const BasicBlock
&BB
) {
1885 const Instruction
*Term
= BB
.getTerminator();
1886 return isa
<ReturnInst
>(Term
) || isa
<ResumeInst
>(Term
);
1889 newFunction
->setDoesNotReturn();
1891 LLVM_DEBUG(if (verifyFunction(*newFunction
, &errs())) {
1892 newFunction
->dump();
1893 report_fatal_error("verification of newFunction failed!");
1895 LLVM_DEBUG(if (verifyFunction(*oldFunction
))
1896 report_fatal_error("verification of oldFunction failed!"));
1897 LLVM_DEBUG(if (AC
&& verifyAssumptionCache(*oldFunction
, *newFunction
, AC
))
1898 report_fatal_error("Stale Asumption cache for old Function!"));
1902 bool CodeExtractor::verifyAssumptionCache(const Function
&OldFunc
,
1903 const Function
&NewFunc
,
1904 AssumptionCache
*AC
) {
1905 for (auto AssumeVH
: AC
->assumptions()) {
1906 auto *I
= dyn_cast_or_null
<CallInst
>(AssumeVH
);
1910 // There shouldn't be any llvm.assume intrinsics in the new function.
1911 if (I
->getFunction() != &OldFunc
)
1914 // There shouldn't be any stale affected values in the assumption cache
1915 // that were previously in the old function, but that have now been moved
1916 // to the new function.
1917 for (auto AffectedValVH
: AC
->assumptionsFor(I
->getOperand(0))) {
1918 auto *AffectedCI
= dyn_cast_or_null
<CallInst
>(AffectedValVH
);
1921 if (AffectedCI
->getFunction() != &OldFunc
)
1923 auto *AssumedInst
= cast
<Instruction
>(AffectedCI
->getOperand(0));
1924 if (AssumedInst
->getFunction() != &OldFunc
)
1931 void CodeExtractor::excludeArgFromAggregate(Value
*Arg
) {
1932 ExcludeArgsFromAggregate
.insert(Arg
);