//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains classes used to discover whether, for a particular value,
// there is a path from its definition to a use that crosses a suspend point.
//
// Using the information discovered we form a Coroutine Frame structure to
// contain those values. All uses of those values are replaced with appropriate
// GEP + load from the coroutine frame. At the point of the definition we spill
// the value into the coroutine frame.
//
// TODO: pack values tightly using liveness info.
//===----------------------------------------------------------------------===//

#include "CoroInternal.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/circular_raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
// "coro-frame", which results in leaner debug spew.
#define DEBUG_TYPE "coro-suspend-crossing"

enum { SmallVectorThreshold = 32 };

// Provides two way mapping between the blocks and numbers.
namespace {
class BlockToIndexMapping {
  SmallVector<BasicBlock *, SmallVectorThreshold> V;

public:
  size_t size() const { return V.size(); }

  BlockToIndexMapping(Function &F) {
    for (BasicBlock &BB : F)
      V.push_back(&BB);
    llvm::sort(V);
  }

  size_t blockToIndex(BasicBlock *BB) const {
    auto *I = std::lower_bound(V.begin(), V.end(), BB);
    assert(I != V.end() && *I == BB && "BlockToIndexMapping: Unknown block");
    return I - V.begin();
  }

  BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
};
} // end anonymous namespace

// The SuspendCrossingInfo maintains data that makes it possible to answer the
// question: given two BasicBlocks A and B, is there a path from A to B that
// passes through a suspend point?
//
// For every basic block 'i' it maintains a BlockData that consists of:
//   Consumes: a bit vector which contains a set of indices of blocks that can
//             reach block 'i'.
//   Kills: a bit vector which contains a set of indices of blocks that can
//          reach block 'i', but at least one of the paths crosses a suspend
//          point.
//   Suspend: a boolean indicating whether block 'i' contains a suspend point.
//   End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
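//
// Illustrative example (hypothetical CFG): in
//
//     entry -> susp -> use
//
// where block 'susp' contains a suspend point, block 'use' consumes both
// 'entry' and 'susp', and it kills 'entry' because the only path from 'entry'
// crosses the suspend point; a value defined in 'entry' and used in 'use'
// therefore has to be spilled to the coroutine frame.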
namespace {
struct SuspendCrossingInfo {
  BlockToIndexMapping Mapping;

  struct BlockData {
    BitVector Consumes;
    BitVector Kills;
    bool Suspend = false;
    bool End = false;
  };
  SmallVector<BlockData, SmallVectorThreshold> Block;

  iterator_range<succ_iterator> successors(BlockData const &BD) const {
    BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
    return llvm::successors(BB);
  }

  BlockData &getBlockData(BasicBlock *BB) {
    return Block[Mapping.blockToIndex(BB)];
  }

  void dump() const;
  void dump(StringRef Label, BitVector const &BV) const;

  SuspendCrossingInfo(Function &F, coro::Shape &Shape);

  bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const {
    size_t const DefIndex = Mapping.blockToIndex(DefBB);
    size_t const UseIndex = Mapping.blockToIndex(UseBB);

    assert(Block[UseIndex].Consumes[DefIndex] && "use must consume def");
    bool const Result = Block[UseIndex].Kills[DefIndex];
    LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName()
                      << " answer is " << Result << "\n");
    return Result;
  }

  bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
    auto *I = cast<Instruction>(U);

    // We rewrote PHINodes so that only the ones with exactly one incoming
    // value need to be analyzed.
    if (auto *PN = dyn_cast<PHINode>(I))
      if (PN->getNumIncomingValues() > 1)
        return false;

    BasicBlock *UseBB = I->getParent();
    return hasPathCrossingSuspendPoint(DefBB, UseBB);
  }

  bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
    return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
  }

  bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
    return isDefinitionAcrossSuspend(I.getParent(), U);
  }
};
} // end anonymous namespace

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
                                                BitVector const &BV) const {
  dbgs() << Label << ":";
  for (size_t I = 0, N = BV.size(); I < N; ++I)
    if (BV[I])
      dbgs() << " " << Mapping.indexToBlock(I)->getName();
  dbgs() << "\n";
}

LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
  for (size_t I = 0, N = Block.size(); I < N; ++I) {
    BasicBlock *const B = Mapping.indexToBlock(I);
    dbgs() << B->getName() << ":\n";
    dump("   Consumes", Block[I].Consumes);
    dump("      Kills", Block[I].Kills);
  }
  dbgs() << "\n";
}
#endif

SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
    : Mapping(F), Block(Mapping.size()) {
  const size_t N = Mapping.size();

  // Initialize every block so that it consumes itself.
  for (size_t I = 0; I < N; ++I) {
    auto &B = Block[I];
    B.Consumes.resize(N);
    B.Kills.resize(N);
    B.Consumes.set(I);
  }

  // Mark all CoroEnd blocks. We do not propagate Kills beyond coro.ends, as
  // the code beyond coro.end is reachable during the initial invocation of the
  // coroutine.
  for (auto *CE : Shape.CoroEnds)
    getBlockData(CE->getParent()).End = true;

  // Mark all suspend blocks and indicate that they kill everything they
  // consume. Note that crossing coro.save also requires a spill, as any code
  // between coro.save and coro.suspend may resume the coroutine and all of the
  // state needs to be saved by that time.
  auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
    BasicBlock *SuspendBlock = BarrierInst->getParent();
    auto &B = getBlockData(SuspendBlock);
    B.Suspend = true;
    B.Kills |= B.Consumes;
  };
  for (CoroSuspendInst *CSI : Shape.CoroSuspends) {
    markSuspendBlock(CSI);
    markSuspendBlock(CSI->getCoroSave());
  }
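
  // For example (illustrative IR, names hypothetical):
  //   %save = call token @llvm.coro.save(i8* null)
  //   call void @may_resume()  ; may resume the coroutine before coro.suspend
  //   %s = call i8 @llvm.coro.suspend(token %save, i1 false)
  // Any state live across %save must already be in the frame when @may_resume
  // runs, which is why the coro.save block is marked like a suspend block.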

  // Iterate propagating consumes and kills until they stop changing.
  int Iteration = 0;
  (void)Iteration;

  bool Changed;
  do {
    LLVM_DEBUG(dbgs() << "iteration " << ++Iteration);
    LLVM_DEBUG(dbgs() << "==============\n");

    Changed = false;
    for (size_t I = 0; I < N; ++I) {
      auto &B = Block[I];
      for (BasicBlock *SI : successors(B)) {

        auto SuccNo = Mapping.blockToIndex(SI);

        // Save Consumes and Kills bitsets so that it is easy to see
        // if anything changed after propagation.
        auto &S = Block[SuccNo];
        auto SavedConsumes = S.Consumes;
        auto SavedKills = S.Kills;

        // Propagate Kills and Consumes from block B into its successor S.
        S.Consumes |= B.Consumes;
        S.Kills |= B.Kills;

        // If block B is a suspend block, it should propagate kills into its
        // successor for every block B consumes.
        if (B.Suspend)
          S.Kills |= B.Consumes;

        if (S.Suspend) {
          // If block S is a suspend block, it should kill all of the blocks
          // it consumes.
          S.Kills |= S.Consumes;
        } else if (S.End) {
          // If block S is an end block, it should not propagate kills as the
          // blocks following coro.end() are reached during initial invocation
          // of the coroutine while all the data are still available on the
          // stack or in the registers.
          S.Kills.reset();
        } else {
          // This is reached when block S is neither a suspend nor a coro.end
          // block; we need to make sure that it is not in its own kill set.
          S.Kills.reset(SuccNo);
        }

        // See if anything changed.
        Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes);

        if (S.Kills != SavedKills) {
          LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
                            << "\n");
          LLVM_DEBUG(dump("S.Kills", S.Kills));
          LLVM_DEBUG(dump("SavedKills", SavedKills));
        }
        if (S.Consumes != SavedConsumes) {
          LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n");
          LLVM_DEBUG(dump("S.Consume", S.Consumes));
          LLVM_DEBUG(dump("SavedCons", SavedConsumes));
        }
      }
    }
  } while (Changed);
  LLVM_DEBUG(dump());
}

#undef DEBUG_TYPE // "coro-suspend-crossing"
#define DEBUG_TYPE "coro-frame"

// We build up the list of spills for every case where a use is separated
// from the definition by a suspend point.
namespace {
class Spill {
  Value *Def = nullptr;
  Instruction *User = nullptr;
  unsigned FieldNo = 0;

public:
  Spill(Value *Def, llvm::User *U) : Def(Def), User(cast<Instruction>(U)) {}

  Value *def() const { return Def; }
  Instruction *user() const { return User; }
  BasicBlock *userBlock() const { return User->getParent(); }

  // Note that the field index is stored in the first SpillEntry for a
  // particular definition. Subsequent mentions of a definition do not have
  // fieldNo assigned. This works out fine as the users of Spills capture the
  // info about the definition the first time they encounter it. Consider
  // refactoring SpillInfo into two arrays to normalize the spill
  // representation.
  unsigned fieldIndex() const {
    assert(FieldNo && "Accessing unassigned field");
    return FieldNo;
  }
  void setFieldIndex(unsigned FieldNumber) {
    assert(!FieldNo && "Reassigning field number");
    FieldNo = FieldNumber;
  }
};
} // namespace

// Note that there may be more than one record with the same value of Def in
// the SpillInfo vector.
using SpillInfo = SmallVector<Spill, 8>;
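
// For example (hypothetical entries): a value %x used across suspend points in
// two different blocks yields two consecutive records,
//   { Def = %x, User = user1, FieldNo = 5 }  // first mention carries the index
//   { Def = %x, User = user2, FieldNo = 0 }  // later mentions leave it unset
// and consumers of SpillInfo pick up the field index from the first record.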

#ifndef NDEBUG
static void dump(StringRef Title, SpillInfo const &Spills) {
  dbgs() << "------------- " << Title << " --------------\n";
  Value *CurrentValue = nullptr;
  for (auto const &E : Spills) {
    if (CurrentValue != E.def()) {
      CurrentValue = E.def();
      CurrentValue->dump();
    }
    dbgs() << "   user: ";
    E.user()->dump();
  }
}
#endif

// We cannot rely solely on natural alignment of a type when building a
// coroutine frame, and if the alignment specified on the Alloca instruction
// differs from the natural alignment of the alloca type we will need to insert
// padding.
struct PaddingCalculator {
  const DataLayout &DL;
  LLVMContext &Context;
  unsigned StructSize = 0;

  PaddingCalculator(LLVMContext &Context, DataLayout const &DL)
      : DL(DL), Context(Context) {}

  // Replicate the logic from IR/DataLayout.cpp to match field offset
  // computation for LLVM structs.
  void addType(Type *Ty) {
    unsigned TyAlign = DL.getABITypeAlignment(Ty);
    if ((StructSize & (TyAlign - 1)) != 0)
      StructSize = alignTo(StructSize, TyAlign);

    StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item.
  }

  void addTypes(SmallVectorImpl<Type *> const &Types) {
    for (auto *Ty : Types)
      addType(Ty);
  }

  unsigned computePadding(Type *Ty, unsigned ForcedAlignment) {
    unsigned TyAlign = DL.getABITypeAlignment(Ty);
    auto Natural = alignTo(StructSize, TyAlign);
    auto Forced = alignTo(StructSize, ForcedAlignment);

    // Return how many bytes of padding we need to insert.
    if (Natural != Forced)
      return std::max(Natural, Forced) - StructSize;

    // Rely on natural alignment.
    return 0;
  }

  // If padding is required, return the padding field type to insert.
  ArrayType *getPaddingType(Type *Ty, unsigned ForcedAlignment) {
    if (auto Padding = computePadding(Ty, ForcedAlignment))
      return ArrayType::get(Type::getInt8Ty(Context), Padding);

    return nullptr;
  }
};
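
// Worked example (illustrative numbers): with StructSize == 20 and a type
// whose ABI alignment is 4 but whose alloca demands align 8, Natural == 20
// and Forced == 24, so computePadding returns 4 and getPaddingType produces a
// [4 x i8] padding field.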

// Build a struct that will keep state for an active coroutine.
//   struct f.frame {
//     ResumeFnTy ResumeFnAddr;
//     ResumeFnTy DestroyFnAddr;
//     int ResumeIndex;
//     ... promise (if present) ...
//     ... spills ...
//   };
static StructType *buildFrameType(Function &F, coro::Shape &Shape,
                                  SpillInfo &Spills) {
  LLVMContext &C = F.getContext();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PaddingCalculator Padder(C, DL);
  SmallString<32> Name(F.getName());
  Name.append(".Frame");
  StructType *FrameTy = StructType::create(C, Name);
  auto *FramePtrTy = FrameTy->getPointerTo();
  auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
                                 /*IsVarArgs=*/false);
  auto *FnPtrTy = FnTy->getPointerTo();

  // Figure out how wide an integer type storing the suspend index should be.
  unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
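  // For example: a single suspend point still gets a 1-bit index (hence the
  // std::max with 1U), while three suspend points need Log2_64_Ceil(3) == 2
  // bits.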
  Type *PromiseType = Shape.PromiseAlloca
                          ? Shape.PromiseAlloca->getType()->getElementType()
                          : Type::getInt1Ty(C);
  SmallVector<Type *, 8> Types{FnPtrTy, FnPtrTy, PromiseType,
                               Type::getIntNTy(C, IndexBits)};
  Value *CurrentDef = nullptr;

  Padder.addTypes(Types);

  // Create an entry for every spilled value.
  for (auto &S : Spills) {
    if (CurrentDef == S.def())
      continue;

    CurrentDef = S.def();
    // PromiseAlloca was already added to the Types array earlier.
    if (CurrentDef == Shape.PromiseAlloca)
      continue;

    Type *Ty = nullptr;
    if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) {
      Ty = AI->getAllocatedType();
      if (unsigned AllocaAlignment = AI->getAlignment()) {
        // If alignment is specified in alloca, see if we need to insert extra
        // padding.
        if (auto PaddingTy = Padder.getPaddingType(Ty, AllocaAlignment)) {
          Types.push_back(PaddingTy);
          Padder.addType(PaddingTy);
        }
      }
    } else {
      Ty = CurrentDef->getType();
    }

    S.setFieldIndex(Types.size());
    Types.push_back(Ty);
    Padder.addType(Ty);
  }
  FrameTy->setBody(Types);

  return FrameTy;
}

// We need to make room to insert a spill after the initial PHIs, but before a
// catchswitch instruction. Placing it before the catchswitch would violate
// the requirement that a catchswitch, like all other EHPads, must be the
// first non-PHI in a block.
//
// Split away the catchswitch into a separate block and insert in its place:
//
//   cleanuppad <InsertPt> cleanupret.
//
// The cleanupret instruction will act as an insert point for the spill.
static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
  BasicBlock *CurrentBlock = CatchSwitch->getParent();
  BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
  CurrentBlock->getTerminator()->eraseFromParent();

  auto *CleanupPad =
      CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
  auto *CleanupRet =
      CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
  return CleanupRet;
}

// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
// the coroutine:
//
//     %hdl = coro.begin(...)
//     whatever
//
// becomes:
//
//     %hdl = coro.begin(...)
//     %FramePtr = bitcast i8* hdl to %f.frame*
//     br label %AllocaSpillBB
//
//   AllocaSpillBB:
//     ; geps corresponding to allocas that were moved to coroutine frame
//     br label PostSpill
//
//   PostSpill:
//     whatever
//
static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
  auto *CB = Shape.CoroBegin;
  IRBuilder<> Builder(CB->getNextNode());
  StructType *FrameTy = Shape.FrameTy;
  PointerType *FramePtrTy = FrameTy->getPointerTo();
  auto *FramePtr =
      cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));

  Value *CurrentValue = nullptr;
  BasicBlock *CurrentBlock = nullptr;
  Value *CurrentReload = nullptr;
  unsigned Index = 0; // Proper field number will be read from field definition.

  // We need to keep track of any allocas that need "spilling", since they
  // will live in the coroutine frame now: all accesses to them need to be
  // changed, not just the accesses across suspend points. We remember allocas
  // and their indices to be handled once we have processed all the spills.
  SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;
  // Promise alloca (if present) has a fixed field number (Shape::PromiseField).
  if (Shape.PromiseAlloca)
    Allocas.emplace_back(Shape.PromiseAlloca, coro::Shape::PromiseField);

  // Create a load instruction to reload the spilled value from the coroutine
  // frame.
  auto CreateReload = [&](Instruction *InsertBefore) {
    assert(Index && "accessing unassigned field number");
    Builder.SetInsertPoint(InsertBefore);
    auto *G = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, Index,
                                                 CurrentValue->getName() +
                                                     Twine(".reload.addr"));
    return isa<AllocaInst>(CurrentValue)
               ? G
               : Builder.CreateLoad(FrameTy->getElementType(Index), G,
                                    CurrentValue->getName() + Twine(".reload"));
  };
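
  // Illustrative sketch of the emitted IR (names hypothetical): for a value
  // %x spilled to frame field 3, CreateReload produces roughly
  //   %x.reload.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr,
  //                                           i32 0, i32 3
  //   %x.reload = load i32, i32* %x.reload.addr
  // while for a spilled alloca the GEP itself is the replacement value.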

  for (auto const &E : Spills) {
    // If we have not seen the value, generate a spill.
    if (CurrentValue != E.def()) {
      CurrentValue = E.def();
      CurrentBlock = nullptr;
      CurrentReload = nullptr;

      Index = E.fieldIndex();

      if (auto *AI = dyn_cast<AllocaInst>(CurrentValue)) {
        // Spilled AllocaInst will be replaced with GEP from the coroutine
        // frame; there is no spill required.
        Allocas.emplace_back(AI, Index);
        if (!AI->isStaticAlloca())
          report_fatal_error("Coroutines cannot handle non static allocas yet");
      } else {
        // Otherwise, create a store instruction storing the value into the
        // coroutine frame.

        Instruction *InsertPt = nullptr;
        if (isa<Argument>(CurrentValue)) {
          // For arguments, we will place the store instruction right after
          // the coroutine frame pointer instruction, i.e. bitcast of
          // coro.begin from i8* to %f.frame*.
          InsertPt = FramePtr->getNextNode();
        } else if (auto *II = dyn_cast<InvokeInst>(CurrentValue)) {
          // If we are spilling the result of the invoke instruction, split the
          // normal edge and insert the spill in the new block.
          auto NewBB = SplitEdge(II->getParent(), II->getNormalDest());
          InsertPt = NewBB->getTerminator();
        } else if (isa<PHINode>(CurrentValue)) {
          // Skip the PHINodes and EH pads instructions.
          BasicBlock *DefBlock = cast<Instruction>(E.def())->getParent();
          if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
            InsertPt = splitBeforeCatchSwitch(CSI);
          else
            InsertPt = &*DefBlock->getFirstInsertionPt();
        } else {
          // For all other values, the spill is placed immediately after
          // the definition.
          assert(!cast<Instruction>(E.def())->isTerminator() &&
                 "unexpected terminator");
          InsertPt = cast<Instruction>(E.def())->getNextNode();
        }

        Builder.SetInsertPoint(InsertPt);
        auto *G = Builder.CreateConstInBoundsGEP2_32(
            FrameTy, FramePtr, 0, Index,
            CurrentValue->getName() + Twine(".spill.addr"));
        Builder.CreateStore(CurrentValue, G);
      }
    }

    // If we have not seen the use block, generate a reload in it.
    if (CurrentBlock != E.userBlock()) {
      CurrentBlock = E.userBlock();
      CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt());
    }

    // If we have a single edge PHINode, remove it and replace it with a reload
    // from the coroutine frame. (We already took care of multi edge PHINodes
    // by rewriting them in the rewritePHIs function).
    if (auto *PN = dyn_cast<PHINode>(E.user())) {
      assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
                                                "values in the PHINode");
      PN->replaceAllUsesWith(CurrentReload);
      PN->eraseFromParent();
      continue;
    }

    // Replace all uses of CurrentValue in the current instruction with reload.
    E.user()->replaceUsesOfWith(CurrentValue, CurrentReload);
  }

  BasicBlock *FramePtrBB = FramePtr->getParent();
  Shape.AllocaSpillBlock =
      FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
  Shape.AllocaSpillBlock->splitBasicBlock(&Shape.AllocaSpillBlock->front(),
                                          "PostSpill");

  Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
  // If we found any allocas, replace all of their remaining uses with Geps.
  for (auto &P : Allocas) {
    auto *G =
        Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, P.second);
    // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
    // here, as we are changing the location of the instruction.
    G->takeName(P.first);
    P.first->replaceAllUsesWith(G);
    P.first->eraseFromParent();
  }
  return FramePtr;
}

// Sets the unwind edge of an instruction to a particular successor.
static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
  if (auto *II = dyn_cast<InvokeInst>(TI))
    II->setUnwindDest(Succ);
  else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
    CS->setUnwindDest(Succ);
  else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
    CR->setUnwindDest(Succ);
  else
    llvm_unreachable("unexpected terminator instruction");
}

// Replaces all uses of OldPred with the NewPred block in all PHINodes in a
// block.
static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
                           BasicBlock *NewPred,
                           PHINode *LandingPadReplacement) {
  unsigned BBIdx = 0;
  for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
    PHINode *PN = cast<PHINode>(I);

    // We manually update the LandingPadReplacement PHINode and it is the last
    // PHI Node. So, if we find it, we are done.
    if (LandingPadReplacement == PN)
      break;

    // Reuse the previous value of BBIdx if it lines up. In cases where we
    // have multiple phi nodes with *lots* of predecessors, this is a speed
    // win because we don't have to scan the PHI looking for TIBB. This
    // happens because the BB list of PHI nodes is usually in the same
    // order.
    if (PN->getIncomingBlock(BBIdx) != OldPred)
      BBIdx = PN->getBasicBlockIndex(OldPred);

    assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!");
    PN->setIncomingBlock(BBIdx, NewPred);
  }
}

// Uses SplitEdge unless the successor block is an EHPad, in which case do EH
// specific handling.
static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
                                    LandingPadInst *OriginalPad,
                                    PHINode *LandingPadReplacement) {
  auto *PadInst = Succ->getFirstNonPHI();
  if (!LandingPadReplacement && !PadInst->isEHPad())
    return SplitEdge(BB, Succ);

  auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ);
  setUnwindEdgeTo(BB->getTerminator(), NewBB);
  updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);

  if (LandingPadReplacement) {
    auto *NewLP = OriginalPad->clone();
    auto *Terminator = BranchInst::Create(Succ, NewBB);
    NewLP->insertBefore(Terminator);
    LandingPadReplacement->addIncoming(NewLP, NewBB);
    return NewBB;
  }
  Value *ParentPad = nullptr;
  if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
    ParentPad = FuncletPad->getParentPad();
  else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
    ParentPad = CatchSwitch->getParentPad();
  else
    llvm_unreachable("handling for other EHPads not implemented yet");

  auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB);
  CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
  return NewBB;
}

static void rewritePHIs(BasicBlock &BB) {
  // For every incoming edge we will create a block holding all
  // incoming values in a single PHI node.
  //
  // loop:
  //    %n.val = phi i32[%n, %entry], [%inc, %loop]
  //
  // It will create:
  //
  // loop.from.entry:
  //    %n.loop.pre = phi i32 [%n, %entry]
  //    br %label loop
  // loop.from.loop:
  //    %inc.loop.pre = phi i32 [%inc, %loop]
  //    br %label loop
  //
  // After this rewrite, further analysis will ignore any phi nodes with more
  // than one incoming edge.

  // TODO: Simplify PHINodes in the basic block to remove duplicate
  // predecessors.

  LandingPadInst *LandingPad = nullptr;
  PHINode *ReplPHI = nullptr;
  if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
    // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
    // We replace the original landing pad with a PHINode that will collect the
    // results from all of them.
    ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad);
    ReplPHI->takeName(LandingPad);
    LandingPad->replaceAllUsesWith(ReplPHI);
    // We will erase the original landing pad at the end of this function after
    // ehAwareSplitEdge cloned it in the transition blocks.
  }

  SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
  for (BasicBlock *Pred : Preds) {
    auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
    IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
    auto *PN = cast<PHINode>(&BB.front());
    do {
      int Index = PN->getBasicBlockIndex(IncomingBB);
      Value *V = PN->getIncomingValue(Index);
      PHINode *InputV = PHINode::Create(
          V->getType(), 1, V->getName() + Twine(".") + BB.getName(),
          &IncomingBB->front());
      InputV->addIncoming(V, Pred);
      PN->setIncomingValue(Index, InputV);
      PN = dyn_cast<PHINode>(PN->getNextNode());
    } while (PN != ReplPHI); // ReplPHI is either null or the PHI that replaced
                             // the landing pad.
  }

  if (LandingPad) {
    // Calls to the ehAwareSplitEdge function cloned the original landing pad.
    // We no longer need it.
    LandingPad->eraseFromParent();
  }
}

static void rewritePHIs(Function &F) {
  SmallVector<BasicBlock *, 8> WorkList;

  for (BasicBlock &BB : F)
    if (auto *PN = dyn_cast<PHINode>(&BB.front()))
      if (PN->getNumIncomingValues() > 1)
        WorkList.push_back(&BB);

  for (BasicBlock *BB : WorkList)
    rewritePHIs(*BB);
}

// Check for instructions that we can recreate on resume as opposed to spilling
// the result into a coroutine frame.
static bool materializable(Instruction &V) {
  return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
         isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V);
}
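
// For example, a getelementptr whose result is needed after a suspend point
// can be recomputed on resume from its (spilled) base pointer, which is
// cheaper than reserving an extra frame slot for the computed address itself.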

// Check for structural coroutine intrinsics that should not be spilled into
// the coroutine frame.
static bool isCoroutineStructureIntrinsic(Instruction &I) {
  return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) ||
         isa<CoroSuspendInst>(&I);
}

// For every use of the value that is across a suspend point, recreate that
// value after the suspend point.
static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
                                              SpillInfo const &Spills) {
  BasicBlock *CurrentBlock = nullptr;
  Instruction *CurrentMaterialization = nullptr;
  Instruction *CurrentDef = nullptr;

  for (auto const &E : Spills) {
    // If it is a new definition, update CurrentXXX variables.
    if (CurrentDef != E.def()) {
      CurrentDef = cast<Instruction>(E.def());
      CurrentBlock = nullptr;
      CurrentMaterialization = nullptr;
    }

    // If we have not seen this block, materialize the value.
    if (CurrentBlock != E.userBlock()) {
      CurrentBlock = E.userBlock();
      CurrentMaterialization = cast<Instruction>(CurrentDef)->clone();
      CurrentMaterialization->setName(CurrentDef->getName());
      CurrentMaterialization->insertBefore(
          &*CurrentBlock->getFirstInsertionPt());
    }

    if (auto *PN = dyn_cast<PHINode>(E.user())) {
      assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
                                                "values in the PHINode");
      PN->replaceAllUsesWith(CurrentMaterialization);
      PN->eraseFromParent();
      continue;
    }

    // Replace all uses of CurrentDef in the current instruction with the
    // CurrentMaterialization for the block.
    E.user()->replaceUsesOfWith(CurrentDef, CurrentMaterialization);
  }
}

// Move early uses of spilled variable after CoroBegin.
// For example, if a parameter had its address taken, we may end up with code
// like:
//        define @f(i32 %n) {
//          %n.addr = alloca i32
//          store %n, %n.addr
//          ...
//          call @coro.begin
// and we need to move the store after coro.begin.
static void moveSpillUsesAfterCoroBegin(Function &F, SpillInfo const &Spills,
                                        CoroBeginInst *CoroBegin) {
  DominatorTree DT(F);
  SmallVector<Instruction *, 8> NeedsMoving;

  Value *CurrentValue = nullptr;

  for (auto const &E : Spills) {
    if (CurrentValue == E.def())
      continue;

    CurrentValue = E.def();

    for (User *U : CurrentValue->users()) {
      Instruction *I = cast<Instruction>(U);
      if (!DT.dominates(CoroBegin, I)) {
        LLVM_DEBUG(dbgs() << "will move: " << *I << "\n");

        // TODO: Make this more robust. Currently if we run into a situation
        // where a simple instruction move won't work, we panic and
        // report_fatal_error.
        for (User *UI : I->users())
          if (!DT.dominates(CoroBegin, cast<Instruction>(UI)))
            report_fatal_error("cannot move instruction since its users are not"
                               " dominated by CoroBegin");

        NeedsMoving.push_back(I);
      }
    }
  }

  Instruction *InsertPt = CoroBegin->getNextNode();
  for (Instruction *I : NeedsMoving)
    I->moveBefore(InsertPt);
}

// Splits the block at a particular instruction unless it is the first
// instruction in the block with a single predecessor.
static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
  auto *BB = I->getParent();
  if (&BB->front() == I) {
    if (BB->getSinglePredecessor()) {
      BB->setName(Name);
      return BB;
    }
  }
  return BB->splitBasicBlock(I, Name);
}

// Split above and below a particular instruction so that it
// will be all alone by itself in a block.
static void splitAround(Instruction *I, const Twine &Name) {
  splitBlockIfNotFirst(I, Name);
  splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
}

void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
  // Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite
  // access to local variables.
  LowerDbgDeclare(F);

  Shape.PromiseAlloca = Shape.CoroBegin->getId()->getPromise();
  if (Shape.PromiseAlloca) {
    Shape.CoroBegin->getId()->clearPromise();
  }

  // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
  // intrinsics are in their own blocks to simplify the logic of building up
  // SuspendCrossing data.
  for (CoroSuspendInst *CSI : Shape.CoroSuspends) {
    splitAround(CSI->getCoroSave(), "CoroSave");
    splitAround(CSI, "CoroSuspend");
  }

  // Put CoroEnds into their own blocks.
  for (CoroEndInst *CE : Shape.CoroEnds)
    splitAround(CE, "CoroEnd");

  // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
  // never have its definition separated from the PHI by a suspend point.
  rewritePHIs(F);

  // Build suspend crossing info.
  SuspendCrossingInfo Checker(F, Shape);

  IRBuilder<> Builder(F.getContext());
  SpillInfo Spills;

  for (int Repeat = 0; Repeat < 4; ++Repeat) {
    // See if there are materializable instructions across suspend points.
    for (Instruction &I : instructions(F))
      if (materializable(I))
        for (User *U : I.users())
          if (Checker.isDefinitionAcrossSuspend(I, U))
            Spills.emplace_back(&I, U);

    if (Spills.empty())
      break;

    // Rewrite materializable instructions to be materialized at the use point.
    LLVM_DEBUG(dump("Materializations", Spills));
    rewriteMaterializableInstructions(Builder, Spills);
    Spills.clear();
  }

  // Collect the spills for arguments and other not-materializable values.
  for (Argument &A : F.args())
    for (User *U : A.users())
      if (Checker.isDefinitionAcrossSuspend(A, U))
        Spills.emplace_back(&A, U);

  for (Instruction &I : instructions(F)) {
    // Values returned from coroutine structure intrinsics should not be part
    // of the Coroutine Frame.
    if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
      continue;
    // The coroutine promise is always included in the coroutine frame; no
    // need to check for suspend crossing.
    if (Shape.PromiseAlloca == &I)
      continue;

    for (User *U : I.users())
      if (Checker.isDefinitionAcrossSuspend(I, U)) {
        // We cannot spill a token.
        if (I.getType()->isTokenTy())
          report_fatal_error(
              "token definition is separated from the use by a suspend point");
        Spills.emplace_back(&I, U);
      }
  }
  LLVM_DEBUG(dump("Spills", Spills));
  moveSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin);
  Shape.FrameTy = buildFrameType(F, Shape, Spills);
  Shape.FramePtr = insertSpills(Spills, Shape);
}