//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains classes used to discover if for a particular value
// there is a use that is separated from its definition by a suspend point.
//
// Using the information discovered we form a Coroutine Frame structure to
// contain those values. All uses of those values are replaced with appropriate
// GEP + load from the coroutine frame. At the point of the definition we spill
// the value into the coroutine frame.
//
// TODO: pack values tightly using liveness info.
//===----------------------------------------------------------------------===//
#include "CoroInternal.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/circular_raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"

using namespace llvm;
// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
// "coro-frame", which results in leaner debug spew.
#define DEBUG_TYPE "coro-suspend-crossing"
enum { SmallVectorThreshold = 32 };

// Provides a two-way mapping between the blocks and their numbers.
namespace {
class BlockToIndexMapping {
  SmallVector<BasicBlock *, SmallVectorThreshold> V;

public:
  size_t size() const { return V.size(); }

  BlockToIndexMapping(Function &F) {
    for (BasicBlock &BB : F)
      V.push_back(&BB);
    llvm::sort(V);
  }

  size_t blockToIndex(BasicBlock *BB) const {
    auto *I = llvm::lower_bound(V, BB);
    assert(I != V.end() && *I == BB && "BasicBlockNumbering: Unknown block");
    return I - V.begin();
  }

  BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
};
} // end anonymous namespace
// The SuspendCrossingInfo maintains the data needed to answer whether, given
// two BasicBlocks A and B, there is a path from A to B that passes through a
// suspend point.
//
// For every basic block 'i' it maintains a BlockData that consists of:
//   Consumes: a bit vector which contains a set of indices of blocks that can
//             reach block 'i'.
//   Kills: a bit vector which contains a set of indices of blocks that can
//          reach block 'i' along a path that crosses a suspend point.
//   Suspend: a boolean indicating whether block 'i' contains a suspend point.
//   End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
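//
// Worked example (illustrative only; block names are invented): for the CFG
//
//   entry -> susp -> resume,   where 'susp' contains a coro.suspend
//
// Block[resume].Consumes ends up containing {entry, susp, resume} (all blocks
// that can reach 'resume'), while Block[resume].Kills contains {entry, susp},
// because any path from those blocks into 'resume' crosses the suspend point.
// So a value defined in 'entry' and used in 'resume' must be spilled to the
// coroutine frame, while a value defined and used within 'resume' need not be.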
namespace {
struct SuspendCrossingInfo {
  BlockToIndexMapping Mapping;

  struct BlockData {
    BitVector Consumes;
    BitVector Kills;
    bool Suspend = false;
    bool End = false;
  };
  SmallVector<BlockData, SmallVectorThreshold> Block;

  iterator_range<succ_iterator> successors(BlockData const &BD) const {
    BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
    return llvm::successors(BB);
  }

  BlockData &getBlockData(BasicBlock *BB) {
    return Block[Mapping.blockToIndex(BB)];
  }

  void dump() const;
  void dump(StringRef Label, BitVector const &BV) const;

  SuspendCrossingInfo(Function &F, coro::Shape &Shape);
  bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const {
    size_t const DefIndex = Mapping.blockToIndex(DefBB);
    size_t const UseIndex = Mapping.blockToIndex(UseBB);

    assert(Block[UseIndex].Consumes[DefIndex] && "use must consume def");
    bool const Result = Block[UseIndex].Kills[DefIndex];
    LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName()
                      << " answer is " << Result << "\n");
    return Result;
  }
  bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
    auto *I = cast<Instruction>(U);

    // We rewrote PHINodes, so that only the ones with exactly one incoming
    // value need to be analyzed.
    if (auto *PN = dyn_cast<PHINode>(I))
      if (PN->getNumIncomingValues() > 1)
        return false;

    BasicBlock *UseBB = I->getParent();

    // As a special case, treat uses by an llvm.coro.suspend.retcon
    // as if they were uses in the suspend's single predecessor: the
    // uses conceptually occur before the suspend.
    if (isa<CoroSuspendRetconInst>(I)) {
      UseBB = UseBB->getSinglePredecessor();
      assert(UseBB && "should have split coro.suspend into its own block");
    }

    return hasPathCrossingSuspendPoint(DefBB, UseBB);
  }
  bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
    return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
  }
  bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
    auto *DefBB = I.getParent();

    // As a special case, treat values produced by an llvm.coro.suspend.*
    // as if they were defined in the single successor: the uses
    // conceptually occur after the suspend.
    if (isa<AnyCoroSuspendInst>(I)) {
      DefBB = DefBB->getSingleSuccessor();
      assert(DefBB && "should have split coro.suspend into its own block");
    }

    return isDefinitionAcrossSuspend(DefBB, U);
  }
};
} // end anonymous namespace
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
                                                BitVector const &BV) const {
  dbgs() << Label << ":";
  for (size_t I = 0, N = BV.size(); I < N; ++I)
    if (BV[I])
      dbgs() << " " << Mapping.indexToBlock(I)->getName();
  dbgs() << "\n";
}

LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
  for (size_t I = 0, N = Block.size(); I < N; ++I) {
    BasicBlock *const B = Mapping.indexToBlock(I);
    dbgs() << B->getName() << ":\n";
    dump("   Consumes", Block[I].Consumes);
    dump("      Kills", Block[I].Kills);
  }
  dbgs() << "\n";
}
#endif
SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
    : Mapping(F) {
  const size_t N = Mapping.size();
  Block.resize(N);

  // Initialize every block so that it consumes itself.
  for (size_t I = 0; I < N; ++I) {
    auto &B = Block[I];
    B.Consumes.resize(N);
    B.Kills.resize(N);
    B.Consumes.set(I);
  }

  // Mark all CoroEnd blocks. We do not propagate Kills beyond coro.ends as
  // the code beyond coro.end is reachable during initial invocation of the
  // coroutine.
  for (auto *CE : Shape.CoroEnds)
    getBlockData(CE->getParent()).End = true;

  // Mark all suspend blocks and indicate that they kill everything they
  // consume. Note that crossing coro.save also requires a spill, as any code
  // between coro.save and coro.suspend may resume the coroutine and all of the
  // state needs to be saved by that time.
  auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
    BasicBlock *SuspendBlock = BarrierInst->getParent();
    auto &B = getBlockData(SuspendBlock);
    B.Suspend = true;
    B.Kills |= B.Consumes;
  };
  for (auto *CSI : Shape.CoroSuspends) {
    markSuspendBlock(CSI);
    if (auto *Save = CSI->getCoroSave())
      markSuspendBlock(Save);
  }

  // Iterate propagating consumes and kills until they stop changing.
  int Iteration = 0;
  (void)Iteration;

  bool Changed;
  do {
    LLVM_DEBUG(dbgs() << "iteration " << ++Iteration);
    LLVM_DEBUG(dbgs() << "==============\n");

    Changed = false;
    for (size_t I = 0; I < N; ++I) {
      auto &B = Block[I];
      for (BasicBlock *SI : successors(B)) {

        auto SuccNo = Mapping.blockToIndex(SI);

        // Saved Consumes and Kills bitsets so that it is easy to see
        // if anything changed after propagation.
        auto &S = Block[SuccNo];
        auto SavedConsumes = S.Consumes;
        auto SavedKills = S.Kills;

        // Propagate Kills and Consumes from block B into its successor S.
        S.Consumes |= B.Consumes;
        S.Kills |= B.Kills;

        // If block B is a suspend block, it should propagate kills into its
        // successor for every block B consumes.
        if (B.Suspend)
          S.Kills |= B.Consumes;

        if (S.Suspend) {
          // If block S is a suspend block, it should kill all of the blocks
          // it consumes.
          S.Kills |= S.Consumes;
        } else if (S.End) {
          // If block S is an end block, it should not propagate kills as the
          // blocks following coro.end() are reached during initial invocation
          // of the coroutine while all the data are still available on the
          // stack or in the registers.
          S.Kills.reset();
        } else {
          // This is reached when block S is neither a suspend nor a coro.end
          // block; make sure it is not in its own kill set.
          S.Kills.reset(SuccNo);
        }

        // See if anything changed.
        Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes);

        if (S.Kills != SavedKills) {
          LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
                            << "\n");
          LLVM_DEBUG(dump("S.Kills", S.Kills));
          LLVM_DEBUG(dump("SavedKills", SavedKills));
        }
        if (S.Consumes != SavedConsumes) {
          LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n");
          LLVM_DEBUG(dump("S.Consume", S.Consumes));
          LLVM_DEBUG(dump("SavedCons", SavedConsumes));
        }
      }
    }
  } while (Changed);
  LLVM_DEBUG(dump());
}
#undef DEBUG_TYPE // "coro-suspend-crossing"
#define DEBUG_TYPE "coro-frame"
// We build up the list of spills for every case where a use is separated
// from the definition by a suspend point.

static const unsigned InvalidFieldIndex = ~0U;

namespace {
class Spill {
  Value *Def = nullptr;
  Instruction *User = nullptr;
  unsigned FieldNo = InvalidFieldIndex;

public:
  Spill(Value *Def, llvm::User *U) : Def(Def), User(cast<Instruction>(U)) {}

  Value *def() const { return Def; }
  Instruction *user() const { return User; }
  BasicBlock *userBlock() const { return User->getParent(); }

  // Note that the field index is stored in the first Spill entry for a
  // particular definition. Subsequent mentions of a definition do not have
  // FieldNo assigned. This works out fine as the users of Spills capture the
  // info about the definition the first time they encounter it. Consider
  // refactoring SpillInfo into two arrays to normalize the spill
  // representation.
  unsigned fieldIndex() const {
    assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field");
    return FieldNo;
  }
  void setFieldIndex(unsigned FieldNumber) {
    assert(FieldNo == InvalidFieldIndex && "Reassigning field number");
    FieldNo = FieldNumber;
  }
};
} // namespace

// Note that there may be more than one record with the same value of Def in
// the SpillInfo vector.
using SpillInfo = SmallVector<Spill, 8>;
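// For illustration (value and user names invented): if %x is used across a
// suspend point by %u1 and %u2, and %y by %u3, Spills might hold
//   { {%x, %u1, FieldNo=4}, {%x, %u2}, {%y, %u3, FieldNo=5} }
// with the frame field index recorded only on the first entry for each
// definition, as described above.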
#ifndef NDEBUG
static void dump(StringRef Title, SpillInfo const &Spills) {
  dbgs() << "------------- " << Title << "--------------\n";
  Value *CurrentValue = nullptr;
  for (auto const &E : Spills) {
    if (CurrentValue != E.def()) {
      CurrentValue = E.def();
      CurrentValue->dump();
    }
    dbgs() << "   user: ";
    E.user()->dump();
  }
}
#endif
// We cannot rely solely on natural alignment of a type when building a
// coroutine frame. If the alignment specified on the Alloca instruction
// differs from the natural alignment of the alloca type, we will need to
// insert padding.
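//
// Worked example (illustrative numbers): if the fields added so far occupy
// 4 bytes and the next field is an i64 alloca with 'align 16', natural ABI
// alignment would place it at offset 8, but the requested alignment needs
// offset 16, so computePadding() below returns 16 - 4 = 12 and a [12 x i8]
// padding field is inserted first.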
struct PaddingCalculator {
  const DataLayout &DL;
  LLVMContext &Context;
  unsigned StructSize = 0;

  PaddingCalculator(LLVMContext &Context, DataLayout const &DL)
      : DL(DL), Context(Context) {}

  // Replicate the logic from IR/DataLayout.cpp to match field offset
  // computation for LLVM structs.
  void addType(Type *Ty) {
    unsigned TyAlign = DL.getABITypeAlignment(Ty);
    if ((StructSize & (TyAlign - 1)) != 0)
      StructSize = alignTo(StructSize, TyAlign);

    StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item.
  }

  void addTypes(SmallVectorImpl<Type *> const &Types) {
    for (auto *Ty : Types)
      addType(Ty);
  }

  unsigned computePadding(Type *Ty, unsigned ForcedAlignment) {
    unsigned TyAlign = DL.getABITypeAlignment(Ty);
    auto Natural = alignTo(StructSize, TyAlign);
    auto Forced = alignTo(StructSize, ForcedAlignment);

    // Return how many bytes of padding we need to insert.
    if (Natural != Forced)
      return std::max(Natural, Forced) - StructSize;

    // Rely on natural alignment.
    return 0;
  }

  // If padding is required, return the padding field type to insert.
  ArrayType *getPaddingType(Type *Ty, unsigned ForcedAlignment) {
    if (auto Padding = computePadding(Ty, ForcedAlignment))
      return ArrayType::get(Type::getInt8Ty(Context), Padding);

    return nullptr;
  }
};
// Build a struct that will keep state for an active coroutine.
//   struct f.frame {
//     ResumeFnTy ResumeFnAddr;
//     ResumeFnTy DestroyFnAddr;
//     int ResumeIndex;
//     ... promise (if present) ...
//     ... spills ...
//   };
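//
// For example (illustrative only), under the switch ABI a coroutine @f with
// three suspend points, a promise of type %promise, and an i32 plus a double
// live across suspends might end up with a frame roughly like:
//
//   %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, %promise, i2,
//                     i32, double }
//
// (two resume-function pointers, the promise, a 2-bit suspend index, then the
// spilled values, possibly with padding fields in between).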
static StructType *buildFrameType(Function &F, coro::Shape &Shape,
                                  SpillInfo &Spills) {
  LLVMContext &C = F.getContext();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PaddingCalculator Padder(C, DL);
  SmallString<32> Name(F.getName());
  Name.append(".Frame");
  StructType *FrameTy = StructType::create(C, Name);
  SmallVector<Type *, 8> Types;

  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();

  if (Shape.ABI == coro::ABI::Switch) {
    auto *FramePtrTy = FrameTy->getPointerTo();
    auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
                                   /*IsVarArg=*/false);
    auto *FnPtrTy = FnTy->getPointerTo();

    // Figure out how wide the integer type storing the suspend index should be.
    unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
    Type *PromiseType = PromiseAlloca
                            ? PromiseAlloca->getType()->getElementType()
                            : Type::getInt1Ty(C);
    Type *IndexType = Type::getIntNTy(C, IndexBits);
    Types.push_back(FnPtrTy);
    Types.push_back(FnPtrTy);
    Types.push_back(PromiseType);
    Types.push_back(IndexType);
  } else {
    assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
  }

  Value *CurrentDef = nullptr;

  Padder.addTypes(Types);

  // Create an entry for every spilled value.
  for (auto &S : Spills) {
    if (CurrentDef == S.def())
      continue;

    CurrentDef = S.def();
    // PromiseAlloca was already added to the Types array earlier.
    if (CurrentDef == PromiseAlloca)
      continue;

    uint64_t Count = 1;
    Type *Ty = nullptr;
    if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) {
      Ty = AI->getAllocatedType();
      if (unsigned AllocaAlignment = AI->getAlignment()) {
        // If alignment is specified in the alloca, see if we need to insert
        // extra padding.
        if (auto PaddingTy = Padder.getPaddingType(Ty, AllocaAlignment)) {
          Types.push_back(PaddingTy);
          Padder.addType(PaddingTy);
        }
      }
      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
        Count = CI->getValue().getZExtValue();
      else
        report_fatal_error("Coroutines cannot handle non static allocas yet");
    } else {
      Ty = CurrentDef->getType();
    }
    S.setFieldIndex(Types.size());
    if (Count == 1)
      Types.push_back(Ty);
    else
      Types.push_back(ArrayType::get(Ty, Count));
    Padder.addType(Ty);
  }
  FrameTy->setBody(Types);

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    break;

  // Remember whether the frame is inline in the storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    auto &Layout = F.getParent()->getDataLayout();
    auto Id = Shape.getRetconCoroId();
    Shape.RetconLowering.IsFrameInlineInStorage
      = (Layout.getTypeAllocSize(FrameTy) <= Id->getStorageSize() &&
         Layout.getABITypeAlignment(FrameTy) <= Id->getStorageAlignment());
    break;
  }
  }

  return FrameTy;
}
// We use a pointer use visitor to discover if there are any writes into an
// alloca that dominates CoroBegin. If that is the case, insertSpills will copy
// the value from the alloca into the coroutine frame spill slot corresponding
// to that alloca.
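//
// For instance (illustrative), if the ramp function contains
//   store i32 0, i32* %x.alloca     ; before llvm.coro.begin
// and %x.alloca later lives in the frame, the stored value must be copied into
// the frame field once the frame pointer is available. A plain store like this
// makes the visitor abort, so mightWriteIntoAllocaPtr() returns true.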
namespace {
struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
  using Base = PtrUseVisitor<AllocaUseVisitor>;
  AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
                   const CoroBeginInst &CB)
      : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {}

  // We are only interested in uses that dominate coro.begin.
  void visit(Instruction &I) {
    if (DT.dominates(&I, &CoroBegin))
      Base::visit(I);
  }
  // We need to provide this overload as PtrUseVisitor uses a pointer based
  // visiting function.
  void visit(Instruction *I) { return visit(*I); }

  void visitLoadInst(LoadInst &) {} // Good. Nothing to do.

  // If the use is an operand, the pointer escaped and anything can write into
  // that memory. If the use is the pointer, we are definitely writing into the
  // alloca and therefore we need to copy.
  void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); }

  // Any other instruction that is not filtered out by PtrUseVisitor will
  // result in the copy.
  void visitInstruction(Instruction &I) { PI.setAborted(&I); }

private:
  const DominatorTree &DT;
  const CoroBeginInst &CoroBegin;
};
} // namespace

static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT,
                                    const CoroBeginInst &CB) {
  const DataLayout &DL = A.getModule()->getDataLayout();
  AllocaUseVisitor Visitor(DL, DT, CB);
  auto PtrI = Visitor.visitPtr(A);
  if (PtrI.isEscaped() || PtrI.isAborted()) {
    auto *PointerEscapingInstr = PtrI.getEscapingInst()
                                     ? PtrI.getEscapingInst()
                                     : PtrI.getAbortingInst();
    if (PointerEscapingInstr) {
      LLVM_DEBUG(
          dbgs() << "AllocaInst copy was triggered by instruction: "
                 << *PointerEscapingInstr << "\n");
    }
    return true;
  }
  return false;
}
// We need to make room to insert a spill after the initial PHIs, but before a
// catchswitch instruction. Placing the spill before the catchswitch would
// violate the requirement that a catchswitch, like all other EHPads, must be
// the first non-PHI in a block.
//
// Split away the catchswitch into a separate block and insert in its place:
//
//    cleanuppad <InsertPt> cleanupret
//
// The cleanupret instruction will act as an insert point for the spill.
static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
  BasicBlock *CurrentBlock = CatchSwitch->getParent();
  BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
  CurrentBlock->getTerminator()->eraseFromParent();

  auto *CleanupPad =
      CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
  auto *CleanupRet =
      CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
  return CleanupRet;
}
// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
// the coroutine:
//
//    %hdl = coro.begin(...)
//    whatever
//
// becomes:
//
//    %hdl = coro.begin(...)
//    %FramePtr = bitcast i8* hdl to %f.frame*
//    br label %AllocaSpillBB
//
//  AllocaSpillBB:
//    ; geps corresponding to allocas that were moved to coroutine frame
//    br label PostSpill
//
//  PostSpill:
//    whatever
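//
// A single spilled SSA value is handled roughly like this (illustrative IR;
// the field index 4 is made up):
//
//    %x = call i32 @compute()
//    %x.spill.addr = getelementptr inbounds %f.frame, %f.frame* %FramePtr,
//                                           i32 0, i32 4
//    store i32 %x, i32* %x.spill.addr
//    ... suspend point ...
//    %x.reload.addr = getelementptr inbounds %f.frame, %f.frame* %FramePtr,
//                                            i32 0, i32 4
//    %x.reload = load i32, i32* %x.reload.addr
//    ; uses of %x on the resume path are rewritten to use %x.reload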
static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
  auto *CB = Shape.CoroBegin;
  LLVMContext &C = CB->getContext();
  IRBuilder<> Builder(CB->getNextNode());
  StructType *FrameTy = Shape.FrameTy;
  PointerType *FramePtrTy = FrameTy->getPointerTo();
  auto *FramePtr =
      cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
  DominatorTree DT(*CB->getFunction());

  Value *CurrentValue = nullptr;
  BasicBlock *CurrentBlock = nullptr;
  Value *CurrentReload = nullptr;

  // Proper field number will be read from field definition.
  unsigned Index = InvalidFieldIndex;

  // We need to keep track of any allocas that need "spilling" because they
  // will live in the coroutine frame now; all accesses to them need to be
  // changed, not just the accesses across suspend points. We remember allocas
  // and their indices to be handled once we have processed all the spills.
  SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;
  // Promise alloca (if present) has a fixed field number.
  if (auto *PromiseAlloca = Shape.getPromiseAlloca()) {
    assert(Shape.ABI == coro::ABI::Switch);
    Allocas.emplace_back(PromiseAlloca, coro::Shape::SwitchFieldIndex::Promise);
  }
  // Create a GEP with the given index into the coroutine frame for the
  // original value Orig. Appends an extra 0 index for array-allocas,
  // preserving the original type.
  auto GetFramePointer = [&](uint32_t Index, Value *Orig) -> Value * {
    SmallVector<Value *, 3> Indices = {
        ConstantInt::get(Type::getInt32Ty(C), 0),
        ConstantInt::get(Type::getInt32Ty(C), Index),
    };

    if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
        auto Count = CI->getValue().getZExtValue();
        if (Count > 1)
          Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
      } else {
        report_fatal_error("Coroutines cannot handle non static allocas yet");
      }
    }

    return Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices);
  };
  // Create a load instruction to reload the spilled value from the coroutine
  // frame.
  auto CreateReload = [&](Instruction *InsertBefore) {
    assert(Index != InvalidFieldIndex && "accessing unassigned field number");
    Builder.SetInsertPoint(InsertBefore);

    auto *G = GetFramePointer(Index, CurrentValue);
    G->setName(CurrentValue->getName() + Twine(".reload.addr"));

    return isa<AllocaInst>(CurrentValue)
               ? G
               : Builder.CreateLoad(FrameTy->getElementType(Index), G,
                                    CurrentValue->getName() + Twine(".reload"));
  };
  for (auto const &E : Spills) {
    // If we have not seen the value, generate a spill.
    if (CurrentValue != E.def()) {
      CurrentValue = E.def();
      CurrentBlock = nullptr;
      CurrentReload = nullptr;

      Index = E.fieldIndex();

      if (auto *AI = dyn_cast<AllocaInst>(CurrentValue)) {
        // A spilled AllocaInst will be replaced with a GEP from the coroutine
        // frame; there is no spill required.
        Allocas.emplace_back(AI, Index);
        if (!AI->isStaticAlloca())
          report_fatal_error("Coroutines cannot handle non static allocas yet");
      } else {
        // Otherwise, create a store instruction storing the value into the
        // coroutine frame.

        Instruction *InsertPt = nullptr;
        if (auto Arg = dyn_cast<Argument>(CurrentValue)) {
          // For arguments, we will place the store instruction right after
          // the coroutine frame pointer instruction, i.e. bitcast of
          // coro.begin from i8* to %f.frame*.
          InsertPt = FramePtr->getNextNode();

          // If we're spilling an Argument, make sure we clear 'nocapture'
          // from the coroutine function.
          Arg->getParent()->removeParamAttr(Arg->getArgNo(),
                                            Attribute::NoCapture);
        } else if (auto *II = dyn_cast<InvokeInst>(CurrentValue)) {
          // If we are spilling the result of the invoke instruction, split the
          // normal edge and insert the spill in the new block.
          auto NewBB = SplitEdge(II->getParent(), II->getNormalDest());
          InsertPt = NewBB->getTerminator();
        } else if (isa<PHINode>(CurrentValue)) {
          // Skip the PHINodes and EH pad instructions.
          BasicBlock *DefBlock = cast<Instruction>(E.def())->getParent();
          if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
            InsertPt = splitBeforeCatchSwitch(CSI);
          else
            InsertPt = &*DefBlock->getFirstInsertionPt();
        } else if (auto CSI = dyn_cast<AnyCoroSuspendInst>(CurrentValue)) {
          // Don't spill immediately after a suspend; splitting assumes
          // that the suspend will be followed by a branch.
          InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
        } else {
          auto *I = cast<Instruction>(E.def());
          assert(!I->isTerminator() && "unexpected terminator");
          // For all other values, the spill is placed immediately after
          // the definition.
          if (DT.dominates(CB, I)) {
            InsertPt = I->getNextNode();
          } else {
            // Unless it is not dominated by CoroBegin, in which case it is
            // inserted immediately after the coroutine frame pointer is
            // computed.
            InsertPt = FramePtr->getNextNode();
          }
        }

        Builder.SetInsertPoint(InsertPt);
        auto *G = Builder.CreateConstInBoundsGEP2_32(
            FrameTy, FramePtr, 0, Index,
            CurrentValue->getName() + Twine(".spill.addr"));
        Builder.CreateStore(CurrentValue, G);
      }
    }

    // If we have not seen the use block, generate a reload in it.
    if (CurrentBlock != E.userBlock()) {
      CurrentBlock = E.userBlock();
      CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt());
    }

    // If we have a single edge PHINode, remove it and replace it with a reload
    // from the coroutine frame. (We already took care of multi edge PHINodes
    // by rewriting them in the rewritePHIs function).
    if (auto *PN = dyn_cast<PHINode>(E.user())) {
      assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
                                                "values in the PHINode");
      PN->replaceAllUsesWith(CurrentReload);
      PN->eraseFromParent();
      continue;
    }

    // Replace all uses of CurrentValue in the current instruction with reload.
    E.user()->replaceUsesOfWith(CurrentValue, CurrentReload);
  }
  BasicBlock *FramePtrBB = FramePtr->getParent();

  auto SpillBlock =
      FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
  SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
  Shape.AllocaSpillBlock = SpillBlock;
  // If we found any allocas, replace all of their remaining uses with GEPs.
  // Note: we cannot do it indiscriminately as some of the uses may not be
  // dominated by CoroBegin.
  bool MightNeedToCopy = false;
  Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
  SmallVector<Instruction *, 4> UsersToUpdate;
  for (auto &P : Allocas) {
    AllocaInst *const A = P.first;
    UsersToUpdate.clear();
    for (User *U : A->users()) {
      auto *I = cast<Instruction>(U);
      if (DT.dominates(CB, I))
        UsersToUpdate.push_back(I);
      else
        MightNeedToCopy = true;
    }
    if (!UsersToUpdate.empty()) {
      auto *G = GetFramePointer(P.second, A);
      for (Instruction *I : UsersToUpdate)
        I->replaceUsesOfWith(A, G);
    }
  }
  // If we discovered such uses not dominated by CoroBegin, see if any of them
  // precede coro.begin and have instructions that can modify the value of the
  // alloca and would therefore require copying the value into the spill slot
  // in the coroutine frame.
  if (MightNeedToCopy) {
    Builder.SetInsertPoint(FramePtr->getNextNode());

    for (auto &P : Allocas) {
      AllocaInst *const A = P.first;
      if (mightWriteIntoAllocaPtr(*A, DT, *CB)) {
        if (A->isArrayAllocation())
          report_fatal_error(
              "Coroutines cannot handle copying of array allocas yet");

        auto *G = GetFramePointer(P.second, A);
        auto *Value = Builder.CreateLoad(A);
        Builder.CreateStore(Value, G);
      }
    }
  }
  return FramePtr;
}
// Sets the unwind edge of an instruction to a particular successor.
static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
  if (auto *II = dyn_cast<InvokeInst>(TI))
    II->setUnwindDest(Succ);
  else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
    CS->setUnwindDest(Succ);
  else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
    CR->setUnwindDest(Succ);
  else
    llvm_unreachable("unexpected terminator instruction");
}
// Replaces all uses of OldPred with the NewPred block in all PHINodes in a
// block.
static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
                           BasicBlock *NewPred,
                           PHINode *LandingPadReplacement) {
  unsigned BBIdx = 0;
  for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
    PHINode *PN = cast<PHINode>(I);

    // We manually update the LandingPadReplacement PHINode and it is the last
    // PHI Node. So, if we find it, we are done.
    if (LandingPadReplacement == PN)
      break;

    // Reuse the previous value of BBIdx if it lines up. In cases where we
    // have multiple phi nodes with *lots* of predecessors, this is a speed
    // win because we don't have to scan the PHI looking for TIBB. This
    // happens because the BB list of PHI nodes are usually in the same
    // order.
    if (PN->getIncomingBlock(BBIdx) != OldPred)
      BBIdx = PN->getBasicBlockIndex(OldPred);

    assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!");
    PN->setIncomingBlock(BBIdx, NewPred);
  }
}
// Uses SplitEdge unless the successor block is an EHPad, in which case do EH
// specific handling.
static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
                                    LandingPadInst *OriginalPad,
                                    PHINode *LandingPadReplacement) {
  auto *PadInst = Succ->getFirstNonPHI();
  if (!LandingPadReplacement && !PadInst->isEHPad())
    return SplitEdge(BB, Succ);

  auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ);
  setUnwindEdgeTo(BB->getTerminator(), NewBB);
  updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);

  if (LandingPadReplacement) {
    auto *NewLP = OriginalPad->clone();
    auto *Terminator = BranchInst::Create(Succ, NewBB);
    NewLP->insertBefore(Terminator);
    LandingPadReplacement->addIncoming(NewLP, NewBB);
    return NewBB;
  }
  Value *ParentPad = nullptr;
  if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
    ParentPad = FuncletPad->getParentPad();
  else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
    ParentPad = CatchSwitch->getParentPad();
  else
    llvm_unreachable("handling for other EHPads not implemented yet");

  auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB);
  CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
  return NewBB;
}
static void rewritePHIs(BasicBlock &BB) {
  // For every incoming edge we will create a block holding all
  // incoming values in a single PHI node.
  //
  // loop:
  //    %n.val = phi i32[%n, %entry], [%inc, %loop]
  //
  // It will be rewritten as:
  //
  // loop.from.entry:
  //    %n.loop.pre = phi i32 [%n, %entry]
  //    br label %loop
  // loop.from.loop:
  //    %inc.loop.pre = phi i32 [%inc, %loop]
  //    br label %loop
  //
  // After this rewrite, further analysis will ignore any phi nodes with more
  // than one incoming edge.

  // TODO: Simplify PHINodes in the basic block to remove duplicate
  // predecessors.

  LandingPadInst *LandingPad = nullptr;
  PHINode *ReplPHI = nullptr;
  if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
    // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
    // We replace the original landing pad with a PHINode that will collect the
    // results from all of them.
    ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad);
    ReplPHI->takeName(LandingPad);
    LandingPad->replaceAllUsesWith(ReplPHI);
    // We will erase the original landing pad at the end of this function after
    // ehAwareSplitEdge cloned it in the transition blocks.
  }

  SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
  for (BasicBlock *Pred : Preds) {
    auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
    IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
    auto *PN = cast<PHINode>(&BB.front());
    do {
      int Index = PN->getBasicBlockIndex(IncomingBB);
      Value *V = PN->getIncomingValue(Index);
      PHINode *InputV = PHINode::Create(
          V->getType(), 1, V->getName() + Twine(".") + BB.getName(),
          &IncomingBB->front());
      InputV->addIncoming(V, Pred);
      PN->setIncomingValue(Index, InputV);
      PN = dyn_cast<PHINode>(PN->getNextNode());
    } while (PN != ReplPHI); // ReplPHI is either null or the PHI that replaced
                             // the landing pad.
  }

  if (LandingPad) {
    // Calls to the ehAwareSplitEdge function cloned the original landing pad.
    // No longer need it.
    LandingPad->eraseFromParent();
  }
}
static void rewritePHIs(Function &F) {
  SmallVector<BasicBlock *, 8> WorkList;

  for (BasicBlock &BB : F)
    if (auto *PN = dyn_cast<PHINode>(&BB.front()))
      if (PN->getNumIncomingValues() > 1)
        WorkList.push_back(&BB);

  for (BasicBlock *BB : WorkList)
    rewritePHIs(*BB);
}
// Check for instructions that we can recreate on resume as opposed to spill
// the result into a coroutine frame.
static bool materializable(Instruction &V) {
  return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
         isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V);
}

// Check for structural coroutine intrinsics that should not be spilled into
// the coroutine frame.
static bool isCoroutineStructureIntrinsic(Instruction &I) {
  return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) ||
         isa<CoroSuspendInst>(&I);
}
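// For example (illustrative), a cheap address computation such as
//   %p = getelementptr inbounds %t, %t* %base, i32 0, i32 1
// whose only cross-suspend use is on the resume path is cloned into the use
// block by rewriteMaterializableInstructions() below rather than being given
// a slot in the coroutine frame (assuming %base itself is available there).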
// For every use of a value that is across a suspend point, recreate that value
// after the suspend point.
static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
                                              SpillInfo const &Spills) {
  BasicBlock *CurrentBlock = nullptr;
  Instruction *CurrentMaterialization = nullptr;
  Instruction *CurrentDef = nullptr;

  for (auto const &E : Spills) {
    // If it is a new definition, update CurrentXXX variables.
    if (CurrentDef != E.def()) {
      CurrentDef = cast<Instruction>(E.def());
      CurrentBlock = nullptr;
      CurrentMaterialization = nullptr;
    }

    // If we have not seen this block, materialize the value.
    if (CurrentBlock != E.userBlock()) {
      CurrentBlock = E.userBlock();
      CurrentMaterialization = cast<Instruction>(CurrentDef)->clone();
      CurrentMaterialization->setName(CurrentDef->getName());
      CurrentMaterialization->insertBefore(
          &*CurrentBlock->getFirstInsertionPt());
    }

    if (auto *PN = dyn_cast<PHINode>(E.user())) {
      assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
                                                "values in the PHINode");
      PN->replaceAllUsesWith(CurrentMaterialization);
      PN->eraseFromParent();
      continue;
    }

    // Replace all uses of CurrentDef in the current instruction with the
    // CurrentMaterialization for the block.
    E.user()->replaceUsesOfWith(CurrentDef, CurrentMaterialization);
  }
}
// Splits the block at a particular instruction unless it is the first
// instruction in the block with a single predecessor.
static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
  auto *BB = I->getParent();
  if (&BB->front() == I) {
    if (BB->getSinglePredecessor()) {
      BB->setName(Name);
      return BB;
    }
  }
  return BB->splitBasicBlock(I, Name);
}

// Split above and below a particular instruction so that it
// will be all alone by itself in a block.
static void splitAround(Instruction *I, const Twine &Name) {
  splitBlockIfNotFirst(I, Name);
  splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
}
static bool isSuspendBlock(BasicBlock *BB) {
  return isa<AnyCoroSuspendInst>(BB->front());
}

typedef SmallPtrSet<BasicBlock *, 8> VisitedBlocksSet;

/// Does control flow starting at the given block ever reach a suspend
/// instruction before reaching a block in VisitedOrFreeBBs?
static bool isSuspendReachableFrom(BasicBlock *From,
                                   VisitedBlocksSet &VisitedOrFreeBBs) {
  // Eagerly try to add this block to the visited set. If it's already
  // there, stop recursing; this path doesn't reach a suspend before
  // either looping or reaching a freeing block.
  if (!VisitedOrFreeBBs.insert(From).second)
    return false;

  // We assume that we'll already have split suspends into their own blocks.
  if (isSuspendBlock(From))
    return true;

  // Recurse on the successors.
  for (auto Succ : successors(From)) {
    if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
      return true;
  }

  return false;
}
/// Is the given alloca "local", i.e. bounded in lifetime to not cross a
/// suspend point?
static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
  // Seed the visited set with all the basic blocks containing a free
  // so that we won't pass them up.
  VisitedBlocksSet VisitedOrFreeBBs;
  for (auto User : AI->users()) {
    if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
      VisitedOrFreeBBs.insert(FI->getParent());
  }

  return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
}
/// After we split the coroutine, will the given basic block be along
/// an obvious exit path for the resumption function?
static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
                                              unsigned depth = 3) {
  // If we've bottomed out our depth count, stop searching and assume
  // that the path might loop back.
  if (depth == 0) return false;

  // If this is a suspend block, we're about to exit the resumption function.
  if (isSuspendBlock(BB)) return true;

  // Recurse into the successors.
  for (auto Succ : successors(BB)) {
    if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
      return false;
  }

  // If none of the successors leads back in a loop, we're on an exit/abort.
  return true;
}
static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
  // Look for a free that isn't sufficiently obviously followed by
  // either a suspend or a termination, i.e. something that will leave
  // the coro resumption frame.
  for (auto U : AI->users()) {
    auto FI = dyn_cast<CoroAllocaFreeInst>(U);
    if (!FI) continue;

    if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
      return true;
  }

  // If we never found one, we don't need a stack save.
  return false;
}
/// Turn each of the given local allocas into a normal (dynamic) alloca
/// instruction.
static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst *> LocalAllocas,
                              SmallVectorImpl<Instruction *> &DeadInsts) {
  for (auto AI : LocalAllocas) {
    auto M = AI->getModule();
    IRBuilder<> Builder(AI);

    // Save the stack depth. Try to avoid doing this if the stackrestore
    // is going to immediately precede a return or something.
    Value *StackSave = nullptr;
    if (localAllocaNeedsStackSave(AI))
      StackSave = Builder.CreateCall(
                            Intrinsic::getDeclaration(M, Intrinsic::stacksave));

    // Allocate memory.
    auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
    Alloca->setAlignment(MaybeAlign(AI->getAlignment()));

    for (auto U : AI->users()) {
      // Replace gets with the allocation.
      if (isa<CoroAllocaGetInst>(U)) {
        U->replaceAllUsesWith(Alloca);

      // Replace frees with stackrestores. This is safe because
      // alloca.alloc is required to obey a stack discipline, although we
      // don't enforce that structurally.
      } else {
        auto FI = cast<CoroAllocaFreeInst>(U);
        if (StackSave) {
          Builder.SetInsertPoint(FI);
          Builder.CreateCall(
                    Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
                    StackSave);
        }
      }
      DeadInsts.push_back(cast<Instruction>(U));
    }

    DeadInsts.push_back(AI);
  }
}
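// Roughly, a local coro.alloca.alloc is rewritten like this (illustrative IR;
// names invented):
//
//    %save = call i8* @llvm.stacksave()       ; only if a stack save is needed
//    %mem  = alloca i8, i64 %size             ; replaces coro.alloca.get uses
//    ...
//    call void @llvm.stackrestore(i8* %save)  ; emitted at coro.alloca.free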
/// Turn the given coro.alloca.alloc call into a dynamic allocation.
/// This happens during the all-instructions iteration, so it must not
/// delete the call.
static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
                                        coro::Shape &Shape,
                                   SmallVectorImpl<Instruction *> &DeadInsts) {
  IRBuilder<> Builder(AI);
  auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);

  for (User *U : AI->users()) {
    if (isa<CoroAllocaGetInst>(U)) {
      U->replaceAllUsesWith(Alloc);
    } else {
      auto FI = cast<CoroAllocaFreeInst>(U);
      Builder.SetInsertPoint(FI);
      Shape.emitDealloc(Builder, Alloc, nullptr);
    }
    DeadInsts.push_back(cast<Instruction>(U));
  }

  // Push this on last so that it gets deleted after all the others.
  DeadInsts.push_back(AI);

  // Return the new allocation value so that we can check for needed spills.
  return cast<Instruction>(Alloc);
}
/// Get the current swifterror value.
static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
                                     coro::Shape &Shape) {
  // Make a fake function pointer as a sort of intrinsic.
  auto FnTy = FunctionType::get(ValueTy, {}, false);
  auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());

  auto Call = Builder.CreateCall(Fn, {});
  Shape.SwiftErrorOps.push_back(Call);

  return Call;
}
/// Set the given value as the current swifterror value.
///
/// Returns a slot that can be used as a swifterror slot.
static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
                                     coro::Shape &Shape) {
  // Make a fake function pointer as a sort of intrinsic.
  auto FnTy = FunctionType::get(V->getType()->getPointerTo(),
                                {V->getType()}, false);
  auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());

  auto Call = Builder.CreateCall(Fn, { V });
  Shape.SwiftErrorOps.push_back(Call);

  return Call;
}
1192 /// then put in back in the alloca afterwards.
1194 /// Returns an address that will stand in for the swifterror slot
1195 /// until splitting.
1196 static Value
*emitSetAndGetSwiftErrorValueAround(Instruction
*Call
,
1198 coro::Shape
&Shape
) {
1199 auto ValueTy
= Alloca
->getAllocatedType();
1200 IRBuilder
<> Builder(Call
);
1202 // Load the current value from the alloca and set it as the
1203 // swifterror value.
1204 auto ValueBeforeCall
= Builder
.CreateLoad(ValueTy
, Alloca
);
1205 auto Addr
= emitSetSwiftErrorValue(Builder
, ValueBeforeCall
, Shape
);
1207 // Move to after the call. Since swifterror only has a guaranteed
1208 // value on normal exits, we can ignore implicit and explicit unwind
1210 if (isa
<CallInst
>(Call
)) {
1211 Builder
.SetInsertPoint(Call
->getNextNode());
1213 auto Invoke
= cast
<InvokeInst
>(Call
);
1214 Builder
.SetInsertPoint(Invoke
->getNormalDest()->getFirstNonPHIOrDbg());
1217 // Get the current swifterror value and store it to the alloca.
1218 auto ValueAfterCall
= emitGetSwiftErrorValue(Builder
, ValueTy
, Shape
);
1219 Builder
.CreateStore(ValueAfterCall
, Alloca
);
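// The resulting sequence around a call looks roughly like this (pseudo-IR;
// the "set"/"get" markers stand for the null-function-pointer calls recorded
// in Shape.SwiftErrorOps and rewritten during splitting):
//
//    %v    = load %err, %err* %alloca          ; value before the call
//    %slot = <set swifterror marker>(%v)       ; returns a stand-in slot
//    call void @use(%err* swifterror %slot)
//    %v2   = <get swifterror marker>()
//    store %err %v2, %err* %alloca             ; value after the call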
/// Eliminate a formerly-swifterror alloca by inserting the get/set
/// intrinsics and attempting to MemToReg the alloca away.
static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
                                      coro::Shape &Shape) {
  for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) {
    // We're likely changing the use list, so use a mutation-safe
    // iteration pattern.
    auto &Use = *UI;
    ++UI;

    // swifterror values can only be used in very specific ways.
    // We take advantage of that here.
    auto User = Use.getUser();
    if (isa<LoadInst>(User) || isa<StoreInst>(User))
      continue;

    assert(isa<CallInst>(User) || isa<InvokeInst>(User));
    auto Call = cast<Instruction>(User);

    auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);

    // Use the returned slot address as the call argument.
    Use.set(Addr);
  }

  // All the uses should be loads and stores now.
  assert(isAllocaPromotable(Alloca));
}
/// "Eliminate" a swifterror argument by reducing it to the alloca case
/// and then loading and storing in the prologue and epilog.
///
/// The argument keeps the swifterror flag.
static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
                                        coro::Shape &Shape,
                             SmallVectorImpl<AllocaInst *> &AllocasToPromote) {
  IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());

  auto ArgTy = cast<PointerType>(Arg.getType());
  auto ValueTy = ArgTy->getElementType();

  // Reduce to the alloca case:

  // Create an alloca and replace all uses of the arg with it.
  auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
  Arg.replaceAllUsesWith(Alloca);

  // Set an initial value in the alloca. swifterror is always null on entry.
  auto InitialValue = Constant::getNullValue(ValueTy);
  Builder.CreateStore(InitialValue, Alloca);

  // Find all the suspends in the function and save and restore around them.
  for (auto Suspend : Shape.CoroSuspends) {
    (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
  }

  // Find all the coro.ends in the function and restore the error value.
  for (auto End : Shape.CoroEnds) {
    Builder.SetInsertPoint(End);
    auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
    (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
  }

  // Now we can use the alloca logic.
  AllocasToPromote.push_back(Alloca);
  eliminateSwiftErrorAlloca(F, Alloca, Shape);
}
/// Eliminate all problematic uses of swifterror arguments and allocas
/// from the function. We'll fix them up later when splitting the function.
static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
  SmallVector<AllocaInst *, 4> AllocasToPromote;

  // Look for a swifterror argument.
  for (auto &Arg : F.args()) {
    if (!Arg.hasSwiftErrorAttr()) continue;

    eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
    break;
  }

  // Look for swifterror allocas.
  for (auto &Inst : F.getEntryBlock()) {
    auto Alloca = dyn_cast<AllocaInst>(&Inst);
    if (!Alloca || !Alloca->isSwiftError()) continue;

    // Clear the swifterror flag.
    Alloca->setSwiftError(false);

    AllocasToPromote.push_back(Alloca);
    eliminateSwiftErrorAlloca(F, Alloca, Shape);
  }

  // If we have any allocas to promote, compute a dominator tree and
  // promote them en masse.
  if (!AllocasToPromote.empty()) {
    DominatorTree DT(F);
    PromoteMemToReg(AllocasToPromote, DT);
  }
}
void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
  // Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite
  // access to local variables.
  LowerDbgDeclare(F);

  eliminateSwiftError(F, Shape);

  if (Shape.ABI == coro::ABI::Switch &&
      Shape.SwitchLowering.PromiseAlloca) {
    Shape.getSwitchCoroId()->clearPromise();
  }

  // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
  // intrinsics are in their own blocks to simplify the logic of building up
  // SuspendCrossing data.
  for (auto *CSI : Shape.CoroSuspends) {
    if (auto *Save = CSI->getCoroSave())
      splitAround(Save, "CoroSave");
    splitAround(CSI, "CoroSuspend");
  }

  // Put CoroEnds into their own blocks.
  for (CoroEndInst *CE : Shape.CoroEnds)
    splitAround(CE, "CoroEnd");

  // Transform multi-edge PHI Nodes, so that any value feeding into a PHI will
  // never have its definition separated from the PHI by a suspend point.
  rewritePHIs(F);

  // Build suspend crossing info.
  SuspendCrossingInfo Checker(F, Shape);

  IRBuilder<> Builder(F.getContext());
  SpillInfo Spills;
  SmallVector<CoroAllocaAllocInst *, 4> LocalAllocas;
  SmallVector<Instruction *, 4> DeadInstructions;

  for (int Repeat = 0; Repeat < 4; ++Repeat) {
    // See if there are materializable instructions across suspend points.
    for (Instruction &I : instructions(F))
      if (materializable(I))
        for (User *U : I.users())
          if (Checker.isDefinitionAcrossSuspend(I, U))
            Spills.emplace_back(&I, U);

    if (Spills.empty())
      break;

    // Rewrite materializable instructions to be materialized at the use point.
    LLVM_DEBUG(dump("Materializations", Spills));
    rewriteMaterializableInstructions(Builder, Spills);
    Spills.clear();
  }

  // Collect the spills for arguments and other not-materializable values.
  for (Argument &A : F.args())
    for (User *U : A.users())
      if (Checker.isDefinitionAcrossSuspend(A, U))
        Spills.emplace_back(&A, U);

  for (Instruction &I : instructions(F)) {
    // Values returned from coroutine structure intrinsics should not be part
    // of the Coroutine Frame.
    if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
      continue;

    // The coroutine promise is always included in the coroutine frame; no
    // need to check for suspend crossing.
    if (Shape.ABI == coro::ABI::Switch &&
        Shape.SwitchLowering.PromiseAlloca == &I)
      continue;

    // Handle alloca.alloc specially here.
    if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
      // Check whether the alloca's lifetime is bounded by suspend points.
      if (isLocalAlloca(AI)) {
        LocalAllocas.push_back(AI);
        continue;
      }

      // If not, do a quick rewrite of the alloca and then add spills of
      // the rewritten value. The rewrite doesn't invalidate anything in
      // Spills because the other alloca intrinsics have no other operands
      // besides AI, and it doesn't invalidate the iteration because we delay
      // the deletion.
      auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);

      for (User *U : Alloc->users()) {
        if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
          Spills.emplace_back(Alloc, U);
      }
      continue;
    }

    // Ignore alloca.get; we process this as part of coro.alloca.alloc.
    if (isa<CoroAllocaGetInst>(I)) {
      continue;
    }

    for (User *U : I.users())
      if (Checker.isDefinitionAcrossSuspend(I, U)) {
        // We cannot spill a token.
        if (I.getType()->isTokenTy())
          report_fatal_error(
              "token definition is separated from the use by a suspend point");
        Spills.emplace_back(&I, U);
      }
  }
  LLVM_DEBUG(dump("Spills", Spills));
  Shape.FrameTy = buildFrameType(F, Shape, Spills);
  Shape.FramePtr = insertSpills(Spills, Shape);
  lowerLocalAllocas(LocalAllocas, DeadInstructions);

  for (auto I : DeadInstructions)
    I->eraseFromParent();
}