//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains classes used to discover, for a particular value, whether
// there is a path from its definition to a use that crosses a suspend point.
//
// Using the information discovered we form a Coroutine Frame structure to
// contain those values. All uses of those values are replaced with appropriate
// GEP + load from the coroutine frame. At the point of the definition we spill
// the value into the coroutine frame.
//===----------------------------------------------------------------------===//
#include "CoroInternal.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/OptimizedStructLayout.h"
#include "llvm/Support/circular_raw_ostream.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"

using namespace llvm;
// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
// "coro-frame", which results in leaner debug spew.
#define DEBUG_TYPE "coro-suspend-crossing"

enum { SmallVectorThreshold = 32 };

// Provides two way mapping between the blocks and numbers.
namespace {
class BlockToIndexMapping {
  SmallVector<BasicBlock *, SmallVectorThreshold> V;

public:
  size_t size() const { return V.size(); }

  BlockToIndexMapping(Function &F) {
    for (BasicBlock &BB : F)
      V.push_back(&BB);
    llvm::sort(V);
  }

  size_t blockToIndex(BasicBlock const *BB) const {
    auto *I = llvm::lower_bound(V, BB);
    assert(I != V.end() && *I == BB && "BasicBlockNumbering: Unknown block");
    return I - V.begin();
  }

  BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
};
} // end anonymous namespace
// The SuspendCrossingInfo maintains data that allows us to answer the question
// of whether, given two BasicBlocks A and B, there is a path from A to B that
// passes through a suspend point.
//
// For every basic block 'i' it maintains a BlockData that consists of:
//   Consumes: a bit vector which contains a set of indices of blocks that can
//             reach block 'i'. A block can trivially reach itself.
//   Kills: a bit vector which contains a set of indices of blocks that can
//          reach block 'i' but there is a path crossing a suspend point
//          not repeating 'i' (path to 'i' without cycles containing 'i').
//   Suspend: a boolean indicating whether block 'i' contains a suspend point.
//   End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
//   KillLoop: There is a path from 'i' to 'i' not otherwise repeating 'i' that
//             crosses a suspend point.
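//
// A minimal sketch (hypothetical CFG, not taken from any test): for
//
//   entry -> susp -> use        (where 'susp' contains a coro.suspend)
//
// the fixed point gives Block[use].Consumes = {entry, susp, use} and
// Block[use].Kills = {entry, susp}, so a value defined in 'entry' and used in
// 'use' crosses the suspend point and must be spilled to the coroutine frame.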
namespace {
class SuspendCrossingInfo {
  BlockToIndexMapping Mapping;

  struct BlockData {
    BitVector Consumes;
    BitVector Kills;
    bool Suspend = false;
    bool End = false;
    bool KillLoop = false;
    bool Changed = false;
  };
  SmallVector<BlockData, SmallVectorThreshold> Block;

  iterator_range<pred_iterator> predecessors(BlockData const &BD) const {
    BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
    return llvm::predecessors(BB);
  }

  BlockData &getBlockData(BasicBlock *BB) {
    return Block[Mapping.blockToIndex(BB)];
  }

  /// Compute the BlockData for the current function in one iteration.
  /// Initialize - Whether this is the first iteration; if so, we can optimize
  /// the initial case a little bit with a manual loop switch.
  /// Returns whether the BlockData changes in this iteration.
  template <bool Initialize = false>
  bool computeBlockData(const ReversePostOrderTraversal<Function *> &RPOT);

public:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void dump() const;
  void dump(StringRef Label, BitVector const &BV) const;
#endif

  SuspendCrossingInfo(Function &F, coro::Shape &Shape);

  /// Returns true if there is a path from \p From to \p To crossing a suspend
  /// point without crossing \p From a 2nd time.
  bool hasPathCrossingSuspendPoint(BasicBlock *From, BasicBlock *To) const {
    size_t const FromIndex = Mapping.blockToIndex(From);
    size_t const ToIndex = Mapping.blockToIndex(To);
    bool const Result = Block[ToIndex].Kills[FromIndex];
    LLVM_DEBUG(dbgs() << From->getName() << " => " << To->getName()
                      << " answer is " << Result << "\n");
    return Result;
  }

  /// Returns true if there is a path from \p From to \p To crossing a suspend
  /// point without crossing \p From a 2nd time. If \p From is the same as
  /// \p To this will also check if there is a looping path crossing a suspend
  /// point.
  bool hasPathOrLoopCrossingSuspendPoint(BasicBlock *From,
                                         BasicBlock *To) const {
    size_t const FromIndex = Mapping.blockToIndex(From);
    size_t const ToIndex = Mapping.blockToIndex(To);
    bool Result = Block[ToIndex].Kills[FromIndex] ||
                  (From == To && Block[ToIndex].KillLoop);
    LLVM_DEBUG(dbgs() << From->getName() << " => " << To->getName()
                      << " answer is " << Result << " (path or loop)\n");
    return Result;
  }

  bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
    auto *I = cast<Instruction>(U);

    // We rewrote PHINodes, so that only the ones with exactly one incoming
    // value need to be analyzed.
    if (auto *PN = dyn_cast<PHINode>(I))
      if (PN->getNumIncomingValues() > 1)
        return false;

    BasicBlock *UseBB = I->getParent();

    // As a special case, treat uses by an llvm.coro.suspend.retcon or an
    // llvm.coro.suspend.async as if they were uses in the suspend's single
    // predecessor: the uses conceptually occur before the suspend.
    if (isa<CoroSuspendRetconInst>(I) || isa<CoroSuspendAsyncInst>(I)) {
      UseBB = UseBB->getSinglePredecessor();
      assert(UseBB && "should have split coro.suspend into its own block");
    }

    return hasPathCrossingSuspendPoint(DefBB, UseBB);
  }

  bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
    return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
  }

  bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
    auto *DefBB = I.getParent();

    // As a special case, treat values produced by an llvm.coro.suspend.*
    // as if they were defined in the single successor: the uses
    // conceptually occur after the suspend.
    if (isa<AnyCoroSuspendInst>(I)) {
      DefBB = DefBB->getSingleSuccessor();
      assert(DefBB && "should have split coro.suspend into its own block");
    }

    return isDefinitionAcrossSuspend(DefBB, U);
  }

  bool isDefinitionAcrossSuspend(Value &V, User *U) const {
    if (auto *Arg = dyn_cast<Argument>(&V))
      return isDefinitionAcrossSuspend(*Arg, U);
    if (auto *Inst = dyn_cast<Instruction>(&V))
      return isDefinitionAcrossSuspend(*Inst, U);

    llvm_unreachable(
        "Coroutine could only collect Argument and Instruction now.");
  }
};
} // end anonymous namespace
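// Note (restating the overloads above for clarity): a query for a value
// defined in block 'def' and used by an instruction in block 'use' reduces to
// hasPathCrossingSuspendPoint(def, use); an Argument is treated as defined in
// the entry block, and a coro.suspend result as defined in the suspend block's
// single successor.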
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
                                                BitVector const &BV) const {
  dbgs() << Label << ":";
  for (size_t I = 0, N = BV.size(); I < N; ++I)
    if (BV[I])
      dbgs() << " " << Mapping.indexToBlock(I)->getName();
  dbgs() << "\n";
}

LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
  for (size_t I = 0, N = Block.size(); I < N; ++I) {
    BasicBlock *const B = Mapping.indexToBlock(I);
    dbgs() << B->getName() << ":\n";
    dump("   Consumes", Block[I].Consumes);
    dump("      Kills", Block[I].Kills);
  }
  dbgs() << "\n";
}
#endif
template <bool Initialize>
bool SuspendCrossingInfo::computeBlockData(
    const ReversePostOrderTraversal<Function *> &RPOT) {
  bool Changed = false;

  for (const BasicBlock *BB : RPOT) {
    auto BBNo = Mapping.blockToIndex(BB);
    auto &B = Block[BBNo];

    // We don't need to check the predecessors during initialization.
    if constexpr (!Initialize)
      // If none of the predecessors of the current block changed,
      // the BlockData for the current block cannot change either.
      if (all_of(predecessors(B), [this](BasicBlock *BB) {
            return !Block[Mapping.blockToIndex(BB)].Changed;
          })) {
        B.Changed = false;
        continue;
      }

    // Save the Consumes and Kills bitsets so that it is easy to see
    // if anything changed after propagation.
    auto SavedConsumes = B.Consumes;
    auto SavedKills = B.Kills;

    for (BasicBlock *PI : predecessors(B)) {
      auto PrevNo = Mapping.blockToIndex(PI);
      auto &P = Block[PrevNo];

      // Propagate Kills and Consumes from predecessors into B.
      B.Consumes |= P.Consumes;
      B.Kills |= P.Kills;

      // If block P is a suspend block, it should propagate kills into block
      // B for every block P consumes.
      if (P.Suspend)
        B.Kills |= P.Consumes;
    }

    if (B.Suspend) {
      // If block B is a suspend block, it should kill all of the blocks it
      // consumes.
      B.Kills |= B.Consumes;
    } else if (B.End) {
      // If block B is an end block, it should not propagate kills as the
      // blocks following coro.end() are reached during initial invocation
      // of the coroutine while all the data are still available on the
      // stack or in the registers.
      B.Kills.reset();
    } else {
      // This is reached when block B is neither a Suspend nor a coro.end
      // block; make sure that it is not in its own kill set.
      B.KillLoop |= B.Kills[BBNo];
      B.Kills.reset(BBNo);
    }

    if constexpr (!Initialize) {
      B.Changed = (B.Kills != SavedKills) || (B.Consumes != SavedConsumes);
      Changed |= B.Changed;
    }
  }

  return Changed;
}
SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
    : Mapping(F) {
  const size_t N = Mapping.size();
  Block.resize(N);

  // Initialize every block so that it consumes itself
  for (size_t I = 0; I < N; ++I) {
    auto &B = Block[I];
    B.Consumes.resize(N);
    B.Kills.resize(N);
    B.Consumes.set(I);
    B.Changed = true;
  }

  // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
  // the code beyond coro.end is reachable during initial invocation of the
  // coroutine.
  for (auto *CE : Shape.CoroEnds)
    getBlockData(CE->getParent()).End = true;

  // Mark all suspend blocks and indicate that they kill everything they
  // consume. Note, that crossing coro.save also requires a spill, as any code
  // between coro.save and coro.suspend may resume the coroutine and all of the
  // state needs to be saved by that time.
  auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
    BasicBlock *SuspendBlock = BarrierInst->getParent();
    auto &B = getBlockData(SuspendBlock);
    B.Suspend = true;
    B.Kills |= B.Consumes;
  };
  for (auto *CSI : Shape.CoroSuspends) {
    markSuspendBlock(CSI);
    if (auto *Save = CSI->getCoroSave())
      markSuspendBlock(Save);
  }

  // It is considered to be faster to use RPO traversal for forward-edges
  // dataflow analysis.
  ReversePostOrderTraversal<Function *> RPOT(&F);
  computeBlockData</*Initialize=*/true>(RPOT);
  while (computeBlockData</*Initialize=*/false>(RPOT))
    ;
}
// RematGraph is used to construct a DAG for rematerializable instructions.
// When the constructor is invoked with a candidate instruction (which is
// materializable) it builds a DAG of materializable instructions from that
// point.
// Typically, for each instruction identified as re-materializable across a
// suspend point, a RematGraph will be created.
namespace {
struct RematGraph {
  // Each RematNode in the graph contains the edges to instructions providing
  // operands in the current node.
  struct RematNode {
    Instruction *Node;
    SmallVector<RematNode *> Operands;
    RematNode() = default;
    RematNode(Instruction *V) : Node(V) {}
  };

  RematNode *EntryNode;
  using RematNodeMap =
      SmallMapVector<Instruction *, std::unique_ptr<RematNode>, 8>;
  RematNodeMap Remats;
  const std::function<bool(Instruction &)> &MaterializableCallback;
  SuspendCrossingInfo &Checker;

  RematGraph(const std::function<bool(Instruction &)> &MaterializableCallback,
             Instruction *I, SuspendCrossingInfo &Checker)
      : MaterializableCallback(MaterializableCallback), Checker(Checker) {
    std::unique_ptr<RematNode> FirstNode = std::make_unique<RematNode>(I);
    EntryNode = FirstNode.get();
    std::deque<std::unique_ptr<RematNode>> WorkList;
    addNode(std::move(FirstNode), WorkList, cast<User>(I));
    while (WorkList.size()) {
      std::unique_ptr<RematNode> N = std::move(WorkList.front());
      WorkList.pop_front();
      addNode(std::move(N), WorkList, cast<User>(I));
    }
  }

  void addNode(std::unique_ptr<RematNode> NUPtr,
               std::deque<std::unique_ptr<RematNode>> &WorkList,
               User *FirstUse) {
    RematNode *N = NUPtr.get();
    if (Remats.count(N->Node))
      return;

    // We haven't seen this node yet - add it to the list.
    Remats[N->Node] = std::move(NUPtr);
    for (auto &Def : N->Node->operands()) {
      Instruction *D = dyn_cast<Instruction>(Def.get());
      if (!D || !MaterializableCallback(*D) ||
          !Checker.isDefinitionAcrossSuspend(*D, FirstUse))
        continue;

      if (Remats.count(D)) {
        // Already have this in the graph
        N->Operands.push_back(Remats[D].get());
        continue;
      }

      // Check whether a node for D is already pending on the worklist.
      bool NoMatch = true;
      for (auto &I : WorkList) {
        if (I->Node == D) {
          NoMatch = false;
          N->Operands.push_back(I.get());
          break;
        }
      }
      if (NoMatch) {
        // Create a new node
        std::unique_ptr<RematNode> ChildNode = std::make_unique<RematNode>(D);
        N->Operands.push_back(ChildNode.get());
        WorkList.push_back(std::move(ChildNode));
      }
    }
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void dump() const {
    dbgs() << "Entry (";
    if (EntryNode->Node->getParent()->hasName())
      dbgs() << EntryNode->Node->getParent()->getName();
    else
      EntryNode->Node->getParent()->printAsOperand(dbgs(), false);
    dbgs() << ") : " << *EntryNode->Node << "\n";
    for (auto &E : Remats) {
      dbgs() << *(E.first) << "\n";
      for (RematNode *U : E.second->Operands)
        dbgs() << "  " << *U->Node << "\n";
    }
  }
#endif
};
} // end anonymous namespace
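// A minimal sketch (hypothetical IR, not from any test): if %t = add i32 %n, 1
// and %u = shl i32 %t, 1 are both considered materializable and %u must be
// available after a suspend point, the graph rooted at %u gets an operand edge
// to a node for %t, so rematerializing %u after resume can also recompute %t
// instead of spilling it.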
namespace llvm {

template <> struct GraphTraits<RematGraph *> {
  using NodeRef = RematGraph::RematNode *;
  using ChildIteratorType = RematGraph::RematNode **;

  static NodeRef getEntryNode(RematGraph *G) { return G->EntryNode; }
  static ChildIteratorType child_begin(NodeRef N) {
    return N->Operands.begin();
  }
  static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); }
};

} // end namespace llvm
#undef DEBUG_TYPE // "coro-suspend-crossing"
#define DEBUG_TYPE "coro-frame"
namespace {
class FrameTypeBuilder;
// Mapping from the to-be-spilled value to all the users that need reload.
using SpillInfo = SmallMapVector<Value *, SmallVector<Instruction *, 2>, 8>;

struct AllocaInfo {
  AllocaInst *Alloca;
  DenseMap<Instruction *, std::optional<APInt>> Aliases;
  bool MayWriteBeforeCoroBegin;
  AllocaInfo(AllocaInst *Alloca,
             DenseMap<Instruction *, std::optional<APInt>> Aliases,
             bool MayWriteBeforeCoroBegin)
      : Alloca(Alloca), Aliases(std::move(Aliases)),
        MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {}
};
struct FrameDataInfo {
  // All the values (that are not allocas) that need to be spilled to the
  // frame.
  SpillInfo Spills;
  // Allocas contains all values defined as allocas that need to live in the
  // frame.
  SmallVector<AllocaInfo, 8> Allocas;

  SmallVector<Value *, 8> getAllDefs() const {
    SmallVector<Value *, 8> Defs;
    for (const auto &P : Spills)
      Defs.push_back(P.first);
    for (const auto &A : Allocas)
      Defs.push_back(A.Alloca);
    return Defs;
  }

  uint32_t getFieldIndex(Value *V) const {
    auto Itr = FieldIndexMap.find(V);
    assert(Itr != FieldIndexMap.end() &&
           "Value does not have a frame field index");
    return Itr->second;
  }

  void setFieldIndex(Value *V, uint32_t Index) {
    assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) &&
           "Cannot set the index for the same field twice.");
    FieldIndexMap[V] = Index;
  }

  Align getAlign(Value *V) const {
    auto Iter = FieldAlignMap.find(V);
    assert(Iter != FieldAlignMap.end());
    return Iter->second;
  }

  void setAlign(Value *V, Align AL) {
    assert(FieldAlignMap.count(V) == 0);
    FieldAlignMap.insert({V, AL});
  }

  uint64_t getDynamicAlign(Value *V) const {
    auto Iter = FieldDynamicAlignMap.find(V);
    assert(Iter != FieldDynamicAlignMap.end());
    return Iter->second;
  }

  void setDynamicAlign(Value *V, uint64_t Align) {
    assert(FieldDynamicAlignMap.count(V) == 0);
    FieldDynamicAlignMap.insert({V, Align});
  }

  uint64_t getOffset(Value *V) const {
    auto Iter = FieldOffsetMap.find(V);
    assert(Iter != FieldOffsetMap.end());
    return Iter->second;
  }

  void setOffset(Value *V, uint64_t Offset) {
    assert(FieldOffsetMap.count(V) == 0);
    FieldOffsetMap.insert({V, Offset});
  }

  // Remap the index of every field in the frame, using the final layout index.
  void updateLayoutIndex(FrameTypeBuilder &B);

private:
  // LayoutIndexUpdateStarted is used to avoid updating the index of any field
  // twice.
  bool LayoutIndexUpdateStarted = false;
  // Map from values to their slot indexes on the frame. They will be first set
  // with their original insertion field index. After the frame is built, their
  // indexes will be updated into the final layout index.
  DenseMap<Value *, uint32_t> FieldIndexMap;
  // Map from values to their alignment on the frame. They would be set after
  // the frame is built.
  DenseMap<Value *, Align> FieldAlignMap;
  DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
  // Map from values to their offset on the frame. They would be set after
  // the frame is built.
  DenseMap<Value *, uint64_t> FieldOffsetMap;
};
} // namespace
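// For illustration only: after spill collection runs (later in this file),
// FrameData.Spills might map a value %x to the instructions that must reload
// it after a suspend, while FrameData.Allocas lists each alloca that has to
// live on the frame together with its pre-CoroBegin aliases and whether it may
// be written before coro.begin.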
#ifndef NDEBUG
static void dumpSpills(StringRef Title, const SpillInfo &Spills) {
  dbgs() << "------------- " << Title << "--------------\n";
  for (const auto &E : Spills) {
    E.first->dump();
    dbgs() << "   user: ";
    for (auto *I : E.second)
      I->dump();
  }
}

static void dumpRemats(
    StringRef Title,
    const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> &RM) {
  dbgs() << "------------- " << Title << "--------------\n";
  for (const auto &E : RM) {
    E.second->dump();
    dbgs() << "--\n";
  }
}

static void dumpAllocas(const SmallVectorImpl<AllocaInfo> &Allocas) {
  dbgs() << "------------- Allocas --------------\n";
  for (const auto &A : Allocas) {
    A.Alloca->dump();
  }
}
#endif
namespace {
using FieldIDType = size_t;
// We cannot rely solely on natural alignment of a type when building a
// coroutine frame: if the alignment specified on the Alloca instruction
// differs from the natural alignment of the alloca type we will need to insert
// padding.
class FrameTypeBuilder {
  struct Field {
    uint64_t Size;
    uint64_t Offset;
    Type *Ty;
    FieldIDType LayoutFieldIndex;
    Align Alignment;
    Align TyAlignment;
    uint64_t DynamicAlignBuffer;
  };

  const DataLayout &DL;
  LLVMContext &Context;
  uint64_t StructSize = 0;
  Align StructAlign;
  bool IsFinished = false;

  std::optional<Align> MaxFrameAlignment;

  SmallVector<Field, 8> Fields;
  DenseMap<Value *, unsigned> FieldIndexByKey;

public:
  FrameTypeBuilder(LLVMContext &Context, const DataLayout &DL,
                   std::optional<Align> MaxFrameAlignment)
      : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}

  /// Add a field to this structure for the storage of an `alloca`
  /// instruction.
  [[nodiscard]] FieldIDType addFieldForAlloca(AllocaInst *AI,
                                              bool IsHeader = false) {
    Type *Ty = AI->getAllocatedType();

    // Make an array type if this is a static array allocation.
    if (AI->isArrayAllocation()) {
      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
        Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
      else
        report_fatal_error("Coroutines cannot handle non static allocas yet");
    }

    return addField(Ty, AI->getAlign(), IsHeader);
  }

  /// We want to put the allocas whose lifetime-ranges are not overlapped
  /// into one slot of coroutine frame.
  /// Consider the example at:https://bugs.llvm.org/show_bug.cgi?id=45566
  ///
  ///     cppcoro::task<void> alternative_paths(bool cond) {
  ///         if (cond) {
  ///             big_structure a;
  ///             process(a);
  ///             co_await something();
  ///         } else {
  ///             big_structure b;
  ///             process2(b);
  ///             co_await something();
  ///         }
  ///     }
  ///
  /// We want to put variable a and variable b in the same slot to
  /// reduce the size of coroutine frame.
  ///
  /// This function uses the StackLifetime algorithm to partition the
  /// AllocaInsts in Spills into non-overlapped sets, in order to put Allocas
  /// in the same non-overlapped set into the same slot in the Coroutine
  /// Frame. Then it adds a field for the allocas in each non-overlapped set
  /// using the largest type as the field type.
  ///
  /// Side Effects: Because we sort the allocas, the order of allocas in the
  /// frame may differ from the order in the source code.
  void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
                          coro::Shape &Shape);

  /// Add a field to this structure.
  [[nodiscard]] FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment,
                                     bool IsHeader = false,
                                     bool IsSpillOfValue = false) {
    assert(!IsFinished && "adding fields to a finished builder");
    assert(Ty && "must provide a type for a field");

    // The field size is always the alloc size of the type.
    uint64_t FieldSize = DL.getTypeAllocSize(Ty);

    // For an alloca with size=0, we don't need to add a field and they
    // can just point to any index in the frame. Use index 0.
    if (FieldSize == 0) {
      return 0;
    }

    // The field alignment might not be the type alignment, but we need
    // to remember the type alignment anyway to build the type.
    // If we are spilling values we don't need to worry about ABI alignment
    // concerns.
    Align ABIAlign = DL.getABITypeAlign(Ty);
    Align TyAlignment = ABIAlign;
    if (IsSpillOfValue && MaxFrameAlignment && *MaxFrameAlignment < ABIAlign)
      TyAlignment = *MaxFrameAlignment;
    Align FieldAlignment = MaybeFieldAlignment.value_or(TyAlignment);

    // The field alignment could be bigger than the max frame alignment; in
    // that case we request additional storage to be able to dynamically align
    // the pointer.
    uint64_t DynamicAlignBuffer = 0;
    if (MaxFrameAlignment && (FieldAlignment > *MaxFrameAlignment)) {
      DynamicAlignBuffer =
          offsetToAlignment(MaxFrameAlignment->value(), FieldAlignment);
      FieldAlignment = *MaxFrameAlignment;
      FieldSize = FieldSize + DynamicAlignBuffer;
    }

    // Lay out header fields immediately.
    uint64_t Offset;
    if (IsHeader) {
      Offset = alignTo(StructSize, FieldAlignment);
      StructSize = Offset + FieldSize;

      // Everything else has a flexible offset.
    } else {
      Offset = OptimizedStructLayoutField::FlexibleOffset;
    }

    Fields.push_back({FieldSize, Offset, Ty, 0, FieldAlignment, TyAlignment,
                      DynamicAlignBuffer});
    return Fields.size() - 1;
  }

  /// Finish the layout and set the body on the given type.
  void finish(StructType *Ty);

  uint64_t getStructSize() const {
    assert(IsFinished && "not yet finished!");
    return StructSize;
  }

  Align getStructAlign() const {
    assert(IsFinished && "not yet finished!");
    return StructAlign;
  }

  FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
    assert(IsFinished && "not yet finished!");
    return Fields[Id].LayoutFieldIndex;
  }

  Field getLayoutField(FieldIDType Id) const {
    assert(IsFinished && "not yet finished!");
    return Fields[Id];
  }
};
} // namespace
void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
  auto Updater = [&](Value *I) {
    auto Field = B.getLayoutField(getFieldIndex(I));
    setFieldIndex(I, Field.LayoutFieldIndex);
    setAlign(I, Field.Alignment);
    uint64_t dynamicAlign =
        Field.DynamicAlignBuffer
            ? Field.DynamicAlignBuffer + Field.Alignment.value()
            : 0;
    setDynamicAlign(I, dynamicAlign);
    setOffset(I, Field.Offset);
  };
  LayoutIndexUpdateStarted = true;
  for (auto &S : Spills)
    Updater(S.first);
  for (const auto &A : Allocas)
    Updater(A.Alloca);
  LayoutIndexUpdateStarted = false;
}
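// Usage sketch for FrameTypeBuilder (hypothetical fields, 64-bit target): two
// header fields of type ptr get fixed offsets 0 and 8 immediately, while a
// spilled i32 or an alloca's [16 x i8] slot is added with FlexibleOffset and
// only receives its final Offset and LayoutFieldIndex when finish() runs
// performOptimizedStructLayout; updateLayoutIndex() above then copies those
// results into FrameDataInfo.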
void FrameTypeBuilder::addFieldForAllocas(const Function &F,
                                          FrameDataInfo &FrameData,
                                          coro::Shape &Shape) {
  using AllocaSetType = SmallVector<AllocaInst *, 4>;
  SmallVector<AllocaSetType, 4> NonOverlapedAllocas;

  // We need to add fields for the allocas at the end of this function.
  auto AddFieldForAllocasAtExit = make_scope_exit([&]() {
    for (auto AllocaList : NonOverlapedAllocas) {
      auto *LargestAI = *AllocaList.begin();
      FieldIDType Id = addFieldForAlloca(LargestAI);
      for (auto *Alloca : AllocaList)
        FrameData.setFieldIndex(Alloca, Id);
    }
  });

  if (!Shape.OptimizeFrame) {
    for (const auto &A : FrameData.Allocas) {
      AllocaInst *Alloca = A.Alloca;
      NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
    }
    return;
  }

  // Because there are paths from the lifetime.start to coro.end
  // for each alloca, the live ranges of all allocas overlap
  // in the blocks that contain coro.end and in their successor blocks.
  // So we choose to skip those blocks when we calculate the live range
  // for each alloca. It should be reasonable since there shouldn't be uses
  // in these blocks and the coroutine frame shouldn't be used outside the
  // coroutine body.
  //
  // Note that the user of coro.suspend may not be a SwitchInst. However, this
  // case seems too complex to handle. And it is harmless to skip these
  // patterns since it just prevents putting the allocas into the same
  // slot.
  DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
  for (auto *CoroSuspendInst : Shape.CoroSuspends) {
    for (auto *U : CoroSuspendInst->users()) {
      if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) {
        auto *SWI = const_cast<SwitchInst *>(ConstSWI);
        DefaultSuspendDest[SWI] = SWI->getDefaultDest();
        SWI->setDefaultDest(SWI->getSuccessor(1));
      }
    }
  }

  auto ExtractAllocas = [&]() {
    AllocaSetType Allocas;
    Allocas.reserve(FrameData.Allocas.size());
    for (const auto &A : FrameData.Allocas)
      Allocas.push_back(A.Alloca);
    return Allocas;
  };
  StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
                                      StackLifetime::LivenessType::May);
  StackLifetimeAnalyzer.run();
  auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
    return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
        StackLifetimeAnalyzer.getLiveRange(AI2));
  };
  auto GetAllocaSize = [&](const AllocaInfo &A) {
    std::optional<TypeSize> RetSize = A.Alloca->getAllocationSize(DL);
    assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
    assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
    return RetSize->getFixedValue();
  };
  // Put larger allocas in the front. So the larger allocas have higher
  // priority to merge, which can save more space potentially. Also each
  // AllocaSet would be ordered, so we can get the largest Alloca in one
  // AllocaSet easily.
  sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) {
    return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
  });
  for (const auto &A : FrameData.Allocas) {
    AllocaInst *Alloca = A.Alloca;
    bool Merged = false;
    // Try to find whether the Alloca does not interfere with any existing
    // NonOverlappedAllocaSet. If that is the case, insert the alloca into that
    // NonOverlappedAllocaSet.
    for (auto &AllocaSet : NonOverlapedAllocas) {
      assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
      bool NoInference = none_of(AllocaSet, [&](auto Iter) {
        return IsAllocaInferenre(Alloca, Iter);
      });
      // If the alignment of A is a multiple of the alignment of B, the address
      // of A should satisfy the alignment requirement for B.
      //
      // There may be other, more fine-grained strategies to handle the
      // alignment information during the merging process, but it seems hard to
      // handle these strategies for little benefit.
      bool Alignable = [&]() -> bool {
        auto *LargestAlloca = *AllocaSet.begin();
        return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
               0;
      }();
      bool CouldMerge = NoInference && Alignable;
      if (!CouldMerge)
        continue;
      AllocaSet.push_back(Alloca);
      Merged = true;
      break;
    }
    if (!Merged) {
      NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
    }
  }
  // Recover the default target destination for each Switch statement reserved
  // above.
  for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
    SwitchInst *SWI = SwitchAndDefaultDest.first;
    BasicBlock *DestBB = SwitchAndDefaultDest.second;
    SWI->setDefaultDest(DestBB);
  }
  // This debug output tells us which allocas are merged into one slot.
  LLVM_DEBUG(for (auto &AllocaSet
                  : NonOverlapedAllocas) {
    if (AllocaSet.size() > 1) {
      dbgs() << "In Function:" << F.getName() << "\n";
      dbgs() << "Find Union Set "
             << "\n";
      dbgs() << "\tAllocas are \n";
      for (auto Alloca : AllocaSet)
        dbgs() << "\t\t" << *Alloca << "\n";
    }
  });
}
void FrameTypeBuilder::finish(StructType *Ty) {
  assert(!IsFinished && "already finished!");

  // Prepare the optimal-layout field array.
  // The Id in the layout field is a pointer to our Field for it.
  SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
  LayoutFields.reserve(Fields.size());
  for (auto &Field : Fields) {
    LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
                              Field.Offset);
  }

  auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
  StructSize = SizeAndAlign.first;
  StructAlign = SizeAndAlign.second;

  auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
    return *static_cast<Field *>(const_cast<void *>(LayoutField.Id));
  };

  // We need to produce a packed struct type if there's a field whose
  // assigned offset isn't a multiple of its natural type alignment.
  bool Packed = [&] {
    for (auto &LayoutField : LayoutFields) {
      auto &F = getField(LayoutField);
      if (!isAligned(F.TyAlignment, LayoutField.Offset))
        return true;
    }
    return false;
  }();

  // Build the struct body.
  SmallVector<Type *, 16> FieldTypes;
  FieldTypes.reserve(LayoutFields.size() * 3 / 2);
  uint64_t LastOffset = 0;
  for (auto &LayoutField : LayoutFields) {
    auto &F = getField(LayoutField);

    auto Offset = LayoutField.Offset;

    // Add a padding field if there's a padding gap and we're either
    // building a packed struct or the padding gap is more than we'd
    // get from aligning to the field type's natural alignment.
    assert(Offset >= LastOffset);
    if (Offset != LastOffset) {
      if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
        FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
                                            Offset - LastOffset));
    }

    F.Offset = Offset;
    F.LayoutFieldIndex = FieldTypes.size();

    FieldTypes.push_back(F.Ty);
    if (F.DynamicAlignBuffer) {
      FieldTypes.push_back(
          ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer));
    }
    LastOffset = Offset + F.Size;
  }

  Ty->setBody(FieldTypes, Packed);

#ifndef NDEBUG
  // Check that the IR layout matches the offsets we expect.
  auto Layout = DL.getStructLayout(Ty);
  for (auto &F : Fields) {
    assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty);
    assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset);
  }
#endif

  IsFinished = true;
}
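// Padding sketch (hypothetical layout): if the previous field ends at offset 4
// and the next field is an i64 placed at offset 16, alignTo(4, 8) == 8 != 16,
// so a [12 x i8] padding field is emitted before it; had the i64 been placed
// at offset 8, natural alignment alone would close the gap and no padding
// field would be needed (unless the struct ended up packed).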
static void cacheDIVar(FrameDataInfo &FrameData,
                       DenseMap<Value *, DILocalVariable *> &DIVarCache) {
  for (auto *V : FrameData.getAllDefs()) {
    if (DIVarCache.contains(V))
      continue;

    auto CacheIt = [&DIVarCache, V](const auto &Container) {
      auto *I = llvm::find_if(Container, [](auto *DDI) {
        return DDI->getExpression()->getNumElements() == 0;
      });
      if (I != Container.end())
        DIVarCache.insert({V, (*I)->getVariable()});
    };
    CacheIt(findDbgDeclares(V));
    CacheIt(findDPVDeclares(V));
  }
}
/// Create a name for Type. It uses MDString to store the newly created string
/// to avoid a memory leak.
static StringRef solveTypeName(Type *Ty) {
  if (Ty->isIntegerTy()) {
    // The longest name in common use may be '__int_128', which has 9
    // characters.
    SmallString<16> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth();
    auto *MDName = MDString::get(Ty->getContext(), OS.str());
    return MDName->getString();
  }

  if (Ty->isFloatingPointTy()) {
    if (Ty->isFloatTy())
      return "__float_";
    if (Ty->isDoubleTy())
      return "__double_";
    return "__floating_type_";
  }

  if (Ty->isPointerTy())
    return "PointerType";

  if (Ty->isStructTy()) {
    if (!cast<StructType>(Ty)->hasName())
      return "__LiteralStructType_";

    auto Name = Ty->getStructName();

    SmallString<16> Buffer(Name);
    for (auto &Iter : Buffer)
      if (Iter == '.' || Iter == ':')
        Iter = '_';
    auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
    return MDName->getString();
  }

  return "UnknownType";
}
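// For illustration only: an i32 yields "__int_32", a double yields
// "__double_", and a struct named "struct.task::promise_type" is sanitized to
// "struct_task__promise_type" ('.' and ':' replaced with '_').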
static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
                           const DataLayout &Layout, DIScope *Scope,
                           unsigned LineNum,
                           DenseMap<Type *, DIType *> &DITypeCache) {
  if (DIType *DT = DITypeCache.lookup(Ty))
    return DT;

  StringRef Name = solveTypeName(Ty);

  DIType *RetType = nullptr;

  if (Ty->isIntegerTy()) {
    auto BitWidth = cast<IntegerType>(Ty)->getBitWidth();
    RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed,
                                      llvm::DINode::FlagArtificial);
  } else if (Ty->isFloatingPointTy()) {
    RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
                                      dwarf::DW_ATE_float,
                                      llvm::DINode::FlagArtificial);
  } else if (Ty->isPointerTy()) {
    // Construct a PointerType pointing to null (aka void *) instead of
    // exploring the pointee type, to avoid an infinite search problem. For
    // example, we would be in trouble if we traversed recursively:
    //
    //  struct Node {
    //      Node* ptr;
    //  };
    RetType =
        Builder.createPointerType(nullptr, Layout.getTypeSizeInBits(Ty),
                                  Layout.getABITypeAlign(Ty).value() * CHAR_BIT,
                                  /*DWARFAddressSpace=*/std::nullopt, Name);
  } else if (Ty->isStructTy()) {
    auto *DIStruct = Builder.createStructType(
        Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
        Layout.getPrefTypeAlign(Ty).value() * CHAR_BIT,
        llvm::DINode::FlagArtificial, nullptr, llvm::DINodeArray());

    auto *StructTy = cast<StructType>(Ty);
    SmallVector<Metadata *, 16> Elements;
    for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
      DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout,
                                 Scope, LineNum, DITypeCache);
      assert(DITy);
      Elements.push_back(Builder.createMemberType(
          Scope, DITy->getName(), Scope->getFile(), LineNum,
          DITy->getSizeInBits(), DITy->getAlignInBits(),
          Layout.getStructLayout(StructTy)->getElementOffsetInBits(I),
          llvm::DINode::FlagArtificial, DITy));
    }

    Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements));

    RetType = DIStruct;
  } else {
    LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n");
    TypeSize Size = Layout.getTypeSizeInBits(Ty);
    auto *CharSizeType = Builder.createBasicType(
        Name, 8, dwarf::DW_ATE_unsigned_char, llvm::DINode::FlagArtificial);

    if (Size <= 8)
      RetType = CharSizeType;
    else {
      if (Size % 8 != 0)
        Size = TypeSize::getFixed(Size + 8 - (Size % 8));

      RetType = Builder.createArrayType(
          Size, Layout.getPrefTypeAlign(Ty).value(), CharSizeType,
          Builder.getOrCreateArray(Builder.getOrCreateSubrange(0, Size / 8)));
    }
  }

  DITypeCache.insert({Ty, RetType});
  return RetType;
}
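// For illustration only: an unhandled type such as a hypothetical 128-bit
// vector falls into the final branch above and is described as a 16-element
// array of DW_ATE_unsigned_char, so a debugger can at least display its bytes.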
/// Build artificial debug info for C++ coroutine frames to allow users to
/// inspect the contents of the frame directly.
///
/// Create debug information for the coroutine frame with debug name
/// "__coro_frame". The debug information for the fields of the coroutine
/// frame is constructed in the following way:
/// 1. For all the values in the frame, we search for a dbg.declare use to
///    find the corresponding debug variable for the value. If we can find the
///    debug variable, we can get full and accurate debug information.
/// 2. If we can't get debug information in step 1, we try to build the DIType
///    from the value's Type. We do this in solveDIType. We only handle
///    integer, float, double, pointer and struct types for now.
static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
                                FrameDataInfo &FrameData) {
  DISubprogram *DIS = F.getSubprogram();
  // If there is no DISubprogram for F, it implies the function was not
  // compiled with debug info. So we don't need to generate debug info for the
  // frame either.
  if (!DIS || !DIS->getUnit() ||
      !dwarf::isCPlusPlus(
          (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()))
    return;

  assert(Shape.ABI == coro::ABI::Switch &&
         "We could only build debug information for C++ coroutines now.\n");

  DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);

  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
  assert(PromiseAlloca &&
         "Coroutine with switch ABI should own Promise alloca");

  TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(PromiseAlloca);
  TinyPtrVector<DPValue *> DPVs = findDPVDeclares(PromiseAlloca);

  DILocalVariable *PromiseDIVariable = nullptr;
  DILocation *DILoc = nullptr;
  if (!DIs.empty()) {
    DbgDeclareInst *PromiseDDI = DIs.front();
    PromiseDIVariable = PromiseDDI->getVariable();
    DILoc = PromiseDDI->getDebugLoc().get();
  } else if (!DPVs.empty()) {
    DPValue *PromiseDPV = DPVs.front();
    PromiseDIVariable = PromiseDPV->getVariable();
    DILoc = PromiseDPV->getDebugLoc().get();
  } else {
    return;
  }

  DILocalScope *PromiseDIScope = PromiseDIVariable->getScope();
  DIFile *DFile = PromiseDIScope->getFile();
  unsigned LineNum = PromiseDIVariable->getLine();

  DICompositeType *FrameDITy = DBuilder.createStructType(
      DIS->getUnit(), Twine(F.getName() + ".coro_frame_ty").str(),
      DFile, LineNum, Shape.FrameSize * 8,
      Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
      llvm::DINodeArray());
  StructType *FrameTy = Shape.FrameTy;
  SmallVector<Metadata *, 16> Elements;
  DataLayout Layout = F.getParent()->getDataLayout();

  DenseMap<Value *, DILocalVariable *> DIVarCache;
  cacheDIVar(FrameData, DIVarCache);

  unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
  unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
  unsigned IndexIndex = Shape.SwitchLowering.IndexField;

  DenseMap<unsigned, StringRef> NameCache;
  NameCache.insert({ResumeIndex, "__resume_fn"});
  NameCache.insert({DestroyIndex, "__destroy_fn"});
  NameCache.insert({IndexIndex, "__coro_index"});

  Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex),
       *DestroyFnTy = FrameTy->getElementType(DestroyIndex),
       *IndexTy = FrameTy->getElementType(IndexIndex);

  DenseMap<unsigned, DIType *> TyCache;
  TyCache.insert(
      {ResumeIndex, DBuilder.createPointerType(
                        nullptr, Layout.getTypeSizeInBits(ResumeFnTy))});
  TyCache.insert(
      {DestroyIndex, DBuilder.createPointerType(
                         nullptr, Layout.getTypeSizeInBits(DestroyFnTy))});

  /// FIXME: If we fill the field `SizeInBits` with the actual size of
  /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
  TyCache.insert({IndexIndex, DBuilder.createBasicType(
                                  "__coro_index",
                                  (Layout.getTypeSizeInBits(IndexTy) < 8)
                                      ? 8
                                      : Layout.getTypeSizeInBits(IndexTy),
                                  dwarf::DW_ATE_unsigned_char)});

  for (auto *V : FrameData.getAllDefs()) {
    if (!DIVarCache.contains(V))
      continue;

    auto Index = FrameData.getFieldIndex(V);

    NameCache.insert({Index, DIVarCache[V]->getName()});
    TyCache.insert({Index, DIVarCache[V]->getType()});
  }

  // Cache from index to (Align, Offset) pair.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
  // The Align and Offset of the Resume function and Destroy function are
  // fixed.
  OffsetCache.insert({ResumeIndex, {8, 0}});
  OffsetCache.insert({DestroyIndex, {8, 8}});
  OffsetCache.insert(
      {IndexIndex,
       {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});
  for (auto *V : FrameData.getAllDefs()) {
    auto Index = FrameData.getFieldIndex(V);

    OffsetCache.insert(
        {Index, {FrameData.getAlign(V).value(), FrameData.getOffset(V)}});
  }

  DenseMap<Type *, DIType *> DITypeCache;
  // This counter is used to avoid duplicate type names. e.g., there could be
  // many i32 and i64 types in one coroutine, and we would use i32_0 and
  // i32_1 to differentiate them, since it makes no sense for the names of the
  // fields to conflict with each other.
  unsigned UnknownTypeNum = 0;
  for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
    if (!OffsetCache.contains(Index))
      continue;

    std::string Name;
    uint64_t SizeInBits;
    uint32_t AlignInBits;
    uint64_t OffsetInBits;
    DIType *DITy = nullptr;

    Type *Ty = FrameTy->getElementType(Index);
    assert(Ty->isSized() && "We can't handle type which is not sized.\n");
    SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedValue();
    AlignInBits = OffsetCache[Index].first * 8;
    OffsetInBits = OffsetCache[Index].second * 8;

    if (NameCache.contains(Index)) {
      Name = NameCache[Index].str();
      DITy = TyCache[Index];
    } else {
      DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache);
      assert(DITy && "SolveDIType shouldn't return nullptr.\n");
      Name = DITy->getName().str();
      Name += "_" + std::to_string(UnknownTypeNum);
      UnknownTypeNum++;
    }

    Elements.push_back(DBuilder.createMemberType(
        FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits,
        llvm::DINode::FlagArtificial, DITy));
  }

  DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements));

  auto *FrameDIVar = DBuilder.createAutoVariable(PromiseDIScope, "__coro_frame",
                                                 DFile, LineNum, FrameDITy,
                                                 true, DINode::FlagArtificial);
  assert(FrameDIVar->isValidLocationForIntrinsic(DILoc));

  // The Subprogram has a RetainedNodes field which records the debug
  // variables it contains. So we need to add __coro_frame to the
  // RetainedNodes of it.
  //
  // If we don't add __coro_frame to the RetainedNodes, the user may get
  // `no symbol __coro_frame in context` rather than `__coro_frame
  // is optimized out`, which is more precise.
  if (auto *SubProgram = dyn_cast<DISubprogram>(PromiseDIScope)) {
    auto RetainedNodes = SubProgram->getRetainedNodes();
    SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
                                                 RetainedNodes.end());
    RetainedNodesVec.push_back(FrameDIVar);
    SubProgram->replaceOperandWith(
        7, (MDTuple::get(F.getContext(), RetainedNodesVec)));
  }

  if (UseNewDbgInfoFormat) {
    DPValue *NewDPV = new DPValue(ValueAsMetadata::get(Shape.FramePtr),
                                  FrameDIVar, DBuilder.createExpression(),
                                  DILoc, DPValue::LocationType::Declare);
    BasicBlock::iterator It = Shape.getInsertPtAfterFramePtr();
    It->getParent()->insertDPValueBefore(NewDPV, It);
  } else {
    DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
                           DBuilder.createExpression(), DILoc,
                           &*Shape.getInsertPtAfterFramePtr());
  }
}
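// For illustration only: once this runs, a debugger evaluating `__coro_frame`
// inside the coroutine sees members such as __resume_fn, __destroy_fn and
// __coro_index, plus one member per frame slot, named after its dbg.declare
// variable when one was found and "<type-name>_<N>" otherwise.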
// Build a struct that will keep state for an active coroutine.
//   struct f.frame {
//     ResumeFnTy ResumeFnAddr;
//     ResumeFnTy DestroyFnAddr;
//     ... promise (if present) ...
//     int ResumeIndex;
//     ... spills ...
//   };
static StructType *buildFrameType(Function &F, coro::Shape &Shape,
                                  FrameDataInfo &FrameData) {
  LLVMContext &C = F.getContext();
  const DataLayout &DL = F.getParent()->getDataLayout();
  StructType *FrameTy = [&] {
    SmallString<32> Name(F.getName());
    Name.append(".Frame");
    return StructType::create(C, Name);
  }();

  // We will use this value to cap the alignment of spilled values.
  std::optional<Align> MaxFrameAlignment;
  if (Shape.ABI == coro::ABI::Async)
    MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
  FrameTypeBuilder B(C, DL, MaxFrameAlignment);

  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
  std::optional<FieldIDType> SwitchIndexFieldId;

  if (Shape.ABI == coro::ABI::Switch) {
    auto *FnPtrTy = PointerType::getUnqual(C);

    // Add header fields for the resume and destroy functions.
    // We can rely on these being perfectly packed.
    (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);
    (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);

    // The PromiseAlloca field needs to be explicitly added here because it's
    // a header field with a fixed offset based on its alignment. Hence it
    // needs special handling and cannot be added to FrameData.Allocas.
    if (PromiseAlloca)
      FrameData.setFieldIndex(
          PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true));

    // Add a field to store the suspend index. This doesn't need to
    // be in the header.
    unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
    Type *IndexType = Type::getIntNTy(C, IndexBits);

    SwitchIndexFieldId = B.addField(IndexType, std::nullopt);
  } else {
    assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
  }

  // Because multiple allocas may own the same field slot,
  // we add allocas to fields here.
  B.addFieldForAllocas(F, FrameData, Shape);
  // Add PromiseAlloca to the Allocas list so that
  // 1. updateLayoutIndex could update its index after
  //    `performOptimizedStructLayout`
  // 2. it is processed in insertSpills.
  if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
    // We assume that the promise alloca won't be modified before
    // CoroBegin and no alias will be created before CoroBegin.
    FrameData.Allocas.emplace_back(
        PromiseAlloca, DenseMap<Instruction *, std::optional<APInt>>{}, false);
  // Create an entry for every spilled value.
  for (auto &S : FrameData.Spills) {
    Type *FieldType = S.first->getType();
    // For byval arguments, we need to store the pointed value in the frame,
    // instead of the pointer itself.
    if (const Argument *A = dyn_cast<Argument>(S.first))
      if (A->hasByValAttr())
        FieldType = A->getParamByValType();
    FieldIDType Id = B.addField(FieldType, std::nullopt, false /*header*/,
                                true /*IsSpillOfValue*/);
    FrameData.setFieldIndex(S.first, Id);
  }

  B.finish(FrameTy);
  FrameData.updateLayoutIndex(B);
  Shape.FrameAlign = B.getStructAlign();
  Shape.FrameSize = B.getStructSize();

  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    // In the switch ABI, remember the switch-index field.
    auto IndexField = B.getLayoutField(*SwitchIndexFieldId);
    Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;
    Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
    Shape.SwitchLowering.IndexOffset = IndexField.Offset;

    // Also round the frame size up to a multiple of its alignment, as is
    // generally expected in C/C++.
    Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
    break;
  }

  // In the retcon ABI, remember whether the frame is inline in the storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    auto Id = Shape.getRetconCoroId();
    Shape.RetconLowering.IsFrameInlineInStorage
      = (B.getStructSize() <= Id->getStorageSize() &&
         B.getStructAlign() <= Id->getStorageAlignment());
    break;
  }
  case coro::ABI::Async: {
    Shape.AsyncLowering.FrameOffset =
        alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign);
    // Also make the final context size a multiple of the context alignment to
    // make allocation easier for allocators.
    Shape.AsyncLowering.ContextSize =
        alignTo(Shape.AsyncLowering.FrameOffset + Shape.FrameSize,
                Shape.AsyncLowering.getContextAlignment());
    if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
      report_fatal_error(
          "The alignment requirement of frame variables cannot be higher than "
          "the alignment of the async function context");
    }
    break;
  }
  }

  return FrameTy;
}
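// For illustration only (a hypothetical switch-ABI coroutine "foo" that spills
// one double and one i32 across its single suspend point), the resulting frame
// type could look like:
//
//   %foo.Frame = type { ptr, ptr, %"foo.promise", double, i32, i1 }
//
// with the resume/destroy function pointers and the promise in the header, the
// spills placed by the optimized layout, and an i1 holding the suspend index.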
// We use a pointer use visitor to track how an alloca is being used.
// The goal is to be able to answer the following three questions:
// 1. Should this alloca be allocated on the frame instead?
// 2. Could the content of the alloca be modified prior to CoroBegin, which
//    would require copying the data from the alloca to the frame after
//    CoroBegin?
// 3. Are there any aliases created for this alloca prior to CoroBegin but used
//    after CoroBegin? In that case, we will need to recreate the aliases after
//    CoroBegin based off the frame.
//
// To answer question 1, we track two things:
//   a. The list of all BasicBlocks that use this alloca or any of its aliases.
//      In the end, we check if there exist any two basic blocks that cross
//      suspension points. If so, this alloca must be put on the frame.
//   b. Whether the alloca or any alias of the alloca is escaped at some point,
//      either by storing the address somewhere, or by passing the address to a
//      function call that might capture it. If it's ever escaped, this alloca
//      must be put on the frame conservatively.
// To answer question 2, we track the variable MayWriteBeforeCoroBegin.
// Whenever a potential write happens, either through a store instruction, a
// function call or any of the memory intrinsics, we check whether this
// instruction is prior to CoroBegin.
// To answer question 3, we track the offsets of all aliases created for the
// alloca prior to CoroBegin but used after CoroBegin. std::optional is used to
// be able to represent the case when the offset is unknown (e.g. when you have
// a PHINode that takes in different offset values). We cannot handle unknown
// offsets and will assert. This is the potential issue left out. An ideal
// solution would likely require a significant redesign.
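//
// For illustration only (hypothetical IR): given
//
//   %a = alloca i64
//   call void @use(ptr %a)              ; may capture, before coro.begin
//   %hdl = call ptr @llvm.coro.begin(...)
//   ...                                 ; %a also loaded after a suspend
//
// the visitor records the escape, conservatively keeps %a on the frame, and
// sets MayWriteBeforeCoroBegin so the caller knows the alloca's contents must
// be copied into the frame after coro.begin.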
namespace {
struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
  using Base = PtrUseVisitor<AllocaUseVisitor>;
  AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
                   const CoroBeginInst &CB, const SuspendCrossingInfo &Checker,
                   bool ShouldUseLifetimeStartInfo)
      : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker),
        ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {}

  void visit(Instruction &I) {
    Users.insert(&I);
    Base::visit(I);
    // If the pointer is escaped prior to CoroBegin, we have to assume it would
    // be written into before CoroBegin as well.
    if (PI.isEscaped() && !DT.dominates(&CoroBegin, PI.getEscapingInst())) {
      MayWriteBeforeCoroBegin = true;
    }
  }
  // We need to provide this overload as PtrUseVisitor uses a pointer based
  // visiting function.
  void visit(Instruction *I) { return visit(*I); }

  void visitPHINode(PHINode &I) {
    enqueueUsers(I);
    handleAlias(I);
  }

  void visitSelectInst(SelectInst &I) {
    enqueueUsers(I);
    handleAlias(I);
  }

  void visitStoreInst(StoreInst &SI) {
    // Regardless of whether the alias of the alloca is the value operand or
    // the pointer operand, we need to assume the alloca is being written.
    handleMayWrite(SI);

    if (SI.getValueOperand() != U->get())
      return;

    // We are storing the pointer into a memory location, potentially escaping.
    // As an optimization, we try to detect simple cases where it doesn't
    // actually escape, for example:
    //     %ptr = alloca ..
    //     %addr = alloca ..
    //     store %ptr, %addr
    //     %x = load %addr
    //     ..
    // If %addr is only used by loading from it, we could simply treat %x as
    // another alias of %ptr, and not consider %ptr escaped.
    auto IsSimpleStoreThenLoad = [&]() {
      auto *AI = dyn_cast<AllocaInst>(SI.getPointerOperand());
      // If the memory location we are storing to is not an alloca, it
      // could be an alias of some other memory location, which is difficult
      // to analyze.
      if (!AI)
        return false;
      // StoreAliases contains aliases of the memory location stored into.
      SmallVector<Instruction *, 4> StoreAliases = {AI};
      while (!StoreAliases.empty()) {
        Instruction *I = StoreAliases.pop_back_val();
        for (User *U : I->users()) {
          // If we are loading from the memory location, we are creating an
          // alias of the original pointer.
          if (auto *LI = dyn_cast<LoadInst>(U)) {
            enqueueUsers(*LI);
            handleAlias(*LI);
            continue;
          }
          // If we are overriding the memory location, the pointer certainly
          // won't escape.
          if (auto *S = dyn_cast<StoreInst>(U))
            if (S->getPointerOperand() == I)
              continue;
          if (auto *II = dyn_cast<IntrinsicInst>(U))
            if (II->isLifetimeStartOrEnd())
              continue;
          // BitCastInst creates aliases of the memory location being stored
          // into.
          if (auto *BI = dyn_cast<BitCastInst>(U)) {
            StoreAliases.push_back(BI);
            continue;
          }
          return false;
        }
      }

      return true;
    };

    if (!IsSimpleStoreThenLoad())
      PI.setEscaped(&SI);
  }

  // All mem intrinsics modify the data.
  void visitMemIntrinsic(MemIntrinsic &MI) { handleMayWrite(MI); }

  void visitBitCastInst(BitCastInst &BC) {
    Base::visitBitCastInst(BC);
    handleAlias(BC);
  }

  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
    Base::visitAddrSpaceCastInst(ASC);
    handleAlias(ASC);
  }

  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    // The base visitor will adjust Offset accordingly.
    Base::visitGetElementPtrInst(GEPI);
    handleAlias(GEPI);
  }

  void visitIntrinsicInst(IntrinsicInst &II) {
    // When we find that the lifetime markers refer to a
    // subrange of the original alloca, ignore the lifetime
    // markers to avoid misleading the analysis.
    if (II.getIntrinsicID() != Intrinsic::lifetime_start || !IsOffsetKnown ||
        !Offset.isZero())
      return Base::visitIntrinsicInst(II);
    LifetimeStarts.insert(&II);
  }

  void visitCallBase(CallBase &CB) {
    for (unsigned Op = 0, OpCount = CB.arg_size(); Op < OpCount; ++Op)
      if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op))
        PI.setEscaped(&CB);
    handleMayWrite(CB);
  }

  bool getShouldLiveOnFrame() const {
    if (!ShouldLiveOnFrame)
      ShouldLiveOnFrame = computeShouldLiveOnFrame();
    return *ShouldLiveOnFrame;
  }

  bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; }

  DenseMap<Instruction *, std::optional<APInt>> getAliasesCopy() const {
    assert(getShouldLiveOnFrame() && "This method should only be called if the "
                                     "alloca needs to live on the frame.");
    for (const auto &P : AliasOffetMap)
      if (!P.second)
        report_fatal_error("Unable to handle an alias with unknown offset "
                           "created before CoroBegin.");
    return AliasOffetMap;
  }

private:
  const DominatorTree &DT;
  const CoroBeginInst &CoroBegin;
  const SuspendCrossingInfo &Checker;
  // All aliases to the original AllocaInst, created before CoroBegin and used
  // after CoroBegin. Each entry contains the instruction and the offset in the
  // original Alloca. They need to be recreated after CoroBegin off the frame.
  DenseMap<Instruction *, std::optional<APInt>> AliasOffetMap{};
  SmallPtrSet<Instruction *, 4> Users{};
  SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
  bool MayWriteBeforeCoroBegin{false};
  bool ShouldUseLifetimeStartInfo{true};

  mutable std::optional<bool> ShouldLiveOnFrame{};

  bool computeShouldLiveOnFrame() const {
    // If lifetime information is available, we check it first since it's
    // more precise. We look at every pair of lifetime.start intrinsic and
    // every basic block that uses the pointer to see if they cross suspension
    // points. The uses cover both direct uses as well as indirect uses.
    if (ShouldUseLifetimeStartInfo && !LifetimeStarts.empty()) {
      for (auto *I : Users)
        for (auto *S : LifetimeStarts)
          if (Checker.isDefinitionAcrossSuspend(*S, I))
            return true;
      // Addresses are guaranteed to be identical after every lifetime.start so
      // we cannot use the local stack if the address escaped and there is a
      // suspend point between lifetime markers. This should also cover the
      // case of a single lifetime.start intrinsic in a loop with suspend point.
      if (PI.isEscaped()) {
        for (auto *A : LifetimeStarts) {
          for (auto *B : LifetimeStarts) {
            if (Checker.hasPathOrLoopCrossingSuspendPoint(A->getParent(),
                                                          B->getParent()))
              return true;
          }
        }
      }
      return false;
    }
    // FIXME: Ideally the isEscaped check should come at the beginning.
    // However there are a few loose ends that need to be fixed first before
    // we can do that. We need to make sure we are not over-conservative, so
    // that the data accessed in-between await_suspend and symmetric transfer
    // is always put on the stack, and also data accessed after coro.end is
    // always put on the stack (esp the return object). To fix that, we need
    // to:
    // 1) Potentially treat sret as nocapture in calls
    // 2) Special handle the return object and put it on the stack
    // 3) Utilize lifetime.end intrinsic
    if (PI.isEscaped())
      return true;

    for (auto *U1 : Users)
      for (auto *U2 : Users)
        if (Checker.isDefinitionAcrossSuspend(*U1, U2))
          return true;

    return false;
  }

  void handleMayWrite(const Instruction &I) {
    if (!DT.dominates(&CoroBegin, &I))
      MayWriteBeforeCoroBegin = true;
  }

  bool usedAfterCoroBegin(Instruction &I) {
    for (auto &U : I.uses())
      if (DT.dominates(&CoroBegin, U))
        return true;
    return false;
  }

  void handleAlias(Instruction &I) {
    // We track all aliases created prior to CoroBegin but used after.
    // These aliases may need to be recreated after CoroBegin if the alloca
    // needs to live on the frame.
    if (DT.dominates(&CoroBegin, &I) || !usedAfterCoroBegin(I))
      return;

    if (!IsOffsetKnown) {
      AliasOffetMap[&I].reset();
      return;
    }
    auto Itr = AliasOffetMap.find(&I);
    if (Itr == AliasOffetMap.end()) {
      AliasOffetMap[&I] = Offset;
    } else if (Itr->second && *Itr->second != Offset) {
      // If we have seen two different possible values for this alias, we set
      // it to empty.
      AliasOffetMap[&I].reset();
    }
  }
};
} // namespace
// We need to make room to insert a spill after the initial PHIs, but before
// the catchswitch instruction. Placing the spill before the catchswitch
// violates the requirement that a catchswitch, like all other EHPads, must be
// the first non-PHI in a block.
//
// Split away the catchswitch into a separate block and insert in its place:
//
//    cleanuppad <InsertPt> cleanupret.
//
// The cleanupret instruction will act as an insert point for the spill.
static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
  BasicBlock *CurrentBlock = CatchSwitch->getParent();
  BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
  CurrentBlock->getTerminator()->eraseFromParent();

  auto *CleanupPad =
      CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
  auto *CleanupRet =
      CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
  return CleanupRet;
}
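// Illustrative sketch of the transformation above (hypothetical IR, not from
// the original source):
//
//   before:                        after:
//     bb:                            bb:
//       %phi = phi ...                 %phi = phi ...
//       %cs = catchswitch ...          %pad = cleanuppad within %parent []
//                                      ; the spill is inserted here
//                                      cleanupret from %pad unwind label %bb.split
//                                    bb.split:
//                                      %cs = catchswitch ...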
// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
// the coroutine:
//
//    %hdl = coro.begin(...)
//    whatever
//
// becomes:
//
//    %hdl = coro.begin(...)
//    br label %AllocaSpillBB
//
//  AllocaSpillBB:
//    ; geps corresponding to allocas that were moved to coroutine frame
//    br label PostSpill
//
//  PostSpill:
//    whatever
//
1724 static void insertSpills(const FrameDataInfo
&FrameData
, coro::Shape
&Shape
) {
1725 auto *CB
= Shape
.CoroBegin
;
1726 LLVMContext
&C
= CB
->getContext();
1727 Function
*F
= CB
->getFunction();
1728 IRBuilder
<> Builder(C
);
1729 StructType
*FrameTy
= Shape
.FrameTy
;
1730 Value
*FramePtr
= Shape
.FramePtr
;
1731 DominatorTree
DT(*F
);
1732 SmallDenseMap
<Argument
*, AllocaInst
*, 4> ArgToAllocaMap
;
1734 // Create a GEP with the given index into the coroutine frame for the original
1735 // value Orig. Appends an extra 0 index for array-allocas, preserving the
1737 auto GetFramePointer
= [&](Value
*Orig
) -> Value
* {
1738 FieldIDType Index
= FrameData
.getFieldIndex(Orig
);
1739 SmallVector
<Value
*, 3> Indices
= {
1740 ConstantInt::get(Type::getInt32Ty(C
), 0),
1741 ConstantInt::get(Type::getInt32Ty(C
), Index
),
1744 if (auto *AI
= dyn_cast
<AllocaInst
>(Orig
)) {
1745 if (auto *CI
= dyn_cast
<ConstantInt
>(AI
->getArraySize())) {
1746 auto Count
= CI
->getValue().getZExtValue();
1748 Indices
.push_back(ConstantInt::get(Type::getInt32Ty(C
), 0));
1751 report_fatal_error("Coroutines cannot handle non static allocas yet");
1755 auto GEP
= cast
<GetElementPtrInst
>(
1756 Builder
.CreateInBoundsGEP(FrameTy
, FramePtr
, Indices
));
1757 if (auto *AI
= dyn_cast
<AllocaInst
>(Orig
)) {
1758 if (FrameData
.getDynamicAlign(Orig
) != 0) {
1759 assert(FrameData
.getDynamicAlign(Orig
) == AI
->getAlign().value());
1760 auto *M
= AI
->getModule();
1761 auto *IntPtrTy
= M
->getDataLayout().getIntPtrType(AI
->getType());
1762 auto *PtrValue
= Builder
.CreatePtrToInt(GEP
, IntPtrTy
);
1764 ConstantInt::get(IntPtrTy
, AI
->getAlign().value() - 1);
1765 PtrValue
= Builder
.CreateAdd(PtrValue
, AlignMask
);
1766 PtrValue
= Builder
.CreateAnd(PtrValue
, Builder
.CreateNot(AlignMask
));
1767 return Builder
.CreateIntToPtr(PtrValue
, AI
->getType());
    // If the type of GEP is not equal to the type of AllocaInst, it implies
    // that the AllocaInst may be reused in the Frame slot of another
    // AllocaInst. So we cast GEP to the AllocaInst's type here to re-use
    // the Frame storage.
    //
    // Note: If we change the strategy dealing with alignment, we need to
    // refine this casting.
1776 if (GEP
->getType() != Orig
->getType())
1777 return Builder
.CreateAddrSpaceCast(GEP
, Orig
->getType(),
1778 Orig
->getName() + Twine(".cast"));
1783 for (auto const &E
: FrameData
.Spills
) {
1784 Value
*Def
= E
.first
;
1785 auto SpillAlignment
= Align(FrameData
.getAlign(Def
));
1786 // Create a store instruction storing the value into the
1788 BasicBlock::iterator InsertPt
;
1789 Type
*ByValTy
= nullptr;
1790 if (auto *Arg
= dyn_cast
<Argument
>(Def
)) {
1791 // For arguments, we will place the store instruction right after
1792 // the coroutine frame pointer instruction, i.e. coro.begin.
1793 InsertPt
= Shape
.getInsertPtAfterFramePtr();
1795 // If we're spilling an Argument, make sure we clear 'nocapture'
1796 // from the coroutine function.
1797 Arg
->getParent()->removeParamAttr(Arg
->getArgNo(), Attribute::NoCapture
);
1799 if (Arg
->hasByValAttr())
1800 ByValTy
= Arg
->getParamByValType();
1801 } else if (auto *CSI
= dyn_cast
<AnyCoroSuspendInst
>(Def
)) {
1802 // Don't spill immediately after a suspend; splitting assumes
1803 // that the suspend will be followed by a branch.
1804 InsertPt
= CSI
->getParent()->getSingleSuccessor()->getFirstNonPHIIt();
1806 auto *I
= cast
<Instruction
>(Def
);
1807 if (!DT
.dominates(CB
, I
)) {
1808 // If it is not dominated by CoroBegin, then spill should be
1809 // inserted immediately after CoroFrame is computed.
1810 InsertPt
= Shape
.getInsertPtAfterFramePtr();
1811 } else if (auto *II
= dyn_cast
<InvokeInst
>(I
)) {
1812 // If we are spilling the result of the invoke instruction, split
1813 // the normal edge and insert the spill in the new block.
1814 auto *NewBB
= SplitEdge(II
->getParent(), II
->getNormalDest());
1815 InsertPt
= NewBB
->getTerminator()->getIterator();
1816 } else if (isa
<PHINode
>(I
)) {
        // Skip PHINode and EH pad instructions.
1818 BasicBlock
*DefBlock
= I
->getParent();
1819 if (auto *CSI
= dyn_cast
<CatchSwitchInst
>(DefBlock
->getTerminator()))
1820 InsertPt
= splitBeforeCatchSwitch(CSI
)->getIterator();
1822 InsertPt
= DefBlock
->getFirstInsertionPt();
1824 assert(!I
->isTerminator() && "unexpected terminator");
1825 // For all other values, the spill is placed immediately after
1827 InsertPt
= I
->getNextNode()->getIterator();
1831 auto Index
= FrameData
.getFieldIndex(Def
);
1832 Builder
.SetInsertPoint(InsertPt
->getParent(), InsertPt
);
1833 auto *G
= Builder
.CreateConstInBoundsGEP2_32(
1834 FrameTy
, FramePtr
, 0, Index
, Def
->getName() + Twine(".spill.addr"));
1836 // For byval arguments, we need to store the pointed value in the frame,
1837 // instead of the pointer itself.
1838 auto *Value
= Builder
.CreateLoad(ByValTy
, Def
);
1839 Builder
.CreateAlignedStore(Value
, G
, SpillAlignment
);
1841 Builder
.CreateAlignedStore(Def
, G
, SpillAlignment
);
1844 BasicBlock
*CurrentBlock
= nullptr;
1845 Value
*CurrentReload
= nullptr;
1846 for (auto *U
: E
.second
) {
1847 // If we have not seen the use block, create a load instruction to reload
1848 // the spilled value from the coroutine frame. Populates the Value pointer
1849 // reference provided with the frame GEP.
1850 if (CurrentBlock
!= U
->getParent()) {
1851 CurrentBlock
= U
->getParent();
1852 Builder
.SetInsertPoint(CurrentBlock
,
1853 CurrentBlock
->getFirstInsertionPt());
1855 auto *GEP
= GetFramePointer(E
.first
);
1856 GEP
->setName(E
.first
->getName() + Twine(".reload.addr"));
1858 CurrentReload
= GEP
;
1860 CurrentReload
= Builder
.CreateAlignedLoad(
1861 FrameTy
->getElementType(FrameData
.getFieldIndex(E
.first
)), GEP
,
1862 SpillAlignment
, E
.first
->getName() + Twine(".reload"));
1864 TinyPtrVector
<DbgDeclareInst
*> DIs
= findDbgDeclares(Def
);
1865 TinyPtrVector
<DPValue
*> DPVs
= findDPVDeclares(Def
);
      // Try our best to find the dbg.declare. If the spill is a temp, there
      // may not be a direct dbg.declare; walk up the load chain to find one
      // from an alias instead.
1869 if (F
->getSubprogram()) {
1871 while (DIs
.empty() && DPVs
.empty() && isa
<LoadInst
>(CurDef
)) {
1872 auto *LdInst
= cast
<LoadInst
>(CurDef
);
1873 // Only consider ptr to ptr same type load.
1874 if (LdInst
->getPointerOperandType() != LdInst
->getType())
1876 CurDef
= LdInst
->getPointerOperand();
1877 if (!isa
<AllocaInst
, LoadInst
>(CurDef
))
1879 DIs
= findDbgDeclares(CurDef
);
1880 DPVs
= findDPVDeclares(CurDef
);
1884 auto SalvageOne
= [&](auto *DDI
) {
1885 bool AllowUnresolved
= false;
1886 // This dbg.declare is preserved for all coro-split function
1887 // fragments. It will be unreachable in the main function, and
1888 // processed by coro::salvageDebugInfo() by CoroCloner.
1889 if (UseNewDbgInfoFormat
) {
1891 new DPValue(ValueAsMetadata::get(CurrentReload
),
1892 DDI
->getVariable(), DDI
->getExpression(),
1893 DDI
->getDebugLoc(), DPValue::LocationType::Declare
);
1894 Builder
.GetInsertPoint()->getParent()->insertDPValueBefore(
1895 NewDPV
, Builder
.GetInsertPoint());
1897 DIBuilder(*CurrentBlock
->getParent()->getParent(), AllowUnresolved
)
1898 .insertDeclare(CurrentReload
, DDI
->getVariable(),
1899 DDI
->getExpression(), DDI
->getDebugLoc(),
1900 &*Builder
.GetInsertPoint());
1902 // This dbg.declare is for the main function entry point. It
1903 // will be deleted in all coro-split functions.
1904 coro::salvageDebugInfo(ArgToAllocaMap
, *DDI
, Shape
.OptimizeFrame
,
1905 false /*UseEntryValue*/);
1907 for_each(DIs
, SalvageOne
);
1908 for_each(DPVs
, SalvageOne
);
1911 // If we have a single edge PHINode, remove it and replace it with a
1912 // reload from the coroutine frame. (We already took care of multi edge
1913 // PHINodes by rewriting them in the rewritePHIs function).
1914 if (auto *PN
= dyn_cast
<PHINode
>(U
)) {
1915 assert(PN
->getNumIncomingValues() == 1 &&
1916 "unexpected number of incoming "
1917 "values in the PHINode");
1918 PN
->replaceAllUsesWith(CurrentReload
);
1919 PN
->eraseFromParent();
1923 // Replace all uses of CurrentValue in the current instruction with
1925 U
->replaceUsesOfWith(Def
, CurrentReload
);
1926 // Instructions are added to Def's user list if the attached
1927 // debug records use Def. Update those now.
1928 for (auto &DPV
: U
->getDbgValueRange())
1929 DPV
.replaceVariableLocationOp(Def
, CurrentReload
, true);
1933 BasicBlock
*FramePtrBB
= Shape
.getInsertPtAfterFramePtr()->getParent();
1935 auto SpillBlock
= FramePtrBB
->splitBasicBlock(
1936 Shape
.getInsertPtAfterFramePtr(), "AllocaSpillBB");
1937 SpillBlock
->splitBasicBlock(&SpillBlock
->front(), "PostSpill");
1938 Shape
.AllocaSpillBlock
= SpillBlock
;
1940 // retcon and retcon.once lowering assumes all uses have been sunk.
1941 if (Shape
.ABI
== coro::ABI::Retcon
|| Shape
.ABI
== coro::ABI::RetconOnce
||
1942 Shape
.ABI
== coro::ABI::Async
) {
1943 // If we found any allocas, replace all of their remaining uses with Geps.
1944 Builder
.SetInsertPoint(SpillBlock
, SpillBlock
->begin());
1945 for (const auto &P
: FrameData
.Allocas
) {
1946 AllocaInst
*Alloca
= P
.Alloca
;
1947 auto *G
= GetFramePointer(Alloca
);
1949 // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
1950 // here, as we are changing location of the instruction.
1951 G
->takeName(Alloca
);
1952 Alloca
->replaceAllUsesWith(G
);
1953 Alloca
->eraseFromParent();
  // If we found any allocas, replace all of their remaining uses with GEP
  // instructions. To retain debuggability, we replace the uses of allocas in
  // dbg.declares and dbg.values with the reload from the frame.
  // Note: We cannot replace the alloca with GEP instructions indiscriminately,
  // as some of the uses may not be dominated by CoroBegin.
1963 Builder
.SetInsertPoint(Shape
.AllocaSpillBlock
,
1964 Shape
.AllocaSpillBlock
->begin());
1965 SmallVector
<Instruction
*, 4> UsersToUpdate
;
1966 for (const auto &A
: FrameData
.Allocas
) {
1967 AllocaInst
*Alloca
= A
.Alloca
;
1968 UsersToUpdate
.clear();
1969 for (User
*U
: Alloca
->users()) {
1970 auto *I
= cast
<Instruction
>(U
);
1971 if (DT
.dominates(CB
, I
))
1972 UsersToUpdate
.push_back(I
);
1974 if (UsersToUpdate
.empty())
1976 auto *G
= GetFramePointer(Alloca
);
1977 G
->setName(Alloca
->getName() + Twine(".reload.addr"));
1979 SmallVector
<DbgVariableIntrinsic
*, 4> DIs
;
1980 SmallVector
<DPValue
*> DPValues
;
1981 findDbgUsers(DIs
, Alloca
, &DPValues
);
1982 for (auto *DVI
: DIs
)
1983 DVI
->replaceUsesOfWith(Alloca
, G
);
1984 for (auto *DPV
: DPValues
)
1985 DPV
->replaceVariableLocationOp(Alloca
, G
);
1987 for (Instruction
*I
: UsersToUpdate
) {
      // Lifetime intrinsics that now refer to members of the coroutine frame
      // are meaningless, and keeping them around can block further
      // optimizations, so drop them.
1991 if (I
->isLifetimeStartOrEnd()) {
1992 I
->eraseFromParent();
1996 I
->replaceUsesOfWith(Alloca
, G
);
1999 Builder
.SetInsertPoint(&*Shape
.getInsertPtAfterFramePtr());
2000 for (const auto &A
: FrameData
.Allocas
) {
2001 AllocaInst
*Alloca
= A
.Alloca
;
2002 if (A
.MayWriteBeforeCoroBegin
) {
2003 // isEscaped really means potentially modified before CoroBegin.
2004 if (Alloca
->isArrayAllocation())
2006 "Coroutines cannot handle copying of array allocas yet");
2008 auto *G
= GetFramePointer(Alloca
);
2009 auto *Value
= Builder
.CreateLoad(Alloca
->getAllocatedType(), Alloca
);
2010 Builder
.CreateStore(Value
, G
);
    // For each alias to Alloca created before CoroBegin but used after
    // CoroBegin, we recreate it after CoroBegin by applying the offset
    // to the pointer in the frame.
2015 for (const auto &Alias
: A
.Aliases
) {
2016 auto *FramePtr
= GetFramePointer(Alloca
);
2017 auto &Value
= *Alias
.second
;
2018 auto ITy
= IntegerType::get(C
, Value
.getBitWidth());
2020 Builder
.CreatePtrAdd(FramePtr
, ConstantInt::get(ITy
, Value
));
2021 Alias
.first
->replaceUsesWithIf(
2022 AliasPtr
, [&](Use
&U
) { return DT
.dominates(CB
, U
); });
  // PromiseAlloca is not collected in FrameData.Allocas, so the code above
  // does not handle the case where the PromiseAlloca may be written before
  // CoroBegin. This may be problematic in edge cases. See
  // https://github.com/llvm/llvm-project/issues/57861 for an example.
2030 if (Shape
.ABI
== coro::ABI::Switch
&& Shape
.SwitchLowering
.PromiseAlloca
) {
2031 AllocaInst
*PA
= Shape
.SwitchLowering
.PromiseAlloca
;
    // Check whether there is any memory access to the promise alloca before
    // CoroBegin.
2033 bool HasAccessingPromiseBeforeCB
= llvm::any_of(PA
->uses(), [&](Use
&U
) {
2034 auto *Inst
= dyn_cast
<Instruction
>(U
.getUser());
2035 if (!Inst
|| DT
.dominates(CB
, Inst
))
2038 if (auto *CI
= dyn_cast
<CallInst
>(Inst
)) {
2039 // It is fine if the call wouldn't write to the Promise.
2040 // This is possible for @llvm.coro.id intrinsics, which
2041 // would take the promise as the second argument as a
2043 if (CI
->onlyReadsMemory() ||
2044 CI
->onlyReadsMemory(CI
->getArgOperandNo(&U
)))
2049 return isa
<StoreInst
>(Inst
) ||
2050 // It may take too much time to track the uses.
             // Be conservative about the case where the use may escape.
2052 isa
<GetElementPtrInst
>(Inst
) ||
             // Historically there was always a bitcast for the promise
             // alloca before opaque pointers were enabled. Now that opaque
             // pointers are enabled by default, this check should be
             // removable.
2057 isa
<BitCastInst
>(Inst
);
2059 if (HasAccessingPromiseBeforeCB
) {
2060 Builder
.SetInsertPoint(&*Shape
.getInsertPtAfterFramePtr());
2061 auto *G
= GetFramePointer(PA
);
2062 auto *Value
= Builder
.CreateLoad(PA
->getAllocatedType(), PA
);
2063 Builder
.CreateStore(Value
, G
);
// Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
// PHI in InsertedBB.
static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
                                         BasicBlock *InsertedBB,
                                         BasicBlock *PredBB,
                                         PHINode *UntilPHI = nullptr) {
  auto *PN = cast<PHINode>(&SuccBB->front());
  do {
    int Index = PN->getBasicBlockIndex(InsertedBB);
    Value *V = PN->getIncomingValue(Index);
    PHINode *InputV = PHINode::Create(
        V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName());
    InputV->insertBefore(InsertedBB->begin());
    InputV->addIncoming(V, PredBB);
    PN->setIncomingValue(Index, InputV);
    PN = dyn_cast<PHINode>(PN->getNextNode());
  } while (PN != UntilPHI);
}
2087 // Rewrites the PHI Nodes in a cleanuppad.
2088 static void rewritePHIsForCleanupPad(BasicBlock
*CleanupPadBB
,
2089 CleanupPadInst
*CleanupPad
) {
  // For every incoming edge to a CleanupPad we will create a new block holding
  // all incoming values in single-value PHI nodes. We will then create another
  // block to act as a dispatcher (as all unwind edges for related EH blocks
  // must be the same).
2096 // %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
2097 // %3 = cleanuppad within none []
2101 // cleanuppad.corodispatch
2102 // %2 = phi i8[0, %catchswitch], [1, %catch.1]
2103 // %3 = cleanuppad within none []
2104 // switch i8 % 2, label %unreachable
2105 // [i8 0, label %cleanuppad.from.catchswitch
2106 // i8 1, label %cleanuppad.from.catch.1]
2107 // cleanuppad.from.catchswitch:
2108 // %4 = phi i32 [%0, %catchswitch]
2109 // br %label cleanuppad
2110 // cleanuppad.from.catch.1:
2111 // %6 = phi i32 [%1, %catch.1]
2112 // br %label cleanuppad
2114 // %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
2115 // [%6, %cleanuppad.from.catch.1]
2117 // Unreachable BB, in case switching on an invalid value in the dispatcher.
2118 auto *UnreachBB
= BasicBlock::Create(
2119 CleanupPadBB
->getContext(), "unreachable", CleanupPadBB
->getParent());
2120 IRBuilder
<> Builder(UnreachBB
);
2121 Builder
.CreateUnreachable();
2123 // Create a new cleanuppad which will be the dispatcher.
2124 auto *NewCleanupPadBB
=
2125 BasicBlock::Create(CleanupPadBB
->getContext(),
2126 CleanupPadBB
->getName() + Twine(".corodispatch"),
2127 CleanupPadBB
->getParent(), CleanupPadBB
);
2128 Builder
.SetInsertPoint(NewCleanupPadBB
);
2129 auto *SwitchType
= Builder
.getInt8Ty();
2130 auto *SetDispatchValuePN
=
2131 Builder
.CreatePHI(SwitchType
, pred_size(CleanupPadBB
));
2132 CleanupPad
->removeFromParent();
2133 CleanupPad
->insertAfter(SetDispatchValuePN
);
2134 auto *SwitchOnDispatch
= Builder
.CreateSwitch(SetDispatchValuePN
, UnreachBB
,
2135 pred_size(CleanupPadBB
));
2137 int SwitchIndex
= 0;
2138 SmallVector
<BasicBlock
*, 8> Preds(predecessors(CleanupPadBB
));
2139 for (BasicBlock
*Pred
: Preds
) {
2140 // Create a new cleanuppad and move the PHI values to there.
2141 auto *CaseBB
= BasicBlock::Create(CleanupPadBB
->getContext(),
2142 CleanupPadBB
->getName() +
2143 Twine(".from.") + Pred
->getName(),
2144 CleanupPadBB
->getParent(), CleanupPadBB
);
2145 updatePhiNodes(CleanupPadBB
, Pred
, CaseBB
);
2146 CaseBB
->setName(CleanupPadBB
->getName() + Twine(".from.") +
2148 Builder
.SetInsertPoint(CaseBB
);
2149 Builder
.CreateBr(CleanupPadBB
);
2150 movePHIValuesToInsertedBlock(CleanupPadBB
, CaseBB
, NewCleanupPadBB
);
2152 // Update this Pred to the new unwind point.
2153 setUnwindEdgeTo(Pred
->getTerminator(), NewCleanupPadBB
);
2155 // Setup the switch in the dispatcher.
2156 auto *SwitchConstant
= ConstantInt::get(SwitchType
, SwitchIndex
);
2157 SetDispatchValuePN
->addIncoming(SwitchConstant
, Pred
);
2158 SwitchOnDispatch
->addCase(SwitchConstant
, CaseBB
);
static void cleanupSinglePredPHIs(Function &F) {
  SmallVector<PHINode *, 32> Worklist;
  for (auto &BB : F) {
    for (auto &Phi : BB.phis()) {
      if (Phi.getNumIncomingValues() == 1) {
        Worklist.push_back(&Phi);
      } else
        break;
    }
  }
  while (!Worklist.empty()) {
    auto *Phi = Worklist.pop_back_val();
    auto *OriginalValue = Phi->getIncomingValue(0);
    Phi->replaceAllUsesWith(OriginalValue);
  }
}
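// Note (added for clarity, not in the original source): a single-incoming PHI
// carries no information beyond its incoming value, and later coro-split code
// assumes such PHIs are not live across a suspend point, so folding them away
// up front keeps the suspend-crossing analysis simple.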
2180 static void rewritePHIs(BasicBlock
&BB
) {
2181 // For every incoming edge we will create a block holding all
2182 // incoming values in a single PHI nodes.
2185 // %n.val = phi i32[%n, %entry], [%inc, %loop]
2190 // %n.loop.pre = phi i32 [%n, %entry]
2193 // %inc.loop.pre = phi i32 [%inc, %loop]
2196 // After this rewrite, further analysis will ignore any phi nodes with more
2197 // than one incoming edge.
2199 // TODO: Simplify PHINodes in the basic block to remove duplicate
2202 // Special case for CleanupPad: all EH blocks must have the same unwind edge
2203 // so we need to create an additional "dispatcher" block.
2204 if (auto *CleanupPad
=
2205 dyn_cast_or_null
<CleanupPadInst
>(BB
.getFirstNonPHI())) {
2206 SmallVector
<BasicBlock
*, 8> Preds(predecessors(&BB
));
2207 for (BasicBlock
*Pred
: Preds
) {
2208 if (CatchSwitchInst
*CS
=
2209 dyn_cast
<CatchSwitchInst
>(Pred
->getTerminator())) {
        // CleanupPad with a CatchSwitch predecessor: therefore this is an
        // unwind destination that needs to be handled specially.
2212 assert(CS
->getUnwindDest() == &BB
);
2214 rewritePHIsForCleanupPad(&BB
, CleanupPad
);
2220 LandingPadInst
*LandingPad
= nullptr;
2221 PHINode
*ReplPHI
= nullptr;
2222 if ((LandingPad
= dyn_cast_or_null
<LandingPadInst
>(BB
.getFirstNonPHI()))) {
2223 // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
2224 // We replace the original landing pad with a PHINode that will collect the
2225 // results from all of them.
2226 ReplPHI
= PHINode::Create(LandingPad
->getType(), 1, "");
2227 ReplPHI
->insertBefore(LandingPad
->getIterator());
2228 ReplPHI
->takeName(LandingPad
);
2229 LandingPad
->replaceAllUsesWith(ReplPHI
);
2230 // We will erase the original landing pad at the end of this function after
2231 // ehAwareSplitEdge cloned it in the transition blocks.
2234 SmallVector
<BasicBlock
*, 8> Preds(predecessors(&BB
));
2235 for (BasicBlock
*Pred
: Preds
) {
2236 auto *IncomingBB
= ehAwareSplitEdge(Pred
, &BB
, LandingPad
, ReplPHI
);
2237 IncomingBB
->setName(BB
.getName() + Twine(".from.") + Pred
->getName());
2239 // Stop the moving of values at ReplPHI, as this is either null or the PHI
2240 // that replaced the landing pad.
2241 movePHIValuesToInsertedBlock(&BB
, IncomingBB
, Pred
, ReplPHI
);
    // The calls to ehAwareSplitEdge above cloned the original landing pad, so
    // we no longer need it.
2247 LandingPad
->eraseFromParent();
static void rewritePHIs(Function &F) {
  SmallVector<BasicBlock *, 8> WorkList;

  for (BasicBlock &BB : F)
    if (auto *PN = dyn_cast<PHINode>(&BB.front()))
      if (PN->getNumIncomingValues() > 1)
        WorkList.push_back(&BB);

  for (BasicBlock *BB : WorkList)
    rewritePHIs(*BB);
}
/// Default materializable callback
// Check for instructions that we can recreate on resume as opposed to spill
// the result into a coroutine frame.
bool coro::defaultMaterializable(Instruction &V) {
  return (isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
          isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V));
}

// Check for structural coroutine intrinsics that should not be spilled into
// the coroutine frame.
static bool isCoroutineStructureIntrinsic(Instruction &I) {
  return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) ||
         isa<CoroSuspendInst>(&I);
}
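// Illustrative note (hypothetical IR, not from the original source): a
// materializable value such as
//
//   %p = getelementptr inbounds %S, ptr %base, i64 0, i32 1
//
// that is used on both sides of a suspend point does not need a frame slot;
// the GEP can simply be cloned after the suspend, provided its own operands
// (here %base) are available there. That is what the rematerialization code
// below arranges.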
2278 // For each instruction identified as materializable across the suspend point,
2279 // and its associated DAG of other rematerializable instructions,
2280 // recreate the DAG of instructions after the suspend point.
2281 static void rewriteMaterializableInstructions(
2282 const SmallMapVector
<Instruction
*, std::unique_ptr
<RematGraph
>, 8>
2284 // This has to be done in 2 phases
2285 // Do the remats and record the required defs to be replaced in the
2286 // original use instructions
2287 // Once all the remats are complete, replace the uses in the final
2288 // instructions with the new defs
2295 SmallVector
<ProcessNode
> FinalInstructionsToProcess
;
2297 for (const auto &E
: AllRemats
) {
2298 Instruction
*Use
= E
.first
;
2299 Instruction
*CurrentMaterialization
= nullptr;
2300 RematGraph
*RG
= E
.second
.get();
2301 ReversePostOrderTraversal
<RematGraph
*> RPOT(RG
);
2302 SmallVector
<Instruction
*> InstructionsToProcess
;
2304 // If the target use is actually a suspend instruction then we have to
2305 // insert the remats into the end of the predecessor (there should only be
2306 // one). This is so that suspend blocks always have the suspend instruction
2307 // as the first instruction.
2308 auto InsertPoint
= &*Use
->getParent()->getFirstInsertionPt();
2309 if (isa
<AnyCoroSuspendInst
>(Use
)) {
2310 BasicBlock
*SuspendPredecessorBlock
=
2311 Use
->getParent()->getSinglePredecessor();
2312 assert(SuspendPredecessorBlock
&& "malformed coro suspend instruction");
2313 InsertPoint
= SuspendPredecessorBlock
->getTerminator();
2316 // Note: skip the first instruction as this is the actual use that we're
2317 // rematerializing everything for.
2318 auto I
= RPOT
.begin();
2320 for (; I
!= RPOT
.end(); ++I
) {
2321 Instruction
*D
= (*I
)->Node
;
2322 CurrentMaterialization
= D
->clone();
2323 CurrentMaterialization
->setName(D
->getName());
2324 CurrentMaterialization
->insertBefore(InsertPoint
);
2325 InsertPoint
= CurrentMaterialization
;
2327 // Replace all uses of Def in the instructions being added as part of this
2328 // rematerialization group
2329 for (auto &I
: InstructionsToProcess
)
2330 I
->replaceUsesOfWith(D
, CurrentMaterialization
);
2332 // Don't replace the final use at this point as this can cause problems
2333 // for other materializations. Instead, for any final use that uses a
2334 // define that's being rematerialized, record the replace values
2335 for (unsigned i
= 0, E
= Use
->getNumOperands(); i
!= E
; ++i
)
2336 if (Use
->getOperand(i
) == D
) // Is this operand pointing to oldval?
2337 FinalInstructionsToProcess
.push_back(
2338 {Use
, D
, CurrentMaterialization
});
2340 InstructionsToProcess
.push_back(CurrentMaterialization
);
2344 // Finally, replace the uses with the defines that we've just rematerialized
2345 for (auto &R
: FinalInstructionsToProcess
) {
2346 if (auto *PN
= dyn_cast
<PHINode
>(R
.Use
)) {
2347 assert(PN
->getNumIncomingValues() == 1 && "unexpected number of incoming "
2348 "values in the PHINode");
2349 PN
->replaceAllUsesWith(R
.Remat
);
2350 PN
->eraseFromParent();
2353 R
.Use
->replaceUsesOfWith(R
.Def
, R
.Remat
);
// Splits the block at a particular instruction unless it is the first
// instruction in the block with a single predecessor.
static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
  auto *BB = I->getParent();
  if (&BB->front() == I) {
    if (BB->getSinglePredecessor()) {
      BB->setName(Name);
      return BB;
    }
  }
  return BB->splitBasicBlock(I, Name);
}

// Split above and below a particular instruction so that it
// will be all alone by itself in a block.
static void splitAround(Instruction *I, const Twine &Name) {
  splitBlockIfNotFirst(I, Name);
  splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
}

static bool isSuspendBlock(BasicBlock *BB) {
  return isa<AnyCoroSuspendInst>(BB->front());
}

typedef SmallPtrSet<BasicBlock *, 8> VisitedBlocksSet;
/// Does control flow starting at the given block ever reach a suspend
/// instruction before reaching a block in VisitedOrFreeBBs?
static bool isSuspendReachableFrom(BasicBlock *From,
                                   VisitedBlocksSet &VisitedOrFreeBBs) {
  // Eagerly try to add this block to the visited set. If it's already
  // there, stop recursing; this path doesn't reach a suspend before
  // either looping or reaching a freeing block.
  if (!VisitedOrFreeBBs.insert(From).second)
    return false;

  // We assume that we'll already have split suspends into their own blocks.
  if (isSuspendBlock(From))
    return true;

  // Recurse on the successors.
  for (auto *Succ : successors(From)) {
    if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
      return true;
  }

  return false;
}

/// Is the given alloca "local", i.e. bounded in lifetime to not cross a
/// suspend point?
static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
  // Seed the visited set with all the basic blocks containing a free
  // so that we won't pass them up.
  VisitedBlocksSet VisitedOrFreeBBs;
  for (auto *User : AI->users()) {
    if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
      VisitedOrFreeBBs.insert(FI->getParent());
  }

  return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
}
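// Note (added for clarity, not in the original source): allocas that pass
// isLocalAlloca() are lowered to plain dynamic allocas by lowerLocalAllocas()
// below, while the rest go through lowerNonLocalAlloca() and may end up being
// spilled to the coroutine frame.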
/// After we split the coroutine, will the given basic block be along
/// an obvious exit path for the resumption function?
static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
                                              unsigned depth = 3) {
  // If we've bottomed out our depth count, stop searching and assume
  // that the path might loop back.
  if (depth == 0) return false;

  // If this is a suspend block, we're about to exit the resumption function.
  if (isSuspendBlock(BB)) return true;

  // Recurse into the successors.
  for (auto *Succ : successors(BB)) {
    if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
      return false;
  }

  // If none of the successors leads back in a loop, we're on an exit/abort.
  return true;
}

static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
  // Look for a free that isn't sufficiently obviously followed by
  // either a suspend or a termination, i.e. something that will leave
  // the coro resumption frame.
  for (auto *U : AI->users()) {
    auto FI = dyn_cast<CoroAllocaFreeInst>(U);
    if (!FI) continue;

    if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
      return true;
  }

  // If we never found one, we don't need a stack save.
  return false;
}
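// Note (added for clarity, not in the original source): when a stack save is
// needed, lowerLocalAllocas() below pairs a stacksave at the allocation point
// with a stackrestore at each coro.alloca.free, so the dynamic alloca's stack
// space is reclaimed at the free.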
2457 /// Turn each of the given local allocas into a normal (dynamic) alloca
2459 static void lowerLocalAllocas(ArrayRef
<CoroAllocaAllocInst
*> LocalAllocas
,
2460 SmallVectorImpl
<Instruction
*> &DeadInsts
) {
2461 for (auto *AI
: LocalAllocas
) {
2462 IRBuilder
<> Builder(AI
);
2464 // Save the stack depth. Try to avoid doing this if the stackrestore
2465 // is going to immediately precede a return or something.
2466 Value
*StackSave
= nullptr;
2467 if (localAllocaNeedsStackSave(AI
))
2468 StackSave
= Builder
.CreateStackSave();
2471 auto Alloca
= Builder
.CreateAlloca(Builder
.getInt8Ty(), AI
->getSize());
2472 Alloca
->setAlignment(AI
->getAlignment());
2474 for (auto *U
: AI
->users()) {
2475 // Replace gets with the allocation.
2476 if (isa
<CoroAllocaGetInst
>(U
)) {
2477 U
->replaceAllUsesWith(Alloca
);
2479 // Replace frees with stackrestores. This is safe because
2480 // alloca.alloc is required to obey a stack discipline, although we
2481 // don't enforce that structurally.
2483 auto FI
= cast
<CoroAllocaFreeInst
>(U
);
2485 Builder
.SetInsertPoint(FI
);
2486 Builder
.CreateStackRestore(StackSave
);
2489 DeadInsts
.push_back(cast
<Instruction
>(U
));
2492 DeadInsts
.push_back(AI
);
2496 /// Turn the given coro.alloca.alloc call into a dynamic allocation.
2497 /// This happens during the all-instructions iteration, so it must not
2498 /// delete the call.
2499 static Instruction
*lowerNonLocalAlloca(CoroAllocaAllocInst
*AI
,
2501 SmallVectorImpl
<Instruction
*> &DeadInsts
) {
2502 IRBuilder
<> Builder(AI
);
2503 auto Alloc
= Shape
.emitAlloc(Builder
, AI
->getSize(), nullptr);
2505 for (User
*U
: AI
->users()) {
2506 if (isa
<CoroAllocaGetInst
>(U
)) {
2507 U
->replaceAllUsesWith(Alloc
);
2509 auto FI
= cast
<CoroAllocaFreeInst
>(U
);
2510 Builder
.SetInsertPoint(FI
);
2511 Shape
.emitDealloc(Builder
, Alloc
, nullptr);
2513 DeadInsts
.push_back(cast
<Instruction
>(U
));
2516 // Push this on last so that it gets deleted after all the others.
2517 DeadInsts
.push_back(AI
);
2519 // Return the new allocation value so that we can check for needed spills.
2520 return cast
<Instruction
>(Alloc
);
2523 /// Get the current swifterror value.
2524 static Value
*emitGetSwiftErrorValue(IRBuilder
<> &Builder
, Type
*ValueTy
,
2525 coro::Shape
&Shape
) {
2526 // Make a fake function pointer as a sort of intrinsic.
2527 auto FnTy
= FunctionType::get(ValueTy
, {}, false);
2528 auto Fn
= ConstantPointerNull::get(Builder
.getPtrTy());
2530 auto Call
= Builder
.CreateCall(FnTy
, Fn
, {});
2531 Shape
.SwiftErrorOps
.push_back(Call
);
2536 /// Set the given value as the current swifterror value.
2538 /// Returns a slot that can be used as a swifterror slot.
2539 static Value
*emitSetSwiftErrorValue(IRBuilder
<> &Builder
, Value
*V
,
2540 coro::Shape
&Shape
) {
2541 // Make a fake function pointer as a sort of intrinsic.
2542 auto FnTy
= FunctionType::get(Builder
.getPtrTy(),
2543 {V
->getType()}, false);
2544 auto Fn
= ConstantPointerNull::get(Builder
.getPtrTy());
2546 auto Call
= Builder
.CreateCall(FnTy
, Fn
, { V
});
2547 Shape
.SwiftErrorOps
.push_back(Call
);
2552 /// Set the swifterror value from the given alloca before a call,
2553 /// then put in back in the alloca afterwards.
2555 /// Returns an address that will stand in for the swifterror slot
2556 /// until splitting.
2557 static Value
*emitSetAndGetSwiftErrorValueAround(Instruction
*Call
,
2559 coro::Shape
&Shape
) {
2560 auto ValueTy
= Alloca
->getAllocatedType();
2561 IRBuilder
<> Builder(Call
);
2563 // Load the current value from the alloca and set it as the
2564 // swifterror value.
2565 auto ValueBeforeCall
= Builder
.CreateLoad(ValueTy
, Alloca
);
2566 auto Addr
= emitSetSwiftErrorValue(Builder
, ValueBeforeCall
, Shape
);
2568 // Move to after the call. Since swifterror only has a guaranteed
2569 // value on normal exits, we can ignore implicit and explicit unwind
2571 if (isa
<CallInst
>(Call
)) {
2572 Builder
.SetInsertPoint(Call
->getNextNode());
2574 auto Invoke
= cast
<InvokeInst
>(Call
);
2575 Builder
.SetInsertPoint(Invoke
->getNormalDest()->getFirstNonPHIOrDbg());
2578 // Get the current swifterror value and store it to the alloca.
2579 auto ValueAfterCall
= emitGetSwiftErrorValue(Builder
, ValueTy
, Shape
);
2580 Builder
.CreateStore(ValueAfterCall
, Alloca
);
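// Note (added for clarity, not in the original source): the "fake function
// pointer" calls created above act as placeholders for the swifterror get/set
// operations. They are recorded in Shape.SwiftErrorOps so that later stages of
// coroutine splitting can lower them to real swifterror accesses.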
2585 /// Eliminate a formerly-swifterror alloca by inserting the get/set
2586 /// intrinsics and attempting to MemToReg the alloca away.
2587 static void eliminateSwiftErrorAlloca(Function
&F
, AllocaInst
*Alloca
,
2588 coro::Shape
&Shape
) {
2589 for (Use
&Use
: llvm::make_early_inc_range(Alloca
->uses())) {
2590 // swifterror values can only be used in very specific ways.
2591 // We take advantage of that here.
2592 auto User
= Use
.getUser();
2593 if (isa
<LoadInst
>(User
) || isa
<StoreInst
>(User
))
2596 assert(isa
<CallInst
>(User
) || isa
<InvokeInst
>(User
));
2597 auto Call
= cast
<Instruction
>(User
);
2599 auto Addr
= emitSetAndGetSwiftErrorValueAround(Call
, Alloca
, Shape
);
2601 // Use the returned slot address as the call argument.
2605 // All the uses should be loads and stores now.
2606 assert(isAllocaPromotable(Alloca
));
2609 /// "Eliminate" a swifterror argument by reducing it to the alloca case
2610 /// and then loading and storing in the prologue and epilog.
2612 /// The argument keeps the swifterror flag.
2613 static void eliminateSwiftErrorArgument(Function
&F
, Argument
&Arg
,
2615 SmallVectorImpl
<AllocaInst
*> &AllocasToPromote
) {
2616 IRBuilder
<> Builder(F
.getEntryBlock().getFirstNonPHIOrDbg());
2618 auto ArgTy
= cast
<PointerType
>(Arg
.getType());
2619 auto ValueTy
= PointerType::getUnqual(F
.getContext());
2621 // Reduce to the alloca case:
2623 // Create an alloca and replace all uses of the arg with it.
2624 auto Alloca
= Builder
.CreateAlloca(ValueTy
, ArgTy
->getAddressSpace());
2625 Arg
.replaceAllUsesWith(Alloca
);
2627 // Set an initial value in the alloca. swifterror is always null on entry.
2628 auto InitialValue
= Constant::getNullValue(ValueTy
);
2629 Builder
.CreateStore(InitialValue
, Alloca
);
2631 // Find all the suspends in the function and save and restore around them.
2632 for (auto *Suspend
: Shape
.CoroSuspends
) {
2633 (void) emitSetAndGetSwiftErrorValueAround(Suspend
, Alloca
, Shape
);
2636 // Find all the coro.ends in the function and restore the error value.
2637 for (auto *End
: Shape
.CoroEnds
) {
2638 Builder
.SetInsertPoint(End
);
2639 auto FinalValue
= Builder
.CreateLoad(ValueTy
, Alloca
);
2640 (void) emitSetSwiftErrorValue(Builder
, FinalValue
, Shape
);
2643 // Now we can use the alloca logic.
2644 AllocasToPromote
.push_back(Alloca
);
2645 eliminateSwiftErrorAlloca(F
, Alloca
, Shape
);
2648 /// Eliminate all problematic uses of swifterror arguments and allocas
2649 /// from the function. We'll fix them up later when splitting the function.
2650 static void eliminateSwiftError(Function
&F
, coro::Shape
&Shape
) {
2651 SmallVector
<AllocaInst
*, 4> AllocasToPromote
;
2653 // Look for a swifterror argument.
2654 for (auto &Arg
: F
.args()) {
2655 if (!Arg
.hasSwiftErrorAttr()) continue;
2657 eliminateSwiftErrorArgument(F
, Arg
, Shape
, AllocasToPromote
);
2661 // Look for swifterror allocas.
2662 for (auto &Inst
: F
.getEntryBlock()) {
2663 auto Alloca
= dyn_cast
<AllocaInst
>(&Inst
);
2664 if (!Alloca
|| !Alloca
->isSwiftError()) continue;
2666 // Clear the swifterror flag.
2667 Alloca
->setSwiftError(false);
2669 AllocasToPromote
.push_back(Alloca
);
2670 eliminateSwiftErrorAlloca(F
, Alloca
, Shape
);
2673 // If we have any allocas to promote, compute a dominator tree and
2674 // promote them en masse.
2675 if (!AllocasToPromote
.empty()) {
2676 DominatorTree
DT(F
);
2677 PromoteMemToReg(AllocasToPromote
, DT
);
2681 /// retcon and retcon.once conventions assume that all spill uses can be sunk
2682 /// after the coro.begin intrinsic.
2683 static void sinkSpillUsesAfterCoroBegin(Function
&F
,
2684 const FrameDataInfo
&FrameData
,
2685 CoroBeginInst
*CoroBegin
) {
2686 DominatorTree
Dom(F
);
2688 SmallSetVector
<Instruction
*, 32> ToMove
;
2689 SmallVector
<Instruction
*, 32> Worklist
;
2691 // Collect all users that precede coro.begin.
2692 for (auto *Def
: FrameData
.getAllDefs()) {
2693 for (User
*U
: Def
->users()) {
2694 auto Inst
= cast
<Instruction
>(U
);
2695 if (Inst
->getParent() != CoroBegin
->getParent() ||
2696 Dom
.dominates(CoroBegin
, Inst
))
2698 if (ToMove
.insert(Inst
))
2699 Worklist
.push_back(Inst
);
2702 // Recursively collect users before coro.begin.
2703 while (!Worklist
.empty()) {
2704 auto *Def
= Worklist
.pop_back_val();
2705 for (User
*U
: Def
->users()) {
2706 auto Inst
= cast
<Instruction
>(U
);
2707 if (Dom
.dominates(CoroBegin
, Inst
))
2709 if (ToMove
.insert(Inst
))
2710 Worklist
.push_back(Inst
);
2714 // Sort by dominance.
2715 SmallVector
<Instruction
*, 64> InsertionList(ToMove
.begin(), ToMove
.end());
2716 llvm::sort(InsertionList
, [&Dom
](Instruction
*A
, Instruction
*B
) -> bool {
    // If A dominates B it should precede (<) B.
2718 return Dom
.dominates(A
, B
);
2721 Instruction
*InsertPt
= CoroBegin
->getNextNode();
2722 for (Instruction
*Inst
: InsertionList
)
2723 Inst
->moveBefore(InsertPt
);
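// Note (added for clarity, not in the original source): for the retcon,
// retcon.once and async ABIs the frame pointer only exists after coro.begin
// (Shape.FramePtr is coro.begin itself), so any instruction that reads a
// spilled value must execute after coro.begin; sorting by dominance before the
// move preserves def-before-use order while sinking.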
/// For each local variable whose users all live inside a single suspended
/// region, sink its lifetime.start markers to just after the suspend block.
/// Doing so minimizes the lifetime of each variable, hence minimizing the
/// amount of data we end up putting on the frame.
2730 static void sinkLifetimeStartMarkers(Function
&F
, coro::Shape
&Shape
,
2731 SuspendCrossingInfo
&Checker
) {
2735 DominatorTree
DT(F
);
2737 // Collect all possible basic blocks which may dominate all uses of allocas.
2738 SmallPtrSet
<BasicBlock
*, 4> DomSet
;
2739 DomSet
.insert(&F
.getEntryBlock());
2740 for (auto *CSI
: Shape
.CoroSuspends
) {
2741 BasicBlock
*SuspendBlock
= CSI
->getParent();
2742 assert(isSuspendBlock(SuspendBlock
) && SuspendBlock
->getSingleSuccessor() &&
2743 "should have split coro.suspend into its own block");
2744 DomSet
.insert(SuspendBlock
->getSingleSuccessor());
2747 for (Instruction
&I
: instructions(F
)) {
2748 AllocaInst
* AI
= dyn_cast
<AllocaInst
>(&I
);
2752 for (BasicBlock
*DomBB
: DomSet
) {
2754 SmallVector
<Instruction
*, 1> Lifetimes
;
2756 auto isLifetimeStart
= [](Instruction
* I
) {
2757 if (auto* II
= dyn_cast
<IntrinsicInst
>(I
))
2758 return II
->getIntrinsicID() == Intrinsic::lifetime_start
;
2762 auto collectLifetimeStart
= [&](Instruction
*U
, AllocaInst
*AI
) {
2763 if (isLifetimeStart(U
)) {
2764 Lifetimes
.push_back(U
);
2767 if (!U
->hasOneUse() || U
->stripPointerCasts() != AI
)
2769 if (isLifetimeStart(U
->user_back())) {
2770 Lifetimes
.push_back(U
->user_back());
2776 for (User
*U
: AI
->users()) {
2777 Instruction
*UI
= cast
<Instruction
>(U
);
2778 // For all users except lifetime.start markers, if they are all
2779 // dominated by one of the basic blocks and do not cross
2780 // suspend points as well, then there is no need to spill the
2782 if (!DT
.dominates(DomBB
, UI
->getParent()) ||
2783 Checker
.isDefinitionAcrossSuspend(DomBB
, UI
)) {
2784 // Skip lifetime.start, GEP and bitcast used by lifetime.start
2786 if (collectLifetimeStart(UI
, AI
))
2792 // Sink lifetime.start markers to dominate block when they are
2793 // only used outside the region.
2794 if (Valid
&& Lifetimes
.size() != 0) {
2795 auto *NewLifetime
= Lifetimes
[0]->clone();
2796 NewLifetime
->replaceUsesOfWith(NewLifetime
->getOperand(1), AI
);
2797 NewLifetime
->insertBefore(DomBB
->getTerminator());
      // All the lifetime.start markers outside the region are no longer
      // necessary.
2800 for (Instruction
*S
: Lifetimes
)
2801 S
->eraseFromParent();
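// Illustrative example for the sinking above (hypothetical C++, not from the
// original source):
//
//   task g() {
//     co_await start();   // suspend point
//     int tmp = 0;        // `tmp` is only used after the suspend
//     use(tmp);
//   }
//
// If the frontend emitted lifetime.start for `tmp` in the entry block, the
// marker would appear to cross the suspend even though `tmp` does not; sinking
// it past the suspend block keeps `tmp` off the coroutine frame.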
2809 static void collectFrameAlloca(AllocaInst
*AI
, coro::Shape
&Shape
,
2810 const SuspendCrossingInfo
&Checker
,
2811 SmallVectorImpl
<AllocaInfo
> &Allocas
,
2812 const DominatorTree
&DT
) {
2813 if (Shape
.CoroSuspends
.empty())
2816 // The PromiseAlloca will be specially handled since it needs to be in a
2817 // fixed position in the frame.
2818 if (AI
== Shape
.SwitchLowering
.PromiseAlloca
)
2821 // The __coro_gro alloca should outlive the promise, make sure we
2822 // keep it outside the frame.
2823 if (AI
->hasMetadata(LLVMContext::MD_coro_outside_frame
))
2826 // The code that uses lifetime.start intrinsic does not work for functions
2827 // with loops without exit. Disable it on ABIs we know to generate such
2829 bool ShouldUseLifetimeStartInfo
=
2830 (Shape
.ABI
!= coro::ABI::Async
&& Shape
.ABI
!= coro::ABI::Retcon
&&
2831 Shape
.ABI
!= coro::ABI::RetconOnce
);
2832 AllocaUseVisitor Visitor
{AI
->getModule()->getDataLayout(), DT
,
2833 *Shape
.CoroBegin
, Checker
,
2834 ShouldUseLifetimeStartInfo
};
2835 Visitor
.visitPtr(*AI
);
2836 if (!Visitor
.getShouldLiveOnFrame())
2838 Allocas
.emplace_back(AI
, Visitor
.getAliasesCopy(),
2839 Visitor
.getMayWriteBeforeCoroBegin());
2842 static std::optional
<std::pair
<Value
&, DIExpression
&>>
2843 salvageDebugInfoImpl(SmallDenseMap
<Argument
*, AllocaInst
*, 4> &ArgToAllocaMap
,
2844 bool OptimizeFrame
, bool UseEntryValue
, Function
*F
,
2845 Value
*Storage
, DIExpression
*Expr
,
2846 bool SkipOutermostLoad
) {
2847 IRBuilder
<> Builder(F
->getContext());
2848 auto InsertPt
= F
->getEntryBlock().getFirstInsertionPt();
2849 while (isa
<IntrinsicInst
>(InsertPt
))
2851 Builder
.SetInsertPoint(&F
->getEntryBlock(), InsertPt
);
2853 while (auto *Inst
= dyn_cast_or_null
<Instruction
>(Storage
)) {
2854 if (auto *LdInst
= dyn_cast
<LoadInst
>(Inst
)) {
2855 Storage
= LdInst
->getPointerOperand();
2856 // FIXME: This is a heuristic that works around the fact that
2857 // LLVM IR debug intrinsics cannot yet distinguish between
2858 // memory and value locations: Because a dbg.declare(alloca) is
2859 // implicitly a memory location no DW_OP_deref operation for the
2860 // last direct load from an alloca is necessary. This condition
2861 // effectively drops the *last* DW_OP_deref in the expression.
2862 if (!SkipOutermostLoad
)
2863 Expr
= DIExpression::prepend(Expr
, DIExpression::DerefBefore
);
2864 } else if (auto *StInst
= dyn_cast
<StoreInst
>(Inst
)) {
2865 Storage
= StInst
->getValueOperand();
2867 SmallVector
<uint64_t, 16> Ops
;
2868 SmallVector
<Value
*, 0> AdditionalValues
;
2869 Value
*Op
= llvm::salvageDebugInfoImpl(
2870 *Inst
, Expr
? Expr
->getNumLocationOperands() : 0, Ops
,
2872 if (!Op
|| !AdditionalValues
.empty()) {
2873 // If salvaging failed or salvaging produced more than one location
2874 // operand, give up.
2878 Expr
= DIExpression::appendOpsToArg(Expr
, Ops
, 0, /*StackValue*/ false);
2880 SkipOutermostLoad
= false;
2883 return std::nullopt
;
2885 auto *StorageAsArg
= dyn_cast
<Argument
>(Storage
);
2886 const bool IsSwiftAsyncArg
=
2887 StorageAsArg
&& StorageAsArg
->hasAttribute(Attribute::SwiftAsync
);
2889 // Swift async arguments are described by an entry value of the ABI-defined
2890 // register containing the coroutine context.
2891 // Entry values in variadic expressions are not supported.
2892 if (IsSwiftAsyncArg
&& UseEntryValue
&& !Expr
->isEntryValue() &&
2893 Expr
->isSingleLocationExpression())
2894 Expr
= DIExpression::prepend(Expr
, DIExpression::EntryValue
);
2896 // If the coroutine frame is an Argument, store it in an alloca to improve
2897 // its availability (e.g. registers may be clobbered).
2898 // Avoid this if optimizations are enabled (they would remove the alloca) or
2899 // if the value is guaranteed to be available through other means (e.g. swift
2901 if (StorageAsArg
&& !OptimizeFrame
&& !IsSwiftAsyncArg
) {
2902 auto &Cached
= ArgToAllocaMap
[StorageAsArg
];
2904 Cached
= Builder
.CreateAlloca(Storage
->getType(), 0, nullptr,
2905 Storage
->getName() + ".debug");
2906 Builder
.CreateStore(Storage
, Cached
);
2909 // FIXME: LLVM lacks nuanced semantics to differentiate between
2910 // memory and direct locations at the IR level. The backend will
2911 // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
2912 // location. Thus, if there are deref and offset operations in the
2913 // expression, we need to add a DW_OP_deref at the *start* of the
2914 // expression to first load the contents of the alloca before
2915 // adjusting it with the expression.
2916 Expr
= DIExpression::prepend(Expr
, DIExpression::DerefBefore
);
2919 return {{*Storage
, *Expr
}};
2922 void coro::salvageDebugInfo(
2923 SmallDenseMap
<Argument
*, AllocaInst
*, 4> &ArgToAllocaMap
,
2924 DbgVariableIntrinsic
&DVI
, bool OptimizeFrame
, bool UseEntryValue
) {
2926 Function
*F
= DVI
.getFunction();
2927 // Follow the pointer arithmetic all the way to the incoming
2928 // function argument and convert into a DIExpression.
2929 bool SkipOutermostLoad
= !isa
<DbgValueInst
>(DVI
);
2930 Value
*OriginalStorage
= DVI
.getVariableLocationOp(0);
2932 auto SalvagedInfo
= ::salvageDebugInfoImpl(
2933 ArgToAllocaMap
, OptimizeFrame
, UseEntryValue
, F
, OriginalStorage
,
2934 DVI
.getExpression(), SkipOutermostLoad
);
2938 Value
*Storage
= &SalvagedInfo
->first
;
2939 DIExpression
*Expr
= &SalvagedInfo
->second
;
2941 DVI
.replaceVariableLocationOp(OriginalStorage
, Storage
);
2942 DVI
.setExpression(Expr
);
2943 // We only hoist dbg.declare today since it doesn't make sense to hoist
2944 // dbg.value since it does not have the same function wide guarantees that
2945 // dbg.declare does.
2946 if (isa
<DbgDeclareInst
>(DVI
)) {
2947 std::optional
<BasicBlock::iterator
> InsertPt
;
2948 if (auto *I
= dyn_cast
<Instruction
>(Storage
)) {
2949 InsertPt
= I
->getInsertionPointAfterDef();
2950 // Update DILocation only in O0 since it is easy to get out of sync in
2951 // optimizations. See https://github.com/llvm/llvm-project/pull/75104 for
2953 if (!OptimizeFrame
&& I
->getDebugLoc())
2954 DVI
.setDebugLoc(I
->getDebugLoc());
2955 } else if (isa
<Argument
>(Storage
))
2956 InsertPt
= F
->getEntryBlock().begin();
2958 DVI
.moveBefore(*(*InsertPt
)->getParent(), *InsertPt
);
2962 void coro::salvageDebugInfo(
2963 SmallDenseMap
<Argument
*, AllocaInst
*, 4> &ArgToAllocaMap
, DPValue
&DPV
,
2964 bool OptimizeFrame
, bool UseEntryValue
) {
2966 Function
*F
= DPV
.getFunction();
2967 // Follow the pointer arithmetic all the way to the incoming
2968 // function argument and convert into a DIExpression.
2969 bool SkipOutermostLoad
= DPV
.isDbgDeclare();
2970 Value
*OriginalStorage
= DPV
.getVariableLocationOp(0);
2972 auto SalvagedInfo
= ::salvageDebugInfoImpl(
2973 ArgToAllocaMap
, OptimizeFrame
, UseEntryValue
, F
, OriginalStorage
,
2974 DPV
.getExpression(), SkipOutermostLoad
);
2978 Value
*Storage
= &SalvagedInfo
->first
;
2979 DIExpression
*Expr
= &SalvagedInfo
->second
;
2981 DPV
.replaceVariableLocationOp(OriginalStorage
, Storage
);
2982 DPV
.setExpression(Expr
);
2983 // We only hoist dbg.declare today since it doesn't make sense to hoist
2984 // dbg.value since it does not have the same function wide guarantees that
2985 // dbg.declare does.
2986 if (DPV
.getType() == DPValue::LocationType::Declare
) {
2987 std::optional
<BasicBlock::iterator
> InsertPt
;
2988 if (auto *I
= dyn_cast
<Instruction
>(Storage
)) {
2989 InsertPt
= I
->getInsertionPointAfterDef();
2990 // Update DILocation only in O0 since it is easy to get out of sync in
2991 // optimizations. See https://github.com/llvm/llvm-project/pull/75104 for
2993 if (!OptimizeFrame
&& I
->getDebugLoc())
2994 DPV
.setDebugLoc(I
->getDebugLoc());
2995 } else if (isa
<Argument
>(Storage
))
2996 InsertPt
= F
->getEntryBlock().begin();
2998 DPV
.removeFromParent();
2999 (*InsertPt
)->getParent()->insertDPValueBefore(&DPV
, *InsertPt
);
3004 static void doRematerializations(
3005 Function
&F
, SuspendCrossingInfo
&Checker
,
3006 const std::function
<bool(Instruction
&)> &MaterializableCallback
) {
3012 // See if there are materializable instructions across suspend points
3013 // We record these as the starting point to also identify materializable
3014 // defs of uses in these operations
3015 for (Instruction
&I
: instructions(F
)) {
3016 if (!MaterializableCallback(I
))
3018 for (User
*U
: I
.users())
3019 if (Checker
.isDefinitionAcrossSuspend(I
, U
))
3020 Spills
[&I
].push_back(cast
<Instruction
>(U
));
3023 // Process each of the identified rematerializable instructions
3024 // and add predecessor instructions that can also be rematerialized.
3025 // This is actually a graph of instructions since we could potentially
3026 // have multiple uses of a def in the set of predecessor instructions.
3027 // The approach here is to maintain a graph of instructions for each bottom
3028 // level instruction - where we have a unique set of instructions (nodes)
3029 // and edges between them. We then walk the graph in reverse post-dominator
3030 // order to insert them past the suspend point, but ensure that ordering is
3031 // correct. We also rely on CSE removing duplicate defs for remats of
3032 // different instructions with a def in common (rather than maintaining more
3033 // complex graphs for each suspend point)
3035 // We can do this by adding new nodes to the list for each suspend
3036 // point. Then using standard GraphTraits to give a reverse post-order
3037 // traversal when we insert the nodes after the suspend
3038 SmallMapVector
<Instruction
*, std::unique_ptr
<RematGraph
>, 8> AllRemats
;
3039 for (auto &E
: Spills
) {
3040 for (Instruction
*U
: E
.second
) {
3041 // Don't process a user twice (this can happen if the instruction uses
3042 // more than one rematerializable def)
3043 if (AllRemats
.count(U
))
3046 // Constructor creates the whole RematGraph for the given Use
3048 std::make_unique
<RematGraph
>(MaterializableCallback
, U
, Checker
);
3050 LLVM_DEBUG(dbgs() << "***** Next remat group *****\n";
3051 ReversePostOrderTraversal
<RematGraph
*> RPOT(RematUPtr
.get());
3052 for (auto I
= RPOT
.begin(); I
!= RPOT
.end();
3053 ++I
) { (*I
)->Node
->dump(); } dbgs()
3056 AllRemats
[U
] = std::move(RematUPtr
);
3060 // Rewrite materializable instructions to be materialized at the use
3062 LLVM_DEBUG(dumpRemats("Materializations", AllRemats
));
3063 rewriteMaterializableInstructions(AllRemats
);
3066 void coro::buildCoroutineFrame(
3067 Function
&F
, Shape
&Shape
,
3068 const std::function
<bool(Instruction
&)> &MaterializableCallback
) {
3069 // Don't eliminate swifterror in async functions that won't be split.
3070 if (Shape
.ABI
!= coro::ABI::Async
|| !Shape
.CoroSuspends
.empty())
3071 eliminateSwiftError(F
, Shape
);
3073 if (Shape
.ABI
== coro::ABI::Switch
&&
3074 Shape
.SwitchLowering
.PromiseAlloca
) {
3075 Shape
.getSwitchCoroId()->clearPromise();
3078 // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
3079 // intrinsics are in their own blocks to simplify the logic of building up
3080 // SuspendCrossing data.
3081 for (auto *CSI
: Shape
.CoroSuspends
) {
3082 if (auto *Save
= CSI
->getCoroSave())
3083 splitAround(Save
, "CoroSave");
3084 splitAround(CSI
, "CoroSuspend");
3087 // Put CoroEnds into their own blocks.
3088 for (AnyCoroEndInst
*CE
: Shape
.CoroEnds
) {
3089 splitAround(CE
, "CoroEnd");
3091 // Emit the musttail call function in a new block before the CoroEnd.
3092 // We do this here so that the right suspend crossing info is computed for
3093 // the uses of the musttail call function call. (Arguments to the coro.end
3094 // instructions would be ignored)
3095 if (auto *AsyncEnd
= dyn_cast
<CoroAsyncEndInst
>(CE
)) {
3096 auto *MustTailCallFn
= AsyncEnd
->getMustTailCallFunction();
3097 if (!MustTailCallFn
)
3099 IRBuilder
<> Builder(AsyncEnd
);
3100 SmallVector
<Value
*, 8> Args(AsyncEnd
->args());
3101 auto Arguments
= ArrayRef
<Value
*>(Args
).drop_front(3);
3102 auto *Call
= createMustTailCall(AsyncEnd
->getDebugLoc(), MustTailCallFn
,
3103 Arguments
, Builder
);
3104 splitAround(Call
, "MustTailCall.Before.CoroEnd");
  // Later code makes structural assumptions about single-predecessor PHIs,
  // e.g. that they are not live across a suspend point.
3110 cleanupSinglePredPHIs(F
);
  // Transform multi-edge PHI Nodes, so that any value feeding into a PHI
  // never has its definition separated from the PHI by a suspend point.
3116 // Build suspend crossing info.
3117 SuspendCrossingInfo
Checker(F
, Shape
);
3119 doRematerializations(F
, Checker
, MaterializableCallback
);
3121 FrameDataInfo FrameData
;
3122 SmallVector
<CoroAllocaAllocInst
*, 4> LocalAllocas
;
3123 SmallVector
<Instruction
*, 4> DeadInstructions
;
3124 if (Shape
.ABI
!= coro::ABI::Async
&& Shape
.ABI
!= coro::ABI::Retcon
&&
3125 Shape
.ABI
!= coro::ABI::RetconOnce
)
3126 sinkLifetimeStartMarkers(F
, Shape
, Checker
);
3128 // Collect the spills for arguments and other not-materializable values.
3129 for (Argument
&A
: F
.args())
3130 for (User
*U
: A
.users())
3131 if (Checker
.isDefinitionAcrossSuspend(A
, U
))
3132 FrameData
.Spills
[&A
].push_back(cast
<Instruction
>(U
));
3134 const DominatorTree
DT(F
);
3135 for (Instruction
&I
: instructions(F
)) {
3136 // Values returned from coroutine structure intrinsics should not be part
3137 // of the Coroutine Frame.
3138 if (isCoroutineStructureIntrinsic(I
) || &I
== Shape
.CoroBegin
)
3141 // Handle alloca.alloc specially here.
3142 if (auto AI
= dyn_cast
<CoroAllocaAllocInst
>(&I
)) {
3143 // Check whether the alloca's lifetime is bounded by suspend points.
3144 if (isLocalAlloca(AI
)) {
3145 LocalAllocas
.push_back(AI
);
3149 // If not, do a quick rewrite of the alloca and then add spills of
3150 // the rewritten value. The rewrite doesn't invalidate anything in
3151 // Spills because the other alloca intrinsics have no other operands
3152 // besides AI, and it doesn't invalidate the iteration because we delay
3154 auto Alloc
= lowerNonLocalAlloca(AI
, Shape
, DeadInstructions
);
3156 for (User
*U
: Alloc
->users()) {
3157 if (Checker
.isDefinitionAcrossSuspend(*Alloc
, U
))
3158 FrameData
.Spills
[Alloc
].push_back(cast
<Instruction
>(U
));
3163 // Ignore alloca.get; we process this as part of coro.alloca.alloc.
3164 if (isa
<CoroAllocaGetInst
>(I
))
3167 if (auto *AI
= dyn_cast
<AllocaInst
>(&I
)) {
3168 collectFrameAlloca(AI
, Shape
, Checker
, FrameData
.Allocas
, DT
);
3172 for (User
*U
: I
.users())
3173 if (Checker
.isDefinitionAcrossSuspend(I
, U
)) {
3174 // We cannot spill a token.
3175 if (I
.getType()->isTokenTy())
3177 "token definition is separated from the use by a suspend point");
3178 FrameData
.Spills
[&I
].push_back(cast
<Instruction
>(U
));
3182 LLVM_DEBUG(dumpAllocas(FrameData
.Allocas
));
  // We don't want the layout of the coroutine frame to be affected by debug
  // information, so we only choose to salvage DbgValueInsts whose values are
  // already in the frame.
  // The dbg.values for allocas are handled specially.
3188 for (auto &Iter
: FrameData
.Spills
) {
3189 auto *V
= Iter
.first
;
3190 SmallVector
<DbgValueInst
*, 16> DVIs
;
3191 SmallVector
<DPValue
*, 16> DPVs
;
3192 findDbgValues(DVIs
, V
, &DPVs
);
3193 for (DbgValueInst
*DVI
: DVIs
)
3194 if (Checker
.isDefinitionAcrossSuspend(*V
, DVI
))
3195 FrameData
.Spills
[V
].push_back(DVI
);
3196 // Add the instructions which carry debug info that is in the frame.
3197 for (DPValue
*DPV
: DPVs
)
3198 if (Checker
.isDefinitionAcrossSuspend(*V
, DPV
->Marker
->MarkedInstr
))
3199 FrameData
.Spills
[V
].push_back(DPV
->Marker
->MarkedInstr
);
3202 LLVM_DEBUG(dumpSpills("Spills", FrameData
.Spills
));
3203 if (Shape
.ABI
== coro::ABI::Retcon
|| Shape
.ABI
== coro::ABI::RetconOnce
||
3204 Shape
.ABI
== coro::ABI::Async
)
3205 sinkSpillUsesAfterCoroBegin(F
, FrameData
, Shape
.CoroBegin
);
3206 Shape
.FrameTy
= buildFrameType(F
, Shape
, FrameData
);
3207 Shape
.FramePtr
= Shape
.CoroBegin
;
3208 // For now, this works for C++ programs only.
3209 buildFrameDebugInfo(F
, Shape
, FrameData
);
3210 insertSpills(FrameData
, Shape
);
3211 lowerLocalAllocas(LocalAllocas
, DeadInstructions
);
3213 for (auto *I
: DeadInstructions
)
3214 I
->eraseFromParent();