//===- CoroFrame.cpp - Builds and manipulates coroutine frame ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains classes used to discover if for a particular value
// its definition precedes and its uses follow a suspend block. This is
// referred to as a suspend crossing value.
//
// Using the information discovered we form a Coroutine Frame structure to
// contain those values. All uses of those values are replaced with appropriate
// GEP + load from the coroutine frame. At the point of the definition we spill
// the value into the coroutine frame.
//===----------------------------------------------------------------------===//
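// For illustration only (hypothetical IR): a value %x whose definition
// precedes a suspend point and whose use follows it,
//
//   %x = call i32 @foo()
//   %s = call i8 @llvm.coro.suspend(token none, i1 false)
//   ...
//   %y = add i32 %x, 1
//
// crosses the suspend, so it gets a frame field: a store of %x is emitted
// right after its definition, and the use is rewritten into a reload from a
// GEP into the coroutine frame.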
18 #include "CoroInternal.h"
19 #include "llvm/ADT/ScopeExit.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/Analysis/StackLifetime.h"
22 #include "llvm/IR/DIBuilder.h"
23 #include "llvm/IR/DebugInfo.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/InstIterator.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/OptimizedStructLayout.h"
31 #include "llvm/Transforms/Coroutines/ABI.h"
32 #include "llvm/Transforms/Coroutines/CoroInstr.h"
33 #include "llvm/Transforms/Coroutines/MaterializationUtils.h"
34 #include "llvm/Transforms/Coroutines/SpillUtils.h"
35 #include "llvm/Transforms/Coroutines/SuspendCrossingInfo.h"
36 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37 #include "llvm/Transforms/Utils/Local.h"
38 #include "llvm/Transforms/Utils/PromoteMemToReg.h"

using namespace llvm;

extern cl::opt<bool> UseNewDbgInfoFormat;

#define DEBUG_TYPE "coro-frame"

class FrameTypeBuilder;
// Mapping from the to-be-spilled value to all the users that need a reload.
struct FrameDataInfo {
  // All the values (that are not allocas) that need to be spilled to the
  // coroutine frame.
  coro::SpillInfo &Spills;
  // Allocas contains all values defined as allocas that need to live in the
  // coroutine frame.
  SmallVectorImpl<coro::AllocaInfo> &Allocas;

  FrameDataInfo(coro::SpillInfo &Spills,
                SmallVectorImpl<coro::AllocaInfo> &Allocas)
      : Spills(Spills), Allocas(Allocas) {}

  SmallVector<Value *, 8> getAllDefs() const {
    SmallVector<Value *, 8> Defs;
    for (const auto &P : Spills)
      Defs.push_back(P.first);
    for (const auto &A : Allocas)
      Defs.push_back(A.Alloca);
    return Defs;
  }

  uint32_t getFieldIndex(Value *V) const {
    auto Itr = FieldIndexMap.find(V);
    assert(Itr != FieldIndexMap.end() &&
           "Value does not have a frame field index");
    return Itr->second;
  }

  void setFieldIndex(Value *V, uint32_t Index) {
    assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) &&
           "Cannot set the index for the same field twice.");
    FieldIndexMap[V] = Index;
  }

  Align getAlign(Value *V) const {
    auto Iter = FieldAlignMap.find(V);
    assert(Iter != FieldAlignMap.end());
    return Iter->second;
  }

  void setAlign(Value *V, Align AL) {
    assert(FieldAlignMap.count(V) == 0);
    FieldAlignMap.insert({V, AL});
  }

  uint64_t getDynamicAlign(Value *V) const {
    auto Iter = FieldDynamicAlignMap.find(V);
    assert(Iter != FieldDynamicAlignMap.end());
    return Iter->second;
  }

  void setDynamicAlign(Value *V, uint64_t Align) {
    assert(FieldDynamicAlignMap.count(V) == 0);
    FieldDynamicAlignMap.insert({V, Align});
  }

  uint64_t getOffset(Value *V) const {
    auto Iter = FieldOffsetMap.find(V);
    assert(Iter != FieldOffsetMap.end());
    return Iter->second;
  }

  void setOffset(Value *V, uint64_t Offset) {
    assert(FieldOffsetMap.count(V) == 0);
    FieldOffsetMap.insert({V, Offset});
  }

  // Remap the index of every field in the frame, using the final layout index.
  void updateLayoutIndex(FrameTypeBuilder &B);

private:
  // LayoutIndexUpdateStarted is used to avoid updating the index of any field
  // twice.
  bool LayoutIndexUpdateStarted = false;
  // Map from values to their slot indexes on the frame. They will be first set
  // with their original insertion field index. After the frame is built, their
  // indexes will be updated into the final layout index.
  DenseMap<Value *, uint32_t> FieldIndexMap;
  // Map from values to their alignment on the frame. They would be set after
  // the frame is built.
  DenseMap<Value *, Align> FieldAlignMap;
  DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
  // Map from values to their offset on the frame. They would be set after
  // the frame is built.
  DenseMap<Value *, uint64_t> FieldOffsetMap;
};

static void dumpSpills(StringRef Title, const coro::SpillInfo &Spills) {
  dbgs() << "------------- " << Title << " --------------\n";
  for (const auto &E : Spills) {
    E.first->dump();
    dbgs() << "   user: ";
    for (auto *I : E.second)
      I->dump();
  }
}

static void dumpAllocas(const SmallVectorImpl<coro::AllocaInfo> &Allocas) {
  dbgs() << "------------- Allocas --------------\n";
  for (const auto &A : Allocas) {
    A.Alloca->dump();
  }
}

using FieldIDType = size_t;

// We cannot rely solely on natural alignment of a type when building a
// coroutine frame, and if the alignment specified on the Alloca instruction
// differs from the natural alignment of the alloca type we will need to insert
// padding.
class FrameTypeBuilder {
private:
  struct Field {
    uint64_t Size;
    uint64_t Offset;
    Type *Ty;
    FieldIDType LayoutFieldIndex;
    Align Alignment;
    Align TyAlignment;
    uint64_t DynamicAlignBuffer;
  };

  const DataLayout &DL;
  LLVMContext &Context;
  uint64_t StructSize = 0;
  Align StructAlign;
  bool IsFinished = false;

  std::optional<Align> MaxFrameAlignment;

  SmallVector<Field, 8> Fields;
  DenseMap<Value *, unsigned> FieldIndexByKey;

public:
  FrameTypeBuilder(LLVMContext &Context, const DataLayout &DL,
                   std::optional<Align> MaxFrameAlignment)
      : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}

  /// Add a field to this structure for the storage of an `alloca`
  /// instruction.
  [[nodiscard]] FieldIDType addFieldForAlloca(AllocaInst *AI,
                                              bool IsHeader = false) {
    Type *Ty = AI->getAllocatedType();

    // Make an array type if this is a static array allocation.
    if (AI->isArrayAllocation()) {
      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
        Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
      else
        report_fatal_error("Coroutines cannot handle non static allocas yet");
    }

    return addField(Ty, AI->getAlign(), IsHeader);
  }
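
  // For example (illustrative): a static array allocation such as
  // `%a = alloca i64, i32 4` is stored as a single `[4 x i64]` frame field;
  // dynamically-sized allocas are rejected above.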

  /// We want to put the allocas whose lifetime-ranges are not overlapped
  /// into one slot of coroutine frame.
  /// Consider the example at: https://bugs.llvm.org/show_bug.cgi?id=45566
  ///
  ///     cppcoro::task<void> alternative_paths(bool cond) {
  ///         if (cond) {
  ///             big_structure a;
  ///             process(a);
  ///             co_await something();
  ///         } else {
  ///             big_structure b;
  ///             process2(b);
  ///             co_await something();
  ///         }
  ///     }
  ///
  /// We want to put variable a and variable b in the same slot to
  /// reduce the size of coroutine frame.
  ///
  /// This function uses the StackLifetime algorithm to partition the
  /// AllocaInsts in Spills into non-overlapping sets, so that allocas in the
  /// same non-overlapping set can share a single slot in the coroutine frame.
  /// A field is then added for each set, using the largest type in the set as
  /// the field type.
  ///
  /// Side Effects: Because we sort the allocas, the order of allocas in the
  /// frame may be different from the order in the source code.
  void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
                          coro::Shape &Shape, bool OptimizeFrame);

  /// Add a field to this structure.
  [[nodiscard]] FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment,
                                     bool IsHeader = false,
                                     bool IsSpillOfValue = false) {
    assert(!IsFinished && "adding fields to a finished builder");
    assert(Ty && "must provide a type for a field");

    // The field size is always the alloc size of the type.
    uint64_t FieldSize = DL.getTypeAllocSize(Ty);

    // For an alloca with size=0, we don't need to add a field and they
    // can just point to any index in the frame. Use index 0.
    if (FieldSize == 0) {
      return 0;
    }

    // The field alignment might not be the type alignment, but we need
    // to remember the type alignment anyway to build the type.
    // If we are spilling values we don't need to worry about ABI alignment
    // concerns.
    Align ABIAlign = DL.getABITypeAlign(Ty);
    Align TyAlignment = ABIAlign;
    if (IsSpillOfValue && MaxFrameAlignment && *MaxFrameAlignment < ABIAlign)
      TyAlignment = *MaxFrameAlignment;
    Align FieldAlignment = MaybeFieldAlignment.value_or(TyAlignment);

    // The field alignment could be bigger than the max frame alignment; in
    // that case we request additional storage to be able to dynamically
    // align the pointer.
    uint64_t DynamicAlignBuffer = 0;
    if (MaxFrameAlignment && (FieldAlignment > *MaxFrameAlignment)) {
      DynamicAlignBuffer =
          offsetToAlignment(MaxFrameAlignment->value(), FieldAlignment);
      FieldAlignment = *MaxFrameAlignment;
      FieldSize = FieldSize + DynamicAlignBuffer;
    }

    // Lay out header fields immediately.
    uint64_t Offset;
    if (IsHeader) {
      Offset = alignTo(StructSize, FieldAlignment);
      StructSize = Offset + FieldSize;

    // Everything else has a flexible offset.
    } else {
      Offset = OptimizedStructLayoutField::FlexibleOffset;
    }

    Fields.push_back({FieldSize, Offset, Ty, 0, FieldAlignment, TyAlignment,
                      DynamicAlignBuffer});
    return Fields.size() - 1;
  }

  /// Finish the layout and create the struct type with the given name.
  StructType *finish(StringRef Name);

  uint64_t getStructSize() const {
    assert(IsFinished && "not yet finished!");
    return StructSize;
  }

  Align getStructAlign() const {
    assert(IsFinished && "not yet finished!");
    return StructAlign;
  }

  FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
    assert(IsFinished && "not yet finished!");
    return Fields[Id].LayoutFieldIndex;
  }

  Field getLayoutField(FieldIDType Id) const {
    assert(IsFinished && "not yet finished!");
    return Fields[Id];
  }
};

void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
  auto Updater = [&](Value *I) {
    auto Field = B.getLayoutField(getFieldIndex(I));
    setFieldIndex(I, Field.LayoutFieldIndex);
    setAlign(I, Field.Alignment);
    uint64_t dynamicAlign =
        Field.DynamicAlignBuffer
            ? Field.DynamicAlignBuffer + Field.Alignment.value()
            : 0;
    setDynamicAlign(I, dynamicAlign);
    setOffset(I, Field.Offset);
  };
  LayoutIndexUpdateStarted = true;
  for (auto &S : Spills)
    Updater(S.first);
  for (const auto &A : Allocas)
    Updater(A.Alloca);
  LayoutIndexUpdateStarted = false;
}

void FrameTypeBuilder::addFieldForAllocas(const Function &F,
                                          FrameDataInfo &FrameData,
                                          coro::Shape &Shape,
                                          bool OptimizeFrame) {
  using AllocaSetType = SmallVector<AllocaInst *, 4>;
  SmallVector<AllocaSetType, 4> NonOverlapedAllocas;

  // We need to add fields for the allocas at the end of this function.
  auto AddFieldForAllocasAtExit = make_scope_exit([&]() {
    for (auto AllocaList : NonOverlapedAllocas) {
      auto *LargestAI = *AllocaList.begin();
      FieldIDType Id = addFieldForAlloca(LargestAI);
      for (auto *Alloca : AllocaList)
        FrameData.setFieldIndex(Alloca, Id);
    }
  });

  if (!OptimizeFrame) {
    for (const auto &A : FrameData.Allocas) {
      AllocaInst *Alloca = A.Alloca;
      NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
    }
    return;
  }

  // Because there are paths from the lifetime.start to coro.end
  // for each alloca, the live ranges of all allocas overlap
  // in the blocks that contain coro.end and their successor blocks.
  // So we choose to skip these blocks when we calculate the live range
  // for each alloca. This should be reasonable since there shouldn't be uses
  // in these blocks and the coroutine frame shouldn't be used outside the
  // coroutine body.
  //
  // Note that the user of coro.suspend may not be a SwitchInst. However, this
  // case seems too complex to handle. And it is harmless to skip these
  // patterns since it just prevents putting the allocas into the same
  // slot.
  DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
  for (auto *CoroSuspendInst : Shape.CoroSuspends) {
    for (auto *U : CoroSuspendInst->users()) {
      if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) {
        auto *SWI = const_cast<SwitchInst *>(ConstSWI);
        DefaultSuspendDest[SWI] = SWI->getDefaultDest();
        SWI->setDefaultDest(SWI->getSuccessor(1));
      }
    }
  }

  auto ExtractAllocas = [&]() {
    AllocaSetType Allocas;
    Allocas.reserve(FrameData.Allocas.size());
    for (const auto &A : FrameData.Allocas)
      Allocas.push_back(A.Alloca);
    return Allocas;
  };
  StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
                                      StackLifetime::LivenessType::May);
  StackLifetimeAnalyzer.run();
  auto DoAllocasInterfere = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
    return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
        StackLifetimeAnalyzer.getLiveRange(AI2));
  };
  auto GetAllocaSize = [&](const coro::AllocaInfo &A) {
    std::optional<TypeSize> RetSize = A.Alloca->getAllocationSize(DL);
    assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
    assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
    return RetSize->getFixedValue();
  };
  // Put larger allocas in the front. So the larger allocas have higher
  // priority to merge, which can potentially save more space. Also each
  // AllocaSet would be ordered, so we can get the largest Alloca in one
  // AllocaSet easily.
  sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) {
    return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
  });
  for (const auto &A : FrameData.Allocas) {
    AllocaInst *Alloca = A.Alloca;
    bool Merged = false;
    // Try to find if the Alloca does not interfere with any existing
    // NonOverlappedAllocaSet. If it is true, insert the alloca into that
    // NonOverlappedAllocaSet.
    for (auto &AllocaSet : NonOverlapedAllocas) {
      assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
      bool NoInterference = none_of(AllocaSet, [&](auto Iter) {
        return DoAllocasInterfere(Alloca, Iter);
      });
      // If the alignment of A is a multiple of the alignment of B, the
      // address of A should satisfy the alignment requirement of B.
      //
      // There may be other more fine-grained strategies to handle the
      // alignment information during the merging process, but they seem hard
      // to implement for little benefit.
      bool Alignable = [&]() -> bool {
        auto *LargestAlloca = *AllocaSet.begin();
        return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
               0;
      }();
      bool CouldMerge = NoInterference && Alignable;
      if (!CouldMerge)
        continue;
      AllocaSet.push_back(Alloca);
      Merged = true;
      break;
    }
    if (!Merged) {
      NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
    }
  }
  // Recover the default target destination for each Switch statement
  // saved above.
  for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
    SwitchInst *SWI = SwitchAndDefaultDest.first;
    BasicBlock *DestBB = SwitchAndDefaultDest.second;
    SWI->setDefaultDest(DestBB);
  }
  // This debug output could tell us which allocas are merged into one slot.
  LLVM_DEBUG(for (auto &AllocaSet
                  : NonOverlapedAllocas) {
    if (AllocaSet.size() > 1) {
      dbgs() << "In Function:" << F.getName() << "\n";
      dbgs() << "Find Union Set "
             << "\n";
      dbgs() << "\tAllocas are \n";
      for (auto Alloca : AllocaSet)
        dbgs() << "\t\t" << *Alloca << "\n";
    }
  });
}
466 StructType
*FrameTypeBuilder::finish(StringRef Name
) {
467 assert(!IsFinished
&& "already finished!");
469 // Prepare the optimal-layout field array.
470 // The Id in the layout field is a pointer to our Field for it.
471 SmallVector
<OptimizedStructLayoutField
, 8> LayoutFields
;
472 LayoutFields
.reserve(Fields
.size());
473 for (auto &Field
: Fields
) {
474 LayoutFields
.emplace_back(&Field
, Field
.Size
, Field
.Alignment
,
479 auto SizeAndAlign
= performOptimizedStructLayout(LayoutFields
);
480 StructSize
= SizeAndAlign
.first
;
481 StructAlign
= SizeAndAlign
.second
;
483 auto getField
= [](const OptimizedStructLayoutField
&LayoutField
) -> Field
& {
484 return *static_cast<Field
*>(const_cast<void*>(LayoutField
.Id
));
487 // We need to produce a packed struct type if there's a field whose
488 // assigned offset isn't a multiple of its natural type alignment.
490 for (auto &LayoutField
: LayoutFields
) {
491 auto &F
= getField(LayoutField
);
492 if (!isAligned(F
.TyAlignment
, LayoutField
.Offset
))
498 // Build the struct body.
499 SmallVector
<Type
*, 16> FieldTypes
;
500 FieldTypes
.reserve(LayoutFields
.size() * 3 / 2);
501 uint64_t LastOffset
= 0;
502 for (auto &LayoutField
: LayoutFields
) {
503 auto &F
= getField(LayoutField
);
505 auto Offset
= LayoutField
.Offset
;
507 // Add a padding field if there's a padding gap and we're either
508 // building a packed struct or the padding gap is more than we'd
509 // get from aligning to the field type's natural alignment.
510 assert(Offset
>= LastOffset
);
511 if (Offset
!= LastOffset
) {
512 if (Packed
|| alignTo(LastOffset
, F
.TyAlignment
) != Offset
)
513 FieldTypes
.push_back(ArrayType::get(Type::getInt8Ty(Context
),
514 Offset
- LastOffset
));
518 F
.LayoutFieldIndex
= FieldTypes
.size();
520 FieldTypes
.push_back(F
.Ty
);
521 if (F
.DynamicAlignBuffer
) {
522 FieldTypes
.push_back(
523 ArrayType::get(Type::getInt8Ty(Context
), F
.DynamicAlignBuffer
));
525 LastOffset
= Offset
+ F
.Size
;
528 StructType
*Ty
= StructType::create(Context
, FieldTypes
, Name
, Packed
);
531 // Check that the IR layout matches the offsets we expect.
532 auto Layout
= DL
.getStructLayout(Ty
);
533 for (auto &F
: Fields
) {
534 assert(Ty
->getElementType(F
.LayoutFieldIndex
) == F
.Ty
);
535 assert(Layout
->getElementOffset(F
.LayoutFieldIndex
) == F
.Offset
);
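
// Illustrative layout (hypothetical fields): if the optimized layout places an
// i32 at offset 0 and an i64 at offset 8, no explicit padding array is emitted
// for the 4-byte gap unless the struct ended up packed, because aligning the
// running offset to the i64's natural alignment already yields offset 8; a
// `[N x i8]` padding field is only inserted when that is not the case.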

static void cacheDIVar(FrameDataInfo &FrameData,
                       DenseMap<Value *, DILocalVariable *> &DIVarCache) {
  for (auto *V : FrameData.getAllDefs()) {
    if (DIVarCache.contains(V))
      continue;

    auto CacheIt = [&DIVarCache, V](const auto &Container) {
      auto *I = llvm::find_if(Container, [](auto *DDI) {
        return DDI->getExpression()->getNumElements() == 0;
      });
      if (I != Container.end())
        DIVarCache.insert({V, (*I)->getVariable()});
    };
    CacheIt(findDbgDeclares(V));
    CacheIt(findDVRDeclares(V));
  }
}

/// Create a name for a Type. It uses MDString to store the newly created
/// string to avoid a memory leak.
static StringRef solveTypeName(Type *Ty) {
  if (Ty->isIntegerTy()) {
    // The longest name in common may be '__int_128', which has 9 characters.
    SmallString<16> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth();
    auto *MDName = MDString::get(Ty->getContext(), OS.str());
    return MDName->getString();
  }

  if (Ty->isFloatingPointTy()) {
    if (Ty->isFloatTy())
      return "__float_";
    if (Ty->isDoubleTy())
      return "__double_";
    return "__floating_type_";
  }

  if (Ty->isPointerTy())
    return "PointerType";

  if (Ty->isStructTy()) {
    if (!cast<StructType>(Ty)->hasName())
      return "__LiteralStructType_";

    auto Name = Ty->getStructName();

    SmallString<16> Buffer(Name);
    for (auto &Iter : Buffer)
      if (Iter == '.' || Iter == ':')
        Iter = '_';
    auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
    return MDName->getString();
  }

  return "UnknownType";
}

static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
                           const DataLayout &Layout, DIScope *Scope,
                           unsigned LineNum,
                           DenseMap<Type *, DIType *> &DITypeCache) {
  if (DIType *DT = DITypeCache.lookup(Ty))
    return DT;

  StringRef Name = solveTypeName(Ty);

  DIType *RetType = nullptr;

  if (Ty->isIntegerTy()) {
    auto BitWidth = cast<IntegerType>(Ty)->getBitWidth();
    RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed,
                                      llvm::DINode::FlagArtificial);
  } else if (Ty->isFloatingPointTy()) {
    RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
                                      dwarf::DW_ATE_float,
                                      llvm::DINode::FlagArtificial);
  } else if (Ty->isPointerTy()) {
    // Construct PointerType points to null (aka void *) instead of exploring
    // pointee type to avoid infinite search problem. For example, we would be
    // in trouble if we traverse recursively a self-referential type such as:
    //
    //  struct Node {
    //      Node* ptr;
    //  };
    RetType =
        Builder.createPointerType(nullptr, Layout.getTypeSizeInBits(Ty),
                                  Layout.getABITypeAlign(Ty).value() * CHAR_BIT,
                                  /*DWARFAddressSpace=*/std::nullopt, Name);
  } else if (Ty->isStructTy()) {
    auto *DIStruct = Builder.createStructType(
        Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
        Layout.getPrefTypeAlign(Ty).value() * CHAR_BIT,
        llvm::DINode::FlagArtificial, nullptr, llvm::DINodeArray());

    auto *StructTy = cast<StructType>(Ty);
    SmallVector<Metadata *, 16> Elements;
    for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
      DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout,
                                 Scope, LineNum, DITypeCache);
      Elements.push_back(Builder.createMemberType(
          Scope, DITy->getName(), Scope->getFile(), LineNum,
          DITy->getSizeInBits(), DITy->getAlignInBits(),
          Layout.getStructLayout(StructTy)->getElementOffsetInBits(I),
          llvm::DINode::FlagArtificial, DITy));
    }

    Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements));

    RetType = DIStruct;
  } else {
    LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n");
    TypeSize Size = Layout.getTypeSizeInBits(Ty);
    auto *CharSizeType = Builder.createBasicType(
        Name, 8, dwarf::DW_ATE_unsigned_char, llvm::DINode::FlagArtificial);

    if (Size <= 8)
      RetType = CharSizeType;
    else {
      if (Size % 8 != 0)
        Size = TypeSize::getFixed(Size + 8 - (Size % 8));

      RetType = Builder.createArrayType(
          Size, Layout.getPrefTypeAlign(Ty).value(), CharSizeType,
          Builder.getOrCreateArray(Builder.getOrCreateSubrange(0, Size / 8)));
    }
  }

  DITypeCache.insert({Ty, RetType});
  return RetType;
}

/// Build artificial debug info for C++ coroutine frames to allow users to
/// inspect the contents of the frame directly.
///
/// Create debug information for the coroutine frame with the debug name
/// "__coro_frame". The debug information for the fields of the coroutine
/// frame is constructed in the following way:
/// 1. For every value in the frame, we search its dbg.declare uses to find
///    the corresponding debug variable for the value. If we can find the
///    debug variable, we get full and accurate debug information.
/// 2. If we can't get debug information in step 1, we try to build a DIType
///    from the value's IR type instead. We do this in solveDIType. We only
///    handle integer, floating-point, pointer, and struct types for now.
static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
                                FrameDataInfo &FrameData) {
  DISubprogram *DIS = F.getSubprogram();
  // If there is no DISubprogram for F, it implies the function is not compiled
  // with debug info, so we don't need to generate debug info for the frame
  // either.
  if (!DIS || !DIS->getUnit() ||
      !dwarf::isCPlusPlus(
          (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()) ||
      DIS->getUnit()->getEmissionKind() !=
          DICompileUnit::DebugEmissionKind::FullDebug)
    return;

  assert(Shape.ABI == coro::ABI::Switch &&
         "We could only build debug information for C++ coroutines now.\n");

  DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);

  assert(Shape.getPromiseAlloca() &&
         "Coroutine with switch ABI should own Promise alloca");

  DIFile *DFile = DIS->getFile();
  unsigned LineNum = DIS->getLine();

  DICompositeType *FrameDITy = DBuilder.createStructType(
      DIS->getUnit(), Twine(F.getName() + ".coro_frame_ty").str(),
      DFile, LineNum, Shape.FrameSize * 8,
      Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
      llvm::DINodeArray());
  StructType *FrameTy = Shape.FrameTy;
  SmallVector<Metadata *, 16> Elements;
  DataLayout Layout = F.getDataLayout();

  DenseMap<Value *, DILocalVariable *> DIVarCache;
  cacheDIVar(FrameData, DIVarCache);

  unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
  unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
  unsigned IndexIndex = Shape.SwitchLowering.IndexField;

  DenseMap<unsigned, StringRef> NameCache;
  NameCache.insert({ResumeIndex, "__resume_fn"});
  NameCache.insert({DestroyIndex, "__destroy_fn"});
  NameCache.insert({IndexIndex, "__coro_index"});

  Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex),
       *DestroyFnTy = FrameTy->getElementType(DestroyIndex),
       *IndexTy = FrameTy->getElementType(IndexIndex);

  DenseMap<unsigned, DIType *> TyCache;
  TyCache.insert(
      {ResumeIndex, DBuilder.createPointerType(
                        nullptr, Layout.getTypeSizeInBits(ResumeFnTy))});
  TyCache.insert(
      {DestroyIndex, DBuilder.createPointerType(
                         nullptr, Layout.getTypeSizeInBits(DestroyFnTy))});

  /// FIXME: If we fill the field `SizeInBits` with the actual size of
  /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
  TyCache.insert({IndexIndex, DBuilder.createBasicType(
                                  "__coro_index",
                                  (Layout.getTypeSizeInBits(IndexTy) < 8)
                                      ? 8
                                      : Layout.getTypeSizeInBits(IndexTy),
                                  dwarf::DW_ATE_unsigned_char)});

  for (auto *V : FrameData.getAllDefs()) {
    if (!DIVarCache.contains(V))
      continue;

    auto Index = FrameData.getFieldIndex(V);

    NameCache.insert({Index, DIVarCache[V]->getName()});
    TyCache.insert({Index, DIVarCache[V]->getType()});
  }

  // Cache from index to (Align, Offset) pair.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
  // The Align and Offset of the Resume function and Destroy function are
  // fixed.
  OffsetCache.insert({ResumeIndex, {8, 0}});
  OffsetCache.insert({DestroyIndex, {8, 8}});
  OffsetCache.insert(
      {IndexIndex,
       {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});

  for (auto *V : FrameData.getAllDefs()) {
    auto Index = FrameData.getFieldIndex(V);

    OffsetCache.insert(
        {Index, {FrameData.getAlign(V).value(), FrameData.getOffset(V)}});
  }

  DenseMap<Type *, DIType *> DITypeCache;
  // This counter is used to avoid duplicated type names. E.g., there could be
  // many i32 and i64 types in one coroutine, and we would use i32_0 and
  // i32_1 so that the names of the fields don't conflict with each other.
  unsigned UnknownTypeNum = 0;
  for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
    if (!OffsetCache.contains(Index))
      continue;

    std::string Name;
    uint64_t SizeInBits;
    uint32_t AlignInBits;
    uint64_t OffsetInBits;
    DIType *DITy = nullptr;

    Type *Ty = FrameTy->getElementType(Index);
    assert(Ty->isSized() && "We can't handle type which is not sized.\n");
    SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedValue();
    AlignInBits = OffsetCache[Index].first * 8;
    OffsetInBits = OffsetCache[Index].second * 8;

    if (auto It = NameCache.find(Index); It != NameCache.end()) {
      Name = It->second.str();
      DITy = TyCache[Index];
    } else {
      DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache);
      assert(DITy && "SolveDIType shouldn't return nullptr.\n");
      Name = DITy->getName().str();
      Name += "_" + std::to_string(UnknownTypeNum);
      UnknownTypeNum++;
    }

    Elements.push_back(DBuilder.createMemberType(
        FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits,
        llvm::DINode::FlagArtificial, DITy));
  }

  DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements));

  auto *FrameDIVar =
      DBuilder.createAutoVariable(DIS, "__coro_frame", DFile, LineNum,
                                  FrameDITy, true, DINode::FlagArtificial);

  // The Subprogram has a RetainedNodes field which records the debug
  // variables it contains, so we need to add __coro_frame to the
  // RetainedNodes of it.
  //
  // If we don't add __coro_frame to the RetainedNodes, the user may get
  // `no symbol __coro_frame in context` rather than `__coro_frame
  // is optimized out`, which is more precise.
  auto RetainedNodes = DIS->getRetainedNodes();
  SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
                                               RetainedNodes.end());
  RetainedNodesVec.push_back(FrameDIVar);
  DIS->replaceOperandWith(7, (MDTuple::get(F.getContext(), RetainedNodesVec)));

  // Construct the location for the frame debug variable. The column number
  // is fake but it should be fine.
  DILocation *DILoc =
      DILocation::get(DIS->getContext(), LineNum, /*Column=*/1, DIS);
  assert(FrameDIVar->isValidLocationForIntrinsic(DILoc));

  if (UseNewDbgInfoFormat) {
    DbgVariableRecord *NewDVR =
        new DbgVariableRecord(ValueAsMetadata::get(Shape.FramePtr), FrameDIVar,
                              DBuilder.createExpression(), DILoc,
                              DbgVariableRecord::LocationType::Declare);
    BasicBlock::iterator It = Shape.getInsertPtAfterFramePtr();
    It->getParent()->insertDbgRecordBefore(NewDVR, It);
  } else {
    DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
                           DBuilder.createExpression(), DILoc,
                           &*Shape.getInsertPtAfterFramePtr());
  }
}
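
// With the variable above in place, a DWARF-aware debugger can usually show
// the synthetic "__coro_frame" variable inside the coroutine and expand its
// fields using the names cached above; this describes the intent, not a
// guarantee for any particular debugger.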

// Build a struct that will keep state for an active coroutine.
//   struct f.frame {
//     ResumeFnTy ResumeFnAddr;
//     ResumeFnTy DestroyFnAddr;
//     ... promise (if present) ...
//     int ResumeIndex;
//     ... spilled values and allocas ...
//   };
static StructType *buildFrameType(Function &F, coro::Shape &Shape,
                                  FrameDataInfo &FrameData,
                                  bool OptimizeFrame) {
  LLVMContext &C = F.getContext();
  const DataLayout &DL = F.getDataLayout();

  // We will use this value to cap the alignment of spilled values.
  std::optional<Align> MaxFrameAlignment;
  if (Shape.ABI == coro::ABI::Async)
    MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
  FrameTypeBuilder B(C, DL, MaxFrameAlignment);

  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
  std::optional<FieldIDType> SwitchIndexFieldId;

  if (Shape.ABI == coro::ABI::Switch) {
    auto *FnPtrTy = PointerType::getUnqual(C);

    // Add header fields for the resume and destroy functions.
    // We can rely on these being perfectly packed.
    (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);
    (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);

    // The PromiseAlloca field needs to be explicitly added here because it's
    // a header field with a fixed offset based on its alignment. Hence it
    // needs special handling and cannot be added to FrameData.Allocas.
    if (PromiseAlloca)
      FrameData.setFieldIndex(
          PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true));

    // Add a field to store the suspend index. This doesn't need to
    // be in the header.
    unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
    Type *IndexType = Type::getIntNTy(C, IndexBits);

    SwitchIndexFieldId = B.addField(IndexType, std::nullopt);
  } else {
    assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
  }

  // Because multiple allocas may own the same field slot,
  // we add the allocas to fields here.
  B.addFieldForAllocas(F, FrameData, Shape, OptimizeFrame);
  // Add PromiseAlloca to the Allocas list so that
  // 1. updateLayoutIndex could update its index after
  //    `performOptimizedStructLayout`
  // 2. it is processed in insertSpills.
  if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
    // We assume that the promise alloca won't be modified before
    // CoroBegin and that no alias will be created before CoroBegin.
    FrameData.Allocas.emplace_back(
        PromiseAlloca, DenseMap<Instruction *, std::optional<APInt>>{}, false);
  // Create an entry for every spilled value.
  for (auto &S : FrameData.Spills) {
    Type *FieldType = S.first->getType();
    // For byval arguments, we need to store the pointed value in the frame,
    // instead of the pointer itself.
    if (const Argument *A = dyn_cast<Argument>(S.first))
      if (A->hasByValAttr())
        FieldType = A->getParamByValType();
    FieldIDType Id = B.addField(FieldType, std::nullopt, false /*header*/,
                                true /*IsSpillOfValue*/);
    FrameData.setFieldIndex(S.first, Id);
  }

  StructType *FrameTy = [&] {
    SmallString<32> Name(F.getName());
    Name.append(".Frame");
    return B.finish(Name);
  }();

  FrameData.updateLayoutIndex(B);
  Shape.FrameAlign = B.getStructAlign();
  Shape.FrameSize = B.getStructSize();

  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    // In the switch ABI, remember the switch-index field.
    auto IndexField = B.getLayoutField(*SwitchIndexFieldId);
    Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;
    Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
    Shape.SwitchLowering.IndexOffset = IndexField.Offset;

    // Also round the frame size up to a multiple of its alignment, as is
    // generally expected in C/C++.
    Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
    break;
  }

  // In the retcon ABI, remember whether the frame is inline in the storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    auto Id = Shape.getRetconCoroId();
    Shape.RetconLowering.IsFrameInlineInStorage
      = (B.getStructSize() <= Id->getStorageSize() &&
         B.getStructAlign() <= Id->getStorageAlignment());
    break;
  }
  case coro::ABI::Async: {
    Shape.AsyncLowering.FrameOffset =
        alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign);
    // Also make the final context size a multiple of the context alignment to
    // make allocation easier for allocators.
    Shape.AsyncLowering.ContextSize =
        alignTo(Shape.AsyncLowering.FrameOffset + Shape.FrameSize,
                Shape.AsyncLowering.getContextAlignment());
    if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
      report_fatal_error(
          "The alignment requirement of frame variables cannot be higher than "
          "the alignment of the async function context");
    }
    break;
  }
  }

  return FrameTy;
}

// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
// the coroutine:
//
//    %hdl = coro.begin(...)
//    whatever
//
// becomes:
//
//    %hdl = coro.begin(...)
//    br label %AllocaSpillBB
//
//  AllocaSpillBB:
//    ; geps corresponding to allocas that were moved to coroutine frame
//    br label PostSpill
//
//  PostSpill:
//    whatever
//
static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
  LLVMContext &C = Shape.CoroBegin->getContext();
  Function *F = Shape.CoroBegin->getFunction();
  IRBuilder<> Builder(C);
  StructType *FrameTy = Shape.FrameTy;
  Value *FramePtr = Shape.FramePtr;
  DominatorTree DT(*F);
  SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;

  // Create a GEP with the given index into the coroutine frame for the
  // original value Orig. Appends an extra 0 index for array-allocas,
  // preserving the original type.
  auto GetFramePointer = [&](Value *Orig) -> Value * {
    FieldIDType Index = FrameData.getFieldIndex(Orig);
    SmallVector<Value *, 3> Indices = {
        ConstantInt::get(Type::getInt32Ty(C), 0),
        ConstantInt::get(Type::getInt32Ty(C), Index),
    };

    if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
        auto Count = CI->getValue().getZExtValue();
        if (Count > 1) {
          Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
        }
      } else {
        report_fatal_error("Coroutines cannot handle non static allocas yet");
      }
    }

    auto GEP = cast<GetElementPtrInst>(
        Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
    if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
      if (FrameData.getDynamicAlign(Orig) != 0) {
        assert(FrameData.getDynamicAlign(Orig) == AI->getAlign().value());
        auto *M = AI->getModule();
        auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
        auto *PtrValue = Builder.CreatePtrToInt(GEP, IntPtrTy);
        auto *AlignMask =
            ConstantInt::get(IntPtrTy, AI->getAlign().value() - 1);
        PtrValue = Builder.CreateAdd(PtrValue, AlignMask);
        PtrValue = Builder.CreateAnd(PtrValue, Builder.CreateNot(AlignMask));
        return Builder.CreateIntToPtr(PtrValue, AI->getType());
      }
      // If the type of GEP is not equal to the type of AllocaInst, it implies
      // that the AllocaInst may be reused in the Frame slot of another
      // AllocaInst. So we cast GEP to the type of the AllocaInst here to
      // re-use the Frame storage.
      //
      // Note: If we change the strategy dealing with alignment, we need to
      // refine this cast.
      if (GEP->getType() != Orig->getType())
        return Builder.CreateAddrSpaceCast(GEP, Orig->getType(),
                                           Orig->getName() + Twine(".cast"));
    }
    return GEP;
  };

  for (auto const &E : FrameData.Spills) {
    Value *Def = E.first;
    auto SpillAlignment = Align(FrameData.getAlign(Def));
    // Create a store instruction storing the value into the
    // coroutine frame.
    BasicBlock::iterator InsertPt = coro::getSpillInsertionPt(Shape, Def, DT);

    Type *ByValTy = nullptr;
    if (auto *Arg = dyn_cast<Argument>(Def)) {
      // If we're spilling an Argument, make sure we clear 'nocapture'
      // from the coroutine function.
      Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);

      if (Arg->hasByValAttr())
        ByValTy = Arg->getParamByValType();
    }

    auto Index = FrameData.getFieldIndex(Def);
    Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
    auto *G = Builder.CreateConstInBoundsGEP2_32(
        FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
    if (ByValTy) {
      // For byval arguments, we need to store the pointed value in the frame,
      // instead of the pointer itself.
      auto *Value = Builder.CreateLoad(ByValTy, Def);
      Builder.CreateAlignedStore(Value, G, SpillAlignment);
    } else {
      Builder.CreateAlignedStore(Def, G, SpillAlignment);
    }

    BasicBlock *CurrentBlock = nullptr;
    Value *CurrentReload = nullptr;
    for (auto *U : E.second) {
      // If we have not seen the use block, create a load instruction to reload
      // the spilled value from the coroutine frame. Populates the Value
      // pointer reference provided with the frame GEP.
      if (CurrentBlock != U->getParent()) {
        CurrentBlock = U->getParent();
        Builder.SetInsertPoint(CurrentBlock,
                               CurrentBlock->getFirstInsertionPt());

        auto *GEP = GetFramePointer(E.first);
        GEP->setName(E.first->getName() + Twine(".reload.addr"));
        if (ByValTy)
          CurrentReload = GEP;
        else
          CurrentReload = Builder.CreateAlignedLoad(
              FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
              SpillAlignment, E.first->getName() + Twine(".reload"));

        TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(Def);
        TinyPtrVector<DbgVariableRecord *> DVRs = findDVRDeclares(Def);
        // Try our best to find a dbg.declare. If the spill is a temp, there
        // may not be a direct dbg.declare. Walk up the load chain to find one
        // from an alias.
        if (F->getSubprogram()) {
          auto *CurDef = Def;
          while (DIs.empty() && DVRs.empty() && isa<LoadInst>(CurDef)) {
            auto *LdInst = cast<LoadInst>(CurDef);
            // Only consider ptr-to-ptr loads of the same type.
            if (LdInst->getPointerOperandType() != LdInst->getType())
              break;
            CurDef = LdInst->getPointerOperand();
            if (!isa<AllocaInst, LoadInst>(CurDef))
              break;
            DIs = findDbgDeclares(CurDef);
            DVRs = findDVRDeclares(CurDef);
          }
        }

        auto SalvageOne = [&](auto *DDI) {
          bool AllowUnresolved = false;
          // This dbg.declare is preserved for all coro-split function
          // fragments. It will be unreachable in the main function, and
          // processed by coro::salvageDebugInfo() by the Cloner.
          if (UseNewDbgInfoFormat) {
            DbgVariableRecord *NewDVR = new DbgVariableRecord(
                ValueAsMetadata::get(CurrentReload), DDI->getVariable(),
                DDI->getExpression(), DDI->getDebugLoc(),
                DbgVariableRecord::LocationType::Declare);
            Builder.GetInsertPoint()->getParent()->insertDbgRecordBefore(
                NewDVR, Builder.GetInsertPoint());
          } else {
            DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
                .insertDeclare(CurrentReload, DDI->getVariable(),
                               DDI->getExpression(), DDI->getDebugLoc(),
                               &*Builder.GetInsertPoint());
          }
          // This dbg.declare is for the main function entry point. It
          // will be deleted in all coro-split functions.
          coro::salvageDebugInfo(ArgToAllocaMap, *DDI, false /*UseEntryValue*/);
        };
        for_each(DIs, SalvageOne);
        for_each(DVRs, SalvageOne);
      }

      // If we have a single edge PHINode, remove it and replace it with a
      // reload from the coroutine frame. (We already took care of multi edge
      // PHINodes by normalizing them in the rewritePHIs function).
      if (auto *PN = dyn_cast<PHINode>(U)) {
        assert(PN->getNumIncomingValues() == 1 &&
               "unexpected number of incoming "
               "values in the PHINode");
        PN->replaceAllUsesWith(CurrentReload);
        PN->eraseFromParent();
        continue;
      }

      // Replace all uses of CurrentValue in the current instruction with
      // CurrentReload.
      U->replaceUsesOfWith(Def, CurrentReload);
      // Instructions are added to Def's user list if the attached
      // debug records use Def. Update those now.
      for (DbgVariableRecord &DVR : filterDbgVars(U->getDbgRecordRange()))
        DVR.replaceVariableLocationOp(Def, CurrentReload, true);
    }
  }
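
  // Illustrative before/after for a single spilled value %x (hypothetical IR):
  // right after its definition the value is stored into its frame field,
  //
  //   %x.spill.addr = getelementptr inbounds %f.Frame, ptr %frame, i32 0, i32 K
  //   store i32 %x, ptr %x.spill.addr
  //
  // and in each using block it is reloaded before the first use:
  //
  //   %x.reload.addr = getelementptr inbounds %f.Frame, ptr %frame, i32 0, i32 K
  //   %x.reload = load i32, ptr %x.reload.addr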

  BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent();

  auto SpillBlock = FramePtrBB->splitBasicBlock(
      Shape.getInsertPtAfterFramePtr(), "AllocaSpillBB");
  SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
  Shape.AllocaSpillBlock = SpillBlock;

  // The retcon and retcon.once lowerings assume all uses have been sunk.
  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
      Shape.ABI == coro::ABI::Async) {
    // If we found any allocas, replace all of their remaining uses with GEPs.
    Builder.SetInsertPoint(SpillBlock, SpillBlock->begin());
    for (const auto &P : FrameData.Allocas) {
      AllocaInst *Alloca = P.Alloca;
      auto *G = GetFramePointer(Alloca);

      // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
      // here, as we are changing the location of the instruction.
      G->takeName(Alloca);
      Alloca->replaceAllUsesWith(G);
      Alloca->eraseFromParent();
    }
    return;
  }

  // If we found any allocas, replace all of their remaining uses with GEP
  // instructions. To retain debuggability, we replace the uses of allocas for
  // dbg.declares and dbg.values with the reload from the frame.
  // Note: We cannot replace the alloca with GEP instructions indiscriminately,
  // as some of the uses may not be dominated by CoroBegin.
  Builder.SetInsertPoint(Shape.AllocaSpillBlock,
                         Shape.AllocaSpillBlock->begin());
  SmallVector<Instruction *, 4> UsersToUpdate;
  for (const auto &A : FrameData.Allocas) {
    AllocaInst *Alloca = A.Alloca;
    UsersToUpdate.clear();
    for (User *U : Alloca->users()) {
      auto *I = cast<Instruction>(U);
      if (DT.dominates(Shape.CoroBegin, I))
        UsersToUpdate.push_back(I);
    }
    if (UsersToUpdate.empty())
      continue;
    auto *G = GetFramePointer(Alloca);
    G->setName(Alloca->getName() + Twine(".reload.addr"));

    SmallVector<DbgVariableIntrinsic *, 4> DIs;
    SmallVector<DbgVariableRecord *> DbgVariableRecords;
    findDbgUsers(DIs, Alloca, &DbgVariableRecords);
    for (auto *DVI : DIs)
      DVI->replaceUsesOfWith(Alloca, G);
    for (auto *DVR : DbgVariableRecords)
      DVR->replaceVariableLocationOp(Alloca, G);

    for (Instruction *I : UsersToUpdate) {
      // It is meaningless to retain the lifetime intrinsics that refer to
      // members of the coroutine frame, and these meaningless lifetime
      // intrinsics could block further optimizations.
      if (I->isLifetimeStartOrEnd()) {
        I->eraseFromParent();
        continue;
      }

      I->replaceUsesOfWith(Alloca, G);
    }
  }
  Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
  for (const auto &A : FrameData.Allocas) {
    AllocaInst *Alloca = A.Alloca;
    if (A.MayWriteBeforeCoroBegin) {
      // isEscaped really means potentially modified before CoroBegin.
      if (Alloca->isArrayAllocation())
        report_fatal_error(
            "Coroutines cannot handle copying of array allocas yet");

      auto *G = GetFramePointer(Alloca);
      auto *Value = Builder.CreateLoad(Alloca->getAllocatedType(), Alloca);
      Builder.CreateStore(Value, G);
    }
    // For each alias to Alloca created before CoroBegin but used after
    // CoroBegin, we recreate them after CoroBegin by applying the offset
    // to the pointer in the frame.
    for (const auto &Alias : A.Aliases) {
      auto *FramePtr = GetFramePointer(Alloca);
      auto &Value = *Alias.second;
      auto ITy = IntegerType::get(C, Value.getBitWidth());
      auto *AliasPtr =
          Builder.CreatePtrAdd(FramePtr, ConstantInt::get(ITy, Value));
      Alias.first->replaceUsesWithIf(
          AliasPtr, [&](Use &U) { return DT.dominates(Shape.CoroBegin, U); });
    }
  }

  // PromiseAlloca is not collected in FrameData.Allocas, so we don't handle
  // the case that the PromiseAlloca may have writes before CoroBegin in the
  // code above. And it may be problematic in edge cases. See
  // https://github.com/llvm/llvm-project/issues/57861 for an example.
  if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca) {
    AllocaInst *PA = Shape.SwitchLowering.PromiseAlloca;
    // Check whether there are accesses to the promise alloca before CoroBegin.
    bool HasAccessingPromiseBeforeCB = llvm::any_of(PA->uses(), [&](Use &U) {
      auto *Inst = dyn_cast<Instruction>(U.getUser());
      if (!Inst || DT.dominates(Shape.CoroBegin, Inst))
        return false;

      if (auto *CI = dyn_cast<CallInst>(Inst)) {
        // It is fine if the call wouldn't write to the Promise.
        // This is possible for @llvm.coro.id intrinsics, which
        // would take the promise as the second argument as a
        // placeholder.
        if (CI->onlyReadsMemory() ||
            CI->onlyReadsMemory(CI->getArgOperandNo(&U)))
          return false;
        return true;
      }

      return isa<StoreInst>(Inst) ||
             // It may take too much time to track the uses.
             // Be conservative about the case the use may escape.
             isa<GetElementPtrInst>(Inst) ||
             // There would always be a bitcast for the promise alloca
             // before we enabled opaque pointers. And now, given that
             // opaque pointers are enabled by default, this should be
             // cheap.
             isa<BitCastInst>(Inst);
    });
    if (HasAccessingPromiseBeforeCB) {
      Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
      auto *G = GetFramePointer(PA);
      auto *Value = Builder.CreateLoad(PA->getAllocatedType(), PA);
      Builder.CreateStore(Value, G);
    }
  }
}

// Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
// PHI in InsertedBB.
static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
                                         BasicBlock *InsertedBB,
                                         BasicBlock *PredBB,
                                         PHINode *UntilPHI = nullptr) {
  auto *PN = cast<PHINode>(&SuccBB->front());
  do {
    int Index = PN->getBasicBlockIndex(InsertedBB);
    Value *V = PN->getIncomingValue(Index);
    PHINode *InputV = PHINode::Create(
        V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName());
    InputV->insertBefore(InsertedBB->begin());
    InputV->addIncoming(V, PredBB);
    PN->setIncomingValue(Index, InputV);
    PN = dyn_cast<PHINode>(PN->getNextNode());
  } while (PN != UntilPHI);
}

// Rewrites the PHI Nodes in a cleanuppad.
static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
                                     CleanupPadInst *CleanupPad) {
  // For every incoming edge to a CleanupPad we will create a new block holding
  // all incoming values in single-value PHI nodes. We will then create another
  // block to act as a dispatcher (as all unwind edges for related EH blocks
  // must be the same).
  //
  // cleanuppad:
  //    %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
  //    %3 = cleanuppad within none []
  //
  // It will create:
  //
  // cleanuppad.corodispatch
  //    %2 = phi i8[0, %catchswitch], [1, %catch.1]
  //    %3 = cleanuppad within none []
  //    switch i8 % 2, label %unreachable
  //            [i8 0, label %cleanuppad.from.catchswitch
  //             i8 1, label %cleanuppad.from.catch.1]
  // cleanuppad.from.catchswitch:
  //    %4 = phi i32 [%0, %catchswitch]
  //    br %label cleanuppad
  // cleanuppad.from.catch.1:
  //    %6 = phi i32 [%1, %catch.1]
  //    br %label cleanuppad
  // cleanuppad:
  //    %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
  //                 [%6, %cleanuppad.from.catch.1]

  // Unreachable BB, in case switching on an invalid value in the dispatcher.
  auto *UnreachBB = BasicBlock::Create(
      CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent());
  IRBuilder<> Builder(UnreachBB);
  Builder.CreateUnreachable();

  // Create a new cleanuppad which will be the dispatcher.
  auto *NewCleanupPadBB =
      BasicBlock::Create(CleanupPadBB->getContext(),
                         CleanupPadBB->getName() + Twine(".corodispatch"),
                         CleanupPadBB->getParent(), CleanupPadBB);
  Builder.SetInsertPoint(NewCleanupPadBB);
  auto *SwitchType = Builder.getInt8Ty();
  auto *SetDispatchValuePN =
      Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB));
  CleanupPad->removeFromParent();
  CleanupPad->insertAfter(SetDispatchValuePN);
  auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB,
                                                pred_size(CleanupPadBB));

  int SwitchIndex = 0;
  SmallVector<BasicBlock *, 8> Preds(predecessors(CleanupPadBB));
  for (BasicBlock *Pred : Preds) {
    // Create a new cleanuppad and move the PHI values to there.
    auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(),
                                      CleanupPadBB->getName() +
                                          Twine(".from.") + Pred->getName(),
                                      CleanupPadBB->getParent(), CleanupPadBB);
    updatePhiNodes(CleanupPadBB, Pred, CaseBB);
    CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
                    Pred->getName());
    Builder.SetInsertPoint(CaseBB);
    Builder.CreateBr(CleanupPadBB);
    movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB);

    // Update this Pred to the new unwind point.
    setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB);

    // Setup the switch in the dispatcher.
    auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex);
    SetDispatchValuePN->addIncoming(SwitchConstant, Pred);
    SwitchOnDispatch->addCase(SwitchConstant, CaseBB);
    SwitchIndex++;
  }
}

static void cleanupSinglePredPHIs(Function &F) {
  SmallVector<PHINode *, 32> Worklist;
  for (auto &BB : F) {
    for (auto &Phi : BB.phis()) {
      if (Phi.getNumIncomingValues() == 1) {
        Worklist.push_back(&Phi);
      }
    }
  }
  while (!Worklist.empty()) {
    auto *Phi = Worklist.pop_back_val();
    auto *OriginalValue = Phi->getIncomingValue(0);
    Phi->replaceAllUsesWith(OriginalValue);
  }
}

static void rewritePHIs(BasicBlock &BB) {
  // For every incoming edge we will create a block holding all
  // incoming values in a single PHI node.
  //
  // loop:
  //    %n.val = phi i32[%n, %entry], [%inc, %loop]
  //
  // It will be rewritten to:
  //
  // loop.from.entry:
  //    %n.loop.pre = phi i32 [%n, %entry]
  //    br %label loop
  // loop.from.loop:
  //    %inc.loop.pre = phi i32 [%inc, %loop]
  //    br %label loop
  //
  // After this rewrite, further analysis will ignore any phi nodes with more
  // than one incoming edge.

  // TODO: Simplify PHINodes in the basic block to remove duplicate
  // predecessors.

  // Special case for CleanupPad: all EH blocks must have the same unwind edge
  // so we need to create an additional "dispatcher" block.
  if (auto *CleanupPad =
          dyn_cast_or_null<CleanupPadInst>(BB.getFirstNonPHI())) {
    SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
    for (BasicBlock *Pred : Preds) {
      if (CatchSwitchInst *CS =
              dyn_cast<CatchSwitchInst>(Pred->getTerminator())) {
        // CleanupPad with a CatchSwitch predecessor: therefore this is an
        // unwind destination that needs to be handled specially.
        assert(CS->getUnwindDest() == &BB);
        (void)CS;
        rewritePHIsForCleanupPad(&BB, CleanupPad);
        return;
      }
    }
  }

  LandingPadInst *LandingPad = nullptr;
  PHINode *ReplPHI = nullptr;
  if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
    // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
    // We replace the original landing pad with a PHINode that will collect the
    // results from all of them.
    ReplPHI = PHINode::Create(LandingPad->getType(), 1, "");
    ReplPHI->insertBefore(LandingPad->getIterator());
    ReplPHI->takeName(LandingPad);
    LandingPad->replaceAllUsesWith(ReplPHI);
    // We will erase the original landing pad at the end of this function after
    // ehAwareSplitEdge cloned it in the transition blocks.
  }

  SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
  for (BasicBlock *Pred : Preds) {
    auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
    IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());

    // Stop the moving of values at ReplPHI, as this is either null or the PHI
    // that replaced the landing pad.
    movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI);
  }

  if (LandingPad) {
    // Calls to the ehAwareSplitEdge function cloned the original landing pad.
    // We no longer need it.
    LandingPad->eraseFromParent();
  }
}

static void rewritePHIs(Function &F) {
  SmallVector<BasicBlock *, 8> WorkList;

  for (BasicBlock &BB : F)
    if (auto *PN = dyn_cast<PHINode>(&BB.front()))
      if (PN->getNumIncomingValues() > 1)
        WorkList.push_back(&BB);

  for (BasicBlock *BB : WorkList)
    rewritePHIs(*BB);
}

// Splits the block at a particular instruction unless it is the first
// instruction in the block with a single predecessor.
static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
  auto *BB = I->getParent();
  if (&BB->front() == I) {
    if (BB->getSinglePredecessor()) {
      BB->setName(Name);
      return BB;
    }
  }
  return BB->splitBasicBlock(I, Name);
}

// Split above and below a particular instruction so that it
// will be all alone by itself in a block.
static void splitAround(Instruction *I, const Twine &Name) {
  splitBlockIfNotFirst(I, Name);
  splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
}

/// After we split the coroutine, will the given basic block be along
/// an obvious exit path for the resumption function?
static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
                                              unsigned depth = 3) {
  // If we've bottomed out our depth count, stop searching and assume
  // that the path might loop back.
  if (depth == 0) return false;

  // If this is a suspend block, we're about to exit the resumption function.
  if (coro::isSuspendBlock(BB))
    return true;

  // Recurse into the successors.
  for (auto *Succ : successors(BB)) {
    if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
      return false;
  }

  // If none of the successors leads back in a loop, we're on an exit/abort.
  return true;
}

static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
  // Look for a free that isn't sufficiently obviously followed by
  // either a suspend or a termination, i.e. something that will leave
  // the coro resumption frame.
  for (auto *U : AI->users()) {
    auto FI = dyn_cast<CoroAllocaFreeInst>(U);
    if (!FI)
      continue;

    if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
      return true;
  }

  // If we never found one, we don't need a stack save.
  return false;
}

/// Turn each of the given local allocas into a normal (dynamic) alloca
/// instruction.
static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst *> LocalAllocas,
                              SmallVectorImpl<Instruction *> &DeadInsts) {
  for (auto *AI : LocalAllocas) {
    IRBuilder<> Builder(AI);

    // Save the stack depth. Try to avoid doing this if the stackrestore
    // is going to immediately precede a return or something.
    Value *StackSave = nullptr;
    if (localAllocaNeedsStackSave(AI))
      StackSave = Builder.CreateStackSave();

    // Allocate memory.
    auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
    Alloca->setAlignment(AI->getAlignment());

    for (auto *U : AI->users()) {
      // Replace gets with the allocation.
      if (isa<CoroAllocaGetInst>(U)) {
        U->replaceAllUsesWith(Alloca);

      // Replace frees with stackrestores. This is safe because
      // alloca.alloc is required to obey a stack discipline, although we
      // don't enforce that structurally.
      } else {
        auto FI = cast<CoroAllocaFreeInst>(U);
        if (StackSave) {
          Builder.SetInsertPoint(FI);
          Builder.CreateStackRestore(StackSave);
        }
      }
      DeadInsts.push_back(cast<Instruction>(U));
    }

    DeadInsts.push_back(AI);
  }
}
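
// Illustrative lowering (hypothetical IR): a coro.alloca.alloc/get/free triple
// becomes roughly
//
//   %save = call ptr @llvm.stacksave.p0()        ; only when a save is needed
//   %mem  = alloca i8, i64 <size>
//   ...                                          ; former coro.alloca.get uses %mem
//   call void @llvm.stackrestore.p0(ptr %save)   ; at each coro.alloca.free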

/// Get the current swifterror value.
static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
                                     coro::Shape &Shape) {
  // Make a fake function pointer as a sort of intrinsic.
  auto FnTy = FunctionType::get(ValueTy, {}, false);
  auto Fn = ConstantPointerNull::get(Builder.getPtrTy());

  auto Call = Builder.CreateCall(FnTy, Fn, {});
  Shape.SwiftErrorOps.push_back(Call);

  return Call;
}
/// Set the given value as the current swifterror value.
///
/// Returns a slot that can be used as a swifterror slot.
static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
                                     coro::Shape &Shape) {
  // Make a fake function pointer as a sort of intrinsic.
  auto FnTy = FunctionType::get(Builder.getPtrTy(), {V->getType()}, false);
  auto Fn = ConstantPointerNull::get(Builder.getPtrTy());

  auto Call = Builder.CreateCall(FnTy, Fn, {V});
  Shape.SwiftErrorOps.push_back(Call);

  return Call;
}

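// For illustration only (not from the original source): the two helpers above
// record swifterror reads and writes as calls through a null function pointer,
// collected in Shape.SwiftErrorOps; splitting later replaces these
// placeholders with real swifterror accesses. Roughly:
//
//   %err  = call ptr null()             ; emitGetSwiftErrorValue
//   %slot = call ptr null(ptr %err)     ; emitSetSwiftErrorValue
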
/// Set the swifterror value from the given alloca before a call,
/// then put it back in the alloca afterwards.
///
/// Returns an address that will stand in for the swifterror slot
/// until splitting.
static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
                                                 AllocaInst *Alloca,
                                                 coro::Shape &Shape) {
  auto ValueTy = Alloca->getAllocatedType();
  IRBuilder<> Builder(Call);

  // Load the current value from the alloca and set it as the
  // swifterror value.
  auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
  auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);

  // Move to after the call. Since swifterror only has a guaranteed
  // value on normal exits, we can ignore implicit and explicit unwind
  // edges.
  if (isa<CallInst>(Call)) {
    Builder.SetInsertPoint(Call->getNextNode());
  } else {
    auto Invoke = cast<InvokeInst>(Call);
    Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
  }

  // Get the current swifterror value and store it to the alloca.
  auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
  Builder.CreateStore(ValueAfterCall, Alloca);

  return Addr;
}

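// Illustrative shape of the code emitted around a call %c (not from the
// original source; names are made up):
//
//   %v0   = load ptr, ptr %swifterror.alloca
//   %slot = call ptr null(ptr %v0)        ; "set" placeholder
//   %c    = call ... @callee(...)         ; original call / invoke normal dest
//   %v1   = call ptr null()               ; "get" placeholder
//   store ptr %v1, ptr %swifterror.alloca
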
/// Eliminate a formerly-swifterror alloca by inserting the get/set
/// intrinsics and attempting to MemToReg the alloca away.
static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
                                      coro::Shape &Shape) {
  for (Use &Use : llvm::make_early_inc_range(Alloca->uses())) {
    // swifterror values can only be used in very specific ways.
    // We take advantage of that here.
    auto User = Use.getUser();
    if (isa<LoadInst>(User) || isa<StoreInst>(User))
      continue;

    assert(isa<CallInst>(User) || isa<InvokeInst>(User));
    auto Call = cast<Instruction>(User);

    auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);

    // Use the returned slot address as the call argument.
    Use.set(Addr);
  }

  // All the uses should be loads and stores now.
  assert(isAllocaPromotable(Alloca));
}

/// "Eliminate" a swifterror argument by reducing it to the alloca case
/// and then loading and storing in the prologue and epilog.
///
/// The argument keeps the swifterror flag.
static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
                                        coro::Shape &Shape,
                                        SmallVectorImpl<AllocaInst *> &AllocasToPromote) {
  IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());

  auto ArgTy = cast<PointerType>(Arg.getType());
  auto ValueTy = PointerType::getUnqual(F.getContext());

  // Reduce to the alloca case:

  // Create an alloca and replace all uses of the arg with it.
  auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
  Arg.replaceAllUsesWith(Alloca);

  // Set an initial value in the alloca. swifterror is always null on entry.
  auto InitialValue = Constant::getNullValue(ValueTy);
  Builder.CreateStore(InitialValue, Alloca);

  // Find all the suspends in the function and save and restore around them.
  for (auto *Suspend : Shape.CoroSuspends) {
    (void)emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
  }

  // Find all the coro.ends in the function and restore the error value.
  for (auto *End : Shape.CoroEnds) {
    Builder.SetInsertPoint(End);
    auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
    (void)emitSetSwiftErrorValue(Builder, FinalValue, Shape);
  }

  // Now we can use the alloca logic.
  AllocasToPromote.push_back(Alloca);
  eliminateSwiftErrorAlloca(F, Alloca, Shape);
}

/// Eliminate all problematic uses of swifterror arguments and allocas
/// from the function. We'll fix them up later when splitting the function.
static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
  SmallVector<AllocaInst *, 4> AllocasToPromote;

  // Look for a swifterror argument.
  for (auto &Arg : F.args()) {
    if (!Arg.hasSwiftErrorAttr())
      continue;

    eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
    break;
  }

  // Look for swifterror allocas.
  for (auto &Inst : F.getEntryBlock()) {
    auto Alloca = dyn_cast<AllocaInst>(&Inst);
    if (!Alloca || !Alloca->isSwiftError())
      continue;

    // Clear the swifterror flag.
    Alloca->setSwiftError(false);

    AllocasToPromote.push_back(Alloca);
    eliminateSwiftErrorAlloca(F, Alloca, Shape);
  }

  // If we have any allocas to promote, compute a dominator tree and
  // promote them en masse.
  if (!AllocasToPromote.empty()) {
    DominatorTree DT(F);
    PromoteMemToReg(AllocasToPromote, DT);
  }
}

/// For each local variable whose uses all lie inside one of the suspended
/// regions, sink its lifetime.start marker to just after the suspend block.
/// Doing so minimizes the lifetime of each variable, hence minimizing the
/// amount of data we end up putting on the frame.
static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
                                     SuspendCrossingInfo &Checker,
                                     const DominatorTree &DT) {
  if (F.hasOptNone())
    return;

  // Collect all possible basic blocks which may dominate all uses of allocas.
  SmallPtrSet<BasicBlock *, 4> DomSet;
  DomSet.insert(&F.getEntryBlock());
  for (auto *CSI : Shape.CoroSuspends) {
    BasicBlock *SuspendBlock = CSI->getParent();
    assert(coro::isSuspendBlock(SuspendBlock) &&
           SuspendBlock->getSingleSuccessor() &&
           "should have split coro.suspend into its own block");
    DomSet.insert(SuspendBlock->getSingleSuccessor());
  }

  for (Instruction &I : instructions(F)) {
    AllocaInst *AI = dyn_cast<AllocaInst>(&I);
    if (!AI)
      continue;

    for (BasicBlock *DomBB : DomSet) {
      bool Valid = true;
      SmallVector<Instruction *, 1> Lifetimes;

      auto isLifetimeStart = [](Instruction *I) {
        if (auto *II = dyn_cast<IntrinsicInst>(I))
          return II->getIntrinsicID() == Intrinsic::lifetime_start;
        return false;
      };

      auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
        if (isLifetimeStart(U)) {
          Lifetimes.push_back(U);
          return true;
        }
        if (!U->hasOneUse() || U->stripPointerCasts() != AI)
          return false;
        if (isLifetimeStart(U->user_back())) {
          Lifetimes.push_back(U->user_back());
          return true;
        }
        return false;
      };

      for (User *U : AI->users()) {
        Instruction *UI = cast<Instruction>(U);
        // For all users except lifetime.start markers, if they are all
        // dominated by one of the basic blocks and do not cross
        // suspend points as well, then there is no need to spill the
        // instruction.
        if (!DT.dominates(DomBB, UI->getParent()) ||
            Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
          // Skip lifetime.start, GEP and bitcast used by lifetime.start
          // markers.
          if (collectLifetimeStart(UI, AI))
            continue;
          Valid = false;
          break;
        }
      }
      // Sink lifetime.start markers to the dominating block when they are
      // only used outside the region.
      if (Valid && Lifetimes.size() != 0) {
        auto *NewLifetime = Lifetimes[0]->clone();
        NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), AI);
        NewLifetime->insertBefore(DomBB->getTerminator());

        // The original lifetime.start markers outside the region are no
        // longer necessary.
        for (Instruction *S : Lifetimes)
          S->eraseFromParent();

        break;
      }
    }
  }
}

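// Illustrative example (not from the original source; names are made up): if
// every use of %x other than its lifetime marker is dominated by the block
// after a suspend and does not cross another suspend, the marker is cloned
// there and the originals are erased:
//
//   entry:
//     %x = alloca i64
//     call void @llvm.lifetime.start.p0(i64 8, ptr %x)   ; erased
//     ...
//   await.ready:                                         ; succ of coro.suspend
//     call void @llvm.lifetime.start.p0(i64 8, ptr %x)   ; sunk clone
//
// so %x is no longer considered live across the suspend point.
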
static std::optional<std::pair<Value &, DIExpression &>>
salvageDebugInfoImpl(SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
                     bool UseEntryValue, Function *F, Value *Storage,
                     DIExpression *Expr, bool SkipOutermostLoad) {
  IRBuilder<> Builder(F->getContext());
  auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
  while (isa<IntrinsicInst>(InsertPt))
    ++InsertPt;
  Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);

  while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
    if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
      Storage = LdInst->getPointerOperand();
      // FIXME: This is a heuristic that works around the fact that
      // LLVM IR debug intrinsics cannot yet distinguish between
      // memory and value locations: Because a dbg.declare(alloca) is
      // implicitly a memory location no DW_OP_deref operation for the
      // last direct load from an alloca is necessary. This condition
      // effectively drops the *last* DW_OP_deref in the expression.
      if (!SkipOutermostLoad)
        Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
    } else if (auto *StInst = dyn_cast<StoreInst>(Inst)) {
      Storage = StInst->getValueOperand();
    } else {
      SmallVector<uint64_t, 16> Ops;
      SmallVector<Value *, 0> AdditionalValues;
      Value *Op = llvm::salvageDebugInfoImpl(
          *Inst, Expr ? Expr->getNumLocationOperands() : 0, Ops,
          AdditionalValues);
      if (!Op || !AdditionalValues.empty()) {
        // If salvaging failed or salvaging produced more than one location
        // operand, give up.
        break;
      }
      Storage = Op;
      Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, /*StackValue*/ false);
    }
    SkipOutermostLoad = false;
  }
  if (!Storage)
    return std::nullopt;

  auto *StorageAsArg = dyn_cast<Argument>(Storage);
  const bool IsSwiftAsyncArg =
      StorageAsArg && StorageAsArg->hasAttribute(Attribute::SwiftAsync);

  // Swift async arguments are described by an entry value of the ABI-defined
  // register containing the coroutine context.
  // Entry values in variadic expressions are not supported.
  if (IsSwiftAsyncArg && UseEntryValue && !Expr->isEntryValue() &&
      Expr->isSingleLocationExpression())
    Expr = DIExpression::prepend(Expr, DIExpression::EntryValue);

  // If the coroutine frame is an Argument, store it in an alloca to improve
  // its availability (e.g. registers may be clobbered).
  // Avoid this if the value is guaranteed to be available through other means
  // (e.g. swift ABI guarantees).
  if (StorageAsArg && !IsSwiftAsyncArg) {
    auto &Cached = ArgToAllocaMap[StorageAsArg];
    if (!Cached) {
      Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
                                    Storage->getName() + ".debug");
      Builder.CreateStore(Storage, Cached);
    }
    Storage = Cached;
    // FIXME: LLVM lacks nuanced semantics to differentiate between
    // memory and direct locations at the IR level. The backend will
    // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
    // location. Thus, if there are deref and offset operations in the
    // expression, we need to add a DW_OP_deref at the *start* of the
    // expression to first load the contents of the alloca before
    // adjusting it with the expression.
    Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
  }

  Expr = Expr->foldConstantMath();
  return {{*Storage, *Expr}};
}

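// Worked example (illustrative, not from the original source): a record such
// as
//   #dbg_declare(ptr %frame.arg.reload, !x, !DIExpression(), ...)
// whose storage chases back to a function argument is rewritten to point at a
// "<arg>.debug" alloca created in the entry block, with DW_OP_deref prepended
// to the expression so the debugger first loads the stored pointer before
// applying any remaining offset operations.
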
void coro::salvageDebugInfo(
    SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
    DbgVariableIntrinsic &DVI, bool UseEntryValue) {

  Function *F = DVI.getFunction();
  // Follow the pointer arithmetic all the way to the incoming
  // function argument and convert into a DIExpression.
  bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
  Value *OriginalStorage = DVI.getVariableLocationOp(0);

  auto SalvagedInfo =
      ::salvageDebugInfoImpl(ArgToAllocaMap, UseEntryValue, F, OriginalStorage,
                             DVI.getExpression(), SkipOutermostLoad);
  if (!SalvagedInfo)
    return;

  Value *Storage = &SalvagedInfo->first;
  DIExpression *Expr = &SalvagedInfo->second;

  DVI.replaceVariableLocationOp(OriginalStorage, Storage);
  DVI.setExpression(Expr);
  // We only hoist dbg.declare today since it doesn't make sense to hoist
  // dbg.value since it does not have the same function wide guarantees that
  // dbg.declare does.
  if (isa<DbgDeclareInst>(DVI)) {
    std::optional<BasicBlock::iterator> InsertPt;
    if (auto *I = dyn_cast<Instruction>(Storage)) {
      InsertPt = I->getInsertionPointAfterDef();
      // Update DILocation only if variable was not inlined.
      DebugLoc ILoc = I->getDebugLoc();
      DebugLoc DVILoc = DVI.getDebugLoc();
      if (ILoc && DVILoc &&
          DVILoc->getScope()->getSubprogram() ==
              ILoc->getScope()->getSubprogram())
        DVI.setDebugLoc(I->getDebugLoc());
    } else if (isa<Argument>(Storage))
      InsertPt = F->getEntryBlock().begin();
    if (InsertPt)
      DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt);
  }
}

void coro::salvageDebugInfo(
    SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
    DbgVariableRecord &DVR, bool UseEntryValue) {

  Function *F = DVR.getFunction();
  // Follow the pointer arithmetic all the way to the incoming
  // function argument and convert into a DIExpression.
  bool SkipOutermostLoad = DVR.isDbgDeclare();
  Value *OriginalStorage = DVR.getVariableLocationOp(0);

  auto SalvagedInfo =
      ::salvageDebugInfoImpl(ArgToAllocaMap, UseEntryValue, F, OriginalStorage,
                             DVR.getExpression(), SkipOutermostLoad);
  if (!SalvagedInfo)
    return;

  Value *Storage = &SalvagedInfo->first;
  DIExpression *Expr = &SalvagedInfo->second;

  DVR.replaceVariableLocationOp(OriginalStorage, Storage);
  DVR.setExpression(Expr);
  // We only hoist dbg.declare today since it doesn't make sense to hoist
  // dbg.value since it does not have the same function wide guarantees that
  // dbg.declare does.
  if (DVR.getType() == DbgVariableRecord::LocationType::Declare) {
    std::optional<BasicBlock::iterator> InsertPt;
    if (auto *I = dyn_cast<Instruction>(Storage)) {
      InsertPt = I->getInsertionPointAfterDef();
      // Update DILocation only if variable was not inlined.
      DebugLoc ILoc = I->getDebugLoc();
      DebugLoc DVRLoc = DVR.getDebugLoc();
      if (ILoc && DVRLoc &&
          DVRLoc->getScope()->getSubprogram() ==
              ILoc->getScope()->getSubprogram())
        DVR.setDebugLoc(ILoc);
    } else if (isa<Argument>(Storage))
      InsertPt = F->getEntryBlock().begin();
    if (InsertPt) {
      DVR.removeFromParent();
      (*InsertPt)->getParent()->insertDbgRecordBefore(&DVR, *InsertPt);
    }
  }
}

void coro::normalizeCoroutine(Function &F, coro::Shape &Shape,
                              TargetTransformInfo &TTI) {
  // Don't eliminate swifterror in async functions that won't be split.
  if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
    eliminateSwiftError(F, Shape);

  if (Shape.ABI == coro::ABI::Switch &&
      Shape.SwitchLowering.PromiseAlloca) {
    Shape.getSwitchCoroId()->clearPromise();
  }

  // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
  // intrinsics are in their own blocks to simplify the logic of building up
  // SuspendCrossing data.
  for (auto *CSI : Shape.CoroSuspends) {
    if (auto *Save = CSI->getCoroSave())
      splitAround(Save, "CoroSave");
    splitAround(CSI, "CoroSuspend");
  }

  // Put CoroEnds into their own blocks.
  for (AnyCoroEndInst *CE : Shape.CoroEnds) {
    splitAround(CE, "CoroEnd");

    // Emit the musttail call function in a new block before the CoroEnd.
    // We do this here so that the right suspend crossing info is computed for
    // the uses of the musttail call. (Arguments to the coro.end instruction
    // would otherwise be ignored.)
    if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(CE)) {
      auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction();
      if (!MustTailCallFn)
        continue;
      IRBuilder<> Builder(AsyncEnd);
      SmallVector<Value *, 8> Args(AsyncEnd->args());
      auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
      auto *Call = coro::createMustTailCall(
          AsyncEnd->getDebugLoc(), MustTailCallFn, TTI, Arguments, Builder);
      splitAround(Call, "MustTailCall.Before.CoroEnd");
    }
  }

  // Later code makes structural assumptions about single-predecessor PHIs,
  // e.g. that they are not live across a suspend point.
  cleanupSinglePredPHIs(F);

  // Transform multi-edge PHI nodes so that any value feeding into a PHI will
  // never have its definition separated from the PHI by a suspend point.
  rewritePHIs(F);
}

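// For illustration only (not from the original source): after normalization a
// switch-ABI suspend point typically occupies its own small chain of blocks,
//
//   CoroSave:
//     %save = call token @llvm.coro.save(ptr %hdl)
//     br label %CoroSuspend
//   CoroSuspend:
//     %s = call i8 @llvm.coro.suspend(token %save, i1 false)
//     br label %AfterCoroSuspend
//
// so that SuspendCrossingInfo can be computed purely per-block.
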
void coro::BaseABI::buildCoroutineFrame(bool OptimizeFrame) {
  SuspendCrossingInfo Checker(F, Shape.CoroSuspends, Shape.CoroEnds);
  doRematerializations(F, Checker, IsMaterializable);

  const DominatorTree DT(F);
  if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
      Shape.ABI != coro::ABI::RetconOnce)
    sinkLifetimeStartMarkers(F, Shape, Checker, DT);

  // All values (that are not allocas) that need to be spilled to the frame.
  coro::SpillInfo Spills;
  // All values defined as allocas that need to live in the frame.
  SmallVector<coro::AllocaInfo, 8> Allocas;

  // Collect the spills for arguments and other not-materializable values.
  coro::collectSpillsFromArgs(Spills, F, Checker);
  SmallVector<Instruction *, 4> DeadInstructions;
  SmallVector<CoroAllocaAllocInst *, 4> LocalAllocas;
  coro::collectSpillsAndAllocasFromInsts(Spills, Allocas, DeadInstructions,
                                         LocalAllocas, F, Checker, DT, Shape);
  coro::collectSpillsFromDbgInfo(Spills, F, Checker);

  LLVM_DEBUG(dumpAllocas(Allocas));
  LLVM_DEBUG(dumpSpills("Spills", Spills));

  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
      Shape.ABI == coro::ABI::Async)
    sinkSpillUsesAfterCoroBegin(DT, Shape.CoroBegin, Spills, Allocas);

  // Build the frame type and rewrite accesses to use it.
  FrameDataInfo FrameData(Spills, Allocas);
  Shape.FrameTy = buildFrameType(F, Shape, FrameData, OptimizeFrame);
  Shape.FramePtr = Shape.CoroBegin;
  // For now, this works for C++ programs only.
  buildFrameDebugInfo(F, Shape, FrameData);
  // Insert spills and reloads.
  insertSpills(FrameData, Shape);
  lowerLocalAllocas(LocalAllocas, DeadInstructions);

  for (auto *I : DeadInstructions)
    I->eraseFromParent();
}
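
// For illustration only (not from the original source): for the switch ABI the
// frame built here typically looks something like
//
//   %f.Frame = type { ptr, ptr, <promise>, <spilled values...>, i2 }
//
// i.e. resume and destroy function pointers, the promise, the spilled
// suspend-crossing values, and the suspend-index field (layout chosen by
// OptimizedStructLayout). insertSpills then rewrites each crossing use into a
// GEP into the frame plus a reload.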