1 //===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// This file contains classes used to discover whether, for a particular value,
// its definition precedes and its uses follow a suspend block. Such a value is
// referred to as a suspend-crossing value.
//
// Using the information discovered, we form a coroutine frame structure to
// contain those values. All uses of those values are replaced with appropriate
// GEP + load from the coroutine frame. At the point of definition we spill the
// value into the coroutine frame.
16 //===----------------------------------------------------------------------===//
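// For illustration (an informal sketch; %x, @f and @use are made-up names,
// while the ".spill.addr"/".reload" suffixes and the "<func>.Frame" type name
// follow the code below), a value that crosses a suspend point such as
//
//   %x = call i32 @f()
//   ; ... suspend point ...
//   call void @use(i32 %x)
//
// is roughly rewritten by insertSpills() into
//
//   %x = call i32 @f()
//   %x.spill.addr = getelementptr inbounds %f.Frame, ptr %frame, i32 0, i32 N
//   store i32 %x, ptr %x.spill.addr
//   ; ... suspend point ...
//   %x.reload.addr = getelementptr inbounds %f.Frame, ptr %frame, i32 0, i32 N
//   %x.reload = load i32, ptr %x.reload.addr
//   call void @use(i32 %x.reload)
//
// where N is the layout field index assigned to %x by FrameTypeBuilder.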
18 #include "CoroInternal.h"
19 #include "llvm/ADT/ScopeExit.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/Analysis/StackLifetime.h"
22 #include "llvm/IR/DIBuilder.h"
23 #include "llvm/IR/DebugInfo.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/InstIterator.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/OptimizedStructLayout.h"
31 #include "llvm/Transforms/Coroutines/ABI.h"
32 #include "llvm/Transforms/Coroutines/CoroInstr.h"
33 #include "llvm/Transforms/Coroutines/MaterializationUtils.h"
34 #include "llvm/Transforms/Coroutines/SpillUtils.h"
35 #include "llvm/Transforms/Coroutines/SuspendCrossingInfo.h"
36 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37 #include "llvm/Transforms/Utils/Local.h"
38 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
39 #include <algorithm>
40 #include <optional>
42 using namespace llvm;
44 extern cl::opt<bool> UseNewDbgInfoFormat;
46 #define DEBUG_TYPE "coro-frame"
48 namespace {
49 class FrameTypeBuilder;
50 // Mapping from the to-be-spilled value to all the users that need reload.
51 struct FrameDataInfo {
  // All the values (that are not allocas) that need to be spilled to the
  // frame.
54 coro::SpillInfo &Spills;
55 // Allocas contains all values defined as allocas that need to live in the
56 // frame.
57 SmallVectorImpl<coro::AllocaInfo> &Allocas;
59 FrameDataInfo(coro::SpillInfo &Spills,
60 SmallVectorImpl<coro::AllocaInfo> &Allocas)
61 : Spills(Spills), Allocas(Allocas) {}
63 SmallVector<Value *, 8> getAllDefs() const {
64 SmallVector<Value *, 8> Defs;
65 for (const auto &P : Spills)
66 Defs.push_back(P.first);
67 for (const auto &A : Allocas)
68 Defs.push_back(A.Alloca);
69 return Defs;
72 uint32_t getFieldIndex(Value *V) const {
73 auto Itr = FieldIndexMap.find(V);
74 assert(Itr != FieldIndexMap.end() &&
75 "Value does not have a frame field index");
76 return Itr->second;
79 void setFieldIndex(Value *V, uint32_t Index) {
80 assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) &&
81 "Cannot set the index for the same field twice.");
82 FieldIndexMap[V] = Index;
85 Align getAlign(Value *V) const {
86 auto Iter = FieldAlignMap.find(V);
87 assert(Iter != FieldAlignMap.end());
88 return Iter->second;
91 void setAlign(Value *V, Align AL) {
92 assert(FieldAlignMap.count(V) == 0);
93 FieldAlignMap.insert({V, AL});
96 uint64_t getDynamicAlign(Value *V) const {
97 auto Iter = FieldDynamicAlignMap.find(V);
98 assert(Iter != FieldDynamicAlignMap.end());
99 return Iter->second;
102 void setDynamicAlign(Value *V, uint64_t Align) {
103 assert(FieldDynamicAlignMap.count(V) == 0);
104 FieldDynamicAlignMap.insert({V, Align});
107 uint64_t getOffset(Value *V) const {
108 auto Iter = FieldOffsetMap.find(V);
109 assert(Iter != FieldOffsetMap.end());
110 return Iter->second;
113 void setOffset(Value *V, uint64_t Offset) {
114 assert(FieldOffsetMap.count(V) == 0);
115 FieldOffsetMap.insert({V, Offset});
118 // Remap the index of every field in the frame, using the final layout index.
119 void updateLayoutIndex(FrameTypeBuilder &B);
121 private:
122 // LayoutIndexUpdateStarted is used to avoid updating the index of any field
123 // twice by mistake.
124 bool LayoutIndexUpdateStarted = false;
125 // Map from values to their slot indexes on the frame. They will be first set
126 // with their original insertion field index. After the frame is built, their
127 // indexes will be updated into the final layout index.
128 DenseMap<Value *, uint32_t> FieldIndexMap;
129 // Map from values to their alignment on the frame. They would be set after
130 // the frame is built.
131 DenseMap<Value *, Align> FieldAlignMap;
132 DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
133 // Map from values to their offset on the frame. They would be set after
134 // the frame is built.
135 DenseMap<Value *, uint64_t> FieldOffsetMap;
137 } // namespace
139 #ifndef NDEBUG
140 static void dumpSpills(StringRef Title, const coro::SpillInfo &Spills) {
141 dbgs() << "------------- " << Title << " --------------\n";
142 for (const auto &E : Spills) {
143 E.first->dump();
144 dbgs() << " user: ";
145 for (auto *I : E.second)
146 I->dump();
150 static void dumpAllocas(const SmallVectorImpl<coro::AllocaInfo> &Allocas) {
151 dbgs() << "------------- Allocas --------------\n";
152 for (const auto &A : Allocas) {
153 A.Alloca->dump();
156 #endif
158 namespace {
159 using FieldIDType = size_t;
// We cannot rely solely on the natural alignment of a type when building a
// coroutine frame: if the alignment specified on the alloca instruction
// differs from the natural alignment of the alloca type, we will need to
// insert padding.
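// For example, an `alloca i64, align 16` stores an 8-byte value, but its
// frame field must land at a 16-byte-aligned offset, which the natural i64
// alignment alone would not guarantee.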
164 class FrameTypeBuilder {
165 private:
166 struct Field {
167 uint64_t Size;
168 uint64_t Offset;
169 Type *Ty;
170 FieldIDType LayoutFieldIndex;
171 Align Alignment;
172 Align TyAlignment;
173 uint64_t DynamicAlignBuffer;
176 const DataLayout &DL;
177 LLVMContext &Context;
178 uint64_t StructSize = 0;
179 Align StructAlign;
180 bool IsFinished = false;
182 std::optional<Align> MaxFrameAlignment;
184 SmallVector<Field, 8> Fields;
185 DenseMap<Value*, unsigned> FieldIndexByKey;
187 public:
188 FrameTypeBuilder(LLVMContext &Context, const DataLayout &DL,
189 std::optional<Align> MaxFrameAlignment)
190 : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
192 /// Add a field to this structure for the storage of an `alloca`
193 /// instruction.
194 [[nodiscard]] FieldIDType addFieldForAlloca(AllocaInst *AI,
195 bool IsHeader = false) {
196 Type *Ty = AI->getAllocatedType();
198 // Make an array type if this is a static array allocation.
199 if (AI->isArrayAllocation()) {
200 if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
201 Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
202 else
203 report_fatal_error("Coroutines cannot handle non static allocas yet");
206 return addField(Ty, AI->getAlign(), IsHeader);
  /// We want to put allocas whose lifetime ranges do not overlap into one
  /// slot of the coroutine frame.
  /// Consider the example at: https://bugs.llvm.org/show_bug.cgi?id=45566
213 /// cppcoro::task<void> alternative_paths(bool cond) {
214 /// if (cond) {
215 /// big_structure a;
216 /// process(a);
217 /// co_await something();
218 /// } else {
219 /// big_structure b;
220 /// process2(b);
221 /// co_await something();
222 /// }
223 /// }
  /// We want to put variable a and variable b in the same slot to
  /// reduce the size of the coroutine frame.
  ///
  /// This function uses the StackLifetime algorithm to partition the
  /// AllocaInsts in Spills into non-overlapping sets, so that allocas in the
  /// same set can share one slot in the coroutine frame. A field is then added
  /// for each set, using the largest alloca's type as the field type.
  ///
  /// Side effect: because we sort the allocas, their order in the frame may
  /// differ from their order in the source code.
236 void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
237 coro::Shape &Shape, bool OptimizeFrame);
239 /// Add a field to this structure.
240 [[nodiscard]] FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment,
241 bool IsHeader = false,
242 bool IsSpillOfValue = false) {
243 assert(!IsFinished && "adding fields to a finished builder");
244 assert(Ty && "must provide a type for a field");
246 // The field size is always the alloc size of the type.
247 uint64_t FieldSize = DL.getTypeAllocSize(Ty);
    // For an alloca with size=0, we don't need to add a field; it can just
    // point to any index in the frame. Use index 0.
251 if (FieldSize == 0) {
252 return 0;
255 // The field alignment might not be the type alignment, but we need
256 // to remember the type alignment anyway to build the type.
257 // If we are spilling values we don't need to worry about ABI alignment
258 // concerns.
259 Align ABIAlign = DL.getABITypeAlign(Ty);
260 Align TyAlignment = ABIAlign;
261 if (IsSpillOfValue && MaxFrameAlignment && *MaxFrameAlignment < ABIAlign)
262 TyAlignment = *MaxFrameAlignment;
263 Align FieldAlignment = MaybeFieldAlignment.value_or(TyAlignment);
    // The field alignment could be bigger than the max frame alignment; in
    // that case we request additional storage to be able to dynamically align
    // the pointer.
268 uint64_t DynamicAlignBuffer = 0;
269 if (MaxFrameAlignment && (FieldAlignment > *MaxFrameAlignment)) {
270 DynamicAlignBuffer =
271 offsetToAlignment(MaxFrameAlignment->value(), FieldAlignment);
272 FieldAlignment = *MaxFrameAlignment;
273 FieldSize = FieldSize + DynamicAlignBuffer;
276 // Lay out header fields immediately.
277 uint64_t Offset;
278 if (IsHeader) {
279 Offset = alignTo(StructSize, FieldAlignment);
280 StructSize = Offset + FieldSize;
282 // Everything else has a flexible offset.
283 } else {
284 Offset = OptimizedStructLayoutField::FlexibleOffset;
287 Fields.push_back({FieldSize, Offset, Ty, 0, FieldAlignment, TyAlignment,
288 DynamicAlignBuffer});
289 return Fields.size() - 1;
292 /// Finish the layout and create the struct type with the given name.
293 StructType *finish(StringRef Name);
295 uint64_t getStructSize() const {
296 assert(IsFinished && "not yet finished!");
297 return StructSize;
300 Align getStructAlign() const {
301 assert(IsFinished && "not yet finished!");
302 return StructAlign;
305 FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
306 assert(IsFinished && "not yet finished!");
307 return Fields[Id].LayoutFieldIndex;
310 Field getLayoutField(FieldIDType Id) const {
311 assert(IsFinished && "not yet finished!");
312 return Fields[Id];
315 } // namespace
317 void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
318 auto Updater = [&](Value *I) {
319 auto Field = B.getLayoutField(getFieldIndex(I));
320 setFieldIndex(I, Field.LayoutFieldIndex);
321 setAlign(I, Field.Alignment);
322 uint64_t dynamicAlign =
323 Field.DynamicAlignBuffer
324 ? Field.DynamicAlignBuffer + Field.Alignment.value()
325 : 0;
326 setDynamicAlign(I, dynamicAlign);
327 setOffset(I, Field.Offset);
329 LayoutIndexUpdateStarted = true;
330 for (auto &S : Spills)
331 Updater(S.first);
332 for (const auto &A : Allocas)
333 Updater(A.Alloca);
334 LayoutIndexUpdateStarted = false;
337 void FrameTypeBuilder::addFieldForAllocas(const Function &F,
338 FrameDataInfo &FrameData,
339 coro::Shape &Shape,
340 bool OptimizeFrame) {
341 using AllocaSetType = SmallVector<AllocaInst *, 4>;
342 SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
344 // We need to add field for allocas at the end of this function.
345 auto AddFieldForAllocasAtExit = make_scope_exit([&]() {
346 for (auto AllocaList : NonOverlapedAllocas) {
347 auto *LargestAI = *AllocaList.begin();
348 FieldIDType Id = addFieldForAlloca(LargestAI);
349 for (auto *Alloca : AllocaList)
350 FrameData.setFieldIndex(Alloca, Id);
354 if (!OptimizeFrame) {
355 for (const auto &A : FrameData.Allocas) {
356 AllocaInst *Alloca = A.Alloca;
357 NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
359 return;
  // Because there are paths from the lifetime.start to coro.end for each
  // alloca, the live ranges of all the allocas overlap in the blocks that
  // contain coro.end and their successor blocks. So we choose to skip these
  // blocks when we calculate the live range for each alloca. This should be
  // reasonable since there shouldn't be uses in these blocks and the
  // coroutine frame shouldn't be used outside the coroutine body.
  //
  // Note that the user of coro.suspend may not be a SwitchInst. However, this
  // case seems too complex to handle. And it is harmless to skip these
  // patterns since it just prevents putting the allocas in the same slot.
374 DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
375 for (auto *CoroSuspendInst : Shape.CoroSuspends) {
376 for (auto *U : CoroSuspendInst->users()) {
377 if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) {
378 auto *SWI = const_cast<SwitchInst *>(ConstSWI);
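        // Save the default destination (the exit path that reaches coro.end)
        // and temporarily retarget it to a case successor, so that the
        // StackLifetime analysis below never follows paths into the coro.end
        // blocks; the saved destination is restored after the analysis.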
379 DefaultSuspendDest[SWI] = SWI->getDefaultDest();
380 SWI->setDefaultDest(SWI->getSuccessor(1));
385 auto ExtractAllocas = [&]() {
386 AllocaSetType Allocas;
387 Allocas.reserve(FrameData.Allocas.size());
388 for (const auto &A : FrameData.Allocas)
389 Allocas.push_back(A.Alloca);
390 return Allocas;
392 StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
393 StackLifetime::LivenessType::May);
394 StackLifetimeAnalyzer.run();
395 auto DoAllocasInterfere = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
396 return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
397 StackLifetimeAnalyzer.getLiveRange(AI2));
399 auto GetAllocaSize = [&](const coro::AllocaInfo &A) {
400 std::optional<TypeSize> RetSize = A.Alloca->getAllocationSize(DL);
401 assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
402 assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
403 return RetSize->getFixedValue();
  // Put larger allocas in front, so they have higher priority to merge, which
  // can potentially save more space. This also keeps each AllocaSet ordered,
  // so we can easily get the largest alloca in a set.
  sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) {
    return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
  });
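  // For example, if a 64-byte alloca %a and a 16-byte alloca %b (made-up
  // names) have disjoint live ranges and compatible alignment, %a seeds a set
  // first and %b is merged into it below, so both share one frame slot whose
  // field type is %a's type.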
412 for (const auto &A : FrameData.Allocas) {
413 AllocaInst *Alloca = A.Alloca;
414 bool Merged = false;
    // Check whether the alloca interferes with any existing
    // NonOverlappedAllocaSet; if it does not, insert the alloca into that
    // set.
418 for (auto &AllocaSet : NonOverlapedAllocas) {
419 assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
420 bool NoInterference = none_of(AllocaSet, [&](auto Iter) {
421 return DoAllocasInterfere(Alloca, Iter);
      // If the alignment of A is a multiple of the alignment of B, the
      // address of A satisfies the alignment requirement of B.
      //
      // There may be other, more fine-grained strategies to handle alignment
      // information during the merging process, but they seem hard to
      // implement and of little benefit.
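      // For example, a set whose largest alloca has align 16 can absorb an
      // align-8 alloca (16 % 8 == 0), but not an align-32 one (16 % 32 != 0).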
429 bool Alignable = [&]() -> bool {
430 auto *LargestAlloca = *AllocaSet.begin();
431 return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
433 }();
434 bool CouldMerge = NoInterference && Alignable;
435 if (!CouldMerge)
436 continue;
437 AllocaSet.push_back(Alloca);
438 Merged = true;
439 break;
441 if (!Merged) {
442 NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
  // Restore the default destination saved for each switch statement above.
447 for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
448 SwitchInst *SWI = SwitchAndDefaultDest.first;
449 BasicBlock *DestBB = SwitchAndDefaultDest.second;
450 SWI->setDefaultDest(DestBB);
452 // This Debug Info could tell us which allocas are merged into one slot.
453 LLVM_DEBUG(for (auto &AllocaSet
454 : NonOverlapedAllocas) {
455 if (AllocaSet.size() > 1) {
456 dbgs() << "In Function:" << F.getName() << "\n";
457 dbgs() << "Find Union Set "
458 << "\n";
459 dbgs() << "\tAllocas are \n";
460 for (auto Alloca : AllocaSet)
461 dbgs() << "\t\t" << *Alloca << "\n";
466 StructType *FrameTypeBuilder::finish(StringRef Name) {
467 assert(!IsFinished && "already finished!");
469 // Prepare the optimal-layout field array.
470 // The Id in the layout field is a pointer to our Field for it.
471 SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
472 LayoutFields.reserve(Fields.size());
473 for (auto &Field : Fields) {
474 LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
475 Field.Offset);
478 // Perform layout.
479 auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
480 StructSize = SizeAndAlign.first;
481 StructAlign = SizeAndAlign.second;
483 auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
484 return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
487 // We need to produce a packed struct type if there's a field whose
488 // assigned offset isn't a multiple of its natural type alignment.
489 bool Packed = [&] {
490 for (auto &LayoutField : LayoutFields) {
491 auto &F = getField(LayoutField);
492 if (!isAligned(F.TyAlignment, LayoutField.Offset))
493 return true;
495 return false;
496 }();
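  // For example, if the optimized layout assigns an i64 field to offset 12,
  // its natural 8-byte alignment is violated, so the struct is created packed
  // and the explicit padding arrays below keep every field at its assigned
  // offset.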
498 // Build the struct body.
499 SmallVector<Type*, 16> FieldTypes;
500 FieldTypes.reserve(LayoutFields.size() * 3 / 2);
501 uint64_t LastOffset = 0;
502 for (auto &LayoutField : LayoutFields) {
503 auto &F = getField(LayoutField);
505 auto Offset = LayoutField.Offset;
507 // Add a padding field if there's a padding gap and we're either
508 // building a packed struct or the padding gap is more than we'd
509 // get from aligning to the field type's natural alignment.
510 assert(Offset >= LastOffset);
511 if (Offset != LastOffset) {
512 if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
513 FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
514 Offset - LastOffset));
517 F.Offset = Offset;
518 F.LayoutFieldIndex = FieldTypes.size();
520 FieldTypes.push_back(F.Ty);
521 if (F.DynamicAlignBuffer) {
522 FieldTypes.push_back(
523 ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer));
525 LastOffset = Offset + F.Size;
528 StructType *Ty = StructType::create(Context, FieldTypes, Name, Packed);
530 #ifndef NDEBUG
531 // Check that the IR layout matches the offsets we expect.
532 auto Layout = DL.getStructLayout(Ty);
533 for (auto &F : Fields) {
534 assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty);
535 assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset);
537 #endif
539 IsFinished = true;
541 return Ty;
544 static void cacheDIVar(FrameDataInfo &FrameData,
545 DenseMap<Value *, DILocalVariable *> &DIVarCache) {
546 for (auto *V : FrameData.getAllDefs()) {
547 if (DIVarCache.contains(V))
548 continue;
550 auto CacheIt = [&DIVarCache, V](const auto &Container) {
551 auto *I = llvm::find_if(Container, [](auto *DDI) {
552 return DDI->getExpression()->getNumElements() == 0;
554 if (I != Container.end())
555 DIVarCache.insert({V, (*I)->getVariable()});
557 CacheIt(findDbgDeclares(V));
558 CacheIt(findDVRDeclares(V));
/// Create a name for Type. It uses MDString to store the newly created string
/// to avoid memory leaks.
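/// For example (following the scheme below): i32 becomes "__int_32", double
/// becomes "__double_", an unnamed struct becomes "__LiteralStructType_", and
/// a struct named "ns::Widget" becomes "ns__Widget".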
564 static StringRef solveTypeName(Type *Ty) {
565 if (Ty->isIntegerTy()) {
    // The longest name in common use may be '__int_128', which has 9
    // characters.
567 SmallString<16> Buffer;
568 raw_svector_ostream OS(Buffer);
569 OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth();
570 auto *MDName = MDString::get(Ty->getContext(), OS.str());
571 return MDName->getString();
574 if (Ty->isFloatingPointTy()) {
575 if (Ty->isFloatTy())
576 return "__float_";
577 if (Ty->isDoubleTy())
578 return "__double_";
579 return "__floating_type_";
582 if (Ty->isPointerTy())
583 return "PointerType";
585 if (Ty->isStructTy()) {
586 if (!cast<StructType>(Ty)->hasName())
587 return "__LiteralStructType_";
589 auto Name = Ty->getStructName();
591 SmallString<16> Buffer(Name);
592 for (auto &Iter : Buffer)
593 if (Iter == '.' || Iter == ':')
594 Iter = '_';
595 auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
596 return MDName->getString();
599 return "UnknownType";
602 static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
603 const DataLayout &Layout, DIScope *Scope,
604 unsigned LineNum,
605 DenseMap<Type *, DIType *> &DITypeCache) {
606 if (DIType *DT = DITypeCache.lookup(Ty))
607 return DT;
609 StringRef Name = solveTypeName(Ty);
611 DIType *RetType = nullptr;
613 if (Ty->isIntegerTy()) {
614 auto BitWidth = cast<IntegerType>(Ty)->getBitWidth();
615 RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed,
616 llvm::DINode::FlagArtificial);
617 } else if (Ty->isFloatingPointTy()) {
618 RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
619 dwarf::DW_ATE_float,
620 llvm::DINode::FlagArtificial);
621 } else if (Ty->isPointerTy()) {
    // Construct a PointerType pointing to null (i.e., void *) instead of
    // exploring the pointee type, to avoid an infinite recursion problem. For
    // example, we would be in trouble if we traversed this recursively:
    //
    //   struct Node {
    //     Node* ptr;
    //   };
629 RetType =
630 Builder.createPointerType(nullptr, Layout.getTypeSizeInBits(Ty),
631 Layout.getABITypeAlign(Ty).value() * CHAR_BIT,
632 /*DWARFAddressSpace=*/std::nullopt, Name);
633 } else if (Ty->isStructTy()) {
634 auto *DIStruct = Builder.createStructType(
635 Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
636 Layout.getPrefTypeAlign(Ty).value() * CHAR_BIT,
637 llvm::DINode::FlagArtificial, nullptr, llvm::DINodeArray());
639 auto *StructTy = cast<StructType>(Ty);
640 SmallVector<Metadata *, 16> Elements;
641 for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
642 DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout,
643 Scope, LineNum, DITypeCache);
644 assert(DITy);
645 Elements.push_back(Builder.createMemberType(
646 Scope, DITy->getName(), Scope->getFile(), LineNum,
647 DITy->getSizeInBits(), DITy->getAlignInBits(),
648 Layout.getStructLayout(StructTy)->getElementOffsetInBits(I),
649 llvm::DINode::FlagArtificial, DITy));
652 Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements));
654 RetType = DIStruct;
655 } else {
656 LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n");
657 TypeSize Size = Layout.getTypeSizeInBits(Ty);
658 auto *CharSizeType = Builder.createBasicType(
659 Name, 8, dwarf::DW_ATE_unsigned_char, llvm::DINode::FlagArtificial);
661 if (Size <= 8)
662 RetType = CharSizeType;
663 else {
664 if (Size % 8 != 0)
665 Size = TypeSize::getFixed(Size + 8 - (Size % 8));
667 RetType = Builder.createArrayType(
668 Size, Layout.getPrefTypeAlign(Ty).value(), CharSizeType,
669 Builder.getOrCreateArray(Builder.getOrCreateSubrange(0, Size / 8)));
673 DITypeCache.insert({Ty, RetType});
674 return RetType;
/// Build artificial debug info for C++ coroutine frames to allow users to
/// inspect the contents of the frame directly.
///
/// Create debug information for the coroutine frame with the debug name
/// "__coro_frame". The debug information for the fields of the coroutine
/// frame is constructed in the following way:
/// 1. For each value in the frame, we search its dbg.declare uses to find the
///    corresponding debug variable. If we can find the debug variable, we get
///    full and accurate debug information.
/// 2. If we can't get debug information in step 1, we fall back to building a
///    DIType from the IR type. We do this in solveDIType. We only handle
///    integer, float, double, and struct types for now.
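///
/// The resulting composite type is named "<function>.coro_frame_ty" and
/// typically contains __resume_fn, __destroy_fn and __coro_index members plus
/// one member per spilled value; it is attached to an artificial __coro_frame
/// variable so that a debugger can display the live frame contents.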
689 static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
690 FrameDataInfo &FrameData) {
691 DISubprogram *DIS = F.getSubprogram();
  // If there is no DISubprogram for F, it implies the function was not
  // compiled with debug info, so we don't need to generate debug info for the
  // frame either.
695 if (!DIS || !DIS->getUnit() ||
696 !dwarf::isCPlusPlus(
697 (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()) ||
698 DIS->getUnit()->getEmissionKind() != DICompileUnit::DebugEmissionKind::FullDebug)
699 return;
  assert(Shape.ABI == coro::ABI::Switch &&
         "We can only build debug information for C++ coroutines now.\n");
704 DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);
706 assert(Shape.getPromiseAlloca() &&
707 "Coroutine with switch ABI should own Promise alloca");
709 DIFile *DFile = DIS->getFile();
710 unsigned LineNum = DIS->getLine();
712 DICompositeType *FrameDITy = DBuilder.createStructType(
713 DIS->getUnit(), Twine(F.getName() + ".coro_frame_ty").str(),
714 DFile, LineNum, Shape.FrameSize * 8,
715 Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
716 llvm::DINodeArray());
717 StructType *FrameTy = Shape.FrameTy;
718 SmallVector<Metadata *, 16> Elements;
719 DataLayout Layout = F.getDataLayout();
721 DenseMap<Value *, DILocalVariable *> DIVarCache;
722 cacheDIVar(FrameData, DIVarCache);
724 unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
725 unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
726 unsigned IndexIndex = Shape.SwitchLowering.IndexField;
728 DenseMap<unsigned, StringRef> NameCache;
729 NameCache.insert({ResumeIndex, "__resume_fn"});
730 NameCache.insert({DestroyIndex, "__destroy_fn"});
731 NameCache.insert({IndexIndex, "__coro_index"});
733 Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex),
734 *DestroyFnTy = FrameTy->getElementType(DestroyIndex),
735 *IndexTy = FrameTy->getElementType(IndexIndex);
737 DenseMap<unsigned, DIType *> TyCache;
738 TyCache.insert(
739 {ResumeIndex, DBuilder.createPointerType(
740 nullptr, Layout.getTypeSizeInBits(ResumeFnTy))});
741 TyCache.insert(
742 {DestroyIndex, DBuilder.createPointerType(
743 nullptr, Layout.getTypeSizeInBits(DestroyFnTy))});
745 /// FIXME: If we fill the field `SizeInBits` with the actual size of
746 /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
747 TyCache.insert({IndexIndex, DBuilder.createBasicType(
748 "__coro_index",
749 (Layout.getTypeSizeInBits(IndexTy) < 8)
751 : Layout.getTypeSizeInBits(IndexTy),
752 dwarf::DW_ATE_unsigned_char)});
754 for (auto *V : FrameData.getAllDefs()) {
755 if (!DIVarCache.contains(V))
756 continue;
758 auto Index = FrameData.getFieldIndex(V);
760 NameCache.insert({Index, DIVarCache[V]->getName()});
761 TyCache.insert({Index, DIVarCache[V]->getType()});
  // Cache from index to an (Align, Offset) pair.
765 DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
766 // The Align and Offset of Resume function and Destroy function are fixed.
767 OffsetCache.insert({ResumeIndex, {8, 0}});
768 OffsetCache.insert({DestroyIndex, {8, 8}});
769 OffsetCache.insert(
770 {IndexIndex,
771 {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});
773 for (auto *V : FrameData.getAllDefs()) {
774 auto Index = FrameData.getFieldIndex(V);
776 OffsetCache.insert(
777 {Index, {FrameData.getAlign(V).value(), FrameData.getOffset(V)}});
780 DenseMap<Type *, DIType *> DITypeCache;
  // This counter is used to avoid duplicate type names, e.g., there may be
  // many i32 and i64 types in one coroutine, so we use i32_0 and i32_1 to
  // distinguish them, since it makes no sense for field names to conflict
  // with each other.
785 unsigned UnknownTypeNum = 0;
786 for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
787 if (!OffsetCache.contains(Index))
788 continue;
790 std::string Name;
791 uint64_t SizeInBits;
792 uint32_t AlignInBits;
793 uint64_t OffsetInBits;
794 DIType *DITy = nullptr;
796 Type *Ty = FrameTy->getElementType(Index);
797 assert(Ty->isSized() && "We can't handle type which is not sized.\n");
798 SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedValue();
799 AlignInBits = OffsetCache[Index].first * 8;
800 OffsetInBits = OffsetCache[Index].second * 8;
802 if (auto It = NameCache.find(Index); It != NameCache.end()) {
803 Name = It->second.str();
804 DITy = TyCache[Index];
805 } else {
806 DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache);
807 assert(DITy && "SolveDIType shouldn't return nullptr.\n");
808 Name = DITy->getName().str();
809 Name += "_" + std::to_string(UnknownTypeNum);
810 UnknownTypeNum++;
813 Elements.push_back(DBuilder.createMemberType(
814 FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits,
815 llvm::DINode::FlagArtificial, DITy));
818 DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements));
820 auto *FrameDIVar =
821 DBuilder.createAutoVariable(DIS, "__coro_frame", DFile, LineNum,
822 FrameDITy, true, DINode::FlagArtificial);
  // The subprogram has a RetainedNodes field which records the debug
  // variables it contains. So we need to add __coro_frame to its
  // RetainedNodes.
  //
  // If we don't add __coro_frame to the RetainedNodes, the user may get
  // `no symbol __coro_frame in context` rather than the more precise
  // `__coro_frame` has been optimized out.
831 auto RetainedNodes = DIS->getRetainedNodes();
832 SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
833 RetainedNodes.end());
834 RetainedNodesVec.push_back(FrameDIVar);
835 DIS->replaceOperandWith(7, (MDTuple::get(F.getContext(), RetainedNodesVec)));
837 // Construct the location for the frame debug variable. The column number
838 // is fake but it should be fine.
839 DILocation *DILoc =
840 DILocation::get(DIS->getContext(), LineNum, /*Column=*/1, DIS);
841 assert(FrameDIVar->isValidLocationForIntrinsic(DILoc));
843 if (UseNewDbgInfoFormat) {
844 DbgVariableRecord *NewDVR =
845 new DbgVariableRecord(ValueAsMetadata::get(Shape.FramePtr), FrameDIVar,
846 DBuilder.createExpression(), DILoc,
847 DbgVariableRecord::LocationType::Declare);
848 BasicBlock::iterator It = Shape.getInsertPtAfterFramePtr();
849 It->getParent()->insertDbgRecordBefore(NewDVR, It);
850 } else {
851 DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
852 DBuilder.createExpression(), DILoc,
853 &*Shape.getInsertPtAfterFramePtr());
857 // Build a struct that will keep state for an active coroutine.
858 // struct f.frame {
859 // ResumeFnTy ResumeFnAddr;
860 // ResumeFnTy DestroyFnAddr;
861 // ... promise (if present) ...
862 // int ResumeIndex;
863 // ... spills ...
864 // };
865 static StructType *buildFrameType(Function &F, coro::Shape &Shape,
866 FrameDataInfo &FrameData,
867 bool OptimizeFrame) {
868 LLVMContext &C = F.getContext();
869 const DataLayout &DL = F.getDataLayout();
871 // We will use this value to cap the alignment of spilled values.
872 std::optional<Align> MaxFrameAlignment;
873 if (Shape.ABI == coro::ABI::Async)
874 MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
875 FrameTypeBuilder B(C, DL, MaxFrameAlignment);
877 AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
878 std::optional<FieldIDType> SwitchIndexFieldId;
880 if (Shape.ABI == coro::ABI::Switch) {
881 auto *FnPtrTy = PointerType::getUnqual(C);
883 // Add header fields for the resume and destroy functions.
884 // We can rely on these being perfectly packed.
885 (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);
886 (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);
888 // PromiseAlloca field needs to be explicitly added here because it's
889 // a header field with a fixed offset based on its alignment. Hence it
890 // needs special handling and cannot be added to FrameData.Allocas.
891 if (PromiseAlloca)
892 FrameData.setFieldIndex(
893 PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true));
895 // Add a field to store the suspend index. This doesn't need to
896 // be in the header.
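    // For example, a coroutine with 3 suspend points needs
    // Log2_64_Ceil(3) == 2 bits (an i2 index), and with a single suspend
    // point we still reserve at least 1 bit.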
897 unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
898 Type *IndexType = Type::getIntNTy(C, IndexBits);
900 SwitchIndexFieldId = B.addField(IndexType, std::nullopt);
901 } else {
902 assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
  // Because multiple allocas may share the same field slot, we add the
  // allocas to fields here.
907 B.addFieldForAllocas(F, FrameData, Shape, OptimizeFrame);
908 // Add PromiseAlloca to Allocas list so that
909 // 1. updateLayoutIndex could update its index after
910 // `performOptimizedStructLayout`
911 // 2. it is processed in insertSpills.
912 if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
    // We assume that the promise alloca won't be modified before CoroBegin
    // and that no alias will be created before CoroBegin.
915 FrameData.Allocas.emplace_back(
916 PromiseAlloca, DenseMap<Instruction *, std::optional<APInt>>{}, false);
917 // Create an entry for every spilled value.
918 for (auto &S : FrameData.Spills) {
919 Type *FieldType = S.first->getType();
920 // For byval arguments, we need to store the pointed value in the frame,
921 // instead of the pointer itself.
922 if (const Argument *A = dyn_cast<Argument>(S.first))
923 if (A->hasByValAttr())
924 FieldType = A->getParamByValType();
925 FieldIDType Id = B.addField(FieldType, std::nullopt, false /*header*/,
926 true /*IsSpillOfValue*/);
927 FrameData.setFieldIndex(S.first, Id);
930 StructType *FrameTy = [&] {
931 SmallString<32> Name(F.getName());
932 Name.append(".Frame");
933 return B.finish(Name);
934 }();
936 FrameData.updateLayoutIndex(B);
937 Shape.FrameAlign = B.getStructAlign();
938 Shape.FrameSize = B.getStructSize();
940 switch (Shape.ABI) {
941 case coro::ABI::Switch: {
942 // In the switch ABI, remember the switch-index field.
943 auto IndexField = B.getLayoutField(*SwitchIndexFieldId);
944 Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;
945 Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
946 Shape.SwitchLowering.IndexOffset = IndexField.Offset;
948 // Also round the frame size up to a multiple of its alignment, as is
949 // generally expected in C/C++.
950 Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
951 break;
954 // In the retcon ABI, remember whether the frame is inline in the storage.
955 case coro::ABI::Retcon:
956 case coro::ABI::RetconOnce: {
957 auto Id = Shape.getRetconCoroId();
958 Shape.RetconLowering.IsFrameInlineInStorage
959 = (B.getStructSize() <= Id->getStorageSize() &&
960 B.getStructAlign() <= Id->getStorageAlignment());
961 break;
963 case coro::ABI::Async: {
964 Shape.AsyncLowering.FrameOffset =
965 alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign);
966 // Also make the final context size a multiple of the context alignment to
967 // make allocation easier for allocators.
968 Shape.AsyncLowering.ContextSize =
969 alignTo(Shape.AsyncLowering.FrameOffset + Shape.FrameSize,
970 Shape.AsyncLowering.getContextAlignment());
971 if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
      report_fatal_error(
          "The alignment requirement of frame variables cannot be higher than "
          "the alignment of the async function context");
976 break;
980 return FrameTy;
983 // Replace all alloca and SSA values that are accessed across suspend points
984 // with GetElementPointer from coroutine frame + loads and stores. Create an
985 // AllocaSpillBB that will become the new entry block for the resume parts of
986 // the coroutine:
988 // %hdl = coro.begin(...)
989 // whatever
991 // becomes:
993 // %hdl = coro.begin(...)
994 // br label %AllocaSpillBB
996 // AllocaSpillBB:
997 // ; geps corresponding to allocas that were moved to coroutine frame
998 // br label PostSpill
1000 // PostSpill:
1001 // whatever
1004 static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
1005 LLVMContext &C = Shape.CoroBegin->getContext();
1006 Function *F = Shape.CoroBegin->getFunction();
1007 IRBuilder<> Builder(C);
1008 StructType *FrameTy = Shape.FrameTy;
1009 Value *FramePtr = Shape.FramePtr;
1010 DominatorTree DT(*F);
1011 SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
1013 // Create a GEP with the given index into the coroutine frame for the original
1014 // value Orig. Appends an extra 0 index for array-allocas, preserving the
1015 // original type.
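  // For example, a value assigned field index 3 yields
  //   getelementptr inbounds %f.Frame, ptr %FramePtr, i32 0, i32 3
  // with a trailing "i32 0" appended when Orig is an array alloca.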
1016 auto GetFramePointer = [&](Value *Orig) -> Value * {
1017 FieldIDType Index = FrameData.getFieldIndex(Orig);
1018 SmallVector<Value *, 3> Indices = {
1019 ConstantInt::get(Type::getInt32Ty(C), 0),
1020 ConstantInt::get(Type::getInt32Ty(C), Index),
1023 if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
1024 if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
1025 auto Count = CI->getValue().getZExtValue();
1026 if (Count > 1) {
1027 Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
1029 } else {
1030 report_fatal_error("Coroutines cannot handle non static allocas yet");
1034 auto GEP = cast<GetElementPtrInst>(
1035 Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
1036 if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
1037 if (FrameData.getDynamicAlign(Orig) != 0) {
1038 assert(FrameData.getDynamicAlign(Orig) == AI->getAlign().value());
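        // Manually round the raw field address up to the alloca's alignment:
        //   aligned = (addr + align - 1) & ~(align - 1)
        // e.g., with align 32, an address of 0x1004 becomes 0x1020.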
1039 auto *M = AI->getModule();
1040 auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
1041 auto *PtrValue = Builder.CreatePtrToInt(GEP, IntPtrTy);
1042 auto *AlignMask =
1043 ConstantInt::get(IntPtrTy, AI->getAlign().value() - 1);
1044 PtrValue = Builder.CreateAdd(PtrValue, AlignMask);
1045 PtrValue = Builder.CreateAnd(PtrValue, Builder.CreateNot(AlignMask));
1046 return Builder.CreateIntToPtr(PtrValue, AI->getType());
      // If the type of the GEP is not equal to the type of the AllocaInst, it
      // implies that the AllocaInst may share the frame slot of another
      // AllocaInst. So we cast the GEP to the alloca's type here to reuse the
      // frame storage.
      //
      // Note: if we change the strategy for dealing with alignment, we need
      // to refine this casting.
1055 if (GEP->getType() != Orig->getType())
1056 return Builder.CreateAddrSpaceCast(GEP, Orig->getType(),
1057 Orig->getName() + Twine(".cast"));
1059 return GEP;
1062 for (auto const &E : FrameData.Spills) {
1063 Value *Def = E.first;
1064 auto SpillAlignment = Align(FrameData.getAlign(Def));
1065 // Create a store instruction storing the value into the
1066 // coroutine frame.
1067 BasicBlock::iterator InsertPt = coro::getSpillInsertionPt(Shape, Def, DT);
1069 Type *ByValTy = nullptr;
1070 if (auto *Arg = dyn_cast<Argument>(Def)) {
1071 // If we're spilling an Argument, make sure we clear 'nocapture'
1072 // from the coroutine function.
1073 Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
1075 if (Arg->hasByValAttr())
1076 ByValTy = Arg->getParamByValType();
1079 auto Index = FrameData.getFieldIndex(Def);
1080 Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
1081 auto *G = Builder.CreateConstInBoundsGEP2_32(
1082 FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
1083 if (ByValTy) {
1084 // For byval arguments, we need to store the pointed value in the frame,
1085 // instead of the pointer itself.
1086 auto *Value = Builder.CreateLoad(ByValTy, Def);
1087 Builder.CreateAlignedStore(Value, G, SpillAlignment);
1088 } else {
1089 Builder.CreateAlignedStore(Def, G, SpillAlignment);
1092 BasicBlock *CurrentBlock = nullptr;
1093 Value *CurrentReload = nullptr;
1094 for (auto *U : E.second) {
      // If we have not yet seen the use block, create a load instruction to
      // reload the spilled value from the coroutine frame, using the frame
      // GEP as the address.
1098 if (CurrentBlock != U->getParent()) {
1099 CurrentBlock = U->getParent();
1100 Builder.SetInsertPoint(CurrentBlock,
1101 CurrentBlock->getFirstInsertionPt());
1103 auto *GEP = GetFramePointer(E.first);
1104 GEP->setName(E.first->getName() + Twine(".reload.addr"));
1105 if (ByValTy)
1106 CurrentReload = GEP;
1107 else
1108 CurrentReload = Builder.CreateAlignedLoad(
1109 FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
1110 SpillAlignment, E.first->getName() + Twine(".reload"));
1112 TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(Def);
1113 TinyPtrVector<DbgVariableRecord *> DVRs = findDVRDeclares(Def);
      // Try our best to find a dbg.declare. If the spill is a temporary,
      // there may not be a direct dbg.declare; walk up the load chain to find
      // one from an alias.
1117 if (F->getSubprogram()) {
1118 auto *CurDef = Def;
1119 while (DIs.empty() && DVRs.empty() && isa<LoadInst>(CurDef)) {
1120 auto *LdInst = cast<LoadInst>(CurDef);
1121 // Only consider ptr to ptr same type load.
1122 if (LdInst->getPointerOperandType() != LdInst->getType())
1123 break;
1124 CurDef = LdInst->getPointerOperand();
1125 if (!isa<AllocaInst, LoadInst>(CurDef))
1126 break;
1127 DIs = findDbgDeclares(CurDef);
1128 DVRs = findDVRDeclares(CurDef);
1132 auto SalvageOne = [&](auto *DDI) {
1133 bool AllowUnresolved = false;
1134 // This dbg.declare is preserved for all coro-split function
1135 // fragments. It will be unreachable in the main function, and
1136 // processed by coro::salvageDebugInfo() by the Cloner.
1137 if (UseNewDbgInfoFormat) {
1138 DbgVariableRecord *NewDVR = new DbgVariableRecord(
1139 ValueAsMetadata::get(CurrentReload), DDI->getVariable(),
1140 DDI->getExpression(), DDI->getDebugLoc(),
1141 DbgVariableRecord::LocationType::Declare);
1142 Builder.GetInsertPoint()->getParent()->insertDbgRecordBefore(
1143 NewDVR, Builder.GetInsertPoint());
1144 } else {
1145 DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
1146 .insertDeclare(CurrentReload, DDI->getVariable(),
1147 DDI->getExpression(), DDI->getDebugLoc(),
1148 &*Builder.GetInsertPoint());
1150 // This dbg.declare is for the main function entry point. It
1151 // will be deleted in all coro-split functions.
1152 coro::salvageDebugInfo(ArgToAllocaMap, *DDI, false /*UseEntryValue*/);
1154 for_each(DIs, SalvageOne);
1155 for_each(DVRs, SalvageOne);
1158 // If we have a single edge PHINode, remove it and replace it with a
1159 // reload from the coroutine frame. (We already took care of multi edge
1160 // PHINodes by normalizing them in the rewritePHIs function).
1161 if (auto *PN = dyn_cast<PHINode>(U)) {
1162 assert(PN->getNumIncomingValues() == 1 &&
1163 "unexpected number of incoming "
1164 "values in the PHINode");
1165 PN->replaceAllUsesWith(CurrentReload);
1166 PN->eraseFromParent();
1167 continue;
1170 // Replace all uses of CurrentValue in the current instruction with
1171 // reload.
1172 U->replaceUsesOfWith(Def, CurrentReload);
1173 // Instructions are added to Def's user list if the attached
1174 // debug records use Def. Update those now.
1175 for (DbgVariableRecord &DVR : filterDbgVars(U->getDbgRecordRange()))
1176 DVR.replaceVariableLocationOp(Def, CurrentReload, true);
1180 BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent();
1182 auto SpillBlock = FramePtrBB->splitBasicBlock(
1183 Shape.getInsertPtAfterFramePtr(), "AllocaSpillBB");
1184 SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
1185 Shape.AllocaSpillBlock = SpillBlock;
1187 // retcon and retcon.once lowering assumes all uses have been sunk.
1188 if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
1189 Shape.ABI == coro::ABI::Async) {
1190 // If we found any allocas, replace all of their remaining uses with Geps.
1191 Builder.SetInsertPoint(SpillBlock, SpillBlock->begin());
1192 for (const auto &P : FrameData.Allocas) {
1193 AllocaInst *Alloca = P.Alloca;
1194 auto *G = GetFramePointer(Alloca);
1196 // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
1197 // here, as we are changing location of the instruction.
1198 G->takeName(Alloca);
1199 Alloca->replaceAllUsesWith(G);
1200 Alloca->eraseFromParent();
1202 return;
  // If we found any allocas, replace all of their remaining uses with GEP
  // instructions. To preserve debuggability, we replace the uses of allocas
  // in dbg.declares and dbg.values with the reload from the frame.
  // Note: We cannot replace the allocas with GEP instructions
  // indiscriminately, as some of the uses may not be dominated by CoroBegin.
1210 Builder.SetInsertPoint(Shape.AllocaSpillBlock,
1211 Shape.AllocaSpillBlock->begin());
1212 SmallVector<Instruction *, 4> UsersToUpdate;
1213 for (const auto &A : FrameData.Allocas) {
1214 AllocaInst *Alloca = A.Alloca;
1215 UsersToUpdate.clear();
1216 for (User *U : Alloca->users()) {
1217 auto *I = cast<Instruction>(U);
1218 if (DT.dominates(Shape.CoroBegin, I))
1219 UsersToUpdate.push_back(I);
1221 if (UsersToUpdate.empty())
1222 continue;
1223 auto *G = GetFramePointer(Alloca);
1224 G->setName(Alloca->getName() + Twine(".reload.addr"));
1226 SmallVector<DbgVariableIntrinsic *, 4> DIs;
1227 SmallVector<DbgVariableRecord *> DbgVariableRecords;
1228 findDbgUsers(DIs, Alloca, &DbgVariableRecords);
1229 for (auto *DVI : DIs)
1230 DVI->replaceUsesOfWith(Alloca, G);
1231 for (auto *DVR : DbgVariableRecords)
1232 DVR->replaceVariableLocationOp(Alloca, G);
1234 for (Instruction *I : UsersToUpdate) {
      // It is meaningless to retain lifetime intrinsics that refer to members
      // of the coroutine frame, and such lifetime intrinsics may block
      // further optimizations.
1238 if (I->isLifetimeStartOrEnd()) {
1239 I->eraseFromParent();
1240 continue;
1243 I->replaceUsesOfWith(Alloca, G);
1246 Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
1247 for (const auto &A : FrameData.Allocas) {
1248 AllocaInst *Alloca = A.Alloca;
1249 if (A.MayWriteBeforeCoroBegin) {
1250 // isEscaped really means potentially modified before CoroBegin.
1251 if (Alloca->isArrayAllocation())
1252 report_fatal_error(
1253 "Coroutines cannot handle copying of array allocas yet");
1255 auto *G = GetFramePointer(Alloca);
1256 auto *Value = Builder.CreateLoad(Alloca->getAllocatedType(), Alloca);
1257 Builder.CreateStore(Value, G);
1259 // For each alias to Alloca created before CoroBegin but used after
1260 // CoroBegin, we recreate them after CoroBegin by applying the offset
1261 // to the pointer in the frame.
1262 for (const auto &Alias : A.Aliases) {
1263 auto *FramePtr = GetFramePointer(Alloca);
1264 auto &Value = *Alias.second;
1265 auto ITy = IntegerType::get(C, Value.getBitWidth());
1266 auto *AliasPtr =
1267 Builder.CreatePtrAdd(FramePtr, ConstantInt::get(ITy, Value));
1268 Alias.first->replaceUsesWithIf(
1269 AliasPtr, [&](Use &U) { return DT.dominates(Shape.CoroBegin, U); });
  // PromiseAlloca is not collected in FrameData.Allocas, so the code above
  // does not handle the case where the PromiseAlloca is written before
  // CoroBegin. This can be problematic in edge cases; see
  // https://github.com/llvm/llvm-project/issues/57861 for an example.
1277 if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca) {
1278 AllocaInst *PA = Shape.SwitchLowering.PromiseAlloca;
    // Check whether there are memory accesses to the promise alloca before
    // CoroBegin.
1280 bool HasAccessingPromiseBeforeCB = llvm::any_of(PA->uses(), [&](Use &U) {
1281 auto *Inst = dyn_cast<Instruction>(U.getUser());
1282 if (!Inst || DT.dominates(Shape.CoroBegin, Inst))
1283 return false;
1285 if (auto *CI = dyn_cast<CallInst>(Inst)) {
1286 // It is fine if the call wouldn't write to the Promise.
1287 // This is possible for @llvm.coro.id intrinsics, which
1288 // would take the promise as the second argument as a
1289 // marker.
1290 if (CI->onlyReadsMemory() ||
1291 CI->onlyReadsMemory(CI->getArgOperandNo(&U)))
1292 return false;
1293 return true;
      return isa<StoreInst>(Inst) ||
             // It may take too much time to track the uses.
             // Be conservative in case the use may escape.
             isa<GetElementPtrInst>(Inst) ||
             // Before opaque pointers were enabled there would always be a
             // bitcast for the promise alloca; now that opaque pointers are
             // the default, this should be fine.
             isa<BitCastInst>(Inst);
1306 if (HasAccessingPromiseBeforeCB) {
1307 Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
1308 auto *G = GetFramePointer(PA);
1309 auto *Value = Builder.CreateLoad(PA->getAllocatedType(), PA);
1310 Builder.CreateStore(Value, G);
// Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
// PHI in InsertedBB.
1317 static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
1318 BasicBlock *InsertedBB,
1319 BasicBlock *PredBB,
1320 PHINode *UntilPHI = nullptr) {
1321 auto *PN = cast<PHINode>(&SuccBB->front());
1322 do {
1323 int Index = PN->getBasicBlockIndex(InsertedBB);
1324 Value *V = PN->getIncomingValue(Index);
1325 PHINode *InputV = PHINode::Create(
1326 V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName());
1327 InputV->insertBefore(InsertedBB->begin());
1328 InputV->addIncoming(V, PredBB);
1329 PN->setIncomingValue(Index, InputV);
1330 PN = dyn_cast<PHINode>(PN->getNextNode());
1331 } while (PN != UntilPHI);
1334 // Rewrites the PHI Nodes in a cleanuppad.
1335 static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
1336 CleanupPadInst *CleanupPad) {
  // For every incoming edge to a CleanupPad we will create a new block
  // holding all incoming values in single-value PHI nodes. We will then
  // create another block to act as a dispatcher (as all unwind edges for
  // related EH blocks must be the same).
1342 // cleanuppad:
1343 // %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
1344 // %3 = cleanuppad within none []
1346 // It will create:
1348 // cleanuppad.corodispatch
1349 // %2 = phi i8[0, %catchswitch], [1, %catch.1]
1350 // %3 = cleanuppad within none []
1351 // switch i8 % 2, label %unreachable
1352 // [i8 0, label %cleanuppad.from.catchswitch
1353 // i8 1, label %cleanuppad.from.catch.1]
1354 // cleanuppad.from.catchswitch:
1355 // %4 = phi i32 [%0, %catchswitch]
1356 // br %label cleanuppad
1357 // cleanuppad.from.catch.1:
1358 // %6 = phi i32 [%1, %catch.1]
1359 // br %label cleanuppad
1360 // cleanuppad:
1361 // %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
1362 // [%6, %cleanuppad.from.catch.1]
1364 // Unreachable BB, in case switching on an invalid value in the dispatcher.
1365 auto *UnreachBB = BasicBlock::Create(
1366 CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent());
1367 IRBuilder<> Builder(UnreachBB);
1368 Builder.CreateUnreachable();
1370 // Create a new cleanuppad which will be the dispatcher.
1371 auto *NewCleanupPadBB =
1372 BasicBlock::Create(CleanupPadBB->getContext(),
1373 CleanupPadBB->getName() + Twine(".corodispatch"),
1374 CleanupPadBB->getParent(), CleanupPadBB);
1375 Builder.SetInsertPoint(NewCleanupPadBB);
1376 auto *SwitchType = Builder.getInt8Ty();
1377 auto *SetDispatchValuePN =
1378 Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB));
1379 CleanupPad->removeFromParent();
1380 CleanupPad->insertAfter(SetDispatchValuePN);
1381 auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB,
1382 pred_size(CleanupPadBB));
1384 int SwitchIndex = 0;
1385 SmallVector<BasicBlock *, 8> Preds(predecessors(CleanupPadBB));
1386 for (BasicBlock *Pred : Preds) {
1387 // Create a new cleanuppad and move the PHI values to there.
1388 auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(),
1389 CleanupPadBB->getName() +
1390 Twine(".from.") + Pred->getName(),
1391 CleanupPadBB->getParent(), CleanupPadBB);
1392 updatePhiNodes(CleanupPadBB, Pred, CaseBB);
1393 CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
1394 Pred->getName());
1395 Builder.SetInsertPoint(CaseBB);
1396 Builder.CreateBr(CleanupPadBB);
1397 movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB);
1399 // Update this Pred to the new unwind point.
1400 setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB);
1402 // Setup the switch in the dispatcher.
1403 auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex);
1404 SetDispatchValuePN->addIncoming(SwitchConstant, Pred);
1405 SwitchOnDispatch->addCase(SwitchConstant, CaseBB);
1406 SwitchIndex++;
1410 static void cleanupSinglePredPHIs(Function &F) {
1411 SmallVector<PHINode *, 32> Worklist;
1412 for (auto &BB : F) {
1413 for (auto &Phi : BB.phis()) {
1414 if (Phi.getNumIncomingValues() == 1) {
1415 Worklist.push_back(&Phi);
1416 } else
1417 break;
1420 while (!Worklist.empty()) {
1421 auto *Phi = Worklist.pop_back_val();
1422 auto *OriginalValue = Phi->getIncomingValue(0);
1423 Phi->replaceAllUsesWith(OriginalValue);
1427 static void rewritePHIs(BasicBlock &BB) {
  // For every incoming edge we will create a block holding all
  // incoming values in single-value PHI nodes.
1431 // loop:
1432 // %n.val = phi i32[%n, %entry], [%inc, %loop]
1434 // It will create:
1436 // loop.from.entry:
1437 // %n.loop.pre = phi i32 [%n, %entry]
1438 // br %label loop
1439 // loop.from.loop:
1440 // %inc.loop.pre = phi i32 [%inc, %loop]
1441 // br %label loop
1443 // After this rewrite, further analysis will ignore any phi nodes with more
1444 // than one incoming edge.
1446 // TODO: Simplify PHINodes in the basic block to remove duplicate
1447 // predecessors.
1449 // Special case for CleanupPad: all EH blocks must have the same unwind edge
1450 // so we need to create an additional "dispatcher" block.
1451 if (auto *CleanupPad =
1452 dyn_cast_or_null<CleanupPadInst>(BB.getFirstNonPHI())) {
1453 SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
1454 for (BasicBlock *Pred : Preds) {
1455 if (CatchSwitchInst *CS =
1456 dyn_cast<CatchSwitchInst>(Pred->getTerminator())) {
        // CleanupPad with a CatchSwitch predecessor: therefore this is an
        // unwind destination that needs to be handled specially.
1459 assert(CS->getUnwindDest() == &BB);
1460 (void)CS;
1461 rewritePHIsForCleanupPad(&BB, CleanupPad);
1462 return;
1467 LandingPadInst *LandingPad = nullptr;
1468 PHINode *ReplPHI = nullptr;
1469 if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
1470 // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
1471 // We replace the original landing pad with a PHINode that will collect the
1472 // results from all of them.
1473 ReplPHI = PHINode::Create(LandingPad->getType(), 1, "");
1474 ReplPHI->insertBefore(LandingPad->getIterator());
1475 ReplPHI->takeName(LandingPad);
1476 LandingPad->replaceAllUsesWith(ReplPHI);
1477 // We will erase the original landing pad at the end of this function after
1478 // ehAwareSplitEdge cloned it in the transition blocks.
1481 SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
1482 for (BasicBlock *Pred : Preds) {
1483 auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
1484 IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
1486 // Stop the moving of values at ReplPHI, as this is either null or the PHI
1487 // that replaced the landing pad.
1488 movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI);
1491 if (LandingPad) {
    // The calls to ehAwareSplitEdge above cloned the original landing pad, so
    // we no longer need it.
1494 LandingPad->eraseFromParent();
1498 static void rewritePHIs(Function &F) {
1499 SmallVector<BasicBlock *, 8> WorkList;
1501 for (BasicBlock &BB : F)
1502 if (auto *PN = dyn_cast<PHINode>(&BB.front()))
1503 if (PN->getNumIncomingValues() > 1)
1504 WorkList.push_back(&BB);
1506 for (BasicBlock *BB : WorkList)
1507 rewritePHIs(*BB);
1510 // Splits the block at a particular instruction unless it is the first
1511 // instruction in the block with a single predecessor.
1512 static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
1513 auto *BB = I->getParent();
1514 if (&BB->front() == I) {
1515 if (BB->getSinglePredecessor()) {
1516 BB->setName(Name);
1517 return BB;
1520 return BB->splitBasicBlock(I, Name);
// Split above and below a particular instruction so that it
// ends up alone in its own block.
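// For example, splitAround(I, "Suspend") (an arbitrary example name) leaves I
// alone in a block named "Suspend", with the preceding instructions kept in
// the original block and the following instructions moved into a block named
// "AfterSuspend", following the Name / "After" + Name scheme below.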
1525 static void splitAround(Instruction *I, const Twine &Name) {
1526 splitBlockIfNotFirst(I, Name);
1527 splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
1530 /// After we split the coroutine, will the given basic block be along
1531 /// an obvious exit path for the resumption function?
1532 static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
1533 unsigned depth = 3) {
1534 // If we've bottomed out our depth count, stop searching and assume
1535 // that the path might loop back.
1536 if (depth == 0) return false;
1538 // If this is a suspend block, we're about to exit the resumption function.
1539 if (coro::isSuspendBlock(BB))
1540 return true;
1542 // Recurse into the successors.
1543 for (auto *Succ : successors(BB)) {
1544 if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
1545 return false;
1546 }
1548 // If none of the successors leads back in a loop, we're on an exit/abort.
1549 return true;
1550 }
1552 static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
1553 // Look for a free that isn't sufficiently obviously followed by
1554 // either a suspend or a termination, i.e. something that will leave
1555 // the coro resumption frame.
1556 for (auto *U : AI->users()) {
1557 auto FI = dyn_cast<CoroAllocaFreeInst>(U);
1558 if (!FI) continue;
1560 if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
1561 return true;
1562 }
1564 // If we never found one, we don't need a stack save.
1565 return false;
1566 }
1568 /// Turn each of the given local allocas into a normal (dynamic) alloca
1569 /// instruction.
1570 static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
1571 SmallVectorImpl<Instruction*> &DeadInsts) {
1572 for (auto *AI : LocalAllocas) {
1573 IRBuilder<> Builder(AI);
1575 // Save the stack depth. Try to avoid doing this if the stackrestore
1576 // is going to immediately precede a return or other exit from the function.
1577 Value *StackSave = nullptr;
1578 if (localAllocaNeedsStackSave(AI))
1579 StackSave = Builder.CreateStackSave();
1581 // Allocate memory.
1582 auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
1583 Alloca->setAlignment(AI->getAlignment());
1585 for (auto *U : AI->users()) {
1586 // Replace gets with the allocation.
1587 if (isa<CoroAllocaGetInst>(U)) {
1588 U->replaceAllUsesWith(Alloca);
1590 // Replace frees with stackrestores. This is safe because
1591 // alloca.alloc is required to obey a stack discipline, although we
1592 // don't enforce that structurally.
1593 } else {
1594 auto FI = cast<CoroAllocaFreeInst>(U);
1595 if (StackSave) {
1596 Builder.SetInsertPoint(FI);
1597 Builder.CreateStackRestore(StackSave);
1598 }
1599 }
1600 DeadInsts.push_back(cast<Instruction>(U));
1601 }
1603 DeadInsts.push_back(AI);
1604 }
1605 }
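// The next three helpers model reads and writes of the thread-local
// swifterror register. Each emits a call through a null function pointer as a
// placeholder and records it in Shape.SwiftErrorOps so that a later stage
// (coroutine splitting) can rewrite the placeholders into real swifterror
// accesses.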
1607 /// Get the current swifterror value.
1608 static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
1609 coro::Shape &Shape) {
1610 // Make a fake function pointer as a sort of intrinsic.
1611 auto FnTy = FunctionType::get(ValueTy, {}, false);
1612 auto Fn = ConstantPointerNull::get(Builder.getPtrTy());
1614 auto Call = Builder.CreateCall(FnTy, Fn, {});
1615 Shape.SwiftErrorOps.push_back(Call);
1617 return Call;
1618 }
1620 /// Set the given value as the current swifterror value.
1622 /// Returns a slot that can be used as a swifterror slot.
1623 static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
1624 coro::Shape &Shape) {
1625 // Make a fake function pointer as a sort of intrinsic.
1626 auto FnTy = FunctionType::get(Builder.getPtrTy(),
1627 {V->getType()}, false);
1628 auto Fn = ConstantPointerNull::get(Builder.getPtrTy());
1630 auto Call = Builder.CreateCall(FnTy, Fn, { V });
1631 Shape.SwiftErrorOps.push_back(Call);
1633 return Call;
1634 }
1636 /// Set the swifterror value from the given alloca before a call,
1637 /// then put it back in the alloca afterwards.
1639 /// Returns an address that will stand in for the swifterror slot
1640 /// until splitting.
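///
/// Rough sketch of what gets emitted around the call (names illustrative,
/// <set-placeholder>/<get-placeholder> are the fake calls described above):
///   %before = load <ValueTy>, ptr %Alloca
///   %slot   = <set-placeholder>(%before)   ; stand-in slot address
///   ... the call itself, which the caller rewrites to use %slot ...
///   %after  = <get-placeholder>()
///   store <ValueTy> %after, ptr %Alloca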
1641 static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
1642 AllocaInst *Alloca,
1643 coro::Shape &Shape) {
1644 auto ValueTy = Alloca->getAllocatedType();
1645 IRBuilder<> Builder(Call);
1647 // Load the current value from the alloca and set it as the
1648 // swifterror value.
1649 auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
1650 auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);
1652 // Move to after the call. Since swifterror only has a guaranteed
1653 // value on normal exits, we can ignore implicit and explicit unwind
1654 // edges.
1655 if (isa<CallInst>(Call)) {
1656 Builder.SetInsertPoint(Call->getNextNode());
1657 } else {
1658 auto Invoke = cast<InvokeInst>(Call);
1659 Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
1660 }
1662 // Get the current swifterror value and store it to the alloca.
1663 auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
1664 Builder.CreateStore(ValueAfterCall, Alloca);
1666 return Addr;
1667 }
1669 /// Eliminate a formerly-swifterror alloca by inserting the get/set
1670 /// intrinsics and attempting to MemToReg the alloca away.
1671 static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
1672 coro::Shape &Shape) {
1673 for (Use &Use : llvm::make_early_inc_range(Alloca->uses())) {
1674 // swifterror values can only be used in very specific ways.
1675 // We take advantage of that here.
1676 auto User = Use.getUser();
1677 if (isa<LoadInst>(User) || isa<StoreInst>(User))
1678 continue;
1680 assert(isa<CallInst>(User) || isa<InvokeInst>(User));
1681 auto Call = cast<Instruction>(User);
1683 auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
1685 // Use the returned slot address as the call argument.
1686 Use.set(Addr);
1687 }
1689 // All the uses should be loads and stores now.
1690 assert(isAllocaPromotable(Alloca));
1691 }
1693 /// "Eliminate" a swifterror argument by reducing it to the alloca case
1694 /// and then loading and storing in the prologue and epilogue.
1696 /// The argument keeps the swifterror flag.
1697 static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
1698 coro::Shape &Shape,
1699 SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
1700 IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
1702 auto ArgTy = cast<PointerType>(Arg.getType());
1703 auto ValueTy = PointerType::getUnqual(F.getContext());
1705 // Reduce to the alloca case:
1707 // Create an alloca and replace all uses of the arg with it.
1708 auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
1709 Arg.replaceAllUsesWith(Alloca);
1711 // Set an initial value in the alloca. swifterror is always null on entry.
1712 auto InitialValue = Constant::getNullValue(ValueTy);
1713 Builder.CreateStore(InitialValue, Alloca);
1715 // Find all the suspends in the function and save and restore around them.
1716 for (auto *Suspend : Shape.CoroSuspends) {
1717 (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
1718 }
1720 // Find all the coro.ends in the function and restore the error value.
1721 for (auto *End : Shape.CoroEnds) {
1722 Builder.SetInsertPoint(End);
1723 auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
1724 (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
1725 }
1727 // Now we can use the alloca logic.
1728 AllocasToPromote.push_back(Alloca);
1729 eliminateSwiftErrorAlloca(F, Alloca, Shape);
1730 }
1732 /// Eliminate all problematic uses of swifterror arguments and allocas
1733 /// from the function. We'll fix them up later when splitting the function.
1734 static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
1735 SmallVector<AllocaInst*, 4> AllocasToPromote;
1737 // Look for a swifterror argument.
1738 for (auto &Arg : F.args()) {
1739 if (!Arg.hasSwiftErrorAttr()) continue;
1741 eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
1742 break;
1743 }
1745 // Look for swifterror allocas.
1746 for (auto &Inst : F.getEntryBlock()) {
1747 auto Alloca = dyn_cast<AllocaInst>(&Inst);
1748 if (!Alloca || !Alloca->isSwiftError()) continue;
1750 // Clear the swifterror flag.
1751 Alloca->setSwiftError(false);
1753 AllocasToPromote.push_back(Alloca);
1754 eliminateSwiftErrorAlloca(F, Alloca, Shape);
1755 }
1757 // If we have any allocas to promote, compute a dominator tree and
1758 // promote them en masse.
1759 if (!AllocasToPromote.empty()) {
1760 DominatorTree DT(F);
1761 PromoteMemToReg(AllocasToPromote, DT);
1762 }
1763 }
1765 // For each local variable whose users all live inside a single suspended
1766 // region, sink its lifetime.start markers to a point after the suspend
1767 // block. Doing so minimizes the lifetime of each variable and hence the
1768 // amount of data we end up putting on the coroutine frame.
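//
// Illustrative sketch (IR names are made up for the example):
//   %x = alloca i64
//   call void @llvm.lifetime.start.p0(i64 8, ptr %x)  ; originally near entry
//   ... suspend point ...
//   ; every other use of %x sits below the suspend
// If all of %x's users (other than the lifetime markers) are dominated by the
// block right after the suspend and do not cross another suspend point, the
// lifetime.start is re-emitted in that block and the original markers are
// erased, so %x no longer has to be kept alive across the suspend.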
1769 static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
1770 SuspendCrossingInfo &Checker,
1771 const DominatorTree &DT) {
1772 if (F.hasOptNone())
1773 return;
1775 // Collect all possible basic blocks which may dominate all uses of allocas.
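// The candidates are the entry block and the single successor of each suspend
// block, i.e. the blocks where a sunk lifetime.start keeps the alloca from
// being live across the preceding suspend points.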
1776 SmallPtrSet<BasicBlock *, 4> DomSet;
1777 DomSet.insert(&F.getEntryBlock());
1778 for (auto *CSI : Shape.CoroSuspends) {
1779 BasicBlock *SuspendBlock = CSI->getParent();
1780 assert(coro::isSuspendBlock(SuspendBlock) &&
1781 SuspendBlock->getSingleSuccessor() &&
1782 "should have split coro.suspend into its own block");
1783 DomSet.insert(SuspendBlock->getSingleSuccessor());
1784 }
1786 for (Instruction &I : instructions(F)) {
1787 AllocaInst* AI = dyn_cast<AllocaInst>(&I);
1788 if (!AI)
1789 continue;
1791 for (BasicBlock *DomBB : DomSet) {
1792 bool Valid = true;
1793 SmallVector<Instruction *, 1> Lifetimes;
1795 auto isLifetimeStart = [](Instruction* I) {
1796 if (auto* II = dyn_cast<IntrinsicInst>(I))
1797 return II->getIntrinsicID() == Intrinsic::lifetime_start;
1798 return false;
1799 };
1801 auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
1802 if (isLifetimeStart(U)) {
1803 Lifetimes.push_back(U);
1804 return true;
1805 }
1806 if (!U->hasOneUse() || U->stripPointerCasts() != AI)
1807 return false;
1808 if (isLifetimeStart(U->user_back())) {
1809 Lifetimes.push_back(U->user_back());
1810 return true;
1811 }
1812 return false;
1813 };
1815 for (User *U : AI->users()) {
1816 Instruction *UI = cast<Instruction>(U);
1817 // If every user other than the lifetime.start markers is dominated by
1818 // one of the candidate blocks and does not cross a suspend point, then
1819 // there is no need to spill the alloca; otherwise this candidate block
1820 // is not valid.
1821 if (!DT.dominates(DomBB, UI->getParent()) ||
1822 Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
1823 // Skip lifetime.start, GEP and bitcast used by lifetime.start
1824 // markers.
1825 if (collectLifetimeStart(UI, AI))
1826 continue;
1827 Valid = false;
1828 break;
1829 }
1830 }
1831 // Sink the lifetime.start markers into the dominating block when they
1832 // are the only users left outside the region.
1833 if (Valid && Lifetimes.size() != 0) {
1834 auto *NewLifetime = Lifetimes[0]->clone();
1835 NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), AI);
1836 NewLifetime->insertBefore(DomBB->getTerminator());
1838 // The original lifetime.start markers outside the region are no longer necessary.
1839 for (Instruction *S : Lifetimes)
1840 S->eraseFromParent();
1842 break;
1843 }
1844 }
1845 }
1846 }
1848 static std::optional<std::pair<Value &, DIExpression &>>
1849 salvageDebugInfoImpl(SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
1850 bool UseEntryValue, Function *F, Value *Storage,
1851 DIExpression *Expr, bool SkipOutermostLoad) {
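// Walk backwards from Storage through loads, stores and other salvageable
// instructions, folding each step into Expr, until Storage is no longer an
// instruction (e.g. it has become a function argument) or salvaging fails.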
1852 IRBuilder<> Builder(F->getContext());
1853 auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
1854 while (isa<IntrinsicInst>(InsertPt))
1855 ++InsertPt;
1856 Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);
1858 while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
1859 if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
1860 Storage = LdInst->getPointerOperand();
1861 // FIXME: This is a heuristic that works around the fact that
1862 // LLVM IR debug intrinsics cannot yet distinguish between
1863 // memory and value locations: Because a dbg.declare(alloca) is
1864 // implicitly a memory location no DW_OP_deref operation for the
1865 // last direct load from an alloca is necessary. This condition
1866 // effectively drops the *last* DW_OP_deref in the expression.
1867 if (!SkipOutermostLoad)
1868 Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
1869 } else if (auto *StInst = dyn_cast<StoreInst>(Inst)) {
1870 Storage = StInst->getValueOperand();
1871 } else {
1872 SmallVector<uint64_t, 16> Ops;
1873 SmallVector<Value *, 0> AdditionalValues;
1874 Value *Op = llvm::salvageDebugInfoImpl(
1875 *Inst, Expr ? Expr->getNumLocationOperands() : 0, Ops,
1876 AdditionalValues);
1877 if (!Op || !AdditionalValues.empty()) {
1878 // If salvaging failed or salvaging produced more than one location
1879 // operand, give up.
1880 break;
1881 }
1882 Storage = Op;
1883 Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, /*StackValue*/ false);
1884 }
1885 SkipOutermostLoad = false;
1886 }
1887 if (!Storage)
1888 return std::nullopt;
1890 auto *StorageAsArg = dyn_cast<Argument>(Storage);
1891 const bool IsSwiftAsyncArg =
1892 StorageAsArg && StorageAsArg->hasAttribute(Attribute::SwiftAsync);
1894 // Swift async arguments are described by an entry value of the ABI-defined
1895 // register containing the coroutine context.
1896 // Entry values in variadic expressions are not supported.
1897 if (IsSwiftAsyncArg && UseEntryValue && !Expr->isEntryValue() &&
1898 Expr->isSingleLocationExpression())
1899 Expr = DIExpression::prepend(Expr, DIExpression::EntryValue);
1901 // If the coroutine frame is an Argument, store it in an alloca to improve
1902 // its availability (e.g. registers may be clobbered).
1903 // Avoid this if the value is guaranteed to be available through other means
1904 // (e.g. swift ABI guarantees).
1905 if (StorageAsArg && !IsSwiftAsyncArg) {
1906 auto &Cached = ArgToAllocaMap[StorageAsArg];
1907 if (!Cached) {
1908 Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
1909 Storage->getName() + ".debug");
1910 Builder.CreateStore(Storage, Cached);
1911 }
1912 Storage = Cached;
1913 // FIXME: LLVM lacks nuanced semantics to differentiate between
1914 // memory and direct locations at the IR level. The backend will
1915 // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
1916 // location. Thus, if there are deref and offset operations in the
1917 // expression, we need to add a DW_OP_deref at the *start* of the
1918 // expression to first load the contents of the alloca before
1919 // adjusting it with the expression.
1920 Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
1921 }
1923 Expr = Expr->foldConstantMath();
1924 return {{*Storage, *Expr}};
1925 }
1927 void coro::salvageDebugInfo(
1928 SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
1929 DbgVariableIntrinsic &DVI, bool UseEntryValue) {
1931 Function *F = DVI.getFunction();
1932 // Follow the pointer arithmetic all the way back to the incoming
1933 // function argument and convert it into a DIExpression.
1934 bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
1935 Value *OriginalStorage = DVI.getVariableLocationOp(0);
1937 auto SalvagedInfo =
1938 ::salvageDebugInfoImpl(ArgToAllocaMap, UseEntryValue, F, OriginalStorage,
1939 DVI.getExpression(), SkipOutermostLoad);
1940 if (!SalvagedInfo)
1941 return;
1943 Value *Storage = &SalvagedInfo->first;
1944 DIExpression *Expr = &SalvagedInfo->second;
1946 DVI.replaceVariableLocationOp(OriginalStorage, Storage);
1947 DVI.setExpression(Expr);
1948 // We only hoist dbg.declare today; it doesn't make sense to hoist
1949 // dbg.value because it does not have the same function-wide guarantees
1950 // that dbg.declare does.
1951 if (isa<DbgDeclareInst>(DVI)) {
1952 std::optional<BasicBlock::iterator> InsertPt;
1953 if (auto *I = dyn_cast<Instruction>(Storage)) {
1954 InsertPt = I->getInsertionPointAfterDef();
1955 // Update DILocation only if variable was not inlined.
1956 DebugLoc ILoc = I->getDebugLoc();
1957 DebugLoc DVILoc = DVI.getDebugLoc();
1958 if (ILoc && DVILoc &&
1959 DVILoc->getScope()->getSubprogram() ==
1960 ILoc->getScope()->getSubprogram())
1961 DVI.setDebugLoc(I->getDebugLoc());
1962 } else if (isa<Argument>(Storage))
1963 InsertPt = F->getEntryBlock().begin();
1964 if (InsertPt)
1965 DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt);
1966 }
1967 }
1969 void coro::salvageDebugInfo(
1970 SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
1971 DbgVariableRecord &DVR, bool UseEntryValue) {
1973 Function *F = DVR.getFunction();
1974 // Follow the pointer arithmetic all the way back to the incoming
1975 // function argument and convert it into a DIExpression.
1976 bool SkipOutermostLoad = DVR.isDbgDeclare();
1977 Value *OriginalStorage = DVR.getVariableLocationOp(0);
1979 auto SalvagedInfo =
1980 ::salvageDebugInfoImpl(ArgToAllocaMap, UseEntryValue, F, OriginalStorage,
1981 DVR.getExpression(), SkipOutermostLoad);
1982 if (!SalvagedInfo)
1983 return;
1985 Value *Storage = &SalvagedInfo->first;
1986 DIExpression *Expr = &SalvagedInfo->second;
1988 DVR.replaceVariableLocationOp(OriginalStorage, Storage);
1989 DVR.setExpression(Expr);
1990 // We only hoist dbg.declare today; it doesn't make sense to hoist
1991 // dbg.value because it does not have the same function-wide guarantees
1992 // that dbg.declare does.
1993 if (DVR.getType() == DbgVariableRecord::LocationType::Declare) {
1994 std::optional<BasicBlock::iterator> InsertPt;
1995 if (auto *I = dyn_cast<Instruction>(Storage)) {
1996 InsertPt = I->getInsertionPointAfterDef();
1997 // Update DILocation only if variable was not inlined.
1998 DebugLoc ILoc = I->getDebugLoc();
1999 DebugLoc DVRLoc = DVR.getDebugLoc();
2000 if (ILoc && DVRLoc &&
2001 DVRLoc->getScope()->getSubprogram() ==
2002 ILoc->getScope()->getSubprogram())
2003 DVR.setDebugLoc(ILoc);
2004 } else if (isa<Argument>(Storage))
2005 InsertPt = F->getEntryBlock().begin();
2006 if (InsertPt) {
2007 DVR.removeFromParent();
2008 (*InsertPt)->getParent()->insertDbgRecordBefore(&DVR, *InsertPt);
2009 }
2010 }
2011 }
2013 void coro::normalizeCoroutine(Function &F, coro::Shape &Shape,
2014 TargetTransformInfo &TTI) {
2015 // Don't eliminate swifterror in async functions that won't be split.
2016 if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
2017 eliminateSwiftError(F, Shape);
2019 if (Shape.ABI == coro::ABI::Switch &&
2020 Shape.SwitchLowering.PromiseAlloca) {
2021 Shape.getSwitchCoroId()->clearPromise();
2022 }
2024 // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
2025 // intrinsics are in their own blocks to simplify the logic of building up
2026 // SuspendCrossing data.
2027 for (auto *CSI : Shape.CoroSuspends) {
2028 if (auto *Save = CSI->getCoroSave())
2029 splitAround(Save, "CoroSave");
2030 splitAround(CSI, "CoroSuspend");
2031 }
2033 // Put CoroEnds into their own blocks.
2034 for (AnyCoroEndInst *CE : Shape.CoroEnds) {
2035 splitAround(CE, "CoroEnd");
2037 // Emit the musttail call function in a new block before the CoroEnd.
2038 // We do this here so that the right suspend-crossing info is computed for
2039 // the uses of the musttail call. (Arguments to the coro.end instruction
2040 // itself would be ignored.)
2041 if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(CE)) {
2042 auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction();
2043 if (!MustTailCallFn)
2044 continue;
2045 IRBuilder<> Builder(AsyncEnd);
2046 SmallVector<Value *, 8> Args(AsyncEnd->args());
2047 auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
2048 auto *Call = coro::createMustTailCall(
2049 AsyncEnd->getDebugLoc(), MustTailCallFn, TTI, Arguments, Builder);
2050 splitAround(Call, "MustTailCall.Before.CoroEnd");
2051 }
2052 }
2054 // Later code makes structural assumptions about single-predecessor PHIs,
2055 // e.g. that they are not live across a suspend point.
2056 cleanupSinglePredPHIs(F);
2058 // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
2059 // never have its definition separated from the PHI by the suspend point.
2060 rewritePHIs(F);
2061 }
2063 void coro::BaseABI::buildCoroutineFrame(bool OptimizeFrame) {
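// Overview: rematerialize cheap suspend-crossing values, sink lifetime
// markers, collect the remaining values and allocas that must live in the
// frame, lay out the frame type, and finally rewrite all uses to go through
// the frame.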
2064 SuspendCrossingInfo Checker(F, Shape.CoroSuspends, Shape.CoroEnds);
2065 doRematerializations(F, Checker, IsMaterializable);
2067 const DominatorTree DT(F);
2068 if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
2069 Shape.ABI != coro::ABI::RetconOnce)
2070 sinkLifetimeStartMarkers(F, Shape, Checker, DT);
2072 // All values (that are not allocas) that need to be spilled to the frame.
2073 coro::SpillInfo Spills;
2074 // All values defined as allocas that need to live in the frame.
2075 SmallVector<coro::AllocaInfo, 8> Allocas;
2077 // Collect the spills for arguments and other not-materializable values.
2078 coro::collectSpillsFromArgs(Spills, F, Checker);
2079 SmallVector<Instruction *, 4> DeadInstructions;
2080 SmallVector<CoroAllocaAllocInst *, 4> LocalAllocas;
2081 coro::collectSpillsAndAllocasFromInsts(Spills, Allocas, DeadInstructions,
2082 LocalAllocas, F, Checker, DT, Shape);
2083 coro::collectSpillsFromDbgInfo(Spills, F, Checker);
2085 LLVM_DEBUG(dumpAllocas(Allocas));
2086 LLVM_DEBUG(dumpSpills("Spills", Spills));
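// For the retcon and async ABIs the frame storage only becomes available at
// coro.begin, so uses of spilled values must not appear before it; any such
// uses are sunk below coro.begin before the spills are inserted.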
2088 if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
2089 Shape.ABI == coro::ABI::Async)
2090 sinkSpillUsesAfterCoroBegin(DT, Shape.CoroBegin, Spills, Allocas);
2092 // Build frame
2093 FrameDataInfo FrameData(Spills, Allocas);
2094 Shape.FrameTy = buildFrameType(F, Shape, FrameData, OptimizeFrame);
2095 Shape.FramePtr = Shape.CoroBegin;
2096 // For now, this works for C++ programs only.
2097 buildFrameDebugInfo(F, Shape, FrameData);
2098 // Insert spills and reloads
2099 insertSpills(FrameData, Shape);
2100 lowerLocalAllocas(LocalAllocas, DeadInstructions);
2102 for (auto *I : DeadInstructions)
2103 I->eraseFromParent();