1 //===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// This file contains classes used to discover whether, for a particular value,
// its definition precedes and its uses follow a suspend block. Such a value is
// referred to as a suspend-crossing value.
//
// Using the information discovered, we form a coroutine frame structure to
// contain those values. All uses of those values are replaced with appropriate
// GEP + load from the coroutine frame. At the point of definition we spill the
// value into the coroutine frame.
16 //===----------------------------------------------------------------------===//
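// For illustration (an informal sketch; %x, @f and @use are made-up names,
// while the ".spill.addr"/".reload" suffixes and the "<func>.Frame" type name
// follow the code below), a value that crosses a suspend point such as
//
//   %x = call i32 @f()
//   ; ... suspend point ...
//   call void @use(i32 %x)
//
// is roughly rewritten by insertSpills() into
//
//   %x = call i32 @f()
//   %x.spill.addr = getelementptr inbounds %f.Frame, ptr %frame, i32 0, i32 N
//   store i32 %x, ptr %x.spill.addr
//   ; ... suspend point ...
//   %x.reload.addr = getelementptr inbounds %f.Frame, ptr %frame, i32 0, i32 N
//   %x.reload = load i32, ptr %x.reload.addr
//   call void @use(i32 %x.reload)
//
// where N is the layout field index assigned to %x by FrameTypeBuilder.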
18 #include "CoroInternal.h"
19 #include "llvm/ADT/ScopeExit.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/Analysis/StackLifetime.h"
22 #include "llvm/IR/DIBuilder.h"
23 #include "llvm/IR/DebugInfo.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/InstIterator.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/OptimizedStructLayout.h"
31 #include "llvm/Transforms/Coroutines/ABI.h"
32 #include "llvm/Transforms/Coroutines/CoroInstr.h"
33 #include "llvm/Transforms/Coroutines/MaterializationUtils.h"
34 #include "llvm/Transforms/Coroutines/SpillUtils.h"
35 #include "llvm/Transforms/Coroutines/SuspendCrossingInfo.h"
36 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37 #include "llvm/Transforms/Utils/Local.h"
38 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
39 #include <algorithm>
40 #include <optional>
42 using namespace llvm;
44 extern cl::opt<bool> UseNewDbgInfoFormat;
46 #define DEBUG_TYPE "coro-frame"
48 namespace {
49 class FrameTypeBuilder;
50 // Mapping from the to-be-spilled value to all the users that need reload.
51 struct FrameDataInfo {
  // All the values (that are not allocas) that need to be spilled to the
  // frame.
54 coro::SpillInfo &Spills;
55 // Allocas contains all values defined as allocas that need to live in the
56 // frame.
57 SmallVectorImpl<coro::AllocaInfo> &Allocas;
59 FrameDataInfo(coro::SpillInfo &Spills,
60 SmallVectorImpl<coro::AllocaInfo> &Allocas)
61 : Spills(Spills), Allocas(Allocas) {}
63 SmallVector<Value *, 8> getAllDefs() const {
64 SmallVector<Value *, 8> Defs;
65 for (const auto &P : Spills)
66 Defs.push_back(P.first);
67 for (const auto &A : Allocas)
68 Defs.push_back(A.Alloca);
69 return Defs;
72 uint32_t getFieldIndex(Value *V) const {
73 auto Itr = FieldIndexMap.find(V);
74 assert(Itr != FieldIndexMap.end() &&
75 "Value does not have a frame field index");
76 return Itr->second;
79 void setFieldIndex(Value *V, uint32_t Index) {
80 assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) &&
81 "Cannot set the index for the same field twice.");
82 FieldIndexMap[V] = Index;
85 Align getAlign(Value *V) const {
86 auto Iter = FieldAlignMap.find(V);
87 assert(Iter != FieldAlignMap.end());
88 return Iter->second;
91 void setAlign(Value *V, Align AL) {
92 assert(FieldAlignMap.count(V) == 0);
93 FieldAlignMap.insert({V, AL});
96 uint64_t getDynamicAlign(Value *V) const {
97 auto Iter = FieldDynamicAlignMap.find(V);
98 assert(Iter != FieldDynamicAlignMap.end());
99 return Iter->second;
102 void setDynamicAlign(Value *V, uint64_t Align) {
103 assert(FieldDynamicAlignMap.count(V) == 0);
104 FieldDynamicAlignMap.insert({V, Align});
107 uint64_t getOffset(Value *V) const {
108 auto Iter = FieldOffsetMap.find(V);
109 assert(Iter != FieldOffsetMap.end());
110 return Iter->second;
113 void setOffset(Value *V, uint64_t Offset) {
114 assert(FieldOffsetMap.count(V) == 0);
115 FieldOffsetMap.insert({V, Offset});
118 // Remap the index of every field in the frame, using the final layout index.
119 void updateLayoutIndex(FrameTypeBuilder &B);
121 private:
122 // LayoutIndexUpdateStarted is used to avoid updating the index of any field
123 // twice by mistake.
124 bool LayoutIndexUpdateStarted = false;
125 // Map from values to their slot indexes on the frame. They will be first set
126 // with their original insertion field index. After the frame is built, their
127 // indexes will be updated into the final layout index.
128 DenseMap<Value *, uint32_t> FieldIndexMap;
129 // Map from values to their alignment on the frame. They would be set after
130 // the frame is built.
131 DenseMap<Value *, Align> FieldAlignMap;
132 DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
133 // Map from values to their offset on the frame. They would be set after
134 // the frame is built.
135 DenseMap<Value *, uint64_t> FieldOffsetMap;
137 } // namespace
139 #ifndef NDEBUG
140 static void dumpSpills(StringRef Title, const coro::SpillInfo &Spills) {
141 dbgs() << "------------- " << Title << " --------------\n";
142 for (const auto &E : Spills) {
143 E.first->dump();
144 dbgs() << " user: ";
145 for (auto *I : E.second)
146 I->dump();
150 static void dumpAllocas(const SmallVectorImpl<coro::AllocaInfo> &Allocas) {
151 dbgs() << "------------- Allocas --------------\n";
152 for (const auto &A : Allocas) {
153 A.Alloca->dump();
156 #endif
158 namespace {
159 using FieldIDType = size_t;
// We cannot rely solely on the natural alignment of a type when building a
// coroutine frame: if the alignment specified on the alloca instruction
// differs from the natural alignment of the alloca type, we will need to
// insert padding.
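// For example, an `alloca i64, align 16` stores an 8-byte value, but its
// frame field must land at a 16-byte-aligned offset, which the natural i64
// alignment alone would not guarantee.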
164 class FrameTypeBuilder {
165 private:
166 struct Field {
167 uint64_t Size;
168 uint64_t Offset;
169 Type *Ty;
170 FieldIDType LayoutFieldIndex;
171 Align Alignment;
172 Align TyAlignment;
173 uint64_t DynamicAlignBuffer;
176 const DataLayout &DL;
177 LLVMContext &Context;
178 uint64_t StructSize = 0;
179 Align StructAlign;
180 bool IsFinished = false;
182 std::optional<Align> MaxFrameAlignment;
184 SmallVector<Field, 8> Fields;
185 DenseMap<Value*, unsigned> FieldIndexByKey;
187 public:
188 FrameTypeBuilder(LLVMContext &Context, const DataLayout &DL,
189 std::optional<Align> MaxFrameAlignment)
190 : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
192 /// Add a field to this structure for the storage of an `alloca`
193 /// instruction.
194 [[nodiscard]] FieldIDType addFieldForAlloca(AllocaInst *AI,
195 bool IsHeader = false) {
196 Type *Ty = AI->getAllocatedType();
198 // Make an array type if this is a static array allocation.
199 if (AI->isArrayAllocation()) {
200 if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
201 Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
202 else
203 report_fatal_error("Coroutines cannot handle non static allocas yet");
206 return addField(Ty, AI->getAlign(), IsHeader);
  /// We want to put allocas whose lifetime ranges do not overlap into one
  /// slot of the coroutine frame.
  /// Consider the example at: https://bugs.llvm.org/show_bug.cgi?id=45566
213 /// cppcoro::task<void> alternative_paths(bool cond) {
214 /// if (cond) {
215 /// big_structure a;
216 /// process(a);
217 /// co_await something();
218 /// } else {
219 /// big_structure b;
220 /// process2(b);
221 /// co_await something();
222 /// }
223 /// }
  /// We want to put variable a and variable b in the same slot to
  /// reduce the size of the coroutine frame.
  ///
  /// This function uses the StackLifetime algorithm to partition the
  /// AllocaInsts in Spills into non-overlapping sets, so that allocas in the
  /// same set can share one slot in the coroutine frame. A field is then added
  /// for each set, using the largest alloca's type as the field type.
  ///
  /// Side effect: because we sort the allocas, their order in the frame may
  /// differ from their order in the source code.
236 void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
237 coro::Shape &Shape, bool OptimizeFrame);
239 /// Add a field to this structure.
240 [[nodiscard]] FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment,
241 bool IsHeader = false,
242 bool IsSpillOfValue = false) {
243 assert(!IsFinished && "adding fields to a finished builder");
244 assert(Ty && "must provide a type for a field");
246 // The field size is always the alloc size of the type.
247 uint64_t FieldSize = DL.getTypeAllocSize(Ty);
    // For an alloca with size=0, we don't need to add a field; it can just
    // point to any index in the frame. Use index 0.
251 if (FieldSize == 0) {
252 return 0;
255 // The field alignment might not be the type alignment, but we need
256 // to remember the type alignment anyway to build the type.
257 // If we are spilling values we don't need to worry about ABI alignment
258 // concerns.
259 Align ABIAlign = DL.getABITypeAlign(Ty);
260 Align TyAlignment = ABIAlign;
261 if (IsSpillOfValue && MaxFrameAlignment && *MaxFrameAlignment < ABIAlign)
262 TyAlignment = *MaxFrameAlignment;
263 Align FieldAlignment = MaybeFieldAlignment.value_or(TyAlignment);
    // The field alignment could be bigger than the max frame alignment; in
    // that case we request additional storage to be able to dynamically align
    // the pointer.
268 uint64_t DynamicAlignBuffer = 0;
269 if (MaxFrameAlignment && (FieldAlignment > *MaxFrameAlignment)) {
270 DynamicAlignBuffer =
271 offsetToAlignment(MaxFrameAlignment->value(), FieldAlignment);
272 FieldAlignment = *MaxFrameAlignment;
273 FieldSize = FieldSize + DynamicAlignBuffer;
276 // Lay out header fields immediately.
277 uint64_t Offset;
278 if (IsHeader) {
279 Offset = alignTo(StructSize, FieldAlignment);
280 StructSize = Offset + FieldSize;
282 // Everything else has a flexible offset.
283 } else {
284 Offset = OptimizedStructLayoutField::FlexibleOffset;
287 Fields.push_back({FieldSize, Offset, Ty, 0, FieldAlignment, TyAlignment,
288 DynamicAlignBuffer});
289 return Fields.size() - 1;
292 /// Finish the layout and create the struct type with the given name.
293 StructType *finish(StringRef Name);
295 uint64_t getStructSize() const {
296 assert(IsFinished && "not yet finished!");
297 return StructSize;
300 Align getStructAlign() const {
301 assert(IsFinished && "not yet finished!");
302 return StructAlign;
305 FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
306 assert(IsFinished && "not yet finished!");
307 return Fields[Id].LayoutFieldIndex;
310 Field getLayoutField(FieldIDType Id) const {
311 assert(IsFinished && "not yet finished!");
312 return Fields[Id];
315 } // namespace
317 void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
318 auto Updater = [&](Value *I) {
319 auto Field = B.getLayoutField(getFieldIndex(I));
320 setFieldIndex(I, Field.LayoutFieldIndex);
321 setAlign(I, Field.Alignment);
322 uint64_t dynamicAlign =
323 Field.DynamicAlignBuffer
324 ? Field.DynamicAlignBuffer + Field.Alignment.value()
325 : 0;
326 setDynamicAlign(I, dynamicAlign);
327 setOffset(I, Field.Offset);
329 LayoutIndexUpdateStarted = true;
330 for (auto &S : Spills)
331 Updater(S.first);
332 for (const auto &A : Allocas)
333 Updater(A.Alloca);
334 LayoutIndexUpdateStarted = false;
337 void FrameTypeBuilder::addFieldForAllocas(const Function &F,
338 FrameDataInfo &FrameData,
339 coro::Shape &Shape,
340 bool OptimizeFrame) {
341 using AllocaSetType = SmallVector<AllocaInst *, 4>;
342 SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
344 // We need to add field for allocas at the end of this function.
345 auto AddFieldForAllocasAtExit = make_scope_exit([&]() {
346 for (auto AllocaList : NonOverlapedAllocas) {
347 auto *LargestAI = *AllocaList.begin();
348 FieldIDType Id = addFieldForAlloca(LargestAI);
349 for (auto *Alloca : AllocaList)
350 FrameData.setFieldIndex(Alloca, Id);
354 if (!OptimizeFrame) {
355 for (const auto &A : FrameData.Allocas) {
356 AllocaInst *Alloca = A.Alloca;
357 NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
359 return;
  // Because there are paths from the lifetime.start to coro.end for each
  // alloca, the live ranges of all the allocas overlap in the blocks that
  // contain coro.end and their successor blocks. So we choose to skip these
  // blocks when we calculate the live range for each alloca. This should be
  // reasonable since there shouldn't be uses in these blocks and the
  // coroutine frame shouldn't be used outside the coroutine body.
  //
  // Note that the user of coro.suspend may not be a SwitchInst. However, this
  // case seems too complex to handle. And it is harmless to skip these
  // patterns since it just prevents putting the allocas in the same slot.
374 DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
375 for (auto *CoroSuspendInst : Shape.CoroSuspends) {
376 for (auto *U : CoroSuspendInst->users()) {
377 if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) {
378 auto *SWI = const_cast<SwitchInst *>(ConstSWI);
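        // Save the default destination (the exit path that reaches coro.end)
        // and temporarily retarget it to a case successor, so that the
        // StackLifetime analysis below never follows paths into the coro.end
        // blocks; the saved destination is restored after the analysis.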
379 DefaultSuspendDest[SWI] = SWI->getDefaultDest();
380 SWI->setDefaultDest(SWI->getSuccessor(1));
385 auto ExtractAllocas = [&]() {
386 AllocaSetType Allocas;
387 Allocas.reserve(FrameData.Allocas.size());
388 for (const auto &A : FrameData.Allocas)
389 Allocas.push_back(A.Alloca);
390 return Allocas;
392 StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
393 StackLifetime::LivenessType::May);
394 StackLifetimeAnalyzer.run();
395 auto DoAllocasInterfere = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
396 return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
397 StackLifetimeAnalyzer.getLiveRange(AI2));
399 auto GetAllocaSize = [&](const coro::AllocaInfo &A) {
400 std::optional<TypeSize> RetSize = A.Alloca->getAllocationSize(DL);
401 assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
402 assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
403 return RetSize->getFixedValue();
  // Put larger allocas in front, so they have higher priority to merge, which
  // can potentially save more space. This also keeps each AllocaSet ordered,
  // so we can easily get the largest alloca in a set.
  sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) {
    return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
  });
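  // For example, if a 64-byte alloca %a and a 16-byte alloca %b (made-up
  // names) have disjoint live ranges and compatible alignment, %a seeds a set
  // first and %b is merged into it below, so both share one frame slot whose
  // field type is %a's type.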
412 for (const auto &A : FrameData.Allocas) {
413 AllocaInst *Alloca = A.Alloca;
414 bool Merged = false;
    // Check whether the alloca interferes with any existing
    // NonOverlappedAllocaSet; if it does not, insert the alloca into that
    // set.
418 for (auto &AllocaSet : NonOverlapedAllocas) {
419 assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
420 bool NoInterference = none_of(AllocaSet, [&](auto Iter) {
421 return DoAllocasInterfere(Alloca, Iter);
      // If the alignment of A is a multiple of the alignment of B, the
      // address of A satisfies the alignment requirement of B.
      //
      // There may be other, more fine-grained strategies to handle alignment
      // information during the merging process, but they seem hard to
      // implement and of little benefit.
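      // For example, a set whose largest alloca has align 16 can absorb an
      // align-8 alloca (16 % 8 == 0), but not an align-32 one (16 % 32 != 0).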
429 bool Alignable = [&]() -> bool {
430 auto *LargestAlloca = *AllocaSet.begin();
431 return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
433 }();
434 bool CouldMerge = NoInterference && Alignable;
435 if (!CouldMerge)
436 continue;
437 AllocaSet.push_back(Alloca);
438 Merged = true;
439 break;
441 if (!Merged) {
442 NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
  // Restore the default destination saved for each switch statement above.
447 for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
448 SwitchInst *SWI = SwitchAndDefaultDest.first;
449 BasicBlock *DestBB = SwitchAndDefaultDest.second;
450 SWI->setDefaultDest(DestBB);
452 // This Debug Info could tell us which allocas are merged into one slot.
453 LLVM_DEBUG(for (auto &AllocaSet
454 : NonOverlapedAllocas) {
455 if (AllocaSet.size() > 1) {
456 dbgs() << "In Function:" << F.getName() << "\n";
457 dbgs() << "Find Union Set "
458 << "\n";
459 dbgs() << "\tAllocas are \n";
460 for (auto Alloca : AllocaSet)
461 dbgs() << "\t\t" << *Alloca << "\n";
466 StructType *FrameTypeBuilder::finish(StringRef Name) {
467 assert(!IsFinished && "already finished!");
469 // Prepare the optimal-layout field array.
470 // The Id in the layout field is a pointer to our Field for it.
471 SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
472 LayoutFields.reserve(Fields.size());
473 for (auto &Field : Fields) {
474 LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
475 Field.Offset);
478 // Perform layout.
479 auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
480 StructSize = SizeAndAlign.first;
481 StructAlign = SizeAndAlign.second;
483 auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
484 return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
487 // We need to produce a packed struct type if there's a field whose
488 // assigned offset isn't a multiple of its natural type alignment.
489 bool Packed = [&] {
490 for (auto &LayoutField : LayoutFields) {
491 auto &F = getField(LayoutField);
492 if (!isAligned(F.TyAlignment, LayoutField.Offset))
493 return true;
495 return false;
496 }();
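  // For example, if the optimized layout assigns an i64 field to offset 12,
  // its natural 8-byte alignment is violated, so the struct is created packed
  // and the explicit padding arrays below keep every field at its assigned
  // offset.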
498 // Build the struct body.
499 SmallVector<Type*, 16> FieldTypes;
500 FieldTypes.reserve(LayoutFields.size() * 3 / 2);
501 uint64_t LastOffset = 0;
502 for (auto &LayoutField : LayoutFields) {
503 auto &F = getField(LayoutField);
505 auto Offset = LayoutField.Offset;
507 // Add a padding field if there's a padding gap and we're either
508 // building a packed struct or the padding gap is more than we'd
509 // get from aligning to the field type's natural alignment.
510 assert(Offset >= LastOffset);
511 if (Offset != LastOffset) {
512 if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
513 FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
514 Offset - LastOffset));
517 F.Offset = Offset;
518 F.LayoutFieldIndex = FieldTypes.size();
520 FieldTypes.push_back(F.Ty);
521 if (F.DynamicAlignBuffer) {
522 FieldTypes.push_back(
523 ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer));
525 LastOffset = Offset + F.Size;
528 StructType *Ty = StructType::create(Context, FieldTypes, Name, Packed);
530 #ifndef NDEBUG
531 // Check that the IR layout matches the offsets we expect.
532 auto Layout = DL.getStructLayout(Ty);
533 for (auto &F : Fields) {
534 assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty);
535 assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset);
537 #endif
539 IsFinished = true;
541 return Ty;
544 static void cacheDIVar(FrameDataInfo &FrameData,
545 DenseMap<Value *, DILocalVariable *> &DIVarCache) {
546 for (auto *V : FrameData.getAllDefs()) {
547 if (DIVarCache.contains(V))
548 continue;
550 auto CacheIt = [&DIVarCache, V](const auto &Container) {
551 auto *I = llvm::find_if(Container, [](auto *DDI) {
552 return DDI->getExpression()->getNumElements() == 0;
554 if (I != Container.end())
555 DIVarCache.insert({V, (*I)->getVariable()});
557 CacheIt(findDbgDeclares(V));
558 CacheIt(findDVRDeclares(V));
/// Create a name for Type. It uses MDString to store the newly created string
/// to avoid memory leaks.
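/// For example (following the scheme below): i32 becomes "__int_32", double
/// becomes "__double_", an unnamed struct becomes "__LiteralStructType_", and
/// a struct named "ns::Widget" becomes "ns__Widget".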
564 static StringRef solveTypeName(Type *Ty) {
565 if (Ty->isIntegerTy()) {
    // The longest name in common use may be '__int_128', which has 9
    // characters.
567 SmallString<16> Buffer;
568 raw_svector_ostream OS(Buffer);
569 OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth();
570 auto *MDName = MDString::get(Ty->getContext(), OS.str());
571 return MDName->getString();
574 if (Ty->isFloatingPointTy()) {
575 if (Ty->isFloatTy())
576 return "__float_";
577 if (Ty->isDoubleTy())
578 return "__double_";
579 return "__floating_type_";
582 if (Ty->isPointerTy())
583 return "PointerType";
585 if (Ty->isStructTy()) {
586 if (!cast<StructType>(Ty)->hasName())
587 return "__LiteralStructType_";
589 auto Name = Ty->getStructName();
591 SmallString<16> Buffer(Name);
592 for (auto &Iter : Buffer)
593 if (Iter == '.' || Iter == ':')
594 Iter = '_';
595 auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
596 return MDName->getString();
599 return "UnknownType";
602 static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
603 const DataLayout &Layout, DIScope *Scope,
604 unsigned LineNum,
605 DenseMap<Type *, DIType *> &DITypeCache) {
606 if (DIType *DT = DITypeCache.lookup(Ty))
607 return DT;
609 StringRef Name = solveTypeName(Ty);
611 DIType *RetType = nullptr;
613 if (Ty->isIntegerTy()) {
614 auto BitWidth = cast<IntegerType>(Ty)->getBitWidth();
615 RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed,
616 llvm::DINode::FlagArtificial);
617 } else if (Ty->isFloatingPointTy()) {
618 RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
619 dwarf::DW_ATE_float,
620 llvm::DINode::FlagArtificial);
621 } else if (Ty->isPointerTy()) {
    // Construct a PointerType pointing to null (i.e., void *) instead of
    // exploring the pointee type, to avoid an infinite recursion problem. For
    // example, we would be in trouble if we traversed this recursively:
    //
    //   struct Node {
    //     Node* ptr;
    //   };
629 RetType =
630 Builder.createPointerType(nullptr, Layout.getTypeSizeInBits(Ty),
631 Layout.getABITypeAlign(Ty).value() * CHAR_BIT,
632 /*DWARFAddressSpace=*/std::nullopt, Name);
633 } else if (Ty->isStructTy()) {
634 auto *DIStruct = Builder.createStructType(
635 Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
636 Layout.getPrefTypeAlign(Ty).value() * CHAR_BIT,
637 llvm::DINode::FlagArtificial, nullptr, llvm::DINodeArray());
639 auto *StructTy = cast<StructType>(Ty);
640 SmallVector<Metadata *, 16> Elements;
641 for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
642 DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout,
643 Scope, LineNum, DITypeCache);
644 assert(DITy);
645 Elements.push_back(Builder.createMemberType(
646 Scope, DITy->getName(), Scope->getFile(), LineNum,
647 DITy->getSizeInBits(), DITy->getAlignInBits(),
648 Layout.getStructLayout(StructTy)->getElementOffsetInBits(I),
649 llvm::DINode::FlagArtificial, DITy));
652 Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements));
654 RetType = DIStruct;
655 } else {
656 LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n");
657 TypeSize Size = Layout.getTypeSizeInBits(Ty);
658 auto *CharSizeType = Builder.createBasicType(
659 Name, 8, dwarf::DW_ATE_unsigned_char, llvm::DINode::FlagArtificial);
661 if (Size <= 8)
662 RetType = CharSizeType;
663 else {
664 if (Size % 8 != 0)
665 Size = TypeSize::getFixed(Size + 8 - (Size % 8));
667 RetType = Builder.createArrayType(
668 Size, Layout.getPrefTypeAlign(Ty).value(), CharSizeType,
669 Builder.getOrCreateArray(Builder.getOrCreateSubrange(0, Size / 8)));
673 DITypeCache.insert({Ty, RetType});
674 return RetType;
/// Build artificial debug info for C++ coroutine frames to allow users to
/// inspect the contents of the frame directly.
///
/// Create debug information for the coroutine frame with the debug name
/// "__coro_frame". The debug information for the fields of the coroutine
/// frame is constructed in the following way:
/// 1. For each value in the frame, we search its dbg.declare uses to find the
///    corresponding debug variable. If we can find the debug variable, we get
///    full and accurate debug information.
/// 2. If we can't get debug information in step 1, we fall back to building a
///    DIType from the IR type. We do this in solveDIType. We only handle
///    integer, float, double, and struct types for now.
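///
/// The resulting composite type is named "<function>.coro_frame_ty" and
/// typically contains __resume_fn, __destroy_fn and __coro_index members plus
/// one member per spilled value; it is attached to an artificial __coro_frame
/// variable so that a debugger can display the live frame contents.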
689 static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
690 FrameDataInfo &FrameData) {
691 DISubprogram *DIS = F.getSubprogram();
  // If there is no DISubprogram for F, it implies the function was not
  // compiled with debug info, so we don't need to generate debug info for the
  // frame either.
695 if (!DIS || !DIS->getUnit() ||
696 !dwarf::isCPlusPlus(
697 (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()) ||
698 DIS->getUnit()->getEmissionKind() != DICompileUnit::DebugEmissionKind::FullDebug)
699 return;
  assert(Shape.ABI == coro::ABI::Switch &&
         "We can only build debug information for C++ coroutines now.\n");
704 DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);
706 assert(Shape.getPromiseAlloca() &&
707 "Coroutine with switch ABI should own Promise alloca");
709 DIFile *DFile = DIS->getFile();
710 unsigned LineNum = DIS->getLine();
712 DICompositeType *FrameDITy = DBuilder.createStructType(
713 DIS->getUnit(), Twine(F.getName() + ".coro_frame_ty").str(),
714 DFile, LineNum, Shape.FrameSize * 8,
715 Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
716 llvm::DINodeArray());
717 StructType *FrameTy = Shape.FrameTy;
718 SmallVector<Metadata *, 16> Elements;
719 DataLayout Layout = F.getDataLayout();
721 DenseMap<Value *, DILocalVariable *> DIVarCache;
722 cacheDIVar(FrameData, DIVarCache);
724 unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
725 unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
726 unsigned IndexIndex = Shape.SwitchLowering.IndexField;
728 DenseMap<unsigned, StringRef> NameCache;
729 NameCache.insert({ResumeIndex, "__resume_fn"});
730 NameCache.insert({DestroyIndex, "__destroy_fn"});
731 NameCache.insert({IndexIndex, "__coro_index"});
733 Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex),
734 *DestroyFnTy = FrameTy->getElementType(DestroyIndex),
735 *IndexTy = FrameTy->getElementType(IndexIndex);
737 DenseMap<unsigned, DIType *> TyCache;
738 TyCache.insert(
739 {ResumeIndex, DBuilder.createPointerType(
740 nullptr, Layout.getTypeSizeInBits(ResumeFnTy))});
741 TyCache.insert(
742 {DestroyIndex, DBuilder.createPointerType(
743 nullptr, Layout.getTypeSizeInBits(DestroyFnTy))});
745 /// FIXME: If we fill the field `SizeInBits` with the actual size of
746 /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
747 TyCache.insert({IndexIndex, DBuilder.createBasicType(
748 "__coro_index",
749 (Layout.getTypeSizeInBits(IndexTy) < 8)
751 : Layout.getTypeSizeInBits(IndexTy),
752 dwarf::DW_ATE_unsigned_char)});
754 for (auto *V : FrameData.getAllDefs()) {
755 if (!DIVarCache.contains(V))
756 continue;
758 auto Index = FrameData.getFieldIndex(V);
760 NameCache.insert({Index, DIVarCache[V]->getName()});
761 TyCache.insert({Index, DIVarCache[V]->getType()});
  // Cache from index to an (Align, Offset) pair.
765 DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
766 // The Align and Offset of Resume function and Destroy function are fixed.
767 OffsetCache.insert({ResumeIndex, {8, 0}});
768 OffsetCache.insert({DestroyIndex, {8, 8}});
769 OffsetCache.insert(
770 {IndexIndex,
771 {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});
773 for (auto *V : FrameData.getAllDefs()) {
774 auto Index = FrameData.getFieldIndex(V);
776 OffsetCache.insert(
777 {Index, {FrameData.getAlign(V).value(), FrameData.getOffset(V)}});
780 DenseMap<Type *, DIType *> DITypeCache;
  // This counter is used to avoid duplicate type names, e.g., there may be
  // many i32 and i64 types in one coroutine, so we use i32_0 and i32_1 to
  // distinguish them, since it makes no sense for field names to conflict
  // with each other.
785 unsigned UnknownTypeNum = 0;
786 for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
787 if (!OffsetCache.contains(Index))
788 continue;
790 std::string Name;
791 uint64_t SizeInBits;
792 uint32_t AlignInBits;
793 uint64_t OffsetInBits;
794 DIType *DITy = nullptr;
796 Type *Ty = FrameTy->getElementType(Index);
797 assert(Ty->isSized() && "We can't handle type which is not sized.\n");
798 SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedValue();
799 AlignInBits = OffsetCache[Index].first * 8;
800 OffsetInBits = OffsetCache[Index].second * 8;
802 if (auto It = NameCache.find(Index); It != NameCache.end()) {
803 Name = It->second.str();
804 DITy = TyCache[Index];
805 } else {
806 DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache);
807 assert(DITy && "SolveDIType shouldn't return nullptr.\n");
808 Name = DITy->getName().str();
809 Name += "_" + std::to_string(UnknownTypeNum);
810 UnknownTypeNum++;
813 Elements.push_back(DBuilder.createMemberType(
814 FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits,
815 llvm::DINode::FlagArtificial, DITy));
818 DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements));
820 auto *FrameDIVar =
821 DBuilder.createAutoVariable(DIS, "__coro_frame", DFile, LineNum,
822 FrameDITy, true, DINode::FlagArtificial);
  // The subprogram has a RetainedNodes field which records the debug
  // variables it contains. So we need to add __coro_frame to its
  // RetainedNodes.
  //
  // If we don't add __coro_frame to the RetainedNodes, the user may get
  // `no symbol __coro_frame in context` rather than the more precise
  // `__coro_frame` has been optimized out.
831 auto RetainedNodes = DIS->getRetainedNodes();
832 SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
833 RetainedNodes.end());
834 RetainedNodesVec.push_back(FrameDIVar);
835 DIS->replaceOperandWith(7, (MDTuple::get(F.getContext(), RetainedNodesVec)));
837 // Construct the location for the frame debug variable. The column number
838 // is fake but it should be fine.
839 DILocation *DILoc =
840 DILocation::get(DIS->getContext(), LineNum, /*Column=*/1, DIS);
841 assert(FrameDIVar->isValidLocationForIntrinsic(DILoc));
843 if (UseNewDbgInfoFormat) {
844 DbgVariableRecord *NewDVR =
845 new DbgVariableRecord(ValueAsMetadata::get(Shape.FramePtr), FrameDIVar,
846 DBuilder.createExpression(), DILoc,
847 DbgVariableRecord::LocationType::Declare);
848 BasicBlock::iterator It = Shape.getInsertPtAfterFramePtr();
849 It->getParent()->insertDbgRecordBefore(NewDVR, It);
850 } else {
851 DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
852 DBuilder.createExpression(), DILoc,
853 &*Shape.getInsertPtAfterFramePtr());
857 // Build a struct that will keep state for an active coroutine.
858 // struct f.frame {
859 // ResumeFnTy ResumeFnAddr;
860 // ResumeFnTy DestroyFnAddr;
861 // ... promise (if present) ...
862 // int ResumeIndex;
863 // ... spills ...
864 // };
865 static StructType *buildFrameType(Function &F, coro::Shape &Shape,
866 FrameDataInfo &FrameData,
867 bool OptimizeFrame) {
868 LLVMContext &C = F.getContext();
869 const DataLayout &DL = F.getDataLayout();
871 // We will use this value to cap the alignment of spilled values.
872 std::optional<Align> MaxFrameAlignment;
873 if (Shape.ABI == coro::ABI::Async)
874 MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
875 FrameTypeBuilder B(C, DL, MaxFrameAlignment);
877 AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
878 std::optional<FieldIDType> SwitchIndexFieldId;
880 if (Shape.ABI == coro::ABI::Switch) {
881 auto *FnPtrTy = PointerType::getUnqual(C);
883 // Add header fields for the resume and destroy functions.
884 // We can rely on these being perfectly packed.
885 (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);
886 (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);
888 // PromiseAlloca field needs to be explicitly added here because it's
889 // a header field with a fixed offset based on its alignment. Hence it
890 // needs special handling and cannot be added to FrameData.Allocas.
891 if (PromiseAlloca)
892 FrameData.setFieldIndex(
893 PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true));
895 // Add a field to store the suspend index. This doesn't need to
896 // be in the header.
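    // For example, a coroutine with 3 suspend points needs
    // Log2_64_Ceil(3) == 2 bits (an i2 index), and with a single suspend
    // point we still reserve at least 1 bit.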
897 unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
898 Type *IndexType = Type::getIntNTy(C, IndexBits);
900 SwitchIndexFieldId = B.addField(IndexType, std::nullopt);
901 } else {
902 assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
  // Because multiple allocas may share the same field slot, we add the
  // allocas to fields here.
907 B.addFieldForAllocas(F, FrameData, Shape, OptimizeFrame);
908 // Add PromiseAlloca to Allocas list so that
909 // 1. updateLayoutIndex could update its index after
910 // `performOptimizedStructLayout`
911 // 2. it is processed in insertSpills.
912 if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
    // We assume that the promise alloca won't be modified before CoroBegin
    // and that no alias will be created before CoroBegin.
915 FrameData.Allocas.emplace_back(
916 PromiseAlloca, DenseMap<Instruction *, std::optional<APInt>>{}, false);
917 // Create an entry for every spilled value.
918 for (auto &S : FrameData.Spills) {
919 Type *FieldType = S.first->getType();
920 // For byval arguments, we need to store the pointed value in the frame,
921 // instead of the pointer itself.
922 if (const Argument *A = dyn_cast<Argument>(S.first))
923 if (A->hasByValAttr())
924 FieldType = A->getParamByValType();
925 FieldIDType Id = B.addField(FieldType, std::nullopt, false /*header*/,
926 true /*IsSpillOfValue*/);
927 FrameData.setFieldIndex(S.first, Id);
930 StructType *FrameTy = [&] {
931 SmallString<32> Name(F.getName());
932 Name.append(".Frame");
933 return B.finish(Name);
934 }();
936 FrameData.updateLayoutIndex(B);
937 Shape.FrameAlign = B.getStructAlign();
938 Shape.FrameSize = B.getStructSize();
940 switch (Shape.ABI) {
941 case coro::ABI::Switch: {
942 // In the switch ABI, remember the switch-index field.
943 auto IndexField = B.getLayoutField(*SwitchIndexFieldId);
944 Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;
945 Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
946 Shape.SwitchLowering.IndexOffset = IndexField.Offset;
948 // Also round the frame size up to a multiple of its alignment, as is
949 // generally expected in C/C++.
950 Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
951 break;
954 // In the retcon ABI, remember whether the frame is inline in the storage.
955 case coro::ABI::Retcon:
956 case coro::ABI::RetconOnce: {
957 auto Id = Shape.getRetconCoroId();
958 Shape.RetconLowering.IsFrameInlineInStorage
959 = (B.getStructSize() <= Id->getStorageSize() &&
960 B.getStructAlign() <= Id->getStorageAlignment());
961 break;
963 case coro::ABI::Async: {
964 Shape.AsyncLowering.FrameOffset =
965 alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign);
966 // Also make the final context size a multiple of the context alignment to
967 // make allocation easier for allocators.
968 Shape.AsyncLowering.ContextSize =
969 alignTo(Shape.AsyncLowering.FrameOffset + Shape.FrameSize,
970 Shape.AsyncLowering.getContextAlignment());
971 if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
      report_fatal_error(
          "The alignment requirement of frame variables cannot be higher than "
          "the alignment of the async function context");
976 break;
980 return FrameTy;
983 // Replace all alloca and SSA values that are accessed across suspend points
984 // with GetElementPointer from coroutine frame + loads and stores. Create an
985 // AllocaSpillBB that will become the new entry block for the resume parts of
986 // the coroutine:
988 // %hdl = coro.begin(...)
989 // whatever
991 // becomes:
993 // %hdl = coro.begin(...)
994 // br label %AllocaSpillBB
996 // AllocaSpillBB:
997 // ; geps corresponding to allocas that were moved to coroutine frame
998 // br label PostSpill
1000 // PostSpill:
1001 // whatever
1004 static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
1005 LLVMContext &C = Shape.CoroBegin->getContext();
1006 Function *F = Shape.CoroBegin->getFunction();
1007 IRBuilder<> Builder(C);
1008 StructType *FrameTy = Shape.FrameTy;
1009 Value *FramePtr = Shape.FramePtr;
1010 DominatorTree DT(*F);
1011 SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
1013 // Create a GEP with the given index into the coroutine frame for the original
1014 // value Orig. Appends an extra 0 index for array-allocas, preserving the
1015 // original type.
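  // For example, a value assigned field index 3 yields
  //   getelementptr inbounds %f.Frame, ptr %FramePtr, i32 0, i32 3
  // with a trailing "i32 0" appended when Orig is an array alloca.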
1016 auto GetFramePointer = [&](Value *Orig) -> Value * {
1017 FieldIDType Index = FrameData.getFieldIndex(Orig);
1018 SmallVector<Value *, 3> Indices = {
1019 ConstantInt::get(Type::getInt32Ty(C), 0),
1020 ConstantInt::get(Type::getInt32Ty(C), Index),
1023 if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
1024 if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
1025 auto Count = CI->getValue().getZExtValue();
1026 if (Count > 1) {
1027 Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
1029 } else {
1030 report_fatal_error("Coroutines cannot handle non static allocas yet");
1034 auto GEP = cast<GetElementPtrInst>(
1035 Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
1036 if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
1037 if (FrameData.getDynamicAlign(Orig) != 0) {
1038 assert(FrameData.getDynamicAlign(Orig) == AI->getAlign().value());
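        // Manually round the raw field address up to the alloca's alignment:
        //   aligned = (addr + align - 1) & ~(align - 1)
        // e.g., with align 32, an address of 0x1004 becomes 0x1020.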
1039 auto *M = AI->getModule();
1040 auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
1041 auto *PtrValue = Builder.CreatePtrToInt(GEP, IntPtrTy);
1042 auto *AlignMask =
1043 ConstantInt::get(IntPtrTy, AI->getAlign().value() - 1);
1044 PtrValue = Builder.CreateAdd(PtrValue, AlignMask);
1045 PtrValue = Builder.CreateAnd(PtrValue, Builder.CreateNot(AlignMask));
1046 return Builder.CreateIntToPtr(PtrValue, AI->getType());
      // If the type of the GEP is not equal to the type of the AllocaInst, it
      // implies that the AllocaInst may share the frame slot of another
      // AllocaInst. So we cast the GEP to the alloca's type here to reuse the
      // frame storage.
      //
      // Note: if we change the strategy for dealing with alignment, we need
      // to refine this casting.
1055 if (GEP->getType() != Orig->getType())
1056 return Builder.CreateAddrSpaceCast(GEP, Orig->getType(),
1057 Orig->getName() + Twine(".cast"));
1059 return GEP;
1062 for (auto const &E : FrameData.Spills) {
1063 Value *Def = E.first;
1064 auto SpillAlignment = Align(FrameData.getAlign(Def));
1065 // Create a store instruction storing the value into the
1066 // coroutine frame.
1067 BasicBlock::iterator InsertPt = coro::getSpillInsertionPt(Shape, Def, DT);
1069 Type *ByValTy = nullptr;
1070 if (auto *Arg = dyn_cast<Argument>(Def)) {
1071 // If we're spilling an Argument, make sure we clear 'nocapture'
1072 // from the coroutine function.
1073 Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
1075 if (Arg->hasByValAttr())
1076 ByValTy = Arg->getParamByValType();
1079 auto Index = FrameData.getFieldIndex(Def);
1080 Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
1081 auto *G = Builder.CreateConstInBoundsGEP2_32(
1082 FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
1083 if (ByValTy) {
1084 // For byval arguments, we need to store the pointed value in the frame,
1085 // instead of the pointer itself.
1086 auto *Value = Builder.CreateLoad(ByValTy, Def);
1087 Builder.CreateAlignedStore(Value, G, SpillAlignment);
1088 } else {
1089 Builder.CreateAlignedStore(Def, G, SpillAlignment);
1092 BasicBlock *CurrentBlock = nullptr;
1093 Value *CurrentReload = nullptr;
1094 for (auto *U : E.second) {
      // If we have not yet seen the use block, create a load instruction to
      // reload the spilled value from the coroutine frame, using the frame
      // GEP as the address.
1098 if (CurrentBlock != U->getParent()) {
1099 CurrentBlock = U->getParent();
1100 Builder.SetInsertPoint(CurrentBlock,
1101 CurrentBlock->getFirstInsertionPt());
1103 auto *GEP = GetFramePointer(E.first);
1104 GEP->setName(E.first->getName() + Twine(".reload.addr"));
1105 if (ByValTy)
1106 CurrentReload = GEP;
1107 else
1108 CurrentReload = Builder.CreateAlignedLoad(
1109 FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
1110 SpillAlignment, E.first->getName() + Twine(".reload"));
1112 TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(Def);
1113 TinyPtrVector<DbgVariableRecord *> DVRs = findDVRDeclares(Def);
      // Try our best to find a dbg.declare. If the spill is a temporary,
      // there may not be a direct dbg.declare; walk up the load chain to find
      // one from an alias.
1117 if (F->getSubprogram()) {
1118 auto *CurDef = Def;
1119 while (DIs.empty() && DVRs.empty() && isa<LoadInst>(CurDef)) {
1120 auto *LdInst = cast<LoadInst>(CurDef);
1121 // Only consider ptr to ptr same type load.
1122 if (LdInst->getPointerOperandType() != LdInst->getType())
1123 break;
1124 CurDef = LdInst->getPointerOperand();
1125 if (!isa<AllocaInst, LoadInst>(CurDef))
1126 break;
1127 DIs = findDbgDeclares(CurDef);
1128 DVRs = findDVRDeclares(CurDef);
1132 auto SalvageOne = [&](auto *DDI) {
1133 bool AllowUnresolved = false;
1134 // This dbg.declare is preserved for all coro-split function
1135 // fragments. It will be unreachable in the main function, and
1136 // processed by coro::salvageDebugInfo() by the Cloner.
1137 if (UseNewDbgInfoFormat) {
1138 DbgVariableRecord *NewDVR = new DbgVariableRecord(
1139 ValueAsMetadata::get(CurrentReload), DDI->getVariable(),
1140 DDI->getExpression(), DDI->getDebugLoc(),
1141 DbgVariableRecord::LocationType::Declare);
1142 Builder.GetInsertPoint()->getParent()->insertDbgRecordBefore(
1143 NewDVR, Builder.GetInsertPoint());
1144 } else {
1145 DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
1146 .insertDeclare(CurrentReload, DDI->getVariable(),
1147 DDI->getExpression(), DDI->getDebugLoc(),
1148 &*Builder.GetInsertPoint());
1150 // This dbg.declare is for the main function entry point. It
1151 // will be deleted in all coro-split functions.
1152 coro::salvageDebugInfo(ArgToAllocaMap, *DDI, false /*UseEntryValue*/);
1154 for_each(DIs, SalvageOne);
1155 for_each(DVRs, SalvageOne);
1158 // If we have a single edge PHINode, remove it and replace it with a
1159 // reload from the coroutine frame. (We already took care of multi edge
1160 // PHINodes by normalizing them in the rewritePHIs function).
1161 if (auto *PN = dyn_cast<PHINode>(U)) {
1162 assert(PN->getNumIncomingValues() == 1 &&
1163 "unexpected number of incoming "
1164 "values in the PHINode");
1165 PN->replaceAllUsesWith(CurrentReload);
1166 PN->eraseFromParent();
1167 continue;
1170 // Replace all uses of CurrentValue in the current instruction with
1171 // reload.
1172 U->replaceUsesOfWith(Def, CurrentReload);
1173 // Instructions are added to Def's user list if the attached
1174 // debug records use Def. Update those now.
1175 for (DbgVariableRecord &DVR : filterDbgVars(U->getDbgRecordRange()))
1176 DVR.replaceVariableLocationOp(Def, CurrentReload, true);
1180 BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent();
1182 auto SpillBlock = FramePtrBB->splitBasicBlock(
1183 Shape.getInsertPtAfterFramePtr(), "AllocaSpillBB");
1184 SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
1185 Shape.AllocaSpillBlock = SpillBlock;
1187 // retcon and retcon.once lowering assumes all uses have been sunk.
1188 if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
1189 Shape.ABI == coro::ABI::Async) {
1190 // If we found any allocas, replace all of their remaining uses with Geps.
1191 Builder.SetInsertPoint(SpillBlock, SpillBlock->begin());
1192 for (const auto &P : FrameData.Allocas) {
1193 AllocaInst *Alloca = P.Alloca;
1194 auto *G = GetFramePointer(Alloca);
1196 // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
1197 // here, as we are changing location of the instruction.
1198 G->takeName(Alloca);
1199 Alloca->replaceAllUsesWith(G);
1200 Alloca->eraseFromParent();
1202 return;
  // If we found any allocas, replace all of their remaining uses with GEP
  // instructions. To preserve debuggability, we replace the uses of allocas
  // in dbg.declares and dbg.values with the reload from the frame.
  // Note: We cannot replace the allocas with GEP instructions
  // indiscriminately, as some of the uses may not be dominated by CoroBegin.
1210 Builder.SetInsertPoint(Shape.AllocaSpillBlock,
1211 Shape.AllocaSpillBlock->begin());
1212 SmallVector<Instruction *, 4> UsersToUpdate;
1213 for (const auto &A : FrameData.Allocas) {
1214 AllocaInst *Alloca = A.Alloca;
1215 UsersToUpdate.clear();
1216 for (User *U : Alloca->users()) {
1217 auto *I = cast<Instruction>(U);
1218 if (DT.dominates(Shape.CoroBegin, I))
1219 UsersToUpdate.push_back(I);
1221 if (UsersToUpdate.empty())
1222 continue;
1223 auto *G = GetFramePointer(Alloca);
1224 G->setName(Alloca->getName() + Twine(".reload.addr"));
1226 SmallVector<DbgVariableIntrinsic *, 4> DIs;
1227 SmallVector<DbgVariableRecord *> DbgVariableRecords;
1228 findDbgUsers(DIs, Alloca, &DbgVariableRecords);
1229 for (auto *DVI : DIs)
1230 DVI->replaceUsesOfWith(Alloca, G);
1231 for (auto *DVR : DbgVariableRecords)
1232 DVR->replaceVariableLocationOp(Alloca, G);
1234 for (Instruction *I : UsersToUpdate) {
      // It is meaningless to retain lifetime intrinsics that refer to members
      // of the coroutine frame, and such lifetime intrinsics may block
      // further optimizations.
1238 if (I->isLifetimeStartOrEnd()) {
1239 I->eraseFromParent();
1240 continue;
1243 I->replaceUsesOfWith(Alloca, G);
1246 Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
1247 for (const auto &A : FrameData.Allocas) {
1248 AllocaInst *Alloca = A.Alloca;
1249 if (A.MayWriteBeforeCoroBegin) {
1250 // isEscaped really means potentially modified before CoroBegin.
1251 if (Alloca->isArrayAllocation())
1252 report_fatal_error(
1253 "Coroutines cannot handle copying of array allocas yet");
1255 auto *G = GetFramePointer(Alloca);
1256 auto *Value = Builder.CreateLoad(Alloca->getAllocatedType(), Alloca);
1257 Builder.CreateStore(Value, G);
1259 // For each alias to Alloca created before CoroBegin but used after
1260 // CoroBegin, we recreate them after CoroBegin by applying the offset
1261 // to the pointer in the frame.
1262 for (const auto &Alias : A.Aliases) {
1263 auto *FramePtr = GetFramePointer(Alloca);
1264 auto &Value = *Alias.second;
1265 auto ITy = IntegerType::get(C, Value.getBitWidth());
1266 auto *AliasPtr =
1267 Builder.CreatePtrAdd(FramePtr, ConstantInt::get(ITy, Value));
1268 Alias.first->replaceUsesWithIf(
1269 AliasPtr, [&](Use &U) { return DT.dominates(Shape.CoroBegin, U); });
  // PromiseAlloca is not collected in FrameData.Allocas, so the code above
  // does not handle the case where the PromiseAlloca is written before
  // CoroBegin. This can be problematic in edge cases; see
  // https://github.com/llvm/llvm-project/issues/57861 for an example.
1277 if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca) {
1278 AllocaInst *PA = Shape.SwitchLowering.PromiseAlloca;
    // Check whether there are memory accesses to the promise alloca before
    // CoroBegin.
1280 bool HasAccessingPromiseBeforeCB = llvm::any_of(PA->uses(), [&](Use &U) {
1281 auto *Inst = dyn_cast<Instruction>(U.getUser());
1282 if (!Inst || DT.dominates(Shape.CoroBegin, Inst))
1283 return false;
1285 if (auto *CI = dyn_cast<CallInst>(Inst)) {
1286 // It is fine if the call wouldn't write to the Promise.
1287 // This is possible for @llvm.coro.id intrinsics, which
1288 // would take the promise as the second argument as a
1289 // marker.
1290 if (CI->onlyReadsMemory() ||
1291 CI->onlyReadsMemory(CI->getArgOperandNo(&U)))
1292 return false;
1293 return true;
      return isa<StoreInst>(Inst) ||
             // It may take too much time to track the uses.
             // Be conservative in case the use may escape.
             isa<GetElementPtrInst>(Inst) ||
             // Before opaque pointers were enabled there would always be a
             // bitcast for the promise alloca; now that opaque pointers are
             // the default, this should be fine.
             isa<BitCastInst>(Inst);
1306 if (HasAccessingPromiseBeforeCB) {
1307 Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
1308 auto *G = GetFramePointer(PA);
1309 auto *Value = Builder.CreateLoad(PA->getAllocatedType(), PA);
1310 Builder.CreateStore(Value, G);
// Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
// PHI in InsertedBB.
1317 static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
1318 BasicBlock *InsertedBB,
1319 BasicBlock *PredBB,
1320 PHINode *UntilPHI = nullptr) {
1321 auto *PN = cast<PHINode>(&SuccBB->front());
1322 do {
1323 int Index = PN->getBasicBlockIndex(InsertedBB);
1324 Value *V = PN->getIncomingValue(Index);
1325 PHINode *InputV = PHINode::Create(
1326 V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName());
1327 InputV->insertBefore(InsertedBB->begin());
1328 InputV->addIncoming(V, PredBB);
1329 PN->setIncomingValue(Index, InputV);
1330 PN = dyn_cast<PHINode>(PN->getNextNode());
1331 } while (PN != UntilPHI);
1334 // Rewrites the PHI Nodes in a cleanuppad.
1335 static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
1336 CleanupPadInst *CleanupPad) {
  // For every incoming edge to a CleanupPad we will create a new block
  // holding all incoming values in single-value PHI nodes. We will then
  // create another block to act as a dispatcher (as all unwind edges for
  // related EH blocks must be the same).
1342 // cleanuppad:
1343 // %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
1344 // %3 = cleanuppad within none []
1346 // It will create:
1348 // cleanuppad.corodispatch
1349 // %2 = phi i8[0, %catchswitch], [1, %catch.1]
1350 // %3 = cleanuppad within none []
1351 // switch i8 % 2, label %unreachable
1352 // [i8 0, label %cleanuppad.from.catchswitch
1353 // i8 1, label %cleanuppad.from.catch.1]
1354 // cleanuppad.from.catchswitch:
1355 // %4 = phi i32 [%0, %catchswitch]
1356 // br %label cleanuppad
1357 // cleanuppad.from.catch.1:
1358 // %6 = phi i32 [%1, %catch.1]
1359 // br %label cleanuppad
1360 // cleanuppad:
1361 // %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
1362 // [%6, %cleanuppad.from.catch.1]
1364 // Unreachable BB, in case switching on an invalid value in the dispatcher.
1365 auto *UnreachBB = BasicBlock::Create(
1366 CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent());
1367 IRBuilder<> Builder(UnreachBB);
1368 Builder.CreateUnreachable();
1370 // Create a new cleanuppad which will be the dispatcher.
1371 auto *NewCleanupPadBB =
1372 BasicBlock::Create(CleanupPadBB->getContext(),
1373 CleanupPadBB->getName() + Twine(".corodispatch"),
1374 CleanupPadBB->getParent(), CleanupPadBB);
1375 Builder.SetInsertPoint(NewCleanupPadBB);
1376 auto *SwitchType = Builder.getInt8Ty();
1377 auto *SetDispatchValuePN =
1378 Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB));
1379 CleanupPad->removeFromParent();
1380 CleanupPad->insertAfter(SetDispatchValuePN);
1381 auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB,
1382 pred_size(CleanupPadBB));
1384 int SwitchIndex = 0;
1385 SmallVector<BasicBlock *, 8> Preds(predecessors(CleanupPadBB));
1386 for (BasicBlock *Pred : Preds) {
1387 // Create a new cleanuppad and move the PHI values to there.
1388 auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(),
1389 CleanupPadBB->getName() +
1390 Twine(".from.") + Pred->getName(),
1391 CleanupPadBB->getParent(), CleanupPadBB);
1392 updatePhiNodes(CleanupPadBB, Pred, CaseBB);
1393 CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
1394 Pred->getName());
1395 Builder.SetInsertPoint(CaseBB);
1396 Builder.CreateBr(CleanupPadBB);
1397 movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB);
1399 // Update this Pred to the new unwind point.
1400 setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB);
1402 // Setup the switch in the dispatcher.
1403 auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex);
1404 SetDispatchValuePN->addIncoming(SwitchConstant, Pred);
1405 SwitchOnDispatch->addCase(SwitchConstant, CaseBB);
1406 SwitchIndex++;
1410 static void cleanupSinglePredPHIs(Function &F) {
1411 SmallVector<PHINode *, 32> Worklist;
1412 for (auto &BB : F) {
1413 for (auto &Phi : BB.phis()) {
1414 if (Phi.getNumIncomingValues() == 1) {
1415 Worklist.push_back(&Phi);
1416 } else
1417 break;
1420 while (!Worklist.empty()) {
1421 auto *Phi = Worklist.pop_back_val();
1422 auto *OriginalValue = Phi->getIncomingValue(0);
1423 Phi->replaceAllUsesWith(OriginalValue);
1427 static void rewritePHIs(BasicBlock &BB) {
  // For every incoming edge we will create a block holding all
  // incoming values in single-value PHI nodes.
1431 // loop:
1432 // %n.val = phi i32[%n, %entry], [%inc, %loop]
1434 // It will create:
1436 // loop.from.entry:
1437 // %n.loop.pre = phi i32 [%n, %entry]
1438 // br %label loop
1439 // loop.from.loop:
1440 // %inc.loop.pre = phi i32 [%inc, %loop]
1441 // br %label loop
1443 // After this rewrite, further analysis will ignore any phi nodes with more
1444 // than one incoming edge.
1446 // TODO: Simplify PHINodes in the basic block to remove duplicate
1447 // predecessors.
1449 // Special case for CleanupPad: all EH blocks must have the same unwind edge
1450 // so we need to create an additional "dispatcher" block.
1451 if (auto *CleanupPad =
1452 dyn_cast_or_null<CleanupPadInst>(BB.getFirstNonPHI())) {
1453 SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
1454 for (BasicBlock *Pred : Preds) {
1455 if (CatchSwitchInst *CS =
1456 dyn_cast<CatchSwitchInst>(Pred->getTerminator())) {
        // CleanupPad with a CatchSwitch predecessor: therefore this is an
        // unwind destination that needs to be handled specially.
1459 assert(CS->getUnwindDest() == &BB);
1460 (void)CS;
1461 rewritePHIsForCleanupPad(&BB, CleanupPad);
1462 return;
1467 LandingPadInst *LandingPad = nullptr;
1468 PHINode *ReplPHI = nullptr;
1469 if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
1470 // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
1471 // We replace the original landing pad with a PHINode that will collect the
1472 // results from all of them.
1473 ReplPHI = PHINode::Create(LandingPad->getType(), 1, "");
1474 ReplPHI->insertBefore(LandingPad->getIterator());
1475 ReplPHI->takeName(LandingPad);
1476 LandingPad->replaceAllUsesWith(ReplPHI);
1477 // We will erase the original landing pad at the end of this function after
1478 // ehAwareSplitEdge cloned it in the transition blocks.
1481 SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
1482 for (BasicBlock *Pred : Preds) {
1483 auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
1484 IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
1486 // Stop the moving of values at ReplPHI, as this is either null or the PHI
1487 // that replaced the landing pad.
1488 movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI);
1491 if (LandingPad) {
    // The calls to ehAwareSplitEdge above cloned the original landing pad, so
    // we no longer need it.
1494 LandingPad->eraseFromParent();
1498 static void rewritePHIs(Function &F) {
1499 SmallVector<BasicBlock *, 8> WorkList;
1501 for (BasicBlock &BB : F)
1502 if (auto *PN = dyn_cast<PHINode>(&BB.front()))
1503 if (PN->getNumIncomingValues() > 1)
1504 WorkList.push_back(&BB);
1506 for (BasicBlock *BB : WorkList)
1507 rewritePHIs(*BB);
1510 // Splits the block at a particular instruction unless it is the first
1511 // instruction in the block with a single predecessor.
1512 static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
1513 auto *BB = I->getParent();
1514 if (&BB->front() == I) {
1515 if (BB->getSinglePredecessor()) {
1516 BB->setName(Name);
1517 return BB;
1520 return BB->splitBasicBlock(I, Name);
// Split above and below a particular instruction so that it
// ends up alone in its own block.
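// For example, splitAround(I, "Suspend") (an arbitrary example name) leaves I
// alone in a block named "Suspend", with the preceding instructions kept in
// the original block and the following instructions moved into a block named
// "AfterSuspend", following the Name / "After" + Name scheme below.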
1525 static void splitAround(Instruction *I, const Twine &Name) {
1526 splitBlockIfNotFirst(I, Name);
1527 splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
1530 /// After we split the coroutine, will the given basic block be along
1531 /// an obvious exit path for the resumption function?
1532 static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
1533 unsigned depth = 3) {
1534 // If we've bottomed out our depth count, stop searching and assume
1535 // that the path might loop back.
1536 if (depth == 0) return false;
1538 // If this is a suspend block, we're about to exit the resumption function.
1539 if (coro::isSuspendBlock(BB))
1540 return true;
1542 // Recurse into the successors.
1543 for (auto *Succ : successors(BB)) {
1544 if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
1545 return false;
1546 }
1548 // If none of the successors leads back in a loop, we're on an exit/abort.
1549 return true;
1550 }
1552 static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
1553 // Look for a free that isn't sufficiently obviously followed by
1554 // either a suspend or a termination, i.e. something that will leave
1555 // the coro resumption frame.
1556 for (auto *U : AI->users()) {
1557 auto FI = dyn_cast<CoroAllocaFreeInst>(U);
1558 if (!FI) continue;
1560 if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
1561 return true;
1562 }
1564 // If we never found one, we don't need a stack save.
1565 return false;
1566 }
1568 /// Turn each of the given local allocas into a normal (dynamic) alloca
1569 /// instruction.
1570 static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
1571 SmallVectorImpl<Instruction*> &DeadInsts) {
1572 for (auto *AI : LocalAllocas) {
1573 IRBuilder<> Builder(AI);
1575 // Save the stack depth. Try to avoid doing this if the stackrestore
1576 // is going to immediately precede a return or other exit from the function.
1577 Value *StackSave = nullptr;
1578 if (localAllocaNeedsStackSave(AI))
1579 StackSave = Builder.CreateStackSave();
1581 // Allocate memory.
1582 auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
1583 Alloca->setAlignment(AI->getAlignment());
1585 for (auto *U : AI->users()) {
1586 // Replace gets with the allocation.
1587 if (isa<CoroAllocaGetInst>(U)) {
1588 U->replaceAllUsesWith(Alloca);
1590 // Replace frees with stackrestores. This is safe because
1591 // alloca.alloc is required to obey a stack discipline, although we
1592 // don't enforce that structurally.
1593 } else {
1594 auto FI = cast<CoroAllocaFreeInst>(U);
1595 if (StackSave) {
1596 Builder.SetInsertPoint(FI);
1597 Builder.CreateStackRestore(StackSave);
1598 }
1599 }
1600 DeadInsts.push_back(cast<Instruction>(U));
1601 }
1603 DeadInsts.push_back(AI);
1604 }
1605 }
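// The next three helpers model reads and writes of the thread-local
// swifterror register. Each emits a call through a null function pointer as a
// placeholder and records it in Shape.SwiftErrorOps so that a later stage
// (coroutine splitting) can rewrite the placeholders into real swifterror
// accesses.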
1607 /// Get the current swifterror value.
1608 static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
1609 coro::Shape &Shape) {
1610 // Make a fake function pointer as a sort of intrinsic.
1611 auto FnTy = FunctionType::get(ValueTy, {}, false);
1612 auto Fn = ConstantPointerNull::get(Builder.getPtrTy());
1614 auto Call = Builder.CreateCall(FnTy, Fn, {});
1615 Shape.SwiftErrorOps.push_back(Call);
1617 return Call;
1618 }
1620 /// Set the given value as the current swifterror value.
1622 /// Returns a slot that can be used as a swifterror slot.
1623 static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
1624 coro::Shape &Shape) {
1625 // Make a fake function pointer as a sort of intrinsic.
1626 auto FnTy = FunctionType::get(Builder.getPtrTy(),
1627 {V->getType()}, false);
1628 auto Fn = ConstantPointerNull::get(Builder.getPtrTy());
1630 auto Call = Builder.CreateCall(FnTy, Fn, { V });
1631 Shape.SwiftErrorOps.push_back(Call);
1633 return Call;
1634 }
1636 /// Set the swifterror value from the given alloca before a call,
1637 /// then put it back in the alloca afterwards.
1639 /// Returns an address that will stand in for the swifterror slot
1640 /// until splitting.
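///
/// Rough sketch of what gets emitted around the call (names illustrative,
/// <set-placeholder>/<get-placeholder> are the fake calls described above):
///   %before = load <ValueTy>, ptr %Alloca
///   %slot   = <set-placeholder>(%before)   ; stand-in slot address
///   ... the call itself, which the caller rewrites to use %slot ...
///   %after  = <get-placeholder>()
///   store <ValueTy> %after, ptr %Alloca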
1641 static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
1642 AllocaInst *Alloca,
1643 coro::Shape &Shape) {
1644 auto ValueTy = Alloca->getAllocatedType();
1645 IRBuilder<> Builder(Call);
1647 // Load the current value from the alloca and set it as the
1648 // swifterror value.
1649 auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
1650 auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);
1652 // Move to after the call. Since swifterror only has a guaranteed
1653 // value on normal exits, we can ignore implicit and explicit unwind
1654 // edges.
1655 if (isa<CallInst>(Call)) {
1656 Builder.SetInsertPoint(Call->getNextNode());
1657 } else {
1658 auto Invoke = cast<InvokeInst>(Call);
1659 Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
1660 }
1662 // Get the current swifterror value and store it to the alloca.
1663 auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
1664 Builder.CreateStore(ValueAfterCall, Alloca);
1666 return Addr;
1667 }
1669 /// Eliminate a formerly-swifterror alloca by inserting the get/set
1670 /// intrinsics and attempting to MemToReg the alloca away.
1671 static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
1672 coro::Shape &Shape) {
1673 for (Use &Use : llvm::make_early_inc_range(Alloca->uses())) {
1674 // swifterror values can only be used in very specific ways.
1675 // We take advantage of that here.
1676 auto User = Use.getUser();
1677 if (isa<LoadInst>(User) || isa<StoreInst>(User))
1678 continue;
1680 assert(isa<CallInst>(User) || isa<InvokeInst>(User));
1681 auto Call = cast<Instruction>(User);
1683 auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
1685 // Use the returned slot address as the call argument.
1686 Use.set(Addr);
1687 }
1689 // All the uses should be loads and stores now.
1690 assert(isAllocaPromotable(Alloca));
1691 }
1693 /// "Eliminate" a swifterror argument by reducing it to the alloca case
1694 /// and then loading and storing in the prologue and epilogue.
1696 /// The argument keeps the swifterror flag.
1697 static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
1698 coro::Shape &Shape,
1699 SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
1700 IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
1702 auto ArgTy = cast<PointerType>(Arg.getType());
1703 auto ValueTy = PointerType::getUnqual(F.getContext());
1705 // Reduce to the alloca case:
1707 // Create an alloca and replace all uses of the arg with it.
1708 auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
1709 Arg.replaceAllUsesWith(Alloca);
1711 // Set an initial value in the alloca. swifterror is always null on entry.
1712 auto InitialValue = Constant::getNullValue(ValueTy);
1713 Builder.CreateStore(InitialValue, Alloca);
1715 // Find all the suspends in the function and save and restore around them.
1716 for (auto *Suspend : Shape.CoroSuspends) {
1717 (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
1718 }
1720 // Find all the coro.ends in the function and restore the error value.
1721 for (auto *End : Shape.CoroEnds) {
1722 Builder.SetInsertPoint(End);
1723 auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
1724 (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
1725 }
1727 // Now we can use the alloca logic.
1728 AllocasToPromote.push_back(Alloca);
1729 eliminateSwiftErrorAlloca(F, Alloca, Shape);
1730 }
1732 /// Eliminate all problematic uses of swifterror arguments and allocas
1733 /// from the function. We'll fix them up later when splitting the function.
1734 static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
1735 SmallVector<AllocaInst*, 4> AllocasToPromote;
1737 // Look for a swifterror argument.
1738 for (auto &Arg : F.args()) {
1739 if (!Arg.hasSwiftErrorAttr()) continue;
1741 eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
1742 break;
1743 }
1745 // Look for swifterror allocas.
1746 for (auto &Inst : F.getEntryBlock()) {
1747 auto Alloca = dyn_cast<AllocaInst>(&Inst);
1748 if (!Alloca || !Alloca->isSwiftError()) continue;
1750 // Clear the swifterror flag.
1751 Alloca->setSwiftError(false);
1753 AllocasToPromote.push_back(Alloca);
1754 eliminateSwiftErrorAlloca(F, Alloca, Shape);
1755 }
1757 // If we have any allocas to promote, compute a dominator tree and
1758 // promote them en masse.
1759 if (!AllocasToPromote.empty()) {
1760 DominatorTree DT(F);
1761 PromoteMemToReg(AllocasToPromote, DT);
1762 }
1763 }
1765 // For each local variable whose users all live inside a single suspended
1766 // region, sink its lifetime.start markers to a point after the suspend
1767 // block. Doing so minimizes the lifetime of each variable and hence the
1768 // amount of data we end up putting on the coroutine frame.
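//
// Illustrative sketch (IR names are made up for the example):
//   %x = alloca i64
//   call void @llvm.lifetime.start.p0(i64 8, ptr %x)  ; originally near entry
//   ... suspend point ...
//   ; every other use of %x sits below the suspend
// If all of %x's users (other than the lifetime markers) are dominated by the
// block right after the suspend and do not cross another suspend point, the
// lifetime.start is re-emitted in that block and the original markers are
// erased, so %x no longer has to be kept alive across the suspend.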
1769 static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
1770 SuspendCrossingInfo &Checker,
1771 const DominatorTree &DT) {
1772 if (F.hasOptNone())
1773 return;
1775 // Collect all possible basic blocks which may dominate all uses of allocas.
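// The candidates are the entry block and the single successor of each suspend
// block, i.e. the blocks where a sunk lifetime.start keeps the alloca from
// being live across the preceding suspend points.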
1776 SmallPtrSet<BasicBlock *, 4> DomSet;
1777 DomSet.insert(&F.getEntryBlock());
1778 for (auto *CSI : Shape.CoroSuspends) {
1779 BasicBlock *SuspendBlock = CSI->getParent();
1780 assert(coro::isSuspendBlock(SuspendBlock) &&
1781 SuspendBlock->getSingleSuccessor() &&
1782 "should have split coro.suspend into its own block");
1783 DomSet.insert(SuspendBlock->getSingleSuccessor());
1784 }
1786 for (Instruction &I : instructions(F)) {
1787 AllocaInst* AI = dyn_cast<AllocaInst>(&I);
1788 if (!AI)
1789 continue;
1791 for (BasicBlock *DomBB : DomSet) {
1792 bool Valid = true;
1793 SmallVector<Instruction *, 1> Lifetimes;
1795 auto isLifetimeStart = [](Instruction* I) {
1796 if (auto* II = dyn_cast<IntrinsicInst>(I))
1797 return II->getIntrinsicID() == Intrinsic::lifetime_start;
1798 return false;
1799 };
1801 auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
1802 if (isLifetimeStart(U)) {
1803 Lifetimes.push_back(U);
1804 return true;
1805 }
1806 if (!U->hasOneUse() || U->stripPointerCasts() != AI)
1807 return false;
1808 if (isLifetimeStart(U->user_back())) {
1809 Lifetimes.push_back(U->user_back());
1810 return true;
1811 }
1812 return false;
1813 };
1815 for (User *U : AI->users()) {
1816 Instruction *UI = cast<Instruction>(U);
1817 // If every user other than the lifetime.start markers is dominated by
1818 // one of the candidate blocks and does not cross a suspend point, then
1819 // there is no need to spill the alloca; otherwise this candidate block
1820 // is not valid.
1821 if (!DT.dominates(DomBB, UI->getParent()) ||
1822 Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
1823 // Skip lifetime.start, GEP and bitcast used by lifetime.start
1824 // markers.
1825 if (collectLifetimeStart(UI, AI))
1826 continue;
1827 Valid = false;
1828 break;
1829 }
1830 }
1831 // Sink the lifetime.start markers into the dominating block when they
1832 // are the only users left outside the region.
1833 if (Valid && Lifetimes.size() != 0) {
1834 auto *NewLifetime = Lifetimes[0]->clone();
1835 NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), AI);
1836 NewLifetime->insertBefore(DomBB->getTerminator());
1838 // The original lifetime.start markers outside the region are no longer necessary.
1839 for (Instruction *S : Lifetimes)
1840 S->eraseFromParent();
1842 break;
1843 }
1844 }
1845 }
1846 }
1848 static std::optional<std::pair<Value &, DIExpression &>>
1849 salvageDebugInfoImpl(SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
1850 bool UseEntryValue, Function *F, Value *Storage,
1851 DIExpression *Expr, bool SkipOutermostLoad) {
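// Walk backwards from Storage through loads, stores and other salvageable
// instructions, folding each step into Expr, until Storage is no longer an
// instruction (e.g. it has become a function argument) or salvaging fails.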
1852 IRBuilder<> Builder(F->getContext());
1853 auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
1854 while (isa<IntrinsicInst>(InsertPt))
1855 ++InsertPt;
1856 Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);
1858 while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
1859 if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
1860 Storage = LdInst->getPointerOperand();
1861 // FIXME: This is a heuristic that works around the fact that
1862 // LLVM IR debug intrinsics cannot yet distinguish between
1863 // memory and value locations: Because a dbg.declare(alloca) is
1864 // implicitly a memory location no DW_OP_deref operation for the
1865 // last direct load from an alloca is necessary. This condition
1866 // effectively drops the *last* DW_OP_deref in the expression.
1867 if (!SkipOutermostLoad)
1868 Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
1869 } else if (auto *StInst = dyn_cast<StoreInst>(Inst)) {
1870 Storage = StInst->getValueOperand();
1871 } else {
1872 SmallVector<uint64_t, 16> Ops;
1873 SmallVector<Value *, 0> AdditionalValues;
1874 Value *Op = llvm::salvageDebugInfoImpl(
1875 *Inst, Expr ? Expr->getNumLocationOperands() : 0, Ops,
1876 AdditionalValues);
1877 if (!Op || !AdditionalValues.empty()) {
1878 // If salvaging failed or salvaging produced more than one location
1879 // operand, give up.
1880 break;
1881 }
1882 Storage = Op;
1883 Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, /*StackValue*/ false);
1884 }
1885 SkipOutermostLoad = false;
1886 }
1887 if (!Storage)
1888 return std::nullopt;
1890 auto *StorageAsArg = dyn_cast<Argument>(Storage);
1891 const bool IsSwiftAsyncArg =
1892 StorageAsArg && StorageAsArg->hasAttribute(Attribute::SwiftAsync);
1894 // Swift async arguments are described by an entry value of the ABI-defined
1895 // register containing the coroutine context.
1896 // Entry values in variadic expressions are not supported.
1897 if (IsSwiftAsyncArg && UseEntryValue && !Expr->isEntryValue() &&
1898 Expr->isSingleLocationExpression())
1899 Expr = DIExpression::prepend(Expr, DIExpression::EntryValue);
1901 // If the coroutine frame is an Argument, store it in an alloca to improve
1902 // its availability (e.g. registers may be clobbered).
1903 // Avoid this if the value is guaranteed to be available through other means
1904 // (e.g. swift ABI guarantees).
1905 if (StorageAsArg && !IsSwiftAsyncArg) {
1906 auto &Cached = ArgToAllocaMap[StorageAsArg];
1907 if (!Cached) {
1908 Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
1909 Storage->getName() + ".debug");
1910 Builder.CreateStore(Storage, Cached);
1911 }
1912 Storage = Cached;
1913 // FIXME: LLVM lacks nuanced semantics to differentiate between
1914 // memory and direct locations at the IR level. The backend will
1915 // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
1916 // location. Thus, if there are deref and offset operations in the
1917 // expression, we need to add a DW_OP_deref at the *start* of the
1918 // expression to first load the contents of the alloca before
1919 // adjusting it with the expression.
1920 Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
1921 }
1923 Expr = Expr->foldConstantMath();
1924 return {{*Storage, *Expr}};
1925 }
1927 void coro::salvageDebugInfo(
1928 SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
1929 DbgVariableIntrinsic &DVI, bool UseEntryValue) {
1931 Function *F = DVI.getFunction();
1932 // Follow the pointer arithmetic all the way back to the incoming
1933 // function argument and convert it into a DIExpression.
1934 bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
1935 Value *OriginalStorage = DVI.getVariableLocationOp(0);
1937 auto SalvagedInfo =
1938 ::salvageDebugInfoImpl(ArgToAllocaMap, UseEntryValue, F, OriginalStorage,
1939 DVI.getExpression(), SkipOutermostLoad);
1940 if (!SalvagedInfo)
1941 return;
1943 Value *Storage = &SalvagedInfo->first;
1944 DIExpression *Expr = &SalvagedInfo->second;
1946 DVI.replaceVariableLocationOp(OriginalStorage, Storage);
1947 DVI.setExpression(Expr);
1948 // We only hoist dbg.declare today; it doesn't make sense to hoist
1949 // dbg.value because it does not have the same function-wide guarantees
1950 // that dbg.declare does.
1951 if (isa<DbgDeclareInst>(DVI)) {
1952 std::optional<BasicBlock::iterator> InsertPt;
1953 if (auto *I = dyn_cast<Instruction>(Storage)) {
1954 InsertPt = I->getInsertionPointAfterDef();
1955 // Update DILocation only if variable was not inlined.
1956 DebugLoc ILoc = I->getDebugLoc();
1957 DebugLoc DVILoc = DVI.getDebugLoc();
1958 if (ILoc && DVILoc &&
1959 DVILoc->getScope()->getSubprogram() ==
1960 ILoc->getScope()->getSubprogram())
1961 DVI.setDebugLoc(I->getDebugLoc());
1962 } else if (isa<Argument>(Storage))
1963 InsertPt = F->getEntryBlock().begin();
1964 if (InsertPt)
1965 DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt);
1966 }
1967 }
1969 void coro::salvageDebugInfo(
1970 SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
1971 DbgVariableRecord &DVR, bool UseEntryValue) {
1973 Function *F = DVR.getFunction();
1974 // Follow the pointer arithmetic all the way back to the incoming
1975 // function argument and convert it into a DIExpression.
1976 bool SkipOutermostLoad = DVR.isDbgDeclare();
1977 Value *OriginalStorage = DVR.getVariableLocationOp(0);
1979 auto SalvagedInfo =
1980 ::salvageDebugInfoImpl(ArgToAllocaMap, UseEntryValue, F, OriginalStorage,
1981 DVR.getExpression(), SkipOutermostLoad);
1982 if (!SalvagedInfo)
1983 return;
1985 Value *Storage = &SalvagedInfo->first;
1986 DIExpression *Expr = &SalvagedInfo->second;
1988 DVR.replaceVariableLocationOp(OriginalStorage, Storage);
1989 DVR.setExpression(Expr);
1990 // We only hoist dbg.declare today; it doesn't make sense to hoist
1991 // dbg.value because it does not have the same function-wide guarantees
1992 // that dbg.declare does.
1993 if (DVR.getType() == DbgVariableRecord::LocationType::Declare) {
1994 std::optional<BasicBlock::iterator> InsertPt;
1995 if (auto *I = dyn_cast<Instruction>(Storage)) {
1996 InsertPt = I->getInsertionPointAfterDef();
1997 // Update DILocation only if variable was not inlined.
1998 DebugLoc ILoc = I->getDebugLoc();
1999 DebugLoc DVRLoc = DVR.getDebugLoc();
2000 if (ILoc && DVRLoc &&
2001 DVRLoc->getScope()->getSubprogram() ==
2002 ILoc->getScope()->getSubprogram())
2003 DVR.setDebugLoc(ILoc);
2004 } else if (isa<Argument>(Storage))
2005 InsertPt = F->getEntryBlock().begin();
2006 if (InsertPt) {
2007 DVR.removeFromParent();
2008 (*InsertPt)->getParent()->insertDbgRecordBefore(&DVR, *InsertPt);
2009 }
2010 }
2011 }
2013 void coro::normalizeCoroutine(Function &F, coro::Shape &Shape,
2014 TargetTransformInfo &TTI) {
2015 // Don't eliminate swifterror in async functions that won't be split.
2016 if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
2017 eliminateSwiftError(F, Shape);
2019 if (Shape.ABI == coro::ABI::Switch &&
2020 Shape.SwitchLowering.PromiseAlloca) {
2021 Shape.getSwitchCoroId()->clearPromise();
2022 }
2024 // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
2025 // intrinsics are in their own blocks to simplify the logic of building up
2026 // SuspendCrossing data.
2027 for (auto *CSI : Shape.CoroSuspends) {
2028 if (auto *Save = CSI->getCoroSave())
2029 splitAround(Save, "CoroSave");
2030 splitAround(CSI, "CoroSuspend");
2031 }
2033 // Put CoroEnds into their own blocks.
2034 for (AnyCoroEndInst *CE : Shape.CoroEnds) {
2035 splitAround(CE, "CoroEnd");
2037 // Emit the musttail call function in a new block before the CoroEnd.
2038 // We do this here so that the right suspend-crossing info is computed for
2039 // the uses of the musttail call. (Arguments to the coro.end instruction
2040 // itself would be ignored.)
2041 if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(CE)) {
2042 auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction();
2043 if (!MustTailCallFn)
2044 continue;
2045 IRBuilder<> Builder(AsyncEnd);
2046 SmallVector<Value *, 8> Args(AsyncEnd->args());
2047 auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
2048 auto *Call = coro::createMustTailCall(
2049 AsyncEnd->getDebugLoc(), MustTailCallFn, TTI, Arguments, Builder);
2050 splitAround(Call, "MustTailCall.Before.CoroEnd");
2051 }
2052 }
2054 // Later code makes structural assumptions about single-predecessor PHIs,
2055 // e.g. that they are not live across a suspend point.
2056 cleanupSinglePredPHIs(F);
2058 // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
2059 // never have its definition separated from the PHI by the suspend point.
2060 rewritePHIs(F);
2061 }
2063 void coro::BaseABI::buildCoroutineFrame(bool OptimizeFrame) {
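// Overview: rematerialize cheap suspend-crossing values, sink lifetime
// markers, collect the remaining values and allocas that must live in the
// frame, lay out the frame type, and finally rewrite all uses to go through
// the frame.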
2064 SuspendCrossingInfo Checker(F, Shape.CoroSuspends, Shape.CoroEnds);
2065 doRematerializations(F, Checker, IsMaterializable);
2067 const DominatorTree DT(F);
2068 if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
2069 Shape.ABI != coro::ABI::RetconOnce)
2070 sinkLifetimeStartMarkers(F, Shape, Checker, DT);
2072 // All values (that are not allocas) that need to be spilled to the frame.
2073 coro::SpillInfo Spills;
2074 // All values defined as allocas that need to live in the frame.
2075 SmallVector<coro::AllocaInfo, 8> Allocas;
2077 // Collect the spills for arguments and other not-materializable values.
2078 coro::collectSpillsFromArgs(Spills, F, Checker);
2079 SmallVector<Instruction *, 4> DeadInstructions;
2080 SmallVector<CoroAllocaAllocInst *, 4> LocalAllocas;
2081 coro::collectSpillsAndAllocasFromInsts(Spills, Allocas, DeadInstructions,
2082 LocalAllocas, F, Checker, DT, Shape);
2083 coro::collectSpillsFromDbgInfo(Spills, F, Checker);
2085 LLVM_DEBUG(dumpAllocas(Allocas));
2086 LLVM_DEBUG(dumpSpills("Spills", Spills));
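// For the retcon and async ABIs the frame storage only becomes available at
// coro.begin, so uses of spilled values must not appear before it; any such
// uses are sunk below coro.begin before the spills are inserted.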
2088 if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
2089 Shape.ABI == coro::ABI::Async)
2090 sinkSpillUsesAfterCoroBegin(DT, Shape.CoroBegin, Spills, Allocas);
2092 // Build frame
2093 FrameDataInfo FrameData(Spills, Allocas);
2094 Shape.FrameTy = buildFrameType(F, Shape, FrameData, OptimizeFrame);
2095 Shape.FramePtr = Shape.CoroBegin;
2096 // For now, this works for C++ programs only.
2097 buildFrameDebugInfo(F, Shape, FrameData);
2098 // Insert spills and reloads
2099 insertSpills(FrameData, Shape);
2100 lowerLocalAllocas(LocalAllocas, DeadInstructions);
2102 for (auto *I : DeadInstructions)
2103 I->eraseFromParent();