//===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass builds the coroutine frame and outlines the resume and destroy
// parts of the coroutine into separate functions.
//
// We present a coroutine to LLVM as an ordinary function with suspension
// points marked up with intrinsics. We let the optimizer party on the coroutine
// as a single function for as long as possible. Shortly before the coroutine is
// eligible to be inlined into its callers, we split up the coroutine into parts
// corresponding to the initial, resume and destroy invocations of the
// coroutine, add them to the current SCC and restart the IPO pipeline to
// optimize the coroutine subfunctions we extracted before proceeding to the
// caller of the coroutine.
//===----------------------------------------------------------------------===//
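
// For orientation: a switch-lowered coroutine typically reaches this pass
// looking roughly like the IR below (an illustrative, simplified sketch;
// operand lists are abbreviated and not taken verbatim from any test):
//
//   define i8* @f(i32 %n) {
//   entry:
//     %id   = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
//     %size = call i32 @llvm.coro.size.i32()
//     %mem  = call i8* @malloc(i32 %size)
//     %hdl  = call i8* @llvm.coro.begin(token %id, i8* %mem)
//     ...
//     %s = call i8 @llvm.coro.suspend(token none, i1 false)
//     ...
//     call i1 @llvm.coro.end(i8* %hdl, i1 false)
//     ret i8* %hdl
//   }
//
// From that single body this pass produces f.resume, f.destroy and f.cleanup.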
#include "CoroInstr.h"
#include "CoroInternal.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <iterator>

using namespace llvm;
#define DEBUG_TYPE "coro-split"

namespace {

/// A little helper class for building the resume and destroy function clones
/// of a coroutine.
class CoroCloner {
public:
  enum class Kind {
    /// The shared resume function for a switch lowering.
    SwitchResume,

    /// The shared unwind function for a switch lowering.
    SwitchUnwind,

    /// The shared cleanup function for a switch lowering.
    SwitchCleanup,

    /// An individual continuation function.
    Continuation,
  };

private:
  Function &OrigF;
  Function *NewF;
  const Twine &Suffix;
  coro::Shape &Shape;
  Kind FKind;
  ValueToValueMapTy VMap;
  IRBuilder<> Builder;
  Value *NewFramePtr = nullptr;
  Value *SwiftErrorSlot = nullptr;

  /// The active suspend instruction; meaningful only for continuation ABIs.
  AnyCoroSuspendInst *ActiveSuspend = nullptr;

public:
  /// Create a cloner for a switch lowering.
  CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
             Kind FKind)
      : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape),
        FKind(FKind), Builder(OrigF.getContext()) {
    assert(Shape.ABI == coro::ABI::Switch);
  }

  /// Create a cloner for a continuation lowering.
  CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
             Function *NewF, AnyCoroSuspendInst *ActiveSuspend)
      : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
        FKind(Kind::Continuation), Builder(OrigF.getContext()),
        ActiveSuspend(ActiveSuspend) {
    assert(Shape.ABI == coro::ABI::Retcon ||
           Shape.ABI == coro::ABI::RetconOnce);
    assert(NewF && "need existing function for continuation");
    assert(ActiveSuspend && "need active suspend point for continuation");
  }

  Function *getFunction() const {
    assert(NewF != nullptr && "declaration not yet set");
    return NewF;
  }

  void create();

private:
  bool isSwitchDestroyFunction() {
    switch (FKind) {
    case Kind::Continuation:
    case Kind::SwitchResume:
      return false;
    case Kind::SwitchUnwind:
    case Kind::SwitchCleanup:
      return true;
    }
    llvm_unreachable("Unknown CoroCloner::Kind enum");
  }

  void createDeclaration();
  void replaceEntryBlock();
  Value *deriveNewFramePointer();
  void replaceRetconSuspendUses();
  void replaceCoroSuspends();
  void replaceCoroEnds();
  void replaceSwiftErrorOps();
  void handleFinalSuspend();
  void maybeFreeContinuationStorage();
};

} // end anonymous namespace
static void maybeFreeRetconStorage(IRBuilder<> &Builder,
                                   const coro::Shape &Shape, Value *FramePtr,
                                   CallGraph *CG) {
  assert(Shape.ABI == coro::ABI::Retcon ||
         Shape.ABI == coro::ABI::RetconOnce);
  if (Shape.RetconLowering.IsFrameInlineInStorage)
    return;

  Shape.emitDealloc(Builder, FramePtr, CG);
}

/// Replace a non-unwind call to llvm.coro.end.
static void replaceFallthroughCoroEnd(CoroEndInst *End,
                                      const coro::Shape &Shape, Value *FramePtr,
                                      bool InResume, CallGraph *CG) {
  // Start inserting right before the coro.end.
  IRBuilder<> Builder(End);

  // Create the return instruction.
  switch (Shape.ABI) {
  // The cloned functions in switch-lowering always return void.
  case coro::ABI::Switch:
    // coro.end doesn't immediately end the coroutine in the main function
    // in this lowering, because we need to deallocate the coroutine.
    if (!InResume)
      return;
    Builder.CreateRetVoid();
    break;

  // In unique continuation lowering, the continuations always return void.
  // But we may have implicitly allocated storage.
  case coro::ABI::RetconOnce:
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    Builder.CreateRetVoid();
    break;

  // In non-unique continuation lowering, we signal completion by returning
  // a null continuation.
  case coro::ABI::Retcon: {
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    auto RetTy = Shape.getResumeFunctionType()->getReturnType();
    auto RetStructTy = dyn_cast<StructType>(RetTy);
    PointerType *ContinuationTy =
        cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy);

    Value *ReturnValue = ConstantPointerNull::get(ContinuationTy);
    if (RetStructTy) {
      ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy),
                                              ReturnValue, 0);
    }
    Builder.CreateRet(ReturnValue);
    break;
  }
  }

  // Remove the rest of the block, by splitting it into an unreachable block.
  auto *BB = End->getParent();
  BB->splitBasicBlock(End);
  BB->getTerminator()->eraseFromParent();
}
/// Replace an unwind call to llvm.coro.end.
static void replaceUnwindCoroEnd(CoroEndInst *End, const coro::Shape &Shape,
                                 Value *FramePtr, bool InResume,
                                 CallGraph *CG) {
  IRBuilder<> Builder(End);

  switch (Shape.ABI) {
  // In switch-lowering, this does nothing in the main function.
  case coro::ABI::Switch:
    if (!InResume)
      return;
    break;

  // In continuation-lowering, this frees the continuation storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
    break;
  }

  // If coro.end has an associated bundle, add cleanupret instruction.
  if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
    auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
    auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);
    End->getParent()->splitBasicBlock(End);
    CleanupRet->getParent()->getTerminator()->eraseFromParent();
  }
}

static void replaceCoroEnd(CoroEndInst *End, const coro::Shape &Shape,
                           Value *FramePtr, bool InResume, CallGraph *CG) {
  if (End->isUnwind())
    replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
  else
    replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG);

  auto &Context = End->getContext();
  End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context)
                                   : ConstantInt::getFalse(Context));
  End->eraseFromParent();
}
// Create an entry block for a resume function with a switch that will jump to
// suspend points.
static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
  assert(Shape.ABI == coro::ABI::Switch);
  LLVMContext &C = F.getContext();

  // resume.entry:
  //  %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0,
  //  i32 2
  //  %index = load i32, i32* %index.addr
  //  switch i32 %index, label %unreachable [
  //    i32 0, label %resume.0
  //    i32 1, label %resume.1
  //    ...
  //  ]

  auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F);
  auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F);

  IRBuilder<> Builder(NewEntry);
  auto *FramePtr = Shape.FramePtr;
  auto *FrameTy = Shape.FrameTy;
  auto *GepIndex = Builder.CreateStructGEP(
      FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr");
  auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
  auto *Switch =
      Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
  Shape.SwitchLowering.ResumeSwitch = Switch;

  size_t SuspendIndex = 0;
  for (auto *AnyS : Shape.CoroSuspends) {
    auto *S = cast<CoroSuspendInst>(AnyS);
    ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);

    // Replace CoroSave with a store to Index:
    //    %index.addr = getelementptr %f.frame... (index field number)
    //    store i32 0, i32* %index.addr1
    auto *Save = S->getCoroSave();
    Builder.SetInsertPoint(Save);
    if (S->isFinal()) {
      // Final suspend point is represented by storing zero in ResumeFnAddr.
      auto *GepIndex = Builder.CreateStructGEP(FrameTy, FramePtr,
                                 coro::Shape::SwitchFieldIndex::Resume,
                                 "ResumeFn.addr");
      auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
          cast<PointerType>(GepIndex->getType())->getElementType()));
      Builder.CreateStore(NullPtr, GepIndex);
    } else {
      auto *GepIndex = Builder.CreateStructGEP(
          FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr");
      Builder.CreateStore(IndexVal, GepIndex);
    }
    Save->replaceAllUsesWith(ConstantTokenNone::get(C));
    Save->eraseFromParent();

    // Split block before and after coro.suspend and add a jump from an entry
    // switch:
    //
    //  whateverBB:
    //    whatever
    //    %0 = call i8 @llvm.coro.suspend(token none, i1 false)
    //    switch i8 %0, label %suspend[i8 0, label %resume
    //                                 i8 1, label %cleanup]
    // becomes:
    //
    //  whateverBB:
    //    whatever
    //    br label %resume.0.landing
    //
    //  resume.0: ; <--- jump from the switch in the resume.entry
    //    %0 = tail call i8 @llvm.coro.suspend(token none, i1 false)
    //    br label %resume.0.landing
    //
    //  resume.0.landing:
    //    %1 = phi i8[-1, %whateverBB], [%0, %resume.0]
    //    switch i8 %1, label %suspend [i8 0, label %resume
    //                                  i8 1, label %cleanup]

    auto *SuspendBB = S->getParent();
    auto *ResumeBB =
        SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex));
    auto *LandingBB = ResumeBB->splitBasicBlock(
        S->getNextNode(), ResumeBB->getName() + Twine(".landing"));
    Switch->addCase(IndexVal, ResumeBB);

    cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB);
    auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "", &LandingBB->front());
    S->replaceAllUsesWith(PN);
    PN->addIncoming(Builder.getInt8(-1), SuspendBB);
    PN->addIncoming(S, ResumeBB);

    ++SuspendIndex;
  }

  Builder.SetInsertPoint(UnreachBB);
  Builder.CreateUnreachable();

  Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
}
// Rewrite final suspend point handling. We do not use a suspend index to
// represent the final suspend point. Instead we zero out ResumeFnAddr in the
// coroutine frame, since it is undefined behavior to resume a coroutine
// suspended at the final suspend point. Thus, in the resume function, we can
// simply remove the last case (when coro::Shape is built, the final suspend
// point (if present) is always the last element of the CoroSuspends array).
// In the destroy function, we add a code sequence to check if ResumeFnAddr
// is null, and if so, jump to the appropriate label to handle cleanup from
// the final suspend point.
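//
// Schematically, the check emitted into the destroy/cleanup clone below looks
// like this (an illustrative sketch, with names abbreviated):
//
//   %ResumeFn.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr,
//                                           i32 0, i32 0
//   %ResumeFn = load void (%f.Frame*)*, void (%f.Frame*)** %ResumeFn.addr
//   %is.final = icmp eq void (%f.Frame*)* %ResumeFn, null
//   br i1 %is.final, label %resume.final, label %Switch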
void CoroCloner::handleFinalSuspend() {
  assert(Shape.ABI == coro::ABI::Switch &&
         Shape.SwitchLowering.HasFinalSuspend);
  auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]);
  auto FinalCaseIt = std::prev(Switch->case_end());
  BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
  Switch->removeCase(FinalCaseIt);
  if (isSwitchDestroyFunction()) {
    BasicBlock *OldSwitchBB = Switch->getParent();
    auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
    Builder.SetInsertPoint(OldSwitchBB->getTerminator());
    auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr,
                               coro::Shape::SwitchFieldIndex::Resume,
                               "ResumeFn.addr");
    auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(),
                                    GepIndex);
    auto *Cond = Builder.CreateIsNull(Load);
    Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
    OldSwitchBB->getTerminator()->eraseFromParent();
  }
}
static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
                                        const Twine &Suffix,
                                        Module::iterator InsertBefore) {
  Module *M = OrigF.getParent();
  auto *FnTy = Shape.getResumeFunctionType();

  Function *NewF =
      Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
                       OrigF.getName() + Suffix);
  NewF->addParamAttr(0, Attribute::NonNull);
  NewF->addParamAttr(0, Attribute::NoAlias);

  M->getFunctionList().insert(InsertBefore, NewF);

  return NewF;
}

/// Replace uses of the active llvm.coro.suspend.retcon call with the
/// arguments to the continuation function.
///
/// This assumes that the builder has a meaningful insertion point.
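///
/// Illustrative sketch (names abbreviated, not from the original source):
/// if the active suspend yielded {i8*, i32} in the original function,
///
///   %vals = call {i8*, i32} @llvm.coro.suspend.retcon(...)
///   %v = extractvalue {i8*, i32} %vals, 1
///
/// then %v simply becomes the second non-buffer argument of the continuation,
/// and single-index extractvalues like this are peepholed away entirely.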
void CoroCloner::replaceRetconSuspendUses() {
  assert(Shape.ABI == coro::ABI::Retcon ||
         Shape.ABI == coro::ABI::RetconOnce);

  auto NewS = VMap[ActiveSuspend];
  if (NewS->use_empty()) return;

  // Copy out all the continuation arguments after the buffer pointer into
  // an easily-indexed data structure for convenience.
  SmallVector<Value*, 8> Args;
  for (auto I = std::next(NewF->arg_begin()), E = NewF->arg_end(); I != E; ++I)
    Args.push_back(&*I);

  // If the suspend returns a single scalar value, we can just do a simple
  // replacement.
  if (!isa<StructType>(NewS->getType())) {
    assert(Args.size() == 1);
    NewS->replaceAllUsesWith(Args.front());
    return;
  }

  // Try to peephole extracts of an aggregate return.
  for (auto UI = NewS->use_begin(), UE = NewS->use_end(); UI != UE; ) {
    auto EVI = dyn_cast<ExtractValueInst>((UI++)->getUser());
    if (!EVI || EVI->getNumIndices() != 1)
      continue;

    EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]);
    EVI->eraseFromParent();
  }

  // If we have no remaining uses, we're done.
  if (NewS->use_empty()) return;

  // Otherwise, we need to create an aggregate.
  Value *Agg = UndefValue::get(NewS->getType());
  for (size_t I = 0, E = Args.size(); I != E; ++I)
    Agg = Builder.CreateInsertValue(Agg, Args[I], I);

  NewS->replaceAllUsesWith(Agg);
}
void CoroCloner::replaceCoroSuspends() {
  Value *SuspendResult;

  switch (Shape.ABI) {
  // In switch lowering, replace coro.suspend with the appropriate value
  // for the type of function we're extracting.
  // Replacing coro.suspend with (0) will result in control flow proceeding to
  // a resume label associated with a suspend point, replacing it with (1) will
  // result in control flow proceeding to a cleanup label associated with this
  // suspend point.
  case coro::ABI::Switch:
    SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
    break;

  // In returned-continuation lowering, the arguments from earlier
  // continuations are theoretically arbitrary, and they should have been
  // spilled.
  case coro::ABI::RetconOnce:
  case coro::ABI::Retcon:
    return;
  }

  for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
    // The active suspend was handled earlier.
    if (CS == ActiveSuspend) continue;

    auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]);
    MappedCS->replaceAllUsesWith(SuspendResult);
    MappedCS->eraseFromParent();
  }
}

void CoroCloner::replaceCoroEnds() {
  for (CoroEndInst *CE : Shape.CoroEnds) {
    // We use a null call graph because there's no call graph node for
    // the cloned function yet.  We'll just be rebuilding that later.
    auto NewCE = cast<CoroEndInst>(VMap[CE]);
    replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr);
  }
}

static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
                                 ValueToValueMapTy *VMap) {
  Value *CachedSlot = nullptr;
  auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
    if (CachedSlot) {
      assert(CachedSlot->getType()->getPointerElementType() == ValueTy &&
             "multiple swifterror slots in function with different types");
      return CachedSlot;
    }

    // Check if the function has a swifterror argument.
    for (auto &Arg : F.args()) {
      if (Arg.isSwiftError()) {
        CachedSlot = &Arg;
        assert(Arg.getType()->getPointerElementType() == ValueTy &&
               "swifterror argument does not have expected type");
        return &Arg;
      }
    }

    // Create a swifterror alloca.
    IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
    auto Alloca = Builder.CreateAlloca(ValueTy);
    Alloca->setSwiftError(true);

    CachedSlot = Alloca;
    return Alloca;
  };

  for (CallInst *Op : Shape.SwiftErrorOps) {
    auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op;
    IRBuilder<> Builder(MappedOp);

    // If there are no arguments, this is a 'get' operation.
    Value *MappedResult;
    if (Op->getNumArgOperands() == 0) {
      auto ValueTy = Op->getType();
      auto Slot = getSwiftErrorSlot(ValueTy);
      MappedResult = Builder.CreateLoad(ValueTy, Slot);
    } else {
      assert(Op->getNumArgOperands() == 1);
      auto Value = MappedOp->getArgOperand(0);
      auto ValueTy = Value->getType();
      auto Slot = getSwiftErrorSlot(ValueTy);
      Builder.CreateStore(Value, Slot);
      MappedResult = Slot;
    }

    MappedOp->replaceAllUsesWith(MappedResult);
    MappedOp->eraseFromParent();
  }

  // If we're updating the original function, we've invalidated SwiftErrorOps.
  if (VMap == nullptr) {
    Shape.SwiftErrorOps.clear();
  }
}

void CoroCloner::replaceSwiftErrorOps() {
  ::replaceSwiftErrorOps(*NewF, Shape, &VMap);
}
void CoroCloner::replaceEntryBlock() {
  // In the original function, the AllocaSpillBlock is a block immediately
  // following the allocation of the frame object which defines GEPs for
  // all the allocas that have been moved into the frame, and it ends by
  // branching to the original beginning of the coroutine.  Make this
  // the entry block of the cloned function.
  auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
  Entry->setName("entry" + Suffix);
  Entry->moveBefore(&NewF->getEntryBlock());
  Entry->getTerminator()->eraseFromParent();

  // Clear all predecessors of the new entry block.  There should be
  // exactly one predecessor, which we created when splitting out
  // AllocaSpillBlock to begin with.
  assert(Entry->hasOneUse());
  auto BranchToEntry = cast<BranchInst>(Entry->user_back());
  assert(BranchToEntry->isUnconditional());
  Builder.SetInsertPoint(BranchToEntry);
  Builder.CreateUnreachable();
  BranchToEntry->eraseFromParent();

  // TODO: move any allocas into Entry that weren't moved into the frame.
  // (Currently we move all allocas into the frame.)

  // Branch from the entry to the appropriate place.
  Builder.SetInsertPoint(Entry);
  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    // In switch-lowering, we built a resume-entry block in the original
    // function.  Make the entry block branch to this.
    auto *SwitchBB =
        cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]);
    Builder.CreateBr(SwitchBB);
    break;
  }

  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    // In continuation ABIs, we want to branch to immediately after the
    // active suspend point.  Earlier phases will have put the suspend in its
    // own basic block, so just thread our jump directly to its successor.
    auto MappedCS = cast<CoroSuspendRetconInst>(VMap[ActiveSuspend]);
    auto Branch = cast<BranchInst>(MappedCS->getNextNode());
    assert(Branch->isUnconditional());
    Builder.CreateBr(Branch->getSuccessor(0));
    break;
  }
  }
}

/// Derive the value of the new frame pointer.
Value *CoroCloner::deriveNewFramePointer() {
  // Builder should be inserting to the front of the new entry block.

  switch (Shape.ABI) {
  // In switch-lowering, the argument is the frame pointer.
  case coro::ABI::Switch:
    return &*NewF->arg_begin();

  // In continuation-lowering, the argument is the opaque storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    Argument *NewStorage = &*NewF->arg_begin();
    auto FramePtrTy = Shape.FrameTy->getPointerTo();

    // If the storage is inline, just bitcast the storage to the frame type.
    if (Shape.RetconLowering.IsFrameInlineInStorage)
      return Builder.CreateBitCast(NewStorage, FramePtrTy);

    // Otherwise, load the real frame from the opaque storage.
    auto FramePtrPtr =
        Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo());
    return Builder.CreateLoad(FramePtrPtr);
  }
  }
  llvm_unreachable("bad ABI");
}
/// Clone the body of the original function into a resume function of
/// some sort.
void CoroCloner::create() {
  // Create the new function if we don't already have one.
  if (!NewF) {
    NewF = createCloneDeclaration(OrigF, Shape, Suffix,
                                  OrigF.getParent()->end());
  }

  // Replace all args with undefs. The buildCoroutineFrame algorithm has
  // already rewritten accesses to the args that occur after suspend points
  // with loads and stores to/from the coroutine frame.
  for (Argument &A : OrigF.args())
    VMap[&A] = UndefValue::get(A.getType());

  SmallVector<ReturnInst *, 4> Returns;

  // Ignore attempts to change certain attributes of the function.
  // TODO: maybe there should be a way to suppress this during cloning?
  auto savedVisibility = NewF->getVisibility();
  auto savedUnnamedAddr = NewF->getUnnamedAddr();
  auto savedDLLStorageClass = NewF->getDLLStorageClass();

  // NewF's linkage (which CloneFunctionInto does *not* change) might not
  // be compatible with the visibility of OrigF (which it *does* change),
  // so protect against that.
  auto savedLinkage = NewF->getLinkage();
  NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);

  CloneFunctionInto(NewF, &OrigF, VMap, /*ModuleLevelChanges=*/true, Returns);

  NewF->setLinkage(savedLinkage);
  NewF->setVisibility(savedVisibility);
  NewF->setUnnamedAddr(savedUnnamedAddr);
  NewF->setDLLStorageClass(savedDLLStorageClass);

  auto &Context = NewF->getContext();

  // Replace the attributes of the new function:
  auto OrigAttrs = NewF->getAttributes();
  auto NewAttrs = AttributeList();

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // Bootstrap attributes by copying function attributes from the
    // original function.  This should include optimization settings and so on.
    NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex,
                                      OrigAttrs.getFnAttributes());
    break;

  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // If we have a continuation prototype, just use its attributes,
    // full-stop.
    NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();
    break;
  }

  // Make the frame parameter nonnull and noalias.
  NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NonNull);
  NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NoAlias);

  switch (Shape.ABI) {
  // In these ABIs, the cloned functions always return 'void', and the
  // existing return sites are meaningless.  Note that for unique
  // continuations, this includes the returns associated with suspends;
  // this is fine because we can't suspend twice.
  case coro::ABI::Switch:
  case coro::ABI::RetconOnce:
    // Remove old returns.
    for (ReturnInst *Return : Returns)
      changeToUnreachable(Return, /*UseLLVMTrap=*/false);
    break;

  // With multi-suspend continuations, we'll already have eliminated the
  // original returns and inserted returns before all the suspend points,
  // so we want to leave any returns in place.
  case coro::ABI::Retcon:
    break;
  }

  NewF->setAttributes(NewAttrs);
  NewF->setCallingConv(Shape.getResumeFunctionCC());

  // Set up the new entry block.
  replaceEntryBlock();

  Builder.SetInsertPoint(&NewF->getEntryBlock().front());
  NewFramePtr = deriveNewFramePointer();

  // Remap frame pointer.
  Value *OldFramePtr = VMap[Shape.FramePtr];
  NewFramePtr->takeName(OldFramePtr);
  OldFramePtr->replaceAllUsesWith(NewFramePtr);

  // Remap vFrame pointer.
  auto *NewVFrame = Builder.CreateBitCast(
      NewFramePtr, Type::getInt8PtrTy(Builder.getContext()), "vFrame");
  Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
  OldVFrame->replaceAllUsesWith(NewVFrame);

  switch (Shape.ABI) {
  case coro::ABI::Switch:
    // Rewrite final suspend handling as it is not done via switch (this
    // allows us to remove the final case from the switch, since it is
    // undefined behavior to resume a coroutine suspended at the final
    // suspend point).
    if (Shape.SwitchLowering.HasFinalSuspend)
      handleFinalSuspend();
    break;

  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    // Replace uses of the active suspend with the corresponding
    // continuation-function arguments.
    assert(ActiveSuspend != nullptr &&
           "no active suspend when lowering a continuation-style coroutine");
    replaceRetconSuspendUses();
    break;
  }

  // Handle suspends.
  replaceCoroSuspends();

  // Handle swifterror.
  replaceSwiftErrorOps();

  // Remove coro.end intrinsics.
  replaceCoroEnds();

  // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
  // to suppress deallocation code.
  if (Shape.ABI == coro::ABI::Switch)
    coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
                          /*Elide=*/ FKind == CoroCloner::Kind::SwitchCleanup);
}
// Create a resume clone by cloning the body of the original function, setting
// a new entry block and replacing coro.suspend with an appropriate value to
// force resume or cleanup to proceed at every suspend point.
static Function *createClone(Function &F, const Twine &Suffix,
                             coro::Shape &Shape, CoroCloner::Kind FKind) {
  CoroCloner Cloner(F, Suffix, Shape, FKind);
  Cloner.create();
  return Cloner.getFunction();
}

/// Remove calls to llvm.coro.end in the original function.
static void removeCoroEnds(const coro::Shape &Shape, CallGraph *CG) {
  for (auto End : Shape.CoroEnds) {
    replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, CG);
  }
}

static void replaceFrameSize(coro::Shape &Shape) {
  if (Shape.CoroSizes.empty())
    return;

  // In the same function all coro.sizes should have the same result type.
  auto *SizeIntrin = Shape.CoroSizes.back();
  Module *M = SizeIntrin->getModule();
  const DataLayout &DL = M->getDataLayout();
  auto Size = DL.getTypeAllocSize(Shape.FrameTy);
  auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);

  for (CoroSizeInst *CS : Shape.CoroSizes) {
    CS->replaceAllUsesWith(SizeConstant);
    CS->eraseFromParent();
  }
}
// Create a global constant array containing pointers to functions provided and
// set Info parameter of CoroBegin to point at this constant. Example:
//
//   @f.resumers = internal constant [2 x void(%f.frame*)*]
//                    [void(%f.frame*)* @f.resume, void(%f.frame*)* @f.destroy]
//   define void @f() {
//     ...
//     call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
//                    i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*))
//
// Assumes that all the functions have the same signature.
static void setCoroInfo(Function &F, coro::Shape &Shape,
                        ArrayRef<Function *> Fns) {
  // This only works under the switch-lowering ABI because coro elision
  // only works on the switch-lowering ABI.
  assert(Shape.ABI == coro::ABI::Switch);

  SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end());
  assert(!Args.empty());
  Function *Part = *Fns.begin();
  Module *M = Part->getParent();
  auto *ArrTy = ArrayType::get(Part->getType(), Args.size());

  auto *ConstVal = ConstantArray::get(ArrTy, Args);
  auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
                                GlobalVariable::PrivateLinkage, ConstVal,
                                F.getName() + Twine(".resumers"));

  // Update coro.begin instruction to refer to this constant.
  LLVMContext &C = F.getContext();
  auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C));
  Shape.getSwitchCoroId()->setInfo(BC);
}

// Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
                            Function *DestroyFn, Function *CleanupFn) {
  assert(Shape.ABI == coro::ABI::Switch);

  IRBuilder<> Builder(Shape.FramePtr->getNextNode());
  auto *ResumeAddr = Builder.CreateStructGEP(
      Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
      "resume.addr");
  Builder.CreateStore(ResumeFn, ResumeAddr);

  Value *DestroyOrCleanupFn = DestroyFn;

  CoroIdInst *CoroId = Shape.getSwitchCoroId();
  if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
    // If there is a CoroAlloc and it returns false (meaning we elide the
    // allocation), use CleanupFn instead of DestroyFn.
    DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
  }

  auto *DestroyAddr = Builder.CreateStructGEP(
      Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy,
      "destroy.addr");
  Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
}
static void postSplitCleanup(Function &F) {
  removeUnreachableBlocks(F);

  // For now, we do a mandatory verification step because we don't
  // entirely trust this pass.  Note that we don't want to add a verifier
  // pass to FPM below because it will also verify all the global data.
  verifyFunction(F);

  legacy::FunctionPassManager FPM(F.getParent());

  FPM.add(createSCCPPass());
  FPM.add(createCFGSimplificationPass());
  FPM.add(createEarlyCSEPass());
  FPM.add(createCFGSimplificationPass());

  FPM.doInitialization();
  FPM.run(F);
  FPM.doFinalization();
}

// Assuming we arrived at the block NewBlock from the Prev instruction, store
// the PHIs' incoming values in the ResolvedValues map.
static void
scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
                          DenseMap<Value *, Value *> &ResolvedValues) {
  auto *PrevBB = Prev->getParent();
  for (PHINode &PN : NewBlock->phis()) {
    auto V = PN.getIncomingValueForBlock(PrevBB);
    // See if we already resolved it.
    auto VI = ResolvedValues.find(V);
    if (VI != ResolvedValues.end())
      V = VI->second;
    // Remember the value.
    ResolvedValues[&PN] = V;
  }
}
// Replace a sequence of branches leading to a ret with a clone of that ret
// instruction. A suspend instruction is represented by a switch; track the
// PHI values and select the correct case successor when possible.
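//
// Illustrative sketch (not from the original source): starting from the
// terminator after a resume call,
//
//   switch i8 %0, label %suspend [i8 0, label %step]  ; %0 known via PHIs
//   step:    br label %exit
//   exit:    ret void
//
// the walk follows the constant-folded switch and the unconditional branches
// to the ret, then replaces the initial terminator with a clone of that ret.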
static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
  DenseMap<Value *, Value *> ResolvedValues;
  BasicBlock *UnconditionalSucc = nullptr;

  Instruction *I = InitialInst;
  while (I->isTerminator()) {
    if (isa<ReturnInst>(I)) {
      if (I != InitialInst) {
        // If InitialInst is an unconditional branch,
        // remove PHI values that come from basic block of InitialInst
        if (UnconditionalSucc)
          for (PHINode &PN : UnconditionalSucc->phis()) {
            int idx = PN.getBasicBlockIndex(InitialInst->getParent());
            if (idx != -1)
              PN.removeIncomingValue(idx);
          }
        ReplaceInstWithInst(InitialInst, I->clone());
      }
      return true;
    }
    if (auto *BR = dyn_cast<BranchInst>(I)) {
      if (BR->isUnconditional()) {
        BasicBlock *BB = BR->getSuccessor(0);
        if (I == InitialInst)
          UnconditionalSucc = BB;
        scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
        I = BB->getFirstNonPHIOrDbgOrLifetime();
        continue;
      }
    } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
      Value *V = SI->getCondition();
      auto it = ResolvedValues.find(V);
      if (it != ResolvedValues.end())
        V = it->second;
      if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
        BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
        scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
        I = BB->getFirstNonPHIOrDbgOrLifetime();
        continue;
      }
    }
    return false;
  }
  return false;
}
// Add musttail to any resume instructions that are immediately followed by a
// suspend (i.e. ret). We do this even in -O0 to support guaranteed tail calls
// for symmetrical coroutine control transfer (C++ Coroutines TS extension).
// This transformation is done only in the resume part of the coroutine, which
// has the identical signature and calling convention as the coro.resume call.
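//
// Illustrative sketch of the rewrite (operands abbreviated, not taken from
// any test):
//
//   %addr = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
//   %fn = bitcast i8* %addr to void (i8*)*
//   call void %fn(i8* %hdl)        ; ==> musttail call void %fn(i8* %hdl)
//   ret void                       ; once the branches in between are folded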
static void addMustTailToCoroResumes(Function &F) {
  bool changed = false;

  // Collect potential resume instructions.
  SmallVector<CallInst *, 4> Resumes;
  for (auto &I : instructions(F))
    if (auto *Call = dyn_cast<CallInst>(&I))
      if (auto *CalledValue = Call->getCalledValue())
        // CoroEarly pass replaced coro resumes with indirect calls to an
        // address returned by the CoroSubFnInst intrinsic. See if it is one
        // of those.
        if (isa<CoroSubFnInst>(CalledValue->stripPointerCasts()))
          Resumes.push_back(Call);

  // Set musttail on those that are followed by a ret instruction.
  for (CallInst *Call : Resumes)
    if (simplifyTerminatorLeadingToRet(Call->getNextNode())) {
      Call->setTailCallKind(CallInst::TCK_MustTail);
      changed = true;
    }

  if (changed)
    removeUnreachableBlocks(F);
}
// Coroutine has no suspend points. Remove heap allocation for the coroutine
// frame if possible.
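//
// Illustrative sketch for the switch ABI when coro.alloc is present: the
// heap frame degenerates into a stack slot,
//
//   %frame = alloca %f.Frame
//   %vFrame = bitcast %f.Frame* %frame to i8*
//
// coro.alloc is replaced with 'false' (skipping the allocation path), and
// every use of coro.begin sees %vFrame instead of the heap pointer.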
static void handleNoSuspendCoroutine(coro::Shape &Shape) {
  auto *CoroBegin = Shape.CoroBegin;
  auto *CoroId = CoroBegin->getId();
  auto *AllocInst = CoroId->getCoroAlloc();
  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    auto SwitchId = cast<CoroIdInst>(CoroId);
    coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
    if (AllocInst) {
      IRBuilder<> Builder(AllocInst);
      // FIXME: Need to handle overaligned members.
      auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
      auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy());
      AllocInst->replaceAllUsesWith(Builder.getFalse());
      AllocInst->eraseFromParent();
      CoroBegin->replaceAllUsesWith(VFrame);
    } else {
      CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
    }
    break;
  }

  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType()));
    break;
  }

  CoroBegin->eraseFromParent();
}
// SimplifySuspendPoint needs to check that there are no calls between
// coro_save and coro_suspend, since any of the calls may potentially resume
// the coroutine and if that is the case we cannot eliminate the suspend point.
static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
  for (Instruction *I = From; I != To; I = I->getNextNode()) {
    // Assume that no intrinsic can resume the coroutine.
    if (isa<IntrinsicInst>(I))
      continue;

    if (CallSite(I))
      return true;
  }
  return false;
}

static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
  SmallPtrSet<BasicBlock *, 8> Set;
  SmallVector<BasicBlock *, 8> Worklist;

  Set.insert(SaveBB);
  Worklist.push_back(ResDesBB);

  // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr
  // returns a token consumed by suspend instruction, all blocks in between
  // will have to eventually hit SaveBB when going backwards from ResDesBB.
  while (!Worklist.empty()) {
    auto *BB = Worklist.pop_back_val();
    Set.insert(BB);
    for (auto *Pred : predecessors(BB))
      if (Set.count(Pred) == 0)
        Worklist.push_back(Pred);
  }

  // SaveBB and ResDesBB are checked separately in hasCallsBetween.
  Set.erase(SaveBB);
  Set.erase(ResDesBB);

  for (auto *BB : Set)
    if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr))
      return true;

  return false;
}

static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
  auto *SaveBB = Save->getParent();
  auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();

  if (SaveBB == ResumeOrDestroyBB)
    return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy);

  // Any calls from Save to the end of the block?
  if (hasCallsInBlockBetween(Save->getNextNode(), nullptr))
    return true;

  // Any calls from the beginning of the block up to ResumeOrDestroy?
  if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(),
                             ResumeOrDestroy))
    return true;

  // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
  if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
    return true;

  return false;
}
// If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
// suspend point and replace it with normal control flow.
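//
// Illustrative sketch (abbreviated, not from the original source): given
//
//   %save = call token @llvm.coro.save(i8* %hdl)
//   call void %fn(i8* %hdl)   ; indirect call through coro.subfn.addr, index 0
//   %s = call i8 @llvm.coro.suspend(token %save, i1 false)
//
// the suspend/resume pair cancels out: %s is replaced with the subfunction
// index (0 here), steering control straight down the resume path without
// ever leaving the coroutine.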
static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
                                 CoroBeginInst *CoroBegin) {
  Instruction *Prev = Suspend->getPrevNode();
  if (!Prev) {
    auto *Pred = Suspend->getParent()->getSinglePredecessor();
    if (!Pred)
      return false;
    Prev = Pred->getTerminator();
  }

  CallSite CS{Prev};
  if (!CS)
    return false;

  auto *CallInstr = CS.getInstruction();

  auto *Callee = CS.getCalledValue()->stripPointerCasts();

  // See if the callsite is for resumption or destruction of the coroutine.
  auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
  if (!SubFn)
    return false;

  // Does not refer to the current coroutine, we cannot do anything with it.
  if (SubFn->getFrame() != CoroBegin)
    return false;

  // See if the transformation is safe. Specifically, see if there are any
  // calls in between Save and CallInstr. They can potentially resume the
  // coroutine rendering this optimization unsafe.
  auto *Save = Suspend->getCoroSave();
  if (hasCallsBetween(Save, CallInstr))
    return false;

  // Replace llvm.coro.suspend with the value that results in resumption over
  // the resume or cleanup path.
  Suspend->replaceAllUsesWith(SubFn->getRawIndex());
  Suspend->eraseFromParent();
  Save->eraseFromParent();

  // No longer need a call to coro.resume or coro.destroy.
  if (auto *Invoke = dyn_cast<InvokeInst>(CallInstr)) {
    BranchInst::Create(Invoke->getNormalDest(), Invoke);
  }

  // Grab the CalledValue from CS before erasing the CallInstr.
  auto *CalledValue = CS.getCalledValue();
  CallInstr->eraseFromParent();

  // If no more users remove it. Usually it is a bitcast of SubFn.
  if (CalledValue != SubFn && CalledValue->user_empty())
    if (auto *I = dyn_cast<Instruction>(CalledValue))
      I->eraseFromParent();

  // Now we are good to remove SubFn.
  if (SubFn->user_empty())
    SubFn->eraseFromParent();

  return true;
}
// Remove suspend points that are simplified.
static void simplifySuspendPoints(coro::Shape &Shape) {
  // Currently, the only simplification we do is switch-lowering-specific.
  if (Shape.ABI != coro::ABI::Switch)
    return;

  auto &S = Shape.CoroSuspends;
  size_t I = 0, N = S.size();
  if (N == 0)
    return;
  while (true) {
    if (simplifySuspendPoint(cast<CoroSuspendInst>(S[I]), Shape.CoroBegin)) {
      if (--N == I)
        break;
      std::swap(S[I], S[N]);
      continue;
    }
    if (++I == N)
      break;
  }
  S.resize(N);
}
static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
                                 SmallVectorImpl<Function *> &Clones) {
  assert(Shape.ABI == coro::ABI::Switch);

  createResumeEntryBlock(F, Shape);
  auto ResumeClone = createClone(F, ".resume", Shape,
                                 CoroCloner::Kind::SwitchResume);
  auto DestroyClone = createClone(F, ".destroy", Shape,
                                  CoroCloner::Kind::SwitchUnwind);
  auto CleanupClone = createClone(F, ".cleanup", Shape,
                                  CoroCloner::Kind::SwitchCleanup);

  postSplitCleanup(*ResumeClone);
  postSplitCleanup(*DestroyClone);
  postSplitCleanup(*CleanupClone);

  addMustTailToCoroResumes(*ResumeClone);

  // Store addresses of the resume/destroy/cleanup functions in the coroutine
  // frame.
  updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);

  assert(Clones.empty());
  Clones.push_back(ResumeClone);
  Clones.push_back(DestroyClone);
  Clones.push_back(CleanupClone);

  // Create a constant array referring to the resume/destroy/cleanup functions
  // pointed to by the last argument of @llvm.coro.info, so that the CoroElide
  // pass can determine the correct function to call.
  setCoroInfo(F, Shape, Clones);
}
static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
                                 SmallVectorImpl<Function *> &Clones) {
  assert(Shape.ABI == coro::ABI::Retcon ||
         Shape.ABI == coro::ABI::RetconOnce);
  assert(Clones.empty());

  // Reset various things that the optimizer might have decided it
  // "knows" about the coroutine function due to not seeing a return.
  F.removeFnAttr(Attribute::NoReturn);
  F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
  F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull);

  // Allocate the frame.
  auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
  Value *RawFramePtr;
  if (Shape.RetconLowering.IsFrameInlineInStorage) {
    RawFramePtr = Id->getStorage();
  } else {
    IRBuilder<> Builder(Id);

    // Determine the size of the frame.
    const DataLayout &DL = F.getParent()->getDataLayout();
    auto Size = DL.getTypeAllocSize(Shape.FrameTy);

    // Allocate.  We don't need to update the call graph node because we're
    // going to recompute it from scratch after splitting.
    RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
    RawFramePtr =
        Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());

    // Stash the allocated frame pointer in the continuation storage.
    auto Dest = Builder.CreateBitCast(Id->getStorage(),
                                      RawFramePtr->getType()->getPointerTo());
    Builder.CreateStore(RawFramePtr, Dest);
  }

  // Map all uses of llvm.coro.begin to the allocated frame pointer.
  {
    // Make sure we don't invalidate Shape.FramePtr.
    TrackingVH<Instruction> Handle(Shape.FramePtr);
    Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
    Shape.FramePtr = Handle.getValPtr();
  }

  // Create a unique return block.
  BasicBlock *ReturnBB = nullptr;
  SmallVector<PHINode *, 4> ReturnPHIs;

  // Create all the functions in order after the main function.
  auto NextF = std::next(F.getIterator());

  // Create a continuation function for each of the suspend points.
  Clones.reserve(Shape.CoroSuspends.size());
  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
    auto Suspend = cast<CoroSuspendRetconInst>(Shape.CoroSuspends[i]);

    // Create the clone declaration.
    auto Continuation =
        createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF);
    Clones.push_back(Continuation);

    // Insert a branch to the unified return block immediately before
    // the suspend point.
    auto SuspendBB = Suspend->getParent();
    auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
    auto Branch = cast<BranchInst>(SuspendBB->getTerminator());

    // Create the unified return block.
    if (!ReturnBB) {
      // Place it before the first suspend.
      ReturnBB = BasicBlock::Create(F.getContext(), "coro.return", &F,
                                    NewSuspendBB);
      Shape.RetconLowering.ReturnBlock = ReturnBB;

      IRBuilder<> Builder(ReturnBB);

      // Create PHIs for all the return values.
      assert(ReturnPHIs.empty());

      // First, the continuation.
      ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(),
                                             Shape.CoroSuspends.size()));

      // Next, all the directly-yielded values.
      for (auto ResultTy : Shape.getRetconResultTypes())
        ReturnPHIs.push_back(Builder.CreatePHI(ResultTy,
                                               Shape.CoroSuspends.size()));

      // Build the return value.
      auto RetTy = F.getReturnType();

      // Cast the continuation value if necessary.
      // We can't rely on the types matching up because that type would
      // have to be infinite.
      auto CastedContinuationTy =
          (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(0));
      auto *CastedContinuation =
          Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy);

      Value *RetV;
      if (ReturnPHIs.size() == 1) {
        RetV = CastedContinuation;
      } else {
        RetV = UndefValue::get(RetTy);
        RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0);
        for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I)
          RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I);
      }

      Builder.CreateRet(RetV);
    }

    // Branch to the return block.
    Branch->setSuccessor(0, ReturnBB);
    ReturnPHIs[0]->addIncoming(Continuation, SuspendBB);
    size_t NextPHIIndex = 1;
    for (auto &VUse : Suspend->value_operands())
      ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB);
    assert(NextPHIIndex == ReturnPHIs.size());
  }

  assert(Clones.size() == Shape.CoroSuspends.size());
  for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
    auto Suspend = Shape.CoroSuspends[i];
    auto Clone = Clones[i];

    CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create();
  }
}
namespace {
  class PrettyStackTraceFunction : public PrettyStackTraceEntry {
    Function &F;
  public:
    PrettyStackTraceFunction(Function &F) : F(F) {}
    void print(raw_ostream &OS) const override {
      OS << "While splitting coroutine ";
      F.printAsOperand(OS, /*print type*/ false, F.getParent());
      OS << "\n";
    }
  };
} // end anonymous namespace

static void splitCoroutine(Function &F, coro::Shape &Shape,
                           SmallVectorImpl<Function *> &Clones) {
  switch (Shape.ABI) {
  case coro::ABI::Switch:
    return splitSwitchCoroutine(F, Shape, Clones);
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce:
    return splitRetconCoroutine(F, Shape, Clones);
  }
  llvm_unreachable("bad ABI kind");
}
static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
  PrettyStackTraceFunction prettyStackTrace(F);

  // The suspend-crossing algorithm in buildCoroutineFrame gets tripped
  // up by uses in unreachable blocks, so remove them as a first pass.
  removeUnreachableBlocks(F);

  coro::Shape Shape(F);
  if (!Shape.CoroBegin)
    return;

  simplifySuspendPoints(Shape);
  buildCoroutineFrame(F, Shape);
  replaceFrameSize(Shape);

  SmallVector<Function*, 4> Clones;

  // If there are no suspend points, no split is required; just remove
  // the allocation and deallocation blocks, as they are not needed.
  if (Shape.CoroSuspends.empty()) {
    handleNoSuspendCoroutine(Shape);
  } else {
    splitCoroutine(F, Shape, Clones);
  }

  // Replace all the swifterror operations in the original function.
  // This invalidates SwiftErrorOps in the Shape.
  replaceSwiftErrorOps(F, Shape, nullptr);

  removeCoroEnds(Shape, &CG);
  postSplitCleanup(F);

  // Update call graph and add the functions we created to the SCC.
  coro::updateCallGraph(F, Clones, CG, SCC);
}
// When we see the coroutine the first time, we insert an indirect call to a
// devirt trigger function and mark the coroutine as ready for split.
static void prepareForSplit(Function &F, CallGraph &CG) {
  Module &M = *F.getParent();
  LLVMContext &Context = F.getContext();
#ifndef NDEBUG
  Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN);
  assert(DevirtFn && "coro.devirt.trigger function not found");
#endif

  F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);

  // Insert an indirect call sequence that will be devirtualized by CoroElide
  // pass:
  //    %0 = call i8* @llvm.coro.subfn.addr(i8* null, i8 -1)
  //    %1 = bitcast i8* %0 to void(i8*)*
  //    call void %1(i8* null)
  coro::LowererBase Lowerer(M);
  Instruction *InsertPt = F.getEntryBlock().getTerminator();
  auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(Context));
  auto *DevirtFnAddr =
      Lowerer.makeSubFnCall(Null, CoroSubFnInst::RestartTrigger, InsertPt);
  FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Context),
                                         {Type::getInt8PtrTy(Context)}, false);
  auto *IndirectCall = CallInst::Create(FnTy, DevirtFnAddr, Null, "", InsertPt);

  // Update the CG graph with the indirect call we just added.
  CG[&F]->addCalledFunction(IndirectCall, CG.getCallsExternalNode());
}
// Make sure that there is a devirtualization trigger function that the
// coro-split pass uses to force a restart of the CGSCC pipeline. If the devirt
// trigger function is not found, we will create one and add it to the current
// SCC.
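//
// The trigger created below amounts to (illustrative rendering):
//
//   define private void @coro.devirt.trigger(i8*) alwaysinline {
//   entry:
//     ret void
//   }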
static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
  Module &M = CG.getModule();
  if (M.getFunction(CORO_DEVIRT_TRIGGER_FN))
    return;

  LLVMContext &C = M.getContext();
  auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C),
                                 /*isVarArg=*/false);
  Function *DevirtFn =
      Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage,
                       CORO_DEVIRT_TRIGGER_FN, &M);
  DevirtFn->addFnAttr(Attribute::AlwaysInline);
  auto *Entry = BasicBlock::Create(C, "entry", DevirtFn);
  ReturnInst::Create(C, Entry);

  auto *Node = CG.getOrInsertFunction(DevirtFn);

  SmallVector<CallGraphNode *, 8> Nodes(SCC.begin(), SCC.end());
  Nodes.push_back(Node);
  SCC.initialize(Nodes);
}
/// Replace a call to llvm.coro.prepare.retcon.
static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
  auto CastFn = Prepare->getArgOperand(0); // as an i8*
  auto Fn = CastFn->stripPointerCasts(); // as its original type

  // Find call graph nodes for the preparation.
  CallGraphNode *PrepareUserNode = nullptr, *FnNode = nullptr;
  if (auto ConcreteFn = dyn_cast<Function>(Fn)) {
    PrepareUserNode = CG[Prepare->getFunction()];
    FnNode = CG[ConcreteFn];
  }

  // Attempt to peephole this pattern:
  //    %0 = bitcast [[TYPE]] @some_function to i8*
  //    %1 = call @llvm.coro.prepare.retcon(i8* %0)
  //    %2 = bitcast %1 to [[TYPE]]
  // ==>
  //    %2 = @some_function
  for (auto UI = Prepare->use_begin(), UE = Prepare->use_end();
         UI != UE; ) {
    // Look for bitcasts back to the original function type.
    auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser());
    if (!Cast || Cast->getType() != Fn->getType()) continue;

    // Check whether the replacement will introduce new direct calls.
    // If so, we'll need to update the call graph.
    if (PrepareUserNode) {
      for (auto &Use : Cast->uses()) {
        if (auto *CB = dyn_cast<CallBase>(Use.getUser())) {
          if (!CB->isCallee(&Use))
            continue;
          PrepareUserNode->removeCallEdgeFor(*CB);
          PrepareUserNode->addCalledFunction(CB, FnNode);
        }
      }
    }

    // Replace and remove the cast.
    Cast->replaceAllUsesWith(Fn);
    Cast->eraseFromParent();
  }

  // Replace any remaining uses with the function as an i8*.
  // This can never directly be a callee, so we don't need to update CG.
  Prepare->replaceAllUsesWith(CastFn);
  Prepare->eraseFromParent();

  // Kill dead bitcasts.
  while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
    if (!Cast->use_empty()) break;
    CastFn = Cast->getOperand(0);
    Cast->eraseFromParent();
  }
}
/// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent
/// IPO from operating on calls to a retcon coroutine before it's been
/// split.  This is only safe to do after we've split all retcon
/// coroutines in the module.  We can do this in this pass because this
/// pass does promise to split all retcon coroutines (as opposed to
/// switch coroutines, which are lowered in multiple stages).
static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) {
  bool Changed = false;
  for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end();
         PI != PE; ) {
    // Intrinsics can only be used in calls.
    auto *Prepare = cast<CallInst>((PI++)->getUser());
    replacePrepare(Prepare, CG);
    Changed = true;
  }

  return Changed;
}
//===----------------------------------------------------------------------===//
//                              Top Level Driver
//===----------------------------------------------------------------------===//

namespace {

struct CoroSplitLegacy : public CallGraphSCCPass {
  static char ID; // Pass identification, replacement for typeid

  CoroSplitLegacy() : CallGraphSCCPass(ID) {
    initializeCoroSplitLegacyPass(*PassRegistry::getPassRegistry());
  }

  bool Run = false;

  // A coroutine is identified by the presence of the coro.begin intrinsic; if
  // we don't have any, this pass has nothing to do.
  bool doInitialization(CallGraph &CG) override {
    Run = coro::declaresIntrinsics(CG.getModule(),
                                   {"llvm.coro.begin",
                                    "llvm.coro.prepare.retcon"});
    return CallGraphSCCPass::doInitialization(CG);
  }

  bool runOnSCC(CallGraphSCC &SCC) override {
    if (!Run)
      return false;

    // Check for uses of llvm.coro.prepare.retcon.
    auto PrepareFn =
        SCC.getCallGraph().getModule().getFunction("llvm.coro.prepare.retcon");
    if (PrepareFn && PrepareFn->use_empty())
      PrepareFn = nullptr;

    // Find coroutines for processing.
    SmallVector<Function *, 4> Coroutines;
    for (CallGraphNode *CGN : SCC)
      if (auto *F = CGN->getFunction())
        if (F->hasFnAttribute(CORO_PRESPLIT_ATTR))
          Coroutines.push_back(F);

    if (Coroutines.empty() && !PrepareFn)
      return false;

    CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();

    if (Coroutines.empty())
      return replaceAllPrepares(PrepareFn, CG);

    createDevirtTriggerFunc(CG, SCC);

    // Split all the coroutines.
    for (Function *F : Coroutines) {
      Attribute Attr = F->getFnAttribute(CORO_PRESPLIT_ATTR);
      StringRef Value = Attr.getValueAsString();
      LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F->getName()
                        << "' state: " << Value << "\n");
      if (Value == UNPREPARED_FOR_SPLIT) {
        prepareForSplit(*F, CG);
        continue;
      }
      F->removeFnAttr(CORO_PRESPLIT_ATTR);
      splitCoroutine(*F, CG, SCC);
    }

    if (PrepareFn)
      replaceAllPrepares(PrepareFn, CG);

    return true;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return "Coroutine Splitting"; }
};

} // end anonymous namespace

char CoroSplitLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(
    CoroSplitLegacy, "coro-split",
    "Split coroutine into a set of functions driving its state machine", false,
    false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(
    CoroSplitLegacy, "coro-split",
    "Split coroutine into a set of functions driving its state machine", false,
    false)

Pass *llvm::createCoroSplitLegacyPass() { return new CoroSplitLegacy(); }