1 //===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Rewrite call/invoke instructions so as to make potential relocations
10 // performed by the garbage collector explicit in the IR.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/MapVector.h"
20 #include "llvm/ADT/None.h"
21 #include "llvm/ADT/Optional.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/ADT/iterator_range.h"
28 #include "llvm/Analysis/DomTreeUpdater.h"
29 #include "llvm/Analysis/TargetLibraryInfo.h"
30 #include "llvm/Analysis/TargetTransformInfo.h"
31 #include "llvm/IR/Argument.h"
32 #include "llvm/IR/Attributes.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/CallingConv.h"
35 #include "llvm/IR/Constant.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Dominators.h"
40 #include "llvm/IR/Function.h"
41 #include "llvm/IR/IRBuilder.h"
42 #include "llvm/IR/InstIterator.h"
43 #include "llvm/IR/InstrTypes.h"
44 #include "llvm/IR/Instruction.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/IR/IntrinsicInst.h"
47 #include "llvm/IR/Intrinsics.h"
48 #include "llvm/IR/LLVMContext.h"
49 #include "llvm/IR/MDBuilder.h"
50 #include "llvm/IR/Metadata.h"
51 #include "llvm/IR/Module.h"
52 #include "llvm/IR/Statepoint.h"
53 #include "llvm/IR/Type.h"
54 #include "llvm/IR/User.h"
55 #include "llvm/IR/Value.h"
56 #include "llvm/IR/ValueHandle.h"
57 #include "llvm/Pass.h"
58 #include "llvm/Support/Casting.h"
59 #include "llvm/Support/CommandLine.h"
60 #include "llvm/Support/Compiler.h"
61 #include "llvm/Support/Debug.h"
62 #include "llvm/Support/ErrorHandling.h"
63 #include "llvm/Support/raw_ostream.h"
64 #include "llvm/Transforms/Scalar.h"
65 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
66 #include "llvm/Transforms/Utils/Local.h"
67 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
78 #define DEBUG_TYPE "rewrite-statepoints-for-gc"
82 // Print the liveset found at the insert location
83 static cl::opt
<bool> PrintLiveSet("spp-print-liveset", cl::Hidden
,
85 static cl::opt
<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden
,
88 // Print out the base pointers for debugging
89 static cl::opt
<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden
,
92 // Cost threshold measuring when it is profitable to rematerialize value instead
94 static cl::opt
<unsigned>
95 RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden
,
98 #ifdef EXPENSIVE_CHECKS
99 static bool ClobberNonLive
= true;
101 static bool ClobberNonLive
= false;
104 static cl::opt
<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
105 cl::location(ClobberNonLive
),
109 AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
110 cl::Hidden
, cl::init(true));
112 /// The IR fed into RewriteStatepointsForGC may have had attributes and
113 /// metadata implying dereferenceability that are no longer valid/correct after
114 /// RewriteStatepointsForGC has run. This is because semantically, after
115 /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
116 /// heap. stripNonValidData (conservatively) restores
117 /// correctness by erasing all attributes in the module that externally imply
118 /// dereferenceability. Similar reasoning also applies to the noalias
119 /// attributes and metadata. gc.statepoint can touch the entire heap including
121 /// Apart from attributes and metadata, we also remove instructions that imply
122 /// constant physical memory: llvm.invariant.start.
123 static void stripNonValidData(Module
&M
);
125 static bool shouldRewriteStatepointsIn(Function
&F
);
127 PreservedAnalyses
RewriteStatepointsForGC::run(Module
&M
,
128 ModuleAnalysisManager
&AM
) {
129 bool Changed
= false;
130 auto &FAM
= AM
.getResult
<FunctionAnalysisManagerModuleProxy
>(M
).getManager();
131 for (Function
&F
: M
) {
132 // Nothing to do for declarations.
133 if (F
.isDeclaration() || F
.empty())
136 // Policy choice says not to rewrite - the most common reason is that we're
137 // compiling code without a GCStrategy.
138 if (!shouldRewriteStatepointsIn(F
))
141 auto &DT
= FAM
.getResult
<DominatorTreeAnalysis
>(F
);
142 auto &TTI
= FAM
.getResult
<TargetIRAnalysis
>(F
);
143 auto &TLI
= FAM
.getResult
<TargetLibraryAnalysis
>(F
);
144 Changed
|= runOnFunction(F
, DT
, TTI
, TLI
);
147 return PreservedAnalyses::all();
149 // stripNonValidData asserts that shouldRewriteStatepointsIn
150 // returns true for at least one function in the module. Since at least
151 // one function changed, we know that the precondition is satisfied.
152 stripNonValidData(M
);
154 PreservedAnalyses PA
;
155 PA
.preserve
<TargetIRAnalysis
>();
156 PA
.preserve
<TargetLibraryAnalysis
>();
162 class RewriteStatepointsForGCLegacyPass
: public ModulePass
{
163 RewriteStatepointsForGC Impl
;
166 static char ID
; // Pass identification, replacement for typeid
168 RewriteStatepointsForGCLegacyPass() : ModulePass(ID
), Impl() {
169 initializeRewriteStatepointsForGCLegacyPassPass(
170 *PassRegistry::getPassRegistry());
173 bool runOnModule(Module
&M
) override
{
174 bool Changed
= false;
175 for (Function
&F
: M
) {
176 // Nothing to do for declarations.
177 if (F
.isDeclaration() || F
.empty())
180 // Policy choice says not to rewrite - the most common reason is that
181 // we're compiling code without a GCStrategy.
182 if (!shouldRewriteStatepointsIn(F
))
185 TargetTransformInfo
&TTI
=
186 getAnalysis
<TargetTransformInfoWrapperPass
>().getTTI(F
);
187 const TargetLibraryInfo
&TLI
=
188 getAnalysis
<TargetLibraryInfoWrapperPass
>().getTLI(F
);
189 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>(F
).getDomTree();
191 Changed
|= Impl
.runOnFunction(F
, DT
, TTI
, TLI
);
197 // stripNonValidData asserts that shouldRewriteStatepointsIn
198 // returns true for at least one function in the module. Since at least
199 // one function changed, we know that the precondition is satisfied.
200 stripNonValidData(M
);
204 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
205 // We add and rewrite a bunch of instructions, but don't really do much
206 // else. We could in theory preserve a lot more analyses here.
207 AU
.addRequired
<DominatorTreeWrapperPass
>();
208 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
209 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
213 } // end anonymous namespace
215 char RewriteStatepointsForGCLegacyPass::ID
= 0;
217 ModulePass
*llvm::createRewriteStatepointsForGCLegacyPass() {
218 return new RewriteStatepointsForGCLegacyPass();
221 INITIALIZE_PASS_BEGIN(RewriteStatepointsForGCLegacyPass
,
222 "rewrite-statepoints-for-gc",
223 "Make relocations explicit at statepoints", false, false)
224 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
225 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass
)
226 INITIALIZE_PASS_END(RewriteStatepointsForGCLegacyPass
,
227 "rewrite-statepoints-for-gc",
228 "Make relocations explicit at statepoints", false, false)
232 struct GCPtrLivenessData
{
233 /// Values defined in this block.
234 MapVector
<BasicBlock
*, SetVector
<Value
*>> KillSet
;
236 /// Values used in this block (and thus live); does not included values
237 /// killed within this block.
238 MapVector
<BasicBlock
*, SetVector
<Value
*>> LiveSet
;
240 /// Values live into this basic block (i.e. used by any
241 /// instruction in this basic block or ones reachable from here)
242 MapVector
<BasicBlock
*, SetVector
<Value
*>> LiveIn
;
244 /// Values live out of this basic block (i.e. live into
245 /// any successor block)
246 MapVector
<BasicBlock
*, SetVector
<Value
*>> LiveOut
;
249 // The type of the internal cache used inside the findBasePointers family
250 // of functions. From the callers perspective, this is an opaque type and
251 // should not be inspected.
253 // In the actual implementation this caches two relations:
254 // - The base relation itself (i.e. this pointer is based on that one)
255 // - The base defining value relation (i.e. before base_phi insertion)
256 // Generally, after the execution of a full findBasePointer call, only the
257 // base relation will remain. Internally, we add a mixture of the two
258 // types, then update all the second type to the first type
259 using DefiningValueMapTy
= MapVector
<Value
*, Value
*>;
260 using StatepointLiveSetTy
= SetVector
<Value
*>;
261 using RematerializedValueMapTy
=
262 MapVector
<AssertingVH
<Instruction
>, AssertingVH
<Value
>>;
264 struct PartiallyConstructedSafepointRecord
{
265 /// The set of values known to be live across this safepoint
266 StatepointLiveSetTy LiveSet
;
268 /// Mapping from live pointers to a base-defining-value
269 MapVector
<Value
*, Value
*> PointerToBase
;
271 /// The *new* gc.statepoint instruction itself. This produces the token
272 /// that normal path gc.relocates and the gc.result are tied to.
273 Instruction
*StatepointToken
;
275 /// Instruction to which exceptional gc relocates are attached
276 /// Makes it easier to iterate through them during relocationViaAlloca.
277 Instruction
*UnwindToken
;
279 /// Record live values we are rematerialized instead of relocating.
280 /// They are not included into 'LiveSet' field.
281 /// Maps rematerialized copy to it's original value.
282 RematerializedValueMapTy RematerializedValues
;
285 } // end anonymous namespace
287 static ArrayRef
<Use
> GetDeoptBundleOperands(const CallBase
*Call
) {
288 Optional
<OperandBundleUse
> DeoptBundle
=
289 Call
->getOperandBundle(LLVMContext::OB_deopt
);
291 if (!DeoptBundle
.hasValue()) {
292 assert(AllowStatepointWithNoDeoptInfo
&&
293 "Found non-leaf call without deopt info!");
297 return DeoptBundle
.getValue().Inputs
;
300 /// Compute the live-in set for every basic block in the function
301 static void computeLiveInValues(DominatorTree
&DT
, Function
&F
,
302 GCPtrLivenessData
&Data
);
304 /// Given results from the dataflow liveness computation, find the set of live
305 /// Values at a particular instruction.
306 static void findLiveSetAtInst(Instruction
*inst
, GCPtrLivenessData
&Data
,
307 StatepointLiveSetTy
&out
);
309 // TODO: Once we can get to the GCStrategy, this becomes
310 // Optional<bool> isGCManagedPointer(const Type *Ty) const override {
312 static bool isGCPointerType(Type
*T
) {
313 if (auto *PT
= dyn_cast
<PointerType
>(T
))
314 // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
315 // GC managed heap. We know that a pointer into this heap needs to be
316 // updated and that no other pointer does.
317 return PT
->getAddressSpace() == 1;
321 // Return true if this type is one which a) is a gc pointer or contains a GC
322 // pointer and b) is of a type this code expects to encounter as a live value.
323 // (The insertion code will assert that a type which matches (a) and not (b)
324 // is not encountered.)
325 static bool isHandledGCPointerType(Type
*T
) {
326 // We fully support gc pointers
327 if (isGCPointerType(T
))
329 // We partially support vectors of gc pointers. The code will assert if it
330 // can't handle something.
331 if (auto VT
= dyn_cast
<VectorType
>(T
))
332 if (isGCPointerType(VT
->getElementType()))
338 /// Returns true if this type contains a gc pointer whether we know how to
339 /// handle that type or not.
340 static bool containsGCPtrType(Type
*Ty
) {
341 if (isGCPointerType(Ty
))
343 if (VectorType
*VT
= dyn_cast
<VectorType
>(Ty
))
344 return isGCPointerType(VT
->getScalarType());
345 if (ArrayType
*AT
= dyn_cast
<ArrayType
>(Ty
))
346 return containsGCPtrType(AT
->getElementType());
347 if (StructType
*ST
= dyn_cast
<StructType
>(Ty
))
348 return llvm::any_of(ST
->elements(), containsGCPtrType
);
352 // Returns true if this is a type which a) is a gc pointer or contains a GC
353 // pointer and b) is of a type which the code doesn't expect (i.e. first class
354 // aggregates). Used to trip assertions.
355 static bool isUnhandledGCPointerType(Type
*Ty
) {
356 return containsGCPtrType(Ty
) && !isHandledGCPointerType(Ty
);
360 // Return the name of the value suffixed with the provided value, or if the
361 // value didn't have a name, the default value specified.
362 static std::string
suffixed_name_or(Value
*V
, StringRef Suffix
,
363 StringRef DefaultName
) {
364 return V
->hasName() ? (V
->getName() + Suffix
).str() : DefaultName
.str();
367 // Conservatively identifies any definitions which might be live at the
368 // given instruction. The analysis is performed immediately before the
369 // given instruction. Values defined by that instruction are not considered
370 // live. Values used by that instruction are considered live.
371 static void analyzeParsePointLiveness(
372 DominatorTree
&DT
, GCPtrLivenessData
&OriginalLivenessData
, CallBase
*Call
,
373 PartiallyConstructedSafepointRecord
&Result
) {
374 StatepointLiveSetTy LiveSet
;
375 findLiveSetAtInst(Call
, OriginalLivenessData
, LiveSet
);
378 dbgs() << "Live Variables:\n";
379 for (Value
*V
: LiveSet
)
380 dbgs() << " " << V
->getName() << " " << *V
<< "\n";
382 if (PrintLiveSetSize
) {
383 dbgs() << "Safepoint For: " << Call
->getCalledValue()->getName() << "\n";
384 dbgs() << "Number live values: " << LiveSet
.size() << "\n";
386 Result
.LiveSet
= LiveSet
;
389 static bool isKnownBaseResult(Value
*V
);
393 /// A single base defining value - An immediate base defining value for an
394 /// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'.
395 /// For instructions which have multiple pointer [vector] inputs or that
396 /// transition between vector and scalar types, there is no immediate base
397 /// defining value. The 'base defining value' for 'Def' is the transitive
398 /// closure of this relation stopping at the first instruction which has no
399 /// immediate base defining value. The b.d.v. might itself be a base pointer,
400 /// but it can also be an arbitrary derived pointer.
401 struct BaseDefiningValueResult
{
402 /// Contains the value which is the base defining value.
405 /// True if the base defining value is also known to be an actual base
407 const bool IsKnownBase
;
409 BaseDefiningValueResult(Value
*BDV
, bool IsKnownBase
)
410 : BDV(BDV
), IsKnownBase(IsKnownBase
) {
412 // Check consistency between new and old means of checking whether a BDV is
414 bool MustBeBase
= isKnownBaseResult(BDV
);
415 assert(!MustBeBase
|| MustBeBase
== IsKnownBase
);
420 } // end anonymous namespace
422 static BaseDefiningValueResult
findBaseDefiningValue(Value
*I
);
424 /// Return a base defining value for the 'Index' element of the given vector
425 /// instruction 'I'. If Index is null, returns a BDV for the entire vector
426 /// 'I'. As an optimization, this method will try to determine when the
427 /// element is known to already be a base pointer. If this can be established,
428 /// the second value in the returned pair will be true. Note that either a
429 /// vector or a pointer typed value can be returned. For the former, the
430 /// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
431 /// If the later, the return pointer is a BDV (or possibly a base) for the
432 /// particular element in 'I'.
433 static BaseDefiningValueResult
434 findBaseDefiningValueOfVector(Value
*I
) {
435 // Each case parallels findBaseDefiningValue below, see that code for
436 // detailed motivation.
438 if (isa
<Argument
>(I
))
439 // An incoming argument to the function is a base pointer
440 return BaseDefiningValueResult(I
, true);
442 if (isa
<Constant
>(I
))
443 // Base of constant vector consists only of constant null pointers.
444 // For reasoning see similar case inside 'findBaseDefiningValue' function.
445 return BaseDefiningValueResult(ConstantAggregateZero::get(I
->getType()),
448 if (isa
<LoadInst
>(I
))
449 return BaseDefiningValueResult(I
, true);
451 if (isa
<InsertElementInst
>(I
))
452 // We don't know whether this vector contains entirely base pointers or
453 // not. To be conservatively correct, we treat it as a BDV and will
454 // duplicate code as needed to construct a parallel vector of bases.
455 return BaseDefiningValueResult(I
, false);
457 if (isa
<ShuffleVectorInst
>(I
))
458 // We don't know whether this vector contains entirely base pointers or
459 // not. To be conservatively correct, we treat it as a BDV and will
460 // duplicate code as needed to construct a parallel vector of bases.
461 // TODO: There a number of local optimizations which could be applied here
462 // for particular sufflevector patterns.
463 return BaseDefiningValueResult(I
, false);
465 // The behavior of getelementptr instructions is the same for vector and
466 // non-vector data types.
467 if (auto *GEP
= dyn_cast
<GetElementPtrInst
>(I
))
468 return findBaseDefiningValue(GEP
->getPointerOperand());
470 // If the pointer comes through a bitcast of a vector of pointers to
471 // a vector of another type of pointer, then look through the bitcast
472 if (auto *BC
= dyn_cast
<BitCastInst
>(I
))
473 return findBaseDefiningValue(BC
->getOperand(0));
475 // We assume that functions in the source language only return base
476 // pointers. This should probably be generalized via attributes to support
477 // both source language and internal functions.
478 if (isa
<CallInst
>(I
) || isa
<InvokeInst
>(I
))
479 return BaseDefiningValueResult(I
, true);
481 // A PHI or Select is a base defining value. The outer findBasePointer
482 // algorithm is responsible for constructing a base value for this BDV.
483 assert((isa
<SelectInst
>(I
) || isa
<PHINode
>(I
)) &&
484 "unknown vector instruction - no base found for vector element");
485 return BaseDefiningValueResult(I
, false);
488 /// Helper function for findBasePointer - Will return a value which either a)
489 /// defines the base pointer for the input, b) blocks the simple search
490 /// (i.e. a PHI or Select of two derived pointers), or c) involves a change
491 /// from pointer to vector type or back.
492 static BaseDefiningValueResult
findBaseDefiningValue(Value
*I
) {
493 assert(I
->getType()->isPtrOrPtrVectorTy() &&
494 "Illegal to ask for the base pointer of a non-pointer type");
496 if (I
->getType()->isVectorTy())
497 return findBaseDefiningValueOfVector(I
);
499 if (isa
<Argument
>(I
))
500 // An incoming argument to the function is a base pointer
501 // We should have never reached here if this argument isn't an gc value
502 return BaseDefiningValueResult(I
, true);
504 if (isa
<Constant
>(I
)) {
505 // We assume that objects with a constant base (e.g. a global) can't move
506 // and don't need to be reported to the collector because they are always
507 // live. Besides global references, all kinds of constants (e.g. undef,
508 // constant expressions, null pointers) can be introduced by the inliner or
509 // the optimizer, especially on dynamically dead paths.
510 // Here we treat all of them as having single null base. By doing this we
511 // trying to avoid problems reporting various conflicts in a form of
512 // "phi (const1, const2)" or "phi (const, regular gc ptr)".
513 // See constant.ll file for relevant test cases.
515 return BaseDefiningValueResult(
516 ConstantPointerNull::get(cast
<PointerType
>(I
->getType())), true);
519 if (CastInst
*CI
= dyn_cast
<CastInst
>(I
)) {
520 Value
*Def
= CI
->stripPointerCasts();
521 // If stripping pointer casts changes the address space there is an
522 // addrspacecast in between.
523 assert(cast
<PointerType
>(Def
->getType())->getAddressSpace() ==
524 cast
<PointerType
>(CI
->getType())->getAddressSpace() &&
525 "unsupported addrspacecast");
526 // If we find a cast instruction here, it means we've found a cast which is
527 // not simply a pointer cast (i.e. an inttoptr). We don't know how to
528 // handle int->ptr conversion.
529 assert(!isa
<CastInst
>(Def
) && "shouldn't find another cast here");
530 return findBaseDefiningValue(Def
);
533 if (isa
<LoadInst
>(I
))
534 // The value loaded is an gc base itself
535 return BaseDefiningValueResult(I
, true);
537 if (GetElementPtrInst
*GEP
= dyn_cast
<GetElementPtrInst
>(I
))
538 // The base of this GEP is the base
539 return findBaseDefiningValue(GEP
->getPointerOperand());
541 if (IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
)) {
542 switch (II
->getIntrinsicID()) {
544 // fall through to general call handling
546 case Intrinsic::experimental_gc_statepoint
:
547 llvm_unreachable("statepoints don't produce pointers");
548 case Intrinsic::experimental_gc_relocate
:
549 // Rerunning safepoint insertion after safepoints are already
550 // inserted is not supported. It could probably be made to work,
551 // but why are you doing this? There's no good reason.
552 llvm_unreachable("repeat safepoint insertion is not supported");
553 case Intrinsic::gcroot
:
554 // Currently, this mechanism hasn't been extended to work with gcroot.
555 // There's no reason it couldn't be, but I haven't thought about the
556 // implications much.
558 "interaction with the gcroot mechanism is not supported");
561 // We assume that functions in the source language only return base
562 // pointers. This should probably be generalized via attributes to support
563 // both source language and internal functions.
564 if (isa
<CallInst
>(I
) || isa
<InvokeInst
>(I
))
565 return BaseDefiningValueResult(I
, true);
567 // TODO: I have absolutely no idea how to implement this part yet. It's not
568 // necessarily hard, I just haven't really looked at it yet.
569 assert(!isa
<LandingPadInst
>(I
) && "Landing Pad is unimplemented");
571 if (isa
<AtomicCmpXchgInst
>(I
))
572 // A CAS is effectively a atomic store and load combined under a
573 // predicate. From the perspective of base pointers, we just treat it
575 return BaseDefiningValueResult(I
, true);
577 assert(!isa
<AtomicRMWInst
>(I
) && "Xchg handled above, all others are "
578 "binary ops which don't apply to pointers");
580 // The aggregate ops. Aggregates can either be in the heap or on the
581 // stack, but in either case, this is simply a field load. As a result,
582 // this is a defining definition of the base just like a load is.
583 if (isa
<ExtractValueInst
>(I
))
584 return BaseDefiningValueResult(I
, true);
586 // We should never see an insert vector since that would require we be
587 // tracing back a struct value not a pointer value.
588 assert(!isa
<InsertValueInst
>(I
) &&
589 "Base pointer for a struct is meaningless");
591 // An extractelement produces a base result exactly when it's input does.
592 // We may need to insert a parallel instruction to extract the appropriate
593 // element out of the base vector corresponding to the input. Given this,
594 // it's analogous to the phi and select case even though it's not a merge.
595 if (isa
<ExtractElementInst
>(I
))
596 // Note: There a lot of obvious peephole cases here. This are deliberately
597 // handled after the main base pointer inference algorithm to make writing
598 // test cases to exercise that code easier.
599 return BaseDefiningValueResult(I
, false);
601 // The last two cases here don't return a base pointer. Instead, they
602 // return a value which dynamically selects from among several base
603 // derived pointers (each with it's own base potentially). It's the job of
604 // the caller to resolve these.
605 assert((isa
<SelectInst
>(I
) || isa
<PHINode
>(I
)) &&
606 "missing instruction case in findBaseDefiningValing");
607 return BaseDefiningValueResult(I
, false);
610 /// Returns the base defining value for this value.
611 static Value
*findBaseDefiningValueCached(Value
*I
, DefiningValueMapTy
&Cache
) {
612 Value
*&Cached
= Cache
[I
];
614 Cached
= findBaseDefiningValue(I
).BDV
;
615 LLVM_DEBUG(dbgs() << "fBDV-cached: " << I
->getName() << " -> "
616 << Cached
->getName() << "\n");
618 assert(Cache
[I
] != nullptr);
622 /// Return a base pointer for this value if known. Otherwise, return it's
623 /// base defining value.
624 static Value
*findBaseOrBDV(Value
*I
, DefiningValueMapTy
&Cache
) {
625 Value
*Def
= findBaseDefiningValueCached(I
, Cache
);
626 auto Found
= Cache
.find(Def
);
627 if (Found
!= Cache
.end()) {
628 // Either a base-of relation, or a self reference. Caller must check.
629 return Found
->second
;
631 // Only a BDV available
635 /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
636 /// is it known to be a base pointer? Or do we need to continue searching.
637 static bool isKnownBaseResult(Value
*V
) {
638 if (!isa
<PHINode
>(V
) && !isa
<SelectInst
>(V
) &&
639 !isa
<ExtractElementInst
>(V
) && !isa
<InsertElementInst
>(V
) &&
640 !isa
<ShuffleVectorInst
>(V
)) {
641 // no recursion possible
644 if (isa
<Instruction
>(V
) &&
645 cast
<Instruction
>(V
)->getMetadata("is_base_value")) {
646 // This is a previously inserted base phi or select. We know
647 // that this is a base value.
651 // We need to keep searching
657 /// Models the state of a single base defining value in the findBasePointer
658 /// algorithm for determining where a new instruction is needed to propagate
659 /// the base of this BDV.
662 enum Status
{ Unknown
, Base
, Conflict
};
664 BDVState() : BaseValue(nullptr) {}
666 explicit BDVState(Status Status
, Value
*BaseValue
= nullptr)
667 : Status(Status
), BaseValue(BaseValue
) {
668 assert(Status
!= Base
|| BaseValue
);
671 explicit BDVState(Value
*BaseValue
) : Status(Base
), BaseValue(BaseValue
) {}
673 Status
getStatus() const { return Status
; }
674 Value
*getBaseValue() const { return BaseValue
; }
676 bool isBase() const { return getStatus() == Base
; }
677 bool isUnknown() const { return getStatus() == Unknown
; }
678 bool isConflict() const { return getStatus() == Conflict
; }
680 bool operator==(const BDVState
&Other
) const {
681 return BaseValue
== Other
.BaseValue
&& Status
== Other
.Status
;
684 bool operator!=(const BDVState
&other
) const { return !(*this == other
); }
692 void print(raw_ostream
&OS
) const {
693 switch (getStatus()) {
704 OS
<< " (" << getBaseValue() << " - "
705 << (getBaseValue() ? getBaseValue()->getName() : "nullptr") << "): ";
709 Status Status
= Unknown
;
710 AssertingVH
<Value
> BaseValue
; // Non-null only if Status == Base.
713 } // end anonymous namespace
716 static raw_ostream
&operator<<(raw_ostream
&OS
, const BDVState
&State
) {
722 static BDVState
meetBDVStateImpl(const BDVState
&LHS
, const BDVState
&RHS
) {
723 switch (LHS
.getStatus()) {
724 case BDVState::Unknown
:
728 assert(LHS
.getBaseValue() && "can't be null");
733 if (LHS
.getBaseValue() == RHS
.getBaseValue()) {
734 assert(LHS
== RHS
&& "equality broken!");
737 return BDVState(BDVState::Conflict
);
739 assert(RHS
.isConflict() && "only three states!");
740 return BDVState(BDVState::Conflict
);
742 case BDVState::Conflict
:
745 llvm_unreachable("only three states!");
748 // Values of type BDVState form a lattice, and this function implements the meet
750 static BDVState
meetBDVState(const BDVState
&LHS
, const BDVState
&RHS
) {
751 BDVState Result
= meetBDVStateImpl(LHS
, RHS
);
752 assert(Result
== meetBDVStateImpl(RHS
, LHS
) &&
753 "Math is wrong: meet does not commute!");
757 /// For a given value or instruction, figure out what base ptr its derived from.
758 /// For gc objects, this is simply itself. On success, returns a value which is
759 /// the base pointer. (This is reliable and can be used for relocation.) On
760 /// failure, returns nullptr.
761 static Value
*findBasePointer(Value
*I
, DefiningValueMapTy
&Cache
) {
762 Value
*Def
= findBaseOrBDV(I
, Cache
);
764 if (isKnownBaseResult(Def
))
767 // Here's the rough algorithm:
768 // - For every SSA value, construct a mapping to either an actual base
769 // pointer or a PHI which obscures the base pointer.
770 // - Construct a mapping from PHI to unknown TOP state. Use an
771 // optimistic algorithm to propagate base pointer information. Lattice
776 // When algorithm terminates, all PHIs will either have a single concrete
777 // base or be in a conflict state.
778 // - For every conflict, insert a dummy PHI node without arguments. Add
779 // these to the base[Instruction] = BasePtr mapping. For every
780 // non-conflict, add the actual base.
781 // - For every conflict, add arguments for the base[a] of each input
784 // Note: A simpler form of this would be to add the conflict form of all
785 // PHIs without running the optimistic algorithm. This would be
786 // analogous to pessimistic data flow and would likely lead to an
787 // overall worse solution.
790 auto isExpectedBDVType
= [](Value
*BDV
) {
791 return isa
<PHINode
>(BDV
) || isa
<SelectInst
>(BDV
) ||
792 isa
<ExtractElementInst
>(BDV
) || isa
<InsertElementInst
>(BDV
) ||
793 isa
<ShuffleVectorInst
>(BDV
);
797 // Once populated, will contain a mapping from each potentially non-base BDV
798 // to a lattice value (described above) which corresponds to that BDV.
799 // We use the order of insertion (DFS over the def/use graph) to provide a
800 // stable deterministic ordering for visiting DenseMaps (which are unordered)
801 // below. This is important for deterministic compilation.
802 MapVector
<Value
*, BDVState
> States
;
804 // Recursively fill in all base defining values reachable from the initial
805 // one for which we don't already know a definite base value for
807 SmallVector
<Value
*, 16> Worklist
;
808 Worklist
.push_back(Def
);
809 States
.insert({Def
, BDVState()});
810 while (!Worklist
.empty()) {
811 Value
*Current
= Worklist
.pop_back_val();
812 assert(!isKnownBaseResult(Current
) && "why did it get added?");
814 auto visitIncomingValue
= [&](Value
*InVal
) {
815 Value
*Base
= findBaseOrBDV(InVal
, Cache
);
816 if (isKnownBaseResult(Base
))
817 // Known bases won't need new instructions introduced and can be
820 assert(isExpectedBDVType(Base
) && "the only non-base values "
821 "we see should be base defining values");
822 if (States
.insert(std::make_pair(Base
, BDVState())).second
)
823 Worklist
.push_back(Base
);
825 if (PHINode
*PN
= dyn_cast
<PHINode
>(Current
)) {
826 for (Value
*InVal
: PN
->incoming_values())
827 visitIncomingValue(InVal
);
828 } else if (SelectInst
*SI
= dyn_cast
<SelectInst
>(Current
)) {
829 visitIncomingValue(SI
->getTrueValue());
830 visitIncomingValue(SI
->getFalseValue());
831 } else if (auto *EE
= dyn_cast
<ExtractElementInst
>(Current
)) {
832 visitIncomingValue(EE
->getVectorOperand());
833 } else if (auto *IE
= dyn_cast
<InsertElementInst
>(Current
)) {
834 visitIncomingValue(IE
->getOperand(0)); // vector operand
835 visitIncomingValue(IE
->getOperand(1)); // scalar operand
836 } else if (auto *SV
= dyn_cast
<ShuffleVectorInst
>(Current
)) {
837 visitIncomingValue(SV
->getOperand(0));
838 visitIncomingValue(SV
->getOperand(1));
841 llvm_unreachable("Unimplemented instruction case");
847 LLVM_DEBUG(dbgs() << "States after initialization:\n");
848 for (auto Pair
: States
) {
849 LLVM_DEBUG(dbgs() << " " << Pair
.second
<< " for " << *Pair
.first
<< "\n");
853 // Return a phi state for a base defining value. We'll generate a new
854 // base state for known bases and expect to find a cached state otherwise.
855 auto getStateForBDV
= [&](Value
*baseValue
) {
856 if (isKnownBaseResult(baseValue
))
857 return BDVState(baseValue
);
858 auto I
= States
.find(baseValue
);
859 assert(I
!= States
.end() && "lookup failed!");
863 bool Progress
= true;
866 const size_t OldSize
= States
.size();
869 // We're only changing values in this loop, thus safe to keep iterators.
870 // Since this is computing a fixed point, the order of visit does not
871 // effect the result. TODO: We could use a worklist here and make this run
873 for (auto Pair
: States
) {
874 Value
*BDV
= Pair
.first
;
875 assert(!isKnownBaseResult(BDV
) && "why did it get added?");
877 // Given an input value for the current instruction, return a BDVState
878 // instance which represents the BDV of that value.
879 auto getStateForInput
= [&](Value
*V
) mutable {
880 Value
*BDV
= findBaseOrBDV(V
, Cache
);
881 return getStateForBDV(BDV
);
885 if (SelectInst
*SI
= dyn_cast
<SelectInst
>(BDV
)) {
886 NewState
= meetBDVState(NewState
, getStateForInput(SI
->getTrueValue()));
888 meetBDVState(NewState
, getStateForInput(SI
->getFalseValue()));
889 } else if (PHINode
*PN
= dyn_cast
<PHINode
>(BDV
)) {
890 for (Value
*Val
: PN
->incoming_values())
891 NewState
= meetBDVState(NewState
, getStateForInput(Val
));
892 } else if (auto *EE
= dyn_cast
<ExtractElementInst
>(BDV
)) {
893 // The 'meet' for an extractelement is slightly trivial, but it's still
894 // useful in that it drives us to conflict if our input is.
896 meetBDVState(NewState
, getStateForInput(EE
->getVectorOperand()));
897 } else if (auto *IE
= dyn_cast
<InsertElementInst
>(BDV
)){
898 // Given there's a inherent type mismatch between the operands, will
899 // *always* produce Conflict.
900 NewState
= meetBDVState(NewState
, getStateForInput(IE
->getOperand(0)));
901 NewState
= meetBDVState(NewState
, getStateForInput(IE
->getOperand(1)));
903 // The only instance this does not return a Conflict is when both the
904 // vector operands are the same vector.
905 auto *SV
= cast
<ShuffleVectorInst
>(BDV
);
906 NewState
= meetBDVState(NewState
, getStateForInput(SV
->getOperand(0)));
907 NewState
= meetBDVState(NewState
, getStateForInput(SV
->getOperand(1)));
910 BDVState OldState
= States
[BDV
];
911 if (OldState
!= NewState
) {
913 States
[BDV
] = NewState
;
917 assert(OldSize
== States
.size() &&
918 "fixed point shouldn't be adding any new nodes to state");
922 LLVM_DEBUG(dbgs() << "States after meet iteration:\n");
923 for (auto Pair
: States
) {
924 LLVM_DEBUG(dbgs() << " " << Pair
.second
<< " for " << *Pair
.first
<< "\n");
928 // Insert Phis for all conflicts
929 // TODO: adjust naming patterns to avoid this order of iteration dependency
930 for (auto Pair
: States
) {
931 Instruction
*I
= cast
<Instruction
>(Pair
.first
);
932 BDVState State
= Pair
.second
;
933 assert(!isKnownBaseResult(I
) && "why did it get added?");
934 assert(!State
.isUnknown() && "Optimistic algorithm didn't complete!");
936 // extractelement instructions are a bit special in that we may need to
937 // insert an extract even when we know an exact base for the instruction.
938 // The problem is that we need to convert from a vector base to a scalar
939 // base for the particular indice we're interested in.
940 if (State
.isBase() && isa
<ExtractElementInst
>(I
) &&
941 isa
<VectorType
>(State
.getBaseValue()->getType())) {
942 auto *EE
= cast
<ExtractElementInst
>(I
);
943 // TODO: In many cases, the new instruction is just EE itself. We should
944 // exploit this, but can't do it here since it would break the invariant
945 // about the BDV not being known to be a base.
946 auto *BaseInst
= ExtractElementInst::Create(
947 State
.getBaseValue(), EE
->getIndexOperand(), "base_ee", EE
);
948 BaseInst
->setMetadata("is_base_value", MDNode::get(I
->getContext(), {}));
949 States
[I
] = BDVState(BDVState::Base
, BaseInst
);
952 // Since we're joining a vector and scalar base, they can never be the
953 // same. As a result, we should always see insert element having reached
954 // the conflict state.
955 assert(!isa
<InsertElementInst
>(I
) || State
.isConflict());
957 if (!State
.isConflict())
960 /// Create and insert a new instruction which will represent the base of
961 /// the given instruction 'I'.
962 auto MakeBaseInstPlaceholder
= [](Instruction
*I
) -> Instruction
* {
963 if (isa
<PHINode
>(I
)) {
964 BasicBlock
*BB
= I
->getParent();
965 int NumPreds
= pred_size(BB
);
966 assert(NumPreds
> 0 && "how did we reach here");
967 std::string Name
= suffixed_name_or(I
, ".base", "base_phi");
968 return PHINode::Create(I
->getType(), NumPreds
, Name
, I
);
969 } else if (SelectInst
*SI
= dyn_cast
<SelectInst
>(I
)) {
970 // The undef will be replaced later
971 UndefValue
*Undef
= UndefValue::get(SI
->getType());
972 std::string Name
= suffixed_name_or(I
, ".base", "base_select");
973 return SelectInst::Create(SI
->getCondition(), Undef
, Undef
, Name
, SI
);
974 } else if (auto *EE
= dyn_cast
<ExtractElementInst
>(I
)) {
975 UndefValue
*Undef
= UndefValue::get(EE
->getVectorOperand()->getType());
976 std::string Name
= suffixed_name_or(I
, ".base", "base_ee");
977 return ExtractElementInst::Create(Undef
, EE
->getIndexOperand(), Name
,
979 } else if (auto *IE
= dyn_cast
<InsertElementInst
>(I
)) {
980 UndefValue
*VecUndef
= UndefValue::get(IE
->getOperand(0)->getType());
981 UndefValue
*ScalarUndef
= UndefValue::get(IE
->getOperand(1)->getType());
982 std::string Name
= suffixed_name_or(I
, ".base", "base_ie");
983 return InsertElementInst::Create(VecUndef
, ScalarUndef
,
984 IE
->getOperand(2), Name
, IE
);
986 auto *SV
= cast
<ShuffleVectorInst
>(I
);
987 UndefValue
*VecUndef
= UndefValue::get(SV
->getOperand(0)->getType());
988 std::string Name
= suffixed_name_or(I
, ".base", "base_sv");
989 return new ShuffleVectorInst(VecUndef
, VecUndef
, SV
->getOperand(2),
993 Instruction
*BaseInst
= MakeBaseInstPlaceholder(I
);
994 // Add metadata marking this as a base value
995 BaseInst
->setMetadata("is_base_value", MDNode::get(I
->getContext(), {}));
996 States
[I
] = BDVState(BDVState::Conflict
, BaseInst
);
999 // Returns a instruction which produces the base pointer for a given
1000 // instruction. The instruction is assumed to be an input to one of the BDVs
1001 // seen in the inference algorithm above. As such, we must either already
1002 // know it's base defining value is a base, or have inserted a new
1003 // instruction to propagate the base of it's BDV and have entered that newly
1004 // introduced instruction into the state table. In either case, we are
1005 // assured to be able to determine an instruction which produces it's base
1007 auto getBaseForInput
= [&](Value
*Input
, Instruction
*InsertPt
) {
1008 Value
*BDV
= findBaseOrBDV(Input
, Cache
);
1009 Value
*Base
= nullptr;
1010 if (isKnownBaseResult(BDV
)) {
1013 // Either conflict or base.
1014 assert(States
.count(BDV
));
1015 Base
= States
[BDV
].getBaseValue();
1017 assert(Base
&& "Can't be null");
1018 // The cast is needed since base traversal may strip away bitcasts
1019 if (Base
->getType() != Input
->getType() && InsertPt
)
1020 Base
= new BitCastInst(Base
, Input
->getType(), "cast", InsertPt
);
1024 // Fixup all the inputs of the new PHIs. Visit order needs to be
1025 // deterministic and predictable because we're naming newly created
1027 for (auto Pair
: States
) {
1028 Instruction
*BDV
= cast
<Instruction
>(Pair
.first
);
1029 BDVState State
= Pair
.second
;
1031 assert(!isKnownBaseResult(BDV
) && "why did it get added?");
1032 assert(!State
.isUnknown() && "Optimistic algorithm didn't complete!");
1033 if (!State
.isConflict())
1036 if (PHINode
*BasePHI
= dyn_cast
<PHINode
>(State
.getBaseValue())) {
1037 PHINode
*PN
= cast
<PHINode
>(BDV
);
1038 unsigned NumPHIValues
= PN
->getNumIncomingValues();
1039 for (unsigned i
= 0; i
< NumPHIValues
; i
++) {
1040 Value
*InVal
= PN
->getIncomingValue(i
);
1041 BasicBlock
*InBB
= PN
->getIncomingBlock(i
);
1043 // If we've already seen InBB, add the same incoming value
1044 // we added for it earlier. The IR verifier requires phi
1045 // nodes with multiple entries from the same basic block
1046 // to have the same incoming value for each of those
1047 // entries. If we don't do this check here and basephi
1048 // has a different type than base, we'll end up adding two
1049 // bitcasts (and hence two distinct values) as incoming
1050 // values for the same basic block.
1052 int BlockIndex
= BasePHI
->getBasicBlockIndex(InBB
);
1053 if (BlockIndex
!= -1) {
1054 Value
*OldBase
= BasePHI
->getIncomingValue(BlockIndex
);
1055 BasePHI
->addIncoming(OldBase
, InBB
);
1058 Value
*Base
= getBaseForInput(InVal
, nullptr);
1059 // In essence this assert states: the only way two values
1060 // incoming from the same basic block may be different is by
1061 // being different bitcasts of the same value. A cleanup
1062 // that remains TODO is changing findBaseOrBDV to return an
1063 // llvm::Value of the correct type (and still remain pure).
1064 // This will remove the need to add bitcasts.
1065 assert(Base
->stripPointerCasts() == OldBase
->stripPointerCasts() &&
1066 "Sanity -- findBaseOrBDV should be pure!");
1071 // Find the instruction which produces the base for each input. We may
1072 // need to insert a bitcast in the incoming block.
1073 // TODO: Need to split critical edges if insertion is needed
1074 Value
*Base
= getBaseForInput(InVal
, InBB
->getTerminator());
1075 BasePHI
->addIncoming(Base
, InBB
);
1077 assert(BasePHI
->getNumIncomingValues() == NumPHIValues
);
1078 } else if (SelectInst
*BaseSI
=
1079 dyn_cast
<SelectInst
>(State
.getBaseValue())) {
1080 SelectInst
*SI
= cast
<SelectInst
>(BDV
);
1082 // Find the instruction which produces the base for each input.
1083 // We may need to insert a bitcast.
1084 BaseSI
->setTrueValue(getBaseForInput(SI
->getTrueValue(), BaseSI
));
1085 BaseSI
->setFalseValue(getBaseForInput(SI
->getFalseValue(), BaseSI
));
1086 } else if (auto *BaseEE
=
1087 dyn_cast
<ExtractElementInst
>(State
.getBaseValue())) {
1088 Value
*InVal
= cast
<ExtractElementInst
>(BDV
)->getVectorOperand();
1089 // Find the instruction which produces the base for each input. We may
1090 // need to insert a bitcast.
1091 BaseEE
->setOperand(0, getBaseForInput(InVal
, BaseEE
));
1092 } else if (auto *BaseIE
= dyn_cast
<InsertElementInst
>(State
.getBaseValue())){
1093 auto *BdvIE
= cast
<InsertElementInst
>(BDV
);
1094 auto UpdateOperand
= [&](int OperandIdx
) {
1095 Value
*InVal
= BdvIE
->getOperand(OperandIdx
);
1096 Value
*Base
= getBaseForInput(InVal
, BaseIE
);
1097 BaseIE
->setOperand(OperandIdx
, Base
);
1099 UpdateOperand(0); // vector operand
1100 UpdateOperand(1); // scalar operand
1102 auto *BaseSV
= cast
<ShuffleVectorInst
>(State
.getBaseValue());
1103 auto *BdvSV
= cast
<ShuffleVectorInst
>(BDV
);
1104 auto UpdateOperand
= [&](int OperandIdx
) {
1105 Value
*InVal
= BdvSV
->getOperand(OperandIdx
);
1106 Value
*Base
= getBaseForInput(InVal
, BaseSV
);
1107 BaseSV
->setOperand(OperandIdx
, Base
);
1109 UpdateOperand(0); // vector operand
1110 UpdateOperand(1); // vector operand
1114 // Cache all of our results so we can cheaply reuse them
1115 // NOTE: This is actually two caches: one of the base defining value
1116 // relation and one of the base pointer relation! FIXME
1117 for (auto Pair
: States
) {
1118 auto *BDV
= Pair
.first
;
1119 Value
*Base
= Pair
.second
.getBaseValue();
1120 assert(BDV
&& Base
);
1121 assert(!isKnownBaseResult(BDV
) && "why did it get added?");
1124 dbgs() << "Updating base value cache"
1125 << " for: " << BDV
->getName() << " from: "
1126 << (Cache
.count(BDV
) ? Cache
[BDV
]->getName().str() : "none")
1127 << " to: " << Base
->getName() << "\n");
1129 if (Cache
.count(BDV
)) {
1130 assert(isKnownBaseResult(Base
) &&
1131 "must be something we 'know' is a base pointer");
1132 // Once we transition from the BDV relation being store in the Cache to
1133 // the base relation being stored, it must be stable
1134 assert((!isKnownBaseResult(Cache
[BDV
]) || Cache
[BDV
] == Base
) &&
1135 "base relation should be stable");
1139 assert(Cache
.count(Def
));
1143 // For a set of live pointers (base and/or derived), identify the base
1144 // pointer of the object which they are derived from. This routine will
1145 // mutate the IR graph as needed to make the 'base' pointer live at the
1146 // definition site of 'derived'. This ensures that any use of 'derived' can
1147 // also use 'base'. This may involve the insertion of a number of
1148 // additional PHI nodes.
1150 // preconditions: live is a set of pointer type Values
1152 // side effects: may insert PHI nodes into the existing CFG, will preserve
1153 // CFG, will not remove or mutate any existing nodes
1155 // post condition: PointerToBase contains one (derived, base) pair for every
1156 // pointer in live. Note that derived can be equal to base if the original
1157 // pointer was a base pointer.
1159 findBasePointers(const StatepointLiveSetTy
&live
,
1160 MapVector
<Value
*, Value
*> &PointerToBase
,
1161 DominatorTree
*DT
, DefiningValueMapTy
&DVCache
) {
1162 for (Value
*ptr
: live
) {
1163 Value
*base
= findBasePointer(ptr
, DVCache
);
1164 assert(base
&& "failed to find base pointer");
1165 PointerToBase
[ptr
] = base
;
1166 assert((!isa
<Instruction
>(base
) || !isa
<Instruction
>(ptr
) ||
1167 DT
->dominates(cast
<Instruction
>(base
)->getParent(),
1168 cast
<Instruction
>(ptr
)->getParent())) &&
1169 "The base we found better dominate the derived pointer");
1173 /// Find the required based pointers (and adjust the live set) for the given
1175 static void findBasePointers(DominatorTree
&DT
, DefiningValueMapTy
&DVCache
,
1177 PartiallyConstructedSafepointRecord
&result
) {
1178 MapVector
<Value
*, Value
*> PointerToBase
;
1179 findBasePointers(result
.LiveSet
, PointerToBase
, &DT
, DVCache
);
1181 if (PrintBasePointers
) {
1182 errs() << "Base Pairs (w/o Relocation):\n";
1183 for (auto &Pair
: PointerToBase
) {
1184 errs() << " derived ";
1185 Pair
.first
->printAsOperand(errs(), false);
1187 Pair
.second
->printAsOperand(errs(), false);
1192 result
.PointerToBase
= PointerToBase
;
1195 /// Given an updated version of the dataflow liveness results, update the
1196 /// liveset and base pointer maps for the call site CS.
1197 static void recomputeLiveInValues(GCPtrLivenessData
&RevisedLivenessData
,
1199 PartiallyConstructedSafepointRecord
&result
);
1201 static void recomputeLiveInValues(
1202 Function
&F
, DominatorTree
&DT
, ArrayRef
<CallBase
*> toUpdate
,
1203 MutableArrayRef
<struct PartiallyConstructedSafepointRecord
> records
) {
1204 // TODO-PERF: reuse the original liveness, then simply run the dataflow
1205 // again. The old values are still live and will help it stabilize quickly.
1206 GCPtrLivenessData RevisedLivenessData
;
1207 computeLiveInValues(DT
, F
, RevisedLivenessData
);
1208 for (size_t i
= 0; i
< records
.size(); i
++) {
1209 struct PartiallyConstructedSafepointRecord
&info
= records
[i
];
1210 recomputeLiveInValues(RevisedLivenessData
, toUpdate
[i
], info
);
1214 // When inserting gc.relocate and gc.result calls, we need to ensure there are
1215 // no uses of the original value / return value between the gc.statepoint and
1216 // the gc.relocate / gc.result call. One case which can arise is a phi node
1217 // starting one of the successor blocks. We also need to be able to insert the
1218 // gc.relocates only on the path which goes through the statepoint. We might
1219 // need to split an edge to make this possible.
1221 normalizeForInvokeSafepoint(BasicBlock
*BB
, BasicBlock
*InvokeParent
,
1222 DominatorTree
&DT
) {
1223 BasicBlock
*Ret
= BB
;
1224 if (!BB
->getUniquePredecessor())
1225 Ret
= SplitBlockPredecessors(BB
, InvokeParent
, "", &DT
);
1227 // Now that 'Ret' has unique predecessor we can safely remove all phi nodes
1229 FoldSingleEntryPHINodes(Ret
);
1230 assert(!isa
<PHINode
>(Ret
->begin()) &&
1231 "All PHI nodes should have been removed!");
1233 // At this point, we can safely insert a gc.relocate or gc.result as the first
1234 // instruction in Ret if needed.
1238 // Create new attribute set containing only attributes which can be transferred
1239 // from original call to the safepoint.
1240 static AttributeList
legalizeCallAttributes(AttributeList AL
) {
1244 // Remove the readonly, readnone, and statepoint function attributes.
1245 AttrBuilder FnAttrs
= AL
.getFnAttributes();
1246 FnAttrs
.removeAttribute(Attribute::ReadNone
);
1247 FnAttrs
.removeAttribute(Attribute::ReadOnly
);
1248 for (Attribute A
: AL
.getFnAttributes()) {
1249 if (isStatepointDirectiveAttr(A
))
1253 // Just skip parameter and return attributes for now
1254 LLVMContext
&Ctx
= AL
.getContext();
1255 return AttributeList::get(Ctx
, AttributeList::FunctionIndex
,
1256 AttributeSet::get(Ctx
, FnAttrs
));
1259 /// Helper function to place all gc relocates necessary for the given
1262 /// liveVariables - list of variables to be relocated.
1263 /// liveStart - index of the first live variable.
1264 /// basePtrs - base pointers.
1265 /// statepointToken - statepoint instruction to which relocates should be
1267 /// Builder - Llvm IR builder to be used to construct new calls.
1268 static void CreateGCRelocates(ArrayRef
<Value
*> LiveVariables
,
1269 const int LiveStart
,
1270 ArrayRef
<Value
*> BasePtrs
,
1271 Instruction
*StatepointToken
,
1272 IRBuilder
<> Builder
) {
1273 if (LiveVariables
.empty())
1276 auto FindIndex
= [](ArrayRef
<Value
*> LiveVec
, Value
*Val
) {
1277 auto ValIt
= llvm::find(LiveVec
, Val
);
1278 assert(ValIt
!= LiveVec
.end() && "Val not found in LiveVec!");
1279 size_t Index
= std::distance(LiveVec
.begin(), ValIt
);
1280 assert(Index
< LiveVec
.size() && "Bug in std::find?");
1283 Module
*M
= StatepointToken
->getModule();
1285 // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
1286 // element type is i8 addrspace(1)*). We originally generated unique
1287 // declarations for each pointer type, but this proved problematic because
1288 // the intrinsic mangling code is incomplete and fragile. Since we're moving
1289 // towards a single unified pointer type anyways, we can just cast everything
1290 // to an i8* of the right address space. A bitcast is added later to convert
1291 // gc_relocate to the actual value's type.
1292 auto getGCRelocateDecl
= [&] (Type
*Ty
) {
1293 assert(isHandledGCPointerType(Ty
));
1294 auto AS
= Ty
->getScalarType()->getPointerAddressSpace();
1295 Type
*NewTy
= Type::getInt8PtrTy(M
->getContext(), AS
);
1296 if (auto *VT
= dyn_cast
<VectorType
>(Ty
))
1297 NewTy
= VectorType::get(NewTy
, VT
->getNumElements());
1298 return Intrinsic::getDeclaration(M
, Intrinsic::experimental_gc_relocate
,
1302 // Lazily populated map from input types to the canonicalized form mentioned
1303 // in the comment above. This should probably be cached somewhere more
1305 DenseMap
<Type
*, Function
*> TypeToDeclMap
;
1307 for (unsigned i
= 0; i
< LiveVariables
.size(); i
++) {
1308 // Generate the gc.relocate call and save the result
1310 Builder
.getInt32(LiveStart
+ FindIndex(LiveVariables
, BasePtrs
[i
]));
1311 Value
*LiveIdx
= Builder
.getInt32(LiveStart
+ i
);
1313 Type
*Ty
= LiveVariables
[i
]->getType();
1314 if (!TypeToDeclMap
.count(Ty
))
1315 TypeToDeclMap
[Ty
] = getGCRelocateDecl(Ty
);
1316 Function
*GCRelocateDecl
= TypeToDeclMap
[Ty
];
1318 // only specify a debug name if we can give a useful one
1319 CallInst
*Reloc
= Builder
.CreateCall(
1320 GCRelocateDecl
, {StatepointToken
, BaseIdx
, LiveIdx
},
1321 suffixed_name_or(LiveVariables
[i
], ".relocated", ""));
1322 // Trick CodeGen into thinking there are lots of free registers at this
1324 Reloc
->setCallingConv(CallingConv::Cold
);
1330 /// This struct is used to defer RAUWs and `eraseFromParent` s. Using this
1331 /// avoids having to worry about keeping around dangling pointers to Values.
1332 class DeferredReplacement
{
1333 AssertingVH
<Instruction
> Old
;
1334 AssertingVH
<Instruction
> New
;
1335 bool IsDeoptimize
= false;
1337 DeferredReplacement() = default;
1340 static DeferredReplacement
createRAUW(Instruction
*Old
, Instruction
*New
) {
1341 assert(Old
!= New
&& Old
&& New
&&
1342 "Cannot RAUW equal values or to / from null!");
1344 DeferredReplacement D
;
1350 static DeferredReplacement
createDelete(Instruction
*ToErase
) {
1351 DeferredReplacement D
;
1356 static DeferredReplacement
createDeoptimizeReplacement(Instruction
*Old
) {
1358 auto *F
= cast
<CallInst
>(Old
)->getCalledFunction();
1359 assert(F
&& F
->getIntrinsicID() == Intrinsic::experimental_deoptimize
&&
1360 "Only way to construct a deoptimize deferred replacement");
1362 DeferredReplacement D
;
1364 D
.IsDeoptimize
= true;
1368 /// Does the task represented by this instance.
1369 void doReplacement() {
1370 Instruction
*OldI
= Old
;
1371 Instruction
*NewI
= New
;
1373 assert(OldI
!= NewI
&& "Disallowed at construction?!");
1374 assert((!IsDeoptimize
|| !New
) &&
1375 "Deoptimize intrinsics are not replaced!");
1381 OldI
->replaceAllUsesWith(NewI
);
1384 // Note: we've inserted instructions, so the call to llvm.deoptimize may
1385 // not necessarily be followed by the matching return.
1386 auto *RI
= cast
<ReturnInst
>(OldI
->getParent()->getTerminator());
1387 new UnreachableInst(RI
->getContext(), RI
);
1388 RI
->eraseFromParent();
1391 OldI
->eraseFromParent();
1395 } // end anonymous namespace
1397 static StringRef
getDeoptLowering(CallBase
*Call
) {
1398 const char *DeoptLowering
= "deopt-lowering";
1399 if (Call
->hasFnAttr(DeoptLowering
)) {
1400 // FIXME: Calls have a *really* confusing interface around attributes
1402 const AttributeList
&CSAS
= Call
->getAttributes();
1403 if (CSAS
.hasAttribute(AttributeList::FunctionIndex
, DeoptLowering
))
1404 return CSAS
.getAttribute(AttributeList::FunctionIndex
, DeoptLowering
)
1405 .getValueAsString();
1406 Function
*F
= Call
->getCalledFunction();
1407 assert(F
&& F
->hasFnAttribute(DeoptLowering
));
1408 return F
->getFnAttribute(DeoptLowering
).getValueAsString();
1410 return "live-through";
1414 makeStatepointExplicitImpl(CallBase
*Call
, /* to replace */
1415 const SmallVectorImpl
<Value
*> &BasePtrs
,
1416 const SmallVectorImpl
<Value
*> &LiveVariables
,
1417 PartiallyConstructedSafepointRecord
&Result
,
1418 std::vector
<DeferredReplacement
> &Replacements
) {
1419 assert(BasePtrs
.size() == LiveVariables
.size());
1421 // Then go ahead and use the builder do actually do the inserts. We insert
1422 // immediately before the previous instruction under the assumption that all
1423 // arguments will be available here. We can't insert afterwards since we may
1424 // be replacing a terminator.
1425 IRBuilder
<> Builder(Call
);
1427 ArrayRef
<Value
*> GCArgs(LiveVariables
);
1428 uint64_t StatepointID
= StatepointDirectives::DefaultStatepointID
;
1429 uint32_t NumPatchBytes
= 0;
1430 uint32_t Flags
= uint32_t(StatepointFlags::None
);
1432 ArrayRef
<Use
> CallArgs(Call
->arg_begin(), Call
->arg_end());
1433 ArrayRef
<Use
> DeoptArgs
= GetDeoptBundleOperands(Call
);
1434 ArrayRef
<Use
> TransitionArgs
;
1435 if (auto TransitionBundle
=
1436 Call
->getOperandBundle(LLVMContext::OB_gc_transition
)) {
1437 Flags
|= uint32_t(StatepointFlags::GCTransition
);
1438 TransitionArgs
= TransitionBundle
->Inputs
;
1441 // Instead of lowering calls to @llvm.experimental.deoptimize as normal calls
1442 // with a return value, we lower then as never returning calls to
1443 // __llvm_deoptimize that are followed by unreachable to get better codegen.
1444 bool IsDeoptimize
= false;
1446 StatepointDirectives SD
=
1447 parseStatepointDirectivesFromAttrs(Call
->getAttributes());
1448 if (SD
.NumPatchBytes
)
1449 NumPatchBytes
= *SD
.NumPatchBytes
;
1450 if (SD
.StatepointID
)
1451 StatepointID
= *SD
.StatepointID
;
1453 // Pass through the requested lowering if any. The default is live-through.
1454 StringRef DeoptLowering
= getDeoptLowering(Call
);
1455 if (DeoptLowering
.equals("live-in"))
1456 Flags
|= uint32_t(StatepointFlags::DeoptLiveIn
);
1458 assert(DeoptLowering
.equals("live-through") && "Unsupported value!");
1461 Value
*CallTarget
= Call
->getCalledValue();
1462 if (Function
*F
= dyn_cast
<Function
>(CallTarget
)) {
1463 if (F
->getIntrinsicID() == Intrinsic::experimental_deoptimize
) {
1464 // Calls to llvm.experimental.deoptimize are lowered to calls to the
1465 // __llvm_deoptimize symbol. We want to resolve this now, since the
1466 // verifier does not allow taking the address of an intrinsic function.
1468 SmallVector
<Type
*, 8> DomainTy
;
1469 for (Value
*Arg
: CallArgs
)
1470 DomainTy
.push_back(Arg
->getType());
1471 auto *FTy
= FunctionType::get(Type::getVoidTy(F
->getContext()), DomainTy
,
1472 /* isVarArg = */ false);
1474 // Note: CallTarget can be a bitcast instruction of a symbol if there are
1475 // calls to @llvm.experimental.deoptimize with different argument types in
1476 // the same module. This is fine -- we assume the frontend knew what it
1477 // was doing when generating this kind of IR.
1478 CallTarget
= F
->getParent()
1479 ->getOrInsertFunction("__llvm_deoptimize", FTy
)
1482 IsDeoptimize
= true;
1486 // Create the statepoint given all the arguments
1487 Instruction
*Token
= nullptr;
1488 if (auto *CI
= dyn_cast
<CallInst
>(Call
)) {
1489 CallInst
*SPCall
= Builder
.CreateGCStatepointCall(
1490 StatepointID
, NumPatchBytes
, CallTarget
, Flags
, CallArgs
,
1491 TransitionArgs
, DeoptArgs
, GCArgs
, "safepoint_token");
1493 SPCall
->setTailCallKind(CI
->getTailCallKind());
1494 SPCall
->setCallingConv(CI
->getCallingConv());
1496 // Currently we will fail on parameter attributes and on certain
1497 // function attributes. In case if we can handle this set of attributes -
1498 // set up function attrs directly on statepoint and return attrs later for
1499 // gc_result intrinsic.
1500 SPCall
->setAttributes(legalizeCallAttributes(CI
->getAttributes()));
1504 // Put the following gc_result and gc_relocate calls immediately after the
1505 // the old call (which we're about to delete)
1506 assert(CI
->getNextNode() && "Not a terminator, must have next!");
1507 Builder
.SetInsertPoint(CI
->getNextNode());
1508 Builder
.SetCurrentDebugLocation(CI
->getNextNode()->getDebugLoc());
1510 auto *II
= cast
<InvokeInst
>(Call
);
1512 // Insert the new invoke into the old block. We'll remove the old one in a
1513 // moment at which point this will become the new terminator for the
1515 InvokeInst
*SPInvoke
= Builder
.CreateGCStatepointInvoke(
1516 StatepointID
, NumPatchBytes
, CallTarget
, II
->getNormalDest(),
1517 II
->getUnwindDest(), Flags
, CallArgs
, TransitionArgs
, DeoptArgs
, GCArgs
,
1518 "statepoint_token");
1520 SPInvoke
->setCallingConv(II
->getCallingConv());
1522 // Currently we will fail on parameter attributes and on certain
1523 // function attributes. In case if we can handle this set of attributes -
1524 // set up function attrs directly on statepoint and return attrs later for
1525 // gc_result intrinsic.
1526 SPInvoke
->setAttributes(legalizeCallAttributes(II
->getAttributes()));
1530 // Generate gc relocates in exceptional path
1531 BasicBlock
*UnwindBlock
= II
->getUnwindDest();
1532 assert(!isa
<PHINode
>(UnwindBlock
->begin()) &&
1533 UnwindBlock
->getUniquePredecessor() &&
1534 "can't safely insert in this block!");
1536 Builder
.SetInsertPoint(&*UnwindBlock
->getFirstInsertionPt());
1537 Builder
.SetCurrentDebugLocation(II
->getDebugLoc());
1539 // Attach exceptional gc relocates to the landingpad.
1540 Instruction
*ExceptionalToken
= UnwindBlock
->getLandingPadInst();
1541 Result
.UnwindToken
= ExceptionalToken
;
1543 const unsigned LiveStartIdx
= Statepoint(Token
).gcArgsStartIdx();
1544 CreateGCRelocates(LiveVariables
, LiveStartIdx
, BasePtrs
, ExceptionalToken
,
1547 // Generate gc relocates and returns for normal block
1548 BasicBlock
*NormalDest
= II
->getNormalDest();
1549 assert(!isa
<PHINode
>(NormalDest
->begin()) &&
1550 NormalDest
->getUniquePredecessor() &&
1551 "can't safely insert in this block!");
1553 Builder
.SetInsertPoint(&*NormalDest
->getFirstInsertionPt());
1555 // gc relocates will be generated later as if it were regular call
1558 assert(Token
&& "Should be set in one of the above branches!");
1561 // If we're wrapping an @llvm.experimental.deoptimize in a statepoint, we
1562 // transform the tail-call like structure to a call to a void function
1563 // followed by unreachable to get better codegen.
1564 Replacements
.push_back(
1565 DeferredReplacement::createDeoptimizeReplacement(Call
));
1567 Token
->setName("statepoint_token");
1568 if (!Call
->getType()->isVoidTy() && !Call
->use_empty()) {
1569 StringRef Name
= Call
->hasName() ? Call
->getName() : "";
1570 CallInst
*GCResult
= Builder
.CreateGCResult(Token
, Call
->getType(), Name
);
1571 GCResult
->setAttributes(
1572 AttributeList::get(GCResult
->getContext(), AttributeList::ReturnIndex
,
1573 Call
->getAttributes().getRetAttributes()));
1575 // We cannot RAUW or delete CS.getInstruction() because it could be in the
1576 // live set of some other safepoint, in which case that safepoint's
1577 // PartiallyConstructedSafepointRecord will hold a raw pointer to this
1578 // llvm::Instruction. Instead, we defer the replacement and deletion to
1579 // after the live sets have been made explicit in the IR, and we no longer
1580 // have raw pointers to worry about.
1581 Replacements
.emplace_back(
1582 DeferredReplacement::createRAUW(Call
, GCResult
));
1584 Replacements
.emplace_back(DeferredReplacement::createDelete(Call
));
1588 Result
.StatepointToken
= Token
;
1590 // Second, create a gc.relocate for every live variable
1591 const unsigned LiveStartIdx
= Statepoint(Token
).gcArgsStartIdx();
1592 CreateGCRelocates(LiveVariables
, LiveStartIdx
, BasePtrs
, Token
, Builder
);
1595 // Replace an existing gc.statepoint with a new one and a set of gc.relocates
1596 // which make the relocations happening at this safepoint explicit.
1598 // WARNING: Does not do any fixup to adjust users of the original live
1599 // values. That's the callers responsibility.
1601 makeStatepointExplicit(DominatorTree
&DT
, CallBase
*Call
,
1602 PartiallyConstructedSafepointRecord
&Result
,
1603 std::vector
<DeferredReplacement
> &Replacements
) {
1604 const auto &LiveSet
= Result
.LiveSet
;
1605 const auto &PointerToBase
= Result
.PointerToBase
;
1607 // Convert to vector for efficient cross referencing.
1608 SmallVector
<Value
*, 64> BaseVec
, LiveVec
;
1609 LiveVec
.reserve(LiveSet
.size());
1610 BaseVec
.reserve(LiveSet
.size());
1611 for (Value
*L
: LiveSet
) {
1612 LiveVec
.push_back(L
);
1613 assert(PointerToBase
.count(L
));
1614 Value
*Base
= PointerToBase
.find(L
)->second
;
1615 BaseVec
.push_back(Base
);
1617 assert(LiveVec
.size() == BaseVec
.size());
1619 // Do the actual rewriting and delete the old statepoint
1620 makeStatepointExplicitImpl(Call
, BaseVec
, LiveVec
, Result
, Replacements
);
1623 // Helper function for the relocationViaAlloca.
1625 // It receives iterator to the statepoint gc relocates and emits a store to the
1626 // assigned location (via allocaMap) for the each one of them. It adds the
1627 // visited values into the visitedLiveValues set, which we will later use them
1628 // for sanity checking.
1630 insertRelocationStores(iterator_range
<Value::user_iterator
> GCRelocs
,
1631 DenseMap
<Value
*, AllocaInst
*> &AllocaMap
,
1632 DenseSet
<Value
*> &VisitedLiveValues
) {
1633 for (User
*U
: GCRelocs
) {
1634 GCRelocateInst
*Relocate
= dyn_cast
<GCRelocateInst
>(U
);
1638 Value
*OriginalValue
= Relocate
->getDerivedPtr();
1639 assert(AllocaMap
.count(OriginalValue
));
1640 Value
*Alloca
= AllocaMap
[OriginalValue
];
1642 // Emit store into the related alloca
1643 // All gc_relocates are i8 addrspace(1)* typed, and it must be bitcasted to
1644 // the correct type according to alloca.
1645 assert(Relocate
->getNextNode() &&
1646 "Should always have one since it's not a terminator");
1647 IRBuilder
<> Builder(Relocate
->getNextNode());
1648 Value
*CastedRelocatedValue
=
1649 Builder
.CreateBitCast(Relocate
,
1650 cast
<AllocaInst
>(Alloca
)->getAllocatedType(),
1651 suffixed_name_or(Relocate
, ".casted", ""));
1653 StoreInst
*Store
= new StoreInst(CastedRelocatedValue
, Alloca
);
1654 Store
->insertAfter(cast
<Instruction
>(CastedRelocatedValue
));
1657 VisitedLiveValues
.insert(OriginalValue
);
1662 // Helper function for the "relocationViaAlloca". Similar to the
1663 // "insertRelocationStores" but works for rematerialized values.
1664 static void insertRematerializationStores(
1665 const RematerializedValueMapTy
&RematerializedValues
,
1666 DenseMap
<Value
*, AllocaInst
*> &AllocaMap
,
1667 DenseSet
<Value
*> &VisitedLiveValues
) {
1668 for (auto RematerializedValuePair
: RematerializedValues
) {
1669 Instruction
*RematerializedValue
= RematerializedValuePair
.first
;
1670 Value
*OriginalValue
= RematerializedValuePair
.second
;
1672 assert(AllocaMap
.count(OriginalValue
) &&
1673 "Can not find alloca for rematerialized value");
1674 Value
*Alloca
= AllocaMap
[OriginalValue
];
1676 StoreInst
*Store
= new StoreInst(RematerializedValue
, Alloca
);
1677 Store
->insertAfter(RematerializedValue
);
1680 VisitedLiveValues
.insert(OriginalValue
);
1685 /// Do all the relocation update via allocas and mem2reg
1686 static void relocationViaAlloca(
1687 Function
&F
, DominatorTree
&DT
, ArrayRef
<Value
*> Live
,
1688 ArrayRef
<PartiallyConstructedSafepointRecord
> Records
) {
1690 // record initial number of (static) allocas; we'll check we have the same
1691 // number when we get done.
1692 int InitialAllocaNum
= 0;
1693 for (Instruction
&I
: F
.getEntryBlock())
1694 if (isa
<AllocaInst
>(I
))
1698 // TODO-PERF: change data structures, reserve
1699 DenseMap
<Value
*, AllocaInst
*> AllocaMap
;
1700 SmallVector
<AllocaInst
*, 200> PromotableAllocas
;
1701 // Used later to chack that we have enough allocas to store all values
1702 std::size_t NumRematerializedValues
= 0;
1703 PromotableAllocas
.reserve(Live
.size());
1705 // Emit alloca for "LiveValue" and record it in "allocaMap" and
1706 // "PromotableAllocas"
1707 const DataLayout
&DL
= F
.getParent()->getDataLayout();
1708 auto emitAllocaFor
= [&](Value
*LiveValue
) {
1709 AllocaInst
*Alloca
= new AllocaInst(LiveValue
->getType(),
1710 DL
.getAllocaAddrSpace(), "",
1711 F
.getEntryBlock().getFirstNonPHI());
1712 AllocaMap
[LiveValue
] = Alloca
;
1713 PromotableAllocas
.push_back(Alloca
);
1716 // Emit alloca for each live gc pointer
1717 for (Value
*V
: Live
)
1720 // Emit allocas for rematerialized values
1721 for (const auto &Info
: Records
)
1722 for (auto RematerializedValuePair
: Info
.RematerializedValues
) {
1723 Value
*OriginalValue
= RematerializedValuePair
.second
;
1724 if (AllocaMap
.count(OriginalValue
) != 0)
1727 emitAllocaFor(OriginalValue
);
1728 ++NumRematerializedValues
;
1731 // The next two loops are part of the same conceptual operation. We need to
1732 // insert a store to the alloca after the original def and at each
1733 // redefinition. We need to insert a load before each use. These are split
1734 // into distinct loops for performance reasons.
1736 // Update gc pointer after each statepoint: either store a relocated value or
1737 // null (if no relocated value was found for this gc pointer and it is not a
1738 // gc_result). This must happen before we update the statepoint with load of
1739 // alloca otherwise we lose the link between statepoint and old def.
1740 for (const auto &Info
: Records
) {
1741 Value
*Statepoint
= Info
.StatepointToken
;
1743 // This will be used for consistency check
1744 DenseSet
<Value
*> VisitedLiveValues
;
1746 // Insert stores for normal statepoint gc relocates
1747 insertRelocationStores(Statepoint
->users(), AllocaMap
, VisitedLiveValues
);
1749 // In case if it was invoke statepoint
1750 // we will insert stores for exceptional path gc relocates.
1751 if (isa
<InvokeInst
>(Statepoint
)) {
1752 insertRelocationStores(Info
.UnwindToken
->users(), AllocaMap
,
1756 // Do similar thing with rematerialized values
1757 insertRematerializationStores(Info
.RematerializedValues
, AllocaMap
,
1760 if (ClobberNonLive
) {
1761 // As a debugging aid, pretend that an unrelocated pointer becomes null at
1762 // the gc.statepoint. This will turn some subtle GC problems into
1763 // slightly easier to debug SEGVs. Note that on large IR files with
1764 // lots of gc.statepoints this is extremely costly both memory and time
1766 SmallVector
<AllocaInst
*, 64> ToClobber
;
1767 for (auto Pair
: AllocaMap
) {
1768 Value
*Def
= Pair
.first
;
1769 AllocaInst
*Alloca
= Pair
.second
;
1771 // This value was relocated
1772 if (VisitedLiveValues
.count(Def
)) {
1775 ToClobber
.push_back(Alloca
);
1778 auto InsertClobbersAt
= [&](Instruction
*IP
) {
1779 for (auto *AI
: ToClobber
) {
1780 auto PT
= cast
<PointerType
>(AI
->getAllocatedType());
1781 Constant
*CPN
= ConstantPointerNull::get(PT
);
1782 StoreInst
*Store
= new StoreInst(CPN
, AI
);
1783 Store
->insertBefore(IP
);
1787 // Insert the clobbering stores. These may get intermixed with the
1788 // gc.results and gc.relocates, but that's fine.
1789 if (auto II
= dyn_cast
<InvokeInst
>(Statepoint
)) {
1790 InsertClobbersAt(&*II
->getNormalDest()->getFirstInsertionPt());
1791 InsertClobbersAt(&*II
->getUnwindDest()->getFirstInsertionPt());
1793 InsertClobbersAt(cast
<Instruction
>(Statepoint
)->getNextNode());
1798 // Update use with load allocas and add store for gc_relocated.
1799 for (auto Pair
: AllocaMap
) {
1800 Value
*Def
= Pair
.first
;
1801 AllocaInst
*Alloca
= Pair
.second
;
1803 // We pre-record the uses of allocas so that we dont have to worry about
1804 // later update that changes the user information..
1806 SmallVector
<Instruction
*, 20> Uses
;
1807 // PERF: trade a linear scan for repeated reallocation
1808 Uses
.reserve(Def
->getNumUses());
1809 for (User
*U
: Def
->users()) {
1810 if (!isa
<ConstantExpr
>(U
)) {
1811 // If the def has a ConstantExpr use, then the def is either a
1812 // ConstantExpr use itself or null. In either case
1813 // (recursively in the first, directly in the second), the oop
1814 // it is ultimately dependent on is null and this particular
1815 // use does not need to be fixed up.
1816 Uses
.push_back(cast
<Instruction
>(U
));
1821 auto Last
= std::unique(Uses
.begin(), Uses
.end());
1822 Uses
.erase(Last
, Uses
.end());
1824 for (Instruction
*Use
: Uses
) {
1825 if (isa
<PHINode
>(Use
)) {
1826 PHINode
*Phi
= cast
<PHINode
>(Use
);
1827 for (unsigned i
= 0; i
< Phi
->getNumIncomingValues(); i
++) {
1828 if (Def
== Phi
->getIncomingValue(i
)) {
1830 new LoadInst(Alloca
->getAllocatedType(), Alloca
, "",
1831 Phi
->getIncomingBlock(i
)->getTerminator());
1832 Phi
->setIncomingValue(i
, Load
);
1837 new LoadInst(Alloca
->getAllocatedType(), Alloca
, "", Use
);
1838 Use
->replaceUsesOfWith(Def
, Load
);
1842 // Emit store for the initial gc value. Store must be inserted after load,
1843 // otherwise store will be in alloca's use list and an extra load will be
1844 // inserted before it.
1845 StoreInst
*Store
= new StoreInst(Def
, Alloca
);
1846 if (Instruction
*Inst
= dyn_cast
<Instruction
>(Def
)) {
1847 if (InvokeInst
*Invoke
= dyn_cast
<InvokeInst
>(Inst
)) {
1848 // InvokeInst is a terminator so the store need to be inserted into its
1849 // normal destination block.
1850 BasicBlock
*NormalDest
= Invoke
->getNormalDest();
1851 Store
->insertBefore(NormalDest
->getFirstNonPHI());
1853 assert(!Inst
->isTerminator() &&
1854 "The only terminator that can produce a value is "
1855 "InvokeInst which is handled above.");
1856 Store
->insertAfter(Inst
);
1859 assert(isa
<Argument
>(Def
));
1860 Store
->insertAfter(cast
<Instruction
>(Alloca
));
1864 assert(PromotableAllocas
.size() == Live
.size() + NumRematerializedValues
&&
1865 "we must have the same allocas with lives");
1866 if (!PromotableAllocas
.empty()) {
1867 // Apply mem2reg to promote alloca to SSA
1868 PromoteMemToReg(PromotableAllocas
, DT
);
1872 for (auto &I
: F
.getEntryBlock())
1873 if (isa
<AllocaInst
>(I
))
1875 assert(InitialAllocaNum
== 0 && "We must not introduce any extra allocas");
1879 /// Implement a unique function which doesn't require we sort the input
1880 /// vector. Doing so has the effect of changing the output of a couple of
1881 /// tests in ways which make them less useful in testing fused safepoints.
1882 template <typename T
> static void unique_unsorted(SmallVectorImpl
<T
> &Vec
) {
1883 SmallSet
<T
, 8> Seen
;
1884 Vec
.erase(remove_if(Vec
, [&](const T
&V
) { return !Seen
.insert(V
).second
; }),
1888 /// Insert holders so that each Value is obviously live through the entire
1889 /// lifetime of the call.
1890 static void insertUseHolderAfter(CallBase
*Call
, const ArrayRef
<Value
*> Values
,
1891 SmallVectorImpl
<CallInst
*> &Holders
) {
1893 // No values to hold live, might as well not insert the empty holder
1896 Module
*M
= Call
->getModule();
1897 // Use a dummy vararg function to actually hold the values live
1898 FunctionCallee Func
= M
->getOrInsertFunction(
1899 "__tmp_use", FunctionType::get(Type::getVoidTy(M
->getContext()), true));
1900 if (isa
<CallInst
>(Call
)) {
1901 // For call safepoints insert dummy calls right after safepoint
1903 CallInst::Create(Func
, Values
, "", &*++Call
->getIterator()));
1906 // For invoke safepooints insert dummy calls both in normal and
1907 // exceptional destination blocks
1908 auto *II
= cast
<InvokeInst
>(Call
);
1909 Holders
.push_back(CallInst::Create(
1910 Func
, Values
, "", &*II
->getNormalDest()->getFirstInsertionPt()));
1911 Holders
.push_back(CallInst::Create(
1912 Func
, Values
, "", &*II
->getUnwindDest()->getFirstInsertionPt()));
1915 static void findLiveReferences(
1916 Function
&F
, DominatorTree
&DT
, ArrayRef
<CallBase
*> toUpdate
,
1917 MutableArrayRef
<struct PartiallyConstructedSafepointRecord
> records
) {
1918 GCPtrLivenessData OriginalLivenessData
;
1919 computeLiveInValues(DT
, F
, OriginalLivenessData
);
1920 for (size_t i
= 0; i
< records
.size(); i
++) {
1921 struct PartiallyConstructedSafepointRecord
&info
= records
[i
];
1922 analyzeParsePointLiveness(DT
, OriginalLivenessData
, toUpdate
[i
], info
);
1926 // Helper function for the "rematerializeLiveValues". It walks use chain
1927 // starting from the "CurrentValue" until it reaches the root of the chain, i.e.
1928 // the base or a value it cannot process. Only "simple" values are processed
1929 // (currently it is GEP's and casts). The returned root is examined by the
1930 // callers of findRematerializableChainToBasePointer. Fills "ChainToBase" array
1931 // with all visited values.
1932 static Value
* findRematerializableChainToBasePointer(
1933 SmallVectorImpl
<Instruction
*> &ChainToBase
,
1934 Value
*CurrentValue
) {
1935 if (GetElementPtrInst
*GEP
= dyn_cast
<GetElementPtrInst
>(CurrentValue
)) {
1936 ChainToBase
.push_back(GEP
);
1937 return findRematerializableChainToBasePointer(ChainToBase
,
1938 GEP
->getPointerOperand());
1941 if (CastInst
*CI
= dyn_cast
<CastInst
>(CurrentValue
)) {
1942 if (!CI
->isNoopCast(CI
->getModule()->getDataLayout()))
1945 ChainToBase
.push_back(CI
);
1946 return findRematerializableChainToBasePointer(ChainToBase
,
1950 // We have reached the root of the chain, which is either equal to the base or
1951 // is the first unsupported value along the use chain.
1952 return CurrentValue
;
1955 // Helper function for the "rematerializeLiveValues". Compute cost of the use
1956 // chain we are going to rematerialize.
1958 chainToBasePointerCost(SmallVectorImpl
<Instruction
*> &Chain
,
1959 TargetTransformInfo
&TTI
) {
1962 for (Instruction
*Instr
: Chain
) {
1963 if (CastInst
*CI
= dyn_cast
<CastInst
>(Instr
)) {
1964 assert(CI
->isNoopCast(CI
->getModule()->getDataLayout()) &&
1965 "non noop cast is found during rematerialization");
1967 Type
*SrcTy
= CI
->getOperand(0)->getType();
1968 Cost
+= TTI
.getCastInstrCost(CI
->getOpcode(), CI
->getType(), SrcTy
, CI
);
1970 } else if (GetElementPtrInst
*GEP
= dyn_cast
<GetElementPtrInst
>(Instr
)) {
1971 // Cost of the address calculation
1972 Type
*ValTy
= GEP
->getSourceElementType();
1973 Cost
+= TTI
.getAddressComputationCost(ValTy
);
1975 // And cost of the GEP itself
1976 // TODO: Use TTI->getGEPCost here (it exists, but appears to be not
1977 // allowed for the external usage)
1978 if (!GEP
->hasAllConstantIndices())
1982 llvm_unreachable("unsupported instruction type during rematerialization");
1989 static bool AreEquivalentPhiNodes(PHINode
&OrigRootPhi
, PHINode
&AlternateRootPhi
) {
1990 unsigned PhiNum
= OrigRootPhi
.getNumIncomingValues();
1991 if (PhiNum
!= AlternateRootPhi
.getNumIncomingValues() ||
1992 OrigRootPhi
.getParent() != AlternateRootPhi
.getParent())
1994 // Map of incoming values and their corresponding basic blocks of
1996 SmallDenseMap
<Value
*, BasicBlock
*, 8> CurrentIncomingValues
;
1997 for (unsigned i
= 0; i
< PhiNum
; i
++)
1998 CurrentIncomingValues
[OrigRootPhi
.getIncomingValue(i
)] =
1999 OrigRootPhi
.getIncomingBlock(i
);
2001 // Both current and base PHIs should have same incoming values and
2002 // the same basic blocks corresponding to the incoming values.
2003 for (unsigned i
= 0; i
< PhiNum
; i
++) {
2005 CurrentIncomingValues
.find(AlternateRootPhi
.getIncomingValue(i
));
2006 if (CIVI
== CurrentIncomingValues
.end())
2008 BasicBlock
*CurrentIncomingBB
= CIVI
->second
;
2009 if (CurrentIncomingBB
!= AlternateRootPhi
.getIncomingBlock(i
))
2015 // From the statepoint live set pick values that are cheaper to recompute then
2016 // to relocate. Remove this values from the live set, rematerialize them after
2017 // statepoint and record them in "Info" structure. Note that similar to
2018 // relocated values we don't do any user adjustments here.
2019 static void rematerializeLiveValues(CallBase
*Call
,
2020 PartiallyConstructedSafepointRecord
&Info
,
2021 TargetTransformInfo
&TTI
) {
2022 const unsigned int ChainLengthThreshold
= 10;
2024 // Record values we are going to delete from this statepoint live set.
2025 // We can not di this in following loop due to iterator invalidation.
2026 SmallVector
<Value
*, 32> LiveValuesToBeDeleted
;
2028 for (Value
*LiveValue
: Info
.LiveSet
) {
2029 // For each live pointer find its defining chain
2030 SmallVector
<Instruction
*, 3> ChainToBase
;
2031 assert(Info
.PointerToBase
.count(LiveValue
));
2032 Value
*RootOfChain
=
2033 findRematerializableChainToBasePointer(ChainToBase
,
2036 // Nothing to do, or chain is too long
2037 if ( ChainToBase
.size() == 0 ||
2038 ChainToBase
.size() > ChainLengthThreshold
)
2041 // Handle the scenario where the RootOfChain is not equal to the
2042 // Base Value, but they are essentially the same phi values.
2043 if (RootOfChain
!= Info
.PointerToBase
[LiveValue
]) {
2044 PHINode
*OrigRootPhi
= dyn_cast
<PHINode
>(RootOfChain
);
2045 PHINode
*AlternateRootPhi
= dyn_cast
<PHINode
>(Info
.PointerToBase
[LiveValue
]);
2046 if (!OrigRootPhi
|| !AlternateRootPhi
)
2048 // PHI nodes that have the same incoming values, and belonging to the same
2049 // basic blocks are essentially the same SSA value. When the original phi
2050 // has incoming values with different base pointers, the original phi is
2051 // marked as conflict, and an additional `AlternateRootPhi` with the same
2052 // incoming values get generated by the findBasePointer function. We need
2053 // to identify the newly generated AlternateRootPhi (.base version of phi)
2054 // and RootOfChain (the original phi node itself) are the same, so that we
2055 // can rematerialize the gep and casts. This is a workaround for the
2056 // deficiency in the findBasePointer algorithm.
2057 if (!AreEquivalentPhiNodes(*OrigRootPhi
, *AlternateRootPhi
))
2059 // Now that the phi nodes are proved to be the same, assert that
2060 // findBasePointer's newly generated AlternateRootPhi is present in the
2061 // liveset of the call.
2062 assert(Info
.LiveSet
.count(AlternateRootPhi
));
2064 // Compute cost of this chain
2065 unsigned Cost
= chainToBasePointerCost(ChainToBase
, TTI
);
2066 // TODO: We can also account for cases when we will be able to remove some
2067 // of the rematerialized values by later optimization passes. I.e if
2068 // we rematerialized several intersecting chains. Or if original values
2069 // don't have any uses besides this statepoint.
2071 // For invokes we need to rematerialize each chain twice - for normal and
2072 // for unwind basic blocks. Model this by multiplying cost by two.
2073 if (isa
<InvokeInst
>(Call
)) {
2076 // If it's too expensive - skip it
2077 if (Cost
>= RematerializationThreshold
)
2080 // Remove value from the live set
2081 LiveValuesToBeDeleted
.push_back(LiveValue
);
2083 // Clone instructions and record them inside "Info" structure
2085 // Walk backwards to visit top-most instructions first
2086 std::reverse(ChainToBase
.begin(), ChainToBase
.end());
2088 // Utility function which clones all instructions from "ChainToBase"
2089 // and inserts them before "InsertBefore". Returns rematerialized value
2090 // which should be used after statepoint.
2091 auto rematerializeChain
= [&ChainToBase
](
2092 Instruction
*InsertBefore
, Value
*RootOfChain
, Value
*AlternateLiveBase
) {
2093 Instruction
*LastClonedValue
= nullptr;
2094 Instruction
*LastValue
= nullptr;
2095 for (Instruction
*Instr
: ChainToBase
) {
2096 // Only GEP's and casts are supported as we need to be careful to not
2097 // introduce any new uses of pointers not in the liveset.
2098 // Note that it's fine to introduce new uses of pointers which were
2099 // otherwise not used after this statepoint.
2100 assert(isa
<GetElementPtrInst
>(Instr
) || isa
<CastInst
>(Instr
));
2102 Instruction
*ClonedValue
= Instr
->clone();
2103 ClonedValue
->insertBefore(InsertBefore
);
2104 ClonedValue
->setName(Instr
->getName() + ".remat");
2106 // If it is not first instruction in the chain then it uses previously
2107 // cloned value. We should update it to use cloned value.
2108 if (LastClonedValue
) {
2110 ClonedValue
->replaceUsesOfWith(LastValue
, LastClonedValue
);
2112 for (auto OpValue
: ClonedValue
->operand_values()) {
2113 // Assert that cloned instruction does not use any instructions from
2114 // this chain other than LastClonedValue
2115 assert(!is_contained(ChainToBase
, OpValue
) &&
2116 "incorrect use in rematerialization chain");
2117 // Assert that the cloned instruction does not use the RootOfChain
2118 // or the AlternateLiveBase.
2119 assert(OpValue
!= RootOfChain
&& OpValue
!= AlternateLiveBase
);
2123 // For the first instruction, replace the use of unrelocated base i.e.
2124 // RootOfChain/OrigRootPhi, with the corresponding PHI present in the
2125 // live set. They have been proved to be the same PHI nodes. Note
2126 // that the *only* use of the RootOfChain in the ChainToBase list is
2127 // the first Value in the list.
2128 if (RootOfChain
!= AlternateLiveBase
)
2129 ClonedValue
->replaceUsesOfWith(RootOfChain
, AlternateLiveBase
);
2132 LastClonedValue
= ClonedValue
;
2135 assert(LastClonedValue
);
2136 return LastClonedValue
;
2139 // Different cases for calls and invokes. For invokes we need to clone
2140 // instructions both on normal and unwind path.
2141 if (isa
<CallInst
>(Call
)) {
2142 Instruction
*InsertBefore
= Call
->getNextNode();
2143 assert(InsertBefore
);
2144 Instruction
*RematerializedValue
= rematerializeChain(
2145 InsertBefore
, RootOfChain
, Info
.PointerToBase
[LiveValue
]);
2146 Info
.RematerializedValues
[RematerializedValue
] = LiveValue
;
2148 auto *Invoke
= cast
<InvokeInst
>(Call
);
2150 Instruction
*NormalInsertBefore
=
2151 &*Invoke
->getNormalDest()->getFirstInsertionPt();
2152 Instruction
*UnwindInsertBefore
=
2153 &*Invoke
->getUnwindDest()->getFirstInsertionPt();
2155 Instruction
*NormalRematerializedValue
= rematerializeChain(
2156 NormalInsertBefore
, RootOfChain
, Info
.PointerToBase
[LiveValue
]);
2157 Instruction
*UnwindRematerializedValue
= rematerializeChain(
2158 UnwindInsertBefore
, RootOfChain
, Info
.PointerToBase
[LiveValue
]);
2160 Info
.RematerializedValues
[NormalRematerializedValue
] = LiveValue
;
2161 Info
.RematerializedValues
[UnwindRematerializedValue
] = LiveValue
;
2165 // Remove rematerializaed values from the live set
2166 for (auto LiveValue
: LiveValuesToBeDeleted
) {
2167 Info
.LiveSet
.remove(LiveValue
);
2171 static bool insertParsePoints(Function
&F
, DominatorTree
&DT
,
2172 TargetTransformInfo
&TTI
,
2173 SmallVectorImpl
<CallBase
*> &ToUpdate
) {
2175 // sanity check the input
2176 std::set
<CallBase
*> Uniqued
;
2177 Uniqued
.insert(ToUpdate
.begin(), ToUpdate
.end());
2178 assert(Uniqued
.size() == ToUpdate
.size() && "no duplicates please!");
2180 for (CallBase
*Call
: ToUpdate
)
2181 assert(Call
->getFunction() == &F
);
2184 // When inserting gc.relocates for invokes, we need to be able to insert at
2185 // the top of the successor blocks. See the comment on
2186 // normalForInvokeSafepoint on exactly what is needed. Note that this step
2187 // may restructure the CFG.
2188 for (CallBase
*Call
: ToUpdate
) {
2189 auto *II
= dyn_cast
<InvokeInst
>(Call
);
2192 normalizeForInvokeSafepoint(II
->getNormalDest(), II
->getParent(), DT
);
2193 normalizeForInvokeSafepoint(II
->getUnwindDest(), II
->getParent(), DT
);
2196 // A list of dummy calls added to the IR to keep various values obviously
2197 // live in the IR. We'll remove all of these when done.
2198 SmallVector
<CallInst
*, 64> Holders
;
2200 // Insert a dummy call with all of the deopt operands we'll need for the
2201 // actual safepoint insertion as arguments. This ensures reference operands
2202 // in the deopt argument list are considered live through the safepoint (and
2203 // thus makes sure they get relocated.)
2204 for (CallBase
*Call
: ToUpdate
) {
2205 SmallVector
<Value
*, 64> DeoptValues
;
2207 for (Value
*Arg
: GetDeoptBundleOperands(Call
)) {
2208 assert(!isUnhandledGCPointerType(Arg
->getType()) &&
2209 "support for FCA unimplemented");
2210 if (isHandledGCPointerType(Arg
->getType()))
2211 DeoptValues
.push_back(Arg
);
2214 insertUseHolderAfter(Call
, DeoptValues
, Holders
);
2217 SmallVector
<PartiallyConstructedSafepointRecord
, 64> Records(ToUpdate
.size());
2219 // A) Identify all gc pointers which are statically live at the given call
2221 findLiveReferences(F
, DT
, ToUpdate
, Records
);
2223 // B) Find the base pointers for each live pointer
2224 /* scope for caching */ {
2225 // Cache the 'defining value' relation used in the computation and
2226 // insertion of base phis and selects. This ensures that we don't insert
2227 // large numbers of duplicate base_phis.
2228 DefiningValueMapTy DVCache
;
2230 for (size_t i
= 0; i
< Records
.size(); i
++) {
2231 PartiallyConstructedSafepointRecord
&info
= Records
[i
];
2232 findBasePointers(DT
, DVCache
, ToUpdate
[i
], info
);
2234 } // end of cache scope
2236 // The base phi insertion logic (for any safepoint) may have inserted new
2237 // instructions which are now live at some safepoint. The simplest such
2240 // phi a <-- will be a new base_phi here
2241 // safepoint 1 <-- that needs to be live here
2245 // We insert some dummy calls after each safepoint to definitely hold live
2246 // the base pointers which were identified for that safepoint. We'll then
2247 // ask liveness for _every_ base inserted to see what is now live. Then we
2248 // remove the dummy calls.
2249 Holders
.reserve(Holders
.size() + Records
.size());
2250 for (size_t i
= 0; i
< Records
.size(); i
++) {
2251 PartiallyConstructedSafepointRecord
&Info
= Records
[i
];
2253 SmallVector
<Value
*, 128> Bases
;
2254 for (auto Pair
: Info
.PointerToBase
)
2255 Bases
.push_back(Pair
.second
);
2257 insertUseHolderAfter(ToUpdate
[i
], Bases
, Holders
);
2260 // By selecting base pointers, we've effectively inserted new uses. Thus, we
2261 // need to rerun liveness. We may *also* have inserted new defs, but that's
2262 // not the key issue.
2263 recomputeLiveInValues(F
, DT
, ToUpdate
, Records
);
2265 if (PrintBasePointers
) {
2266 for (auto &Info
: Records
) {
2267 errs() << "Base Pairs: (w/Relocation)\n";
2268 for (auto Pair
: Info
.PointerToBase
) {
2269 errs() << " derived ";
2270 Pair
.first
->printAsOperand(errs(), false);
2272 Pair
.second
->printAsOperand(errs(), false);
2278 // It is possible that non-constant live variables have a constant base. For
2279 // example, a GEP with a variable offset from a global. In this case we can
2280 // remove it from the liveset. We already don't add constants to the liveset
2281 // because we assume they won't move at runtime and the GC doesn't need to be
2282 // informed about them. The same reasoning applies if the base is constant.
2283 // Note that the relocation placement code relies on this filtering for
2284 // correctness as it expects the base to be in the liveset, which isn't true
2285 // if the base is constant.
2286 for (auto &Info
: Records
)
2287 for (auto &BasePair
: Info
.PointerToBase
)
2288 if (isa
<Constant
>(BasePair
.second
))
2289 Info
.LiveSet
.remove(BasePair
.first
);
2291 for (CallInst
*CI
: Holders
)
2292 CI
->eraseFromParent();
2296 // In order to reduce live set of statepoint we might choose to rematerialize
2297 // some values instead of relocating them. This is purely an optimization and
2298 // does not influence correctness.
2299 for (size_t i
= 0; i
< Records
.size(); i
++)
2300 rematerializeLiveValues(ToUpdate
[i
], Records
[i
], TTI
);
2302 // We need this to safely RAUW and delete call or invoke return values that
2303 // may themselves be live over a statepoint. For details, please see usage in
2304 // makeStatepointExplicitImpl.
2305 std::vector
<DeferredReplacement
> Replacements
;
2307 // Now run through and replace the existing statepoints with new ones with
2308 // the live variables listed. We do not yet update uses of the values being
2309 // relocated. We have references to live variables that need to
2310 // survive to the last iteration of this loop. (By construction, the
2311 // previous statepoint can not be a live variable, thus we can and remove
2312 // the old statepoint calls as we go.)
2313 for (size_t i
= 0; i
< Records
.size(); i
++)
2314 makeStatepointExplicit(DT
, ToUpdate
[i
], Records
[i
], Replacements
);
2316 ToUpdate
.clear(); // prevent accident use of invalid calls.
2318 for (auto &PR
: Replacements
)
2321 Replacements
.clear();
2323 for (auto &Info
: Records
) {
2324 // These live sets may contain state Value pointers, since we replaced calls
2325 // with operand bundles with calls wrapped in gc.statepoint, and some of
2326 // those calls may have been def'ing live gc pointers. Clear these out to
2327 // avoid accidentally using them.
2329 // TODO: We should create a separate data structure that does not contain
2330 // these live sets, and migrate to using that data structure from this point
2332 Info
.LiveSet
.clear();
2333 Info
.PointerToBase
.clear();
2336 // Do all the fixups of the original live variables to their relocated selves
2337 SmallVector
<Value
*, 128> Live
;
2338 for (size_t i
= 0; i
< Records
.size(); i
++) {
2339 PartiallyConstructedSafepointRecord
&Info
= Records
[i
];
2341 // We can't simply save the live set from the original insertion. One of
2342 // the live values might be the result of a call which needs a safepoint.
2343 // That Value* no longer exists and we need to use the new gc_result.
2344 // Thankfully, the live set is embedded in the statepoint (and updated), so
2345 // we just grab that.
2346 Statepoint
Statepoint(Info
.StatepointToken
);
2347 Live
.insert(Live
.end(), Statepoint
.gc_args_begin(),
2348 Statepoint
.gc_args_end());
2350 // Do some basic sanity checks on our liveness results before performing
2351 // relocation. Relocation can and will turn mistakes in liveness results
2352 // into non-sensical code which is must harder to debug.
2353 // TODO: It would be nice to test consistency as well
2354 assert(DT
.isReachableFromEntry(Info
.StatepointToken
->getParent()) &&
2355 "statepoint must be reachable or liveness is meaningless");
2356 for (Value
*V
: Statepoint
.gc_args()) {
2357 if (!isa
<Instruction
>(V
))
2358 // Non-instruction values trivial dominate all possible uses
2360 auto *LiveInst
= cast
<Instruction
>(V
);
2361 assert(DT
.isReachableFromEntry(LiveInst
->getParent()) &&
2362 "unreachable values should never be live");
2363 assert(DT
.dominates(LiveInst
, Info
.StatepointToken
) &&
2364 "basic SSA liveness expectation violated by liveness analysis");
2368 unique_unsorted(Live
);
2372 for (auto *Ptr
: Live
)
2373 assert(isHandledGCPointerType(Ptr
->getType()) &&
2374 "must be a gc pointer type");
2377 relocationViaAlloca(F
, DT
, Live
, Records
);
2378 return !Records
.empty();
2381 // Handles both return values and arguments for Functions and calls.
2382 template <typename AttrHolder
>
2383 static void RemoveNonValidAttrAtIndex(LLVMContext
&Ctx
, AttrHolder
&AH
,
2386 if (AH
.getDereferenceableBytes(Index
))
2387 R
.addAttribute(Attribute::get(Ctx
, Attribute::Dereferenceable
,
2388 AH
.getDereferenceableBytes(Index
)));
2389 if (AH
.getDereferenceableOrNullBytes(Index
))
2390 R
.addAttribute(Attribute::get(Ctx
, Attribute::DereferenceableOrNull
,
2391 AH
.getDereferenceableOrNullBytes(Index
)));
2392 if (AH
.getAttributes().hasAttribute(Index
, Attribute::NoAlias
))
2393 R
.addAttribute(Attribute::NoAlias
);
2396 AH
.setAttributes(AH
.getAttributes().removeAttributes(Ctx
, Index
, R
));
2399 static void stripNonValidAttributesFromPrototype(Function
&F
) {
2400 LLVMContext
&Ctx
= F
.getContext();
2402 for (Argument
&A
: F
.args())
2403 if (isa
<PointerType
>(A
.getType()))
2404 RemoveNonValidAttrAtIndex(Ctx
, F
,
2405 A
.getArgNo() + AttributeList::FirstArgIndex
);
2407 if (isa
<PointerType
>(F
.getReturnType()))
2408 RemoveNonValidAttrAtIndex(Ctx
, F
, AttributeList::ReturnIndex
);
2411 /// Certain metadata on instructions are invalid after running RS4GC.
2412 /// Optimizations that run after RS4GC can incorrectly use this metadata to
2413 /// optimize functions. We drop such metadata on the instruction.
2414 static void stripInvalidMetadataFromInstruction(Instruction
&I
) {
2415 if (!isa
<LoadInst
>(I
) && !isa
<StoreInst
>(I
))
2417 // These are the attributes that are still valid on loads and stores after
2419 // The metadata implying dereferenceability and noalias are (conservatively)
2420 // dropped. This is because semantically, after RewriteStatepointsForGC runs,
2421 // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can
2422 // touch the entire heap including noalias objects. Note: The reasoning is
2423 // same as stripping the dereferenceability and noalias attributes that are
2424 // analogous to the metadata counterparts.
2425 // We also drop the invariant.load metadata on the load because that metadata
2426 // implies the address operand to the load points to memory that is never
2427 // changed once it became dereferenceable. This is no longer true after RS4GC.
2428 // Similar reasoning applies to invariant.group metadata, which applies to
2429 // loads within a group.
2430 unsigned ValidMetadataAfterRS4GC
[] = {LLVMContext::MD_tbaa
,
2431 LLVMContext::MD_range
,
2432 LLVMContext::MD_alias_scope
,
2433 LLVMContext::MD_nontemporal
,
2434 LLVMContext::MD_nonnull
,
2435 LLVMContext::MD_align
,
2436 LLVMContext::MD_type
};
2438 // Drops all metadata on the instruction other than ValidMetadataAfterRS4GC.
2439 I
.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC
);
2442 static void stripNonValidDataFromBody(Function
&F
) {
2446 LLVMContext
&Ctx
= F
.getContext();
2447 MDBuilder
Builder(Ctx
);
2449 // Set of invariantstart instructions that we need to remove.
2450 // Use this to avoid invalidating the instruction iterator.
2451 SmallVector
<IntrinsicInst
*, 12> InvariantStartInstructions
;
2453 for (Instruction
&I
: instructions(F
)) {
2454 // invariant.start on memory location implies that the referenced memory
2455 // location is constant and unchanging. This is no longer true after
2456 // RewriteStatepointsForGC runs because there can be calls to gc.statepoint
2457 // which frees the entire heap and the presence of invariant.start allows
2458 // the optimizer to sink the load of a memory location past a statepoint,
2459 // which is incorrect.
2460 if (auto *II
= dyn_cast
<IntrinsicInst
>(&I
))
2461 if (II
->getIntrinsicID() == Intrinsic::invariant_start
) {
2462 InvariantStartInstructions
.push_back(II
);
2466 if (MDNode
*Tag
= I
.getMetadata(LLVMContext::MD_tbaa
)) {
2467 MDNode
*MutableTBAA
= Builder
.createMutableTBAAAccessTag(Tag
);
2468 I
.setMetadata(LLVMContext::MD_tbaa
, MutableTBAA
);
2471 stripInvalidMetadataFromInstruction(I
);
2473 if (auto *Call
= dyn_cast
<CallBase
>(&I
)) {
2474 for (int i
= 0, e
= Call
->arg_size(); i
!= e
; i
++)
2475 if (isa
<PointerType
>(Call
->getArgOperand(i
)->getType()))
2476 RemoveNonValidAttrAtIndex(Ctx
, *Call
,
2477 i
+ AttributeList::FirstArgIndex
);
2478 if (isa
<PointerType
>(Call
->getType()))
2479 RemoveNonValidAttrAtIndex(Ctx
, *Call
, AttributeList::ReturnIndex
);
2483 // Delete the invariant.start instructions and RAUW undef.
2484 for (auto *II
: InvariantStartInstructions
) {
2485 II
->replaceAllUsesWith(UndefValue::get(II
->getType()));
2486 II
->eraseFromParent();
2490 /// Returns true if this function should be rewritten by this pass. The main
2491 /// point of this function is as an extension point for custom logic.
2492 static bool shouldRewriteStatepointsIn(Function
&F
) {
2493 // TODO: This should check the GCStrategy
2495 const auto &FunctionGCName
= F
.getGC();
2496 const StringRef
StatepointExampleName("statepoint-example");
2497 const StringRef
CoreCLRName("coreclr");
2498 return (StatepointExampleName
== FunctionGCName
) ||
2499 (CoreCLRName
== FunctionGCName
);
2504 static void stripNonValidData(Module
&M
) {
2506 assert(llvm::any_of(M
, shouldRewriteStatepointsIn
) && "precondition!");
2509 for (Function
&F
: M
)
2510 stripNonValidAttributesFromPrototype(F
);
2512 for (Function
&F
: M
)
2513 stripNonValidDataFromBody(F
);
2516 bool RewriteStatepointsForGC::runOnFunction(Function
&F
, DominatorTree
&DT
,
2517 TargetTransformInfo
&TTI
,
2518 const TargetLibraryInfo
&TLI
) {
2519 assert(!F
.isDeclaration() && !F
.empty() &&
2520 "need function body to rewrite statepoints in");
2521 assert(shouldRewriteStatepointsIn(F
) && "mismatch in rewrite decision");
2523 auto NeedsRewrite
= [&TLI
](Instruction
&I
) {
2524 if (const auto *Call
= dyn_cast
<CallBase
>(&I
))
2525 return !callsGCLeafFunction(Call
, TLI
) && !isStatepoint(Call
);
2529 // Delete any unreachable statepoints so that we don't have unrewritten
2530 // statepoints surviving this pass. This makes testing easier and the
2531 // resulting IR less confusing to human readers.
2532 DomTreeUpdater
DTU(DT
, DomTreeUpdater::UpdateStrategy::Lazy
);
2533 bool MadeChange
= removeUnreachableBlocks(F
, &DTU
);
2534 // Flush the Dominator Tree.
2537 // Gather all the statepoints which need rewritten. Be careful to only
2538 // consider those in reachable code since we need to ask dominance queries
2539 // when rewriting. We'll delete the unreachable ones in a moment.
2540 SmallVector
<CallBase
*, 64> ParsePointNeeded
;
2541 for (Instruction
&I
: instructions(F
)) {
2542 // TODO: only the ones with the flag set!
2543 if (NeedsRewrite(I
)) {
2544 // NOTE removeUnreachableBlocks() is stronger than
2545 // DominatorTree::isReachableFromEntry(). In other words
2546 // removeUnreachableBlocks can remove some blocks for which
2547 // isReachableFromEntry() returns true.
2548 assert(DT
.isReachableFromEntry(I
.getParent()) &&
2549 "no unreachable blocks expected");
2550 ParsePointNeeded
.push_back(cast
<CallBase
>(&I
));
2554 // Return early if no work to do.
2555 if (ParsePointNeeded
.empty())
2558 // As a prepass, go ahead and aggressively destroy single entry phi nodes.
2559 // These are created by LCSSA. They have the effect of increasing the size
2560 // of liveness sets for no good reason. It may be harder to do this post
2561 // insertion since relocations and base phis can confuse things.
2562 for (BasicBlock
&BB
: F
)
2563 if (BB
.getUniquePredecessor()) {
2565 FoldSingleEntryPHINodes(&BB
);
2568 // Before we start introducing relocations, we want to tweak the IR a bit to
2569 // avoid unfortunate code generation effects. The main example is that we
2570 // want to try to make sure the comparison feeding a branch is after any
2571 // safepoints. Otherwise, we end up with a comparison of pre-relocation
2572 // values feeding a branch after relocation. This is semantically correct,
2573 // but results in extra register pressure since both the pre-relocation and
2574 // post-relocation copies must be available in registers. For code without
2575 // relocations this is handled elsewhere, but teaching the scheduler to
2576 // reverse the transform we're about to do would be slightly complex.
2577 // Note: This may extend the live range of the inputs to the icmp and thus
2578 // increase the liveset of any statepoint we move over. This is profitable
2579 // as long as all statepoints are in rare blocks. If we had in-register
2580 // lowering for live values this would be a much safer transform.
2581 auto getConditionInst
= [](Instruction
*TI
) -> Instruction
* {
2582 if (auto *BI
= dyn_cast
<BranchInst
>(TI
))
2583 if (BI
->isConditional())
2584 return dyn_cast
<Instruction
>(BI
->getCondition());
2585 // TODO: Extend this to handle switches
2588 for (BasicBlock
&BB
: F
) {
2589 Instruction
*TI
= BB
.getTerminator();
2590 if (auto *Cond
= getConditionInst(TI
))
2591 // TODO: Handle more than just ICmps here. We should be able to move
2592 // most instructions without side effects or memory access.
2593 if (isa
<ICmpInst
>(Cond
) && Cond
->hasOneUse()) {
2595 Cond
->moveBefore(TI
);
2599 // Nasty workaround - The base computation code in the main algorithm doesn't
2600 // consider the fact that a GEP can be used to convert a scalar to a vector.
2601 // The right fix for this is to integrate GEPs into the base rewriting
2602 // algorithm properly, this is just a short term workaround to prevent
2603 // crashes by canonicalizing such GEPs into fully vector GEPs.
2604 for (Instruction
&I
: instructions(F
)) {
2605 if (!isa
<GetElementPtrInst
>(I
))
2609 for (unsigned i
= 0; i
< I
.getNumOperands(); i
++)
2610 if (I
.getOperand(i
)->getType()->isVectorTy()) {
2612 VF
== I
.getOperand(i
)->getType()->getVectorNumElements());
2613 VF
= I
.getOperand(i
)->getType()->getVectorNumElements();
2616 // It's the vector to scalar traversal through the pointer operand which
2617 // confuses base pointer rewriting, so limit ourselves to that case.
2618 if (!I
.getOperand(0)->getType()->isVectorTy() && VF
!= 0) {
2620 auto *Splat
= B
.CreateVectorSplat(VF
, I
.getOperand(0));
2621 I
.setOperand(0, Splat
);
2626 MadeChange
|= insertParsePoints(F
, DT
, TTI
, ParsePointNeeded
);
2630 // liveness computation via standard dataflow
2631 // -------------------------------------------------------------------
2633 // TODO: Consider using bitvectors for liveness, the set of potentially
2634 // interesting values should be small and easy to pre-compute.
2636 /// Compute the live-in set for the location rbegin starting from
2637 /// the live-out set of the basic block
2638 static void computeLiveInValues(BasicBlock::reverse_iterator Begin
,
2639 BasicBlock::reverse_iterator End
,
2640 SetVector
<Value
*> &LiveTmp
) {
2641 for (auto &I
: make_range(Begin
, End
)) {
2642 // KILL/Def - Remove this definition from LiveIn
2645 // Don't consider *uses* in PHI nodes, we handle their contribution to
2646 // predecessor blocks when we seed the LiveOut sets
2647 if (isa
<PHINode
>(I
))
2650 // USE - Add to the LiveIn set for this instruction
2651 for (Value
*V
: I
.operands()) {
2652 assert(!isUnhandledGCPointerType(V
->getType()) &&
2653 "support for FCA unimplemented");
2654 if (isHandledGCPointerType(V
->getType()) && !isa
<Constant
>(V
)) {
2655 // The choice to exclude all things constant here is slightly subtle.
2656 // There are two independent reasons:
2657 // - We assume that things which are constant (from LLVM's definition)
2658 // do not move at runtime. For example, the address of a global
2659 // variable is fixed, even though it's contents may not be.
2660 // - Second, we can't disallow arbitrary inttoptr constants even
2661 // if the language frontend does. Optimization passes are free to
2662 // locally exploit facts without respect to global reachability. This
2663 // can create sections of code which are dynamically unreachable and
2664 // contain just about anything. (see constants.ll in tests)
2671 static void computeLiveOutSeed(BasicBlock
*BB
, SetVector
<Value
*> &LiveTmp
) {
2672 for (BasicBlock
*Succ
: successors(BB
)) {
2673 for (auto &I
: *Succ
) {
2674 PHINode
*PN
= dyn_cast
<PHINode
>(&I
);
2678 Value
*V
= PN
->getIncomingValueForBlock(BB
);
2679 assert(!isUnhandledGCPointerType(V
->getType()) &&
2680 "support for FCA unimplemented");
2681 if (isHandledGCPointerType(V
->getType()) && !isa
<Constant
>(V
))
2687 static SetVector
<Value
*> computeKillSet(BasicBlock
*BB
) {
2688 SetVector
<Value
*> KillSet
;
2689 for (Instruction
&I
: *BB
)
2690 if (isHandledGCPointerType(I
.getType()))
2696 /// Check that the items in 'Live' dominate 'TI'. This is used as a basic
2697 /// sanity check for the liveness computation.
2698 static void checkBasicSSA(DominatorTree
&DT
, SetVector
<Value
*> &Live
,
2699 Instruction
*TI
, bool TermOkay
= false) {
2700 for (Value
*V
: Live
) {
2701 if (auto *I
= dyn_cast
<Instruction
>(V
)) {
2702 // The terminator can be a member of the LiveOut set. LLVM's definition
2703 // of instruction dominance states that V does not dominate itself. As
2704 // such, we need to special case this to allow it.
2705 if (TermOkay
&& TI
== I
)
2707 assert(DT
.dominates(I
, TI
) &&
2708 "basic SSA liveness expectation violated by liveness analysis");
2713 /// Check that all the liveness sets used during the computation of liveness
2714 /// obey basic SSA properties. This is useful for finding cases where we miss
2716 static void checkBasicSSA(DominatorTree
&DT
, GCPtrLivenessData
&Data
,
2718 checkBasicSSA(DT
, Data
.LiveSet
[&BB
], BB
.getTerminator());
2719 checkBasicSSA(DT
, Data
.LiveOut
[&BB
], BB
.getTerminator(), true);
2720 checkBasicSSA(DT
, Data
.LiveIn
[&BB
], BB
.getTerminator());
2724 static void computeLiveInValues(DominatorTree
&DT
, Function
&F
,
2725 GCPtrLivenessData
&Data
) {
2726 SmallSetVector
<BasicBlock
*, 32> Worklist
;
2728 // Seed the liveness for each individual block
2729 for (BasicBlock
&BB
: F
) {
2730 Data
.KillSet
[&BB
] = computeKillSet(&BB
);
2731 Data
.LiveSet
[&BB
].clear();
2732 computeLiveInValues(BB
.rbegin(), BB
.rend(), Data
.LiveSet
[&BB
]);
2735 for (Value
*Kill
: Data
.KillSet
[&BB
])
2736 assert(!Data
.LiveSet
[&BB
].count(Kill
) && "live set contains kill");
2739 Data
.LiveOut
[&BB
] = SetVector
<Value
*>();
2740 computeLiveOutSeed(&BB
, Data
.LiveOut
[&BB
]);
2741 Data
.LiveIn
[&BB
] = Data
.LiveSet
[&BB
];
2742 Data
.LiveIn
[&BB
].set_union(Data
.LiveOut
[&BB
]);
2743 Data
.LiveIn
[&BB
].set_subtract(Data
.KillSet
[&BB
]);
2744 if (!Data
.LiveIn
[&BB
].empty())
2745 Worklist
.insert(pred_begin(&BB
), pred_end(&BB
));
2748 // Propagate that liveness until stable
2749 while (!Worklist
.empty()) {
2750 BasicBlock
*BB
= Worklist
.pop_back_val();
2752 // Compute our new liveout set, then exit early if it hasn't changed despite
2753 // the contribution of our successor.
2754 SetVector
<Value
*> LiveOut
= Data
.LiveOut
[BB
];
2755 const auto OldLiveOutSize
= LiveOut
.size();
2756 for (BasicBlock
*Succ
: successors(BB
)) {
2757 assert(Data
.LiveIn
.count(Succ
));
2758 LiveOut
.set_union(Data
.LiveIn
[Succ
]);
2760 // assert OutLiveOut is a subset of LiveOut
2761 if (OldLiveOutSize
== LiveOut
.size()) {
2762 // If the sets are the same size, then we didn't actually add anything
2763 // when unioning our successors LiveIn. Thus, the LiveIn of this block
2767 Data
.LiveOut
[BB
] = LiveOut
;
2769 // Apply the effects of this basic block
2770 SetVector
<Value
*> LiveTmp
= LiveOut
;
2771 LiveTmp
.set_union(Data
.LiveSet
[BB
]);
2772 LiveTmp
.set_subtract(Data
.KillSet
[BB
]);
2774 assert(Data
.LiveIn
.count(BB
));
2775 const SetVector
<Value
*> &OldLiveIn
= Data
.LiveIn
[BB
];
2776 // assert: OldLiveIn is a subset of LiveTmp
2777 if (OldLiveIn
.size() != LiveTmp
.size()) {
2778 Data
.LiveIn
[BB
] = LiveTmp
;
2779 Worklist
.insert(pred_begin(BB
), pred_end(BB
));
2781 } // while (!Worklist.empty())
2784 // Sanity check our output against SSA properties. This helps catch any
2785 // missing kills during the above iteration.
2786 for (BasicBlock
&BB
: F
)
2787 checkBasicSSA(DT
, Data
, BB
);
2791 static void findLiveSetAtInst(Instruction
*Inst
, GCPtrLivenessData
&Data
,
2792 StatepointLiveSetTy
&Out
) {
2793 BasicBlock
*BB
= Inst
->getParent();
2795 // Note: The copy is intentional and required
2796 assert(Data
.LiveOut
.count(BB
));
2797 SetVector
<Value
*> LiveOut
= Data
.LiveOut
[BB
];
2799 // We want to handle the statepoint itself oddly. It's
2800 // call result is not live (normal), nor are it's arguments
2801 // (unless they're used again later). This adjustment is
2802 // specifically what we need to relocate
2803 computeLiveInValues(BB
->rbegin(), ++Inst
->getIterator().getReverse(),
2805 LiveOut
.remove(Inst
);
2806 Out
.insert(LiveOut
.begin(), LiveOut
.end());
2809 static void recomputeLiveInValues(GCPtrLivenessData
&RevisedLivenessData
,
2811 PartiallyConstructedSafepointRecord
&Info
) {
2812 StatepointLiveSetTy Updated
;
2813 findLiveSetAtInst(Call
, RevisedLivenessData
, Updated
);
2815 // We may have base pointers which are now live that weren't before. We need
2816 // to update the PointerToBase structure to reflect this.
2817 for (auto V
: Updated
)
2818 if (Info
.PointerToBase
.insert({V
, V
}).second
) {
2819 assert(isKnownBaseResult(V
) &&
2820 "Can't find base for unexpected live value!");
2825 for (auto V
: Updated
)
2826 assert(Info
.PointerToBase
.count(V
) &&
2827 "Must be able to find base for live value!");
2830 // Remove any stale base mappings - this can happen since our liveness is
2831 // more precise then the one inherent in the base pointer analysis.
2832 DenseSet
<Value
*> ToErase
;
2833 for (auto KVPair
: Info
.PointerToBase
)
2834 if (!Updated
.count(KVPair
.first
))
2835 ToErase
.insert(KVPair
.first
);
2837 for (auto *V
: ToErase
)
2838 Info
.PointerToBase
.erase(V
);
2841 for (auto KVPair
: Info
.PointerToBase
)
2842 assert(Updated
.count(KVPair
.first
) && "record for non-live value");
2845 Info
.LiveSet
= Updated
;