1 //===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Rewrite call/invoke instructions so as to make potential relocations
10 // performed by the garbage collector explicit in the IR.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/MapVector.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/Sequence.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/iterator_range.h"
27 #include "llvm/Analysis/DomTreeUpdater.h"
28 #include "llvm/Analysis/TargetLibraryInfo.h"
29 #include "llvm/Analysis/TargetTransformInfo.h"
30 #include "llvm/IR/Argument.h"
31 #include "llvm/IR/AttributeMask.h"
32 #include "llvm/IR/Attributes.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/CallingConv.h"
35 #include "llvm/IR/Constant.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Dominators.h"
40 #include "llvm/IR/Function.h"
41 #include "llvm/IR/GCStrategy.h"
42 #include "llvm/IR/IRBuilder.h"
43 #include "llvm/IR/InstIterator.h"
44 #include "llvm/IR/InstrTypes.h"
45 #include "llvm/IR/Instruction.h"
46 #include "llvm/IR/Instructions.h"
47 #include "llvm/IR/IntrinsicInst.h"
48 #include "llvm/IR/Intrinsics.h"
49 #include "llvm/IR/LLVMContext.h"
50 #include "llvm/IR/MDBuilder.h"
51 #include "llvm/IR/Metadata.h"
52 #include "llvm/IR/Module.h"
53 #include "llvm/IR/Statepoint.h"
54 #include "llvm/IR/Type.h"
55 #include "llvm/IR/User.h"
56 #include "llvm/IR/Value.h"
57 #include "llvm/IR/ValueHandle.h"
58 #include "llvm/Support/Casting.h"
59 #include "llvm/Support/CommandLine.h"
60 #include "llvm/Support/Compiler.h"
61 #include "llvm/Support/Debug.h"
62 #include "llvm/Support/ErrorHandling.h"
63 #include "llvm/Support/raw_ostream.h"
64 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
65 #include "llvm/Transforms/Utils/Local.h"
66 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
78 #define DEBUG_TYPE "rewrite-statepoints-for-gc"
82 // Print the liveset found at the insert location
83 static cl::opt
<bool> PrintLiveSet("spp-print-liveset", cl::Hidden
,
85 static cl::opt
<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden
,
88 // Print out the base pointers for debugging
89 static cl::opt
<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden
,
92 // Cost threshold measuring when it is profitable to rematerialize value instead
94 static cl::opt
<unsigned>
95 RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden
,
98 #ifdef EXPENSIVE_CHECKS
99 static bool ClobberNonLive
= true;
101 static bool ClobberNonLive
= false;
104 static cl::opt
<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
105 cl::location(ClobberNonLive
),
109 AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
110 cl::Hidden
, cl::init(true));
112 static cl::opt
<bool> RematDerivedAtUses("rs4gc-remat-derived-at-uses",
113 cl::Hidden
, cl::init(true));
115 /// The IR fed into RewriteStatepointsForGC may have had attributes and
116 /// metadata implying dereferenceability that are no longer valid/correct after
117 /// RewriteStatepointsForGC has run. This is because semantically, after
118 /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
119 /// heap. stripNonValidData (conservatively) restores
120 /// correctness by erasing all attributes in the module that externally imply
121 /// dereferenceability. Similar reasoning also applies to the noalias
122 /// attributes and metadata. gc.statepoint can touch the entire heap including
124 /// Apart from attributes and metadata, we also remove instructions that imply
125 /// constant physical memory: llvm.invariant.start.
126 static void stripNonValidData(Module
&M
);
128 // Find the GC strategy for a function, or null if it doesn't have one.
129 static std::unique_ptr
<GCStrategy
> findGCStrategy(Function
&F
);
131 static bool shouldRewriteStatepointsIn(Function
&F
);
133 PreservedAnalyses
RewriteStatepointsForGC::run(Module
&M
,
134 ModuleAnalysisManager
&AM
) {
135 bool Changed
= false;
136 auto &FAM
= AM
.getResult
<FunctionAnalysisManagerModuleProxy
>(M
).getManager();
137 for (Function
&F
: M
) {
138 // Nothing to do for declarations.
139 if (F
.isDeclaration() || F
.empty())
142 // Policy choice says not to rewrite - the most common reason is that we're
143 // compiling code without a GCStrategy.
144 if (!shouldRewriteStatepointsIn(F
))
147 auto &DT
= FAM
.getResult
<DominatorTreeAnalysis
>(F
);
148 auto &TTI
= FAM
.getResult
<TargetIRAnalysis
>(F
);
149 auto &TLI
= FAM
.getResult
<TargetLibraryAnalysis
>(F
);
150 Changed
|= runOnFunction(F
, DT
, TTI
, TLI
);
153 return PreservedAnalyses::all();
155 // stripNonValidData asserts that shouldRewriteStatepointsIn
156 // returns true for at least one function in the module. Since at least
157 // one function changed, we know that the precondition is satisfied.
158 stripNonValidData(M
);
160 PreservedAnalyses PA
;
161 PA
.preserve
<TargetIRAnalysis
>();
162 PA
.preserve
<TargetLibraryAnalysis
>();
168 struct GCPtrLivenessData
{
169 /// Values defined in this block.
170 MapVector
<BasicBlock
*, SetVector
<Value
*>> KillSet
;
172 /// Values used in this block (and thus live); does not included values
173 /// killed within this block.
174 MapVector
<BasicBlock
*, SetVector
<Value
*>> LiveSet
;
176 /// Values live into this basic block (i.e. used by any
177 /// instruction in this basic block or ones reachable from here)
178 MapVector
<BasicBlock
*, SetVector
<Value
*>> LiveIn
;
180 /// Values live out of this basic block (i.e. live into
181 /// any successor block)
182 MapVector
<BasicBlock
*, SetVector
<Value
*>> LiveOut
;
185 // The type of the internal cache used inside the findBasePointers family
186 // of functions. From the callers perspective, this is an opaque type and
187 // should not be inspected.
189 // In the actual implementation this caches two relations:
190 // - The base relation itself (i.e. this pointer is based on that one)
191 // - The base defining value relation (i.e. before base_phi insertion)
192 // Generally, after the execution of a full findBasePointer call, only the
193 // base relation will remain. Internally, we add a mixture of the two
194 // types, then update all the second type to the first type
195 using DefiningValueMapTy
= MapVector
<Value
*, Value
*>;
196 using IsKnownBaseMapTy
= MapVector
<Value
*, bool>;
197 using PointerToBaseTy
= MapVector
<Value
*, Value
*>;
198 using StatepointLiveSetTy
= SetVector
<Value
*>;
199 using RematerializedValueMapTy
=
200 MapVector
<AssertingVH
<Instruction
>, AssertingVH
<Value
>>;
202 struct PartiallyConstructedSafepointRecord
{
203 /// The set of values known to be live across this safepoint
204 StatepointLiveSetTy LiveSet
;
206 /// The *new* gc.statepoint instruction itself. This produces the token
207 /// that normal path gc.relocates and the gc.result are tied to.
208 GCStatepointInst
*StatepointToken
;
210 /// Instruction to which exceptional gc relocates are attached
211 /// Makes it easier to iterate through them during relocationViaAlloca.
212 Instruction
*UnwindToken
;
214 /// Record live values we are rematerialized instead of relocating.
215 /// They are not included into 'LiveSet' field.
216 /// Maps rematerialized copy to it's original value.
217 RematerializedValueMapTy RematerializedValues
;
220 struct RematerizlizationCandidateRecord
{
221 // Chain from derived pointer to base.
222 SmallVector
<Instruction
*, 3> ChainToBase
;
226 InstructionCost Cost
;
228 using RematCandTy
= MapVector
<Value
*, RematerizlizationCandidateRecord
>;
230 } // end anonymous namespace
232 static ArrayRef
<Use
> GetDeoptBundleOperands(const CallBase
*Call
) {
233 std::optional
<OperandBundleUse
> DeoptBundle
=
234 Call
->getOperandBundle(LLVMContext::OB_deopt
);
237 assert(AllowStatepointWithNoDeoptInfo
&&
238 "Found non-leaf call without deopt info!");
242 return DeoptBundle
->Inputs
;
245 /// Compute the live-in set for every basic block in the function
246 static void computeLiveInValues(DominatorTree
&DT
, Function
&F
,
247 GCPtrLivenessData
&Data
, GCStrategy
*GC
);
249 /// Given results from the dataflow liveness computation, find the set of live
250 /// Values at a particular instruction.
251 static void findLiveSetAtInst(Instruction
*inst
, GCPtrLivenessData
&Data
,
252 StatepointLiveSetTy
&out
, GCStrategy
*GC
);
254 static bool isGCPointerType(Type
*T
, GCStrategy
*GC
) {
255 assert(GC
&& "GC Strategy for isGCPointerType cannot be null");
257 if (!isa
<PointerType
>(T
))
260 // conservative - same as StatepointLowering
261 return GC
->isGCManagedPointer(T
).value_or(true);
264 // Return true if this type is one which a) is a gc pointer or contains a GC
265 // pointer and b) is of a type this code expects to encounter as a live value.
266 // (The insertion code will assert that a type which matches (a) and not (b)
267 // is not encountered.)
268 static bool isHandledGCPointerType(Type
*T
, GCStrategy
*GC
) {
269 // We fully support gc pointers
270 if (isGCPointerType(T
, GC
))
272 // We partially support vectors of gc pointers. The code will assert if it
273 // can't handle something.
274 if (auto VT
= dyn_cast
<VectorType
>(T
))
275 if (isGCPointerType(VT
->getElementType(), GC
))
281 /// Returns true if this type contains a gc pointer whether we know how to
282 /// handle that type or not.
283 static bool containsGCPtrType(Type
*Ty
, GCStrategy
*GC
) {
284 if (isGCPointerType(Ty
, GC
))
286 if (VectorType
*VT
= dyn_cast
<VectorType
>(Ty
))
287 return isGCPointerType(VT
->getScalarType(), GC
);
288 if (ArrayType
*AT
= dyn_cast
<ArrayType
>(Ty
))
289 return containsGCPtrType(AT
->getElementType(), GC
);
290 if (StructType
*ST
= dyn_cast
<StructType
>(Ty
))
291 return llvm::any_of(ST
->elements(),
292 [GC
](Type
*Ty
) { return containsGCPtrType(Ty
, GC
); });
296 // Returns true if this is a type which a) is a gc pointer or contains a GC
297 // pointer and b) is of a type which the code doesn't expect (i.e. first class
298 // aggregates). Used to trip assertions.
299 static bool isUnhandledGCPointerType(Type
*Ty
, GCStrategy
*GC
) {
300 return containsGCPtrType(Ty
, GC
) && !isHandledGCPointerType(Ty
, GC
);
304 // Return the name of the value suffixed with the provided value, or if the
305 // value didn't have a name, the default value specified.
306 static std::string
suffixed_name_or(Value
*V
, StringRef Suffix
,
307 StringRef DefaultName
) {
308 return V
->hasName() ? (V
->getName() + Suffix
).str() : DefaultName
.str();
311 // Conservatively identifies any definitions which might be live at the
312 // given instruction. The analysis is performed immediately before the
313 // given instruction. Values defined by that instruction are not considered
314 // live. Values used by that instruction are considered live.
315 static void analyzeParsePointLiveness(
316 DominatorTree
&DT
, GCPtrLivenessData
&OriginalLivenessData
, CallBase
*Call
,
317 PartiallyConstructedSafepointRecord
&Result
, GCStrategy
*GC
) {
318 StatepointLiveSetTy LiveSet
;
319 findLiveSetAtInst(Call
, OriginalLivenessData
, LiveSet
, GC
);
322 dbgs() << "Live Variables:\n";
323 for (Value
*V
: LiveSet
)
324 dbgs() << " " << V
->getName() << " " << *V
<< "\n";
326 if (PrintLiveSetSize
) {
327 dbgs() << "Safepoint For: " << Call
->getCalledOperand()->getName() << "\n";
328 dbgs() << "Number live values: " << LiveSet
.size() << "\n";
330 Result
.LiveSet
= LiveSet
;
333 /// Returns true if V is a known base.
334 static bool isKnownBase(Value
*V
, const IsKnownBaseMapTy
&KnownBases
);
336 /// Caches the IsKnownBase flag for a value and asserts that it wasn't present
337 /// in the cache before.
338 static void setKnownBase(Value
*V
, bool IsKnownBase
,
339 IsKnownBaseMapTy
&KnownBases
);
341 static Value
*findBaseDefiningValue(Value
*I
, DefiningValueMapTy
&Cache
,
342 IsKnownBaseMapTy
&KnownBases
);
344 /// Return a base defining value for the 'Index' element of the given vector
345 /// instruction 'I'. If Index is null, returns a BDV for the entire vector
346 /// 'I'. As an optimization, this method will try to determine when the
347 /// element is known to already be a base pointer. If this can be established,
348 /// the second value in the returned pair will be true. Note that either a
349 /// vector or a pointer typed value can be returned. For the former, the
350 /// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
351 /// If the later, the return pointer is a BDV (or possibly a base) for the
352 /// particular element in 'I'.
353 static Value
*findBaseDefiningValueOfVector(Value
*I
, DefiningValueMapTy
&Cache
,
354 IsKnownBaseMapTy
&KnownBases
) {
355 // Each case parallels findBaseDefiningValue below, see that code for
356 // detailed motivation.
358 auto Cached
= Cache
.find(I
);
359 if (Cached
!= Cache
.end())
360 return Cached
->second
;
362 if (isa
<Argument
>(I
)) {
363 // An incoming argument to the function is a base pointer
365 setKnownBase(I
, /* IsKnownBase */true, KnownBases
);
369 if (isa
<Constant
>(I
)) {
370 // Base of constant vector consists only of constant null pointers.
371 // For reasoning see similar case inside 'findBaseDefiningValue' function.
372 auto *CAZ
= ConstantAggregateZero::get(I
->getType());
374 setKnownBase(CAZ
, /* IsKnownBase */true, KnownBases
);
378 if (isa
<LoadInst
>(I
)) {
380 setKnownBase(I
, /* IsKnownBase */true, KnownBases
);
384 if (isa
<InsertElementInst
>(I
)) {
385 // We don't know whether this vector contains entirely base pointers or
386 // not. To be conservatively correct, we treat it as a BDV and will
387 // duplicate code as needed to construct a parallel vector of bases.
389 setKnownBase(I
, /* IsKnownBase */false, KnownBases
);
393 if (isa
<ShuffleVectorInst
>(I
)) {
394 // We don't know whether this vector contains entirely base pointers or
395 // not. To be conservatively correct, we treat it as a BDV and will
396 // duplicate code as needed to construct a parallel vector of bases.
397 // TODO: There a number of local optimizations which could be applied here
398 // for particular sufflevector patterns.
400 setKnownBase(I
, /* IsKnownBase */false, KnownBases
);
404 // The behavior of getelementptr instructions is the same for vector and
405 // non-vector data types.
406 if (auto *GEP
= dyn_cast
<GetElementPtrInst
>(I
)) {
408 findBaseDefiningValue(GEP
->getPointerOperand(), Cache
, KnownBases
);
413 // The behavior of freeze instructions is the same for vector and
414 // non-vector data types.
415 if (auto *Freeze
= dyn_cast
<FreezeInst
>(I
)) {
416 auto *BDV
= findBaseDefiningValue(Freeze
->getOperand(0), Cache
, KnownBases
);
421 // If the pointer comes through a bitcast of a vector of pointers to
422 // a vector of another type of pointer, then look through the bitcast
423 if (auto *BC
= dyn_cast
<BitCastInst
>(I
)) {
424 auto *BDV
= findBaseDefiningValue(BC
->getOperand(0), Cache
, KnownBases
);
429 // We assume that functions in the source language only return base
430 // pointers. This should probably be generalized via attributes to support
431 // both source language and internal functions.
432 if (isa
<CallInst
>(I
) || isa
<InvokeInst
>(I
)) {
434 setKnownBase(I
, /* IsKnownBase */true, KnownBases
);
438 // A PHI or Select is a base defining value. The outer findBasePointer
439 // algorithm is responsible for constructing a base value for this BDV.
440 assert((isa
<SelectInst
>(I
) || isa
<PHINode
>(I
)) &&
441 "unknown vector instruction - no base found for vector element");
443 setKnownBase(I
, /* IsKnownBase */false, KnownBases
);
447 /// Helper function for findBasePointer - Will return a value which either a)
448 /// defines the base pointer for the input, b) blocks the simple search
449 /// (i.e. a PHI or Select of two derived pointers), or c) involves a change
450 /// from pointer to vector type or back.
451 static Value
*findBaseDefiningValue(Value
*I
, DefiningValueMapTy
&Cache
,
452 IsKnownBaseMapTy
&KnownBases
) {
453 assert(I
->getType()->isPtrOrPtrVectorTy() &&
454 "Illegal to ask for the base pointer of a non-pointer type");
455 auto Cached
= Cache
.find(I
);
456 if (Cached
!= Cache
.end())
457 return Cached
->second
;
459 if (I
->getType()->isVectorTy())
460 return findBaseDefiningValueOfVector(I
, Cache
, KnownBases
);
462 if (isa
<Argument
>(I
)) {
463 // An incoming argument to the function is a base pointer
464 // We should have never reached here if this argument isn't an gc value
466 setKnownBase(I
, /* IsKnownBase */true, KnownBases
);
470 if (isa
<Constant
>(I
)) {
471 // We assume that objects with a constant base (e.g. a global) can't move
472 // and don't need to be reported to the collector because they are always
473 // live. Besides global references, all kinds of constants (e.g. undef,
474 // constant expressions, null pointers) can be introduced by the inliner or
475 // the optimizer, especially on dynamically dead paths.
476 // Here we treat all of them as having single null base. By doing this we
477 // trying to avoid problems reporting various conflicts in a form of
478 // "phi (const1, const2)" or "phi (const, regular gc ptr)".
479 // See constant.ll file for relevant test cases.
481 auto *CPN
= ConstantPointerNull::get(cast
<PointerType
>(I
->getType()));
483 setKnownBase(CPN
, /* IsKnownBase */true, KnownBases
);
487 // inttoptrs in an integral address space are currently ill-defined. We
488 // treat them as defining base pointers here for consistency with the
489 // constant rule above and because we don't really have a better semantic
490 // to give them. Note that the optimizer is always free to insert undefined
491 // behavior on dynamically dead paths as well.
492 if (isa
<IntToPtrInst
>(I
)) {
494 setKnownBase(I
, /* IsKnownBase */true, KnownBases
);
498 if (CastInst
*CI
= dyn_cast
<CastInst
>(I
)) {
499 Value
*Def
= CI
->stripPointerCasts();
500 // If stripping pointer casts changes the address space there is an
501 // addrspacecast in between.
502 assert(cast
<PointerType
>(Def
->getType())->getAddressSpace() ==
503 cast
<PointerType
>(CI
->getType())->getAddressSpace() &&
504 "unsupported addrspacecast");
505 // If we find a cast instruction here, it means we've found a cast which is
506 // not simply a pointer cast (i.e. an inttoptr). We don't know how to
507 // handle int->ptr conversion.
508 assert(!isa
<CastInst
>(Def
) && "shouldn't find another cast here");
509 auto *BDV
= findBaseDefiningValue(Def
, Cache
, KnownBases
);
514 if (isa
<LoadInst
>(I
)) {
515 // The value loaded is an gc base itself
517 setKnownBase(I
, /* IsKnownBase */true, KnownBases
);
521 if (GetElementPtrInst
*GEP
= dyn_cast
<GetElementPtrInst
>(I
)) {
522 // The base of this GEP is the base
524 findBaseDefiningValue(GEP
->getPointerOperand(), Cache
, KnownBases
);
529 if (auto *Freeze
= dyn_cast
<FreezeInst
>(I
)) {
530 auto *BDV
= findBaseDefiningValue(Freeze
->getOperand(0), Cache
, KnownBases
);
535 if (IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
)) {
536 switch (II
->getIntrinsicID()) {
538 // fall through to general call handling
540 case Intrinsic::experimental_gc_statepoint
:
541 llvm_unreachable("statepoints don't produce pointers");
542 case Intrinsic::experimental_gc_relocate
:
543 // Rerunning safepoint insertion after safepoints are already
544 // inserted is not supported. It could probably be made to work,
545 // but why are you doing this? There's no good reason.
546 llvm_unreachable("repeat safepoint insertion is not supported");
547 case Intrinsic::gcroot
:
548 // Currently, this mechanism hasn't been extended to work with gcroot.
549 // There's no reason it couldn't be, but I haven't thought about the
550 // implications much.
552 "interaction with the gcroot mechanism is not supported");
553 case Intrinsic::experimental_gc_get_pointer_base
:
554 auto *BDV
= findBaseDefiningValue(II
->getOperand(0), Cache
, KnownBases
);
559 // We assume that functions in the source language only return base
560 // pointers. This should probably be generalized via attributes to support
561 // both source language and internal functions.
562 if (isa
<CallInst
>(I
) || isa
<InvokeInst
>(I
)) {
564 setKnownBase(I
, /* IsKnownBase */true, KnownBases
);
568 // TODO: I have absolutely no idea how to implement this part yet. It's not
569 // necessarily hard, I just haven't really looked at it yet.
570 assert(!isa
<LandingPadInst
>(I
) && "Landing Pad is unimplemented");
572 if (isa
<AtomicCmpXchgInst
>(I
)) {
573 // A CAS is effectively a atomic store and load combined under a
574 // predicate. From the perspective of base pointers, we just treat it
577 setKnownBase(I
, /* IsKnownBase */true, KnownBases
);
581 assert(!isa
<AtomicRMWInst
>(I
) && "Xchg handled above, all others are "
582 "binary ops which don't apply to pointers");
584 // The aggregate ops. Aggregates can either be in the heap or on the
585 // stack, but in either case, this is simply a field load. As a result,
586 // this is a defining definition of the base just like a load is.
587 if (isa
<ExtractValueInst
>(I
)) {
589 setKnownBase(I
, /* IsKnownBase */true, KnownBases
);
593 // We should never see an insert vector since that would require we be
594 // tracing back a struct value not a pointer value.
595 assert(!isa
<InsertValueInst
>(I
) &&
596 "Base pointer for a struct is meaningless");
598 // This value might have been generated by findBasePointer() called when
599 // substituting gc.get.pointer.base() intrinsic.
601 isa
<Instruction
>(I
) && cast
<Instruction
>(I
)->getMetadata("is_base_value");
602 setKnownBase(I
, /* IsKnownBase */IsKnownBase
, KnownBases
);
605 // An extractelement produces a base result exactly when it's input does.
606 // We may need to insert a parallel instruction to extract the appropriate
607 // element out of the base vector corresponding to the input. Given this,
608 // it's analogous to the phi and select case even though it's not a merge.
609 if (isa
<ExtractElementInst
>(I
))
610 // Note: There a lot of obvious peephole cases here. This are deliberately
611 // handled after the main base pointer inference algorithm to make writing
612 // test cases to exercise that code easier.
615 // The last two cases here don't return a base pointer. Instead, they
616 // return a value which dynamically selects from among several base
617 // derived pointers (each with it's own base potentially). It's the job of
618 // the caller to resolve these.
619 assert((isa
<SelectInst
>(I
) || isa
<PHINode
>(I
)) &&
620 "missing instruction case in findBaseDefiningValue");
624 /// Returns the base defining value for this value.
625 static Value
*findBaseDefiningValueCached(Value
*I
, DefiningValueMapTy
&Cache
,
626 IsKnownBaseMapTy
&KnownBases
) {
627 if (!Cache
.contains(I
)) {
628 auto *BDV
= findBaseDefiningValue(I
, Cache
, KnownBases
);
630 LLVM_DEBUG(dbgs() << "fBDV-cached: " << I
->getName() << " -> "
631 << Cache
[I
]->getName() << ", is known base = "
632 << KnownBases
[I
] << "\n");
634 assert(Cache
[I
] != nullptr);
635 assert(KnownBases
.contains(Cache
[I
]) &&
636 "Cached value must be present in known bases map");
640 /// Return a base pointer for this value if known. Otherwise, return it's
641 /// base defining value.
642 static Value
*findBaseOrBDV(Value
*I
, DefiningValueMapTy
&Cache
,
643 IsKnownBaseMapTy
&KnownBases
) {
644 Value
*Def
= findBaseDefiningValueCached(I
, Cache
, KnownBases
);
645 auto Found
= Cache
.find(Def
);
646 if (Found
!= Cache
.end()) {
647 // Either a base-of relation, or a self reference. Caller must check.
648 return Found
->second
;
650 // Only a BDV available
655 /// This value is a base pointer that is not generated by RS4GC, i.e. it already
656 /// exists in the code.
657 static bool isOriginalBaseResult(Value
*V
) {
658 // no recursion possible
659 return !isa
<PHINode
>(V
) && !isa
<SelectInst
>(V
) &&
660 !isa
<ExtractElementInst
>(V
) && !isa
<InsertElementInst
>(V
) &&
661 !isa
<ShuffleVectorInst
>(V
);
665 static bool isKnownBase(Value
*V
, const IsKnownBaseMapTy
&KnownBases
) {
666 auto It
= KnownBases
.find(V
);
667 assert(It
!= KnownBases
.end() && "Value not present in the map");
671 static void setKnownBase(Value
*V
, bool IsKnownBase
,
672 IsKnownBaseMapTy
&KnownBases
) {
674 auto It
= KnownBases
.find(V
);
675 if (It
!= KnownBases
.end())
676 assert(It
->second
== IsKnownBase
&& "Changing already present value");
678 KnownBases
[V
] = IsKnownBase
;
681 // Returns true if First and Second values are both scalar or both vector.
682 static bool areBothVectorOrScalar(Value
*First
, Value
*Second
) {
683 return isa
<VectorType
>(First
->getType()) ==
684 isa
<VectorType
>(Second
->getType());
689 /// Models the state of a single base defining value in the findBasePointer
690 /// algorithm for determining where a new instruction is needed to propagate
691 /// the base of this BDV.
695 // Starting state of lattice
697 // Some specific base value -- does *not* mean that instruction
698 // propagates the base of the object
699 // ex: gep %arg, 16 -> %arg is the base value
701 // Need to insert a node to represent a merge.
706 llvm_unreachable("missing state in map");
709 explicit BDVState(Value
*OriginalValue
)
710 : OriginalValue(OriginalValue
) {}
711 explicit BDVState(Value
*OriginalValue
, StatusTy Status
, Value
*BaseValue
= nullptr)
712 : OriginalValue(OriginalValue
), Status(Status
), BaseValue(BaseValue
) {
713 assert(Status
!= Base
|| BaseValue
);
716 StatusTy
getStatus() const { return Status
; }
717 Value
*getOriginalValue() const { return OriginalValue
; }
718 Value
*getBaseValue() const { return BaseValue
; }
720 bool isBase() const { return getStatus() == Base
; }
721 bool isUnknown() const { return getStatus() == Unknown
; }
722 bool isConflict() const { return getStatus() == Conflict
; }
724 // Values of type BDVState form a lattice, and this function implements the
727 void meet(const BDVState
&Other
) {
728 auto markConflict
= [&]() {
729 Status
= BDVState::Conflict
;
732 // Conflict is a final state.
735 // if we are not known - just take other state.
737 Status
= Other
.getStatus();
738 BaseValue
= Other
.getBaseValue();
742 assert(isBase() && "Unknown state");
743 // If other is unknown - just keep our state.
744 if (Other
.isUnknown())
746 // If other is conflict - it is a final state.
747 if (Other
.isConflict())
748 return markConflict();
749 // Other is base as well.
750 assert(Other
.isBase() && "Unknown state");
751 // If bases are different - Conflict.
752 if (getBaseValue() != Other
.getBaseValue())
753 return markConflict();
754 // We are identical, do nothing.
757 bool operator==(const BDVState
&Other
) const {
758 return OriginalValue
== Other
.OriginalValue
&& BaseValue
== Other
.BaseValue
&&
759 Status
== Other
.Status
;
762 bool operator!=(const BDVState
&other
) const { return !(*this == other
); }
770 void print(raw_ostream
&OS
) const {
771 switch (getStatus()) {
782 OS
<< " (base " << getBaseValue() << " - "
783 << (getBaseValue() ? getBaseValue()->getName() : "nullptr") << ")"
784 << " for " << OriginalValue
->getName() << ":";
788 AssertingVH
<Value
> OriginalValue
; // instruction this state corresponds to
789 StatusTy Status
= Unknown
;
790 AssertingVH
<Value
> BaseValue
= nullptr; // Non-null only if Status == Base.
793 } // end anonymous namespace
796 static raw_ostream
&operator<<(raw_ostream
&OS
, const BDVState
&State
) {
802 /// For a given value or instruction, figure out what base ptr its derived from.
803 /// For gc objects, this is simply itself. On success, returns a value which is
804 /// the base pointer. (This is reliable and can be used for relocation.) On
805 /// failure, returns nullptr.
806 static Value
*findBasePointer(Value
*I
, DefiningValueMapTy
&Cache
,
807 IsKnownBaseMapTy
&KnownBases
) {
808 Value
*Def
= findBaseOrBDV(I
, Cache
, KnownBases
);
810 if (isKnownBase(Def
, KnownBases
) && areBothVectorOrScalar(Def
, I
))
813 // Here's the rough algorithm:
814 // - For every SSA value, construct a mapping to either an actual base
815 // pointer or a PHI which obscures the base pointer.
816 // - Construct a mapping from PHI to unknown TOP state. Use an
817 // optimistic algorithm to propagate base pointer information. Lattice
822 // When algorithm terminates, all PHIs will either have a single concrete
823 // base or be in a conflict state.
824 // - For every conflict, insert a dummy PHI node without arguments. Add
825 // these to the base[Instruction] = BasePtr mapping. For every
826 // non-conflict, add the actual base.
827 // - For every conflict, add arguments for the base[a] of each input
830 // Note: A simpler form of this would be to add the conflict form of all
831 // PHIs without running the optimistic algorithm. This would be
832 // analogous to pessimistic data flow and would likely lead to an
833 // overall worse solution.
836 auto isExpectedBDVType
= [](Value
*BDV
) {
837 return isa
<PHINode
>(BDV
) || isa
<SelectInst
>(BDV
) ||
838 isa
<ExtractElementInst
>(BDV
) || isa
<InsertElementInst
>(BDV
) ||
839 isa
<ShuffleVectorInst
>(BDV
);
843 // Once populated, will contain a mapping from each potentially non-base BDV
844 // to a lattice value (described above) which corresponds to that BDV.
845 // We use the order of insertion (DFS over the def/use graph) to provide a
846 // stable deterministic ordering for visiting DenseMaps (which are unordered)
847 // below. This is important for deterministic compilation.
848 MapVector
<Value
*, BDVState
> States
;
851 auto VerifyStates
= [&]() {
852 for (auto &Entry
: States
) {
853 assert(Entry
.first
== Entry
.second
.getOriginalValue());
858 auto visitBDVOperands
= [](Value
*BDV
, std::function
<void (Value
*)> F
) {
859 if (PHINode
*PN
= dyn_cast
<PHINode
>(BDV
)) {
860 for (Value
*InVal
: PN
->incoming_values())
862 } else if (SelectInst
*SI
= dyn_cast
<SelectInst
>(BDV
)) {
863 F(SI
->getTrueValue());
864 F(SI
->getFalseValue());
865 } else if (auto *EE
= dyn_cast
<ExtractElementInst
>(BDV
)) {
866 F(EE
->getVectorOperand());
867 } else if (auto *IE
= dyn_cast
<InsertElementInst
>(BDV
)) {
868 F(IE
->getOperand(0));
869 F(IE
->getOperand(1));
870 } else if (auto *SV
= dyn_cast
<ShuffleVectorInst
>(BDV
)) {
871 // For a canonical broadcast, ignore the undef argument
872 // (without this, we insert a parallel base shuffle for every broadcast)
873 F(SV
->getOperand(0));
874 if (!SV
->isZeroEltSplat())
875 F(SV
->getOperand(1));
877 llvm_unreachable("unexpected BDV type");
882 // Recursively fill in all base defining values reachable from the initial
883 // one for which we don't already know a definite base value for
885 SmallVector
<Value
*, 16> Worklist
;
886 Worklist
.push_back(Def
);
887 States
.insert({Def
, BDVState(Def
)});
888 while (!Worklist
.empty()) {
889 Value
*Current
= Worklist
.pop_back_val();
890 assert(!isOriginalBaseResult(Current
) && "why did it get added?");
892 auto visitIncomingValue
= [&](Value
*InVal
) {
893 Value
*Base
= findBaseOrBDV(InVal
, Cache
, KnownBases
);
894 if (isKnownBase(Base
, KnownBases
) && areBothVectorOrScalar(Base
, InVal
))
895 // Known bases won't need new instructions introduced and can be
896 // ignored safely. However, this can only be done when InVal and Base
897 // are both scalar or both vector. Otherwise, we need to find a
898 // correct BDV for InVal, by creating an entry in the lattice
901 assert(isExpectedBDVType(Base
) && "the only non-base values "
902 "we see should be base defining values");
903 if (States
.insert(std::make_pair(Base
, BDVState(Base
))).second
)
904 Worklist
.push_back(Base
);
907 visitBDVOperands(Current
, visitIncomingValue
);
913 LLVM_DEBUG(dbgs() << "States after initialization:\n");
914 for (const auto &Pair
: States
) {
915 LLVM_DEBUG(dbgs() << " " << Pair
.second
<< " for " << *Pair
.first
<< "\n");
919 // Iterate forward through the value graph pruning any node from the state
920 // list where all of the inputs are base pointers. The purpose of this is to
921 // reuse existing values when the derived pointer we were asked to materialize
922 // a base pointer for happens to be a base pointer itself. (Or a sub-graph
924 SmallVector
<Value
*> ToRemove
;
927 for (auto Pair
: States
) {
928 Value
*BDV
= Pair
.first
;
929 auto canPruneInput
= [&](Value
*V
) {
930 // If the input of the BDV is the BDV itself we can prune it. This is
931 // only possible if the BDV is a PHI node.
932 if (V
->stripPointerCasts() == BDV
)
934 Value
*VBDV
= findBaseOrBDV(V
, Cache
, KnownBases
);
935 if (V
->stripPointerCasts() != VBDV
)
937 // The assumption is that anything not in the state list is
938 // propagates a base pointer.
939 return States
.count(VBDV
) == 0;
942 bool CanPrune
= true;
943 visitBDVOperands(BDV
, [&](Value
*Op
) {
944 CanPrune
= CanPrune
&& canPruneInput(Op
);
947 ToRemove
.push_back(BDV
);
949 for (Value
*V
: ToRemove
) {
951 // Cache the fact V is it's own base for later usage.
954 } while (!ToRemove
.empty());
956 // Did we manage to prove that Def itself must be a base pointer?
957 if (!States
.count(Def
))
960 // Return a phi state for a base defining value. We'll generate a new
961 // base state for known bases and expect to find a cached state otherwise.
962 auto GetStateForBDV
= [&](Value
*BaseValue
, Value
*Input
) {
963 auto I
= States
.find(BaseValue
);
964 if (I
!= States
.end())
966 assert(areBothVectorOrScalar(BaseValue
, Input
));
967 return BDVState(BaseValue
, BDVState::Base
, BaseValue
);
970 // Even though we have identified a concrete base (or a conflict) for all live
971 // pointers at this point, there are cases where the base is of an
972 // incompatible type compared to the original instruction. We conservatively
973 // mark those as conflicts to ensure that corresponding BDVs will be generated
974 // in the next steps.
976 // this is a rather explicit check for all cases where we should mark the
977 // state as a conflict to force the latter stages of the algorithm to emit
979 // TODO: in many cases the instructions emited for the conflicting states
980 // will be identical to the I itself (if the I's operate on their BDVs
981 // themselves). We should exploit this, but can't do it here since it would
982 // break the invariant about the BDVs not being known to be a base.
983 // TODO: the code also does not handle constants at all - the algorithm relies
984 // on all constants having the same BDV and therefore constant-only insns
985 // will never be in conflict, but this check is ignored here. If the
986 // constant conflicts will be to BDVs themselves, they will be identical
987 // instructions and will get optimized away (as in the above TODO)
988 auto MarkConflict
= [&](Instruction
*I
, Value
*BaseValue
) {
989 // II and EE mixes vector & scalar so is always a conflict
990 if (isa
<InsertElementInst
>(I
) || isa
<ExtractElementInst
>(I
))
992 // Shuffle vector is always a conflict as it creates new vector from
994 if (isa
<ShuffleVectorInst
>(I
))
996 // Any instructions where the computed base type differs from the
997 // instruction type. An example is where an extract instruction is used by a
998 // select. Here the select's BDV is a vector (because of extract's BDV),
999 // while the select itself is a scalar type. Note that the IE and EE
1000 // instruction check is not fully subsumed by the vector<->scalar check at
1001 // the end, this is due to the BDV algorithm being ignorant of BDV types at
1003 if (!areBothVectorOrScalar(BaseValue
, I
))
1008 bool Progress
= true;
1011 const size_t OldSize
= States
.size();
1014 // We're only changing values in this loop, thus safe to keep iterators.
1015 // Since this is computing a fixed point, the order of visit does not
1016 // effect the result. TODO: We could use a worklist here and make this run
1018 for (auto Pair
: States
) {
1019 Value
*BDV
= Pair
.first
;
1020 // Only values that do not have known bases or those that have differing
1021 // type (scalar versus vector) from a possible known base should be in the
1023 assert((!isKnownBase(BDV
, KnownBases
) ||
1024 !areBothVectorOrScalar(BDV
, Pair
.second
.getBaseValue())) &&
1025 "why did it get added?");
1027 BDVState
NewState(BDV
);
1028 visitBDVOperands(BDV
, [&](Value
*Op
) {
1029 Value
*BDV
= findBaseOrBDV(Op
, Cache
, KnownBases
);
1030 auto OpState
= GetStateForBDV(BDV
, Op
);
1031 NewState
.meet(OpState
);
1034 // if the instruction has known base, but should in fact be marked as
1035 // conflict because of incompatible in/out types, we mark it as such
1036 // ensuring that it will propagate through the fixpoint iteration
1037 auto I
= cast
<Instruction
>(BDV
);
1038 auto BV
= NewState
.getBaseValue();
1039 if (BV
&& MarkConflict(I
, BV
))
1040 NewState
= BDVState(I
, BDVState::Conflict
);
1042 BDVState OldState
= Pair
.second
;
1043 if (OldState
!= NewState
) {
1045 States
[BDV
] = NewState
;
1049 assert(OldSize
== States
.size() &&
1050 "fixed point shouldn't be adding any new nodes to state");
1055 LLVM_DEBUG(dbgs() << "States after meet iteration:\n");
1056 for (const auto &Pair
: States
) {
1057 LLVM_DEBUG(dbgs() << " " << Pair
.second
<< " for " << *Pair
.first
<< "\n");
1060 // since we do the conflict marking as part of the fixpoint iteration this
1061 // loop only asserts that invariants are met
1062 for (auto Pair
: States
) {
1063 Instruction
*I
= cast
<Instruction
>(Pair
.first
);
1064 BDVState State
= Pair
.second
;
1065 auto *BaseValue
= State
.getBaseValue();
1066 // Only values that do not have known bases or those that have differing
1067 // type (scalar versus vector) from a possible known base should be in the
1070 (!isKnownBase(I
, KnownBases
) || !areBothVectorOrScalar(I
, BaseValue
)) &&
1071 "why did it get added?");
1072 assert(!State
.isUnknown() && "Optimistic algorithm didn't complete!");
1076 // Insert Phis for all conflicts
1077 // TODO: adjust naming patterns to avoid this order of iteration dependency
1078 for (auto Pair
: States
) {
1079 Instruction
*I
= cast
<Instruction
>(Pair
.first
);
1080 BDVState State
= Pair
.second
;
1081 // Only values that do not have known bases or those that have differing
1082 // type (scalar versus vector) from a possible known base should be in the
1084 assert((!isKnownBase(I
, KnownBases
) ||
1085 !areBothVectorOrScalar(I
, State
.getBaseValue())) &&
1086 "why did it get added?");
1087 assert(!State
.isUnknown() && "Optimistic algorithm didn't complete!");
1089 // Since we're joining a vector and scalar base, they can never be the
1090 // same. As a result, we should always see insert element having reached
1091 // the conflict state.
1092 assert(!isa
<InsertElementInst
>(I
) || State
.isConflict());
1094 if (!State
.isConflict())
1097 auto getMangledName
= [](Instruction
*I
) -> std::string
{
1098 if (isa
<PHINode
>(I
)) {
1099 return suffixed_name_or(I
, ".base", "base_phi");
1100 } else if (isa
<SelectInst
>(I
)) {
1101 return suffixed_name_or(I
, ".base", "base_select");
1102 } else if (isa
<ExtractElementInst
>(I
)) {
1103 return suffixed_name_or(I
, ".base", "base_ee");
1104 } else if (isa
<InsertElementInst
>(I
)) {
1105 return suffixed_name_or(I
, ".base", "base_ie");
1107 return suffixed_name_or(I
, ".base", "base_sv");
1111 Instruction
*BaseInst
= I
->clone();
1112 BaseInst
->insertBefore(I
);
1113 BaseInst
->setName(getMangledName(I
));
1114 // Add metadata marking this as a base value
1115 BaseInst
->setMetadata("is_base_value", MDNode::get(I
->getContext(), {}));
1116 States
[I
] = BDVState(I
, BDVState::Conflict
, BaseInst
);
1117 setKnownBase(BaseInst
, /* IsKnownBase */true, KnownBases
);
1124 // Returns a instruction which produces the base pointer for a given
1125 // instruction. The instruction is assumed to be an input to one of the BDVs
1126 // seen in the inference algorithm above. As such, we must either already
1127 // know it's base defining value is a base, or have inserted a new
1128 // instruction to propagate the base of it's BDV and have entered that newly
1129 // introduced instruction into the state table. In either case, we are
1130 // assured to be able to determine an instruction which produces it's base
1132 auto getBaseForInput
= [&](Value
*Input
, Instruction
*InsertPt
) {
1133 Value
*BDV
= findBaseOrBDV(Input
, Cache
, KnownBases
);
1134 Value
*Base
= nullptr;
1135 if (!States
.count(BDV
)) {
1136 assert(areBothVectorOrScalar(BDV
, Input
));
1139 // Either conflict or base.
1140 assert(States
.count(BDV
));
1141 Base
= States
[BDV
].getBaseValue();
1143 assert(Base
&& "Can't be null");
1144 // The cast is needed since base traversal may strip away bitcasts
1145 if (Base
->getType() != Input
->getType() && InsertPt
)
1146 Base
= new BitCastInst(Base
, Input
->getType(), "cast", InsertPt
);
1150 // Fixup all the inputs of the new PHIs. Visit order needs to be
1151 // deterministic and predictable because we're naming newly created
1153 for (auto Pair
: States
) {
1154 Instruction
*BDV
= cast
<Instruction
>(Pair
.first
);
1155 BDVState State
= Pair
.second
;
1157 // Only values that do not have known bases or those that have differing
1158 // type (scalar versus vector) from a possible known base should be in the
1160 assert((!isKnownBase(BDV
, KnownBases
) ||
1161 !areBothVectorOrScalar(BDV
, State
.getBaseValue())) &&
1162 "why did it get added?");
1163 assert(!State
.isUnknown() && "Optimistic algorithm didn't complete!");
1164 if (!State
.isConflict())
1167 if (PHINode
*BasePHI
= dyn_cast
<PHINode
>(State
.getBaseValue())) {
1168 PHINode
*PN
= cast
<PHINode
>(BDV
);
1169 const unsigned NumPHIValues
= PN
->getNumIncomingValues();
1171 // The IR verifier requires phi nodes with multiple entries from the
1172 // same basic block to have the same incoming value for each of those
1173 // entries. Since we're inserting bitcasts in the loop, make sure we
1174 // do so at least once per incoming block.
1175 DenseMap
<BasicBlock
*, Value
*> BlockToValue
;
1176 for (unsigned i
= 0; i
< NumPHIValues
; i
++) {
1177 Value
*InVal
= PN
->getIncomingValue(i
);
1178 BasicBlock
*InBB
= PN
->getIncomingBlock(i
);
1179 if (!BlockToValue
.count(InBB
))
1180 BlockToValue
[InBB
] = getBaseForInput(InVal
, InBB
->getTerminator());
1183 Value
*OldBase
= BlockToValue
[InBB
];
1184 Value
*Base
= getBaseForInput(InVal
, nullptr);
1186 // We can't use `stripPointerCasts` instead of this function because
1187 // `stripPointerCasts` doesn't handle vectors of pointers.
1188 auto StripBitCasts
= [](Value
*V
) -> Value
* {
1189 while (auto *BC
= dyn_cast
<BitCastInst
>(V
))
1190 V
= BC
->getOperand(0);
1193 // In essence this assert states: the only way two values
1194 // incoming from the same basic block may be different is by
1195 // being different bitcasts of the same value. A cleanup
1196 // that remains TODO is changing findBaseOrBDV to return an
1197 // llvm::Value of the correct type (and still remain pure).
1198 // This will remove the need to add bitcasts.
1199 assert(StripBitCasts(Base
) == StripBitCasts(OldBase
) &&
1200 "findBaseOrBDV should be pure!");
1203 Value
*Base
= BlockToValue
[InBB
];
1204 BasePHI
->setIncomingValue(i
, Base
);
1206 } else if (SelectInst
*BaseSI
=
1207 dyn_cast
<SelectInst
>(State
.getBaseValue())) {
1208 SelectInst
*SI
= cast
<SelectInst
>(BDV
);
1210 // Find the instruction which produces the base for each input.
1211 // We may need to insert a bitcast.
1212 BaseSI
->setTrueValue(getBaseForInput(SI
->getTrueValue(), BaseSI
));
1213 BaseSI
->setFalseValue(getBaseForInput(SI
->getFalseValue(), BaseSI
));
1214 } else if (auto *BaseEE
=
1215 dyn_cast
<ExtractElementInst
>(State
.getBaseValue())) {
1216 Value
*InVal
= cast
<ExtractElementInst
>(BDV
)->getVectorOperand();
1217 // Find the instruction which produces the base for each input. We may
1218 // need to insert a bitcast.
1219 BaseEE
->setOperand(0, getBaseForInput(InVal
, BaseEE
));
1220 } else if (auto *BaseIE
= dyn_cast
<InsertElementInst
>(State
.getBaseValue())){
1221 auto *BdvIE
= cast
<InsertElementInst
>(BDV
);
1222 auto UpdateOperand
= [&](int OperandIdx
) {
1223 Value
*InVal
= BdvIE
->getOperand(OperandIdx
);
1224 Value
*Base
= getBaseForInput(InVal
, BaseIE
);
1225 BaseIE
->setOperand(OperandIdx
, Base
);
1227 UpdateOperand(0); // vector operand
1228 UpdateOperand(1); // scalar operand
1230 auto *BaseSV
= cast
<ShuffleVectorInst
>(State
.getBaseValue());
1231 auto *BdvSV
= cast
<ShuffleVectorInst
>(BDV
);
1232 auto UpdateOperand
= [&](int OperandIdx
) {
1233 Value
*InVal
= BdvSV
->getOperand(OperandIdx
);
1234 Value
*Base
= getBaseForInput(InVal
, BaseSV
);
1235 BaseSV
->setOperand(OperandIdx
, Base
);
1237 UpdateOperand(0); // vector operand
1238 if (!BdvSV
->isZeroEltSplat())
1239 UpdateOperand(1); // vector operand
1241 // Never read, so just use poison
1242 Value
*InVal
= BdvSV
->getOperand(1);
1243 BaseSV
->setOperand(1, PoisonValue::get(InVal
->getType()));
1252 // get the data layout to compare the sizes of base/derived pointer values
1253 [[maybe_unused
]] auto &DL
=
1254 cast
<llvm::Instruction
>(Def
)->getModule()->getDataLayout();
1255 // Cache all of our results so we can cheaply reuse them
1256 // NOTE: This is actually two caches: one of the base defining value
1257 // relation and one of the base pointer relation! FIXME
1258 for (auto Pair
: States
) {
1259 auto *BDV
= Pair
.first
;
1260 Value
*Base
= Pair
.second
.getBaseValue();
1261 assert(BDV
&& Base
);
1262 // Whenever we have a derived ptr(s), their base
1263 // ptr(s) must be of the same size, not necessarily the same type
1264 assert(DL
.getTypeAllocSize(BDV
->getType()) ==
1265 DL
.getTypeAllocSize(Base
->getType()) &&
1266 "Derived and base values should have same size");
1267 // Only values that do not have known bases or those that have differing
1268 // type (scalar versus vector) from a possible known base should be in the
1271 (!isKnownBase(BDV
, KnownBases
) || !areBothVectorOrScalar(BDV
, Base
)) &&
1272 "why did it get added?");
1275 dbgs() << "Updating base value cache"
1276 << " for: " << BDV
->getName() << " from: "
1277 << (Cache
.count(BDV
) ? Cache
[BDV
]->getName().str() : "none")
1278 << " to: " << Base
->getName() << "\n");
1282 assert(Cache
.count(Def
));
1286 // For a set of live pointers (base and/or derived), identify the base
1287 // pointer of the object which they are derived from. This routine will
1288 // mutate the IR graph as needed to make the 'base' pointer live at the
1289 // definition site of 'derived'. This ensures that any use of 'derived' can
1290 // also use 'base'. This may involve the insertion of a number of
1291 // additional PHI nodes.
1293 // preconditions: live is a set of pointer type Values
1295 // side effects: may insert PHI nodes into the existing CFG, will preserve
1296 // CFG, will not remove or mutate any existing nodes
1298 // post condition: PointerToBase contains one (derived, base) pair for every
1299 // pointer in live. Note that derived can be equal to base if the original
1300 // pointer was a base pointer.
1301 static void findBasePointers(const StatepointLiveSetTy
&live
,
1302 PointerToBaseTy
&PointerToBase
, DominatorTree
*DT
,
1303 DefiningValueMapTy
&DVCache
,
1304 IsKnownBaseMapTy
&KnownBases
) {
1305 for (Value
*ptr
: live
) {
1306 Value
*base
= findBasePointer(ptr
, DVCache
, KnownBases
);
1307 assert(base
&& "failed to find base pointer");
1308 PointerToBase
[ptr
] = base
;
1309 assert((!isa
<Instruction
>(base
) || !isa
<Instruction
>(ptr
) ||
1310 DT
->dominates(cast
<Instruction
>(base
)->getParent(),
1311 cast
<Instruction
>(ptr
)->getParent())) &&
1312 "The base we found better dominate the derived pointer");
1316 /// Find the required based pointers (and adjust the live set) for the given
1318 static void findBasePointers(DominatorTree
&DT
, DefiningValueMapTy
&DVCache
,
1320 PartiallyConstructedSafepointRecord
&result
,
1321 PointerToBaseTy
&PointerToBase
,
1322 IsKnownBaseMapTy
&KnownBases
) {
1323 StatepointLiveSetTy PotentiallyDerivedPointers
= result
.LiveSet
;
1324 // We assume that all pointers passed to deopt are base pointers; as an
1325 // optimization, we can use this to avoid seperately materializing the base
1326 // pointer graph. This is only relevant since we're very conservative about
1327 // generating new conflict nodes during base pointer insertion. If we were
1328 // smarter there, this would be irrelevant.
1329 if (auto Opt
= Call
->getOperandBundle(LLVMContext::OB_deopt
))
1330 for (Value
*V
: Opt
->Inputs
) {
1331 if (!PotentiallyDerivedPointers
.count(V
))
1333 PotentiallyDerivedPointers
.remove(V
);
1334 PointerToBase
[V
] = V
;
1336 findBasePointers(PotentiallyDerivedPointers
, PointerToBase
, &DT
, DVCache
,
1340 /// Given an updated version of the dataflow liveness results, update the
1341 /// liveset and base pointer maps for the call site CS.
1342 static void recomputeLiveInValues(GCPtrLivenessData
&RevisedLivenessData
,
1344 PartiallyConstructedSafepointRecord
&result
,
1345 PointerToBaseTy
&PointerToBase
,
1348 static void recomputeLiveInValues(
1349 Function
&F
, DominatorTree
&DT
, ArrayRef
<CallBase
*> toUpdate
,
1350 MutableArrayRef
<struct PartiallyConstructedSafepointRecord
> records
,
1351 PointerToBaseTy
&PointerToBase
, GCStrategy
*GC
) {
1352 // TODO-PERF: reuse the original liveness, then simply run the dataflow
1353 // again. The old values are still live and will help it stabilize quickly.
1354 GCPtrLivenessData RevisedLivenessData
;
1355 computeLiveInValues(DT
, F
, RevisedLivenessData
, GC
);
1356 for (size_t i
= 0; i
< records
.size(); i
++) {
1357 struct PartiallyConstructedSafepointRecord
&info
= records
[i
];
1358 recomputeLiveInValues(RevisedLivenessData
, toUpdate
[i
], info
, PointerToBase
,
1363 // Utility function which clones all instructions from "ChainToBase"
1364 // and inserts them before "InsertBefore". Returns rematerialized value
1365 // which should be used after statepoint.
1366 static Instruction
*rematerializeChain(ArrayRef
<Instruction
*> ChainToBase
,
1367 Instruction
*InsertBefore
,
1369 Value
*AlternateLiveBase
) {
1370 Instruction
*LastClonedValue
= nullptr;
1371 Instruction
*LastValue
= nullptr;
1372 // Walk backwards to visit top-most instructions first.
1373 for (Instruction
*Instr
:
1374 make_range(ChainToBase
.rbegin(), ChainToBase
.rend())) {
1375 // Only GEP's and casts are supported as we need to be careful to not
1376 // introduce any new uses of pointers not in the liveset.
1377 // Note that it's fine to introduce new uses of pointers which were
1378 // otherwise not used after this statepoint.
1379 assert(isa
<GetElementPtrInst
>(Instr
) || isa
<CastInst
>(Instr
));
1381 Instruction
*ClonedValue
= Instr
->clone();
1382 ClonedValue
->insertBefore(InsertBefore
);
1383 ClonedValue
->setName(Instr
->getName() + ".remat");
1385 // If it is not first instruction in the chain then it uses previously
1386 // cloned value. We should update it to use cloned value.
1387 if (LastClonedValue
) {
1389 ClonedValue
->replaceUsesOfWith(LastValue
, LastClonedValue
);
1391 for (auto *OpValue
: ClonedValue
->operand_values()) {
1392 // Assert that cloned instruction does not use any instructions from
1393 // this chain other than LastClonedValue
1394 assert(!is_contained(ChainToBase
, OpValue
) &&
1395 "incorrect use in rematerialization chain");
1396 // Assert that the cloned instruction does not use the RootOfChain
1397 // or the AlternateLiveBase.
1398 assert(OpValue
!= RootOfChain
&& OpValue
!= AlternateLiveBase
);
1402 // For the first instruction, replace the use of unrelocated base i.e.
1403 // RootOfChain/OrigRootPhi, with the corresponding PHI present in the
1404 // live set. They have been proved to be the same PHI nodes. Note
1405 // that the *only* use of the RootOfChain in the ChainToBase list is
1406 // the first Value in the list.
1407 if (RootOfChain
!= AlternateLiveBase
)
1408 ClonedValue
->replaceUsesOfWith(RootOfChain
, AlternateLiveBase
);
1411 LastClonedValue
= ClonedValue
;
1414 assert(LastClonedValue
);
1415 return LastClonedValue
;
1418 // When inserting gc.relocate and gc.result calls, we need to ensure there are
1419 // no uses of the original value / return value between the gc.statepoint and
1420 // the gc.relocate / gc.result call. One case which can arise is a phi node
1421 // starting one of the successor blocks. We also need to be able to insert the
1422 // gc.relocates only on the path which goes through the statepoint. We might
1423 // need to split an edge to make this possible.
1425 normalizeForInvokeSafepoint(BasicBlock
*BB
, BasicBlock
*InvokeParent
,
1426 DominatorTree
&DT
) {
1427 BasicBlock
*Ret
= BB
;
1428 if (!BB
->getUniquePredecessor())
1429 Ret
= SplitBlockPredecessors(BB
, InvokeParent
, "", &DT
);
1431 // Now that 'Ret' has unique predecessor we can safely remove all phi nodes
1433 FoldSingleEntryPHINodes(Ret
);
1434 assert(!isa
<PHINode
>(Ret
->begin()) &&
1435 "All PHI nodes should have been removed!");
1437 // At this point, we can safely insert a gc.relocate or gc.result as the first
1438 // instruction in Ret if needed.
1442 // List of all function attributes which must be stripped when lowering from
1443 // abstract machine model to physical machine model. Essentially, these are
1444 // all the effects a safepoint might have which we ignored in the abstract
1445 // machine model for purposes of optimization. We have to strip these on
1446 // both function declarations and call sites.
1447 static constexpr Attribute::AttrKind FnAttrsToStrip
[] =
1448 {Attribute::Memory
, Attribute::NoSync
, Attribute::NoFree
};
1450 // Create new attribute set containing only attributes which can be transferred
1451 // from the original call to the safepoint.
1452 static AttributeList
legalizeCallAttributes(CallBase
*Call
, bool IsMemIntrinsic
,
1453 AttributeList StatepointAL
) {
1454 AttributeList OrigAL
= Call
->getAttributes();
1455 if (OrigAL
.isEmpty())
1456 return StatepointAL
;
1458 // Remove the readonly, readnone, and statepoint function attributes.
1459 LLVMContext
&Ctx
= Call
->getContext();
1460 AttrBuilder
FnAttrs(Ctx
, OrigAL
.getFnAttrs());
1461 for (auto Attr
: FnAttrsToStrip
)
1462 FnAttrs
.removeAttribute(Attr
);
1464 for (Attribute A
: OrigAL
.getFnAttrs()) {
1465 if (isStatepointDirectiveAttr(A
))
1466 FnAttrs
.removeAttribute(A
);
1469 StatepointAL
= StatepointAL
.addFnAttributes(Ctx
, FnAttrs
);
1471 // The memory intrinsics do not have a 1:1 correspondence of the original
1472 // call arguments to the produced statepoint. Do not transfer the argument
1473 // attributes to avoid putting them on incorrect arguments.
1475 return StatepointAL
;
1477 // Attach the argument attributes from the original call at the corresponding
1478 // arguments in the statepoint. Note that any argument attributes that are
1479 // invalid after lowering are stripped in stripNonValidDataFromBody.
1480 for (unsigned I
: llvm::seq(Call
->arg_size()))
1481 StatepointAL
= StatepointAL
.addParamAttributes(
1482 Ctx
, GCStatepointInst::CallArgsBeginPos
+ I
,
1483 AttrBuilder(Ctx
, OrigAL
.getParamAttrs(I
)));
1485 // Return attributes are later attached to the gc.result intrinsic.
1486 return StatepointAL
;
/// Helper function to place all gc relocates necessary for the given
/// statepoint.
/// Inputs:
///   liveVariables - list of variables to be relocated.
///   basePtrs - base pointers.
///   statepointToken - statepoint instruction to which relocates should be
///   bound.
///   Builder - Llvm IR builder to be used to construct new calls.
static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
                              ArrayRef<Value *> BasePtrs,
                              Instruction *StatepointToken,
                              IRBuilder<> &Builder, GCStrategy *GC) {
  if (LiveVariables.empty())
    return;

  auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
    auto ValIt = llvm::find(LiveVec, Val);
    assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
    size_t Index = std::distance(LiveVec.begin(), ValIt);
    assert(Index < LiveVec.size() && "Bug in std::find?");
    return Index;
  };

  Module *M = StatepointToken->getModule();

  // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
  // element type is i8 addrspace(1)*). We originally generated unique
  // declarations for each pointer type, but this proved problematic because
  // the intrinsic mangling code is incomplete and fragile. Since we're moving
  // towards a single unified pointer type anyways, we can just cast everything
  // to an i8* of the right address space. A bitcast is added later to convert
  // gc_relocate to the actual value's type.
  auto getGCRelocateDecl = [&](Type *Ty) {
    assert(isHandledGCPointerType(Ty, GC));
    auto AS = Ty->getScalarType()->getPointerAddressSpace();
    Type *NewTy = PointerType::get(M->getContext(), AS);
    if (auto *VT = dyn_cast<VectorType>(Ty))
      NewTy = FixedVectorType::get(NewTy,
                                   cast<FixedVectorType>(VT)->getNumElements());
    return Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate,
                                     {NewTy});
  };

  // Lazily populated map from input types to the canonicalized form mentioned
  // in the comment above. This should probably be cached somewhere more
  // broadly.
  DenseMap<Type *, Function *> TypeToDeclMap;

  for (unsigned i = 0; i < LiveVariables.size(); i++) {
    // Generate the gc.relocate call and save the result
    Value *BaseIdx = Builder.getInt32(FindIndex(LiveVariables, BasePtrs[i]));
    Value *LiveIdx = Builder.getInt32(i);

    Type *Ty = LiveVariables[i]->getType();
    if (!TypeToDeclMap.count(Ty))
      TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
    Function *GCRelocateDecl = TypeToDeclMap[Ty];

    // Only specify a debug name if we can give a useful one.
    CallInst *Reloc = Builder.CreateCall(
        GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
        suffixed_name_or(LiveVariables[i], ".relocated", ""));
    // Trick CodeGen into thinking there are lots of free registers at this
    // location.
    Reloc->setCallingConv(CallingConv::Cold);
  }
}
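// Illustrative sketch of the output (IR simplified, names hypothetical): for
// a statepoint token %tok whose live variable %obj sits at gc-argument index
// 0 and is its own base, the loop above emits roughly
//   %obj.relocated = call coldcc ptr addrspace(1)
//       @llvm.experimental.gc.relocate.p1(token %tok, i32 0, i32 0)
// where the two i32 operands are the base and derived indices into the
// statepoint's gc argument list.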
namespace {

/// This struct is used to defer RAUWs and `eraseFromParent`s. Using this
/// avoids having to worry about keeping around dangling pointers to Values.
class DeferredReplacement {
  AssertingVH<Instruction> Old;
  AssertingVH<Instruction> New;
  bool IsDeoptimize = false;

  DeferredReplacement() = default;

public:
  static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) {
    assert(Old != New && Old && New &&
           "Cannot RAUW equal values or to / from null!");

    DeferredReplacement D;
    D.Old = Old;
    D.New = New;
    return D;
  }

  static DeferredReplacement createDelete(Instruction *ToErase) {
    DeferredReplacement D;
    D.Old = ToErase;
    return D;
  }

  static DeferredReplacement createDeoptimizeReplacement(Instruction *Old) {
#ifndef NDEBUG
    auto *F = cast<CallInst>(Old)->getCalledFunction();
    assert(F && F->getIntrinsicID() == Intrinsic::experimental_deoptimize &&
           "Only way to construct a deoptimize deferred replacement");
#endif
    DeferredReplacement D;
    D.Old = Old;
    D.IsDeoptimize = true;
    return D;
  }

  /// Does the task represented by this instance.
  void doReplacement() {
    Instruction *OldI = Old;
    Instruction *NewI = New;

    assert(OldI != NewI && "Disallowed at construction?!");
    assert((!IsDeoptimize || !New) &&
           "Deoptimize intrinsics are not replaced!");

    Old = nullptr;
    New = nullptr;

    if (NewI)
      OldI->replaceAllUsesWith(NewI);

    if (IsDeoptimize) {
      // Note: we've inserted instructions, so the call to llvm.deoptimize may
      // not necessarily be followed by the matching return.
      auto *RI = cast<ReturnInst>(OldI->getParent()->getTerminator());
      new UnreachableInst(RI->getContext(), RI);
      RI->eraseFromParent();
    }

    OldI->eraseFromParent();
  }
};

} // end anonymous namespace
static StringRef getDeoptLowering(CallBase *Call) {
  const char *DeoptLowering = "deopt-lowering";
  if (Call->hasFnAttr(DeoptLowering)) {
    // FIXME: Calls have a *really* confusing interface around attributes
    // with values.
    const AttributeList &CSAS = Call->getAttributes();
    if (CSAS.hasFnAttr(DeoptLowering))
      return CSAS.getFnAttr(DeoptLowering).getValueAsString();
    Function *F = Call->getCalledFunction();
    assert(F && F->hasFnAttribute(DeoptLowering));
    return F->getFnAttribute(DeoptLowering).getValueAsString();
  }
  return "live-through";
}
static void
makeStatepointExplicitImpl(CallBase *Call, /* to replace */
                           const SmallVectorImpl<Value *> &BasePtrs,
                           const SmallVectorImpl<Value *> &LiveVariables,
                           PartiallyConstructedSafepointRecord &Result,
                           std::vector<DeferredReplacement> &Replacements,
                           const PointerToBaseTy &PointerToBase,
                           GCStrategy *GC) {
  assert(BasePtrs.size() == LiveVariables.size());

  // Then go ahead and use the builder to actually do the inserts. We insert
  // immediately before the previous instruction under the assumption that all
  // arguments will be available here. We can't insert afterwards since we may
  // be replacing a terminator.
  IRBuilder<> Builder(Call);

  ArrayRef<Value *> GCArgs(LiveVariables);
  uint64_t StatepointID = StatepointDirectives::DefaultStatepointID;
  uint32_t NumPatchBytes = 0;
  uint32_t Flags = uint32_t(StatepointFlags::None);

  SmallVector<Value *, 8> CallArgs(Call->args());
  std::optional<ArrayRef<Use>> DeoptArgs;
  if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_deopt))
    DeoptArgs = Bundle->Inputs;
  std::optional<ArrayRef<Use>> TransitionArgs;
  if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_gc_transition)) {
    TransitionArgs = Bundle->Inputs;
    // TODO: This flag no longer serves a purpose and can be removed later
    Flags |= uint32_t(StatepointFlags::GCTransition);
  }

  // Instead of lowering calls to @llvm.experimental.deoptimize as normal calls
  // with a return value, we lower them as never-returning calls to
  // __llvm_deoptimize that are followed by unreachable to get better codegen.
  bool IsDeoptimize = false;
  bool IsMemIntrinsic = false;

  StatepointDirectives SD =
      parseStatepointDirectivesFromAttrs(Call->getAttributes());
  if (SD.NumPatchBytes)
    NumPatchBytes = *SD.NumPatchBytes;
  if (SD.StatepointID)
    StatepointID = *SD.StatepointID;

  // Pass through the requested lowering if any. The default is live-through.
  StringRef DeoptLowering = getDeoptLowering(Call);
  if (DeoptLowering.equals("live-in"))
    Flags |= uint32_t(StatepointFlags::DeoptLiveIn);
  else {
    assert(DeoptLowering.equals("live-through") && "Unsupported value!");
  }

  FunctionCallee CallTarget(Call->getFunctionType(), Call->getCalledOperand());
  if (Function *F = dyn_cast<Function>(CallTarget.getCallee())) {
    auto IID = F->getIntrinsicID();
    if (IID == Intrinsic::experimental_deoptimize) {
      // Calls to llvm.experimental.deoptimize are lowered to calls to the
      // __llvm_deoptimize symbol. We want to resolve this now, since the
      // verifier does not allow taking the address of an intrinsic function.

      SmallVector<Type *, 8> DomainTy;
      for (Value *Arg : CallArgs)
        DomainTy.push_back(Arg->getType());
      auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
                                    /* isVarArg = */ false);

      // Note: CallTarget can be a bitcast instruction of a symbol if there are
      // calls to @llvm.experimental.deoptimize with different argument types in
      // the same module. This is fine -- we assume the frontend knew what it
      // was doing when generating this kind of IR.
      CallTarget = F->getParent()
                       ->getOrInsertFunction("__llvm_deoptimize", FTy);

      IsDeoptimize = true;
    } else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
               IID == Intrinsic::memmove_element_unordered_atomic) {
      IsMemIntrinsic = true;

      // Unordered atomic memcpy and memmove intrinsics which are not explicitly
      // marked as "gc-leaf-function" should be lowered in a GC parseable way.
      // Specifically, these calls should be lowered to the
      // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols.
      // Similarly to __llvm_deoptimize we want to resolve this now, since the
      // verifier does not allow taking the address of an intrinsic function.
      //
      // Moreover we need to shuffle the arguments for the call in order to
      // accommodate GC. The underlying source and destination objects might be
      // relocated during copy operation should the GC occur. To relocate the
      // derived source and destination pointers the implementation of the
      // intrinsic should know the corresponding base pointers.
      //
      // To make the base pointers available pass them explicitly as arguments:
      //   memcpy(dest_derived, source_derived, ...) =>
      //   memcpy(dest_base, dest_offset, source_base, source_offset, ...)
      auto &Context = Call->getContext();
      auto &DL = Call->getModule()->getDataLayout();
      auto GetBaseAndOffset = [&](Value *Derived) {
        Value *Base = nullptr;
        // Optimizations in unreachable code might substitute the real pointer
        // with undef, poison or null-derived constant. Return null base for
        // them to be consistent with the handling in the main algorithm in
        // findBaseDefiningValue.
        if (isa<Constant>(Derived))
          Base =
              ConstantPointerNull::get(cast<PointerType>(Derived->getType()));
        else {
          assert(PointerToBase.count(Derived));
          Base = PointerToBase.find(Derived)->second;
        }
        unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
        unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
        Value *Base_int = Builder.CreatePtrToInt(
            Base, Type::getIntNTy(Context, IntPtrSize));
        Value *Derived_int = Builder.CreatePtrToInt(
            Derived, Type::getIntNTy(Context, IntPtrSize));
        return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int));
      };

      auto *Dest = CallArgs[0];
      Value *DestBase, *DestOffset;
      std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest);

      auto *Source = CallArgs[1];
      Value *SourceBase, *SourceOffset;
      std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source);

      auto *LengthInBytes = CallArgs[2];
      auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]);

      CallArgs.clear();
      CallArgs.push_back(DestBase);
      CallArgs.push_back(DestOffset);
      CallArgs.push_back(SourceBase);
      CallArgs.push_back(SourceOffset);
      CallArgs.push_back(LengthInBytes);

      SmallVector<Type *, 8> DomainTy;
      for (Value *Arg : CallArgs)
        DomainTy.push_back(Arg->getType());
      auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
                                    /* isVarArg = */ false);

      auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) {
        uint64_t ElementSize = ElementSizeCI->getZExtValue();
        if (IID == Intrinsic::memcpy_element_unordered_atomic) {
          switch (ElementSize) {
          case 1:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_1";
          case 2:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_2";
          case 4:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_4";
          case 8:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_8";
          case 16:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_16";
          default:
            llvm_unreachable("unexpected element size!");
          }
        }
        assert(IID == Intrinsic::memmove_element_unordered_atomic);
        switch (ElementSize) {
        case 1:
          return "__llvm_memmove_element_unordered_atomic_safepoint_1";
        case 2:
          return "__llvm_memmove_element_unordered_atomic_safepoint_2";
        case 4:
          return "__llvm_memmove_element_unordered_atomic_safepoint_4";
        case 8:
          return "__llvm_memmove_element_unordered_atomic_safepoint_8";
        case 16:
          return "__llvm_memmove_element_unordered_atomic_safepoint_16";
        default:
          llvm_unreachable("unexpected element size!");
        }
      };

      CallTarget =
          F->getParent()
              ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy);
    }
  }

  // Create the statepoint given all the arguments
  GCStatepointInst *Token = nullptr;
  if (auto *CI = dyn_cast<CallInst>(Call)) {
    CallInst *SPCall = Builder.CreateGCStatepointCall(
        StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
        TransitionArgs, DeoptArgs, GCArgs, "safepoint_token");

    SPCall->setTailCallKind(CI->getTailCallKind());
    SPCall->setCallingConv(CI->getCallingConv());

    // Set up function attrs directly on statepoint and return attrs later for
    // gc_result intrinsic.
    SPCall->setAttributes(
        legalizeCallAttributes(CI, IsMemIntrinsic, SPCall->getAttributes()));

    Token = cast<GCStatepointInst>(SPCall);

    // Put the following gc_result and gc_relocate calls immediately after
    // the old call (which we're about to delete).
    assert(CI->getNextNode() && "Not a terminator, must have next!");
    Builder.SetInsertPoint(CI->getNextNode());
    Builder.SetCurrentDebugLocation(CI->getNextNode()->getDebugLoc());
  } else {
    auto *II = cast<InvokeInst>(Call);

    // Insert the new invoke into the old block. We'll remove the old one in a
    // moment at which point this will become the new terminator for the
    // original block.
    InvokeInst *SPInvoke = Builder.CreateGCStatepointInvoke(
        StatepointID, NumPatchBytes, CallTarget, II->getNormalDest(),
        II->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs, GCArgs,
        "statepoint_token");

    SPInvoke->setCallingConv(II->getCallingConv());

    // Set up function attrs directly on statepoint and return attrs later for
    // gc_result intrinsic.
    SPInvoke->setAttributes(
        legalizeCallAttributes(II, IsMemIntrinsic, SPInvoke->getAttributes()));

    Token = cast<GCStatepointInst>(SPInvoke);

    // Generate gc relocates in exceptional path
    BasicBlock *UnwindBlock = II->getUnwindDest();
    assert(!isa<PHINode>(UnwindBlock->begin()) &&
           UnwindBlock->getUniquePredecessor() &&
           "can't safely insert in this block!");

    Builder.SetInsertPoint(UnwindBlock, UnwindBlock->getFirstInsertionPt());
    Builder.SetCurrentDebugLocation(II->getDebugLoc());

    // Attach exceptional gc relocates to the landingpad.
    Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
    Result.UnwindToken = ExceptionalToken;

    CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder, GC);

    // Generate gc relocates and returns for normal block
    BasicBlock *NormalDest = II->getNormalDest();
    assert(!isa<PHINode>(NormalDest->begin()) &&
           NormalDest->getUniquePredecessor() &&
           "can't safely insert in this block!");

    Builder.SetInsertPoint(NormalDest, NormalDest->getFirstInsertionPt());

    // gc relocates will be generated later as if it were a regular call
    // statepoint
  }
  assert(Token && "Should be set in one of the above branches!");

  if (IsDeoptimize) {
    // If we're wrapping an @llvm.experimental.deoptimize in a statepoint, we
    // transform the tail-call like structure to a call to a void function
    // followed by unreachable to get better codegen.
    Replacements.push_back(
        DeferredReplacement::createDeoptimizeReplacement(Call));
  } else {
    Token->setName("statepoint_token");
    if (!Call->getType()->isVoidTy() && !Call->use_empty()) {
      StringRef Name = Call->hasName() ? Call->getName() : "";
      CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name);
      GCResult->setAttributes(
          AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
                             Call->getAttributes().getRetAttrs()));

      // We cannot RAUW or delete CS.getInstruction() because it could be in the
      // live set of some other safepoint, in which case that safepoint's
      // PartiallyConstructedSafepointRecord will hold a raw pointer to this
      // llvm::Instruction. Instead, we defer the replacement and deletion to
      // after the live sets have been made explicit in the IR, and we no longer
      // have raw pointers to worry about.
      Replacements.emplace_back(
          DeferredReplacement::createRAUW(Call, GCResult));
    } else {
      Replacements.emplace_back(DeferredReplacement::createDelete(Call));
    }
  }

  Result.StatepointToken = Token;

  // Second, create a gc.relocate for every live variable
  CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder, GC);
}
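// Illustrative before/after sketch of the rewrite above (IR heavily
// simplified; exact statepoint operand layout and intrinsic mangling elided):
//   %r = call ptr addrspace(1) @foo(ptr addrspace(1) %p)
// becomes roughly
//   %tok = call token @llvm.experimental.gc.statepoint...(i64 <id>,
//              i32 <num-patch-bytes>, ptr @foo, i32 1, i32 <flags>,
//              ptr addrspace(1) %p, ...)
//   %p.relocated = call ptr addrspace(1)
//       @llvm.experimental.gc.relocate.p1(token %tok, i32 0, i32 0)
//   %r = call ptr addrspace(1) @llvm.experimental.gc.result...(token %tok)
// with the gc.result taking over the uses of the original call via the
// DeferredReplacement machinery.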
// Replace an existing gc.statepoint with a new one and a set of gc.relocates
// which make the relocations happening at this safepoint explicit.
//
// WARNING: Does not do any fixup to adjust users of the original live
// values. That's the caller's responsibility.
static void
makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
                       PartiallyConstructedSafepointRecord &Result,
                       std::vector<DeferredReplacement> &Replacements,
                       const PointerToBaseTy &PointerToBase, GCStrategy *GC) {
  const auto &LiveSet = Result.LiveSet;

  // Convert to vector for efficient cross referencing.
  SmallVector<Value *, 64> BaseVec, LiveVec;
  LiveVec.reserve(LiveSet.size());
  BaseVec.reserve(LiveSet.size());
  for (Value *L : LiveSet) {
    LiveVec.push_back(L);
    assert(PointerToBase.count(L));
    Value *Base = PointerToBase.find(L)->second;
    BaseVec.push_back(Base);
  }
  assert(LiveVec.size() == BaseVec.size());

  // Do the actual rewriting and delete the old statepoint.
  makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements,
                             PointerToBase, GC);
}
// Helper function for relocationViaAlloca.
//
// It receives an iterator to the statepoint gc relocates and emits a store to
// the assigned location (via allocaMap) for each one of them. It adds the
// visited values into the visitedLiveValues set, which we will later use for
// validation checking.
static void
insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
                       DenseMap<Value *, AllocaInst *> &AllocaMap,
                       DenseSet<Value *> &VisitedLiveValues) {
  for (User *U : GCRelocs) {
    GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
    if (!Relocate)
      continue;

    Value *OriginalValue = Relocate->getDerivedPtr();
    assert(AllocaMap.count(OriginalValue));
    Value *Alloca = AllocaMap[OriginalValue];

    // Emit store into the related alloca.
    assert(Relocate->getNextNode() &&
           "Should always have one since it's not a terminator");
    new StoreInst(Relocate, Alloca, Relocate->getNextNode());

#ifndef NDEBUG
    VisitedLiveValues.insert(OriginalValue);
#endif
  }
}
// Helper function for relocationViaAlloca. Similar to
// insertRelocationStores, but works for rematerialized values.
static void insertRematerializationStores(
    const RematerializedValueMapTy &RematerializedValues,
    DenseMap<Value *, AllocaInst *> &AllocaMap,
    DenseSet<Value *> &VisitedLiveValues) {
  for (auto RematerializedValuePair : RematerializedValues) {
    Instruction *RematerializedValue = RematerializedValuePair.first;
    Value *OriginalValue = RematerializedValuePair.second;

    assert(AllocaMap.count(OriginalValue) &&
           "Can not find alloca for rematerialized value");
    Value *Alloca = AllocaMap[OriginalValue];

    new StoreInst(RematerializedValue, Alloca,
                  RematerializedValue->getNextNode());

#ifndef NDEBUG
    VisitedLiveValues.insert(OriginalValue);
#endif
  }
}
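// Illustrative sketch of the strategy implemented below (simplified, names
// hypothetical): for each live gc pointer %p we emit
//   %p.alloca = alloca ptr addrspace(1)        ; in the entry block
//   store ptr addrspace(1) %p, ptr %p.alloca   ; after the def of %p
//   store ... %p.relocated, ptr %p.alloca      ; after each statepoint
// and rewrite every use of %p to load from %p.alloca. PromoteMemToReg
// (mem2reg) then folds the allocas back into SSA form with the relocations
// made explicit.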
/// Do all the relocation update via allocas and mem2reg
static void relocationViaAlloca(
    Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
    ArrayRef<PartiallyConstructedSafepointRecord> Records) {
#ifndef NDEBUG
  // Record initial number of (static) allocas; we'll check we have the same
  // number when we get done.
  int InitialAllocaNum = 0;
  for (Instruction &I : F.getEntryBlock())
    if (isa<AllocaInst>(I))
      InitialAllocaNum++;
#endif

  // TODO-PERF: change data structures, reserve
  DenseMap<Value *, AllocaInst *> AllocaMap;
  SmallVector<AllocaInst *, 200> PromotableAllocas;
  // Used later to check that we have enough allocas to store all values
  std::size_t NumRematerializedValues = 0;
  PromotableAllocas.reserve(Live.size());

  // Emit alloca for "LiveValue" and record it in "AllocaMap" and
  // "PromotableAllocas"
  const DataLayout &DL = F.getParent()->getDataLayout();
  auto emitAllocaFor = [&](Value *LiveValue) {
    AllocaInst *Alloca = new AllocaInst(LiveValue->getType(),
                                        DL.getAllocaAddrSpace(), "",
                                        F.getEntryBlock().getFirstNonPHI());
    AllocaMap[LiveValue] = Alloca;
    PromotableAllocas.push_back(Alloca);
  };

  // Emit alloca for each live gc pointer
  for (Value *V : Live)
    emitAllocaFor(V);

  // Emit allocas for rematerialized values
  for (const auto &Info : Records)
    for (auto RematerializedValuePair : Info.RematerializedValues) {
      Value *OriginalValue = RematerializedValuePair.second;
      if (AllocaMap.contains(OriginalValue))
        continue;

      emitAllocaFor(OriginalValue);
      ++NumRematerializedValues;
    }

  // The next two loops are part of the same conceptual operation. We need to
  // insert a store to the alloca after the original def and at each
  // redefinition. We need to insert a load before each use. These are split
  // into distinct loops for performance reasons.

  // Update gc pointer after each statepoint: either store a relocated value or
  // null (if no relocated value was found for this gc pointer and it is not a
  // gc_result). This must happen before we update the statepoint with load of
  // alloca otherwise we lose the link between statepoint and old def.
  for (const auto &Info : Records) {
    Value *Statepoint = Info.StatepointToken;

    // This will be used for consistency check
    DenseSet<Value *> VisitedLiveValues;

    // Insert stores for normal statepoint gc relocates
    insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues);

    // In case it was an invoke statepoint
    // we will insert stores for exceptional path gc relocates.
    if (isa<InvokeInst>(Statepoint)) {
      insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
                             VisitedLiveValues);
    }

    // Do a similar thing with rematerialized values
    insertRematerializationStores(Info.RematerializedValues, AllocaMap,
                                  VisitedLiveValues);

    if (ClobberNonLive) {
      // As a debugging aid, pretend that an unrelocated pointer becomes null at
      // the gc.statepoint. This will turn some subtle GC problems into
      // slightly easier to debug SEGVs. Note that on large IR files with
      // lots of gc.statepoints this is extremely costly both memory and time
      // wise.
      SmallVector<AllocaInst *, 64> ToClobber;
      for (auto Pair : AllocaMap) {
        Value *Def = Pair.first;
        AllocaInst *Alloca = Pair.second;

        // This value was relocated
        if (VisitedLiveValues.count(Def)) {
          continue;
        }
        ToClobber.push_back(Alloca);
      }

      auto InsertClobbersAt = [&](Instruction *IP) {
        for (auto *AI : ToClobber) {
          auto AT = AI->getAllocatedType();
          Constant *CPN;
          if (AT->isVectorTy())
            CPN = ConstantAggregateZero::get(AT);
          else
            CPN = ConstantPointerNull::get(cast<PointerType>(AT));
          new StoreInst(CPN, AI, IP);
        }
      };

      // Insert the clobbering stores. These may get intermixed with the
      // gc.results and gc.relocates, but that's fine.
      if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
        InsertClobbersAt(&*II->getNormalDest()->getFirstInsertionPt());
        InsertClobbersAt(&*II->getUnwindDest()->getFirstInsertionPt());
      } else {
        InsertClobbersAt(cast<Instruction>(Statepoint)->getNextNode());
      }
    }
  }

  // Update use with load allocas and add store for gc_relocated.
  for (auto Pair : AllocaMap) {
    Value *Def = Pair.first;
    AllocaInst *Alloca = Pair.second;

    // We pre-record the uses of allocas so that we don't have to worry about
    // later updates that change the user information.

    SmallVector<Instruction *, 20> Uses;
    // PERF: trade a linear scan for repeated reallocation
    Uses.reserve(Def->getNumUses());
    for (User *U : Def->users()) {
      if (!isa<ConstantExpr>(U)) {
        // If the def has a ConstantExpr use, then the def is either a
        // ConstantExpr use itself or null. In either case
        // (recursively in the first, directly in the second), the oop
        // it is ultimately dependent on is null and this particular
        // use does not need to be fixed up.
        Uses.push_back(cast<Instruction>(U));
      }
    }

    llvm::sort(Uses);
    auto Last = std::unique(Uses.begin(), Uses.end());
    Uses.erase(Last, Uses.end());

    for (Instruction *Use : Uses) {
      if (isa<PHINode>(Use)) {
        PHINode *Phi = cast<PHINode>(Use);
        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
          if (Def == Phi->getIncomingValue(i)) {
            LoadInst *Load =
                new LoadInst(Alloca->getAllocatedType(), Alloca, "",
                             Phi->getIncomingBlock(i)->getTerminator());
            Phi->setIncomingValue(i, Load);
          }
        }
      } else {
        LoadInst *Load =
            new LoadInst(Alloca->getAllocatedType(), Alloca, "", Use);
        Use->replaceUsesOfWith(Def, Load);
      }
    }

    // Emit store for the initial gc value. Store must be inserted after load,
    // otherwise store will be in alloca's use list and an extra load will be
    // inserted before it.
    StoreInst *Store = new StoreInst(Def, Alloca, /*volatile*/ false,
                                     DL.getABITypeAlign(Def->getType()));
    if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
      if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
        // InvokeInst is a terminator so the store needs to be inserted into its
        // normal destination block.
        BasicBlock *NormalDest = Invoke->getNormalDest();
        Store->insertBefore(NormalDest->getFirstNonPHI());
      } else {
        assert(!Inst->isTerminator() &&
               "The only terminator that can produce a value is "
               "InvokeInst which is handled above.");
        Store->insertAfter(Inst);
      }
    } else {
      assert(isa<Argument>(Def));
      Store->insertAfter(cast<Instruction>(Alloca));
    }
  }

  assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
         "we must have the same allocas with lives");
  (void) NumRematerializedValues;
  if (!PromotableAllocas.empty()) {
    // Apply mem2reg to promote alloca to SSA
    PromoteMemToReg(PromotableAllocas, DT);
  }

#ifndef NDEBUG
  for (auto &I : F.getEntryBlock())
    if (isa<AllocaInst>(I))
      InitialAllocaNum--;
  assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
#endif
}
/// Implement a unique function which doesn't require we sort the input
/// vector. Doing so has the effect of changing the output of a couple of
/// tests in ways which make them less useful in testing fused safepoints.
template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
  SmallSet<T, 8> Seen;
  erase_if(Vec, [&](const T &V) { return !Seen.insert(V).second; });
}
/// Insert holders so that each Value is obviously live through the entire
/// lifetime of the call.
static void insertUseHolderAfter(CallBase *Call, const ArrayRef<Value *> Values,
                                 SmallVectorImpl<CallInst *> &Holders) {
  if (Values.empty())
    // No values to hold live, might as well not insert the empty holder
    return;

  Module *M = Call->getModule();
  // Use a dummy vararg function to actually hold the values live
  FunctionCallee Func = M->getOrInsertFunction(
      "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true));
  if (isa<CallInst>(Call)) {
    // For call safepoints insert dummy calls right after safepoint
    Holders.push_back(
        CallInst::Create(Func, Values, "", &*++Call->getIterator()));
    return;
  }
  // For invoke safepoints insert dummy calls both in normal and
  // exceptional destination blocks
  auto *II = cast<InvokeInst>(Call);
  Holders.push_back(CallInst::Create(
      Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
  Holders.push_back(CallInst::Create(
      Func, Values, "", &*II->getUnwindDest()->getFirstInsertionPt()));
}
static void findLiveReferences(
    Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
    MutableArrayRef<struct PartiallyConstructedSafepointRecord> records,
    GCStrategy *GC) {
  GCPtrLivenessData OriginalLivenessData;
  computeLiveInValues(DT, F, OriginalLivenessData, GC);
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info, GC);
  }
}
// Helper function for rematerializeLiveValues. It walks the use chain
// starting from the "CurrentValue" until it reaches the root of the chain,
// i.e. the base or a value it cannot process. Only "simple" values are
// processed (currently it is GEPs and casts). The returned root is examined
// by the callers of findRematerializableChainToBasePointer. Fills
// "ChainToBase" array with all visited values.
static Value *findRematerializableChainToBasePointer(
    SmallVectorImpl<Instruction *> &ChainToBase,
    Value *CurrentValue) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
    ChainToBase.push_back(GEP);
    return findRematerializableChainToBasePointer(ChainToBase,
                                                  GEP->getPointerOperand());
  }

  if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
    if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
      return CI;

    ChainToBase.push_back(CI);
    return findRematerializableChainToBasePointer(ChainToBase,
                                                  CI->getOperand(0));
  }

  // We have reached the root of the chain, which is either equal to the base
  // or is the first unsupported value along the use chain.
  return CurrentValue;
}
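// Illustrative chain (names hypothetical): for
//   %b = ...                                            ; root/base
//   %c = getelementptr i8, ptr addrspace(1) %b, i64 8
//   %d = getelementptr i8, ptr addrspace(1) %c, i64 16
// calling findRematerializableChainToBasePointer on %d fills ChainToBase with
// [%d, %c] and returns %b as the root.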
// Helper function for rematerializeLiveValues. Compute the cost of the use
// chain we are going to rematerialize.
static InstructionCost
chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
                       TargetTransformInfo &TTI) {
  InstructionCost Cost = 0;

  for (Instruction *Instr : Chain) {
    if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
      assert(CI->isNoopCast(CI->getModule()->getDataLayout()) &&
             "non noop cast is found during rematerialization");

      Type *SrcTy = CI->getOperand(0)->getType();
      Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy,
                                   TTI::getCastContextHint(CI),
                                   TargetTransformInfo::TCK_SizeAndLatency,
                                   CI);

    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
      // Cost of the address calculation
      Type *ValTy = GEP->getSourceElementType();
      Cost += TTI.getAddressComputationCost(ValTy);

      // And cost of the GEP itself
      // TODO: Use TTI->getGEPCost here (it exists, but appears to be not
      // allowed for the external usage)
      if (!GEP->hasAllConstantIndices())
        Cost += 2;

    } else
      llvm_unreachable("unsupported instruction type during rematerialization");
  }

  return Cost;
}
static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi,
                                  PHINode &AlternateRootPhi) {
  unsigned PhiNum = OrigRootPhi.getNumIncomingValues();
  if (PhiNum != AlternateRootPhi.getNumIncomingValues() ||
      OrigRootPhi.getParent() != AlternateRootPhi.getParent())
    return false;

  // Map of incoming values and their corresponding basic blocks of
  // OrigRootPhi.
  SmallDenseMap<Value *, BasicBlock *, 8> CurrentIncomingValues;
  for (unsigned i = 0; i < PhiNum; i++)
    CurrentIncomingValues[OrigRootPhi.getIncomingValue(i)] =
        OrigRootPhi.getIncomingBlock(i);

  // Both current and base PHIs should have the same incoming values and
  // the same basic blocks corresponding to the incoming values.
  for (unsigned i = 0; i < PhiNum; i++) {
    auto CIVI =
        CurrentIncomingValues.find(AlternateRootPhi.getIncomingValue(i));
    if (CIVI == CurrentIncomingValues.end())
      return false;
    BasicBlock *CurrentIncomingBB = CIVI->second;
    if (CurrentIncomingBB != AlternateRootPhi.getIncomingBlock(i))
      return false;
  }
  return true;
}
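// Illustrative example (names hypothetical): findBasePointer may pair
//   %phi      = phi ptr addrspace(1) [ %a, %bb1 ], [ %b, %bb2 ]
//   %phi.base = phi ptr addrspace(1) [ %a, %bb1 ], [ %b, %bb2 ]
// when %a and %b are already their own bases. The two phis then carry the
// same incoming values from the same blocks, and AreEquivalentPhiNodes lets
// us treat them as the same SSA value for rematerialization purposes.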
// Find derived pointers that can be recomputed cheaply enough and fill
// RematerizationCandidates with such candidates.
static void
findRematerializationCandidates(PointerToBaseTy PointerToBase,
                                RematCandTy &RematerizationCandidates,
                                TargetTransformInfo &TTI) {
  const unsigned int ChainLengthThreshold = 10;

  for (auto P2B : PointerToBase) {
    auto *Derived = P2B.first;
    auto *Base = P2B.second;
    // Consider only derived pointers.
    if (Derived == Base)
      continue;

    // For each live pointer find its defining chain.
    SmallVector<Instruction *, 3> ChainToBase;
    Value *RootOfChain =
        findRematerializableChainToBasePointer(ChainToBase, Derived);

    // Nothing to do, or chain is too long
    if (ChainToBase.size() == 0 ||
        ChainToBase.size() > ChainLengthThreshold)
      continue;

    // Handle the scenario where the RootOfChain is not equal to the
    // Base Value, but they are essentially the same phi values.
    if (RootOfChain != PointerToBase[Derived]) {
      PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
      PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[Derived]);
      if (!OrigRootPhi || !AlternateRootPhi)
        continue;
      // PHI nodes that have the same incoming values, and belong to the same
      // basic blocks are essentially the same SSA value. When the original phi
      // has incoming values with different base pointers, the original phi is
      // marked as conflict, and an additional `AlternateRootPhi` with the same
      // incoming values gets generated by the findBasePointer function. We
      // need to identify that the newly generated AlternateRootPhi (.base
      // version of phi) and RootOfChain (the original phi node itself) are the
      // same, so that we can rematerialize the gep and casts. This is a
      // workaround for the deficiency in the findBasePointer algorithm.
      if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
        continue;
    }
    // Compute cost of this chain.
    InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI);
    // TODO: We can also account for cases when we will be able to remove some
    // of the rematerialized values by later optimization passes. I.e if
    // we rematerialized several intersecting chains. Or if original values
    // don't have any uses besides this statepoint.

    // Ok, there is a candidate.
    RematerizlizationCandidateRecord Record;
    Record.ChainToBase = ChainToBase;
    Record.RootOfChain = RootOfChain;
    Record.Cost = Cost;
    RematerizationCandidates.insert({ Derived, Record });
  }
}
// Try to rematerialize derived pointers immediately before their uses
// (instead of rematerializing after every statepoint it is live through).
// This can be beneficial when a derived pointer is live across many
// statepoints, but its uses are rare.
static void rematerializeLiveValuesAtUses(
    RematCandTy &RematerizationCandidates,
    MutableArrayRef<PartiallyConstructedSafepointRecord> Records,
    PointerToBaseTy &PointerToBase) {
  if (!RematDerivedAtUses)
    return;

  SmallVector<Instruction *, 32> LiveValuesToBeDeleted;

  LLVM_DEBUG(dbgs() << "Rematerialize derived pointers at uses, "
                    << "Num statepoints: " << Records.size() << '\n');

  for (auto &It : RematerizationCandidates) {
    Instruction *Cand = cast<Instruction>(It.first);
    auto &Record = It.second;

    if (Record.Cost >= RematerializationThreshold)
      continue;

    if (Cand->user_empty())
      continue;

    if (Cand->hasOneUse())
      if (auto *U = dyn_cast<Instruction>(Cand->getUniqueUndroppableUser()))
        if (U->getParent() == Cand->getParent())
          continue;

    // Rematerialization before PHI nodes is not implemented.
    if (llvm::any_of(Cand->users(),
                     [](const auto *U) { return isa<PHINode>(U); }))
      continue;

    LLVM_DEBUG(dbgs() << "Trying cand " << *Cand << " ... ");

    // The count of rematerialization instructions we introduce equals the
    // number of candidate uses.
    // The count of rematerialization instructions we eliminate equals the
    // number of statepoints it is live through.
    // Consider the transformation profitable if the latter is greater than the
    // former (in other words, we create fewer than we eliminate).
    unsigned NumLiveStatepoints = llvm::count_if(
        Records, [Cand](const auto &R) { return R.LiveSet.contains(Cand); });
    unsigned NumUses = Cand->getNumUses();

    LLVM_DEBUG(dbgs() << "Num uses: " << NumUses << " Num live statepoints: "
                      << NumLiveStatepoints << " ");

    if (NumLiveStatepoints < NumUses) {
      LLVM_DEBUG(dbgs() << "not profitable\n");
      continue;
    }

    // If rematerialization is 'free', then favor rematerialization at
    // uses as it generally shortens live ranges.
    // TODO: Short (size == 1) chains only?
    if (NumLiveStatepoints == NumUses && Record.Cost > 0) {
      LLVM_DEBUG(dbgs() << "not profitable\n");
      continue;
    }

    LLVM_DEBUG(dbgs() << "looks profitable\n");

    // ChainToBase may contain another remat candidate (as a sub chain) which
    // has been rewritten by now. Need to recollect the chain to have an up to
    // date value.
    // TODO: sort records in findRematerializationCandidates() in
    // decreasing chain size order?
    if (Record.ChainToBase.size() > 1) {
      Record.ChainToBase.clear();
      findRematerializableChainToBasePointer(Record.ChainToBase, Cand);
    }

    // The current rematerialization algorithm is very simple: we rematerialize
    // immediately before EVERY use, even if there are several uses in the same
    // block or if a use is local to the Cand def. The reason is that this
    // allows us to avoid recomputing liveness without complicated analysis:
    // - If we did not eliminate all uses of the original Candidate, we do not
    //   know exactly in what BBs it is still live.
    // - If we rematerialize once per BB, we need to find the proper insertion
    //   place (first use in block, but after Def) and analyze if there is a
    //   statepoint between uses in the block.
    while (!Cand->user_empty()) {
      Instruction *UserI = cast<Instruction>(*Cand->user_begin());
      Instruction *RematChain = rematerializeChain(
          Record.ChainToBase, UserI, Record.RootOfChain, PointerToBase[Cand]);
      UserI->replaceUsesOfWith(Cand, RematChain);
      PointerToBase[RematChain] = PointerToBase[Cand];
    }
    LiveValuesToBeDeleted.push_back(Cand);
  }

  LLVM_DEBUG(dbgs() << "Rematerialized " << LiveValuesToBeDeleted.size()
                    << " derived pointers\n");
  for (auto *Cand : LiveValuesToBeDeleted) {
    assert(Cand->use_empty() && "Unexpected user remain");
    RematerizationCandidates.erase(Cand);
    for (auto &R : Records) {
      assert(!R.LiveSet.contains(Cand) ||
             R.LiveSet.contains(PointerToBase[Cand]));
      R.LiveSet.remove(Cand);
    }
  }

  // Recollect not rematerialized chains - we might have rewritten
  // their sub-chains.
  if (!LiveValuesToBeDeleted.empty()) {
    for (auto &P : RematerizationCandidates) {
      auto &R = P.second;
      if (R.ChainToBase.size() > 1) {
        R.ChainToBase.clear();
        findRematerializableChainToBasePointer(R.ChainToBase, P.first);
      }
    }
  }
}
// From the statepoint live set pick values that are cheaper to recompute than
// to relocate. Remove these values from the live set, rematerialize them after
// the statepoint and record them in the "Info" structure. Note that similar to
// relocated values we don't do any user adjustments here.
static void rematerializeLiveValues(CallBase *Call,
                                    PartiallyConstructedSafepointRecord &Info,
                                    PointerToBaseTy &PointerToBase,
                                    RematCandTy &RematerizationCandidates,
                                    TargetTransformInfo &TTI) {
  // Record values we are going to delete from this statepoint live set.
  // We can not do this in the following loop due to iterator invalidation.
  SmallVector<Value *, 32> LiveValuesToBeDeleted;

  for (Value *LiveValue : Info.LiveSet) {
    auto It = RematerizationCandidates.find(LiveValue);
    if (It == RematerizationCandidates.end())
      continue;

    RematerizlizationCandidateRecord &Record = It->second;

    InstructionCost Cost = Record.Cost;
    // For invokes we need to rematerialize each chain twice - for normal and
    // for unwind basic blocks. Model this by multiplying cost by two.
    if (isa<InvokeInst>(Call))
      Cost *= 2;

    // If it's too expensive - skip it.
    if (Cost >= RematerializationThreshold)
      continue;

    // Remove value from the live set
    LiveValuesToBeDeleted.push_back(LiveValue);

    // Clone instructions and record them inside "Info" structure.

    // Different cases for calls and invokes. For invokes we need to clone
    // instructions both on normal and unwind path.
    if (isa<CallInst>(Call)) {
      Instruction *InsertBefore = Call->getNextNode();
      assert(InsertBefore);
      Instruction *RematerializedValue =
          rematerializeChain(Record.ChainToBase, InsertBefore,
                             Record.RootOfChain, PointerToBase[LiveValue]);
      Info.RematerializedValues[RematerializedValue] = LiveValue;
    } else {
      auto *Invoke = cast<InvokeInst>(Call);

      Instruction *NormalInsertBefore =
          &*Invoke->getNormalDest()->getFirstInsertionPt();
      Instruction *UnwindInsertBefore =
          &*Invoke->getUnwindDest()->getFirstInsertionPt();

      Instruction *NormalRematerializedValue =
          rematerializeChain(Record.ChainToBase, NormalInsertBefore,
                             Record.RootOfChain, PointerToBase[LiveValue]);
      Instruction *UnwindRematerializedValue =
          rematerializeChain(Record.ChainToBase, UnwindInsertBefore,
                             Record.RootOfChain, PointerToBase[LiveValue]);

      Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
      Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
    }
  }

  // Remove rematerialized values from the live set.
  for (auto *LiveValue : LiveValuesToBeDeleted) {
    Info.LiveSet.remove(LiveValue);
  }
}
static bool inlineGetBaseAndOffset(Function &F,
                                   SmallVectorImpl<CallInst *> &Intrinsics,
                                   DefiningValueMapTy &DVCache,
                                   IsKnownBaseMapTy &KnownBases) {
  auto &Context = F.getContext();
  auto &DL = F.getParent()->getDataLayout();
  bool Changed = false;

  for (auto *Callsite : Intrinsics)
    switch (Callsite->getIntrinsicID()) {
    case Intrinsic::experimental_gc_get_pointer_base: {
      Changed = true;
      Value *Base =
          findBasePointer(Callsite->getOperand(0), DVCache, KnownBases);
      assert(!DVCache.count(Callsite));
      Callsite->replaceAllUsesWith(Base);
      if (!Base->hasName())
        Base->takeName(Callsite);
      Callsite->eraseFromParent();
      break;
    }
    case Intrinsic::experimental_gc_get_pointer_offset: {
      Changed = true;
      Value *Derived = Callsite->getOperand(0);
      Value *Base = findBasePointer(Derived, DVCache, KnownBases);
      assert(!DVCache.count(Callsite));
      unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
      unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
      IRBuilder<> Builder(Callsite);
      Value *BaseInt =
          Builder.CreatePtrToInt(Base, Type::getIntNTy(Context, IntPtrSize),
                                 suffixed_name_or(Base, ".int", ""));
      Value *DerivedInt =
          Builder.CreatePtrToInt(Derived, Type::getIntNTy(Context, IntPtrSize),
                                 suffixed_name_or(Derived, ".int", ""));
      Value *Offset = Builder.CreateSub(DerivedInt, BaseInt);
      Callsite->replaceAllUsesWith(Offset);
      Offset->takeName(Callsite);
      Callsite->eraseFromParent();
      break;
    }
    default:
      llvm_unreachable("Unknown intrinsic");
    }

  return Changed;
}
static bool insertParsePoints(Function &F, DominatorTree &DT,
                              TargetTransformInfo &TTI,
                              SmallVectorImpl<CallBase *> &ToUpdate,
                              DefiningValueMapTy &DVCache,
                              IsKnownBaseMapTy &KnownBases) {
  std::unique_ptr<GCStrategy> GC = findGCStrategy(F);

#ifndef NDEBUG
  // Validate the input
  std::set<CallBase *> Uniqued;
  Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
  assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");

  for (CallBase *Call : ToUpdate)
    assert(Call->getFunction() == &F);
#endif

  // When inserting gc.relocates for invokes, we need to be able to insert at
  // the top of the successor blocks. See the comment on
  // normalizeForInvokeSafepoint for exactly what is needed. Note that this
  // step may restructure the CFG.
  for (CallBase *Call : ToUpdate) {
    auto *II = dyn_cast<InvokeInst>(Call);
    if (!II)
      continue;
    normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
    normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
  }

  // A list of dummy calls added to the IR to keep various values obviously
  // live in the IR. We'll remove all of these when done.
  SmallVector<CallInst *, 64> Holders;

  // Insert a dummy call with all of the deopt operands we'll need for the
  // actual safepoint insertion as arguments. This ensures reference operands
  // in the deopt argument list are considered live through the safepoint (and
  // thus makes sure they get relocated.)
  for (CallBase *Call : ToUpdate) {
    SmallVector<Value *, 64> DeoptValues;

    for (Value *Arg : GetDeoptBundleOperands(Call)) {
      assert(!isUnhandledGCPointerType(Arg->getType(), GC.get()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(Arg->getType(), GC.get()))
        DeoptValues.push_back(Arg);
    }

    insertUseHolderAfter(Call, DeoptValues, Holders);
  }

  SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());

  // A) Identify all gc pointers which are statically live at the given call
  // site.
  findLiveReferences(F, DT, ToUpdate, Records, GC.get());

  /// Global mapping from live pointers to a base-defining-value.
  PointerToBaseTy PointerToBase;

  // B) Find the base pointers for each live pointer
  for (size_t i = 0; i < Records.size(); i++) {
    PartiallyConstructedSafepointRecord &info = Records[i];
    findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase, KnownBases);
  }
  if (PrintBasePointers) {
    errs() << "Base Pairs (w/o Relocation):\n";
    for (auto &Pair : PointerToBase) {
      errs() << " derived ";
      Pair.first->printAsOperand(errs(), false);
      errs() << " base ";
      Pair.second->printAsOperand(errs(), false);
      errs() << "\n";
    }
  }

  // The base phi insertion logic (for any safepoint) may have inserted new
  // instructions which are now live at some safepoint. The simplest such
  // example is:
  // loop:
  //   phi a  <-- will be a new base_phi here
  //   safepoint 1 <-- that needs to be live here
  //   gep a + 1
  //   safepoint 2
  //   br loop
  // We insert some dummy calls after each safepoint to definitely hold live
  // the base pointers which were identified for that safepoint. We'll then
  // ask liveness for _every_ base inserted to see what is now live. Then we
  // remove the dummy calls.
  Holders.reserve(Holders.size() + Records.size());
  for (size_t i = 0; i < Records.size(); i++) {
    PartiallyConstructedSafepointRecord &Info = Records[i];

    SmallVector<Value *, 128> Bases;
    for (auto *Derived : Info.LiveSet) {
      assert(PointerToBase.count(Derived) && "Missed base for derived pointer");
      Bases.push_back(PointerToBase[Derived]);
    }

    insertUseHolderAfter(ToUpdate[i], Bases, Holders);
  }

  // By selecting base pointers, we've effectively inserted new uses. Thus, we
  // need to rerun liveness. We may *also* have inserted new defs, but that's
  // not the key issue.
  recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase, GC.get());

  if (PrintBasePointers) {
    errs() << "Base Pairs: (w/Relocation)\n";
    for (auto Pair : PointerToBase) {
      errs() << " derived ";
      Pair.first->printAsOperand(errs(), false);
      errs() << " base ";
      Pair.second->printAsOperand(errs(), false);
      errs() << "\n";
    }
  }

  // It is possible that non-constant live variables have a constant base. For
  // example, a GEP with a variable offset from a global. In this case we can
  // remove it from the liveset. We already don't add constants to the liveset
  // because we assume they won't move at runtime and the GC doesn't need to be
  // informed about them. The same reasoning applies if the base is constant.
  // Note that the relocation placement code relies on this filtering for
  // correctness as it expects the base to be in the liveset, which isn't true
  // if the base is constant.
  for (auto &Info : Records) {
    Info.LiveSet.remove_if([&](Value *LiveV) {
      assert(PointerToBase.count(LiveV) && "Missed base for derived pointer");
      return isa<Constant>(PointerToBase[LiveV]);
    });
  }

  for (CallInst *CI : Holders)
    CI->eraseFromParent();

  Holders.clear();

  // Compute the cost of possible re-materialization of derived pointers.
  RematCandTy RematerizationCandidates;
  findRematerializationCandidates(PointerToBase, RematerizationCandidates, TTI);

  // In order to reduce the live set of a statepoint we might choose to
  // rematerialize some values instead of relocating them. This is purely an
  // optimization and does not influence correctness.
  // First try rematerialization at uses, then after statepoints.
  rematerializeLiveValuesAtUses(RematerizationCandidates, Records,
                                PointerToBase);
  for (size_t i = 0; i < Records.size(); i++)
    rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase,
                            RematerizationCandidates, TTI);

  // We need this to safely RAUW and delete call or invoke return values that
  // may themselves be live over a statepoint. For details, please see usage in
  // makeStatepointExplicitImpl.
  std::vector<DeferredReplacement> Replacements;

  // Now run through and replace the existing statepoints with new ones with
  // the live variables listed. We do not yet update uses of the values being
  // relocated. We have references to live variables that need to
  // survive to the last iteration of this loop. (By construction, the
  // previous statepoint can not be a live variable, thus we can safely remove
  // the old statepoint calls as we go.)
  for (size_t i = 0; i < Records.size(); i++)
    makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements,
                           PointerToBase, GC.get());

  ToUpdate.clear(); // prevent accidental use of invalid calls.

  for (auto &PR : Replacements)
    PR.doReplacement();

  Replacements.clear();

  for (auto &Info : Records) {
    // These live sets may contain stale Value pointers, since we replaced calls
    // with operand bundles with calls wrapped in gc.statepoint, and some of
    // those calls may have been def'ing live gc pointers. Clear these out to
    // avoid accidentally using them.
    //
    // TODO: We should create a separate data structure that does not contain
    // these live sets, and migrate to using that data structure from this point
    // onward.
    Info.LiveSet.clear();
  }
  PointerToBase.clear();

  // Do all the fixups of the original live variables to their relocated selves
  SmallVector<Value *, 128> Live;
  for (const PartiallyConstructedSafepointRecord &Info : Records) {
    // We can't simply save the live set from the original insertion. One of
    // the live values might be the result of a call which needs a safepoint.
    // That Value* no longer exists and we need to use the new gc_result.
    // Thankfully, the live set is embedded in the statepoint (and updated), so
    // we just grab that.
    llvm::append_range(Live, Info.StatepointToken->gc_args());
#ifndef NDEBUG
    // Do some basic validation checking on our liveness results before
    // performing relocation. Relocation can and will turn mistakes in liveness
    // results into non-sensical code which is much harder to debug.
    // TODO: It would be nice to test consistency as well
    assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
           "statepoint must be reachable or liveness is meaningless");
    for (Value *V : Info.StatepointToken->gc_args()) {
      if (!isa<Instruction>(V))
        // Non-instruction values trivially dominate all possible uses
        continue;
      auto *LiveInst = cast<Instruction>(V);
      assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
             "unreachable values should never be live");
      assert(DT.dominates(LiveInst, Info.StatepointToken) &&
             "basic SSA liveness expectation violated by liveness analysis");
    }
#endif
  }
  unique_unsorted(Live);

#ifndef NDEBUG
  // Validation check
  for (auto *Ptr : Live)
    assert(isHandledGCPointerType(Ptr->getType(), GC.get()) &&
           "must be a gc pointer type");
#endif

  relocationViaAlloca(F, DT, Live, Records);
  return !Records.empty();
}
// List of all parameter and return attributes which must be stripped when
// lowering from the abstract machine model. Note that we list attributes
// here which aren't valid as return attributes, that is okay.
static AttributeMask getParamAndReturnAttributesToRemove() {
  AttributeMask R;
  R.addAttribute(Attribute::Dereferenceable);
  R.addAttribute(Attribute::DereferenceableOrNull);
  R.addAttribute(Attribute::ReadNone);
  R.addAttribute(Attribute::ReadOnly);
  R.addAttribute(Attribute::WriteOnly);
  R.addAttribute(Attribute::NoAlias);
  R.addAttribute(Attribute::NoFree);
  return R;
}
static void stripNonValidAttributesFromPrototype(Function &F) {
  LLVMContext &Ctx = F.getContext();

  // Intrinsics are very delicate. Lowering sometimes depends on the presence
  // of certain attributes for correctness, but we may have also inferred
  // additional ones in the abstract machine model which need stripped. This
  // assumes that the attributes defined in Intrinsic.td are conservatively
  // correct for both physical and abstract model.
  if (Intrinsic::ID id = F.getIntrinsicID()) {
    F.setAttributes(Intrinsic::getAttributes(Ctx, id));
    return;
  }

  AttributeMask R = getParamAndReturnAttributesToRemove();
  for (Argument &A : F.args())
    if (isa<PointerType>(A.getType()))
      F.removeParamAttrs(A.getArgNo(), R);

  if (isa<PointerType>(F.getReturnType()))
    F.removeRetAttrs(R);

  for (auto Attr : FnAttrsToStrip)
    F.removeFnAttr(Attr);
}
/// Certain metadata on instructions are invalid after running RS4GC.
/// Optimizations that run after RS4GC can incorrectly use this metadata to
/// optimize functions. We drop such metadata on the instruction.
static void stripInvalidMetadataFromInstruction(Instruction &I) {
  if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
    return;
  // These are the attributes that are still valid on loads and stores after
  // RS4GC.
  // The metadata implying dereferenceability and noalias are (conservatively)
  // dropped. This is because semantically, after RewriteStatepointsForGC runs,
  // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can
  // touch the entire heap including noalias objects. Note: The reasoning is
  // the same as stripping the dereferenceability and noalias attributes that
  // are analogous to the metadata counterparts.
  // We also drop the invariant.load metadata on the load because that metadata
  // implies the address operand to the load points to memory that is never
  // changed once it became dereferenceable. This is no longer true after RS4GC.
  // Similar reasoning applies to invariant.group metadata, which applies to
  // loads within a group.
  unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa,
                                        LLVMContext::MD_range,
                                        LLVMContext::MD_alias_scope,
                                        LLVMContext::MD_nontemporal,
                                        LLVMContext::MD_nonnull,
                                        LLVMContext::MD_align,
                                        LLVMContext::MD_type};

  // Drops all metadata on the instruction other than ValidMetadataAfterRS4GC.
  I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC);
}
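// Illustrative effect (metadata node ids hypothetical): a load such as
//   %v = load ptr addrspace(1), ptr %p, !invariant.load !0, !nonnull !1
// keeps !nonnull (it is in the list above) but loses !invariant.load, since a
// gc.statepoint may conceptually rewrite the whole heap and the invariance no
// longer holds afterwards.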
static void stripNonValidDataFromBody(Function &F) {
  if (F.empty())
    return;

  LLVMContext &Ctx = F.getContext();
  MDBuilder Builder(Ctx);

  // Set of invariant.start instructions that we need to remove.
  // Use this to avoid invalidating the instruction iterator.
  SmallVector<IntrinsicInst *, 12> InvariantStartInstructions;

  for (Instruction &I : instructions(F)) {
    // invariant.start on a memory location implies that the referenced memory
    // location is constant and unchanging. This is no longer true after
    // RewriteStatepointsForGC runs because there can be calls to gc.statepoint
    // which frees the entire heap, and the presence of invariant.start allows
    // the optimizer to sink the load of a memory location past a statepoint,
    // which is incorrect.
    if (auto *II = dyn_cast<IntrinsicInst>(&I))
      if (II->getIntrinsicID() == Intrinsic::invariant_start) {
        InvariantStartInstructions.push_back(II);
        continue;
      }

    if (MDNode *Tag = I.getMetadata(LLVMContext::MD_tbaa)) {
      MDNode *MutableTBAA = Builder.createMutableTBAAAccessTag(Tag);
      I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
    }

    stripInvalidMetadataFromInstruction(I);

    AttributeMask R = getParamAndReturnAttributesToRemove();
    if (auto *Call = dyn_cast<CallBase>(&I)) {
      for (int i = 0, e = Call->arg_size(); i != e; i++)
        if (isa<PointerType>(Call->getArgOperand(i)->getType()))
          Call->removeParamAttrs(i, R);
      if (isa<PointerType>(Call->getType()))
        Call->removeRetAttrs(R);
    }
  }

  // Delete the invariant.start instructions and RAUW poison.
  for (auto *II : InvariantStartInstructions) {
    II->replaceAllUsesWith(PoisonValue::get(II->getType()));
    II->eraseFromParent();
  }
}

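// Note on the TBAA rewrite above: a constant TBAA access tag asserts that the
// location is never modified, which no longer holds once statepoints may
// conceptually rewrite the heap; createMutableTBAAAccessTag() downgrades the
// tag to its mutable form instead of dropping the aliasing info entirely.
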
/// Looks up the GC strategy for a given function, returning null if the
/// function doesn't have a GC tag.
static std::unique_ptr<GCStrategy> findGCStrategy(Function &F) {
  if (!F.hasGC())
    return nullptr;

  return getGCStrategy(F.getGC());
}

/// Returns true if this function should be rewritten by this pass. The main
/// point of this function is as an extension point for custom logic.
static bool shouldRewriteStatepointsIn(Function &F) {
  if (!F.hasGC())
    return false;

  std::unique_ptr<GCStrategy> Strategy = findGCStrategy(F);

  assert(Strategy && "GC strategy is required by function, but was not found");

  return Strategy->useRS4GC();
}

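// For illustration only (hypothetical IR, not from the original source): a
// function opts in to this pass via its "gc" attribute, e.g.
//   define ptr @f(ptr %obj) gc "statepoint-example" { ... }
// Functions with no "gc" attribute, or whose strategy does not use RS4GC,
// are skipped.
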
static void stripNonValidData(Module &M) {
  assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!");

  for (Function &F : M)
    stripNonValidAttributesFromPrototype(F);

  for (Function &F : M)
    stripNonValidDataFromBody(F);
}

bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
                                            TargetTransformInfo &TTI,
                                            const TargetLibraryInfo &TLI) {
  assert(!F.isDeclaration() && !F.empty() &&
         "need function body to rewrite statepoints in");
  assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision");

  auto NeedsRewrite = [&TLI](Instruction &I) {
    if (const auto *Call = dyn_cast<CallBase>(&I)) {
      if (isa<GCStatepointInst>(Call))
        return false;
      if (callsGCLeafFunction(Call, TLI))
        return false;

      // Normally it's up to the frontend to make sure that non-leaf calls also
      // have proper deopt state if it is required. We make an exception for
      // element atomic memcpy/memmove intrinsics here. Unlike other intrinsics
      // these are non-leaf by default. They might be generated by the optimizer
      // which doesn't know how to produce a proper deopt state. So if we see a
      // non-leaf memcpy/memmove without deopt state just treat it as a leaf
      // copy and don't produce a statepoint.
      if (!AllowStatepointWithNoDeoptInfo &&
          !Call->getOperandBundle(LLVMContext::OB_deopt)) {
        assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) &&
               "Don't expect any other calls here!");
        return false;
      }
      return true;
    }
    return false;
  };

  // Delete any unreachable statepoints so that we don't have unrewritten
  // statepoints surviving this pass. This makes testing easier and the
  // resulting IR less confusing to human readers.
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  bool MadeChange = removeUnreachableBlocks(F, &DTU);
  // Flush the Dominator Tree.
  DTU.getDomTree();

  // Gather all the statepoints which need to be rewritten. Be careful to only
  // consider those in reachable code since we need to ask dominance queries
  // when rewriting. We'll delete the unreachable ones in a moment.
  SmallVector<CallBase *, 64> ParsePointNeeded;
  SmallVector<CallInst *, 64> Intrinsics;
  for (Instruction &I : instructions(F)) {
    // TODO: only the ones with the flag set!
    if (NeedsRewrite(I)) {
      // NOTE removeUnreachableBlocks() is stronger than
      // DominatorTree::isReachableFromEntry(). In other words
      // removeUnreachableBlocks can remove some blocks for which
      // isReachableFromEntry() returns true.
      assert(DT.isReachableFromEntry(I.getParent()) &&
             "no unreachable blocks expected");
      ParsePointNeeded.push_back(cast<CallBase>(&I));
    }
    if (auto *CI = dyn_cast<CallInst>(&I))
      if (CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_base ||
          CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_offset)
        Intrinsics.emplace_back(CI);
  }

  // Return early if no work to do.
  if (ParsePointNeeded.empty() && Intrinsics.empty())
    return MadeChange;

  // As a prepass, go ahead and aggressively destroy single-entry phi nodes.
  // These are created by LCSSA. They have the effect of increasing the size
  // of liveness sets for no good reason. It may be harder to do this post
  // insertion since relocations and base phis can confuse things.
  for (BasicBlock &BB : F)
    if (BB.getUniquePredecessor())
      MadeChange |= FoldSingleEntryPHINodes(&BB);

  // Before we start introducing relocations, we want to tweak the IR a bit to
  // avoid unfortunate code generation effects. The main example is that we
  // want to try to make sure the comparison feeding a branch is after any
  // safepoints. Otherwise, we end up with a comparison of pre-relocation
  // values feeding a branch after relocation. This is semantically correct,
  // but results in extra register pressure since both the pre-relocation and
  // post-relocation copies must be available in registers. For code without
  // relocations this is handled elsewhere, but teaching the scheduler to
  // reverse the transform we're about to do would be slightly complex.
  // Note: This may extend the live range of the inputs to the icmp and thus
  // increase the liveset of any statepoint we move over. This is profitable
  // as long as all statepoints are in rare blocks. If we had in-register
  // lowering for live values this would be a much safer transform.
  auto getConditionInst = [](Instruction *TI) -> Instruction * {
    if (auto *BI = dyn_cast<BranchInst>(TI))
      if (BI->isConditional())
        return dyn_cast<Instruction>(BI->getCondition());
    // TODO: Extend this to handle switches
    return nullptr;
  };
  for (BasicBlock &BB : F) {
    Instruction *TI = BB.getTerminator();
    if (auto *Cond = getConditionInst(TI))
      // TODO: Handle more than just ICmps here. We should be able to move
      // most instructions without side effects or memory access.
      if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
        MadeChange = true;
        Cond->moveBefore(TI);
      }
  }

  // Nasty workaround - The base computation code in the main algorithm doesn't
  // consider the fact that a GEP can be used to convert a scalar to a vector.
  // The right fix for this is to integrate GEPs into the base rewriting
  // algorithm properly; this is just a short-term workaround to prevent
  // crashes by canonicalizing such GEPs into fully vector GEPs.
  for (Instruction &I : instructions(F)) {
    if (!isa<GetElementPtrInst>(I))
      continue;

    unsigned VF = 0;
    for (unsigned i = 0; i < I.getNumOperands(); i++)
      if (auto *OpndVTy = dyn_cast<VectorType>(I.getOperand(i)->getType())) {
        assert(VF == 0 ||
               VF == cast<FixedVectorType>(OpndVTy)->getNumElements());
        VF = cast<FixedVectorType>(OpndVTy)->getNumElements();
      }

    // It's the vector-to-scalar traversal through the pointer operand which
    // confuses base pointer rewriting, so limit ourselves to that case.
    if (!I.getOperand(0)->getType()->isVectorTy() && VF != 0) {
      IRBuilder<> B(&I);
      auto *Splat = B.CreateVectorSplat(VF, I.getOperand(0));
      I.setOperand(0, Splat);
      MadeChange = true;
    }
  }

  // Cache the 'defining value' relation used in the computation and
  // insertion of base phis and selects. This ensures that we don't insert
  // large numbers of duplicate base_phis. Use one cache for both
  // inlineGetBaseAndOffset() and insertParsePoints().
  DefiningValueMapTy DVCache;

  // Mapping between a base value and a flag indicating whether it's a known
  // base or not.
  IsKnownBaseMapTy KnownBases;

  if (!Intrinsics.empty())
    // Inline @gc.get.pointer.base() and @gc.get.pointer.offset() before
    // finding live references.
    MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache, KnownBases);

  if (!ParsePointNeeded.empty())
    MadeChange |=
        insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, KnownBases);

  return MadeChange;
}

// -------------------------------------------------------------------
// liveness computation via standard dataflow
// -------------------------------------------------------------------

// TODO: Consider using bitvectors for liveness; the set of potentially
// interesting values should be small and easy to pre-compute.

/// Compute the live-in set for the location rbegin starting from
/// the live-out set of the basic block
static void computeLiveInValues(BasicBlock::reverse_iterator Begin,
                                BasicBlock::reverse_iterator End,
                                SetVector<Value *> &LiveTmp, GCStrategy *GC) {
  for (auto &I : make_range(Begin, End)) {
    // KILL/Def - Remove this definition from LiveIn
    LiveTmp.remove(&I);

    // Don't consider *uses* in PHI nodes; we handle their contribution to
    // predecessor blocks when we seed the LiveOut sets.
    if (isa<PHINode>(I))
      continue;

    // USE - Add to the LiveIn set for this instruction
    for (Value *V : I.operands()) {
      assert(!isUnhandledGCPointerType(V->getType(), GC) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(V->getType(), GC) && !isa<Constant>(V)) {
        // The choice to exclude all things constant here is slightly subtle.
        // There are two independent reasons:
        // - We assume that things which are constant (from LLVM's definition)
        // do not move at runtime. For example, the address of a global
        // variable is fixed, even though its contents may not be.
        // - Second, we can't disallow arbitrary inttoptr constants even
        // if the language frontend does. Optimization passes are free to
        // locally exploit facts without respect to global reachability. This
        // can create sections of code which are dynamically unreachable and
        // contain just about anything. (see constants.ll in tests)
        LiveTmp.insert(V);
      }
    }
  }
}

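// For illustration only (hypothetical IR, not from the original source):
// scanning a block backwards from its live-out set,
//   %p = getelementptr i8, ptr %q, i64 8  ; def of %p removes it (KILL),
//                                         ; use of %q adds it (GEN)
// so a GC pointer is live-in exactly when it is used before being defined.
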
static void computeLiveOutSeed(BasicBlock *BB, SetVector<Value *> &LiveTmp,
                               GCStrategy *GC) {
  for (BasicBlock *Succ : successors(BB)) {
    for (auto &I : *Succ) {
      PHINode *PN = dyn_cast<PHINode>(&I);
      if (!PN)
        break;

      Value *V = PN->getIncomingValueForBlock(BB);
      assert(!isUnhandledGCPointerType(V->getType(), GC) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(V->getType(), GC) && !isa<Constant>(V))
        LiveTmp.insert(V);
    }
  }
}

static SetVector<Value *> computeKillSet(BasicBlock *BB, GCStrategy *GC) {
  SetVector<Value *> KillSet;
  for (Instruction &I : *BB)
    if (isHandledGCPointerType(I.getType(), GC))
      KillSet.insert(&I);
  return KillSet;
}

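// Note: the kill set is simply "every GC pointer defined in this block"; the
// dataflow below subtracts it so that a value defined in a block is never
// counted as live-in to that same block.
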
/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
/// validation check for the liveness computation.
static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live,
                          Instruction *TI, bool TermOkay = false) {
  for (Value *V : Live) {
    if (auto *I = dyn_cast<Instruction>(V)) {
      // The terminator can be a member of the LiveOut set. LLVM's definition
      // of instruction dominance states that V does not dominate itself. As
      // such, we need to special case this to allow it.
      if (TermOkay && TI == I)
        continue;
      assert(DT.dominates(I, TI) &&
             "basic SSA liveness expectation violated by liveness analysis");
    }
  }
}

/// Check that all the liveness sets used during the computation of liveness
/// obey basic SSA properties. This is useful for finding cases where we miss
/// a def.
static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
                          BasicBlock &BB) {
  checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
  checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
  checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
}

static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data, GCStrategy *GC) {
  SmallSetVector<BasicBlock *, 32> Worklist;

  // Seed the liveness for each individual block
  for (BasicBlock &BB : F) {
    Data.KillSet[&BB] = computeKillSet(&BB, GC);
    Data.LiveSet[&BB].clear();
    computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB], GC);

    for (Value *Kill : Data.KillSet[&BB])
      assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");

    Data.LiveOut[&BB] = SetVector<Value *>();
    computeLiveOutSeed(&BB, Data.LiveOut[&BB], GC);
    Data.LiveIn[&BB] = Data.LiveSet[&BB];
    Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]);
    Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]);
    if (!Data.LiveIn[&BB].empty())
      Worklist.insert(pred_begin(&BB), pred_end(&BB));
  }

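  // In dataflow terms: LiveIn(BB) = (LiveOut(BB) union LiveSet(BB)) minus
  // KillSet(BB), and LiveOut(BB) is the union of LiveIn(Succ) over all
  // successors. The worklist loop below iterates these equations to a fixed
  // point.
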
  // Propagate that liveness until stable
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();

    // Compute our new liveout set, then exit early if it hasn't changed
    // despite the contribution of our successors.
    SetVector<Value *> LiveOut = Data.LiveOut[BB];
    const auto OldLiveOutSize = LiveOut.size();
    for (BasicBlock *Succ : successors(BB)) {
      assert(Data.LiveIn.count(Succ));
      LiveOut.set_union(Data.LiveIn[Succ]);
    }
    // assert: OldLiveOut is a subset of LiveOut
    if (OldLiveOutSize == LiveOut.size()) {
      // If the sets are the same size, then we didn't actually add anything
      // when unioning our successors' LiveIn sets. Thus, the LiveIn of this
      // block hasn't changed.
      continue;
    }
    Data.LiveOut[BB] = LiveOut;

    // Apply the effects of this basic block
    SetVector<Value *> LiveTmp = LiveOut;
    LiveTmp.set_union(Data.LiveSet[BB]);
    LiveTmp.set_subtract(Data.KillSet[BB]);

    assert(Data.LiveIn.count(BB));
    const SetVector<Value *> &OldLiveIn = Data.LiveIn[BB];
    // assert: OldLiveIn is a subset of LiveTmp
    if (OldLiveIn.size() != LiveTmp.size()) {
      Data.LiveIn[BB] = LiveTmp;
      Worklist.insert(pred_begin(BB), pred_end(BB));
    }
  } // while (!Worklist.empty())

  // Verify our output against SSA properties. This helps catch any
  // missing kills during the above iteration.
  for (BasicBlock &BB : F)
    checkBasicSSA(DT, Data, BB);
}

static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &Out, GCStrategy *GC) {
  BasicBlock *BB = Inst->getParent();

  // Note: The copy is intentional and required
  assert(Data.LiveOut.count(BB));
  SetVector<Value *> LiveOut = Data.LiveOut[BB];

  // We want to handle the statepoint itself oddly. Its call result is not
  // live (normal), nor are its arguments (unless they're used again later).
  // This adjustment is specifically what we need to relocate.
  computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(), LiveOut,
                      GC);
  LiveOut.remove(Inst);
  Out.insert(LiveOut.begin(), LiveOut.end());
}

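// For illustration only (hypothetical IR, not from the original source):
// given
//   %sp = call token @foo(ptr %obj) [ "deopt"() ]
// %obj appears in the live set computed here only if it is used again after
// the call, and the call's own result is likewise excluded.
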
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
                                  CallBase *Call,
                                  PartiallyConstructedSafepointRecord &Info,
                                  PointerToBaseTy &PointerToBase,
                                  GCStrategy *GC) {
  StatepointLiveSetTy Updated;
  findLiveSetAtInst(Call, RevisedLivenessData, Updated, GC);

  // We may have base pointers which are now live that weren't before. We need
  // to update the PointerToBase structure to reflect this.
  for (auto *V : Updated)
    PointerToBase.insert({V, V});

  Info.LiveSet = Updated;
}