//===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Rewrite call/invoke instructions so as to make potential relocations
// performed by the garbage collector explicit in the IR.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GCStrategy.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;
#define DEBUG_TYPE "rewrite-statepoints-for-gc"
// Print the liveset found at the insert location
static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
                                  cl::init(false));
static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
                                      cl::init(false));

// Print out the base pointers for debugging
static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
                                       cl::init(false));

// Cost threshold measuring when it is profitable to rematerialize value instead
// of relocating it
static cl::opt<unsigned>
    RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
                               cl::init(6));

#ifdef EXPENSIVE_CHECKS
static bool ClobberNonLive = true;
#else
static bool ClobberNonLive = false;
#endif

static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
                                                  cl::location(ClobberNonLive),
                                                  cl::Hidden);

static cl::opt<bool>
    AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
                                   cl::Hidden, cl::init(true));

static cl::opt<bool> RematDerivedAtUses("rs4gc-remat-derived-at-uses",
                                        cl::Hidden, cl::init(true));
/// The IR fed into RewriteStatepointsForGC may have had attributes and
/// metadata implying dereferenceability that are no longer valid/correct after
/// RewriteStatepointsForGC has run. This is because semantically, after
/// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
/// heap. stripNonValidData (conservatively) restores
/// correctness by erasing all attributes in the module that externally imply
/// dereferenceability. Similar reasoning also applies to the noalias
/// attributes and metadata. gc.statepoint can touch the entire heap including
/// noalias objects.
///
/// Apart from attributes and metadata, we also remove instructions that imply
/// constant physical memory: llvm.invariant.start.
static void stripNonValidData(Module &M);
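
// Illustrative sketch (not code from this file): an argument attribute such as
//
//   define void @f(ptr addrspace(1) dereferenceable(8) %p) gc "statepoint-example"
//
// may no longer hold once a statepoint can move the object %p refers to, so
// stripNonValidData conservatively drops it module-wide.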
// Find the GC strategy for a function, or null if it doesn't have one.
static std::unique_ptr<GCStrategy> findGCStrategy(Function &F);

static bool shouldRewriteStatepointsIn(Function &F);
PreservedAnalyses RewriteStatepointsForGC::run(Module &M,
                                               ModuleAnalysisManager &AM) {
  bool Changed = false;
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M) {
    // Nothing to do for declarations.
    if (F.isDeclaration() || F.empty())
      continue;

    // Policy choice says not to rewrite - the most common reason is that we're
    // compiling code without a GCStrategy.
    if (!shouldRewriteStatepointsIn(F))
      continue;

    auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
    auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
    auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
    Changed |= runOnFunction(F, DT, TTI, TLI);
  }
  if (!Changed)
    return PreservedAnalyses::all();

  // stripNonValidData asserts that shouldRewriteStatepointsIn
  // returns true for at least one function in the module. Since at least
  // one function changed, we know that the precondition is satisfied.
  stripNonValidData(M);

  PreservedAnalyses PA;
  PA.preserve<TargetIRAnalysis>();
  PA.preserve<TargetLibraryAnalysis>();
  return PA;
}
namespace {

struct GCPtrLivenessData {
  /// Values defined in this block.
  MapVector<BasicBlock *, SetVector<Value *>> KillSet;

  /// Values used in this block (and thus live); does not include values
  /// killed within this block.
  MapVector<BasicBlock *, SetVector<Value *>> LiveSet;

  /// Values live into this basic block (i.e. used by any
  /// instruction in this basic block or ones reachable from here)
  MapVector<BasicBlock *, SetVector<Value *>> LiveIn;

  /// Values live out of this basic block (i.e. live into
  /// any successor block)
  MapVector<BasicBlock *, SetVector<Value *>> LiveOut;
};
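
// For reference, these sets are related by the usual backward dataflow
// equations (a sketch of the intended invariant, not code in this file):
//   LiveOut(BB) = union of LiveIn(Succ) over all successors Succ of BB
//   LiveIn(BB)  = LiveSet(BB) U (LiveOut(BB) - KillSet(BB))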
// The type of the internal cache used inside the findBasePointers family
// of functions. From the caller's perspective, this is an opaque type and
// should not be inspected.
//
// In the actual implementation this caches two relations:
// - The base relation itself (i.e. this pointer is based on that one)
// - The base defining value relation (i.e. before base_phi insertion)
// Generally, after the execution of a full findBasePointer call, only the
// base relation will remain. Internally, we add a mixture of the two
// types, then update all the second type to the first type
using DefiningValueMapTy = MapVector<Value *, Value *>;
using IsKnownBaseMapTy = MapVector<Value *, bool>;
using PointerToBaseTy = MapVector<Value *, Value *>;
using StatepointLiveSetTy = SetVector<Value *>;
using RematerializedValueMapTy =
    MapVector<AssertingVH<Instruction>, AssertingVH<Value>>;
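
// Illustrative sketch of the two relations mixed in DefiningValueMapTy:
// during a findBasePointer run the cache may transiently hold
//   %derived -> %phi    (the base *defining value* relation)
// and is updated on completion to
//   %derived -> %base   (the base relation proper).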
struct PartiallyConstructedSafepointRecord {
  /// The set of values known to be live across this safepoint
  StatepointLiveSetTy LiveSet;

  /// The *new* gc.statepoint instruction itself. This produces the token
  /// that normal-path gc.relocates and the gc.result are tied to.
  GCStatepointInst *StatepointToken;

  /// Instruction to which exceptional gc relocates are attached.
  /// Makes it easier to iterate through them during relocationViaAlloca.
  Instruction *UnwindToken;

  /// Record live values we rematerialized instead of relocating.
  /// They are not included in the 'LiveSet' field.
  /// Maps each rematerialized copy to its original value.
  RematerializedValueMapTy RematerializedValues;
};
struct RematerizlizationCandidateRecord {
  // Chain from derived pointer to base.
  SmallVector<Instruction *, 3> ChainToBase;
  // Original base.
  Value *RootOfChain;
  // Cost of chain.
  InstructionCost Cost;
};
using RematCandTy = MapVector<Value *, RematerizlizationCandidateRecord>;

} // end anonymous namespace
static ArrayRef<Use> GetDeoptBundleOperands(const CallBase *Call) {
  std::optional<OperandBundleUse> DeoptBundle =
      Call->getOperandBundle(LLVMContext::OB_deopt);

  if (!DeoptBundle) {
    assert(AllowStatepointWithNoDeoptInfo &&
           "Found non-leaf call without deopt info!");
    return {};
  }

  return DeoptBundle->Inputs;
}
/// Compute the live-in set for every basic block in the function
static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data, GCStrategy *GC);

/// Given results from the dataflow liveness computation, find the set of live
/// Values at a particular instruction.
static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &out, GCStrategy *GC);
static bool isGCPointerType(Type *T, GCStrategy *GC) {
  assert(GC && "GC Strategy for isGCPointerType cannot be null");

  if (!isa<PointerType>(T))
    return false;

  // conservative - same as StatepointLowering
  return GC->isGCManagedPointer(T).value_or(true);
}
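
// For example, the in-tree "statepoint-example" strategy answers this query by
// treating addrspace(1) pointers as GC-managed and all other pointers as not;
// strategies that decline to answer (std::nullopt) are conservatively treated
// as managed by the value_or(true) above.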
// Return true if this type is one which a) is a gc pointer or contains a GC
// pointer and b) is of a type this code expects to encounter as a live value.
// (The insertion code will assert that a type which matches (a) and not (b)
// is not encountered.)
static bool isHandledGCPointerType(Type *T, GCStrategy *GC) {
  // We fully support gc pointers
  if (isGCPointerType(T, GC))
    return true;
  // We partially support vectors of gc pointers. The code will assert if it
  // can't handle something.
  if (auto VT = dyn_cast<VectorType>(T))
    if (isGCPointerType(VT->getElementType(), GC))
      return true;
  return false;
}
#ifndef NDEBUG
/// Returns true if this type contains a gc pointer whether we know how to
/// handle that type or not.
static bool containsGCPtrType(Type *Ty, GCStrategy *GC) {
  if (isGCPointerType(Ty, GC))
    return true;
  if (VectorType *VT = dyn_cast<VectorType>(Ty))
    return isGCPointerType(VT->getScalarType(), GC);
  if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
    return containsGCPtrType(AT->getElementType(), GC);
  if (StructType *ST = dyn_cast<StructType>(Ty))
    return llvm::any_of(ST->elements(),
                        [GC](Type *Ty) { return containsGCPtrType(Ty, GC); });
  return false;
}
// Returns true if this is a type which a) is a gc pointer or contains a GC
// pointer and b) is of a type which the code doesn't expect (i.e. first class
// aggregates). Used to trip assertions.
static bool isUnhandledGCPointerType(Type *Ty, GCStrategy *GC) {
  return containsGCPtrType(Ty, GC) && !isHandledGCPointerType(Ty, GC);
}
#endif
// Return the name of the value suffixed with the provided suffix, or, if the
// value didn't have a name, the specified default.
static std::string suffixed_name_or(Value *V, StringRef Suffix,
                                    StringRef DefaultName) {
  return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.str();
}
// Conservatively identifies any definitions which might be live at the
// given instruction. The analysis is performed immediately before the
// given instruction. Values defined by that instruction are not considered
// live. Values used by that instruction are considered live.
static void analyzeParsePointLiveness(
    DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call,
    PartiallyConstructedSafepointRecord &Result, GCStrategy *GC) {
  StatepointLiveSetTy LiveSet;
  findLiveSetAtInst(Call, OriginalLivenessData, LiveSet, GC);

  if (PrintLiveSet) {
    dbgs() << "Live Variables:\n";
    for (Value *V : LiveSet)
      dbgs() << " " << V->getName() << " " << *V << "\n";
  }
  if (PrintLiveSetSize) {
    dbgs() << "Safepoint For: " << Call->getCalledOperand()->getName() << "\n";
    dbgs() << "Number live values: " << LiveSet.size() << "\n";
  }
  Result.LiveSet = LiveSet;
}
/// Returns true if V is a known base.
static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases);

/// Caches the IsKnownBase flag for a value and asserts that it wasn't present
/// in the cache before.
static void setKnownBase(Value *V, bool IsKnownBase,
                         IsKnownBaseMapTy &KnownBases);

static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
                                    IsKnownBaseMapTy &KnownBases);
/// Return a base defining value for the 'Index' element of the given vector
/// instruction 'I'. If Index is null, returns a BDV for the entire vector
/// 'I'. As an optimization, this method will try to determine when the
/// element is known to already be a base pointer. If this can be established,
/// the second value in the returned pair will be true. Note that either a
/// vector or a pointer typed value can be returned. For the former, the
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the latter, the returned pointer is a BDV (or possibly a base) for the
/// particular element in 'I'.
static Value *findBaseDefiningValueOfVector(Value *I, DefiningValueMapTy &Cache,
                                            IsKnownBaseMapTy &KnownBases) {
  // Each case parallels findBaseDefiningValue below, see that code for
  // detailed motivation.

  auto Cached = Cache.find(I);
  if (Cached != Cache.end())
    return Cached->second;

  if (isa<Argument>(I)) {
    // An incoming argument to the function is a base pointer
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (isa<Constant>(I)) {
    // Base of constant vector consists only of constant null pointers.
    // For reasoning see similar case inside 'findBaseDefiningValue' function.
    auto *CAZ = ConstantAggregateZero::get(I->getType());
    Cache[I] = CAZ;
    setKnownBase(CAZ, /* IsKnownBase */ true, KnownBases);
    return CAZ;
  }

  if (isa<LoadInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (isa<InsertElementInst>(I)) {
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ false, KnownBases);
    return I;
  }

  if (isa<ShuffleVectorInst>(I)) {
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    // TODO: There are a number of local optimizations which could be applied
    // here for particular shufflevector patterns.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ false, KnownBases);
    return I;
  }

  // The behavior of getelementptr instructions is the same for vector and
  // non-vector data types.
  if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
    auto *BDV =
        findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
    Cache[I] = BDV;
    return BDV;
  }

  // The behavior of freeze instructions is the same for vector and
  // non-vector data types.
  if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
    auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
    Cache[I] = BDV;
    return BDV;
  }

  // If the pointer comes through a bitcast of a vector of pointers to
  // a vector of another type of pointer, then look through the bitcast
  if (auto *BC = dyn_cast<BitCastInst>(I)) {
    auto *BDV = findBaseDefiningValue(BC->getOperand(0), Cache, KnownBases);
    Cache[I] = BDV;
    return BDV;
  }

  // We assume that functions in the source language only return base
  // pointers. This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  // A PHI or Select is a base defining value. The outer findBasePointer
  // algorithm is responsible for constructing a base value for this BDV.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "unknown vector instruction - no base found for vector element");
  Cache[I] = I;
  setKnownBase(I, /* IsKnownBase */ false, KnownBases);
  return I;
}
/// Helper function for findBasePointer - Will return a value which either a)
/// defines the base pointer for the input, b) blocks the simple search
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
                                    IsKnownBaseMapTy &KnownBases) {
  assert(I->getType()->isPtrOrPtrVectorTy() &&
         "Illegal to ask for the base pointer of a non-pointer type");
  auto Cached = Cache.find(I);
  if (Cached != Cache.end())
    return Cached->second;

  if (I->getType()->isVectorTy())
    return findBaseDefiningValueOfVector(I, Cache, KnownBases);

  if (isa<Argument>(I)) {
    // An incoming argument to the function is a base pointer
    // We should have never reached here if this argument isn't a gc value
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (isa<Constant>(I)) {
    // We assume that objects with a constant base (e.g. a global) can't move
    // and don't need to be reported to the collector because they are always
    // live. Besides global references, all kinds of constants (e.g. undef,
    // constant expressions, null pointers) can be introduced by the inliner or
    // the optimizer, especially on dynamically dead paths.
    // Here we treat all of them as having a single null base. By doing this we
    // try to avoid problems reporting various conflicts in a form of
    // "phi (const1, const2)" or "phi (const, regular gc ptr)".
    // See constant.ll file for relevant test cases.

    auto *CPN = ConstantPointerNull::get(cast<PointerType>(I->getType()));
    Cache[I] = CPN;
    setKnownBase(CPN, /* IsKnownBase */ true, KnownBases);
    return CPN;
  }

  // inttoptrs in an integral address space are currently ill-defined. We
  // treat them as defining base pointers here for consistency with the
  // constant rule above and because we don't really have a better semantic
  // to give them. Note that the optimizer is always free to insert undefined
  // behavior on dynamically dead paths as well.
  if (isa<IntToPtrInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    Value *Def = CI->stripPointerCasts();
    // If stripping pointer casts changes the address space there is an
    // addrspacecast in between.
    assert(cast<PointerType>(Def->getType())->getAddressSpace() ==
               cast<PointerType>(CI->getType())->getAddressSpace() &&
           "unsupported addrspacecast");
    // If we find a cast instruction here, it means we've found a cast which is
    // not simply a pointer cast (i.e. an inttoptr). We don't know how to
    // handle int->ptr conversion.
    assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
    auto *BDV = findBaseDefiningValue(Def, Cache, KnownBases);
    Cache[I] = BDV;
    return BDV;
  }

  if (isa<LoadInst>(I)) {
    // The value loaded is a gc base itself
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
    // The base of this GEP is the base
    auto *BDV =
        findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
    Cache[I] = BDV;
    return BDV;
  }

  if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
    auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
    Cache[I] = BDV;
    return BDV;
  }

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      // fall through to general call handling
      break;
    case Intrinsic::experimental_gc_statepoint:
      llvm_unreachable("statepoints don't produce pointers");
    case Intrinsic::experimental_gc_relocate:
      // Rerunning safepoint insertion after safepoints are already
      // inserted is not supported. It could probably be made to work,
      // but why are you doing this? There's no good reason.
      llvm_unreachable("repeat safepoint insertion is not supported");
    case Intrinsic::gcroot:
      // Currently, this mechanism hasn't been extended to work with gcroot.
      // There's no reason it couldn't be, but I haven't thought about the
      // implications much.
      llvm_unreachable(
          "interaction with the gcroot mechanism is not supported");
    case Intrinsic::experimental_gc_get_pointer_base:
      auto *BDV = findBaseDefiningValue(II->getOperand(0), Cache, KnownBases);
      Cache[I] = BDV;
      return BDV;
    }
  }
  // We assume that functions in the source language only return base
  // pointers. This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  // TODO: I have absolutely no idea how to implement this part yet. It's not
  // necessarily hard, I just haven't really looked at it yet.
  assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");

  if (isa<AtomicCmpXchgInst>(I)) {
    // A CAS is effectively an atomic store and load combined under a
    // predicate. From the perspective of base pointers, we just treat it
    // like a load.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  if (isa<AtomicRMWInst>(I)) {
    assert(cast<AtomicRMWInst>(I)->getOperation() == AtomicRMWInst::Xchg &&
           "Only Xchg is allowed for pointer values");
    // A RMW Xchg is a combined atomic load and store, so we can treat the
    // loaded value as a base pointer.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  // The aggregate ops. Aggregates can either be in the heap or on the
  // stack, but in either case, this is simply a field load. As a result,
  // this is a defining definition of the base just like a load is.
  if (isa<ExtractValueInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */ true, KnownBases);
    return I;
  }

  // We should never see an insert vector since that would require we be
  // tracing back a struct value not a pointer value.
  assert(!isa<InsertValueInst>(I) &&
         "Base pointer for a struct is meaningless");

  // This value might have been generated by findBasePointer() called when
  // substituting gc.get.pointer.base() intrinsic.
  bool IsKnownBase =
      isa<Instruction>(I) && cast<Instruction>(I)->getMetadata("is_base_value");
  setKnownBase(I, /* IsKnownBase */ IsKnownBase, KnownBases);
  Cache[I] = I;

  // An extractelement produces a base result exactly when its input does.
  // We may need to insert a parallel instruction to extract the appropriate
  // element out of the base vector corresponding to the input. Given this,
  // it's analogous to the phi and select case even though it's not a merge.
  if (isa<ExtractElementInst>(I))
    // Note: There are a lot of obvious peephole cases here. These are
    // deliberately handled after the main base pointer inference algorithm to
    // make writing test cases to exercise that code easier.
    return I;

  // The last two cases here don't return a base pointer. Instead, they
  // return a value which dynamically selects from among several base
  // derived pointers (each with its own base potentially). It's the job of
  // the caller to resolve these.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "missing instruction case in findBaseDefiningValue");
  return I;
}
/// Returns the base defining value for this value.
static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache,
                                          IsKnownBaseMapTy &KnownBases) {
  if (!Cache.contains(I)) {
    auto *BDV = findBaseDefiningValue(I, Cache, KnownBases);
    Cache[I] = BDV;
    LLVM_DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
                      << Cache[I]->getName() << ", is known base = "
                      << KnownBases[I] << "\n");
  }
  assert(Cache[I] != nullptr);
  assert(KnownBases.contains(Cache[I]) &&
         "Cached value must be present in known bases map");
  return Cache[I];
}
/// Return a base pointer for this value if known. Otherwise, return its
/// base defining value.
static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache,
                            IsKnownBaseMapTy &KnownBases) {
  Value *Def = findBaseDefiningValueCached(I, Cache, KnownBases);
  auto Found = Cache.find(Def);
  if (Found != Cache.end()) {
    // Either a base-of relation, or a self reference. Caller must check.
    return Found->second;
  }
  // Only a BDV available
  return Def;
}
/// This value is a base pointer that is not generated by RS4GC, i.e. it already
/// exists in the code.
static bool isOriginalBaseResult(Value *V) {
  // no recursion possible
  return !isa<PHINode>(V) && !isa<SelectInst>(V) &&
         !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
         !isa<ShuffleVectorInst>(V);
}
static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases) {
  auto It = KnownBases.find(V);
  assert(It != KnownBases.end() && "Value not present in the map");
  return It->second;
}

static void setKnownBase(Value *V, bool IsKnownBase,
                         IsKnownBaseMapTy &KnownBases) {
#ifndef NDEBUG
  auto It = KnownBases.find(V);
  if (It != KnownBases.end())
    assert(It->second == IsKnownBase && "Changing already present value");
#endif
  KnownBases[V] = IsKnownBase;
}
// Returns true if First and Second values are both scalar or both vector.
static bool areBothVectorOrScalar(Value *First, Value *Second) {
  return isa<VectorType>(First->getType()) ==
         isa<VectorType>(Second->getType());
}
namespace {

/// Models the state of a single base defining value in the findBasePointer
/// algorithm for determining where a new instruction is needed to propagate
/// the base of this BDV.
class BDVState {
public:
  enum StatusTy {
    // Starting state of lattice
    Unknown,
    // Some specific base value -- does *not* mean that instruction
    // propagates the base of the object
    // ex: gep %arg, 16 -> %arg is the base value
    Base,
    // Need to insert a node to represent a merge.
    Conflict
  };

  BDVState() {
    llvm_unreachable("missing state in map");
  }

  explicit BDVState(Value *OriginalValue)
      : OriginalValue(OriginalValue) {}
  explicit BDVState(Value *OriginalValue, StatusTy Status,
                    Value *BaseValue = nullptr)
      : OriginalValue(OriginalValue), Status(Status), BaseValue(BaseValue) {
    assert(Status != Base || BaseValue);
  }

  StatusTy getStatus() const { return Status; }
  Value *getOriginalValue() const { return OriginalValue; }
  Value *getBaseValue() const { return BaseValue; }

  bool isBase() const { return getStatus() == Base; }
  bool isUnknown() const { return getStatus() == Unknown; }
  bool isConflict() const { return getStatus() == Conflict; }

  // Values of type BDVState form a lattice, and this function implements the
  // meet operation.
  void meet(const BDVState &Other) {
    auto markConflict = [&]() {
      Status = BDVState::Conflict;
      BaseValue = nullptr;
    };
    // Conflict is a final state.
    if (isConflict())
      return;
    // If we are not known - just take the other state.
    if (isUnknown()) {
      Status = Other.getStatus();
      BaseValue = Other.getBaseValue();
      return;
    }
    // We are base.
    assert(isBase() && "Unknown state");
    // If other is unknown - just keep our state.
    if (Other.isUnknown())
      return;
    // If other is conflict - it is a final state.
    if (Other.isConflict())
      return markConflict();
    // Other is base as well.
    assert(Other.isBase() && "Unknown state");
    // If bases are different - Conflict.
    if (getBaseValue() != Other.getBaseValue())
      return markConflict();
    // We are identical, do nothing.
  }
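
  // A few illustrative meet results (a sketch, derived from the rules above):
  //   Unknown.meet(Base b)   == Base b
  //   Base b1.meet(Base b1)  == Base b1
  //   Base b1.meet(Base b2)  == Conflict   (for b1 != b2)
  //   X.meet(Conflict)       == Conflict   (for any X)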
  bool operator==(const BDVState &Other) const {
    return OriginalValue == Other.OriginalValue &&
           BaseValue == Other.BaseValue && Status == Other.Status;
  }

  bool operator!=(const BDVState &other) const { return !(*this == other); }

  void print(raw_ostream &OS) const {
    switch (getStatus()) {
    case Unknown:
      OS << "U";
      break;
    case Base:
      OS << "B";
      break;
    case Conflict:
      OS << "C";
      break;
    }
    OS << " (base " << getBaseValue() << " - "
       << (getBaseValue() ? getBaseValue()->getName() : "nullptr") << ")"
       << " for " << OriginalValue->getName() << ":";
  }

private:
  AssertingVH<Value> OriginalValue; // instruction this state corresponds to
  StatusTy Status = Unknown;
  AssertingVH<Value> BaseValue = nullptr; // Non-null only if Status == Base.
};

} // end anonymous namespace

#ifndef NDEBUG
static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
  State.print(OS);
  return OS;
}
#endif
/// For a given value or instruction, figure out what base ptr it's derived
/// from. For gc objects, this is simply itself. On success, returns a value
/// which is the base pointer. (This is reliable and can be used for
/// relocation.) On failure, returns nullptr.
static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
                              IsKnownBaseMapTy &KnownBases) {
  Value *Def = findBaseOrBDV(I, Cache, KnownBases);

  if (isKnownBase(Def, KnownBases) && areBothVectorOrScalar(Def, I))
    return Def;

  // Here's the rough algorithm:
  // - For every SSA value, construct a mapping to either an actual base
  //   pointer or a PHI which obscures the base pointer.
  // - Construct a mapping from PHI to unknown TOP state. Use an
  //   optimistic algorithm to propagate base pointer information. Lattice
  //   looks like:
  //   UNKNOWN
  //   b1 b2 b3 b4
  //   CONFLICT
  //   When algorithm terminates, all PHIs will either have a single concrete
  //   base or be in a conflict state.
  // - For every conflict, insert a dummy PHI node without arguments. Add
  //   these to the base[Instruction] = BasePtr mapping. For every
  //   non-conflict, add the actual base.
  // - For every conflict, add arguments for the base[a] of each input
  //   arguments.
  //
  // Note: A simpler form of this would be to add the conflict form of all
  // PHIs without running the optimistic algorithm. This would be
  // analogous to pessimistic data flow and would likely lead to an
  // overall worse solution.
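  //
  // Illustrative sketch: given
  //   %d = phi ptr addrspace(1) [ %d1, %left ], [ %d2, %right ]
  // where %d1 and %d2 derive from distinct bases %b1 and %b2, %d reaches the
  // Conflict state and a parallel
  //   %d.base = phi ptr addrspace(1) [ %b1, %left ], [ %b2, %right ]
  // is materialized as its base.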
#ifndef NDEBUG
  auto isExpectedBDVType = [](Value *BDV) {
    return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
           isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV) ||
           isa<ShuffleVectorInst>(BDV);
  };
#endif

  // Once populated, will contain a mapping from each potentially non-base BDV
  // to a lattice value (described above) which corresponds to that BDV.
  // We use the order of insertion (DFS over the def/use graph) to provide a
  // stable deterministic ordering for visiting DenseMaps (which are unordered)
  // below. This is important for deterministic compilation.
  MapVector<Value *, BDVState> States;

#ifndef NDEBUG
  auto VerifyStates = [&]() {
    for (auto &Entry : States) {
      assert(Entry.first == Entry.second.getOriginalValue());
    }
  };
#endif

  auto visitBDVOperands = [](Value *BDV, std::function<void (Value *)> F) {
    if (PHINode *PN = dyn_cast<PHINode>(BDV)) {
      for (Value *InVal : PN->incoming_values())
        F(InVal);
    } else if (SelectInst *SI = dyn_cast<SelectInst>(BDV)) {
      F(SI->getTrueValue());
      F(SI->getFalseValue());
    } else if (auto *EE = dyn_cast<ExtractElementInst>(BDV)) {
      F(EE->getVectorOperand());
    } else if (auto *IE = dyn_cast<InsertElementInst>(BDV)) {
      F(IE->getOperand(0));
      F(IE->getOperand(1));
    } else if (auto *SV = dyn_cast<ShuffleVectorInst>(BDV)) {
      // For a canonical broadcast, ignore the undef argument
      // (without this, we insert a parallel base shuffle for every broadcast)
      F(SV->getOperand(0));
      if (!SV->isZeroEltSplat())
        F(SV->getOperand(1));
    } else {
      llvm_unreachable("unexpected BDV type");
    }
  };

  // Recursively fill in all base defining values reachable from the initial
  // one for which we don't already know a definite base value for
  /* scope */ {
    SmallVector<Value *, 16> Worklist;
    Worklist.push_back(Def);
    States.insert({Def, BDVState(Def)});
    while (!Worklist.empty()) {
      Value *Current = Worklist.pop_back_val();
      assert(!isOriginalBaseResult(Current) && "why did it get added?");

      auto visitIncomingValue = [&](Value *InVal) {
        Value *Base = findBaseOrBDV(InVal, Cache, KnownBases);
        if (isKnownBase(Base, KnownBases) && areBothVectorOrScalar(Base, InVal))
          // Known bases won't need new instructions introduced and can be
          // ignored safely. However, this can only be done when InVal and Base
          // are both scalar or both vector. Otherwise, we need to find a
          // correct BDV for InVal, by creating an entry in the lattice
          // (States).
          return;
        assert(isExpectedBDVType(Base) && "the only non-base values "
               "we see should be base defining values");
        if (States.insert(std::make_pair(Base, BDVState(Base))).second)
          Worklist.push_back(Base);
      };

      visitBDVOperands(Current, visitIncomingValue);
    }
  }

#ifndef NDEBUG
  LLVM_DEBUG(dbgs() << "States after initialization:\n");
  for (const auto &Pair : States) {
    LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }
#endif
  // Iterate forward through the value graph pruning any node from the state
  // list where all of the inputs are base pointers. The purpose of this is to
  // reuse existing values when the derived pointer we were asked to materialize
  // a base pointer for happens to be a base pointer itself. (Or a sub-graph
  // ends in a base pointer.)
  SmallVector<Value *> ToRemove;
  do {
    ToRemove.clear();
    for (auto Pair : States) {
      Value *BDV = Pair.first;
      auto canPruneInput = [&](Value *V) {
        // If the input of the BDV is the BDV itself we can prune it. This is
        // only possible if the BDV is a PHI node.
        if (V->stripPointerCasts() == BDV)
          return true;
        Value *VBDV = findBaseOrBDV(V, Cache, KnownBases);
        if (V->stripPointerCasts() != VBDV)
          return false;
        // The assumption is that anything not in the state list
        // propagates a base pointer.
        return States.count(VBDV) == 0;
      };

      bool CanPrune = true;
      visitBDVOperands(BDV, [&](Value *Op) {
        CanPrune = CanPrune && canPruneInput(Op);
      });
      if (CanPrune)
        ToRemove.push_back(BDV);
    }
    for (Value *V : ToRemove) {
      States.erase(V);
      // Cache the fact V is its own base for later usage.
      Cache[V] = V;
    }
  } while (!ToRemove.empty());

  // Did we manage to prove that Def itself must be a base pointer?
  if (!States.count(Def))
    return Def;
  // Return a phi state for a base defining value. We'll generate a new
  // base state for known bases and expect to find a cached state otherwise.
  auto GetStateForBDV = [&](Value *BaseValue, Value *Input) {
    auto I = States.find(BaseValue);
    if (I != States.end())
      return I->second;
    assert(areBothVectorOrScalar(BaseValue, Input));
    return BDVState(BaseValue, BDVState::Base, BaseValue);
  };

  // Even though we have identified a concrete base (or a conflict) for all live
  // pointers at this point, there are cases where the base is of an
  // incompatible type compared to the original instruction. We conservatively
  // mark those as conflicts to ensure that corresponding BDVs will be generated
  // in the next steps.

  // This is a rather explicit check for all cases where we should mark the
  // state as a conflict to force the latter stages of the algorithm to emit
  // conflict handling code.
  // TODO: in many cases the instructions emitted for the conflicting states
  // will be identical to the I itself (if the I's operate on their BDVs
  // themselves). We should exploit this, but can't do it here since it would
  // break the invariant about the BDVs not being known to be a base.
  // TODO: the code also does not handle constants at all - the algorithm relies
  // on all constants having the same BDV and therefore constant-only insns
  // will never be in conflict, but this check is ignored here. If the
  // constant conflicts will be to BDVs themselves, they will be identical
  // instructions and will get optimized away (as in the above TODO)
  auto MarkConflict = [&](Instruction *I, Value *BaseValue) {
    // IE and EE mix vector & scalar so are always a conflict
    if (isa<InsertElementInst>(I) || isa<ExtractElementInst>(I))
      return true;
    // Shuffle vector is always a conflict as it creates new vector from
    // existing ones.
    if (isa<ShuffleVectorInst>(I))
      return true;
    // Any instructions where the computed base type differs from the
    // instruction type. An example is where an extract instruction is used by a
    // select. Here the select's BDV is a vector (because of extract's BDV),
    // while the select itself is a scalar type. Note that the IE and EE
    // instruction check is not fully subsumed by the vector<->scalar check at
    // the end, this is due to the BDV algorithm being ignorant of BDV types at
    // this junction.
    if (!areBothVectorOrScalar(BaseValue, I))
      return true;

    return false;
  };
= true;
1017 const size_t OldSize
= States
.size();
1020 // We're only changing values in this loop, thus safe to keep iterators.
1021 // Since this is computing a fixed point, the order of visit does not
1022 // effect the result. TODO: We could use a worklist here and make this run
1024 for (auto Pair
: States
) {
1025 Value
*BDV
= Pair
.first
;
1026 // Only values that do not have known bases or those that have differing
1027 // type (scalar versus vector) from a possible known base should be in the
1029 assert((!isKnownBase(BDV
, KnownBases
) ||
1030 !areBothVectorOrScalar(BDV
, Pair
.second
.getBaseValue())) &&
1031 "why did it get added?");
1033 BDVState
NewState(BDV
);
1034 visitBDVOperands(BDV
, [&](Value
*Op
) {
1035 Value
*BDV
= findBaseOrBDV(Op
, Cache
, KnownBases
);
1036 auto OpState
= GetStateForBDV(BDV
, Op
);
1037 NewState
.meet(OpState
);
1040 // if the instruction has known base, but should in fact be marked as
1041 // conflict because of incompatible in/out types, we mark it as such
1042 // ensuring that it will propagate through the fixpoint iteration
1043 auto I
= cast
<Instruction
>(BDV
);
1044 auto BV
= NewState
.getBaseValue();
1045 if (BV
&& MarkConflict(I
, BV
))
1046 NewState
= BDVState(I
, BDVState::Conflict
);
1048 BDVState OldState
= Pair
.second
;
1049 if (OldState
!= NewState
) {
1051 States
[BDV
] = NewState
;
1055 assert(OldSize
== States
.size() &&
1056 "fixed point shouldn't be adding any new nodes to state");
#ifndef NDEBUG
  VerifyStates();
  LLVM_DEBUG(dbgs() << "States after meet iteration:\n");
  for (const auto &Pair : States) {
    LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }
#endif

  // Since we do the conflict marking as part of the fixpoint iteration this
  // loop only asserts that invariants are met
  for (auto Pair : States) {
    Instruction *I = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;
    auto *BaseValue = State.getBaseValue();
    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert(
        (!isKnownBase(I, KnownBases) || !areBothVectorOrScalar(I, BaseValue)) &&
        "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
  }
  // Insert Phis for all conflicts
  // TODO: adjust naming patterns to avoid this order of iteration dependency
  for (auto Pair : States) {
    Instruction *I = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;
    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert((!isKnownBase(I, KnownBases) ||
            !areBothVectorOrScalar(I, State.getBaseValue())) &&
           "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");

    // Since we're joining a vector and scalar base, they can never be the
    // same. As a result, we should always see insert element having reached
    // the conflict state.
    assert(!isa<InsertElementInst>(I) || State.isConflict());

    if (!State.isConflict())
      continue;

    auto getMangledName = [](Instruction *I) -> std::string {
      if (isa<PHINode>(I)) {
        return suffixed_name_or(I, ".base", "base_phi");
      } else if (isa<SelectInst>(I)) {
        return suffixed_name_or(I, ".base", "base_select");
      } else if (isa<ExtractElementInst>(I)) {
        return suffixed_name_or(I, ".base", "base_ee");
      } else if (isa<InsertElementInst>(I)) {
        return suffixed_name_or(I, ".base", "base_ie");
      } else {
        return suffixed_name_or(I, ".base", "base_sv");
      }
    };

    Instruction *BaseInst = I->clone();
    BaseInst->insertBefore(I);
    BaseInst->setName(getMangledName(I));
    // Add metadata marking this as a base value
    BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
    States[I] = BDVState(I, BDVState::Conflict, BaseInst);
    setKnownBase(BaseInst, /* IsKnownBase */ true, KnownBases);
  }

#ifndef NDEBUG
  VerifyStates();
#endif
  // Returns an instruction which produces the base pointer for a given
  // instruction. The instruction is assumed to be an input to one of the BDVs
  // seen in the inference algorithm above. As such, we must either already
  // know its base defining value is a base, or have inserted a new
  // instruction to propagate the base of its BDV and have entered that newly
  // introduced instruction into the state table. In either case, we are
  // assured to be able to determine an instruction which produces its base
  // pointer.
  auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
    Value *BDV = findBaseOrBDV(Input, Cache, KnownBases);
    Value *Base = nullptr;
    if (!States.count(BDV)) {
      assert(areBothVectorOrScalar(BDV, Input));
      Base = BDV;
    } else {
      // Either conflict or base.
      assert(States.count(BDV));
      Base = States[BDV].getBaseValue();
    }
    assert(Base && "Can't be null");
    // The cast is needed since base traversal may strip away bitcasts
    if (Base->getType() != Input->getType() && InsertPt)
      Base = new BitCastInst(Base, Input->getType(), "cast",
                             InsertPt->getIterator());
    return Base;
  };
  // Fixup all the inputs of the new PHIs. Visit order needs to be
  // deterministic and predictable because we're naming newly created
  // instructions.
  for (auto Pair : States) {
    Instruction *BDV = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;

    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert((!isKnownBase(BDV, KnownBases) ||
            !areBothVectorOrScalar(BDV, State.getBaseValue())) &&
           "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
    if (!State.isConflict())
      continue;

    if (PHINode *BasePHI = dyn_cast<PHINode>(State.getBaseValue())) {
      PHINode *PN = cast<PHINode>(BDV);
      const unsigned NumPHIValues = PN->getNumIncomingValues();

      // The IR verifier requires phi nodes with multiple entries from the
      // same basic block to have the same incoming value for each of those
      // entries. Since we're inserting bitcasts in the loop, make sure we
      // do so at least once per incoming block.
      DenseMap<BasicBlock *, Value *> BlockToValue;
      for (unsigned i = 0; i < NumPHIValues; i++) {
        Value *InVal = PN->getIncomingValue(i);
        BasicBlock *InBB = PN->getIncomingBlock(i);
        if (!BlockToValue.count(InBB))
          BlockToValue[InBB] = getBaseForInput(InVal, InBB->getTerminator());
        else {
#ifndef NDEBUG
          Value *OldBase = BlockToValue[InBB];
          Value *Base = getBaseForInput(InVal, nullptr);

          // We can't use `stripPointerCasts` instead of this function because
          // `stripPointerCasts` doesn't handle vectors of pointers.
          auto StripBitCasts = [](Value *V) -> Value * {
            while (auto *BC = dyn_cast<BitCastInst>(V))
              V = BC->getOperand(0);
            return V;
          };
          // In essence this assert states: the only way two values
          // incoming from the same basic block may be different is by
          // being different bitcasts of the same value. A cleanup
          // that remains TODO is changing findBaseOrBDV to return an
          // llvm::Value of the correct type (and still remain pure).
          // This will remove the need to add bitcasts.
          assert(StripBitCasts(Base) == StripBitCasts(OldBase) &&
                 "findBaseOrBDV should be pure!");
#endif
        }
        Value *Base = BlockToValue[InBB];
        BasePHI->setIncomingValue(i, Base);
      }
    } else if (SelectInst *BaseSI =
                   dyn_cast<SelectInst>(State.getBaseValue())) {
      SelectInst *SI = cast<SelectInst>(BDV);

      // Find the instruction which produces the base for each input.
      // We may need to insert a bitcast.
      BaseSI->setTrueValue(getBaseForInput(SI->getTrueValue(), BaseSI));
      BaseSI->setFalseValue(getBaseForInput(SI->getFalseValue(), BaseSI));
    } else if (auto *BaseEE =
                   dyn_cast<ExtractElementInst>(State.getBaseValue())) {
      Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand();
      // Find the instruction which produces the base for each input. We may
      // need to insert a bitcast.
      BaseEE->setOperand(0, getBaseForInput(InVal, BaseEE));
    } else if (auto *BaseIE =
                   dyn_cast<InsertElementInst>(State.getBaseValue())) {
      auto *BdvIE = cast<InsertElementInst>(BDV);
      auto UpdateOperand = [&](int OperandIdx) {
        Value *InVal = BdvIE->getOperand(OperandIdx);
        Value *Base = getBaseForInput(InVal, BaseIE);
        BaseIE->setOperand(OperandIdx, Base);
      };
      UpdateOperand(0); // vector operand
      UpdateOperand(1); // scalar operand
    } else {
      auto *BaseSV = cast<ShuffleVectorInst>(State.getBaseValue());
      auto *BdvSV = cast<ShuffleVectorInst>(BDV);
      auto UpdateOperand = [&](int OperandIdx) {
        Value *InVal = BdvSV->getOperand(OperandIdx);
        Value *Base = getBaseForInput(InVal, BaseSV);
        BaseSV->setOperand(OperandIdx, Base);
      };
      UpdateOperand(0); // vector operand
      if (!BdvSV->isZeroEltSplat())
        UpdateOperand(1); // vector operand
      else {
        // Never read, so just use poison
        Value *InVal = BdvSV->getOperand(1);
        BaseSV->setOperand(1, PoisonValue::get(InVal->getType()));
      }
    }
  }

#ifndef NDEBUG
  VerifyStates();
#endif
  // Get the data layout to compare the sizes of base/derived pointer values.
  [[maybe_unused]] auto &DL =
      cast<llvm::Instruction>(Def)->getDataLayout();
  // Cache all of our results so we can cheaply reuse them
  // NOTE: This is actually two caches: one of the base defining value
  // relation and one of the base pointer relation! FIXME
  for (auto Pair : States) {
    auto *BDV = Pair.first;
    Value *Base = Pair.second.getBaseValue();
    assert(BDV && Base);
    // Whenever we have a derived ptr(s), their base
    // ptr(s) must be of the same size, not necessarily the same type
    assert(DL.getTypeAllocSize(BDV->getType()) ==
               DL.getTypeAllocSize(Base->getType()) &&
           "Derived and base values should have same size");
    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert(
        (!isKnownBase(BDV, KnownBases) || !areBothVectorOrScalar(BDV, Base)) &&
        "why did it get added?");

    LLVM_DEBUG(
        dbgs() << "Updating base value cache"
               << " for: " << BDV->getName() << " from: "
               << (Cache.count(BDV) ? Cache[BDV]->getName().str() : "none")
               << " to: " << Base->getName() << "\n");

    Cache[BDV] = Base;
  }
  assert(Cache.count(Def));
  return Cache[Def];
}
// For a set of live pointers (base and/or derived), identify the base
// pointer of the object which they are derived from. This routine will
// mutate the IR graph as needed to make the 'base' pointer live at the
// definition site of 'derived'. This ensures that any use of 'derived' can
// also use 'base'. This may involve the insertion of a number of
// additional PHI nodes.
//
// preconditions: live is a set of pointer type Values
//
// side effects: may insert PHI nodes into the existing CFG, will preserve
// CFG, will not remove or mutate any existing nodes
//
// post condition: PointerToBase contains one (derived, base) pair for every
// pointer in live. Note that derived can be equal to base if the original
// pointer was a base pointer.
static void findBasePointers(const StatepointLiveSetTy &live,
                             PointerToBaseTy &PointerToBase, DominatorTree *DT,
                             DefiningValueMapTy &DVCache,
                             IsKnownBaseMapTy &KnownBases) {
  for (Value *ptr : live) {
    Value *base = findBasePointer(ptr, DVCache, KnownBases);
    assert(base && "failed to find base pointer");
    PointerToBase[ptr] = base;
    assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
            DT->dominates(cast<Instruction>(base)->getParent(),
                          cast<Instruction>(ptr)->getParent())) &&
           "The base we found better dominate the derived pointer");
  }
}
/// Find the required base pointers (and adjust the live set) for the given
/// parse point.
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
                             CallBase *Call,
                             PartiallyConstructedSafepointRecord &result,
                             PointerToBaseTy &PointerToBase,
                             IsKnownBaseMapTy &KnownBases) {
  StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet;
  // We assume that all pointers passed to deopt are base pointers; as an
  // optimization, we can use this to avoid separately materializing the base
  // pointer graph. This is only relevant since we're very conservative about
  // generating new conflict nodes during base pointer insertion. If we were
  // smarter there, this would be irrelevant.
  if (auto Opt = Call->getOperandBundle(LLVMContext::OB_deopt))
    for (Value *V : Opt->Inputs) {
      if (!PotentiallyDerivedPointers.count(V))
        continue;
      PotentiallyDerivedPointers.remove(V);
      PointerToBase[V] = V;
    }
  findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache,
                   KnownBases);
}
1348 /// liveset and base pointer maps for the call site CS.
1349 static void recomputeLiveInValues(GCPtrLivenessData
&RevisedLivenessData
,
1351 PartiallyConstructedSafepointRecord
&result
,
1352 PointerToBaseTy
&PointerToBase
,
1355 static void recomputeLiveInValues(
1356 Function
&F
, DominatorTree
&DT
, ArrayRef
<CallBase
*> toUpdate
,
1357 MutableArrayRef
<struct PartiallyConstructedSafepointRecord
> records
,
1358 PointerToBaseTy
&PointerToBase
, GCStrategy
*GC
) {
1359 // TODO-PERF: reuse the original liveness, then simply run the dataflow
1360 // again. The old values are still live and will help it stabilize quickly.
1361 GCPtrLivenessData RevisedLivenessData
;
1362 computeLiveInValues(DT
, F
, RevisedLivenessData
, GC
);
1363 for (size_t i
= 0; i
< records
.size(); i
++) {
1364 struct PartiallyConstructedSafepointRecord
&info
= records
[i
];
1365 recomputeLiveInValues(RevisedLivenessData
, toUpdate
[i
], info
, PointerToBase
,
// Utility function which clones all instructions from "ChainToBase"
// and inserts them before "InsertBefore". Returns rematerialized value
// which should be used after statepoint.
static Instruction *rematerializeChain(ArrayRef<Instruction *> ChainToBase,
                                       Instruction *InsertBefore,
                                       Value *RootOfChain,
                                       Value *AlternateLiveBase) {
  Instruction *LastClonedValue = nullptr;
  Instruction *LastValue = nullptr;
  // Walk backwards to visit top-most instructions first.
  for (Instruction *Instr :
       make_range(ChainToBase.rbegin(), ChainToBase.rend())) {
    // Only GEP's and casts are supported as we need to be careful to not
    // introduce any new uses of pointers not in the liveset.
    // Note that it's fine to introduce new uses of pointers which were
    // otherwise not used after this statepoint.
    assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));

    Instruction *ClonedValue = Instr->clone();
    ClonedValue->insertBefore(InsertBefore);
    ClonedValue->setName(Instr->getName() + ".remat");

    // If it is not the first instruction in the chain then it uses the
    // previously cloned value. We should update it to use the cloned value.
    if (LastClonedValue) {
      assert(LastValue);
      ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
#ifndef NDEBUG
      for (auto *OpValue : ClonedValue->operand_values()) {
        // Assert that cloned instruction does not use any instructions from
        // this chain other than LastClonedValue
        assert(!is_contained(ChainToBase, OpValue) &&
               "incorrect use in rematerialization chain");
        // Assert that the cloned instruction does not use the RootOfChain
        // or the AlternateLiveBase.
        assert(OpValue != RootOfChain && OpValue != AlternateLiveBase);
      }
#endif
    } else {
      // For the first instruction, replace the use of unrelocated base i.e.
      // RootOfChain/OrigRootPhi, with the corresponding PHI present in the
      // live set. They have been proved to be the same PHI nodes. Note
      // that the *only* use of the RootOfChain in the ChainToBase list is
      // the first Value in the list.
      if (RootOfChain != AlternateLiveBase)
        ClonedValue->replaceUsesOfWith(RootOfChain, AlternateLiveBase);
    }

    LastClonedValue = ClonedValue;
    LastValue = Instr;
  }
  assert(LastClonedValue);
  return LastClonedValue;
}
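
// Illustrative sketch: for a candidate chain consisting of
//   %d = getelementptr i8, ptr addrspace(1) %base, i64 16
// this produces a clone
//   %d.remat = getelementptr i8, ptr addrspace(1) %base.relocated, i64 16
// after the statepoint, so only %base needs to be relocated rather than both
// %base and %d.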
// When inserting gc.relocate and gc.result calls, we need to ensure there are
// no uses of the original value / return value between the gc.statepoint and
// the gc.relocate / gc.result call. One case which can arise is a phi node
// starting one of the successor blocks. We also need to be able to insert the
// gc.relocates only on the path which goes through the statepoint. We might
// need to split an edge to make this possible.
static BasicBlock *
normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
                            DominatorTree &DT) {
  BasicBlock *Ret = BB;
  if (!BB->getUniquePredecessor())
    Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT);

  // Now that 'Ret' has a unique predecessor we can safely remove all phi nodes
  // from it
  FoldSingleEntryPHINodes(Ret);
  assert(!isa<PHINode>(Ret->begin()) &&
         "All PHI nodes should have been removed!");

  // At this point, we can safely insert a gc.relocate or gc.result as the first
  // instruction in Ret if needed.
  return Ret;
}
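
// Illustrative sketch: for
//   invoke void @f() to label %normal unwind label %lpad
// where %normal has a second predecessor, the edge from the invoke's block is
// split so that gc.result / gc.relocate calls can be placed on the
// invoke-only path without being visible to the other predecessor.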
// List of all function attributes which must be stripped when lowering from
// abstract machine model to physical machine model. Essentially, these are
// all the effects a safepoint might have which we ignored in the abstract
// machine model for purposes of optimization. We have to strip these on
// both function declarations and call sites.
static constexpr Attribute::AttrKind FnAttrsToStrip[] =
    {Attribute::Memory, Attribute::NoSync, Attribute::NoFree};
// Create new attribute set containing only attributes which can be transferred
// from the original call to the safepoint.
static AttributeList legalizeCallAttributes(CallBase *Call, bool IsMemIntrinsic,
                                            AttributeList StatepointAL) {
  AttributeList OrigAL = Call->getAttributes();
  if (OrigAL.isEmpty())
    return StatepointAL;

  // Remove the readonly, readnone, and statepoint function attributes.
  LLVMContext &Ctx = Call->getContext();
  AttrBuilder FnAttrs(Ctx, OrigAL.getFnAttrs());
  for (auto Attr : FnAttrsToStrip)
    FnAttrs.removeAttribute(Attr);

  for (Attribute A : OrigAL.getFnAttrs()) {
    if (isStatepointDirectiveAttr(A))
      FnAttrs.removeAttribute(A);
  }

  StatepointAL = StatepointAL.addFnAttributes(Ctx, FnAttrs);

  // The memory intrinsics do not have a 1:1 correspondence of the original
  // call arguments to the produced statepoint. Do not transfer the argument
  // attributes to avoid putting them on incorrect arguments.
  if (IsMemIntrinsic)
    return StatepointAL;

  // Attach the argument attributes from the original call at the corresponding
  // arguments in the statepoint. Note that any argument attributes that are
  // invalid after lowering are stripped in stripNonValidDataFromBody.
  for (unsigned I : llvm::seq(Call->arg_size()))
    StatepointAL = StatepointAL.addParamAttributes(
        Ctx, GCStatepointInst::CallArgsBeginPos + I,
        AttrBuilder(Ctx, OrigAL.getParamAttrs(I)));

  // Return attributes are later attached to the gc.result intrinsic.
  return StatepointAL;
}
/// Helper function to place all gc relocates necessary for the given
/// statepoint.
/// Inputs:
///   liveVariables - list of variables to be relocated.
///   basePtrs - base pointers.
///   statepointToken - statepoint instruction to which relocates should be
///   bound.
///   Builder - Llvm IR builder to be used to construct new calls.
static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
                              ArrayRef<Value *> BasePtrs,
                              Instruction *StatepointToken,
                              IRBuilder<> &Builder, GCStrategy *GC) {
  if (LiveVariables.empty())
    return;

  auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
    auto ValIt = llvm::find(LiveVec, Val);
    assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
    size_t Index = std::distance(LiveVec.begin(), ValIt);
    assert(Index < LiveVec.size() && "Bug in std::find?");
    return Index;
  };

  Module *M = StatepointToken->getModule();

  // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
  // element type is i8 addrspace(1)*). We originally generated unique
  // declarations for each pointer type, but this proved problematic because
  // the intrinsic mangling code is incomplete and fragile. Since we're moving
  // towards a single unified pointer type anyways, we can just cast everything
  // to an i8* of the right address space. A bitcast is added later to convert
  // gc_relocate to the actual value's type.
  auto getGCRelocateDecl = [&](Type *Ty) {
    assert(isHandledGCPointerType(Ty, GC));
    auto AS = Ty->getScalarType()->getPointerAddressSpace();
    Type *NewTy = PointerType::get(M->getContext(), AS);
    if (auto *VT = dyn_cast<VectorType>(Ty))
      NewTy = FixedVectorType::get(NewTy,
                                   cast<FixedVectorType>(VT)->getNumElements());
    return Intrinsic::getOrInsertDeclaration(
        M, Intrinsic::experimental_gc_relocate, {NewTy});
  };

  // Lazily populated map from input types to the canonicalized form mentioned
  // in the comment above. This should probably be cached somewhere more
  // broadly.
  DenseMap<Type *, Function *> TypeToDeclMap;

  for (unsigned i = 0; i < LiveVariables.size(); i++) {
    // Generate the gc.relocate call and save the result
    Value *BaseIdx = Builder.getInt32(FindIndex(LiveVariables, BasePtrs[i]));
    Value *LiveIdx = Builder.getInt32(i);

    Type *Ty = LiveVariables[i]->getType();
    if (!TypeToDeclMap.count(Ty))
      TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
    Function *GCRelocateDecl = TypeToDeclMap[Ty];

    // only specify a debug name if we can give a useful one
    CallInst *Reloc = Builder.CreateCall(
        GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
        suffixed_name_or(LiveVariables[i], ".relocated", ""));
    // Trick CodeGen into thinking there are lots of free registers at this
    // location.
    Reloc->setCallingConv(CallingConv::Cold);
  }
}
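// Illustrative example (hypothetical IR, for exposition only): for a live
// derived pointer %obj at live-index 1 with its base at live-index 0, the
// loop above emits approximately
//   %obj.relocated = call coldcc ptr addrspace(1)
//       @llvm.experimental.gc.relocate.p1(token %safepoint_token, i32 0, i32 1)
// where the two i32 operands index into the statepoint's gc-live operand list.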
namespace {

/// This struct is used to defer RAUWs and `eraseFromParent` s. Using this
/// avoids having to worry about keeping around dangling pointers to Values.
class DeferredReplacement {
  AssertingVH<Instruction> Old;
  AssertingVH<Instruction> New;
  bool IsDeoptimize = false;

  DeferredReplacement() = default;

public:
  static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) {
    assert(Old != New && Old && New &&
           "Cannot RAUW equal values or to / from null!");

    DeferredReplacement D;
    D.Old = Old;
    D.New = New;
    return D;
  }

  static DeferredReplacement createDelete(Instruction *ToErase) {
    DeferredReplacement D;
    D.Old = ToErase;
    return D;
  }

  static DeferredReplacement createDeoptimizeReplacement(Instruction *Old) {
#ifndef NDEBUG
    auto *F = cast<CallInst>(Old)->getCalledFunction();
    assert(F && F->getIntrinsicID() == Intrinsic::experimental_deoptimize &&
           "Only way to construct a deoptimize deferred replacement");
#endif
    DeferredReplacement D;
    D.Old = Old;
    D.IsDeoptimize = true;
    return D;
  }

  /// Does the task represented by this instance.
  void doReplacement() {
    Instruction *OldI = Old;
    Instruction *NewI = New;

    assert(OldI != NewI && "Disallowed at construction?!");
    assert((!IsDeoptimize || !New) &&
           "Deoptimize intrinsics are not replaced!");

    Old = nullptr;
    New = nullptr;

    if (NewI)
      OldI->replaceAllUsesWith(NewI);

    if (IsDeoptimize) {
      // Note: we've inserted instructions, so the call to llvm.deoptimize may
      // not necessarily be followed by the matching return.
      auto *RI = cast<ReturnInst>(OldI->getParent()->getTerminator());
      new UnreachableInst(RI->getContext(), RI->getIterator());
      RI->eraseFromParent();
    }

    OldI->eraseFromParent();
  }
};

} // end anonymous namespace
static StringRef getDeoptLowering(CallBase *Call) {
  const char *DeoptLowering = "deopt-lowering";
  if (Call->hasFnAttr(DeoptLowering)) {
    // FIXME: Calls have a *really* confusing interface around attributes
    // with values.
    const AttributeList &CSAS = Call->getAttributes();
    if (CSAS.hasFnAttr(DeoptLowering))
      return CSAS.getFnAttr(DeoptLowering).getValueAsString();
    Function *F = Call->getCalledFunction();
    assert(F && F->hasFnAttribute(DeoptLowering));
    return F->getFnAttribute(DeoptLowering).getValueAsString();
  }
  return "live-through";
}
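// Illustrative example (hypothetical IR, for exposition only): the request is
// carried as a string attribute on the call site or the callee, e.g.
//   call void @f() "deopt-lowering"="live-in" [ "deopt"(i32 %state) ]
// With no attribute present, the function above returns "live-through".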
static void
makeStatepointExplicitImpl(CallBase *Call, /* to replace */
                           const SmallVectorImpl<Value *> &BasePtrs,
                           const SmallVectorImpl<Value *> &LiveVariables,
                           PartiallyConstructedSafepointRecord &Result,
                           std::vector<DeferredReplacement> &Replacements,
                           const PointerToBaseTy &PointerToBase,
                           GCStrategy *GC) {
  assert(BasePtrs.size() == LiveVariables.size());

  // Then go ahead and use the builder to actually do the inserts. We insert
  // immediately before the previous instruction under the assumption that all
  // arguments will be available here. We can't insert afterwards since we may
  // be replacing a terminator.
  IRBuilder<> Builder(Call);

  ArrayRef<Value *> GCLive(LiveVariables);
  uint64_t StatepointID = StatepointDirectives::DefaultStatepointID;
  uint32_t NumPatchBytes = 0;
  uint32_t Flags = uint32_t(StatepointFlags::None);

  SmallVector<Value *, 8> CallArgs(Call->args());
  std::optional<ArrayRef<Use>> DeoptArgs;
  if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_deopt))
    DeoptArgs = Bundle->Inputs;
  std::optional<ArrayRef<Use>> TransitionArgs;
  if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_gc_transition)) {
    TransitionArgs = Bundle->Inputs;
    // TODO: This flag no longer serves a purpose and can be removed later
    Flags |= uint32_t(StatepointFlags::GCTransition);
  }
  // Instead of lowering calls to @llvm.experimental.deoptimize as normal calls
  // with a return value, we lower them as never-returning calls to
  // __llvm_deoptimize that are followed by unreachable to get better codegen.
  bool IsDeoptimize = false;
  bool IsMemIntrinsic = false;

  StatepointDirectives SD =
      parseStatepointDirectivesFromAttrs(Call->getAttributes());
  if (SD.NumPatchBytes)
    NumPatchBytes = *SD.NumPatchBytes;
  if (SD.StatepointID)
    StatepointID = *SD.StatepointID;

  // Pass through the requested lowering if any. The default is live-through.
  StringRef DeoptLowering = getDeoptLowering(Call);
  if (DeoptLowering == "live-in")
    Flags |= uint32_t(StatepointFlags::DeoptLiveIn);
  else {
    assert(DeoptLowering == "live-through" && "Unsupported value!");
  }
  FunctionCallee CallTarget(Call->getFunctionType(), Call->getCalledOperand());
  if (Function *F = dyn_cast<Function>(CallTarget.getCallee())) {
    auto IID = F->getIntrinsicID();
    if (IID == Intrinsic::experimental_deoptimize) {
      // Calls to llvm.experimental.deoptimize are lowered to calls to the
      // __llvm_deoptimize symbol. We want to resolve this now, since the
      // verifier does not allow taking the address of an intrinsic function.

      SmallVector<Type *, 8> DomainTy;
      for (Value *Arg : CallArgs)
        DomainTy.push_back(Arg->getType());
      auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
                                    /* isVarArg = */ false);

      // Note: CallTarget can be a bitcast instruction of a symbol if there are
      // calls to @llvm.experimental.deoptimize with different argument types in
      // the same module. This is fine -- we assume the frontend knew what it
      // was doing when generating this kind of IR.
      CallTarget = F->getParent()
                       ->getOrInsertFunction("__llvm_deoptimize", FTy);

      IsDeoptimize = true;
    } else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
               IID == Intrinsic::memmove_element_unordered_atomic) {
      IsMemIntrinsic = true;

      // Unordered atomic memcpy and memmove intrinsics which are not explicitly
      // marked as "gc-leaf-function" should be lowered in a GC parseable way.
      // Specifically, these calls should be lowered to the
      // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols.
      // Similarly to __llvm_deoptimize we want to resolve this now, since the
      // verifier does not allow taking the address of an intrinsic function.
      //
      // Moreover we need to shuffle the arguments for the call in order to
      // accommodate GC. The underlying source and destination objects might be
      // relocated during the copy operation should the GC occur. To relocate
      // the derived source and destination pointers the implementation of the
      // intrinsic should know the corresponding base pointers.
      //
      // To make the base pointers available pass them explicitly as arguments:
      // memcpy(dest_derived, source_derived, ...) =>
      // memcpy(dest_base, dest_offset, source_base, source_offset, ...)
      auto &Context = Call->getContext();
      auto &DL = Call->getDataLayout();
      auto GetBaseAndOffset = [&](Value *Derived) {
        Value *Base = nullptr;
        // Optimizations in unreachable code might substitute the real pointer
        // with undef, poison or null-derived constant. Return null base for
        // them to be consistent with the handling in the main algorithm in
        // findBaseDefiningValue.
        if (isa<Constant>(Derived))
          Base =
              ConstantPointerNull::get(cast<PointerType>(Derived->getType()));
        else {
          assert(PointerToBase.count(Derived));
          Base = PointerToBase.find(Derived)->second;
        }
        unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
        unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
        Value *Base_int = Builder.CreatePtrToInt(
            Base, Type::getIntNTy(Context, IntPtrSize));
        Value *Derived_int = Builder.CreatePtrToInt(
            Derived, Type::getIntNTy(Context, IntPtrSize));
        return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int));
      };

      auto *Dest = CallArgs[0];
      Value *DestBase, *DestOffset;
      std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest);

      auto *Source = CallArgs[1];
      Value *SourceBase, *SourceOffset;
      std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source);

      auto *LengthInBytes = CallArgs[2];
      auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]);

      CallArgs.clear();
      CallArgs.push_back(DestBase);
      CallArgs.push_back(DestOffset);
      CallArgs.push_back(SourceBase);
      CallArgs.push_back(SourceOffset);
      CallArgs.push_back(LengthInBytes);

      SmallVector<Type *, 8> DomainTy;
      for (Value *Arg : CallArgs)
        DomainTy.push_back(Arg->getType());
      auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
                                    /* isVarArg = */ false);

      auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) {
        uint64_t ElementSize = ElementSizeCI->getZExtValue();
        if (IID == Intrinsic::memcpy_element_unordered_atomic) {
          switch (ElementSize) {
          case 1:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_1";
          case 2:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_2";
          case 4:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_4";
          case 8:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_8";
          case 16:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_16";
          default:
            llvm_unreachable("unexpected element size!");
          }
        }
        assert(IID == Intrinsic::memmove_element_unordered_atomic);
        switch (ElementSize) {
        case 1:
          return "__llvm_memmove_element_unordered_atomic_safepoint_1";
        case 2:
          return "__llvm_memmove_element_unordered_atomic_safepoint_2";
        case 4:
          return "__llvm_memmove_element_unordered_atomic_safepoint_4";
        case 8:
          return "__llvm_memmove_element_unordered_atomic_safepoint_8";
        case 16:
          return "__llvm_memmove_element_unordered_atomic_safepoint_16";
        default:
          llvm_unreachable("unexpected element size!");
        }
      };

      CallTarget =
          F->getParent()
              ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy);
    }
  }
  // Create the statepoint given all the arguments
  GCStatepointInst *Token = nullptr;
  if (auto *CI = dyn_cast<CallInst>(Call)) {
    CallInst *SPCall = Builder.CreateGCStatepointCall(
        StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
        TransitionArgs, DeoptArgs, GCLive, "safepoint_token");

    SPCall->setTailCallKind(CI->getTailCallKind());
    SPCall->setCallingConv(CI->getCallingConv());

    // Set up function attrs directly on statepoint and return attrs later for
    // gc_result intrinsic.
    SPCall->setAttributes(
        legalizeCallAttributes(CI, IsMemIntrinsic, SPCall->getAttributes()));

    Token = cast<GCStatepointInst>(SPCall);

    // Put the following gc_result and gc_relocate calls immediately after
    // the old call (which we're about to delete).
    assert(CI->getNextNode() && "Not a terminator, must have next!");
    Builder.SetInsertPoint(CI->getNextNode());
    Builder.SetCurrentDebugLocation(CI->getNextNode()->getDebugLoc());
  } else {
    auto *II = cast<InvokeInst>(Call);

    // Insert the new invoke into the old block. We'll remove the old one in a
    // moment at which point this will become the new terminator for the
    // original block.
    InvokeInst *SPInvoke = Builder.CreateGCStatepointInvoke(
        StatepointID, NumPatchBytes, CallTarget, II->getNormalDest(),
        II->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs,
        GCLive, "statepoint_token");

    SPInvoke->setCallingConv(II->getCallingConv());

    // Set up function attrs directly on statepoint and return attrs later for
    // gc_result intrinsic.
    SPInvoke->setAttributes(
        legalizeCallAttributes(II, IsMemIntrinsic, SPInvoke->getAttributes()));

    Token = cast<GCStatepointInst>(SPInvoke);

    // Generate gc relocates in exceptional path
    BasicBlock *UnwindBlock = II->getUnwindDest();
    assert(!isa<PHINode>(UnwindBlock->begin()) &&
           UnwindBlock->getUniquePredecessor() &&
           "can't safely insert in this block!");

    Builder.SetInsertPoint(UnwindBlock, UnwindBlock->getFirstInsertionPt());
    Builder.SetCurrentDebugLocation(II->getDebugLoc());

    // Attach exceptional gc relocates to the landingpad.
    Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
    Result.UnwindToken = ExceptionalToken;

    CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder, GC);

    // Generate gc relocates and returns for normal block
    BasicBlock *NormalDest = II->getNormalDest();
    assert(!isa<PHINode>(NormalDest->begin()) &&
           NormalDest->getUniquePredecessor() &&
           "can't safely insert in this block!");

    Builder.SetInsertPoint(NormalDest, NormalDest->getFirstInsertionPt());

    // gc relocates will be generated later as if it were a regular call
    // statepoint
  }
  assert(Token && "Should be set in one of the above branches!");
  if (IsDeoptimize) {
    // If we're wrapping an @llvm.experimental.deoptimize in a statepoint, we
    // transform the tail-call like structure to a call to a void function
    // followed by unreachable to get better codegen.
    Replacements.push_back(
        DeferredReplacement::createDeoptimizeReplacement(Call));
  } else {
    Token->setName("statepoint_token");
    if (!Call->getType()->isVoidTy() && !Call->use_empty()) {
      StringRef Name = Call->hasName() ? Call->getName() : "";
      CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name);
      GCResult->setAttributes(
          AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
                             Call->getAttributes().getRetAttrs()));

      // We cannot RAUW or delete CS.getInstruction() because it could be in the
      // live set of some other safepoint, in which case that safepoint's
      // PartiallyConstructedSafepointRecord will hold a raw pointer to this
      // llvm::Instruction. Instead, we defer the replacement and deletion to
      // after the live sets have been made explicit in the IR, and we no longer
      // have raw pointers to worry about.
      Replacements.emplace_back(
          DeferredReplacement::createRAUW(Call, GCResult));
    } else {
      Replacements.emplace_back(DeferredReplacement::createDelete(Call));
    }
  }

  Result.StatepointToken = Token;

  // Second, create a gc.relocate for every live variable
  CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder, GC);
}
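// Illustrative sketch (hypothetical IR, for exposition only; operand details
// abbreviated): the rewrite above approximately turns
//   %ret = call ptr addrspace(1) @foo(ptr addrspace(1) %obj) [ "deopt"() ]
// into
//   %statepoint_token = call token @llvm.experimental.gc.statepoint.p0(
//       i64 <ID>, i32 0, ptr elementtype(...) @foo, i32 1, i32 0,
//       ptr addrspace(1) %obj, i32 0, i32 0) [ "deopt"(), "gc-live"(...) ]
// followed (in the non-deoptimize case) by a gc.result that reproduces %ret
// and by one gc.relocate per live pointer.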
// Replace an existing gc.statepoint with a new one and a set of gc.relocates
// which make the relocations happening at this safepoint explicit.
//
// WARNING: Does not do any fixup to adjust users of the original live
// values. That's the caller's responsibility.
static void
makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
                       PartiallyConstructedSafepointRecord &Result,
                       std::vector<DeferredReplacement> &Replacements,
                       const PointerToBaseTy &PointerToBase, GCStrategy *GC) {
  const auto &LiveSet = Result.LiveSet;

  // Convert to vector for efficient cross referencing.
  SmallVector<Value *, 64> BaseVec, LiveVec;
  LiveVec.reserve(LiveSet.size());
  BaseVec.reserve(LiveSet.size());
  for (Value *L : LiveSet) {
    LiveVec.push_back(L);
    assert(PointerToBase.count(L));
    Value *Base = PointerToBase.find(L)->second;
    BaseVec.push_back(Base);
  }
  assert(LiveVec.size() == BaseVec.size());

  // Do the actual rewriting and delete the old statepoint
  makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements,
                             PointerToBase, GC);
}
// Helper function for the relocationViaAlloca.
//
// It receives an iterator range over the statepoint's gc.relocate users and
// emits a store to the assigned location (via AllocaMap) for each of them. It
// adds the visited values to the VisitedLiveValues set, which we later use
// for validation checking.
static void
insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
                       DenseMap<Value *, AllocaInst *> &AllocaMap,
                       DenseSet<Value *> &VisitedLiveValues) {
  for (User *U : GCRelocs) {
    GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
    if (!Relocate)
      continue;

    Value *OriginalValue = Relocate->getDerivedPtr();
    assert(AllocaMap.count(OriginalValue));
    Value *Alloca = AllocaMap[OriginalValue];

    // Emit store into the related alloca.
    assert(Relocate->getNextNode() &&
           "Should always have one since it's not a terminator");
    new StoreInst(Relocate, Alloca, std::next(Relocate->getIterator()));

#ifndef NDEBUG
    VisitedLiveValues.insert(OriginalValue);
#endif
  }
}
// Helper function for the "relocationViaAlloca". Similar to the
// "insertRelocationStores" but works for rematerialized values.
static void insertRematerializationStores(
    const RematerializedValueMapTy &RematerializedValues,
    DenseMap<Value *, AllocaInst *> &AllocaMap,
    DenseSet<Value *> &VisitedLiveValues) {
  for (auto RematerializedValuePair : RematerializedValues) {
    Instruction *RematerializedValue = RematerializedValuePair.first;
    Value *OriginalValue = RematerializedValuePair.second;

    assert(AllocaMap.count(OriginalValue) &&
           "Can not find alloca for rematerialized value");
    Value *Alloca = AllocaMap[OriginalValue];

    new StoreInst(RematerializedValue, Alloca,
                  std::next(RematerializedValue->getIterator()));

#ifndef NDEBUG
    VisitedLiveValues.insert(OriginalValue);
#endif
  }
}
/// Do all the relocation update via allocas and mem2reg
static void relocationViaAlloca(
    Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
    ArrayRef<PartiallyConstructedSafepointRecord> Records) {
#ifndef NDEBUG
  // Record initial number of (static) allocas; we'll check we have the same
  // number when we get done.
  int InitialAllocaNum = 0;
  for (Instruction &I : F.getEntryBlock())
    if (isa<AllocaInst>(I))
      InitialAllocaNum++;
#endif

  // TODO-PERF: change data structures, reserve
  DenseMap<Value *, AllocaInst *> AllocaMap;
  SmallVector<AllocaInst *, 200> PromotableAllocas;
  // Used later to check that we have enough allocas to store all values
  std::size_t NumRematerializedValues = 0;
  PromotableAllocas.reserve(Live.size());

  // Emit alloca for "LiveValue" and record it in "AllocaMap" and
  // "PromotableAllocas"
  const DataLayout &DL = F.getDataLayout();
  auto emitAllocaFor = [&](Value *LiveValue) {
    AllocaInst *Alloca =
        new AllocaInst(LiveValue->getType(), DL.getAllocaAddrSpace(), "",
                       F.getEntryBlock().getFirstNonPHIIt());
    AllocaMap[LiveValue] = Alloca;
    PromotableAllocas.push_back(Alloca);
  };

  // Emit alloca for each live gc pointer
  for (Value *V : Live)
    emitAllocaFor(V);

  // Emit allocas for rematerialized values
  for (const auto &Info : Records)
    for (auto RematerializedValuePair : Info.RematerializedValues) {
      Value *OriginalValue = RematerializedValuePair.second;
      if (AllocaMap.contains(OriginalValue))
        continue;

      emitAllocaFor(OriginalValue);
      ++NumRematerializedValues;
    }
  // The next two loops are part of the same conceptual operation. We need to
  // insert a store to the alloca after the original def and at each
  // redefinition. We need to insert a load before each use. These are split
  // into distinct loops for performance reasons.

  // Update gc pointer after each statepoint: either store a relocated value or
  // null (if no relocated value was found for this gc pointer and it is not a
  // gc_result). This must happen before we update the statepoint with a load
  // of the alloca, otherwise we lose the link between statepoint and old def.
  for (const auto &Info : Records) {
    Value *Statepoint = Info.StatepointToken;

    // This will be used for consistency check
    DenseSet<Value *> VisitedLiveValues;

    // Insert stores for normal statepoint gc relocates
    insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues);

    // In case it was an invoke statepoint
    // we will insert stores for exceptional path gc relocates.
    if (isa<InvokeInst>(Statepoint)) {
      insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
                             VisitedLiveValues);
    }

    // Do a similar thing with rematerialized values
    insertRematerializationStores(Info.RematerializedValues, AllocaMap,
                                  VisitedLiveValues);

    if (ClobberNonLive) {
      // As a debugging aid, pretend that an unrelocated pointer becomes null at
      // the gc.statepoint. This will turn some subtle GC problems into
      // slightly easier to debug SEGVs. Note that on large IR files with
      // lots of gc.statepoints this is extremely costly both memory and time
      // wise.
      SmallVector<AllocaInst *, 64> ToClobber;
      for (auto Pair : AllocaMap) {
        Value *Def = Pair.first;
        AllocaInst *Alloca = Pair.second;

        // This value was relocated
        if (VisitedLiveValues.count(Def)) {
          continue;
        }
        ToClobber.push_back(Alloca);
      }

      auto InsertClobbersAt = [&](BasicBlock::iterator IP) {
        for (auto *AI : ToClobber) {
          auto AT = AI->getAllocatedType();
          Constant *CPN;
          if (AT->isVectorTy())
            CPN = ConstantAggregateZero::get(AT);
          else
            CPN = ConstantPointerNull::get(cast<PointerType>(AT));
          new StoreInst(CPN, AI, IP);
        }
      };

      // Insert the clobbering stores. These may get intermixed with the
      // gc.results and gc.relocates, but that's fine.
      if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
        InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
        InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
      } else {
        InsertClobbersAt(
            std::next(cast<Instruction>(Statepoint)->getIterator()));
      }
    }
  }
  // Update use with load allocas and add store for gc_relocated.
  for (auto Pair : AllocaMap) {
    Value *Def = Pair.first;
    AllocaInst *Alloca = Pair.second;

    // We pre-record the uses of allocas so that we don't have to worry about
    // later updates that change the user information.

    SmallVector<Instruction *, 20> Uses;
    // PERF: trade a linear scan for repeated reallocation
    Uses.reserve(Def->getNumUses());
    for (User *U : Def->users()) {
      if (!isa<ConstantExpr>(U)) {
        // If the def has a ConstantExpr use, then the def is either a
        // ConstantExpr use itself or null. In either case
        // (recursively in the first, directly in the second), the oop
        // it is ultimately dependent on is null and this particular
        // use does not need to be fixed up.
        Uses.push_back(cast<Instruction>(U));
      }
    }

    llvm::sort(Uses);
    auto Last = llvm::unique(Uses);
    Uses.erase(Last, Uses.end());

    for (Instruction *Use : Uses) {
      if (isa<PHINode>(Use)) {
        PHINode *Phi = cast<PHINode>(Use);
        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
          if (Def == Phi->getIncomingValue(i)) {
            LoadInst *Load = new LoadInst(
                Alloca->getAllocatedType(), Alloca, "",
                Phi->getIncomingBlock(i)->getTerminator()->getIterator());
            Phi->setIncomingValue(i, Load);
          }
        }
      } else {
        LoadInst *Load = new LoadInst(Alloca->getAllocatedType(), Alloca, "",
                                      Use->getIterator());
        Use->replaceUsesOfWith(Def, Load);
      }
    }

    // Emit store for the initial gc value. Store must be inserted after load,
    // otherwise store will be in alloca's use list and an extra load will be
    // inserted before it.
    StoreInst *Store = new StoreInst(Def, Alloca, /*volatile*/ false,
                                     DL.getABITypeAlign(Def->getType()));
    if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
      if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
        // InvokeInst is a terminator, so the store needs to be inserted into
        // its normal destination block.
        BasicBlock *NormalDest = Invoke->getNormalDest();
        Store->insertBefore(NormalDest->getFirstNonPHI());
      } else {
        assert(!Inst->isTerminator() &&
               "The only terminator that can produce a value is "
               "InvokeInst which is handled above.");
        Store->insertAfter(Inst);
      }
    } else {
      assert(isa<Argument>(Def));
      Store->insertAfter(cast<Instruction>(Alloca));
    }
  }

  assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
         "we must have the same allocas with lives");
  (void)NumRematerializedValues;
  if (!PromotableAllocas.empty()) {
    // Apply mem2reg to promote alloca to SSA
    PromoteMemToReg(PromotableAllocas, DT);
  }

#ifndef NDEBUG
  for (auto &I : F.getEntryBlock())
    if (isa<AllocaInst>(I))
      InitialAllocaNum--;
  assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
#endif
}
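// Illustrative sketch (for exposition only): conceptually the routine above
// rewrites each live pointer %v as
//   %slot = alloca ptr addrspace(1)       ; in the entry block
//   store ptr addrspace(1) %v, ptr %slot  ; after the original def
//   store ptr addrspace(1) %v.relocated, ptr %slot  ; after each statepoint
//   %v.reload = load ptr addrspace(1), ptr %slot    ; before each use
// and then lets PromoteMemToReg fold the slots back into SSA form, inserting
// exactly the phi nodes the relocation sequence implies.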
/// Implement a unique function which doesn't require we sort the input
/// vector. Doing so has the effect of changing the output of a couple of
/// tests in ways which make them less useful in testing fused safepoints.
template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
  SmallSet<T, 8> Seen;
  erase_if(Vec, [&](const T &V) { return !Seen.insert(V).second; });
}
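// Illustrative example (for exposition only): {a, b, a, c} becomes {a, b, c},
// preserving first-occurrence order, whereas a sort+unique approach would
// reorder the live vector and perturb the statepoint operand order in tests.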
/// Insert holders so that each Value is obviously live through the entire
/// lifetime of the call.
static void insertUseHolderAfter(CallBase *Call, const ArrayRef<Value *> Values,
                                 SmallVectorImpl<CallInst *> &Holders) {
  if (Values.empty())
    // No values to hold live, might as well not insert the empty holder
    return;

  Module *M = Call->getModule();
  // Use a dummy vararg function to actually hold the values live
  FunctionCallee Func = M->getOrInsertFunction(
      "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true));
  if (isa<CallInst>(Call)) {
    // For call safepoints insert dummy calls right after safepoint
    Holders.push_back(
        CallInst::Create(Func, Values, "", std::next(Call->getIterator())));
    return;
  }
  // For invoke safepoints insert dummy calls both in normal and
  // exceptional destination blocks
  auto *II = cast<InvokeInst>(Call);
  Holders.push_back(CallInst::Create(
      Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
  Holders.push_back(CallInst::Create(
      Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
}
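// Illustrative example (hypothetical IR, for exposition only): a holder is
// simply
//   call void (...) @__tmp_use(ptr addrspace(1) %p, ptr addrspace(1) %q)
// placed just after the safepoint; liveness then sees %p and %q used there.
// The holders are deleted in insertParsePoints once liveness is recomputed.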
static void findLiveReferences(
    Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
    MutableArrayRef<struct PartiallyConstructedSafepointRecord> records,
    GCStrategy *GC) {
  GCPtrLivenessData OriginalLivenessData;
  computeLiveInValues(DT, F, OriginalLivenessData, GC);
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info, GC);
  }
}
// Helper function for the "rematerializeLiveValues". It walks the use chain
// starting from the "CurrentValue" until it reaches the root of the chain,
// i.e. the base or a value it cannot process. Only "simple" values are
// processed (currently, GEPs and no-op casts). The returned root is examined
// by the callers of findRematerializableChainToBasePointer. Fills the
// "ChainToBase" array with all visited values.
static Value *findRematerializableChainToBasePointer(
    SmallVectorImpl<Instruction *> &ChainToBase,
    Value *CurrentValue) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
    ChainToBase.push_back(GEP);
    return findRematerializableChainToBasePointer(ChainToBase,
                                                  GEP->getPointerOperand());
  }

  if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
    if (!CI->isNoopCast(CI->getDataLayout()))
      return CI;

    ChainToBase.push_back(CI);
    return findRematerializableChainToBasePointer(ChainToBase,
                                                  CI->getOperand(0));
  }

  // We have reached the root of the chain, which is either equal to the base
  // or is the first unsupported value along the use chain.
  return CurrentValue;
}
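// Illustrative example (hypothetical IR, for exposition only): starting from
//   %c = bitcast ptr addrspace(1) %base to ptr addrspace(1)   ; no-op cast
//   %d = getelementptr i8, ptr addrspace(1) %c, i64 16
// the walk pushes %d, then %c, onto ChainToBase and returns %base as the
// root; a non-noop cast or any unsupported instruction stops the walk there.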
// Helper function for the "rematerializeLiveValues". Compute the cost of the
// use chain we are going to rematerialize.
static InstructionCost
chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
                       TargetTransformInfo &TTI) {
  InstructionCost Cost = 0;

  for (Instruction *Instr : Chain) {
    if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
      assert(CI->isNoopCast(CI->getDataLayout()) &&
             "non noop cast is found during rematerialization");

      Type *SrcTy = CI->getOperand(0)->getType();
      Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy,
                                   TTI::getCastContextHint(CI),
                                   TargetTransformInfo::TCK_SizeAndLatency, CI);

    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
      // Cost of the address calculation
      Type *ValTy = GEP->getSourceElementType();
      Cost += TTI.getAddressComputationCost(ValTy);

      // And cost of the GEP itself
      // TODO: Use TTI->getGEPCost here (it exists, but appears to not be
      //       allowed for external usage)
      if (!GEP->hasAllConstantIndices())
        Cost += 2;

    } else
      llvm_unreachable("unsupported instruction type during rematerialization");
  }

  return Cost;
}
static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPhi) {
  unsigned PhiNum = OrigRootPhi.getNumIncomingValues();
  if (PhiNum != AlternateRootPhi.getNumIncomingValues() ||
      OrigRootPhi.getParent() != AlternateRootPhi.getParent())
    return false;

  // Map of incoming values and their corresponding basic blocks of
  // OrigRootPhi.
  SmallDenseMap<Value *, BasicBlock *, 8> CurrentIncomingValues;
  for (unsigned i = 0; i < PhiNum; i++)
    CurrentIncomingValues[OrigRootPhi.getIncomingValue(i)] =
        OrigRootPhi.getIncomingBlock(i);

  // Both current and base PHIs should have the same incoming values and
  // the same basic blocks corresponding to the incoming values.
  for (unsigned i = 0; i < PhiNum; i++) {
    auto CIVI =
        CurrentIncomingValues.find(AlternateRootPhi.getIncomingValue(i));
    if (CIVI == CurrentIncomingValues.end())
      return false;
    BasicBlock *CurrentIncomingBB = CIVI->second;
    if (CurrentIncomingBB != AlternateRootPhi.getIncomingBlock(i))
      return false;
  }
  return true;
}
// Find derived pointers that can be recomputed cheaply enough and fill
// RematerizationCandidates with such candidates.
static void
findRematerializationCandidates(PointerToBaseTy PointerToBase,
                                RematCandTy &RematerizationCandidates,
                                TargetTransformInfo &TTI) {
  const unsigned int ChainLengthThreshold = 10;

  for (auto P2B : PointerToBase) {
    auto *Derived = P2B.first;
    auto *Base = P2B.second;
    // Consider only derived pointers.
    if (Derived == Base)
      continue;

    // For each live pointer find its defining chain.
    SmallVector<Instruction *, 3> ChainToBase;
    Value *RootOfChain =
        findRematerializableChainToBasePointer(ChainToBase, Derived);

    // Nothing to do, or chain is too long
    if (ChainToBase.size() == 0 ||
        ChainToBase.size() > ChainLengthThreshold)
      continue;

    // Handle the scenario where the RootOfChain is not equal to the
    // Base Value, but they are essentially the same phi values.
    if (RootOfChain != PointerToBase[Derived]) {
      PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
      PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[Derived]);
      if (!OrigRootPhi || !AlternateRootPhi)
        continue;
      // PHI nodes that have the same incoming values and belong to the same
      // basic block are essentially the same SSA value. When the original phi
      // has incoming values with different base pointers, the original phi is
      // marked as conflict, and an additional `AlternateRootPhi` with the same
      // incoming values gets generated by the findBasePointer function. We
      // need to verify that the newly generated AlternateRootPhi (.base
      // version of phi) and RootOfChain (the original phi node itself) are the
      // same, so that we can rematerialize the gep and casts. This is a
      // workaround for the deficiency in the findBasePointer algorithm.
      if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
        continue;
    }

    // Compute cost of this chain.
    InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI);
    // TODO: We can also account for cases when we will be able to remove some
    //       of the rematerialized values by later optimization passes. I.e if
    //       we rematerialized several intersecting chains. Or if original
    //       values don't have any uses besides this statepoint.

    // Ok, there is a candidate.
    RematerizlizationCandidateRecord Record;
    Record.ChainToBase = ChainToBase;
    Record.RootOfChain = RootOfChain;
    Record.Cost = Cost;
    RematerizationCandidates.insert({ Derived, Record });
  }
}
// Try to rematerialize derived pointers immediately before their uses
// (instead of rematerializing after every statepoint it is live through).
// This can be beneficial when a derived pointer is live across many
// statepoints, but uses are rare.
static void rematerializeLiveValuesAtUses(
    RematCandTy &RematerizationCandidates,
    MutableArrayRef<PartiallyConstructedSafepointRecord> Records,
    PointerToBaseTy &PointerToBase) {
  if (!RematDerivedAtUses)
    return;

  SmallVector<Instruction *, 32> LiveValuesToBeDeleted;

  LLVM_DEBUG(dbgs() << "Rematerialize derived pointers at uses, "
                    << "Num statepoints: " << Records.size() << '\n');

  for (auto &It : RematerizationCandidates) {
    Instruction *Cand = cast<Instruction>(It.first);
    auto &Record = It.second;

    if (Record.Cost >= RematerializationThreshold)
      continue;

    if (Cand->user_empty())
      continue;

    if (Cand->hasOneUse())
      if (auto *U = dyn_cast<Instruction>(Cand->getUniqueUndroppableUser()))
        if (U->getParent() == Cand->getParent())
          continue;

    // Rematerialization before PHI nodes is not implemented.
    if (llvm::any_of(Cand->users(),
                     [](const auto *U) { return isa<PHINode>(U); }))
      continue;

    LLVM_DEBUG(dbgs() << "Trying cand " << *Cand << " ... ");

    // Count of rematerialization instructions we introduce is equal to number
    // of candidate uses.
    // Count of rematerialization instructions we eliminate is equal to number
    // of statepoints it is live through.
    // Consider transformation profitable if latter is greater than former
    // (in other words, we create fewer than we eliminate).
    unsigned NumLiveStatepoints = llvm::count_if(
        Records, [Cand](const auto &R) { return R.LiveSet.contains(Cand); });
    unsigned NumUses = Cand->getNumUses();

    LLVM_DEBUG(dbgs() << "Num uses: " << NumUses << " Num live statepoints: "
                      << NumLiveStatepoints << " ");

    if (NumLiveStatepoints < NumUses) {
      LLVM_DEBUG(dbgs() << "not profitable\n");
      continue;
    }

    // If rematerialization is 'free', then favor rematerialization at
    // uses as it generally shortens live ranges.
    // TODO: Short (size ==1) chains only?
    if (NumLiveStatepoints == NumUses && Record.Cost > 0) {
      LLVM_DEBUG(dbgs() << "not profitable\n");
      continue;
    }

    LLVM_DEBUG(dbgs() << "looks profitable\n");

    // ChainToBase may contain another remat candidate (as a sub chain) which
    // has been rewritten by now. Need to recollect chain to have up to date
    // value.
    // TODO: sort records in findRematerializationCandidates() in
    // decreasing chain size order?
    if (Record.ChainToBase.size() > 1) {
      Record.ChainToBase.clear();
      findRematerializableChainToBasePointer(Record.ChainToBase, Cand);
    }

    // The current rematerialization algorithm is very simple: we rematerialize
    // immediately before EVERY use, even if there are several uses in the same
    // block or if a use is local to the Cand Def. The reason is that this
    // allows us to avoid recomputing liveness without complicated analysis:
    // - If we did not eliminate all uses of the original Candidate, we do not
    //   know exactly in what BBs it is still live.
    // - If we rematerialize once per BB, we need to find a proper insertion
    //   place (first use in block, but after Def) and analyze if there is a
    //   statepoint between uses in the block.
    while (!Cand->user_empty()) {
      Instruction *UserI = cast<Instruction>(*Cand->user_begin());
      Instruction *RematChain = rematerializeChain(
          Record.ChainToBase, UserI, Record.RootOfChain, PointerToBase[Cand]);
      UserI->replaceUsesOfWith(Cand, RematChain);
      PointerToBase[RematChain] = PointerToBase[Cand];
    }
    LiveValuesToBeDeleted.push_back(Cand);
  }

  LLVM_DEBUG(dbgs() << "Rematerialized " << LiveValuesToBeDeleted.size()
                    << " derived pointers\n");
  for (auto *Cand : LiveValuesToBeDeleted) {
    assert(Cand->use_empty() && "Unexpected user remain");
    RematerizationCandidates.erase(Cand);
    for (auto &R : Records) {
      assert(!R.LiveSet.contains(Cand) ||
             R.LiveSet.contains(PointerToBase[Cand]));
      R.LiveSet.remove(Cand);
    }
  }

  // Recollect not rematerialized chains - we might have rewritten
  // their sub-chains.
  if (!LiveValuesToBeDeleted.empty()) {
    for (auto &P : RematerizationCandidates) {
      auto &R = P.second;
      if (R.ChainToBase.size() > 1) {
        R.ChainToBase.clear();
        findRematerializableChainToBasePointer(R.ChainToBase, P.first);
      }
    }
  }
}
// From the statepoint live set pick values that are cheaper to recompute than
// to relocate. Remove these values from the live set, rematerialize them after
// the statepoint and record them in the "Info" structure. Note that similar to
// relocated values we don't do any user adjustments here.
static void rematerializeLiveValues(CallBase *Call,
                                    PartiallyConstructedSafepointRecord &Info,
                                    PointerToBaseTy &PointerToBase,
                                    RematCandTy &RematerizationCandidates,
                                    TargetTransformInfo &TTI) {
  // Record values we are going to delete from this statepoint live set.
  // We cannot do this in the following loop due to iterator invalidation.
  SmallVector<Value *, 32> LiveValuesToBeDeleted;

  for (Value *LiveValue : Info.LiveSet) {
    auto It = RematerizationCandidates.find(LiveValue);
    if (It == RematerizationCandidates.end())
      continue;

    RematerizlizationCandidateRecord &Record = It->second;

    InstructionCost Cost = Record.Cost;
    // For invokes we need to rematerialize each chain twice - for normal and
    // for unwind basic blocks. Model this by multiplying cost by two.
    if (isa<InvokeInst>(Call))
      Cost *= 2;

    // If it's too expensive - skip it.
    if (Cost >= RematerializationThreshold)
      continue;

    // Remove value from the live set
    LiveValuesToBeDeleted.push_back(LiveValue);

    // Clone instructions and record them inside "Info" structure.

    // Different cases for calls and invokes. For invokes we need to clone
    // instructions both on normal and unwind path.
    if (isa<CallInst>(Call)) {
      Instruction *InsertBefore = Call->getNextNode();
      assert(InsertBefore);
      Instruction *RematerializedValue =
          rematerializeChain(Record.ChainToBase, InsertBefore,
                             Record.RootOfChain, PointerToBase[LiveValue]);
      Info.RematerializedValues[RematerializedValue] = LiveValue;
    } else {
      auto *Invoke = cast<InvokeInst>(Call);

      Instruction *NormalInsertBefore =
          &*Invoke->getNormalDest()->getFirstInsertionPt();
      Instruction *UnwindInsertBefore =
          &*Invoke->getUnwindDest()->getFirstInsertionPt();

      Instruction *NormalRematerializedValue =
          rematerializeChain(Record.ChainToBase, NormalInsertBefore,
                             Record.RootOfChain, PointerToBase[LiveValue]);
      Instruction *UnwindRematerializedValue =
          rematerializeChain(Record.ChainToBase, UnwindInsertBefore,
                             Record.RootOfChain, PointerToBase[LiveValue]);

      Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
      Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
    }
  }

  // Remove rematerialized values from the live set.
  for (auto *LiveValue : LiveValuesToBeDeleted) {
    Info.LiveSet.remove(LiveValue);
  }
}
static bool inlineGetBaseAndOffset(Function &F,
                                   SmallVectorImpl<CallInst *> &Intrinsics,
                                   DefiningValueMapTy &DVCache,
                                   IsKnownBaseMapTy &KnownBases) {
  auto &Context = F.getContext();
  auto &DL = F.getDataLayout();
  bool Changed = false;

  for (auto *Callsite : Intrinsics)
    switch (Callsite->getIntrinsicID()) {
    case Intrinsic::experimental_gc_get_pointer_base: {
      Changed = true;
      Value *Base =
          findBasePointer(Callsite->getOperand(0), DVCache, KnownBases);
      assert(!DVCache.count(Callsite));
      Callsite->replaceAllUsesWith(Base);
      if (!Base->hasName())
        Base->takeName(Callsite);
      Callsite->eraseFromParent();
      break;
    }
    case Intrinsic::experimental_gc_get_pointer_offset: {
      Changed = true;
      Value *Derived = Callsite->getOperand(0);
      Value *Base = findBasePointer(Derived, DVCache, KnownBases);
      assert(!DVCache.count(Callsite));
      unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
      unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
      IRBuilder<> Builder(Callsite);
      Value *BaseInt =
          Builder.CreatePtrToInt(Base, Type::getIntNTy(Context, IntPtrSize),
                                 suffixed_name_or(Base, ".int", ""));
      Value *DerivedInt =
          Builder.CreatePtrToInt(Derived, Type::getIntNTy(Context, IntPtrSize),
                                 suffixed_name_or(Derived, ".int", ""));
      Value *Offset = Builder.CreateSub(DerivedInt, BaseInt);
      Callsite->replaceAllUsesWith(Offset);
      Offset->takeName(Callsite);
      Callsite->eraseFromParent();
      break;
    }
    default:
      llvm_unreachable("Unknown intrinsic");
    }

  return Changed;
}
static bool insertParsePoints(Function &F, DominatorTree &DT,
                              TargetTransformInfo &TTI,
                              SmallVectorImpl<CallBase *> &ToUpdate,
                              DefiningValueMapTy &DVCache,
                              IsKnownBaseMapTy &KnownBases) {
  std::unique_ptr<GCStrategy> GC = findGCStrategy(F);

#ifndef NDEBUG
  // Validate the input
  std::set<CallBase *> Uniqued;
  Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
  assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");

  for (CallBase *Call : ToUpdate)
    assert(Call->getFunction() == &F);
#endif

  // When inserting gc.relocates for invokes, we need to be able to insert at
  // the top of the successor blocks. See the comment on
  // normalizeForInvokeSafepoint for exactly what is needed. Note that this
  // step may restructure the CFG.
  for (CallBase *Call : ToUpdate) {
    auto *II = dyn_cast<InvokeInst>(Call);
    if (!II)
      continue;
    normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
    normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
  }

  // A list of dummy calls added to the IR to keep various values obviously
  // live in the IR. We'll remove all of these when done.
  SmallVector<CallInst *, 64> Holders;

  // Insert a dummy call with all of the deopt operands we'll need for the
  // actual safepoint insertion as arguments. This ensures reference operands
  // in the deopt argument list are considered live through the safepoint (and
  // thus makes sure they get relocated.)
  for (CallBase *Call : ToUpdate) {
    SmallVector<Value *, 64> DeoptValues;

    for (Value *Arg : GetDeoptBundleOperands(Call)) {
      assert(!isUnhandledGCPointerType(Arg->getType(), GC.get()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(Arg->getType(), GC.get()))
        DeoptValues.push_back(Arg);
    }

    insertUseHolderAfter(Call, DeoptValues, Holders);
  }
  SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());

  // A) Identify all gc pointers which are statically live at the given call
  // site.
  findLiveReferences(F, DT, ToUpdate, Records, GC.get());

  /// Global mapping from live pointers to a base-defining-value.
  PointerToBaseTy PointerToBase;

  // B) Find the base pointers for each live pointer
  for (size_t i = 0; i < Records.size(); i++) {
    PartiallyConstructedSafepointRecord &info = Records[i];
    findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase, KnownBases);
  }
  if (PrintBasePointers) {
    errs() << "Base Pairs (w/o Relocation):\n";
    for (auto &Pair : PointerToBase) {
      errs() << " derived ";
      Pair.first->printAsOperand(errs(), false);
      errs() << " base ";
      Pair.second->printAsOperand(errs(), false);
      errs() << "\n";
    }
  }

  // The base phi insertion logic (for any safepoint) may have inserted new
  // instructions which are now live at some safepoint. The simplest such
  // example is:
  // loop:
  //   phi a  <-- will be a new base_phi here
  //   safepoint 1 <-- that needs to be live here
  //   gep a + 1
  //   safepoint 2
  //   br loop
  // We insert some dummy calls after each safepoint to definitely hold live
  // the base pointers which were identified for that safepoint. We'll then
  // ask liveness for _every_ base inserted to see what is now live. Then we
  // remove the dummy calls.
  Holders.reserve(Holders.size() + Records.size());
  for (size_t i = 0; i < Records.size(); i++) {
    PartiallyConstructedSafepointRecord &Info = Records[i];

    SmallVector<Value *, 128> Bases;
    for (auto *Derived : Info.LiveSet) {
      assert(PointerToBase.count(Derived) && "Missed base for derived pointer");
      Bases.push_back(PointerToBase[Derived]);
    }

    insertUseHolderAfter(ToUpdate[i], Bases, Holders);
  }

  // By selecting base pointers, we've effectively inserted new uses. Thus, we
  // need to rerun liveness. We may *also* have inserted new defs, but that's
  // not the key issue.
  recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase, GC.get());

  if (PrintBasePointers) {
    errs() << "Base Pairs: (w/Relocation)\n";
    for (auto Pair : PointerToBase) {
      errs() << " derived ";
      Pair.first->printAsOperand(errs(), false);
      errs() << " base ";
      Pair.second->printAsOperand(errs(), false);
      errs() << "\n";
    }
  }
  // It is possible that non-constant live variables have a constant base. For
  // example, a GEP with a variable offset from a global. In this case we can
  // remove it from the liveset. We already don't add constants to the liveset
  // because we assume they won't move at runtime and the GC doesn't need to be
  // informed about them. The same reasoning applies if the base is constant.
  // Note that the relocation placement code relies on this filtering for
  // correctness as it expects the base to be in the liveset, which isn't true
  // if the base is constant.
  for (auto &Info : Records) {
    Info.LiveSet.remove_if([&](Value *LiveV) {
      assert(PointerToBase.count(LiveV) && "Missed base for derived pointer");
      return isa<Constant>(PointerToBase[LiveV]);
    });
  }

  for (CallInst *CI : Holders)
    CI->eraseFromParent();

  Holders.clear();

  // Compute the cost of possible re-materialization of derived pointers.
  RematCandTy RematerizationCandidates;
  findRematerializationCandidates(PointerToBase, RematerizationCandidates, TTI);

  // In order to reduce the live set of a statepoint we might choose to
  // rematerialize some values instead of relocating them. This is purely an
  // optimization and does not influence correctness.
  // First try rematerialization at uses, then after statepoints.
  rematerializeLiveValuesAtUses(RematerizationCandidates, Records,
                                PointerToBase);
  for (size_t i = 0; i < Records.size(); i++)
    rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase,
                            RematerizationCandidates, TTI);

  // We need this to safely RAUW and delete call or invoke return values that
  // may themselves be live over a statepoint. For details, please see usage in
  // makeStatepointExplicitImpl.
  std::vector<DeferredReplacement> Replacements;

  // Now run through and replace the existing statepoints with new ones with
  // the live variables listed. We do not yet update uses of the values being
  // relocated. We have references to live variables that need to
  // survive to the last iteration of this loop. (By construction, the
  // previous statepoint can not be a live variable, thus we can safely remove
  // the old statepoint calls as we go.)
  for (size_t i = 0; i < Records.size(); i++)
    makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements,
                           PointerToBase, GC.get());
  ToUpdate.clear(); // prevent accidental use of invalid calls.

  for (auto &PR : Replacements)
    PR.doReplacement();

  Replacements.clear();

  for (auto &Info : Records) {
    // These live sets may contain stale Value pointers, since we replaced
    // calls with operand bundles with calls wrapped in gc.statepoint, and some
    // of those calls may have been def'ing live gc pointers. Clear these out
    // to avoid accidentally using them.
    //
    // TODO: We should create a separate data structure that does not contain
    // these live sets, and migrate to using that data structure from this
    // point onward.
    Info.LiveSet.clear();
  }
  PointerToBase.clear();

  // Do all the fixups of the original live variables to their relocated selves
  SmallVector<Value *, 128> Live;
  for (const PartiallyConstructedSafepointRecord &Info : Records) {
    // We can't simply save the live set from the original insertion. One of
    // the live values might be the result of a call which needs a safepoint.
    // That Value* no longer exists and we need to use the new gc_result.
    // Thankfully, the live set is embedded in the statepoint (and updated), so
    // we just grab that.
    llvm::append_range(Live, Info.StatepointToken->gc_live());
#ifndef NDEBUG
    // Do some basic validation checking on our liveness results before
    // performing relocation. Relocation can and will turn mistakes in liveness
    // results into non-sensical code which is much harder to debug.
    // TODO: It would be nice to test consistency as well
    assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
           "statepoint must be reachable or liveness is meaningless");
    for (Value *V : Info.StatepointToken->gc_live()) {
      if (!isa<Instruction>(V))
        // Non-instruction values trivially dominate all possible uses
        continue;
      auto *LiveInst = cast<Instruction>(V);
      assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
             "unreachable values should never be live");
      assert(DT.dominates(LiveInst, Info.StatepointToken) &&
             "basic SSA liveness expectation violated by liveness analysis");
    }
#endif
  }
  unique_unsorted(Live);

#ifndef NDEBUG
  // Validation check
  for (auto *Ptr : Live)
    assert(isHandledGCPointerType(Ptr->getType(), GC.get()) &&
           "must be a gc pointer type");
#endif

  relocationViaAlloca(F, DT, Live, Records);
  return !Records.empty();
}
// List of all parameter and return attributes which must be stripped when
// lowering from the abstract machine model. Note that we list attributes
// here which aren't valid as return attributes; that is okay.
static AttributeMask getParamAndReturnAttributesToRemove() {
  AttributeMask R;
  R.addAttribute(Attribute::Dereferenceable);
  R.addAttribute(Attribute::DereferenceableOrNull);
  R.addAttribute(Attribute::ReadNone);
  R.addAttribute(Attribute::ReadOnly);
  R.addAttribute(Attribute::WriteOnly);
  R.addAttribute(Attribute::NoAlias);
  R.addAttribute(Attribute::NoFree);
  return R;
}
static void stripNonValidAttributesFromPrototype(Function &F) {
  LLVMContext &Ctx = F.getContext();

  // Intrinsics are very delicate. Lowering sometimes depends on the presence
  // of certain attributes for correctness, but we may have also inferred
  // additional ones in the abstract machine model which need to be stripped.
  // This assumes that the attributes defined in Intrinsic.td are
  // conservatively correct for both the physical and abstract model.
  if (Intrinsic::ID id = F.getIntrinsicID()) {
    F.setAttributes(Intrinsic::getAttributes(Ctx, id));
    return;
  }

  AttributeMask R = getParamAndReturnAttributesToRemove();
  for (Argument &A : F.args())
    if (isa<PointerType>(A.getType()))
      F.removeParamAttrs(A.getArgNo(), R);

  if (isa<PointerType>(F.getReturnType()))
    F.removeRetAttrs(R);

  for (auto Attr : FnAttrsToStrip)
    F.removeFnAttr(Attr);
}
/// Certain metadata on instructions are invalid after running RS4GC.
/// Optimizations that run after RS4GC can incorrectly use this metadata to
/// optimize functions. We drop such metadata on the instruction.
static void stripInvalidMetadataFromInstruction(Instruction &I) {
  if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
    return;
  // These are the metadata kinds that are still valid on loads and stores
  // after RS4GC.
  // The metadata implying dereferenceability and noalias are (conservatively)
  // dropped. This is because semantically, after RewriteStatepointsForGC runs,
  // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can
  // touch the entire heap including noalias objects. Note: The reasoning is
  // the same as stripping the dereferenceability and noalias attributes that
  // are analogous to the metadata counterparts.
  // We also drop the invariant.load metadata on the load because that metadata
  // implies the address operand to the load points to memory that is never
  // changed once it became dereferenceable. This is no longer true after RS4GC.
  // Similar reasoning applies to invariant.group metadata, which applies to
  // loads within a group.
  unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa,
                                        LLVMContext::MD_range,
                                        LLVMContext::MD_alias_scope,
                                        LLVMContext::MD_nontemporal,
                                        LLVMContext::MD_nonnull,
                                        LLVMContext::MD_align,
                                        LLVMContext::MD_type};

  // Drops all metadata on the instruction other than ValidMetadataAfterRS4GC.
  I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC);
}
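// Illustrative example (hypothetical IR, for exposition only): a load such as
//   %v = load ptr addrspace(1), ptr %p, !invariant.load !0, !nonnull !1
// keeps !nonnull (it is in the list above) but loses !invariant.load, since
// after RS4GC a statepoint may move the underlying object, so the "never
// changes once dereferenceable" claim no longer holds.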
static void stripNonValidDataFromBody(Function &F) {
  if (F.empty())
    return;

  LLVMContext &Ctx = F.getContext();
  MDBuilder Builder(Ctx);

  // Set of invariant.start instructions that we need to remove.
  // Use this to avoid invalidating the instruction iterator.
  SmallVector<IntrinsicInst *, 12> InvariantStartInstructions;

  for (Instruction &I : instructions(F)) {
    // invariant.start on a memory location implies that the referenced memory
    // location is constant and unchanging. This is no longer true after
    // RewriteStatepointsForGC runs because there can be calls to gc.statepoint
    // which free the entire heap, and the presence of invariant.start allows
    // the optimizer to sink the load of a memory location past a statepoint,
    // which is incorrect.
    if (auto *II = dyn_cast<IntrinsicInst>(&I))
      if (II->getIntrinsicID() == Intrinsic::invariant_start) {
        InvariantStartInstructions.push_back(II);
        continue;
      }

    if (MDNode *Tag = I.getMetadata(LLVMContext::MD_tbaa)) {
      MDNode *MutableTBAA = Builder.createMutableTBAAAccessTag(Tag);
      I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
    }

    stripInvalidMetadataFromInstruction(I);

    AttributeMask R = getParamAndReturnAttributesToRemove();
    if (auto *Call = dyn_cast<CallBase>(&I)) {
      for (int i = 0, e = Call->arg_size(); i != e; i++)
        if (isa<PointerType>(Call->getArgOperand(i)->getType()))
          Call->removeParamAttrs(i, R);
      if (isa<PointerType>(Call->getType()))
        Call->removeRetAttrs(R);
    }
  }
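  // Note: the results are RAUW'ed with poison *before* erasing so that any
  // remaining users (e.g. a paired llvm.invariant.end) stay structurally
  // valid once the invariant guarantee itself is gone.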
  // Delete the invariant.start instructions and RAUW poison.
  for (auto *II : InvariantStartInstructions) {
    II->replaceAllUsesWith(PoisonValue::get(II->getType()));
    II->eraseFromParent();
  }
}
/// Looks up the GC strategy for a given function, returning null if the
/// function doesn't have a GC tag.
static std::unique_ptr<GCStrategy> findGCStrategy(Function &F) {
  if (!F.hasGC())
    return nullptr;

  return getGCStrategy(F.getGC());
}
/// Returns true if this function should be rewritten by this pass. The main
/// point of this function is as an extension point for custom logic.
static bool shouldRewriteStatepointsIn(Function &F) {
  if (!F.hasGC())
    return false;

  std::unique_ptr<GCStrategy> Strategy = findGCStrategy(F);

  assert(Strategy && "GC strategy is required by function, but was not found");

  return Strategy->useRS4GC();
}
static void stripNonValidData(Module &M) {
#ifndef NDEBUG
  assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!");
#endif

  for (Function &F : M)
    stripNonValidAttributesFromPrototype(F);

  for (Function &F : M)
    stripNonValidDataFromBody(F);
}
bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
                                            TargetTransformInfo &TTI,
                                            const TargetLibraryInfo &TLI) {
  assert(!F.isDeclaration() && !F.empty() &&
         "need function body to rewrite statepoints in");
  assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision");

  auto NeedsRewrite = [&TLI](Instruction &I) {
    if (const auto *Call = dyn_cast<CallBase>(&I)) {
      if (isa<GCStatepointInst>(Call))
        return false;
      if (callsGCLeafFunction(Call, TLI))
        return false;

      // Normally it's up to the frontend to make sure that non-leaf calls also
      // have proper deopt state if it is required. We make an exception for
      // element atomic memcpy/memmove intrinsics here. Unlike other intrinsics
      // these are non-leaf by default. They might be generated by the optimizer
      // which doesn't know how to produce a proper deopt state. So if we see a
      // non-leaf memcpy/memmove without deopt state just treat it as a leaf
      // copy and don't produce a statepoint.
      if (!AllowStatepointWithNoDeoptInfo && !Call->hasDeoptState()) {
        assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) &&
               "Don't expect any other calls here!");
        return false;
      }
      return true;
    }
    return false;
  };
  // Delete any unreachable statepoints so that we don't have unrewritten
  // statepoints surviving this pass. This makes testing easier and the
  // resulting IR less confusing to human readers.
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  bool MadeChange = removeUnreachableBlocks(F, &DTU);
  // Flush the Dominator Tree.
  DTU.getDomTree();
  // Gather all the statepoints which need to be rewritten. Be careful to only
  // consider those in reachable code since we need to ask dominance queries
  // when rewriting. We'll delete the unreachable ones in a moment.
  SmallVector<CallBase *, 64> ParsePointNeeded;
  SmallVector<CallInst *, 64> Intrinsics;
  for (Instruction &I : instructions(F)) {
    // TODO: only the ones with the flag set!
    if (NeedsRewrite(I)) {
      // NOTE removeUnreachableBlocks() is stronger than
      // DominatorTree::isReachableFromEntry(). In other words
      // removeUnreachableBlocks can remove some blocks for which
      // isReachableFromEntry() returns true.
      assert(DT.isReachableFromEntry(I.getParent()) &&
             "no unreachable blocks expected");
      ParsePointNeeded.push_back(cast<CallBase>(&I));
    }
    if (auto *CI = dyn_cast<CallInst>(&I))
      if (CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_base ||
          CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_offset)
        Intrinsics.emplace_back(CI);
  }
  // Return early if no work to do.
  if (ParsePointNeeded.empty() && Intrinsics.empty())
    return MadeChange;
  // As a prepass, go ahead and aggressively destroy single entry phi nodes.
  // These are created by LCSSA. They have the effect of increasing the size
  // of liveness sets for no good reason. It may be harder to do this post
  // insertion since relocations and base phis can confuse things.
  for (BasicBlock &BB : F)
    if (BB.getUniquePredecessor())
      MadeChange |= FoldSingleEntryPHINodes(&BB);
  // Before we start introducing relocations, we want to tweak the IR a bit to
  // avoid unfortunate code generation effects. The main example is that we
  // want to try to make sure the comparison feeding a branch is after any
  // safepoints. Otherwise, we end up with a comparison of pre-relocation
  // values feeding a branch after relocation. This is semantically correct,
  // but results in extra register pressure since both the pre-relocation and
  // post-relocation copies must be available in registers. For code without
  // relocations this is handled elsewhere, but teaching the scheduler to
  // reverse the transform we're about to do would be slightly complex.
  // Note: This may extend the live range of the inputs to the icmp and thus
  // increase the liveset of any statepoint we move over. This is profitable
  // as long as all statepoints are in rare blocks. If we had in-register
  // lowering for live values this would be a much safer transform.
  auto getConditionInst = [](Instruction *TI) -> Instruction * {
    if (auto *BI = dyn_cast<BranchInst>(TI))
      if (BI->isConditional())
        return dyn_cast<Instruction>(BI->getCondition());
    // TODO: Extend this to handle switches
    return nullptr;
  };
  for (BasicBlock &BB : F) {
    Instruction *TI = BB.getTerminator();
    if (auto *Cond = getConditionInst(TI))
      // TODO: Handle more than just ICmps here. We should be able to move
      // most instructions without side effects or memory access.
      if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
        MadeChange = true;
        Cond->moveBefore(TI);
      }
  }
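  // Note that Cond->hasOneUse() is what makes the sink above legal without
  // further analysis: the conditional branch is the only user, so moving the
  // icmp down to sit immediately before its branch cannot break another use,
  // and the icmp itself has no side effects.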
  // Nasty workaround - The base computation code in the main algorithm doesn't
  // consider the fact that a GEP can be used to convert a scalar to a vector.
  // The right fix for this is to integrate GEPs into the base rewriting
  // algorithm properly; this is just a short term workaround to prevent
  // crashes by canonicalizing such GEPs into fully vector GEPs.
  for (Instruction &I : instructions(F)) {
    if (!isa<GetElementPtrInst>(I))
      continue;

    unsigned VF = 0;
    for (unsigned i = 0; i < I.getNumOperands(); i++)
      if (auto *OpndVTy = dyn_cast<VectorType>(I.getOperand(i)->getType())) {
        assert(VF == 0 ||
               VF == cast<FixedVectorType>(OpndVTy)->getNumElements());
        VF = cast<FixedVectorType>(OpndVTy)->getNumElements();
      }

    // It's the vector to scalar traversal through the pointer operand which
    // confuses base pointer rewriting, so limit ourselves to that case.
    if (!I.getOperand(0)->getType()->isVectorTy() && VF != 0) {
      IRBuilder<> B(&I);
      auto *Splat = B.CreateVectorSplat(VF, I.getOperand(0));
      I.setOperand(0, Splat);
      MadeChange = true;
    }
  }
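  // As an illustration (hypothetical IR, not from an actual test):
  //   %gep = getelementptr i32, ptr %base, <4 x i64> %idxs
  // becomes, after splatting the scalar pointer operand:
  //   %base.splat = <splat of %base as <4 x ptr>>
  //   %gep = getelementptr i32, <4 x ptr> %base.splat, <4 x i64> %idxs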
  // Cache the 'defining value' relation used in the computation and
  // insertion of base phis and selects. This ensures that we don't insert
  // large numbers of duplicate base_phis. Use one cache for both
  // inlineGetBaseAndOffset() and insertParsePoints().
  DefiningValueMapTy DVCache;

  // Mapping between base values and a flag indicating whether each is a known
  // base or not.
  IsKnownBaseMapTy KnownBases;

  if (!Intrinsics.empty())
    // Inline @gc.get.pointer.base() and @gc.get.pointer.offset() before
    // finding live references.
    MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache, KnownBases);

  if (!ParsePointNeeded.empty())
    MadeChange |=
        insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, KnownBases);

  return MadeChange;
}
// -------------------------------------------------------------------
// liveness computation via standard dataflow
// -------------------------------------------------------------------

// TODO: Consider using bitvectors for liveness, the set of potentially
// interesting values should be small and easy to pre-compute.
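// The helpers below implement standard backward dataflow over GC pointer
// values. Per basic block BB the analysis computes:
//   LiveSet(BB): upward-exposed GC pointer uses in BB (the "gen" set)
//   KillSet(BB): GC pointer values defined in BB
//   LiveOut(BB): values flowing from BB into successor phis, unioned with
//                LiveIn(S) for every successor S during propagation
//   LiveIn(BB) = (LiveSet(BB) | LiveOut(BB)) - KillSet(BB)
// and iterates a worklist until these reach a fixed point.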
/// Compute the live-in set for the location rbegin starting from
/// the live-out set of the basic block
static void computeLiveInValues(BasicBlock::reverse_iterator Begin,
                                BasicBlock::reverse_iterator End,
                                SetVector<Value *> &LiveTmp, GCStrategy *GC) {
  for (auto &I : make_range(Begin, End)) {
    // KILL/Def - Remove this definition from LiveIn
    LiveTmp.remove(&I);

    // Don't consider *uses* in PHI nodes, we handle their contribution to
    // predecessor blocks when we seed the LiveOut sets
    if (isa<PHINode>(I))
      continue;

    // USE - Add to the LiveIn set for this instruction
    for (Value *V : I.operands()) {
      assert(!isUnhandledGCPointerType(V->getType(), GC) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(V->getType(), GC) && !isa<Constant>(V)) {
        // The choice to exclude all things constant here is slightly subtle.
        // There are two independent reasons:
        // - We assume that things which are constant (from LLVM's definition)
        // do not move at runtime. For example, the address of a global
        // variable is fixed, even though its contents may not be.
        // - Second, we can't disallow arbitrary inttoptr constants even
        // if the language frontend does. Optimization passes are free to
        // locally exploit facts without respect to global reachability. This
        // can create sections of code which are dynamically unreachable and
        // contain just about anything. (see constants.ll in tests)
        LiveTmp.insert(V);
      }
    }
  }
}
static void computeLiveOutSeed(BasicBlock *BB, SetVector<Value *> &LiveTmp,
                               GCStrategy *GC) {
  for (BasicBlock *Succ : successors(BB)) {
    for (auto &I : *Succ) {
      PHINode *PN = dyn_cast<PHINode>(&I);
      if (!PN)
        break;

      Value *V = PN->getIncomingValueForBlock(BB);
      assert(!isUnhandledGCPointerType(V->getType(), GC) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(V->getType(), GC) && !isa<Constant>(V))
        LiveTmp.insert(V);
    }
  }
}
static SetVector<Value *> computeKillSet(BasicBlock *BB, GCStrategy *GC) {
  SetVector<Value *> KillSet;
  for (Instruction &I : *BB)
    if (isHandledGCPointerType(I.getType(), GC))
      KillSet.insert(&I);
  return KillSet;
}
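// In classic dataflow terms, computeKillSet produces the "kill" facts (every
// GC pointer value defined in the block), while the reverse walk in
// computeLiveInValues produces the "gen" facts, i.e. the upward-exposed uses.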
#ifndef NDEBUG
/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
/// validation check for the liveness computation.
static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live,
                          Instruction *TI, bool TermOkay = false) {
  for (Value *V : Live) {
    if (auto *I = dyn_cast<Instruction>(V)) {
      // The terminator can be a member of the LiveOut set. LLVM's definition
      // of instruction dominance states that V does not dominate itself. As
      // such, we need to special case this to allow it.
      if (TermOkay && TI == I)
        continue;
      assert(DT.dominates(I, TI) &&
             "basic SSA liveness expectation violated by liveness analysis");
    }
  }
}
/// Check that all the liveness sets used during the computation of liveness
/// obey basic SSA properties. This is useful for finding cases where we miss
/// a def.
static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
                          BasicBlock &BB) {
  checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
  checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
  checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
}
#endif
static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data, GCStrategy *GC) {
  SmallSetVector<BasicBlock *, 32> Worklist;

  // Seed the liveness for each individual block
  for (BasicBlock &BB : F) {
    Data.KillSet[&BB] = computeKillSet(&BB, GC);
    Data.LiveSet[&BB].clear();
    computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB], GC);

#ifndef NDEBUG
    for (Value *Kill : Data.KillSet[&BB])
      assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
#endif

    Data.LiveOut[&BB] = SetVector<Value *>();
    computeLiveOutSeed(&BB, Data.LiveOut[&BB], GC);
    Data.LiveIn[&BB] = Data.LiveSet[&BB];
    Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]);
    Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]);
    if (!Data.LiveIn[&BB].empty())
      Worklist.insert(pred_begin(&BB), pred_end(&BB));
  }

  // Propagate that liveness until stable
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();

    // Compute our new liveout set, then exit early if it hasn't changed despite
    // the contribution of our successors.
    SetVector<Value *> LiveOut = Data.LiveOut[BB];
    const auto OldLiveOutSize = LiveOut.size();
    for (BasicBlock *Succ : successors(BB)) {
      assert(Data.LiveIn.count(Succ));
      LiveOut.set_union(Data.LiveIn[Succ]);
    }
    // assert: OldLiveOut is a subset of LiveOut
    if (OldLiveOutSize == LiveOut.size()) {
      // If the sets are the same size, then we didn't actually add anything
      // when unioning our successors' LiveIn. Thus, the LiveIn of this block
      // hasn't changed.
      continue;
    }
    Data.LiveOut[BB] = LiveOut;

    // Apply the effects of this basic block
    SetVector<Value *> LiveTmp = LiveOut;
    LiveTmp.set_union(Data.LiveSet[BB]);
    LiveTmp.set_subtract(Data.KillSet[BB]);

    assert(Data.LiveIn.count(BB));
    const SetVector<Value *> &OldLiveIn = Data.LiveIn[BB];
    // assert: OldLiveIn is a subset of LiveTmp
    if (OldLiveIn.size() != LiveTmp.size()) {
      Data.LiveIn[BB] = LiveTmp;
      Worklist.insert(pred_begin(BB), pred_end(BB));
    }
  } // while (!Worklist.empty())

#ifndef NDEBUG
  // Verify our output against SSA properties. This helps catch any
  // missing kills during the above iteration.
  for (BasicBlock &BB : F)
    checkBasicSSA(DT, Data, BB);
#endif
}
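// Because set_union only ever grows the sets, comparing sizes before and
// after the union is a sound change-detection test, and monotonic growth over
// a finite value universe guarantees the worklist iteration terminates.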
static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &Out, GCStrategy *GC) {
  BasicBlock *BB = Inst->getParent();

  // Note: The copy is intentional and required
  assert(Data.LiveOut.count(BB));
  SetVector<Value *> LiveOut = Data.LiveOut[BB];

  // We want to handle the statepoint itself oddly. Its
  // call result is not live (normal), nor are its arguments
  // (unless they're used again later). This adjustment is
  // specifically what we need to relocate
  computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(), LiveOut,
                      GC);
  LiveOut.remove(Inst);
  Out.insert(LiveOut.begin(), LiveOut.end());
}
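// Note on the iterator arithmetic above: ++Inst->getIterator().getReverse()
// points one before Inst in program order, so the reverse walk from
// BB->rbegin() visits everything from the terminator back down through Inst
// itself; the explicit remove() then drops the statepoint's own result from
// the set.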
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
                                  CallBase *Call,
                                  PartiallyConstructedSafepointRecord &Info,
                                  PointerToBaseTy &PointerToBase,
                                  GCStrategy *GC) {
  StatepointLiveSetTy Updated;
  findLiveSetAtInst(Call, RevisedLivenessData, Updated, GC);

  // We may have base pointers which are now live that weren't before. We need
  // to update the PointerToBase structure to reflect this.
  for (auto *V : Updated)
    PointerToBase.insert({V, V});

  Info.LiveSet = Updated;
}