//===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Rewrite call/invoke instructions so as to make potential relocations
// performed by the garbage collector explicit in the IR.
//
//===----------------------------------------------------------------------===//
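// For illustration only (a sketch, not drawn from this file): a call such as
//
//   %res = call i8 addrspace(1)* @foo(i8 addrspace(1)* %obj)
//
// becomes, roughly, a statepoint whose token ties the relocations together:
//
//   %tok = call token @llvm.experimental.gc.statepoint(... @foo, ... %obj ...)
//   %obj.relocated = call i8 addrspace(1)*
//       @llvm.experimental.gc.relocate(token %tok, ...)
//   %res = call i8 addrspace(1)* @llvm.experimental.gc.result(token %tok)
//
// with uses of %obj after the statepoint rewritten to use %obj.relocated.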
#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <set>
#include <string>
#include <utility>
#include <vector>

#define DEBUG_TYPE "rewrite-statepoints-for-gc"

using namespace llvm;
// Print the liveset found at the insert location
static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
                                  cl::init(false));
static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
                                      cl::init(false));

// Print out the base pointers for debugging
static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
                                       cl::init(false));

// Cost threshold measuring when it is profitable to rematerialize value instead
// of relocating it
static cl::opt<unsigned>
    RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
                               cl::init(6));

#ifdef EXPENSIVE_CHECKS
static bool ClobberNonLive = true;
#else
static bool ClobberNonLive = false;
#endif

static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
                                                  cl::location(ClobberNonLive),
                                                  cl::Hidden);

static cl::opt<bool>
    AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
                                   cl::Hidden, cl::init(true));
/// The IR fed into RewriteStatepointsForGC may have had attributes and
/// metadata implying dereferenceability that are no longer valid/correct after
/// RewriteStatepointsForGC has run. This is because semantically, after
/// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
/// heap. stripNonValidData (conservatively) restores
/// correctness by erasing all attributes in the module that externally imply
/// dereferenceability. Similar reasoning also applies to the noalias
/// attributes and metadata. gc.statepoint can touch the entire heap including
/// noalias objects.
///
/// Apart from attributes and metadata, we also remove instructions that imply
/// constant physical memory: llvm.invariant.start.
static void stripNonValidData(Module &M);

static bool shouldRewriteStatepointsIn(Function &F);
PreservedAnalyses RewriteStatepointsForGC::run(Module &M,
                                               ModuleAnalysisManager &AM) {
  bool Changed = false;
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M) {
    // Nothing to do for declarations.
    if (F.isDeclaration() || F.empty())
      continue;

    // Policy choice says not to rewrite - the most common reason is that we're
    // compiling code without a GCStrategy.
    if (!shouldRewriteStatepointsIn(F))
      continue;

    auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
    auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
    auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
    Changed |= runOnFunction(F, DT, TTI, TLI);
  }
  if (!Changed)
    return PreservedAnalyses::all();

  // stripNonValidData asserts that shouldRewriteStatepointsIn
  // returns true for at least one function in the module. Since at least
  // one function changed, we know that the precondition is satisfied.
  stripNonValidData(M);

  PreservedAnalyses PA;
  PA.preserve<TargetIRAnalysis>();
  PA.preserve<TargetLibraryAnalysis>();
  return PA;
}
namespace {

class RewriteStatepointsForGCLegacyPass : public ModulePass {
  RewriteStatepointsForGC Impl;

public:
  static char ID; // Pass identification, replacement for typeid

  RewriteStatepointsForGCLegacyPass() : ModulePass(ID), Impl() {
    initializeRewriteStatepointsForGCLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override {
    bool Changed = false;
    for (Function &F : M) {
      // Nothing to do for declarations.
      if (F.isDeclaration() || F.empty())
        continue;

      // Policy choice says not to rewrite - the most common reason is that
      // we're compiling code without a GCStrategy.
      if (!shouldRewriteStatepointsIn(F))
        continue;

      TargetTransformInfo &TTI =
          getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
      const TargetLibraryInfo &TLI =
          getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
      auto &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();

      Changed |= Impl.runOnFunction(F, DT, TTI, TLI);
    }

    if (!Changed)
      return false;

    // stripNonValidData asserts that shouldRewriteStatepointsIn
    // returns true for at least one function in the module. Since at least
    // one function changed, we know that the precondition is satisfied.
    stripNonValidData(M);
    return true;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // We add and rewrite a bunch of instructions, but don't really do much
    // else. We could in theory preserve a lot more analyses here.
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }
};

} // end anonymous namespace
char RewriteStatepointsForGCLegacyPass::ID = 0;

ModulePass *llvm::createRewriteStatepointsForGCLegacyPass() {
  return new RewriteStatepointsForGCLegacyPass();
}

INITIALIZE_PASS_BEGIN(RewriteStatepointsForGCLegacyPass,
                      "rewrite-statepoints-for-gc",
                      "Make relocations explicit at statepoints", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(RewriteStatepointsForGCLegacyPass,
                    "rewrite-statepoints-for-gc",
                    "Make relocations explicit at statepoints", false, false)
namespace {

struct GCPtrLivenessData {
  /// Values defined in this block.
  MapVector<BasicBlock *, SetVector<Value *>> KillSet;

  /// Values used in this block (and thus live); does not include values
  /// killed within this block.
  MapVector<BasicBlock *, SetVector<Value *>> LiveSet;

  /// Values live into this basic block (i.e. used by any
  /// instruction in this basic block or ones reachable from here)
  MapVector<BasicBlock *, SetVector<Value *>> LiveIn;

  /// Values live out of this basic block (i.e. live into
  /// any successor block)
  MapVector<BasicBlock *, SetVector<Value *>> LiveOut;
};
// The type of the internal cache used inside the findBasePointers family
// of functions. From the caller's perspective, this is an opaque type and
// should not be inspected.
//
// In the actual implementation this caches two relations:
// - The base relation itself (i.e. this pointer is based on that one)
// - The base defining value relation (i.e. before base_phi insertion)
// Generally, after the execution of a full findBasePointer call, only the
// base relation will remain. Internally, we add a mixture of the two
// types, then update all the second-type entries to the first type.
using DefiningValueMapTy = MapVector<Value *, Value *>;
using StatepointLiveSetTy = SetVector<Value *>;
using RematerializedValueMapTy =
    MapVector<AssertingVH<Instruction>, AssertingVH<Value>>;
struct PartiallyConstructedSafepointRecord {
  /// The set of values known to be live across this safepoint
  StatepointLiveSetTy LiveSet;

  /// Mapping from live pointers to a base-defining-value
  MapVector<Value *, Value *> PointerToBase;

  /// The *new* gc.statepoint instruction itself. This produces the token
  /// that normal path gc.relocates and the gc.result are tied to.
  GCStatepointInst *StatepointToken;

  /// Instruction to which exceptional gc relocates are attached
  /// Makes it easier to iterate through them during relocationViaAlloca.
  Instruction *UnwindToken;

  /// Record live values we rematerialized instead of relocating.
  /// They are not included in the 'LiveSet' field.
  /// Maps each rematerialized copy to its original value.
  RematerializedValueMapTy RematerializedValues;
};

} // end anonymous namespace
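// How the fields above relate (a sketch): for an invoke, StatepointToken is
// the new gc.statepoint, the normal-destination gc.relocates hang off its
// token, and UnwindToken is the landingpad-side token that the exceptional
// gc.relocates are attached to.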
static ArrayRef<Use> GetDeoptBundleOperands(const CallBase *Call) {
  Optional<OperandBundleUse> DeoptBundle =
      Call->getOperandBundle(LLVMContext::OB_deopt);

  if (!DeoptBundle.hasValue()) {
    assert(AllowStatepointWithNoDeoptInfo &&
           "Found non-leaf call without deopt info!");
    return None;
  }

  return DeoptBundle.getValue().Inputs;
}
/// Compute the live-in set for every basic block in the function
static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data);

/// Given results from the dataflow liveness computation, find the set of live
/// Values at a particular instruction.
static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &out);
// TODO: Once we can get to the GCStrategy, this becomes
// Optional<bool> isGCManagedPointer(const Type *Ty) const override {

static bool isGCPointerType(Type *T) {
  if (auto *PT = dyn_cast<PointerType>(T))
    // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
    // GC managed heap. We know that a pointer into this heap needs to be
    // updated and that no other pointer does.
    return PT->getAddressSpace() == 1;
  return false;
}
// Return true if this type is one which a) is a gc pointer or contains a GC
// pointer and b) is of a type this code expects to encounter as a live value.
// (The insertion code will assert that a type which matches (a) and not (b)
// is not encountered.)
static bool isHandledGCPointerType(Type *T) {
  // We fully support gc pointers
  if (isGCPointerType(T))
    return true;
  // We partially support vectors of gc pointers. The code will assert if it
  // can't handle something.
  if (auto VT = dyn_cast<VectorType>(T))
    if (isGCPointerType(VT->getElementType()))
      return true;
  return false;
}

#ifndef NDEBUG
/// Returns true if this type contains a gc pointer whether we know how to
/// handle that type or not.
static bool containsGCPtrType(Type *Ty) {
  if (isGCPointerType(Ty))
    return true;
  if (VectorType *VT = dyn_cast<VectorType>(Ty))
    return isGCPointerType(VT->getScalarType());
  if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
    return containsGCPtrType(AT->getElementType());
  if (StructType *ST = dyn_cast<StructType>(Ty))
    return llvm::any_of(ST->elements(), containsGCPtrType);
  return false;
}
// Returns true if this is a type which a) is a gc pointer or contains a GC
// pointer and b) is of a type which the code doesn't expect (i.e. first class
// aggregates). Used to trip assertions.
static bool isUnhandledGCPointerType(Type *Ty) {
  return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
}
#endif
// Return the name of the value suffixed with the provided suffix, or, if the
// value didn't have a name, the specified default name.
static std::string suffixed_name_or(Value *V, StringRef Suffix,
                                    StringRef DefaultName) {
  return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.str();
}
// Conservatively identifies any definitions which might be live at the
// given instruction. The analysis is performed immediately before the
// given instruction. Values defined by that instruction are not considered
// live. Values used by that instruction are considered live.
static void analyzeParsePointLiveness(
    DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call,
    PartiallyConstructedSafepointRecord &Result) {
  StatepointLiveSetTy LiveSet;
  findLiveSetAtInst(Call, OriginalLivenessData, LiveSet);

  if (PrintLiveSet) {
    dbgs() << "Live Variables:\n";
    for (Value *V : LiveSet)
      dbgs() << " " << V->getName() << " " << *V << "\n";
  }
  if (PrintLiveSetSize) {
    dbgs() << "Safepoint For: " << Call->getCalledOperand()->getName() << "\n";
    dbgs() << "Number live values: " << LiveSet.size() << "\n";
  }
  Result.LiveSet = LiveSet;
}
// Returns true if V is a knownBaseResult.
static bool isKnownBaseResult(Value *V);

// Returns true if V is a BaseResult that already exists in the IR, i.e. it is
// not created by the findBasePointers algorithm.
static bool isOriginalBaseResult(Value *V);
namespace {

/// A single base defining value - An immediate base defining value for an
/// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'.
/// For instructions which have multiple pointer [vector] inputs or that
/// transition between vector and scalar types, there is no immediate base
/// defining value. The 'base defining value' for 'Def' is the transitive
/// closure of this relation stopping at the first instruction which has no
/// immediate base defining value. The b.d.v. might itself be a base pointer,
/// but it can also be an arbitrary derived pointer.
struct BaseDefiningValueResult {
  /// Contains the value which is the base defining value.
  Value * const BDV;

  /// True if the base defining value is also known to be an actual base
  /// pointer.
  const bool IsKnownBase;

  BaseDefiningValueResult(Value *BDV, bool IsKnownBase)
      : BDV(BDV), IsKnownBase(IsKnownBase) {
#ifndef NDEBUG
    // Check consistency between new and old means of checking whether a BDV is
    // a base.
    bool MustBeBase = isKnownBaseResult(BDV);
    assert(!MustBeBase || MustBeBase == IsKnownBase);
#endif
  }
};

} // end anonymous namespace
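// An illustrative chain (names hypothetical): for
//   %merge = phi i8 addrspace(1)* [ %a, %left ], [ %b, %right ]
//   %d = getelementptr i8, i8 addrspace(1)* %merge, i64 8
// the immediate BDV of %d is %merge (a gep's base is its pointer operand),
// and %merge is its own BDV with IsKnownBase == false, since a phi of two
// derived pointers blocks the simple search.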
static BaseDefiningValueResult findBaseDefiningValue(Value *I);
/// Return a base defining value for the given vector instruction 'I'.
/// As an optimization, this method will try to determine when the
/// value is known to already be a base pointer. If this can be established,
/// the IsKnownBase field of the returned result will be true. Note that either
/// a vector or a pointer typed value can be returned. For the former, the
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the latter, the returned pointer is a BDV (or possibly a base) for the
/// particular element in 'I'.
static BaseDefiningValueResult
findBaseDefiningValueOfVector(Value *I) {
  // Each case parallels findBaseDefiningValue below, see that code for
  // detailed motivation.

  if (isa<Argument>(I))
    // An incoming argument to the function is a base pointer
    return BaseDefiningValueResult(I, true);

  if (isa<Constant>(I))
    // Base of constant vector consists only of constant null pointers.
    // For reasoning see similar case inside 'findBaseDefiningValue' function.
    return BaseDefiningValueResult(ConstantAggregateZero::get(I->getType()),
                                   true);

  if (isa<LoadInst>(I))
    return BaseDefiningValueResult(I, true);

  if (isa<InsertElementInst>(I))
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    return BaseDefiningValueResult(I, false);

  if (isa<ShuffleVectorInst>(I))
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    // TODO: There are a number of local optimizations which could be applied
    // here for particular shufflevector patterns.
    return BaseDefiningValueResult(I, false);

  // The behavior of getelementptr instructions is the same for vector and
  // non-vector data types.
  if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
    return findBaseDefiningValue(GEP->getPointerOperand());

  // If the pointer comes through a bitcast of a vector of pointers to
  // a vector of another type of pointer, then look through the bitcast
  if (auto *BC = dyn_cast<BitCastInst>(I))
    return findBaseDefiningValue(BC->getOperand(0));

  // We assume that functions in the source language only return base
  // pointers. This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I))
    return BaseDefiningValueResult(I, true);

  // A PHI or Select is a base defining value. The outer findBasePointer
  // algorithm is responsible for constructing a base value for this BDV.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "unknown vector instruction - no base found for vector element");
  return BaseDefiningValueResult(I, false);
}
/// Helper function for findBasePointer - Will return a value which either a)
/// defines the base pointer for the input, b) blocks the simple search
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
  assert(I->getType()->isPtrOrPtrVectorTy() &&
         "Illegal to ask for the base pointer of a non-pointer type");

  if (I->getType()->isVectorTy())
    return findBaseDefiningValueOfVector(I);

  if (isa<Argument>(I))
    // An incoming argument to the function is a base pointer
    // We should have never reached here if this argument isn't a gc value
    return BaseDefiningValueResult(I, true);

  if (isa<Constant>(I)) {
    // We assume that objects with a constant base (e.g. a global) can't move
    // and don't need to be reported to the collector because they are always
    // live. Besides global references, all kinds of constants (e.g. undef,
    // constant expressions, null pointers) can be introduced by the inliner or
    // the optimizer, especially on dynamically dead paths.
    // Here we treat all of them as having a single null base. By doing this we
    // try to avoid problems reporting various conflicts in a form of
    // "phi (const1, const2)" or "phi (const, regular gc ptr)".
    // See constant.ll file for relevant test cases.

    return BaseDefiningValueResult(
        ConstantPointerNull::get(cast<PointerType>(I->getType())), true);
  }

  // inttoptrs in an integral address space are currently ill-defined. We
  // treat them as defining base pointers here for consistency with the
  // constant rule above and because we don't really have a better semantic
  // to give them. Note that the optimizer is always free to insert undefined
  // behavior on dynamically dead paths as well.
  if (isa<IntToPtrInst>(I))
    return BaseDefiningValueResult(I, true);

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    Value *Def = CI->stripPointerCasts();
    // If stripping pointer casts changes the address space there is an
    // addrspacecast in between.
    assert(cast<PointerType>(Def->getType())->getAddressSpace() ==
               cast<PointerType>(CI->getType())->getAddressSpace() &&
           "unsupported addrspacecast");
    // If we find a cast instruction here, it means we've found a cast which is
    // not simply a pointer cast (i.e. an inttoptr). We don't know how to
    // handle int->ptr conversion.
    assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
    return findBaseDefiningValue(Def);
  }

  if (isa<LoadInst>(I))
    // The value loaded is a gc base itself
    return BaseDefiningValueResult(I, true);

  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
    // The base of this GEP is the base
    return findBaseDefiningValue(GEP->getPointerOperand());

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      // fall through to general call handling
      break;
    case Intrinsic::experimental_gc_statepoint:
      llvm_unreachable("statepoints don't produce pointers");
    case Intrinsic::experimental_gc_relocate:
      // Rerunning safepoint insertion after safepoints are already
      // inserted is not supported. It could probably be made to work,
      // but why are you doing this? There's no good reason.
      llvm_unreachable("repeat safepoint insertion is not supported");
    case Intrinsic::gcroot:
      // Currently, this mechanism hasn't been extended to work with gcroot.
      // There's no reason it couldn't be, but I haven't thought about the
      // implications much.
      llvm_unreachable(
          "interaction with the gcroot mechanism is not supported");
    case Intrinsic::experimental_gc_get_pointer_base:
      return findBaseDefiningValue(II->getOperand(0));
    }
  }

  // We assume that functions in the source language only return base
  // pointers. This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I))
    return BaseDefiningValueResult(I, true);

  // TODO: I have absolutely no idea how to implement this part yet. It's not
  // necessarily hard, I just haven't really looked at it yet.
  assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");

  if (isa<AtomicCmpXchgInst>(I))
    // A CAS is effectively an atomic store and load combined under a
    // predicate. From the perspective of base pointers, we just treat it
    // like a load.
    return BaseDefiningValueResult(I, true);

  assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
                                   "binary ops which don't apply to pointers");

  // The aggregate ops. Aggregates can either be in the heap or on the
  // stack, but in either case, this is simply a field load. As a result,
  // this is a defining definition of the base just like a load is.
  if (isa<ExtractValueInst>(I))
    return BaseDefiningValueResult(I, true);

  // We should never see an insert vector since that would require we be
  // tracing back a struct value not a pointer value.
  assert(!isa<InsertValueInst>(I) &&
         "Base pointer for a struct is meaningless");

  // This value might have been generated by findBasePointer() called when
  // substituting gc.get.pointer.base() intrinsic.
  bool IsKnownBase =
      isa<Instruction>(I) && cast<Instruction>(I)->getMetadata("is_base_value");

  // An extractelement produces a base result exactly when its input does.
  // We may need to insert a parallel instruction to extract the appropriate
  // element out of the base vector corresponding to the input. Given this,
  // it's analogous to the phi and select case even though it's not a merge.
  if (isa<ExtractElementInst>(I))
    // Note: There are a lot of obvious peephole cases here. These are
    // deliberately handled after the main base pointer inference algorithm to
    // make writing test cases to exercise that code easier.
    return BaseDefiningValueResult(I, IsKnownBase);

  // The last two cases here don't return a base pointer. Instead, they
  // return a value which dynamically selects from among several base
  // derived pointers (each with its own base potentially). It's the job of
  // the caller to resolve these.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "missing instruction case in findBaseDefiningValue");
  return BaseDefiningValueResult(I, IsKnownBase);
}
/// Returns the base defining value for this value.
static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
  Value *&Cached = Cache[I];
  if (!Cached) {
    Cached = findBaseDefiningValue(I).BDV;
    LLVM_DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
                      << Cached->getName() << "\n");
  }
  assert(Cache[I] != nullptr);
  return Cached;
}
/// Return a base pointer for this value if known. Otherwise, return its
/// base defining value.
static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
  Value *Def = findBaseDefiningValueCached(I, Cache);
  auto Found = Cache.find(Def);
  if (Found != Cache.end()) {
    // Either a base-of relation, or a self reference. Caller must check.
    return Found->second;
  }
  // Only a BDV available
  return Def;
}
/// This value is a base pointer that is not generated by RS4GC, i.e. it already
/// exists in the code.
static bool isOriginalBaseResult(Value *V) {
  // no recursion possible
  return !isa<PHINode>(V) && !isa<SelectInst>(V) &&
         !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
         !isa<ShuffleVectorInst>(V);
}
/// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
/// is it known to be a base pointer? Or do we need to continue searching.
static bool isKnownBaseResult(Value *V) {
  if (isOriginalBaseResult(V))
    return true;
  if (isa<Instruction>(V) &&
      cast<Instruction>(V)->getMetadata("is_base_value")) {
    // This is a previously inserted base phi or select. We know
    // that this is a base value.
    return true;
  }

  // We need to keep searching
  return false;
}
// Returns true if First and Second values are both scalar or both vector.
static bool areBothVectorOrScalar(Value *First, Value *Second) {
  return isa<VectorType>(First->getType()) ==
         isa<VectorType>(Second->getType());
}
namespace {

/// Models the state of a single base defining value in the findBasePointer
/// algorithm for determining where a new instruction is needed to propagate
/// the base of this BDV.
class BDVState {
public:
  enum StatusTy {
    // Starting state of lattice
    Unknown,
    // Some specific base value -- does *not* mean that instruction
    // propagates the base of the object
    // ex: gep %arg, 16 -> %arg is the base value
    Base,
    // Need to insert a node to represent a merge.
    Conflict
  };

  BDVState() {
    llvm_unreachable("missing state in map");
  }

  explicit BDVState(Value *OriginalValue)
      : OriginalValue(OriginalValue) {}
  explicit BDVState(Value *OriginalValue, StatusTy Status,
                    Value *BaseValue = nullptr)
      : OriginalValue(OriginalValue), Status(Status), BaseValue(BaseValue) {
    assert(Status != Base || BaseValue);
  }

  StatusTy getStatus() const { return Status; }
  Value *getOriginalValue() const { return OriginalValue; }
  Value *getBaseValue() const { return BaseValue; }

  bool isBase() const { return getStatus() == Base; }
  bool isUnknown() const { return getStatus() == Unknown; }
  bool isConflict() const { return getStatus() == Conflict; }

  // Values of type BDVState form a lattice, and this function implements the
  // meet operation.
  void meet(const BDVState &Other) {
    auto markConflict = [&]() {
      Status = BDVState::Conflict;
      BaseValue = nullptr;
    };
    // Conflict is a final state.
    if (isConflict())
      return;
    // if we are not known - just take other state.
    if (isUnknown()) {
      Status = Other.getStatus();
      BaseValue = Other.getBaseValue();
      return;
    }
    // We are base.
    assert(isBase() && "Unknown state");
    // If other is unknown - just keep our state.
    if (Other.isUnknown())
      return;
    // If other is conflict - it is a final state.
    if (Other.isConflict())
      return markConflict();
    // Other is base as well.
    assert(Other.isBase() && "Unknown state");
    // If bases are different - Conflict.
    if (getBaseValue() != Other.getBaseValue())
      return markConflict();
    // We are identical, do nothing.
  }

  bool operator==(const BDVState &Other) const {
    return OriginalValue == Other.OriginalValue &&
           BaseValue == Other.BaseValue && Status == Other.Status;
  }

  bool operator!=(const BDVState &other) const { return !(*this == other); }

#ifndef NDEBUG
  LLVM_DUMP_METHOD
  void dump() const {
    print(dbgs());
    dbgs() << '\n';
  }
#endif

  void print(raw_ostream &OS) const {
    switch (getStatus()) {
    case Unknown:
      OS << "U";
      break;
    case Base:
      OS << "B";
      break;
    case Conflict:
      OS << "C";
      break;
    }
    OS << " (base " << getBaseValue() << " - "
       << (getBaseValue() ? getBaseValue()->getName() : "nullptr") << ")"
       << " for " << OriginalValue->getName() << ":";
  }

private:
  AssertingVH<Value> OriginalValue; // instruction this state corresponds to
  StatusTy Status = Unknown;
  AssertingVH<Value> BaseValue = nullptr; // Non-null only if Status == Base.
};

} // end anonymous namespace
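// Meet examples over this lattice (illustrative): meet(Unknown, Base(%a)) ==
// Base(%a); meet(Base(%a), Base(%a)) == Base(%a); meet(Base(%a), Base(%b)) ==
// Conflict; and Conflict absorbs everything else.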
#ifndef NDEBUG
static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
  State.print(OS);
  return OS;
}
#endif
/// For a given value or instruction, figure out what base ptr it's derived
/// from. For gc objects, this is simply itself. On success, returns a value
/// which is the base pointer. (This is reliable and can be used for
/// relocation.) On failure, returns nullptr.
static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
  Value *Def = findBaseOrBDV(I, Cache);

  if (isKnownBaseResult(Def) && areBothVectorOrScalar(Def, I))
    return Def;

  // Here's the rough algorithm:
  // - For every SSA value, construct a mapping to either an actual base
  //   pointer or a PHI which obscures the base pointer.
  // - Construct a mapping from PHI to unknown TOP state. Use an
  //   optimistic algorithm to propagate base pointer information. Lattice
  //   looks like:
  //   UNKNOWN
  //   b1 b2 b3 b4
  //   CONFLICT
  //   When algorithm terminates, all PHIs will either have a single concrete
  //   base or be in a conflict state.
  // - For every conflict, insert a dummy PHI node without arguments. Add
  //   these to the base[Instruction] = BasePtr mapping. For every
  //   non-conflict, add the actual base.
  // - For every conflict, add arguments for the base[a] of each input
  //   argument a.
  //
  // Note: A simpler form of this would be to add the conflict form of all
  // PHIs without running the optimistic algorithm. This would be
  // analogous to pessimistic data flow and would likely lead to an
  // overall worse solution.
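  //
  // For example (a sketch with hypothetical names): given
  //   %merge = phi [ %a, %left ], [ %gep, %right ]  ; %gep derived from %b
  // the fixed point leaves %merge in the Conflict state, so we materialize
  //   %merge.base = phi [ %a, %left ], [ %b, %right ]
  // and record base[%merge] = %merge.base.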
  auto isExpectedBDVType = [](Value *BDV) {
    return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
           isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV) ||
           isa<ShuffleVectorInst>(BDV);
  };

  // Once populated, will contain a mapping from each potentially non-base BDV
  // to a lattice value (described above) which corresponds to that BDV.
  // We use the order of insertion (DFS over the def/use graph) to provide a
  // stable deterministic ordering for visiting DenseMaps (which are unordered)
  // below. This is important for deterministic compilation.
  MapVector<Value *, BDVState> States;

#ifndef NDEBUG
  auto VerifyStates = [&]() {
    for (auto &Entry : States) {
      assert(Entry.first == Entry.second.getOriginalValue());
    }
  };
#endif
= [](Value
*BDV
, std::function
<void (Value
*)> F
) {
858 if (PHINode
*PN
= dyn_cast
<PHINode
>(BDV
)) {
859 for (Value
*InVal
: PN
->incoming_values())
861 } else if (SelectInst
*SI
= dyn_cast
<SelectInst
>(BDV
)) {
862 F(SI
->getTrueValue());
863 F(SI
->getFalseValue());
864 } else if (auto *EE
= dyn_cast
<ExtractElementInst
>(BDV
)) {
865 F(EE
->getVectorOperand());
866 } else if (auto *IE
= dyn_cast
<InsertElementInst
>(BDV
)) {
867 F(IE
->getOperand(0));
868 F(IE
->getOperand(1));
869 } else if (auto *SV
= dyn_cast
<ShuffleVectorInst
>(BDV
)) {
870 // For a canonical broadcast, ignore the undef argument
871 // (without this, we insert a parallel base shuffle for every broadcast)
872 F(SV
->getOperand(0));
873 if (!SV
->isZeroEltSplat())
874 F(SV
->getOperand(1));
876 llvm_unreachable("unexpected BDV type");
  // Recursively fill in all base defining values reachable from the initial
  // one for which we don't already know a definite base value for
  /* scope */ {
    SmallVector<Value *, 16> Worklist;
    Worklist.push_back(Def);
    States.insert({Def, BDVState(Def)});
    while (!Worklist.empty()) {
      Value *Current = Worklist.pop_back_val();
      assert(!isOriginalBaseResult(Current) && "why did it get added?");

      auto visitIncomingValue = [&](Value *InVal) {
        Value *Base = findBaseOrBDV(InVal, Cache);
        if (isKnownBaseResult(Base) && areBothVectorOrScalar(Base, InVal))
          // Known bases won't need new instructions introduced and can be
          // ignored safely. However, this can only be done when InVal and Base
          // are both scalar or both vector. Otherwise, we need to find a
          // correct BDV for InVal, by creating an entry in the lattice
          // (States).
          return;
        assert(isExpectedBDVType(Base) && "the only non-base values "
               "we see should be base defining values");
        if (States.insert(std::make_pair(Base, BDVState(Base))).second)
          Worklist.push_back(Base);
      };

      visitBDVOperands(Current, visitIncomingValue);
    }
  }

#ifndef NDEBUG
  VerifyStates();
  LLVM_DEBUG(dbgs() << "States after initialization:\n");
  for (auto Pair : States) {
    LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }
#endif
  // Iterate forward through the value graph pruning any node from the state
  // list where all of the inputs are base pointers. The purpose of this is to
  // reuse existing values when the derived pointer we were asked to materialize
  // a base pointer for happens to be a base pointer itself. (Or a sub-graph
  // ends up base.)
  SmallVector<Value *> ToRemove;
  do {
    ToRemove.clear();
    for (auto Pair : States) {
      Value *BDV = Pair.first;
      auto canPruneInput = [&](Value *V) {
        Value *BDV = findBaseOrBDV(V, Cache);
        if (V->stripPointerCasts() != BDV)
          return false;
        // The assumption is that anything not in the state list
        // propagates a base pointer.
        return States.count(BDV) == 0;
      };

      bool CanPrune = true;
      visitBDVOperands(BDV, [&](Value *Op) {
        CanPrune = CanPrune && canPruneInput(Op);
      });
      if (CanPrune)
        ToRemove.push_back(BDV);
    }
    for (Value *V : ToRemove) {
      States.erase(V);
      // Cache the fact V is its own base for later usage.
      Cache[V] = V;
    }
  } while (!ToRemove.empty());

  // Did we manage to prove that Def itself must be a base pointer?
  if (!States.count(Def))
    return Def;
  // Return a phi state for a base defining value. We'll generate a new
  // base state for known bases and expect to find a cached state otherwise.
  auto GetStateForBDV = [&](Value *BaseValue, Value *Input) {
    auto I = States.find(BaseValue);
    if (I != States.end())
      return I->second;
    assert(areBothVectorOrScalar(BaseValue, Input));
    return BDVState(BaseValue, BDVState::Base, BaseValue);
  };
  bool Progress = true;
  while (Progress) {
#ifndef NDEBUG
    const size_t OldSize = States.size();
#endif
    Progress = false;
    // We're only changing values in this loop, thus safe to keep iterators.
    // Since this is computing a fixed point, the order of visit does not
    // affect the result. TODO: We could use a worklist here and make this run
    // faster.
    for (auto Pair : States) {
      Value *BDV = Pair.first;
      // Only values that do not have known bases or those that have differing
      // type (scalar versus vector) from a possible known base should be in
      // the lattice.
      assert((!isKnownBaseResult(BDV) ||
              !areBothVectorOrScalar(BDV, Pair.second.getBaseValue())) &&
             "why did it get added?");

      BDVState NewState(BDV);
      visitBDVOperands(BDV, [&](Value *Op) {
        Value *BDV = findBaseOrBDV(Op, Cache);
        auto OpState = GetStateForBDV(BDV, Op);
        NewState.meet(OpState);
      });

      BDVState OldState = States[BDV];
      if (OldState != NewState) {
        Progress = true;
        States[BDV] = NewState;
      }
    }

    assert(OldSize == States.size() &&
           "fixed point shouldn't be adding any new nodes to state");
  }

#ifndef NDEBUG
  VerifyStates();
  LLVM_DEBUG(dbgs() << "States after meet iteration:\n");
  for (auto Pair : States) {
    LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }
#endif
  // Handle all instructions that have a vector BDV, but the instruction itself
  // is of scalar type.
  for (auto Pair : States) {
    Instruction *I = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;
    auto *BaseValue = State.getBaseValue();
    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert((!isKnownBaseResult(I) || !areBothVectorOrScalar(I, BaseValue)) &&
           "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");

    if (!State.isBase() || !isa<VectorType>(BaseValue->getType()))
      continue;
    // extractelement instructions are a bit special in that we may need to
    // insert an extract even when we know an exact base for the instruction.
    // The problem is that we need to convert from a vector base to a scalar
    // base for the particular index we're interested in.
    if (isa<ExtractElementInst>(I)) {
      auto *EE = cast<ExtractElementInst>(I);
      // TODO: In many cases, the new instruction is just EE itself. We should
      // exploit this, but can't do it here since it would break the invariant
      // about the BDV not being known to be a base.
      auto *BaseInst = ExtractElementInst::Create(
          State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE);
      BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
      States[I] = BDVState(I, BDVState::Base, BaseInst);
    } else if (!isa<VectorType>(I->getType())) {
      // We need to handle cases that have a vector base but the instruction is
      // a scalar type (these could be phis or selects or any instruction that
      // are of scalar type, but the base can be a vector type). We
      // conservatively set this as conflict. Setting the base value for these
      // conflicts is handled in the next loop which traverses States.
      States[I] = BDVState(I, BDVState::Conflict);
    }
  }

#ifndef NDEBUG
  VerifyStates();
#endif
  // Insert Phis for all conflicts
  // TODO: adjust naming patterns to avoid this order of iteration dependency
  for (auto Pair : States) {
    Instruction *I = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;
    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert((!isKnownBaseResult(I) ||
            !areBothVectorOrScalar(I, State.getBaseValue())) &&
           "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");

    // Since we're joining a vector and scalar base, they can never be the
    // same. As a result, we should always see insert element having reached
    // the conflict state.
    assert(!isa<InsertElementInst>(I) || State.isConflict());

    if (!State.isConflict())
      continue;

    auto getMangledName = [](Instruction *I) -> std::string {
      if (isa<PHINode>(I)) {
        return suffixed_name_or(I, ".base", "base_phi");
      } else if (isa<SelectInst>(I)) {
        return suffixed_name_or(I, ".base", "base_select");
      } else if (isa<ExtractElementInst>(I)) {
        return suffixed_name_or(I, ".base", "base_ee");
      } else if (isa<InsertElementInst>(I)) {
        return suffixed_name_or(I, ".base", "base_ie");
      } else {
        return suffixed_name_or(I, ".base", "base_sv");
      }
    };

    Instruction *BaseInst = I->clone();
    BaseInst->insertBefore(I);
    BaseInst->setName(getMangledName(I));
    // Add metadata marking this as a base value
    BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
    States[I] = BDVState(I, BDVState::Conflict, BaseInst);
  }

#ifndef NDEBUG
  VerifyStates();
#endif
  // Returns an instruction which produces the base pointer for a given
  // instruction. The instruction is assumed to be an input to one of the BDVs
  // seen in the inference algorithm above. As such, we must either already
  // know its base defining value is a base, or have inserted a new
  // instruction to propagate the base of its BDV and have entered that newly
  // introduced instruction into the state table. In either case, we are
  // assured to be able to determine an instruction which produces its base
  // pointer.
  auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
    Value *BDV = findBaseOrBDV(Input, Cache);
    Value *Base = nullptr;
    if (!States.count(BDV)) {
      assert(areBothVectorOrScalar(BDV, Input));
      Base = BDV;
    } else {
      // Either conflict or base.
      assert(States.count(BDV));
      Base = States[BDV].getBaseValue();
    }
    assert(Base && "Can't be null");
    // The cast is needed since base traversal may strip away bitcasts
    if (Base->getType() != Input->getType() && InsertPt)
      Base = new BitCastInst(Base, Input->getType(), "cast", InsertPt);
    return Base;
  };
  // Fixup all the inputs of the new PHIs. Visit order needs to be
  // deterministic and predictable because we're naming newly created
  // instructions.
  for (auto Pair : States) {
    Instruction *BDV = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;

    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert((!isKnownBaseResult(BDV) ||
            !areBothVectorOrScalar(BDV, State.getBaseValue())) &&
           "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
    if (!State.isConflict())
      continue;

    if (PHINode *BasePHI = dyn_cast<PHINode>(State.getBaseValue())) {
      PHINode *PN = cast<PHINode>(BDV);
      const unsigned NumPHIValues = PN->getNumIncomingValues();

      // The IR verifier requires phi nodes with multiple entries from the
      // same basic block to have the same incoming value for each of those
      // entries. Since we're inserting bitcasts in the loop, make sure we
      // do so at least once per incoming block.
      DenseMap<BasicBlock *, Value *> BlockToValue;
      for (unsigned i = 0; i < NumPHIValues; i++) {
        Value *InVal = PN->getIncomingValue(i);
        BasicBlock *InBB = PN->getIncomingBlock(i);
        if (!BlockToValue.count(InBB))
          BlockToValue[InBB] = getBaseForInput(InVal, InBB->getTerminator());
        else {
#ifndef NDEBUG
          Value *OldBase = BlockToValue[InBB];
          Value *Base = getBaseForInput(InVal, nullptr);
          // In essence this assert states: the only way two values
          // incoming from the same basic block may be different is by
          // being different bitcasts of the same value. A cleanup
          // that remains TODO is changing findBaseOrBDV to return an
          // llvm::Value of the correct type (and still remain pure).
          // This will remove the need to add bitcasts.
          assert(Base->stripPointerCasts() == OldBase->stripPointerCasts() &&
                 "Sanity -- findBaseOrBDV should be pure!");
#endif
        }
        Value *Base = BlockToValue[InBB];
        BasePHI->setIncomingValue(i, Base);
      }
    } else if (SelectInst *BaseSI =
                   dyn_cast<SelectInst>(State.getBaseValue())) {
      SelectInst *SI = cast<SelectInst>(BDV);

      // Find the instruction which produces the base for each input.
      // We may need to insert a bitcast.
      BaseSI->setTrueValue(getBaseForInput(SI->getTrueValue(), BaseSI));
      BaseSI->setFalseValue(getBaseForInput(SI->getFalseValue(), BaseSI));
    } else if (auto *BaseEE =
                   dyn_cast<ExtractElementInst>(State.getBaseValue())) {
      Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand();
      // Find the instruction which produces the base for each input. We may
      // need to insert a bitcast.
      BaseEE->setOperand(0, getBaseForInput(InVal, BaseEE));
    } else if (auto *BaseIE = dyn_cast<InsertElementInst>(State.getBaseValue())) {
      auto *BdvIE = cast<InsertElementInst>(BDV);
      auto UpdateOperand = [&](int OperandIdx) {
        Value *InVal = BdvIE->getOperand(OperandIdx);
        Value *Base = getBaseForInput(InVal, BaseIE);
        BaseIE->setOperand(OperandIdx, Base);
      };
      UpdateOperand(0); // vector operand
      UpdateOperand(1); // scalar operand
    } else {
      auto *BaseSV = cast<ShuffleVectorInst>(State.getBaseValue());
      auto *BdvSV = cast<ShuffleVectorInst>(BDV);
      auto UpdateOperand = [&](int OperandIdx) {
        Value *InVal = BdvSV->getOperand(OperandIdx);
        Value *Base = getBaseForInput(InVal, BaseSV);
        BaseSV->setOperand(OperandIdx, Base);
      };
      UpdateOperand(0); // vector operand
      if (!BdvSV->isZeroEltSplat())
        UpdateOperand(1); // vector operand
      else {
        // Never read, so just use undef
        Value *InVal = BdvSV->getOperand(1);
        BaseSV->setOperand(1, UndefValue::get(InVal->getType()));
      }
    }
  }

#ifndef NDEBUG
  VerifyStates();
#endif
  // Cache all of our results so we can cheaply reuse them
  // NOTE: This is actually two caches: one of the base defining value
  // relation and one of the base pointer relation! FIXME
  for (auto Pair : States) {
    auto *BDV = Pair.first;
    Value *Base = Pair.second.getBaseValue();
    assert(BDV && Base);
    // Only values that do not have known bases or those that have differing
    // type (scalar versus vector) from a possible known base should be in the
    // lattice.
    assert((!isKnownBaseResult(BDV) || !areBothVectorOrScalar(BDV, Base)) &&
           "why did it get added?");

    LLVM_DEBUG(
        dbgs() << "Updating base value cache"
               << " for: " << BDV->getName() << " from: "
               << (Cache.count(BDV) ? Cache[BDV]->getName().str() : "none")
               << " to: " << Base->getName() << "\n");

    Cache[BDV] = Base;
  }
  assert(Cache.count(Def));
  return Cache[Def];
}
// For a set of live pointers (base and/or derived), identify the base
// pointer of the object which they are derived from. This routine will
// mutate the IR graph as needed to make the 'base' pointer live at the
// definition site of 'derived'. This ensures that any use of 'derived' can
// also use 'base'. This may involve the insertion of a number of
// additional PHI nodes.
//
// preconditions: live is a set of pointer type Values
//
// side effects: may insert PHI nodes into the existing CFG, will preserve
// CFG, will not remove or mutate any existing nodes
//
// post condition: PointerToBase contains one (derived, base) pair for every
// pointer in live. Note that derived can be equal to base if the original
// pointer was a base pointer.
static void
findBasePointers(const StatepointLiveSetTy &live,
                 MapVector<Value *, Value *> &PointerToBase,
                 DominatorTree *DT, DefiningValueMapTy &DVCache) {
  for (Value *ptr : live) {
    Value *base = findBasePointer(ptr, DVCache);
    assert(base && "failed to find base pointer");
    PointerToBase[ptr] = base;
    assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
            DT->dominates(cast<Instruction>(base)->getParent(),
                          cast<Instruction>(ptr)->getParent())) &&
           "The base we found better dominate the derived pointer");
  }
}
/// Find the required base pointers (and adjust the live set) for the given
/// parse point.
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
                             CallBase *Call,
                             PartiallyConstructedSafepointRecord &result) {
  MapVector<Value *, Value *> PointerToBase;
  StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet;
  // We assume that all pointers passed to deopt are base pointers; as an
  // optimization, we can use this to avoid separately materializing the base
  // pointer graph. This is only relevant since we're very conservative about
  // generating new conflict nodes during base pointer insertion. If we were
  // smarter there, this would be irrelevant.
  if (auto Opt = Call->getOperandBundle(LLVMContext::OB_deopt))
    for (Value *V : Opt->Inputs) {
      if (!PotentiallyDerivedPointers.count(V))
        continue;
      PotentiallyDerivedPointers.remove(V);
      PointerToBase[V] = V;
    }
  findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache);

  if (PrintBasePointers) {
    errs() << "Base Pairs (w/o Relocation):\n";
    for (auto &Pair : PointerToBase) {
      errs() << " derived ";
      Pair.first->printAsOperand(errs(), false);
      errs() << " base ";
      Pair.second->printAsOperand(errs(), false);
      errs() << "\n";
    }
  }

  result.PointerToBase = PointerToBase;
}
/// Given an updated version of the dataflow liveness results, update the
/// liveset and base pointer maps for the call site CS.
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
                                  CallBase *Call,
                                  PartiallyConstructedSafepointRecord &result);

static void recomputeLiveInValues(
    Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
    MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
  // TODO-PERF: reuse the original liveness, then simply run the dataflow
  // again. The old values are still live and will help it stabilize quickly.
  GCPtrLivenessData RevisedLivenessData;
  computeLiveInValues(DT, F, RevisedLivenessData);
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info);
  }
}
// When inserting gc.relocate and gc.result calls, we need to ensure there are
// no uses of the original value / return value between the gc.statepoint and
// the gc.relocate / gc.result call. One case which can arise is a phi node
// starting one of the successor blocks. We also need to be able to insert the
// gc.relocates only on the path which goes through the statepoint. We might
// need to split an edge to make this possible.
static BasicBlock *
normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
                            DominatorTree &DT) {
  BasicBlock *Ret = BB;
  if (!BB->getUniquePredecessor())
    Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT);

  // Now that 'Ret' has unique predecessor we can safely remove all phi nodes
  // from it
  FoldSingleEntryPHINodes(Ret);
  assert(!isa<PHINode>(Ret->begin()) &&
         "All PHI nodes should have been removed!");

  // At this point, we can safely insert a gc.relocate or gc.result as the first
  // instruction in Ret if needed.
  return Ret;
}
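// For instance, if the invoke's normal destination has a second predecessor,
// SplitBlockPredecessors gives the invoke its own private successor, and any
// single-entry phis left behind are folded away so a gc.result/gc.relocate
// can be placed at the top of that block. (Illustrative; the exact block
// names are up to SplitBlockPredecessors.)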
// List of all function attributes which must be stripped when lowering from
// abstract machine model to physical machine model. Essentially, these are
// all the effects a safepoint might have which we ignored in the abstract
// machine model for purposes of optimization. We have to strip these on
// both function declarations and call sites.
static constexpr Attribute::AttrKind FnAttrsToStrip[] =
    {Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly,
     Attribute::ArgMemOnly, Attribute::InaccessibleMemOnly,
     Attribute::InaccessibleMemOrArgMemOnly,
     Attribute::NoSync, Attribute::NoFree};

// List of all parameter and return attributes which must be stripped when
// lowering from the abstract machine model. Note that we list attributes
// here which aren't valid as return attributes, that is okay. There are
// also some additional attributes with arguments which are handled
// explicitly and are not in this list.
static constexpr Attribute::AttrKind ParamAttrsToStrip[] =
    {Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly,
     Attribute::NoAlias, Attribute::NoFree};
// Create new attribute set containing only attributes which can be transferred
// from original call to the safepoint.
static AttributeList legalizeCallAttributes(LLVMContext &Ctx,
                                            AttributeList AL) {
  if (AL.isEmpty())
    return AL;

  // Remove the readonly, readnone, and statepoint function attributes.
  AttrBuilder FnAttrs = AL.getFnAttrs();
  for (auto Attr : FnAttrsToStrip)
    FnAttrs.removeAttribute(Attr);

  for (Attribute A : AL.getFnAttrs()) {
    if (isStatepointDirectiveAttr(A))
      // Drop the statepoint directive attributes (restored from context; the
      // exact removal helper may differ in this snapshot of the tree).
      FnAttrs.remove(A);
  }

  // Just skip parameter and return attributes for now
  return AttributeList::get(Ctx, AttributeList::FunctionIndex,
                            AttributeSet::get(Ctx, FnAttrs));
}
/// Helper function to place all gc relocates necessary for the given
/// statepoint.
/// Inputs:
///   liveVariables - list of variables to be relocated.
///   basePtrs - base pointers.
///   statepointToken - statepoint instruction to which relocates should be
///   bound.
///   Builder - Llvm IR builder to be used to construct new calls.
static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
                              ArrayRef<Value *> BasePtrs,
                              Instruction *StatepointToken,
                              IRBuilder<> &Builder) {
  if (LiveVariables.empty())
    return;

  auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
    auto ValIt = llvm::find(LiveVec, Val);
    assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
    size_t Index = std::distance(LiveVec.begin(), ValIt);
    assert(Index < LiveVec.size() && "Bug in std::find?");
    return Index;
  };

  Module *M = StatepointToken->getModule();

  // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
  // element type is i8 addrspace(1)*). We originally generated unique
  // declarations for each pointer type, but this proved problematic because
  // the intrinsic mangling code is incomplete and fragile. Since we're moving
  // towards a single unified pointer type anyways, we can just cast everything
  // to an i8* of the right address space. A bitcast is added later to convert
  // gc_relocate to the actual value's type.
  auto getGCRelocateDecl = [&](Type *Ty) {
    assert(isHandledGCPointerType(Ty));
    auto AS = Ty->getScalarType()->getPointerAddressSpace();
    Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS);
    if (auto *VT = dyn_cast<VectorType>(Ty))
      NewTy = FixedVectorType::get(NewTy,
                                   cast<FixedVectorType>(VT)->getNumElements());
    return Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate,
                                     {NewTy});
  };

  // Lazily populated map from input types to the canonicalized form mentioned
  // in the comment above. This should probably be cached somewhere more
  // broadly.
  DenseMap<Type *, Function *> TypeToDeclMap;

  for (unsigned i = 0; i < LiveVariables.size(); i++) {
    // Generate the gc.relocate call and save the result
    Value *BaseIdx = Builder.getInt32(FindIndex(LiveVariables, BasePtrs[i]));
    Value *LiveIdx = Builder.getInt32(i);

    Type *Ty = LiveVariables[i]->getType();
    if (!TypeToDeclMap.count(Ty))
      TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
    Function *GCRelocateDecl = TypeToDeclMap[Ty];

    // only specify a debug name if we can give a useful one
    CallInst *Reloc = Builder.CreateCall(
        GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
        suffixed_name_or(LiveVariables[i], ".relocated", ""));
    // Trick CodeGen into thinking there are lots of free registers at this
    // location.
    Reloc->setCallingConv(CallingConv::Cold);
  }
}
/// This struct is used to defer RAUWs and `eraseFromParent`s. Using this
/// avoids having to worry about keeping around dangling pointers to Values.
class DeferredReplacement {
  AssertingVH<Instruction> Old;
  AssertingVH<Instruction> New;
  bool IsDeoptimize = false;

  DeferredReplacement() = default;

public:
  static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) {
    assert(Old != New && Old && New &&
           "Cannot RAUW equal values or to / from null!");

    DeferredReplacement D;
    D.Old = Old;
    D.New = New;
    return D;
  }

  static DeferredReplacement createDelete(Instruction *ToErase) {
    DeferredReplacement D;
    D.Old = ToErase;
    return D;
  }

  static DeferredReplacement createDeoptimizeReplacement(Instruction *Old) {
#ifndef NDEBUG
    auto *F = cast<CallInst>(Old)->getCalledFunction();
    assert(F && F->getIntrinsicID() == Intrinsic::experimental_deoptimize &&
           "Only way to construct a deoptimize deferred replacement");
#endif
    DeferredReplacement D;
    D.Old = Old;
    D.IsDeoptimize = true;
    return D;
  }

  /// Does the task represented by this instance.
  void doReplacement() {
    Instruction *OldI = Old;
    Instruction *NewI = New;

    assert(OldI != NewI && "Disallowed at construction?!");
    assert((!IsDeoptimize || !New) &&
           "Deoptimize intrinsics are not replaced!");

    Old = nullptr;
    New = nullptr;

    if (NewI)
      OldI->replaceAllUsesWith(NewI);

    if (IsDeoptimize) {
      // Note: we've inserted instructions, so the call to llvm.deoptimize may
      // not necessarily be followed by the matching return.
      auto *RI = cast<ReturnInst>(OldI->getParent()->getTerminator());
      new UnreachableInst(RI->getContext(), RI);
      RI->eraseFromParent();
    }

    OldI->eraseFromParent();
  }
};

} // end anonymous namespace
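// Returns the requested deopt lowering for a call site, read from the
// "deopt-lowering" string function attribute, e.g. (illustrative IR; the
// attribute is described in docs/Statepoints.rst):
//   call void @foo() "deopt-lowering"="live-in" [ "deopt"(i32 0) ]
// Only "live-in" and "live-through" are recognized values.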
static StringRef getDeoptLowering(CallBase *Call) {
  const char *DeoptLowering = "deopt-lowering";
  if (Call->hasFnAttr(DeoptLowering)) {
    // FIXME: Calls have a *really* confusing interface around attributes
    // with values.
    const AttributeList &CSAS = Call->getAttributes();
    if (CSAS.hasFnAttr(DeoptLowering))
      return CSAS.getFnAttr(DeoptLowering).getValueAsString();
    Function *F = Call->getCalledFunction();
    assert(F && F->hasFnAttribute(DeoptLowering));
    return F->getFnAttribute(DeoptLowering).getValueAsString();
  }
  return "live-through";
}
static void
makeStatepointExplicitImpl(CallBase *Call, /* to replace */
                           const SmallVectorImpl<Value *> &BasePtrs,
                           const SmallVectorImpl<Value *> &LiveVariables,
                           PartiallyConstructedSafepointRecord &Result,
                           std::vector<DeferredReplacement> &Replacements) {
  assert(BasePtrs.size() == LiveVariables.size());

  // Then go ahead and use the builder to actually do the inserts. We insert
  // immediately before the previous instruction under the assumption that all
  // arguments will be available here. We can't insert afterwards since we may
  // be replacing a terminator.
  IRBuilder<> Builder(Call);

  ArrayRef<Value *> GCArgs(LiveVariables);
  uint64_t StatepointID = StatepointDirectives::DefaultStatepointID;
  uint32_t NumPatchBytes = 0;
  uint32_t Flags = uint32_t(StatepointFlags::None);

  SmallVector<Value *, 8> CallArgs(Call->args());
  Optional<ArrayRef<Use>> DeoptArgs;
  if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_deopt))
    DeoptArgs = Bundle->Inputs;
  Optional<ArrayRef<Use>> TransitionArgs;
  if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_gc_transition)) {
    TransitionArgs = Bundle->Inputs;
    // TODO: This flag no longer serves a purpose and can be removed later.
    Flags |= uint32_t(StatepointFlags::GCTransition);
  }

  // Instead of lowering calls to @llvm.experimental.deoptimize as normal calls
  // with a return value, we lower them as never-returning calls to
  // __llvm_deoptimize that are followed by unreachable to get better codegen.
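  // For illustration (IR sketch), a frontend-emitted
  //   %v = call i32 @llvm.experimental.deoptimize.i32(i32 %a) [ "deopt"(i32 0) ]
  //   ret i32 %v
  // becomes a statepoint-wrapped call to @__llvm_deoptimize; the matching ret
  // is replaced by unreachable when the deferred replacement runs.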
  bool IsDeoptimize = false;

  StatepointDirectives SD =
      parseStatepointDirectivesFromAttrs(Call->getAttributes());
  if (SD.NumPatchBytes)
    NumPatchBytes = *SD.NumPatchBytes;
  if (SD.StatepointID)
    StatepointID = *SD.StatepointID;

  // Pass through the requested lowering if any. The default is live-through.
  StringRef DeoptLowering = getDeoptLowering(Call);
  if (DeoptLowering.equals("live-in"))
    Flags |= uint32_t(StatepointFlags::DeoptLiveIn);
  else {
    assert(DeoptLowering.equals("live-through") && "Unsupported value!");
  }

  Value *CallTarget = Call->getCalledOperand();
  if (Function *F = dyn_cast<Function>(CallTarget)) {
    auto IID = F->getIntrinsicID();
    if (IID == Intrinsic::experimental_deoptimize) {
      // Calls to llvm.experimental.deoptimize are lowered to calls to the
      // __llvm_deoptimize symbol. We want to resolve this now, since the
      // verifier does not allow taking the address of an intrinsic function.

      SmallVector<Type *, 8> DomainTy;
      for (Value *Arg : CallArgs)
        DomainTy.push_back(Arg->getType());
      auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
                                    /* isVarArg = */ false);

      // Note: CallTarget can be a bitcast instruction of a symbol if there are
      // calls to @llvm.experimental.deoptimize with different argument types in
      // the same module. This is fine -- we assume the frontend knew what it
      // was doing when generating this kind of IR.
      CallTarget = F->getParent()
                       ->getOrInsertFunction("__llvm_deoptimize", FTy)
                       .getCallee();

      IsDeoptimize = true;
    } else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
               IID == Intrinsic::memmove_element_unordered_atomic) {
      // Unordered atomic memcpy and memmove intrinsics which are not explicitly
      // marked as "gc-leaf-function" should be lowered in a GC parseable way.
      // Specifically, these calls should be lowered to the
      // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols.
      // Similarly to __llvm_deoptimize we want to resolve this now, since the
      // verifier does not allow taking the address of an intrinsic function.
      //
      // Moreover we need to shuffle the arguments for the call in order to
      // accommodate GC. The underlying source and destination objects might be
      // relocated during the copy operation should a GC occur. To relocate the
      // derived source and destination pointers the implementation of the
      // intrinsic should know the corresponding base pointers.
      //
      // To make the base pointers available, pass them explicitly as arguments:
      //   memcpy(dest_derived, source_derived, ...) =>
      //   memcpy(dest_base, dest_offset, source_base, source_offset, ...)
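      //
      // The offsets passed above are computed by the GetBaseAndOffset helper
      // below, conceptually (IR sketch):
      //   %dest_base.int    = ptrtoint i8 addrspace(1)* %dest_base to i64
      //   %dest_derived.int = ptrtoint i8 addrspace(1)* %dest_derived to i64
      //   %dest_offset      = sub i64 %dest_derived.int, %dest_base.int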
      auto &Context = Call->getContext();
      auto &DL = Call->getModule()->getDataLayout();
      auto GetBaseAndOffset = [&](Value *Derived) {
        assert(Result.PointerToBase.count(Derived));
        unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
        unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
        Value *Base = Result.PointerToBase.find(Derived)->second;
        Value *Base_int = Builder.CreatePtrToInt(
            Base, Type::getIntNTy(Context, IntPtrSize));
        Value *Derived_int = Builder.CreatePtrToInt(
            Derived, Type::getIntNTy(Context, IntPtrSize));
        return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int));
      };

      auto *Dest = CallArgs[0];
      Value *DestBase, *DestOffset;
      std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest);

      auto *Source = CallArgs[1];
      Value *SourceBase, *SourceOffset;
      std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source);

      auto *LengthInBytes = CallArgs[2];
      auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]);

      CallArgs.clear();
      CallArgs.push_back(DestBase);
      CallArgs.push_back(DestOffset);
      CallArgs.push_back(SourceBase);
      CallArgs.push_back(SourceOffset);
      CallArgs.push_back(LengthInBytes);

      SmallVector<Type *, 8> DomainTy;
      for (Value *Arg : CallArgs)
        DomainTy.push_back(Arg->getType());
      auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
                                    /* isVarArg = */ false);

      auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) {
        uint64_t ElementSize = ElementSizeCI->getZExtValue();
        if (IID == Intrinsic::memcpy_element_unordered_atomic) {
          switch (ElementSize) {
          case 1:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_1";
          case 2:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_2";
          case 4:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_4";
          case 8:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_8";
          case 16:
            return "__llvm_memcpy_element_unordered_atomic_safepoint_16";
          default:
            llvm_unreachable("unexpected element size!");
          }
        }
        assert(IID == Intrinsic::memmove_element_unordered_atomic);
        switch (ElementSize) {
        case 1:
          return "__llvm_memmove_element_unordered_atomic_safepoint_1";
        case 2:
          return "__llvm_memmove_element_unordered_atomic_safepoint_2";
        case 4:
          return "__llvm_memmove_element_unordered_atomic_safepoint_4";
        case 8:
          return "__llvm_memmove_element_unordered_atomic_safepoint_8";
        case 16:
          return "__llvm_memmove_element_unordered_atomic_safepoint_16";
        default:
          llvm_unreachable("unexpected element size!");
        }
      };

      CallTarget =
          F->getParent()
              ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy)
              .getCallee();
    }
  }

  // Create the statepoint given all the arguments.
  GCStatepointInst *Token = nullptr;
  if (auto *CI = dyn_cast<CallInst>(Call)) {
    CallInst *SPCall = Builder.CreateGCStatepointCall(
        StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
        TransitionArgs, DeoptArgs, GCArgs, "safepoint_token");

    SPCall->setTailCallKind(CI->getTailCallKind());
    SPCall->setCallingConv(CI->getCallingConv());

    // Currently we will fail on parameter attributes and on certain
    // function attributes. In case we can handle this set of attributes -
    // set up function attrs directly on statepoint and return attrs later for
    // gc_result intrinsic.
    SPCall->setAttributes(
        legalizeCallAttributes(CI->getContext(), CI->getAttributes()));

    Token = cast<GCStatepointInst>(SPCall);

    // Put the following gc_result and gc_relocate calls immediately after
    // the old call (which we're about to delete).
    assert(CI->getNextNode() && "Not a terminator, must have next!");
    Builder.SetInsertPoint(CI->getNextNode());
    Builder.SetCurrentDebugLocation(CI->getNextNode()->getDebugLoc());
  } else {
    auto *II = cast<InvokeInst>(Call);

    // Insert the new invoke into the old block. We'll remove the old one in a
    // moment at which point this will become the new terminator for the
    // original block.
    InvokeInst *SPInvoke = Builder.CreateGCStatepointInvoke(
        StatepointID, NumPatchBytes, CallTarget, II->getNormalDest(),
        II->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs, GCArgs,
        "statepoint_token");

    SPInvoke->setCallingConv(II->getCallingConv());

    // Currently we will fail on parameter attributes and on certain
    // function attributes. In case we can handle this set of attributes -
    // set up function attrs directly on statepoint and return attrs later for
    // gc_result intrinsic.
    SPInvoke->setAttributes(
        legalizeCallAttributes(II->getContext(), II->getAttributes()));

    Token = cast<GCStatepointInst>(SPInvoke);

    // Generate gc relocates in the exceptional path.
    BasicBlock *UnwindBlock = II->getUnwindDest();
    assert(!isa<PHINode>(UnwindBlock->begin()) &&
           UnwindBlock->getUniquePredecessor() &&
           "can't safely insert in this block!");

    Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt());
    Builder.SetCurrentDebugLocation(II->getDebugLoc());

    // Attach exceptional gc relocates to the landingpad.
    Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
    Result.UnwindToken = ExceptionalToken;

    CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder);

    // Generate gc relocates and returns for the normal block.
    BasicBlock *NormalDest = II->getNormalDest();
    assert(!isa<PHINode>(NormalDest->begin()) &&
           NormalDest->getUniquePredecessor() &&
           "can't safely insert in this block!");

    Builder.SetInsertPoint(&*NormalDest->getFirstInsertionPt());

    // gc relocates will be generated later as if it were a regular call
    // statepoint.
  }
  assert(Token && "Should be set in one of the above branches!");

  if (IsDeoptimize) {
    // If we're wrapping an @llvm.experimental.deoptimize in a statepoint, we
    // transform the tail-call like structure to a call to a void function
    // followed by unreachable to get better codegen.
    Replacements.push_back(
        DeferredReplacement::createDeoptimizeReplacement(Call));
  } else {
    Token->setName("statepoint_token");
    if (!Call->getType()->isVoidTy() && !Call->use_empty()) {
      StringRef Name = Call->hasName() ? Call->getName() : "";
      CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name);
      GCResult->setAttributes(
          AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
                             Call->getAttributes().getRetAttrs()));

      // We cannot RAUW or delete CS.getInstruction() because it could be in the
      // live set of some other safepoint, in which case that safepoint's
      // PartiallyConstructedSafepointRecord will hold a raw pointer to this
      // llvm::Instruction. Instead, we defer the replacement and deletion to
      // after the live sets have been made explicit in the IR, and we no longer
      // have raw pointers to worry about.
      Replacements.emplace_back(
          DeferredReplacement::createRAUW(Call, GCResult));
    } else {
      Replacements.emplace_back(DeferredReplacement::createDelete(Call));
    }
  }

  Result.StatepointToken = Token;

  // Second, create a gc.relocate for every live variable.
  CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder);
}
// Replace an existing gc.statepoint with a new one and a set of gc.relocates
// which make the relocations happening at this safepoint explicit.
//
// WARNING: Does not do any fixup to adjust users of the original live
// values. That's the caller's responsibility.
static void
makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
                       PartiallyConstructedSafepointRecord &Result,
                       std::vector<DeferredReplacement> &Replacements) {
  const auto &LiveSet = Result.LiveSet;
  const auto &PointerToBase = Result.PointerToBase;

  // Convert to vector for efficient cross referencing.
  SmallVector<Value *, 64> BaseVec, LiveVec;
  LiveVec.reserve(LiveSet.size());
  BaseVec.reserve(LiveSet.size());
  for (Value *L : LiveSet) {
    LiveVec.push_back(L);
    assert(PointerToBase.count(L));
    Value *Base = PointerToBase.find(L)->second;
    BaseVec.push_back(Base);
  }
  assert(LiveVec.size() == BaseVec.size());

  // Do the actual rewriting and delete the old statepoint.
  makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements);
}
// Helper function for relocationViaAlloca.
//
// It receives an iterator over the statepoint gc relocates and emits a store
// to the assigned location (via AllocaMap) for each one of them. It adds the
// visited values into the VisitedLiveValues set, which we will later use
// for sanity checking.
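//
// For a single relocate the emitted sequence looks like (IR sketch):
//   %val.casted = bitcast i8 addrspace(1)* %val.relocated to %T addrspace(1)*
//   store %T addrspace(1)* %val.casted, %T addrspace(1)** %val.alloca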
static void
insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
                       DenseMap<Value *, AllocaInst *> &AllocaMap,
                       DenseSet<Value *> &VisitedLiveValues) {
  for (User *U : GCRelocs) {
    GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
    if (!Relocate)
      continue;

    Value *OriginalValue = Relocate->getDerivedPtr();
    assert(AllocaMap.count(OriginalValue));
    Value *Alloca = AllocaMap[OriginalValue];

    // Emit a store into the related alloca.
    // All gc_relocates are i8 addrspace(1)* typed, and must be bitcasted to
    // the correct type according to the alloca.
    assert(Relocate->getNextNode() &&
           "Should always have one since it's not a terminator");
    IRBuilder<> Builder(Relocate->getNextNode());
    Value *CastedRelocatedValue =
        Builder.CreateBitCast(Relocate,
                              cast<AllocaInst>(Alloca)->getAllocatedType(),
                              suffixed_name_or(Relocate, ".casted", ""));

    new StoreInst(CastedRelocatedValue, Alloca,
                  cast<Instruction>(CastedRelocatedValue)->getNextNode());

#ifndef NDEBUG
    VisitedLiveValues.insert(OriginalValue);
#endif
  }
}
// Helper function for relocationViaAlloca. Similar to insertRelocationStores
// but works for rematerialized values.
static void insertRematerializationStores(
    const RematerializedValueMapTy &RematerializedValues,
    DenseMap<Value *, AllocaInst *> &AllocaMap,
    DenseSet<Value *> &VisitedLiveValues) {
  for (auto RematerializedValuePair : RematerializedValues) {
    Instruction *RematerializedValue = RematerializedValuePair.first;
    Value *OriginalValue = RematerializedValuePair.second;

    assert(AllocaMap.count(OriginalValue) &&
           "Can not find alloca for rematerialized value");
    Value *Alloca = AllocaMap[OriginalValue];

    new StoreInst(RematerializedValue, Alloca,
                  RematerializedValue->getNextNode());

#ifndef NDEBUG
    VisitedLiveValues.insert(OriginalValue);
#endif
  }
}
/// Do all the relocation update via allocas and mem2reg
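///
/// Conceptually (a sketch, not literal IR): each live gc pointer %p gets an
/// alloca in the entry block, a store to that alloca after its def and after
/// each gc.relocate of %p, and a load from the alloca before each use; the
/// final PromoteMemToReg call then folds the allocas back into SSA form.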
static void relocationViaAlloca(
    Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
    ArrayRef<PartiallyConstructedSafepointRecord> Records) {
#ifndef NDEBUG
  // Record initial number of (static) allocas; we'll check we have the same
  // number when we get done.
  int InitialAllocaNum = 0;
  for (Instruction &I : F.getEntryBlock())
    if (isa<AllocaInst>(I))
      InitialAllocaNum++;
#endif

  // TODO-PERF: change data structures, reserve
  DenseMap<Value *, AllocaInst *> AllocaMap;
  SmallVector<AllocaInst *, 200> PromotableAllocas;
  // Used later to check that we have enough allocas to store all values.
  std::size_t NumRematerializedValues = 0;
  PromotableAllocas.reserve(Live.size());

  // Emit an alloca for "LiveValue" and record it in "AllocaMap" and
  // "PromotableAllocas".
  const DataLayout &DL = F.getParent()->getDataLayout();
  auto emitAllocaFor = [&](Value *LiveValue) {
    AllocaInst *Alloca = new AllocaInst(LiveValue->getType(),
                                        DL.getAllocaAddrSpace(), "",
                                        F.getEntryBlock().getFirstNonPHI());
    AllocaMap[LiveValue] = Alloca;
    PromotableAllocas.push_back(Alloca);
  };

  // Emit an alloca for each live gc pointer.
  for (Value *V : Live)
    emitAllocaFor(V);

  // Emit allocas for rematerialized values.
  for (const auto &Info : Records)
    for (auto RematerializedValuePair : Info.RematerializedValues) {
      Value *OriginalValue = RematerializedValuePair.second;
      if (AllocaMap.count(OriginalValue) != 0)
        continue;

      emitAllocaFor(OriginalValue);
      ++NumRematerializedValues;
    }

  // The next two loops are part of the same conceptual operation. We need to
  // insert a store to the alloca after the original def and at each
  // redefinition. We need to insert a load before each use. These are split
  // into distinct loops for performance reasons.

  // Update gc pointer after each statepoint: either store a relocated value or
  // null (if no relocated value was found for this gc pointer and it is not a
  // gc_result). This must happen before we update the statepoint with a load
  // of the alloca, otherwise we lose the link between statepoint and old def.
  for (const auto &Info : Records) {
    Value *Statepoint = Info.StatepointToken;

    // This will be used for the consistency check.
    DenseSet<Value *> VisitedLiveValues;

    // Insert stores for normal statepoint gc relocates.
    insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues);

    // In case it was an invoke statepoint, we also insert stores for the
    // exceptional path gc relocates.
    if (isa<InvokeInst>(Statepoint)) {
      insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
                             VisitedLiveValues);
    }

    // Do the same thing with rematerialized values.
    insertRematerializationStores(Info.RematerializedValues, AllocaMap,
                                  VisitedLiveValues);

    if (ClobberNonLive) {
      // As a debugging aid, pretend that an unrelocated pointer becomes null at
      // the gc.statepoint. This will turn some subtle GC problems into
      // slightly easier to debug SEGVs. Note that on large IR files with
      // lots of gc.statepoints this is extremely costly both memory and time
      // wise.
      SmallVector<AllocaInst *, 64> ToClobber;
      for (auto Pair : AllocaMap) {
        Value *Def = Pair.first;
        AllocaInst *Alloca = Pair.second;

        // This value was relocated.
        if (VisitedLiveValues.count(Def)) {
          continue;
        }
        ToClobber.push_back(Alloca);
      }

      auto InsertClobbersAt = [&](Instruction *IP) {
        for (auto *AI : ToClobber) {
          auto PT = cast<PointerType>(AI->getAllocatedType());
          Constant *CPN = ConstantPointerNull::get(PT);
          new StoreInst(CPN, AI, IP);
        }
      };

      // Insert the clobbering stores. These may get intermixed with the
      // gc.results and gc.relocates, but that's fine.
      if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
        InsertClobbersAt(&*II->getNormalDest()->getFirstInsertionPt());
        InsertClobbersAt(&*II->getUnwindDest()->getFirstInsertionPt());
      } else {
        InsertClobbersAt(cast<Instruction>(Statepoint)->getNextNode());
      }
    }
  }

  // Update uses with load allocas and add a store for the gc_relocated value.
  for (auto Pair : AllocaMap) {
    Value *Def = Pair.first;
    AllocaInst *Alloca = Pair.second;

    // We pre-record the uses of allocas so that we don't have to worry about
    // later updates that change the user information.

    SmallVector<Instruction *, 20> Uses;
    // PERF: trade a linear scan for repeated reallocation.
    Uses.reserve(Def->getNumUses());
    for (User *U : Def->users()) {
      if (!isa<ConstantExpr>(U)) {
        // If the def has a ConstantExpr use, then the def is either a
        // ConstantExpr use itself or null. In either case
        // (recursively in the first, directly in the second), the oop
        // it is ultimately dependent on is null and this particular
        // use does not need to be fixed up.
        Uses.push_back(cast<Instruction>(U));
      }
    }

    llvm::sort(Uses);
    auto Last = std::unique(Uses.begin(), Uses.end());
    Uses.erase(Last, Uses.end());

    for (Instruction *Use : Uses) {
      if (isa<PHINode>(Use)) {
        PHINode *Phi = cast<PHINode>(Use);
        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
          if (Def == Phi->getIncomingValue(i)) {
            LoadInst *Load =
                new LoadInst(Alloca->getAllocatedType(), Alloca, "",
                             Phi->getIncomingBlock(i)->getTerminator());
            Phi->setIncomingValue(i, Load);
          }
        }
      } else {
        LoadInst *Load =
            new LoadInst(Alloca->getAllocatedType(), Alloca, "", Use);
        Use->replaceUsesOfWith(Def, Load);
      }
    }

    // Emit the store for the initial gc value. The store must be inserted
    // after the load, otherwise the store will be in the alloca's use list
    // and an extra load will be inserted before it.
    StoreInst *Store = new StoreInst(Def, Alloca, /*volatile*/ false,
                                     DL.getABITypeAlign(Def->getType()));
    if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
      if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
        // InvokeInst is a terminator, so the store needs to be inserted into
        // its normal destination block.
        BasicBlock *NormalDest = Invoke->getNormalDest();
        Store->insertBefore(NormalDest->getFirstNonPHI());
      } else {
        assert(!Inst->isTerminator() &&
               "The only terminator that can produce a value is "
               "InvokeInst which is handled above.");
        Store->insertAfter(Inst);
      }
    } else {
      assert(isa<Argument>(Def));
      Store->insertAfter(cast<Instruction>(Alloca));
    }
  }

  assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
         "we must have the same allocas with lives");
  if (!PromotableAllocas.empty()) {
    // Apply mem2reg to promote the allocas to SSA.
    PromoteMemToReg(PromotableAllocas, DT);
  }

#ifndef NDEBUG
  for (auto &I : F.getEntryBlock())
    if (isa<AllocaInst>(I))
      InitialAllocaNum--;
  assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
#endif
}
/// Implement a unique function which doesn't require we sort the input
/// vector. Doing so has the effect of changing the output of a couple of
/// tests in ways which make them less useful in testing fused safepoints.
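/// For example, {a, b, a, c} becomes {a, b, c}, keeping the first occurrence
/// of each element in its original order.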
template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
  SmallSet<T, 8> Seen;
  erase_if(Vec, [&](const T &V) { return !Seen.insert(V).second; });
}
/// Insert holders so that each Value is obviously live through the entire
/// lifetime of the call.
static void insertUseHolderAfter(CallBase *Call, const ArrayRef<Value *> Values,
                                 SmallVectorImpl<CallInst *> &Holders) {
  if (Values.empty())
    // No values to hold live, might as well not insert the empty holder.
    return;

  Module *M = Call->getModule();
  // Use a dummy vararg function to actually hold the values live.
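  // e.g. (illustrative IR): call void (...) @__tmp_use(i8 addrspace(1)* %p);
  // the holders are deleted again once liveness has been recomputed.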
  FunctionCallee Func = M->getOrInsertFunction(
      "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true));
  if (isa<CallInst>(Call)) {
    // For call safepoints insert dummy calls right after the safepoint.
    Holders.push_back(
        CallInst::Create(Func, Values, "", &*++Call->getIterator()));
    return;
  }
  // For invoke safepoints insert dummy calls both in the normal and
  // exceptional destination blocks.
  auto *II = cast<InvokeInst>(Call);
  Holders.push_back(CallInst::Create(
      Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
  Holders.push_back(CallInst::Create(
      Func, Values, "", &*II->getUnwindDest()->getFirstInsertionPt()));
}
static void findLiveReferences(
    Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
    MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
  GCPtrLivenessData OriginalLivenessData;
  computeLiveInValues(DT, F, OriginalLivenessData);
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info);
  }
}
// Helper function for rematerializeLiveValues. It walks the use chain
// starting from "CurrentValue" until it reaches the root of the chain, i.e.
// the base or a value it cannot process. Only "simple" values are processed
// (currently GEPs and casts). The returned root is examined by the callers
// of findRematerializableChainToBasePointer. Fills the "ChainToBase" array
// with all visited values.
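// For example, starting from %d in (IR sketch):
//   %c = bitcast i8 addrspace(1)* %base to i8 addrspace(1)*
//   %d = getelementptr i8, i8 addrspace(1)* %c, i64 16
// the walk fills ChainToBase with {%d, %c} and returns %base as the root.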
static Value *findRematerializableChainToBasePointer(
    SmallVectorImpl<Instruction *> &ChainToBase, Value *CurrentValue) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
    ChainToBase.push_back(GEP);
    return findRematerializableChainToBasePointer(ChainToBase,
                                                  GEP->getPointerOperand());
  }

  if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
    if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
      return CI;

    ChainToBase.push_back(CI);
    return findRematerializableChainToBasePointer(ChainToBase,
                                                  CI->getOperand(0));
  }

  // We have reached the root of the chain, which is either equal to the base
  // or is the first unsupported value along the use chain.
  return CurrentValue;
}
// Helper function for rematerializeLiveValues. Compute the cost of the use
// chain we are going to rematerialize.
static InstructionCost
chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
                       TargetTransformInfo &TTI) {
  InstructionCost Cost = 0;

  for (Instruction *Instr : Chain) {
    if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
      assert(CI->isNoopCast(CI->getModule()->getDataLayout()) &&
             "non noop cast is found during rematerialization");

      Type *SrcTy = CI->getOperand(0)->getType();
      Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy,
                                   TTI::getCastContextHint(CI),
                                   TargetTransformInfo::TCK_SizeAndLatency, CI);

    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
      // Cost of the address calculation.
      Type *ValTy = GEP->getSourceElementType();
      Cost += TTI.getAddressComputationCost(ValTy);

      // And cost of the GEP itself.
      // TODO: Use TTI->getGEPCost here (it exists, but appears to be not
      //       allowed for external usage).
      if (!GEP->hasAllConstantIndices())
        Cost += 2;

    } else
      llvm_unreachable("unsupported instruction type during rematerialization");
  }

  return Cost;
}
static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPhi) {
  unsigned PhiNum = OrigRootPhi.getNumIncomingValues();
  if (PhiNum != AlternateRootPhi.getNumIncomingValues() ||
      OrigRootPhi.getParent() != AlternateRootPhi.getParent())
    return false;

  // Map of incoming values and their corresponding basic blocks of
  // OrigRootPhi.
  SmallDenseMap<Value *, BasicBlock *, 8> CurrentIncomingValues;
  for (unsigned i = 0; i < PhiNum; i++)
    CurrentIncomingValues[OrigRootPhi.getIncomingValue(i)] =
        OrigRootPhi.getIncomingBlock(i);

  // Both current and base PHIs should have the same incoming values and
  // the same basic blocks corresponding to the incoming values.
  for (unsigned i = 0; i < PhiNum; i++) {
    auto CIVI =
        CurrentIncomingValues.find(AlternateRootPhi.getIncomingValue(i));
    if (CIVI == CurrentIncomingValues.end())
      return false;
    BasicBlock *CurrentIncomingBB = CIVI->second;
    if (CurrentIncomingBB != AlternateRootPhi.getIncomingBlock(i))
      return false;
  }
  return true;
}
// From the statepoint live set pick values that are cheaper to recompute than
// to relocate. Remove these values from the live set, rematerialize them after
// the statepoint and record them in the "Info" structure. Note that similar to
// relocated values we don't do any user adjustments here.
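// For example (sketch): if "%derived = getelementptr i8, ... %base, i64 16"
// is live across the statepoint and the GEP is cheaper than a relocation
// slot, %derived is dropped from the live set and
// "%derived.remat = getelementptr i8, ... %base.relocated, i64 16" is emitted
// after the statepoint.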
static void rematerializeLiveValues(CallBase *Call,
                                    PartiallyConstructedSafepointRecord &Info,
                                    TargetTransformInfo &TTI) {
  const unsigned int ChainLengthThreshold = 10;

  // Record values we are going to delete from this statepoint live set.
  // We cannot do this in the following loop due to iterator invalidation.
  SmallVector<Value *, 32> LiveValuesToBeDeleted;

  for (Value *LiveValue : Info.LiveSet) {
    // For each live pointer find its defining chain.
    SmallVector<Instruction *, 3> ChainToBase;
    assert(Info.PointerToBase.count(LiveValue));
    Value *RootOfChain =
        findRematerializableChainToBasePointer(ChainToBase, LiveValue);

    // Nothing to do, or chain is too long.
    if (ChainToBase.size() == 0 ||
        ChainToBase.size() > ChainLengthThreshold)
      continue;

    // Handle the scenario where the RootOfChain is not equal to the
    // Base Value, but they are essentially the same phi values.
    if (RootOfChain != Info.PointerToBase[LiveValue]) {
      PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
      PHINode *AlternateRootPhi = dyn_cast<PHINode>(Info.PointerToBase[LiveValue]);
      if (!OrigRootPhi || !AlternateRootPhi)
        continue;
      // PHI nodes that have the same incoming values, and belong to the same
      // basic blocks are essentially the same SSA value. When the original phi
      // has incoming values with different base pointers, the original phi is
      // marked as conflict, and an additional `AlternateRootPhi` with the same
      // incoming values gets generated by the findBasePointer function. We need
      // to identify that the newly generated AlternateRootPhi (.base version of
      // the phi) and RootOfChain (the original phi node itself) are the same,
      // so that we can rematerialize the gep and casts. This is a workaround
      // for the deficiency in the findBasePointer algorithm.
      if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
        continue;
      // Now that the phi nodes are proved to be the same, assert that
      // findBasePointer's newly generated AlternateRootPhi is present in the
      // liveset of the call.
      assert(Info.LiveSet.count(AlternateRootPhi));
    }
    // Compute the cost of this chain.
    InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI);
    // TODO: We can also account for cases when we will be able to remove some
    //       of the rematerialized values by later optimization passes, i.e. if
    //       we rematerialized several intersecting chains, or if the original
    //       values don't have any uses besides this statepoint.

    // For invokes we need to rematerialize each chain twice - for normal and
    // for unwind basic blocks. Model this by multiplying the cost by two.
    if (isa<InvokeInst>(Call)) {
      Cost *= 2;
    }
    // If it's too expensive - skip it.
    if (Cost >= RematerializationThreshold)
      continue;

    // Remove the value from the live set.
    LiveValuesToBeDeleted.push_back(LiveValue);

    // Clone instructions and record them inside the "Info" structure.

    // Walk backwards to visit top-most instructions first.
    std::reverse(ChainToBase.begin(), ChainToBase.end());

    // Utility function which clones all instructions from "ChainToBase"
    // and inserts them before "InsertBefore". Returns the rematerialized value
    // which should be used after the statepoint.
    auto rematerializeChain = [&ChainToBase](Instruction *InsertBefore,
                                             Value *RootOfChain,
                                             Value *AlternateLiveBase) {
      Instruction *LastClonedValue = nullptr;
      Instruction *LastValue = nullptr;
      for (Instruction *Instr : ChainToBase) {
        // Only GEPs and casts are supported as we need to be careful to not
        // introduce any new uses of pointers not in the liveset.
        // Note that it's fine to introduce new uses of pointers which were
        // otherwise not used after this statepoint.
        assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));

        Instruction *ClonedValue = Instr->clone();
        ClonedValue->insertBefore(InsertBefore);
        ClonedValue->setName(Instr->getName() + ".remat");

        // If it is not the first instruction in the chain then it uses the
        // previously cloned value. We should update it to use the cloned value.
        if (LastClonedValue) {
          assert(LastValue);
          ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
#ifndef NDEBUG
          for (auto OpValue : ClonedValue->operand_values()) {
            // Assert that the cloned instruction does not use any instructions
            // from this chain other than LastClonedValue.
            assert(!is_contained(ChainToBase, OpValue) &&
                   "incorrect use in rematerialization chain");
            // Assert that the cloned instruction does not use the RootOfChain
            // or the AlternateLiveBase.
            assert(OpValue != RootOfChain && OpValue != AlternateLiveBase);
          }
#endif
        } else {
          // For the first instruction, replace the use of the unrelocated base,
          // i.e. RootOfChain/OrigRootPhi, with the corresponding PHI present in
          // the live set. They have been proved to be the same PHI nodes. Note
          // that the *only* use of the RootOfChain in the ChainToBase list is
          // the first Value in the list.
          if (RootOfChain != AlternateLiveBase)
            ClonedValue->replaceUsesOfWith(RootOfChain, AlternateLiveBase);
        }

        LastClonedValue = ClonedValue;
        LastValue = Instr;
      }
      assert(LastClonedValue);
      return LastClonedValue;
    };

    // Different cases for calls and invokes. For invokes we need to clone
    // instructions both on the normal and unwind path.
    if (isa<CallInst>(Call)) {
      Instruction *InsertBefore = Call->getNextNode();
      assert(InsertBefore);
      Instruction *RematerializedValue = rematerializeChain(
          InsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
      Info.RematerializedValues[RematerializedValue] = LiveValue;
    } else {
      auto *Invoke = cast<InvokeInst>(Call);

      Instruction *NormalInsertBefore =
          &*Invoke->getNormalDest()->getFirstInsertionPt();
      Instruction *UnwindInsertBefore =
          &*Invoke->getUnwindDest()->getFirstInsertionPt();

      Instruction *NormalRematerializedValue = rematerializeChain(
          NormalInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
      Instruction *UnwindRematerializedValue = rematerializeChain(
          UnwindInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);

      Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
      Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
    }
  }

  // Remove rematerialized values from the live set.
  for (auto LiveValue : LiveValuesToBeDeleted) {
    Info.LiveSet.remove(LiveValue);
  }
}
static bool inlineGetBaseAndOffset(Function &F,
                                   SmallVectorImpl<CallInst *> &Intrinsics,
                                   DefiningValueMapTy &DVCache) {
  auto &Context = F.getContext();
  auto &DL = F.getParent()->getDataLayout();
  bool Changed = false;

  for (auto *Callsite : Intrinsics)
    switch (Callsite->getIntrinsicID()) {
    case Intrinsic::experimental_gc_get_pointer_base: {
      Changed = true;
      Value *Base = findBasePointer(Callsite->getOperand(0), DVCache);
      assert(!DVCache.count(Callsite));
      auto *BaseBC = IRBuilder<>(Callsite).CreateBitCast(
          Base, Callsite->getType(), suffixed_name_or(Base, ".cast", ""));
      if (BaseBC != Base)
        DVCache[BaseBC] = Base;
      Callsite->replaceAllUsesWith(BaseBC);
      if (!BaseBC->hasName())
        BaseBC->takeName(Callsite);
      Callsite->eraseFromParent();
      break;
    }
    case Intrinsic::experimental_gc_get_pointer_offset: {
      Changed = true;
      Value *Derived = Callsite->getOperand(0);
      Value *Base = findBasePointer(Derived, DVCache);
      assert(!DVCache.count(Callsite));
      unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
      unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
      IRBuilder<> Builder(Callsite);
      Value *BaseInt =
          Builder.CreatePtrToInt(Base, Type::getIntNTy(Context, IntPtrSize),
                                 suffixed_name_or(Base, ".int", ""));
      Value *DerivedInt =
          Builder.CreatePtrToInt(Derived, Type::getIntNTy(Context, IntPtrSize),
                                 suffixed_name_or(Derived, ".int", ""));
      Value *Offset = Builder.CreateSub(DerivedInt, BaseInt);
      Callsite->replaceAllUsesWith(Offset);
      Offset->takeName(Callsite);
      Callsite->eraseFromParent();
      break;
    }
    default:
      llvm_unreachable("Unknown intrinsic");
    }

  return Changed;
}
static bool insertParsePoints(Function &F, DominatorTree &DT,
                              TargetTransformInfo &TTI,
                              SmallVectorImpl<CallBase *> &ToUpdate,
                              DefiningValueMapTy &DVCache) {
#ifndef NDEBUG
  // Sanity check the input.
  std::set<CallBase *> Uniqued;
  Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
  assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");

  for (CallBase *Call : ToUpdate)
    assert(Call->getFunction() == &F);
#endif

  // When inserting gc.relocates for invokes, we need to be able to insert at
  // the top of the successor blocks. See the comment on
  // normalizeForInvokeSafepoint on exactly what is needed. Note that this
  // step may restructure the CFG.
  for (CallBase *Call : ToUpdate) {
    auto *II = dyn_cast<InvokeInst>(Call);
    if (!II)
      continue;
    normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
    normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
  }

  // A list of dummy calls added to the IR to keep various values obviously
  // live in the IR. We'll remove all of these when done.
  SmallVector<CallInst *, 64> Holders;

  // Insert a dummy call with all of the deopt operands we'll need for the
  // actual safepoint insertion as arguments. This ensures reference operands
  // in the deopt argument list are considered live through the safepoint (and
  // thus makes sure they get relocated).
  for (CallBase *Call : ToUpdate) {
    SmallVector<Value *, 64> DeoptValues;

    for (Value *Arg : GetDeoptBundleOperands(Call)) {
      assert(!isUnhandledGCPointerType(Arg->getType()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(Arg->getType()))
        DeoptValues.push_back(Arg);
    }

    insertUseHolderAfter(Call, DeoptValues, Holders);
  }

  SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());

  // A) Identify all gc pointers which are statically live at the given call
  // site.
  findLiveReferences(F, DT, ToUpdate, Records);

  // B) Find the base pointers for each live pointer.
  for (size_t i = 0; i < Records.size(); i++) {
    PartiallyConstructedSafepointRecord &info = Records[i];
    findBasePointers(DT, DVCache, ToUpdate[i], info);
  }

  // The base phi insertion logic (for any safepoint) may have inserted new
  // instructions which are now live at some safepoint. The simplest such
  // example is:
  // loop:
  //   phi a  <-- will be a new base_phi here
  //   safepoint 1 <-- that needs to be live here
  //   gep a + 1
  //   safepoint 2
  //   br loop
  // We insert some dummy calls after each safepoint to definitely hold live
  // the base pointers which were identified for that safepoint. We'll then
  // ask liveness for _every_ base inserted to see what is now live. Then we
  // remove the dummy calls.
  Holders.reserve(Holders.size() + Records.size());
  for (size_t i = 0; i < Records.size(); i++) {
    PartiallyConstructedSafepointRecord &Info = Records[i];

    SmallVector<Value *, 128> Bases;
    for (auto Pair : Info.PointerToBase)
      Bases.push_back(Pair.second);

    insertUseHolderAfter(ToUpdate[i], Bases, Holders);
  }

  // By selecting base pointers, we've effectively inserted new uses. Thus, we
  // need to rerun liveness. We may *also* have inserted new defs, but that's
  // not the key issue.
  recomputeLiveInValues(F, DT, ToUpdate, Records);

  if (PrintBasePointers) {
    for (auto &Info : Records) {
      errs() << "Base Pairs: (w/Relocation)\n";
      for (auto Pair : Info.PointerToBase) {
        errs() << " derived ";
        Pair.first->printAsOperand(errs(), false);
        errs() << " base ";
        Pair.second->printAsOperand(errs(), false);
        errs() << "\n";
      }
    }
  }

  // It is possible that non-constant live variables have a constant base. For
  // example, a GEP with a variable offset from a global. In this case we can
  // remove it from the liveset. We already don't add constants to the liveset
  // because we assume they won't move at runtime and the GC doesn't need to be
  // informed about them. The same reasoning applies if the base is constant.
  // Note that the relocation placement code relies on this filtering for
  // correctness as it expects the base to be in the liveset, which isn't true
  // if the base is constant.
  for (auto &Info : Records)
    for (auto &BasePair : Info.PointerToBase)
      if (isa<Constant>(BasePair.second))
        Info.LiveSet.remove(BasePair.first);

  for (CallInst *CI : Holders)
    CI->eraseFromParent();

  Holders.clear();

  // In order to reduce the live set of a statepoint we might choose to
  // rematerialize some values instead of relocating them. This is purely an
  // optimization and does not influence correctness.
  for (size_t i = 0; i < Records.size(); i++)
    rematerializeLiveValues(ToUpdate[i], Records[i], TTI);

  // We need this to safely RAUW and delete call or invoke return values that
  // may themselves be live over a statepoint. For details, please see usage in
  // makeStatepointExplicitImpl.
  std::vector<DeferredReplacement> Replacements;

  // Now run through and replace the existing statepoints with new ones with
  // the live variables listed. We do not yet update uses of the values being
  // relocated. We have references to live variables that need to
  // survive to the last iteration of this loop. (By construction, the
  // previous statepoint cannot be a live variable, thus we can and do remove
  // the old statepoint calls as we go.)
  for (size_t i = 0; i < Records.size(); i++)
    makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);

  ToUpdate.clear(); // prevent accidental use of invalid calls.

  for (auto &PR : Replacements)
    PR.doReplacement();

  Replacements.clear();

  for (auto &Info : Records) {
    // These live sets may contain stale Value pointers, since we replaced calls
    // with operand bundles with calls wrapped in gc.statepoint, and some of
    // those calls may have been def'ing live gc pointers. Clear these out to
    // avoid accidentally using them.
    //
    // TODO: We should create a separate data structure that does not contain
    // these live sets, and migrate to using that data structure from this point
    // onward.
    Info.LiveSet.clear();
    Info.PointerToBase.clear();
  }

  // Do all the fixups of the original live variables to their relocated selves.
  SmallVector<Value *, 128> Live;
  for (size_t i = 0; i < Records.size(); i++) {
    PartiallyConstructedSafepointRecord &Info = Records[i];

    // We can't simply save the live set from the original insertion. One of
    // the live values might be the result of a call which needs a safepoint.
    // That Value* no longer exists and we need to use the new gc_result.
    // Thankfully, the live set is embedded in the statepoint (and updated), so
    // we just grab that.
    llvm::append_range(Live, Info.StatepointToken->gc_args());
#ifndef NDEBUG
    // Do some basic sanity checks on our liveness results before performing
    // relocation. Relocation can and will turn mistakes in liveness results
    // into non-sensical code which is much harder to debug.
    // TODO: It would be nice to test consistency as well.
    assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
           "statepoint must be reachable or liveness is meaningless");
    for (Value *V : Info.StatepointToken->gc_args()) {
      if (!isa<Instruction>(V))
        // Non-instruction values trivially dominate all possible uses.
        continue;
      auto *LiveInst = cast<Instruction>(V);
      assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
             "unreachable values should never be live");
      assert(DT.dominates(LiveInst, Info.StatepointToken) &&
             "basic SSA liveness expectation violated by liveness analysis");
    }
#endif
  }
  unique_unsorted(Live);

#ifndef NDEBUG
  // Sanity check.
  for (auto *Ptr : Live)
    assert(isHandledGCPointerType(Ptr->getType()) &&
           "must be a gc pointer type");
#endif

  relocationViaAlloca(F, DT, Live, Records);
  return !Records.empty();
}
// Handles both return values and arguments for Functions and calls.
template <typename AttrHolder>
static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
                                      unsigned Index) {
  AttrBuilder R;
  AttributeSet AS = AH.getAttributes().getAttributes(Index);
  if (AS.getDereferenceableBytes())
    R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
                                  AS.getDereferenceableBytes()));
  if (AS.getDereferenceableOrNullBytes())
    R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
                                  AS.getDereferenceableOrNullBytes()));
  for (auto Attr : ParamAttrsToStrip)
    if (AS.hasAttribute(Attr))
      R.addAttribute(Attr);

  if (!R.empty())
    AH.setAttributes(AH.getAttributes().removeAttributes(Ctx, Index, R));
}
static void stripNonValidAttributesFromPrototype(Function &F) {
  LLVMContext &Ctx = F.getContext();

  // Intrinsics are very delicate. Lowering sometimes depends on the presence
  // of certain attributes for correctness, but we may have also inferred
  // additional ones in the abstract machine model which need stripping. This
  // assumes that the attributes defined in Intrinsic.td are conservatively
  // correct for both the physical and abstract model.
  if (Intrinsic::ID id = F.getIntrinsicID()) {
    F.setAttributes(Intrinsic::getAttributes(Ctx, id));
    return;
  }

  for (Argument &A : F.args())
    if (isa<PointerType>(A.getType()))
      RemoveNonValidAttrAtIndex(Ctx, F,
                                A.getArgNo() + AttributeList::FirstArgIndex);

  if (isa<PointerType>(F.getReturnType()))
    RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex);

  for (auto Attr : FnAttrsToStrip)
    F.removeFnAttr(Attr);
}
/// Certain metadata on instructions are invalid after running RS4GC.
/// Optimizations that run after RS4GC can incorrectly use this metadata to
/// optimize functions. We drop such metadata on the instruction.
static void stripInvalidMetadataFromInstruction(Instruction &I) {
  if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
    return;
  // These are the attributes that are still valid on loads and stores after
  // RS4GC.
  // The metadata implying dereferenceability and noalias are (conservatively)
  // dropped. This is because semantically, after RewriteStatepointsForGC runs,
  // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can
  // touch the entire heap including noalias objects. Note: The reasoning is
  // the same as for stripping the dereferenceability and noalias attributes
  // that are analogous to the metadata counterparts.
  // We also drop the invariant.load metadata on the load because that metadata
  // implies the address operand to the load points to memory that is never
  // changed once it became dereferenceable. This is no longer true after
  // RS4GC. Similar reasoning applies to invariant.group metadata, which
  // applies to loads within a group.
  unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa,
                                        LLVMContext::MD_range,
                                        LLVMContext::MD_alias_scope,
                                        LLVMContext::MD_nontemporal,
                                        LLVMContext::MD_nonnull,
                                        LLVMContext::MD_align,
                                        LLVMContext::MD_type};

  // Drop all metadata on the instruction other than ValidMetadataAfterRS4GC.
  I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC);
}
static void stripNonValidDataFromBody(Function &F) {
  if (F.empty())
    return;

  LLVMContext &Ctx = F.getContext();
  MDBuilder Builder(Ctx);

  // Set of invariant.start instructions that we need to remove.
  // Use this to avoid invalidating the instruction iterator.
  SmallVector<IntrinsicInst *, 12> InvariantStartInstructions;

  for (Instruction &I : instructions(F)) {
    // invariant.start on a memory location implies that the referenced memory
    // location is constant and unchanging. This is no longer true after
    // RewriteStatepointsForGC runs because there can be calls to gc.statepoint
    // which frees the entire heap, and the presence of invariant.start allows
    // the optimizer to sink the load of a memory location past a statepoint,
    // which is incorrect.
    if (auto *II = dyn_cast<IntrinsicInst>(&I))
      if (II->getIntrinsicID() == Intrinsic::invariant_start) {
        InvariantStartInstructions.push_back(II);
        continue;
      }

    if (MDNode *Tag = I.getMetadata(LLVMContext::MD_tbaa)) {
      MDNode *MutableTBAA = Builder.createMutableTBAAAccessTag(Tag);
      I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
    }

    stripInvalidMetadataFromInstruction(I);

    if (auto *Call = dyn_cast<CallBase>(&I)) {
      for (int i = 0, e = Call->arg_size(); i != e; i++)
        if (isa<PointerType>(Call->getArgOperand(i)->getType()))
          RemoveNonValidAttrAtIndex(Ctx, *Call,
                                    i + AttributeList::FirstArgIndex);
      if (isa<PointerType>(Call->getType()))
        RemoveNonValidAttrAtIndex(Ctx, *Call, AttributeList::ReturnIndex);
    }
  }

  // Delete the invariant.start instructions and RAUW undef.
  for (auto *II : InvariantStartInstructions) {
    II->replaceAllUsesWith(UndefValue::get(II->getType()));
    II->eraseFromParent();
  }
}
/// Returns true if this function should be rewritten by this pass. The main
/// point of this function is as an extension point for custom logic.
static bool shouldRewriteStatepointsIn(Function &F) {
  // TODO: This should check the GCStrategy
  if (F.hasGC()) {
    const auto &FunctionGCName = F.getGC();
    const StringRef StatepointExampleName("statepoint-example");
    const StringRef CoreCLRName("coreclr");
    return (StatepointExampleName == FunctionGCName) ||
           (CoreCLRName == FunctionGCName);
  } else
    return false;
}
static void stripNonValidData(Module &M) {
#ifndef NDEBUG
  assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!");
#endif

  for (Function &F : M)
    stripNonValidAttributesFromPrototype(F);

  for (Function &F : M)
    stripNonValidDataFromBody(F);
}
bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
                                            TargetTransformInfo &TTI,
                                            const TargetLibraryInfo &TLI) {
  assert(!F.isDeclaration() && !F.empty() &&
         "need function body to rewrite statepoints in");
  assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision");

  auto NeedsRewrite = [&TLI](Instruction &I) {
    if (const auto *Call = dyn_cast<CallBase>(&I)) {
      if (isa<GCStatepointInst>(Call))
        return false;
      if (callsGCLeafFunction(Call, TLI))
        return false;

      // Normally it's up to the frontend to make sure that non-leaf calls also
      // have proper deopt state if it is required. We make an exception for
      // element atomic memcpy/memmove intrinsics here. Unlike other intrinsics
      // these are non-leaf by default. They might be generated by the optimizer
      // which doesn't know how to produce a proper deopt state. So if we see a
      // non-leaf memcpy/memmove without deopt state, just treat it as a leaf
      // copy and don't produce a statepoint.
      if (!AllowStatepointWithNoDeoptInfo &&
          !Call->getOperandBundle(LLVMContext::OB_deopt)) {
        assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) &&
               "Don't expect any other calls here!");
        return false;
      }
      return true;
    }
    return false;
  };

  // Delete any unreachable statepoints so that we don't have unrewritten
  // statepoints surviving this pass. This makes testing easier and the
  // resulting IR less confusing to human readers.
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  bool MadeChange = removeUnreachableBlocks(F, &DTU);
  // Flush the Dominator Tree.
  DTU.getDomTree();

  // Gather all the statepoints which need rewriting. Be careful to only
  // consider those in reachable code since we need to ask dominance queries
  // when rewriting. We'll delete the unreachable ones in a moment.
  SmallVector<CallBase *, 64> ParsePointNeeded;
  SmallVector<CallInst *, 64> Intrinsics;
  for (Instruction &I : instructions(F)) {
    // TODO: only the ones with the flag set!
    if (NeedsRewrite(I)) {
      // NOTE removeUnreachableBlocks() is stronger than
      // DominatorTree::isReachableFromEntry(). In other words
      // removeUnreachableBlocks can remove some blocks for which
      // isReachableFromEntry() returns true.
      assert(DT.isReachableFromEntry(I.getParent()) &&
             "no unreachable blocks expected");
      ParsePointNeeded.push_back(cast<CallBase>(&I));
    }
    if (auto *CI = dyn_cast<CallInst>(&I))
      if (CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_base ||
          CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_offset)
        Intrinsics.emplace_back(CI);
  }

  // Return early if there is no work to do.
  if (ParsePointNeeded.empty() && Intrinsics.empty())
    return MadeChange;

  // As a prepass, go ahead and aggressively destroy single entry phi nodes.
  // These are created by LCSSA. They have the effect of increasing the size
  // of liveness sets for no good reason. It may be harder to do this post
  // insertion since relocations and base phis can confuse things.
  for (BasicBlock &BB : F)
    if (BB.getUniquePredecessor())
      MadeChange |= FoldSingleEntryPHINodes(&BB);

  // Before we start introducing relocations, we want to tweak the IR a bit to
  // avoid unfortunate code generation effects. The main example is that we
  // want to try to make sure the comparison feeding a branch is after any
  // safepoints. Otherwise, we end up with a comparison of pre-relocation
  // values feeding a branch after relocation. This is semantically correct,
  // but results in extra register pressure since both the pre-relocation and
  // post-relocation copies must be available in registers. For code without
  // relocations this is handled elsewhere, but teaching the scheduler to
  // reverse the transform we're about to do would be slightly complex.
  // Note: This may extend the live range of the inputs to the icmp and thus
  // increase the liveset of any statepoint we move over. This is profitable
  // as long as all statepoints are in rare blocks. If we had in-register
  // lowering for live values this would be a much safer transform.
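  // For example (sketch), this moves the icmp in
  //   %c = icmp eq i8 addrspace(1)* %a, %b
  //   <statepoint which relocates %a and %b>
  //   br i1 %c, ...
  // down to just before the br, so that after use-rewriting it compares the
  // relocated copies instead of keeping the originals live across the call.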
  auto getConditionInst = [](Instruction *TI) -> Instruction * {
    if (auto *BI = dyn_cast<BranchInst>(TI))
      if (BI->isConditional())
        return dyn_cast<Instruction>(BI->getCondition());
    // TODO: Extend this to handle switches.
    return nullptr;
  };
  for (BasicBlock &BB : F) {
    Instruction *TI = BB.getTerminator();
    if (auto *Cond = getConditionInst(TI))
      // TODO: Handle more than just ICmps here. We should be able to move
      // most instructions without side effects or memory access.
      if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
        MadeChange = true;
        Cond->moveBefore(TI);
      }
  }

  // Nasty workaround - The base computation code in the main algorithm doesn't
  // consider the fact that a GEP can be used to convert a scalar to a vector.
  // The right fix for this is to integrate GEPs into the base rewriting
  // algorithm properly; this is just a short term workaround to prevent
  // crashes by canonicalizing such GEPs into fully vector GEPs.
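  // e.g. (sketch) a GEP with a scalar base and vector indices:
  //   getelementptr i32, i32 addrspace(1)* %base, <2 x i64> %idxs
  // is rewritten to splat the base first:
  //   getelementptr i32, <2 x i32 addrspace(1)*> %base.splat, <2 x i64> %idxs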
2913 for (Instruction
&I
: instructions(F
)) {
2914 if (!isa
<GetElementPtrInst
>(I
))
2918 for (unsigned i
= 0; i
< I
.getNumOperands(); i
++)
2919 if (auto *OpndVTy
= dyn_cast
<VectorType
>(I
.getOperand(i
)->getType())) {
2921 VF
== cast
<FixedVectorType
>(OpndVTy
)->getNumElements());
2922 VF
= cast
<FixedVectorType
>(OpndVTy
)->getNumElements();
2925 // It's the vector to scalar traversal through the pointer operand which
2926 // confuses base pointer rewriting, so limit ourselves to that case.
2927 if (!I
.getOperand(0)->getType()->isVectorTy() && VF
!= 0) {
2929 auto *Splat
= B
.CreateVectorSplat(VF
, I
.getOperand(0));
2930 I
.setOperand(0, Splat
);
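
  // E.g. (illustrative IR): a scalar-to-vector GEP such as
  //   %gep = getelementptr i8, i8* %base, <2 x i64> %offsets
  // has %base replaced by a 2-element splat of %base, so every operand of
  // the GEP is a vector and base pointer rewriting sees a uniform shape.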

  // Cache the 'defining value' relation used in the computation and
  // insertion of base phis and selects.  This ensures that we don't insert
  // large numbers of duplicate base_phis.  Use one cache for both
  // inlineGetBaseAndOffset() and insertParsePoints().
  DefiningValueMapTy DVCache;

  if (!Intrinsics.empty())
    // Inline @gc.get.pointer.base() and @gc.get.pointer.offset() before
    // finding live references.
    MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache);

  if (!ParsePointNeeded.empty())
    MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache);

  return MadeChange;
}

// -------------------------------------------------------------------
// liveness computation via standard dataflow
// -------------------------------------------------------------------

// TODO: Consider using bitvectors for liveness, the set of potentially
// interesting values should be small and easy to pre-compute.
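
// In standard dataflow terms: with UEVar(BB) the GC pointers used in BB
// before any (re)definition and Def(BB) the GC pointers defined in BB, the
// functions below iterate
//   LiveOut(BB) = union over successors S of LiveIn(S)
//   LiveIn(BB)  = UEVar(BB) U (LiveOut(BB) - Def(BB))
// to a fixed point, with phi uses credited to the incoming edge's
// predecessor via the LiveOut seeds.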

/// Compute the live-in set for the location rbegin starting from
/// the live-out set of the basic block
static void computeLiveInValues(BasicBlock::reverse_iterator Begin,
                                BasicBlock::reverse_iterator End,
                                SetVector<Value *> &LiveTmp) {
  for (auto &I : make_range(Begin, End)) {
    // KILL/Def - Remove this definition from LiveIn
    LiveTmp.remove(&I);

    // Don't consider *uses* in PHI nodes, we handle their contribution to
    // predecessor blocks when we seed the LiveOut sets
    if (isa<PHINode>(I))
      continue;

    // USE - Add to the LiveIn set for this instruction
    for (Value *V : I.operands()) {
      assert(!isUnhandledGCPointerType(V->getType()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
        // The choice to exclude all things constant here is slightly subtle.
        // There are two independent reasons:
        // - We assume that things which are constant (from LLVM's definition)
        // do not move at runtime.  For example, the address of a global
        // variable is fixed, even though its contents may not be.
        // - Second, we can't disallow arbitrary inttoptr constants even
        // if the language frontend does.  Optimization passes are free to
        // locally exploit facts without respect to global reachability.  This
        // can create sections of code which are dynamically unreachable and
        // contain just about anything.  (see constants.ll in tests)
        LiveTmp.insert(V);
      }
    }
  }
}

static void computeLiveOutSeed(BasicBlock *BB, SetVector<Value *> &LiveTmp) {
  for (BasicBlock *Succ : successors(BB)) {
    for (auto &I : *Succ) {
      PHINode *PN = dyn_cast<PHINode>(&I);
      if (!PN)
        break;

      Value *V = PN->getIncomingValueForBlock(BB);
      assert(!isUnhandledGCPointerType(V->getType()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V))
        LiveTmp.insert(V);
    }
  }
}
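
// E.g. (illustrative IR): given a phi in a successor block
//   %p = phi i8* [ %a, %left ], [ %b, %right ]
// the seed makes %a live out of %left without making %b live there, a
// per-edge distinction that a plain union over successor live-in sets
// could not express.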

static SetVector<Value *> computeKillSet(BasicBlock *BB) {
  SetVector<Value *> KillSet;
  for (Instruction &I : *BB)
    if (isHandledGCPointerType(I.getType()))
      KillSet.insert(&I);
  return KillSet;
}

#ifndef NDEBUG
/// Check that the items in 'Live' dominate 'TI'.  This is used as a basic
/// sanity check for the liveness computation.
static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live,
                          Instruction *TI, bool TermOkay = false) {
  for (Value *V : Live) {
    if (auto *I = dyn_cast<Instruction>(V)) {
      // The terminator can be a member of the LiveOut set.  LLVM's definition
      // of instruction dominance states that V does not dominate itself.  As
      // such, we need to special case this to allow it.
      if (TermOkay && TI == I)
        continue;
      assert(DT.dominates(I, TI) &&
             "basic SSA liveness expectation violated by liveness analysis");
    }
  }
}

/// Check that all the liveness sets used during the computation of liveness
/// obey basic SSA properties.  This is useful for finding cases where we miss
/// a def.
static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
                          BasicBlock &BB) {
  checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
  checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
  checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
}
#endif

static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data) {
  SmallSetVector<BasicBlock *, 32> Worklist;

  // Seed the liveness for each individual block
  for (BasicBlock &BB : F) {
    Data.KillSet[&BB] = computeKillSet(&BB);
    Data.LiveSet[&BB].clear();
    computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);

#ifndef NDEBUG
    for (Value *Kill : Data.KillSet[&BB])
      assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
#endif

    Data.LiveOut[&BB] = SetVector<Value *>();
    computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
    Data.LiveIn[&BB] = Data.LiveSet[&BB];
    Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]);
    Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]);
    if (!Data.LiveIn[&BB].empty())
      Worklist.insert(pred_begin(&BB), pred_end(&BB));
  }

  // Propagate that liveness until stable
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();

    // Compute our new liveout set, then exit early if it hasn't changed
    // despite the contribution of our successors.
    SetVector<Value *> LiveOut = Data.LiveOut[BB];
    const auto OldLiveOutSize = LiveOut.size();
    for (BasicBlock *Succ : successors(BB)) {
      assert(Data.LiveIn.count(Succ));
      LiveOut.set_union(Data.LiveIn[Succ]);
    }
    // assert: OldLiveOut is a subset of LiveOut
    if (OldLiveOutSize == LiveOut.size()) {
      // If the sets are the same size, then we didn't actually add anything
      // when unioning our successors' LiveIn.  Thus, the LiveIn of this
      // block hasn't changed, and there's nothing to propagate.
      continue;
    }
    Data.LiveOut[BB] = LiveOut;

    // Apply the effects of this basic block
    SetVector<Value *> LiveTmp = LiveOut;
    LiveTmp.set_union(Data.LiveSet[BB]);
    LiveTmp.set_subtract(Data.KillSet[BB]);

    assert(Data.LiveIn.count(BB));
    const SetVector<Value *> &OldLiveIn = Data.LiveIn[BB];
    // assert: OldLiveIn is a subset of LiveTmp
    if (OldLiveIn.size() != LiveTmp.size()) {
      Data.LiveIn[BB] = LiveTmp;
      Worklist.insert(pred_begin(BB), pred_end(BB));
    }
  } // while (!Worklist.empty())

#ifndef NDEBUG
  // Sanity check our output against SSA properties.  This helps catch any
  // missing kills during the above iteration.
  for (BasicBlock &BB : F)
    checkBasicSSA(DT, Data, BB);
#endif
}
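
// Note: the iteration above terminates because each live set only ever
// grows, each is bounded by the finite number of values in the function,
// and a block's predecessors are re-queued only when its LiveIn actually
// grew.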

static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &Out) {
  BasicBlock *BB = Inst->getParent();

  // Note: The copy is intentional and required; computeLiveInValues mutates
  // the set, and the cached Data.LiveOut must stay valid for other queries.
  assert(Data.LiveOut.count(BB));
  SetVector<Value *> LiveOut = Data.LiveOut[BB];

  // We want to handle the statepoint itself oddly.  Its call result is not
  // live (normal), nor are its arguments (unless they're used again later).
  // This adjustment is specifically what we need to relocate.
  computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(),
                      LiveOut);
  LiveOut.remove(Inst);
  Out.insert(LiveOut.begin(), LiveOut.end());
}

static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
                                  CallBase *Call,
                                  PartiallyConstructedSafepointRecord &Info) {
  StatepointLiveSetTy Updated;
  findLiveSetAtInst(Call, RevisedLivenessData, Updated);

  // We may have base pointers which are now live that weren't before.  We
  // need to update the PointerToBase structure to reflect this.  Insertion
  // is a no-op for pointers which already have a recorded base.
  for (auto V : Updated)
    Info.PointerToBase.insert({V, V});

#ifndef NDEBUG
  for (auto V : Updated)
    assert(Info.PointerToBase.count(V) &&
           "Must be able to find base for live value!");
#endif

  // Remove any stale base mappings - this can happen since our liveness is
  // more precise than the one inherent in the base pointer analysis.
  DenseSet<Value *> ToErase;
  for (auto KVPair : Info.PointerToBase)
    if (!Updated.count(KVPair.first))
      ToErase.insert(KVPair.first);

  for (auto *V : ToErase)
    Info.PointerToBase.erase(V);

#ifndef NDEBUG
  for (auto KVPair : Info.PointerToBase)
    assert(Updated.count(KVPair.first) && "record for non-live value");
#endif

  Info.LiveSet = Updated;
}