//===- ArgumentPromotion.cpp - Promote by-reference arguments ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass promotes "by reference" arguments to be "by value" arguments. In
// practice, this means looking for internal functions that have pointer
// arguments. If it can prove, through the use of alias analysis, that an
// argument is *only* loaded, then it can pass the value into the function
// instead of the address of the value. This can cause recursive simplification
// of code and lead to the elimination of allocas (especially in C++ template
// code like the STL).
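//
// For illustration, a simplified sketch of the transformation (the exact IR
// depends on the offsets and types involved; ".0.val" follows the naming
// convention used by the code below):
//
//   define internal i32 @f(ptr %p) {
//     %v = load i32, ptr %p
//     ret i32 %v
//   }
//
// becomes
//
//   define internal i32 @f(i32 %p.0.val) {
//     ret i32 %p.0.val
//   }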
//
// This pass also handles aggregate arguments that are passed into a function,
// scalarizing them if the elements of the aggregate are only loaded. Note that
// by default it refuses to scalarize aggregates which would require passing in
// more than three operands to the function, because passing thousands of
// operands for a large array or structure is unprofitable! This limit can be
// configured or disabled, however.
//
// Note that this transformation could also be done for arguments that are only
// stored to (returning the value instead), but this is not currently
// implemented. This case would be best handled when and if LLVM begins
// supporting multiple return values from functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/ArgumentPromotion.h"

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "argpromotion"

STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");

namespace {

struct ArgPart {
  Type *Ty;
  Align Alignment;
  /// A representative guaranteed-executed load or store instruction for use by
  /// metadata transfer.
  Instruction *MustExecInstr;
};

using OffsetAndArgPart = std::pair<int64_t, ArgPart>;

} // end anonymous namespace
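
// As an illustrative sketch of how these types are used: a callee that reads
// two fields through its pointer argument %p,
//
//   %x = load i32, ptr %p                 ; -> {0, ArgPart{i32, ...}}
//   %q = getelementptr i8, ptr %p, i64 8
//   %y = load i64, ptr %q                 ; -> {8, ArgPart{i64, ...}}
//
// yields two OffsetAndArgPart entries, and each part becomes a separate
// scalar parameter of the rewritten function.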

static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
                            Value *Ptr, Type *ResElemTy, int64_t Offset) {
  if (Offset != 0) {
    APInt APOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
    Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(APOffset));
  }
  return Ptr;
}
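
// For example, for a part at byte offset 8 of argument %p, the helper above
// emits (a sketch; the emitted GEP is unnamed in practice):
//
//   %p.off = getelementptr i8, ptr %p, i64 8
//
// while an offset of 0 simply reuses %p.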

/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
static Function *
doPromotion(Function *F, FunctionAnalysisManager &FAM,
            const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>>
                &ArgsToPromote) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  // Attribute - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the
  // parameter attributes are lost.
  SmallVector<AttributeSet, 8> ArgAttrVec;
  // Mapping from old to new argument indices. -1 for promoted or removed
  // arguments.
  SmallVector<unsigned> NewArgIndices;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list.
  unsigned ArgNo = 0, NewArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument.
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
      NewArgIndices.push_back(NewArgNo++);
    } else if (I->use_empty()) {
      // Dead argument (these are always marked as promotable).
      ++NumArgumentsDead;
      NewArgIndices.push_back((unsigned)-1);
    } else {
      const auto &ArgParts = ArgsToPromote.find(&*I)->second;
      for (const auto &Pair : ArgParts) {
        Params.push_back(Pair.second.Ty);
        ArgAttrVec.push_back(AttributeSet());
      }
      ++NumArgumentsPromoted;
      NewArgIndices.push_back((unsigned)-1);
      NewArgNo += ArgParts.size();
    }
  }
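
  // A worked example of the remapping (hypothetical signature): for
  // f(ptr %a, i32 %b) where %a is promoted into two parts, Params becomes
  // {<part0 type>, <part1 type>, i32} and NewArgIndices becomes {-1, 2}:
  // %a has no single new index, while %b moves from index 1 to index 2.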

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
                                  F->getName());
  NF->copyAttributesFrom(F);
  NF->copyMetadata(F, 0);

  // The new function will have the !dbg metadata copied from the original
  // function. The original function may not be deleted, and dbg metadata needs
  // to be unique, so we need to drop it.
  F->setSubprogram(nullptr);

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
                    << "From: " << *F);

  uint64_t LargestVectorWidth = 0;
  for (auto *I : Params)
    if (auto *VT = dyn_cast<llvm::VectorType>(I))
      LargestVectorWidth = std::max(
          LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinValue());

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
                                       PAL.getRetAttrs(), ArgAttrVec));

  // Remap argument indices in allocsize attribute.
  if (auto AllocSize = NF->getAttributes().getFnAttrs().getAllocSizeArgs()) {
    unsigned Arg1 = NewArgIndices[AllocSize->first];
    assert(Arg1 != (unsigned)-1 && "allocsize cannot be promoted argument");
    std::optional<unsigned> Arg2;
    if (AllocSize->second) {
      Arg2 = NewArgIndices[*AllocSize->second];
      assert(Arg2 != (unsigned)-1 && "allocsize cannot be promoted argument");
    }
    NF->addFnAttr(Attribute::getWithAllocSizeArgs(F->getContext(), Arg1, Arg2));
  }

  AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);

  // Insert the new function before the old function, so we won't be processing
  // it again.
  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all the callers of the function, transforming the call sites to
  // pass in the loaded pointers.
  SmallVector<Value *, 16> Args;
  const DataLayout &DL = F->getParent()->getDataLayout();
  SmallVector<WeakTrackingVH, 16> DeadArgs;

  while (!F->use_empty()) {
    CallBase &CB = cast<CallBase>(*F->user_back());
    assert(CB.getCalledFunction() == F);
    const AttributeList &CallPAL = CB.getAttributes();
    IRBuilder<NoFolder> IRB(&CB);

    // Loop over the operands, inserting GEP and loads in the caller as
    // appropriate.
    auto *AI = CB.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo) {
      if (!ArgsToPromote.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument.
        ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
      } else if (!I->use_empty()) {
        Value *V = *AI;
        const auto &ArgParts = ArgsToPromote.find(&*I)->second;
        for (const auto &Pair : ArgParts) {
          LoadInst *LI = IRB.CreateAlignedLoad(
              Pair.second.Ty,
              createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
              Pair.second.Alignment, V->getName() + ".val");
          if (Pair.second.MustExecInstr) {
            LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
            LI->copyMetadata(*Pair.second.MustExecInstr,
                             {LLVMContext::MD_dereferenceable,
                              LLVMContext::MD_dereferenceable_or_null,
                              LLVMContext::MD_noundef,
                              LLVMContext::MD_nontemporal});
            // Only transfer poison-generating metadata if we also have
            // !noundef.
            // TODO: Without !noundef, we could merge this metadata across
            // all promoted loads.
            if (LI->hasMetadata(LLVMContext::MD_noundef))
              LI->copyMetadata(*Pair.second.MustExecInstr,
                               {LLVMContext::MD_range, LLVMContext::MD_nonnull,
                                LLVMContext::MD_align});
          }
          Args.push_back(LI);
          ArgAttrVec.push_back(AttributeSet());
        }
      } else {
        // Dead argument: pass nothing, and let the now-unused incoming
        // operand be cleaned up later.
        assert(ArgsToPromote.count(&*I) && I->use_empty());
        DeadArgs.emplace_back(AI->get());
      }
    }

    // Push any varargs arguments on the list.
    for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CB.getOperandBundlesAsDefs(OpBundles);

    CallBase *NewCS = nullptr;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", &CB);
    } else {
      auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB);
      NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS->setCallingConv(CB.getCallingConv());
    NewCS->setAttributes(AttributeList::get(F->getContext(),
                                            CallPAL.getFnAttrs(),
                                            CallPAL.getRetAttrs(), ArgAttrVec));
    NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
    Args.clear();
    ArgAttrVec.clear();

    AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
                                                  LargestVectorWidth);

    if (!CB.use_empty()) {
      CB.replaceAllUsesWith(NewCS);
      NewCS->takeName(&CB);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    CB.eraseFromParent();
  }

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs);

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->splice(NF->begin(), F);

  // We will collect all the newly created allocas to promote them into
  // registers after the following loop.
  SmallVector<AllocaInst *, 4> Allocas;

  // Loop over the argument list, transferring uses of the old arguments over to
  // the new arguments, also transferring over the names as well.
  Function::arg_iterator I2 = NF->arg_begin();
  for (Argument &Arg : F->args()) {
    if (!ArgsToPromote.count(&Arg)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      Arg.replaceAllUsesWith(&*I2);
      I2->takeName(&Arg);
      ++I2;
      continue;
    }

    // There potentially are metadata uses for things like llvm.dbg.value.
    // Replace them with undef, after handling the other regular uses.
    auto RauwUndefMetadata = make_scope_exit(
        [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });

    // Otherwise, if we promoted this argument, we have to create an alloca in
    // the callee for every promotable part and store each of the new incoming
    // arguments into the corresponding alloca, which gives the old code (in
    // particular the store instructions, if they are allowed) a chance to work
    // as before.
    assert(Arg.getType()->isPointerTy() &&
           "Only arguments with a pointer type are promotable");

    IRBuilder<NoFolder> IRB(&NF->begin()->front());

    // Add only the promoted elements, so parts from ArgsToPromote.
    SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
    for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
      int64_t Offset = Pair.first;
      const ArgPart &Part = Pair.second;

      Argument *NewArg = I2++;
      NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");

      AllocaInst *NewAlloca = IRB.CreateAlloca(
          Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
      NewAlloca->setAlignment(Pair.second.Alignment);
      IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);

      // Collect the alloca to retarget the users to.
      OffsetToAlloca.insert({Offset, NewAlloca});
    }
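
    // For a part of %a at offset 0 with type i32, the loop above emits in the
    // callee's entry block (a sketch):
    //
    //   %a.0.allc = alloca i32
    //   store i32 %a.0.val, ptr %a.0.allc
    //
    // Loads and stores of %a are retargeted to %a.0.allc below, and the
    // alloca is removed again by Mem2Reg at the end of doPromotion.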

    auto GetAlloca = [&](Value *Ptr) {
      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      assert(Ptr == &Arg && "Not constant offset from arg?");
      return OffsetToAlloca.lookup(Offset.getSExtValue());
    };

    // Cleanup the code from the dead instructions: GEPs and BitCasts in between
    // the original argument and its users: loads and stores. Retarget every
    // user to the newly created alloca.
    SmallVector<Value *, 16> Worklist;
    SmallVector<Instruction *, 16> DeadInsts;
    append_range(Worklist, Arg.users());
    while (!Worklist.empty()) {
      Value *V = Worklist.pop_back_val();
      if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
        DeadInsts.push_back(cast<Instruction>(V));
        append_range(Worklist, V->users());
        continue;
      }

      if (auto *LI = dyn_cast<LoadInst>(V)) {
        Value *Ptr = LI->getPointerOperand();
        LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
        Value *Ptr = SI->getPointerOperand();
        SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      llvm_unreachable("Unexpected user");
    }

    for (Instruction *I : DeadInsts) {
      I->replaceAllUsesWith(PoisonValue::get(I->getType()));
      I->eraseFromParent();
    }

    // Collect the allocas for promotion.
    for (const auto &Pair : OffsetToAlloca) {
      assert(isAllocaPromotable(Pair.second) &&
             "By design, only promotable allocas should be produced.");
      Allocas.push_back(Pair.second);
    }
  }

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
                    << " alloca(s) are promotable by Mem2Reg\n");

  if (!Allocas.empty()) {
    // Promote the allocas back to registers. Our earlier checks have ensured
    // that PromoteMemToReg() will succeed.
    auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF);
    auto &AC = FAM.getResult<AssumptionAnalysis>(*NF);
    PromoteMemToReg(Allocas, DT, &AC);
  }

  return NF;
}

/// Return true if we can prove that all callers pass in a valid pointer for
/// the specified function argument.
static bool allCallersPassValidPointerForArgument(Argument *Arg,
                                                  Align NeededAlign,
                                                  uint64_t NeededDerefBytes) {
  Function *Callee = Arg->getParent();
  const DataLayout &DL = Callee->getParent()->getDataLayout();
  APInt Bytes(64, NeededDerefBytes);

  // Check if the argument itself is marked dereferenceable and aligned.
  if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
    return true;

  // Look at all call sites of the function. At this point we know we only have
  // direct callers.
  return all_of(Callee->users(), [&](User *U) {
    CallBase &CB = cast<CallBase>(*U);
    return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
                                              NeededAlign, Bytes, DL);
  });
}
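
// For instance, with NeededAlign = 8 and NeededDerefBytes = 16, a call site
// such as (a sketch)
//
//   call void @f(ptr align 8 dereferenceable(16) %p)
//
// satisfies the check above, since the attributes guarantee 16 bytes that are
// dereferenceable at 8-byte alignment.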

/// Determine that this argument is safe to promote, and find the argument
/// parts it can be promoted into.
static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
                         unsigned MaxElements, bool IsRecursive,
                         SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
  // Quick exit for unused arguments.
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all the uses are loads at known
  // offsets.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (ie, there is a load in the entry
  // block) or the pointer passed in at every call site is guaranteed to be
  // valid.
  // In the former case, invalid loads can happen, but would have happened
  // anyway; in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.

  SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
  Align NeededAlign(1);
  uint64_t NeededDerefBytes = 0;

  // And if this is a byval argument, we also allow store instructions.
  // We only handle arguments with a specified alignment in this way;
  // if the alignment is unspecified, the actual alignment of the argument
  // is not known here.
  bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();

  // An end user of a pointer argument is a load or store instruction.
  // Returns std::nullopt if this load or store is not based on the argument.
  // Returns true if we can promote the instruction, false otherwise.
  auto HandleEndUser = [&](auto *I, Type *Ty,
                           bool GuaranteedToExecute) -> std::optional<bool> {
    // Don't promote volatile or atomic instructions.
    if (!I->isSimple())
      return false;

    Value *Ptr = I->getPointerOperand();
    APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
    Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                 /* AllowNonInbounds */ true);
    if (Ptr != Arg)
      return std::nullopt;

    if (Offset.getSignificantBits() >= 64)
      return false;

    TypeSize Size = DL.getTypeStoreSize(Ty);
    // Don't try to promote scalable types.
    if (Size.isScalable())
      return false;

    // If this is a recursive function and one of the types is a pointer,
    // then promoting it might lead to recursive promotion.
    if (IsRecursive && Ty->isPointerTy())
      return false;

    int64_t Off = Offset.getSExtValue();
    auto Pair = ArgParts.try_emplace(
        Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
    ArgPart &Part = Pair.first->second;
    bool OffsetNotSeenBefore = Pair.second;

    // We limit promotion to only promoting up to a fixed number of elements of
    // the aggregate.
    if (MaxElements > 0 && ArgParts.size() > MaxElements) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "more than " << MaxElements << " parts\n");
      return false;
    }

    // For now, we only support loading/storing one specific type at a given
    // offset.
    if (Part.Ty != Ty) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "accessed as both " << *Part.Ty << " and " << *Ty
                        << " at offset " << Off << "\n");
      return false;
    }

    // If this instruction is not guaranteed to execute, and we haven't seen a
    // load or store at this offset before (or it had lower alignment), then we
    // need to remember that requirement.
    // Note that skipping instructions of previously seen offsets is only
    // correct because we only allow a single type for a given offset, which
    // also means that the number of accessed bytes will be the same.
    if (!GuaranteedToExecute &&
        (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
      // We won't be able to prove dereferenceability for negative offsets.
      if (Off < 0)
        return false;

      // If the offset is not aligned, an aligned base pointer won't help.
      if (!isAligned(I->getAlign(), Off))
        return false;

      NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
      NeededAlign = std::max(NeededAlign, I->getAlign());
    }

    Part.Alignment = std::max(Part.Alignment, I->getAlign());
    return true;
  };
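
  // A worked example of the bookkeeping above: a non-guaranteed, 4-byte
  // aligned load of an i32 at offset 4 requires the incoming pointer to be
  // dereferenceable for at least 4 + 4 = 8 bytes, so NeededDerefBytes becomes
  // max(NeededDerefBytes, 8) and NeededAlign becomes max(NeededAlign, 4).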

  // Look for loads and stores that are guaranteed to execute on entry.
  for (Instruction &I : Arg->getParent()->getEntryBlock()) {
    std::optional<bool> Res{};
    if (LoadInst *LI = dyn_cast<LoadInst>(&I))
      Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
    else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
      Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ true);
    if (Res && !*Res)
      return false;

    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  // Now look at all loads of the argument. Remember the load instructions
  // for the aliasing check below.
  SmallVector<const Use *, 16> Worklist;
  SmallPtrSet<const Use *, 16> Visited;
  SmallVector<LoadInst *, 16> Loads;
  auto AppendUses = [&](const Value *V) {
    for (const Use &U : V->uses())
      if (Visited.insert(&U).second)
        Worklist.push_back(&U);
  };
  AppendUses(Arg);
  while (!Worklist.empty()) {
    const Use *U = Worklist.pop_back_val();
    Value *V = U->getUser();
    if (isa<BitCastInst>(V)) {
      AppendUses(V);
      continue;
    }

    if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
      if (!GEP->hasAllConstantIndices())
        return false;
      AppendUses(V);
      continue;
    }

    if (auto *LI = dyn_cast<LoadInst>(V)) {
      if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
        return false;
      Loads.push_back(LI);
      continue;
    }

    // Stores are allowed for byval arguments.
    auto *SI = dyn_cast<StoreInst>(V);
    if (AreStoresAllowed && SI &&
        U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
      if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ false))
        return false;
      continue;
    }

    // Only stores TO the argument are allowed; all other stores and any
    // other users are unknown.
    LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                      << "unknown user " << *V << "\n");
    return false;
  }

  if (NeededDerefBytes || NeededAlign > 1) {
    // Try to prove the needed dereferenceability and alignment requirements.
    if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
                                               NeededDerefBytes)) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "not dereferenceable or aligned\n");
      return false;
    }
  }

  if (ArgParts.empty())
    return true; // No users, this is a dead argument.

  // Sort parts by offset.
  append_range(ArgPartsVec, ArgParts);
  sort(ArgPartsVec, llvm::less_first());

  // Make sure the parts are non-overlapping.
  int64_t Offset = ArgPartsVec[0].first;
  for (const auto &Pair : ArgPartsVec) {
    if (Pair.first < Offset)
      return false; // Overlap with previous part.

    Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
  }
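
  // For example, parts {0: i64} and {4: i32} overlap: after the first part,
  // Offset is 0 + 8 = 8, and the next part starts at 4 < 8, so the argument
  // is rejected. Parts {0: i32} and {4: i32} are fine, since 4 is not less
  // than 0 + 4 = 4.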

  // If store instructions are allowed, the path from the entry of the function
  // to each load may not be free of instructions that potentially invalidate
  // the load, and this is an admissible situation.
  if (AreStoresAllowed)
    return true;

  // Okay, now we know that the argument is only used by load instructions, and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.

  // Because there could be several/many load instructions, remember which
  // blocks we know to be transparent to the load.
  df_iterator_default_set<BasicBlock *, 16> TranspBlocks;

  for (LoadInst *Load : Loads) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    BasicBlock *BB = Load->getParent();

    MemoryLocation Loc = MemoryLocation::get(Load);
    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))
      return false; // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block.
    for (BasicBlock *P : predecessors(BB)) {
      for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
        if (AAR.canBasicBlockModify(*TranspBB, Loc))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}

/// Check if callers and callee agree on how promoted arguments would be
/// passed.
static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
                                  const TargetTransformInfo &TTI) {
  return all_of(F.uses(), [&](const Use &U) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    if (!CB)
      return false;

    const Function *Caller = CB->getCaller();
    const Function *Callee = CB->getCalledFunction();
    return TTI.areTypesABICompatible(Caller, Callee, Types);
  });
}

/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct). If safe to promote some arguments, it
/// calls the DoPromotion method.
static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
                                  unsigned MaxElements, bool IsRecursive) {
  // Don't perform argument promotion for naked functions; otherwise we can end
  // up removing parameters that are seemingly 'not used' as they are referred
  // to in the assembly.
  if (F->hasFnAttribute(Attribute::Naked))
    return nullptr;

  // Make sure that it is local to this module.
  if (!F->hasLocalLinkage())
    return nullptr;

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg())
    return nullptr;

  // Don't transform functions that receive inallocas, as the transformation
  // may not be safe depending on calling convention.
  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
    return nullptr;

  // First check: see if there are any pointer arguments! If not, quick exit.
  SmallVector<Argument *, 16> PointerArgs;
  for (Argument &I : F->args())
    if (I.getType()->isPointerTy())
      PointerArgs.push_back(&I);
  if (PointerArgs.empty())
    return nullptr;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers. Also see if the function
  // is self-recursive.
  for (Use &U : F->uses()) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    // Must be a direct call.
    if (CB == nullptr || !CB->isCallee(&U) ||
        CB->getFunctionType() != F->getFunctionType())
      return nullptr;

    // Can't change signature of musttail callee.
    if (CB->isMustTailCall())
      return nullptr;

    if (CB->getFunction() == F)
      IsRecursive = true;
  }

  // Can't change signature of musttail caller.
  // FIXME: Support promoting whole chain of musttail functions.
  for (BasicBlock &BB : *F)
    if (BB.getTerminatingMustTailCall())
      return nullptr;

  const DataLayout &DL = F->getParent()->getDataLayout();
  auto &AAR = FAM.getResult<AAManager>(*F);
  const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F);

  // Check to see which arguments are promotable. If an argument is promotable,
  // add it to ArgsToPromote.
  DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
  unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams();
  for (Argument *PtrArg : PointerArgs) {
    // Replace sret attribute with noalias. This reduces register pressure by
    // avoiding a register copy.
    if (PtrArg->hasStructRetAttr()) {
      unsigned ArgNo = PtrArg->getArgNo();
      F->removeParamAttr(ArgNo, Attribute::StructRet);
      F->addParamAttr(ArgNo, Attribute::NoAlias);
      for (Use &U : F->uses()) {
        CallBase &CB = cast<CallBase>(*U.getUser());
        CB.removeParamAttr(ArgNo, Attribute::StructRet);
        CB.addParamAttr(ArgNo, Attribute::NoAlias);
      }
    }
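
    // In IR terms this rewrites the attribute on both the function and its
    // call sites, e.g. (a sketch):
    //
    //   define internal void @f(ptr sret(%T) %out)    ; before
    //   define internal void @f(ptr noalias %out)     ; after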

    // See if we can promote the pointer to its value.
    SmallVector<OffsetAndArgPart, 4> ArgParts;

    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
      SmallVector<Type *, 4> Types;
      for (const auto &Pair : ArgParts)
        Types.push_back(Pair.second.Ty);

      if (areTypesABICompatible(Types, *F, TTI)) {
        NumArgsAfterPromote += ArgParts.size() - 1;
        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
      }
    }
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty())
    return nullptr;

  if (NumArgsAfterPromote > TTI.getMaxNumArgs())
    return nullptr;

  return doPromotion(F, FAM, ArgsToPromote);
}

PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
                                             CGSCCAnalysisManager &AM,
                                             LazyCallGraph &CG,
                                             CGSCCUpdateResult &UR) {
  bool Changed = false, LocalChange;

  // Iterate until we stop promoting from this SCC.
  do {
    LocalChange = false;

    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

    bool IsRecursive = C.size() > 1;
    for (LazyCallGraph::Node &N : C) {
      Function &OldF = N.getFunction();
      Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive);
      if (!NewF)
        continue;
      LocalChange = true;

      // Directly substitute the functions in the call graph. Note that this
      // requires the old function to be completely dead and completely
      // replaced by the new function. It does no call graph updates; it merely
      // swaps out the particular function mapped to a particular node in the
      // graph.
      C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
      FAM.clear(OldF, OldF.getName());
      OldF.eraseFromParent();

      PreservedAnalyses FuncPA;
      FuncPA.preserveSet<CFGAnalyses>();
      for (auto *U : NewF->users()) {
        auto *UserF = cast<CallBase>(U)->getFunction();
        FAM.invalidate(*UserF, FuncPA);
      }
    }

    Changed |= LocalChange;
  } while (LocalChange);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  // We've cleared out analyses for deleted functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We've manually invalidated analyses for functions we've modified.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}