//===- ArgumentPromotion.cpp - Promote by-reference arguments ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass promotes "by reference" arguments to be "by value" arguments. In
// practice, this means looking for internal functions that have pointer
// arguments. If it can prove, through the use of alias analysis, that an
// argument is *only* loaded, then it can pass the value into the function
// instead of the address of the value. This can cause recursive simplification
// of code and lead to the elimination of allocas (especially in C++ template
// code like the STL).
//
// This pass also handles aggregate arguments that are passed into a function,
// scalarizing them if the elements of the aggregate are only loaded. Note that
// by default it refuses to scalarize aggregates which would require passing in
// more than three operands to the function, because passing thousands of
// operands for a large array or structure is unprofitable! This limit can be
// configured or disabled, however.
//
// Note that this transformation could also be done for arguments that are only
// stored to (returning the value instead), but it does not currently do so.
// That case would be best handled when and if LLVM begins supporting multiple
// return values from functions.
//
//===----------------------------------------------------------------------===//
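
// For illustration, promotion rewrites IR roughly as follows (a sketch, not
// taken from the original source; names follow the ".<offset>.val" convention
// used below, but the exact output depends on the input):
//
//   define internal i32 @callee(ptr %p) {
//     %v = load i32, ptr %p
//     ret i32 %v
//   }
//   ...
//   %r = call i32 @callee(ptr %a)
//
// becomes
//
//   define internal i32 @callee(i32 %p.0.val) {
//     ret i32 %p.0.val
//   }
//   ...
//   %a.val = load i32, ptr %a
//   %r = call i32 @callee(i32 %a.val)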

#include "llvm/Transforms/IPO/ArgumentPromotion.h"

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "argpromotion"

STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");

namespace {

struct ArgPart {
  Type *Ty;
  Align Alignment;
  /// A representative guaranteed-executed load or store instruction for use by
  /// metadata transfer.
  Instruction *MustExecInstr;
};

using OffsetAndArgPart = std::pair<int64_t, ArgPart>;

} // end anonymous namespace

static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
                            Value *Ptr, Type *ResElemTy, int64_t Offset) {
  if (Offset != 0) {
    APInt APOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
    Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(APOffset));
  }
  return Ptr;
}
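
// For a non-zero offset, the helper above emits a plain byte-addressed GEP,
// e.g. (illustrative) createByteGEP(IRB, DL, %p, <ty>, 4) produces:
//   %0 = getelementptr i8, ptr %p, i64 4
// while a zero offset returns the pointer unchanged.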

/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
static Function *
doPromotion(Function *F, FunctionAnalysisManager &FAM,
            const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>>
                &ArgsToPromote) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  // Attribute - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the
  // parameter attributes are lost.
  SmallVector<AttributeSet, 8> ArgAttrVec;
  // Mapping from old to new argument indices. -1 for promoted or removed
  // arguments.
  SmallVector<unsigned> NewArgIndices;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list.
  unsigned ArgNo = 0, NewArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument.
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
      NewArgIndices.push_back(NewArgNo++);
    } else if (I->use_empty()) {
      // Dead argument (these are always marked as promotable).
      ++NumArgumentsDead;
      NewArgIndices.push_back((unsigned)-1);
    } else {
      const auto &ArgParts = ArgsToPromote.find(&*I)->second;
      for (const auto &Pair : ArgParts) {
        Params.push_back(Pair.second.Ty);
        ArgAttrVec.push_back(AttributeSet());
      }
      ++NumArgumentsPromoted;
      NewArgIndices.push_back((unsigned)-1);
      NewArgNo += ArgParts.size();
    }
  }

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
                                  F->getName());
  NF->copyAttributesFrom(F);
  NF->copyMetadata(F, 0);

  // The new function will have the !dbg metadata copied from the original
  // function. The original function may not be deleted, and dbg metadata need
  // to be unique, so we need to drop it.
  F->setSubprogram(nullptr);

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
                    << "From: " << *F);

  uint64_t LargestVectorWidth = 0;
  for (auto *I : Params)
    if (auto *VT = dyn_cast<llvm::VectorType>(I))
      LargestVectorWidth = std::max(
          LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinValue());

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
                                       PAL.getRetAttrs(), ArgAttrVec));

  // Remap argument indices in the allocsize attribute.
  if (auto AllocSize = NF->getAttributes().getFnAttrs().getAllocSizeArgs()) {
    unsigned Arg1 = NewArgIndices[AllocSize->first];
    assert(Arg1 != (unsigned)-1 && "allocsize cannot be promoted argument");
    std::optional<unsigned> Arg2;
    if (AllocSize->second) {
      Arg2 = NewArgIndices[*AllocSize->second];
      assert(Arg2 != (unsigned)-1 && "allocsize cannot be promoted argument");
    }
    NF->addFnAttr(Attribute::getWithAllocSizeArgs(F->getContext(), Arg1, Arg2));
  }

  AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
  ArgAttrVec.clear();

  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all the callers of the function, transforming the call sites to
  // pass in the loaded pointers.
  SmallVector<Value *, 16> Args;
  const DataLayout &DL = F->getParent()->getDataLayout();
  SmallVector<WeakTrackingVH, 16> DeadArgs;

  while (!F->use_empty()) {
    CallBase &CB = cast<CallBase>(*F->user_back());
    assert(CB.getCalledFunction() == F);
    const AttributeList &CallPAL = CB.getAttributes();
    IRBuilder<NoFolder> IRB(&CB);

    // Loop over the operands, inserting GEPs and loads in the caller as
    // appropriate.
    auto *AI = CB.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo) {
      if (!ArgsToPromote.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument.
        ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
      } else if (!I->use_empty()) {
        Value *V = *AI;
        const auto &ArgParts = ArgsToPromote.find(&*I)->second;
        for (const auto &Pair : ArgParts) {
          LoadInst *LI = IRB.CreateAlignedLoad(
              Pair.second.Ty,
              createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
              Pair.second.Alignment, V->getName() + ".val");
          if (Pair.second.MustExecInstr) {
            LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
            LI->copyMetadata(*Pair.second.MustExecInstr,
                             {LLVMContext::MD_dereferenceable,
                              LLVMContext::MD_dereferenceable_or_null,
                              LLVMContext::MD_noundef,
                              LLVMContext::MD_nontemporal});
            // Only transfer poison-generating metadata if we also have
            // !noundef.
            // TODO: Without !noundef, we could merge this metadata across
            // all promoted loads.
            if (LI->hasMetadata(LLVMContext::MD_noundef))
              LI->copyMetadata(*Pair.second.MustExecInstr,
                               {LLVMContext::MD_range, LLVMContext::MD_nonnull,
                                LLVMContext::MD_align});
          }
          Args.push_back(LI);
          ArgAttrVec.push_back(AttributeSet());
        }
      } else {
        assert(ArgsToPromote.count(&*I) && I->use_empty());
        DeadArgs.emplace_back(AI->get());
      }
    }

    // Push any varargs arguments on the list.
    for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CB.getOperandBundlesAsDefs(OpBundles);

    CallBase *NewCS = nullptr;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", &CB);
    } else {
      auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB);
      NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS->setCallingConv(CB.getCallingConv());
    NewCS->setAttributes(AttributeList::get(F->getContext(),
                                            CallPAL.getFnAttrs(),
                                            CallPAL.getRetAttrs(), ArgAttrVec));
    NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
    Args.clear();
    ArgAttrVec.clear();

    AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
                                                  LargestVectorWidth);

    if (!CB.use_empty()) {
      CB.replaceAllUsesWith(NewCS);
      NewCS->takeName(&CB);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    CB.eraseFromParent();
  }

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs);

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->splice(NF->begin(), F);

  // We will collect all the newly created allocas to promote them into
  // registers after the following loop.
  SmallVector<AllocaInst *, 4> Allocas;

  // Loop over the argument list, transferring uses of the old arguments over
  // to the new arguments, also transferring over the names as well.
  Function::arg_iterator I2 = NF->arg_begin();
  for (Argument &Arg : F->args()) {
    if (!ArgsToPromote.count(&Arg)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      Arg.replaceAllUsesWith(&*I2);
      I2->takeName(&Arg);
      ++I2;
      continue;
    }

    // There potentially are metadata uses for things like llvm.dbg.value.
    // Replace them with undef, after handling the other regular uses.
    auto RauwUndefMetadata = make_scope_exit(
        [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });

    if (Arg.use_empty())
      continue;

    // Otherwise, if we promoted this argument, we have to create an alloca in
    // the callee for every promotable part and store each of the new incoming
    // arguments into the corresponding alloca, which gives the old code (in
    // particular the store instructions, if they are allowed) a chance to work
    // as before.
    assert(Arg.getType()->isPointerTy() &&
           "Only arguments with a pointer type are promotable");

    IRBuilder<NoFolder> IRB(&NF->begin()->front());

    // Add allocas only for the promoted parts, i.e. the entries from
    // ArgsToPromote.
    SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
    for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
      int64_t Offset = Pair.first;
      const ArgPart &Part = Pair.second;

      Argument *NewArg = I2++;
      NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");

      AllocaInst *NewAlloca = IRB.CreateAlloca(
          Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
      NewAlloca->setAlignment(Pair.second.Alignment);
      IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);

      // Collect the alloca so we can retarget the users to it.
      OffsetToAlloca.insert({Offset, NewAlloca});
    }

    auto GetAlloca = [&](Value *Ptr) {
      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      assert(Ptr == &Arg && "Not constant offset from arg?");
      return OffsetToAlloca.lookup(Offset.getSExtValue());
    };

    // Clean up the dead instructions (GEPs and BitCasts) between the original
    // argument and its users (loads and stores), and retarget every user to
    // the newly created alloca.
    SmallVector<Value *, 16> Worklist;
    SmallVector<Instruction *, 16> DeadInsts;
    append_range(Worklist, Arg.users());
    while (!Worklist.empty()) {
      Value *V = Worklist.pop_back_val();
      if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
        DeadInsts.push_back(cast<Instruction>(V));
        append_range(Worklist, V->users());
        continue;
      }

      if (auto *LI = dyn_cast<LoadInst>(V)) {
        Value *Ptr = LI->getPointerOperand();
        LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
        Value *Ptr = SI->getPointerOperand();
        SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      llvm_unreachable("Unexpected user");
    }

    for (Instruction *I : DeadInsts) {
      I->replaceAllUsesWith(PoisonValue::get(I->getType()));
      I->eraseFromParent();
    }

    // Collect the allocas for promotion.
    for (const auto &Pair : OffsetToAlloca) {
      assert(isAllocaPromotable(Pair.second) &&
             "By design, only promotable allocas should be produced.");
      Allocas.push_back(Pair.second);
    }
  }

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
                    << " alloca(s) are promotable by Mem2Reg\n");

  if (!Allocas.empty()) {
    // Promote the allocas into registers. Our earlier checks have ensured
    // that PromoteMemToReg() will succeed.
    auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF);
    auto &AC = FAM.getResult<AssumptionAnalysis>(*NF);
    PromoteMemToReg(Allocas, DT, &AC);
  }

  return NF;
}

/// Return true if we can prove that all callers pass in a valid pointer for
/// the specified function argument.
static bool allCallersPassValidPointerForArgument(Argument *Arg,
                                                  Align NeededAlign,
                                                  uint64_t NeededDerefBytes) {
  Function *Callee = Arg->getParent();
  const DataLayout &DL = Callee->getParent()->getDataLayout();
  APInt Bytes(64, NeededDerefBytes);

  // Check if the argument itself is marked dereferenceable and aligned.
  if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
    return true;

  // Look at all call sites of the function. At this point we know we only have
  // direct callees.
  return all_of(Callee->users(), [&](User *U) {
    CallBase &CB = cast<CallBase>(*U);
    return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
                                              NeededAlign, Bytes, DL);
  });
}

/// Determine that this argument is safe to promote, and find the argument
/// parts it can be promoted into.
static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
                         unsigned MaxElements, bool IsRecursive,
                         SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
  // Quick exit for unused arguments.
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all the uses are loads at known
  // offsets.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (i.e., there is a load in the
  // entry block) or the pointer passed in at every call site is guaranteed to
  // be valid.
  // In the former case, invalid loads can happen, but would have happened
  // anyway; in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.
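  //
  // For example (illustrative), a load that the callee only performs under a
  // condition, as in
  //   if (c) { v = *p; }
  // cannot simply be replaced by a load at every call site unless the pointer
  // is known dereferenceable there, whereas an unconditional load in the entry
  // block already executes on every call and can be hoisted freely.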

  SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
  Align NeededAlign(1);
  uint64_t NeededDerefBytes = 0;

  // If this is a byval argument, we also allow store instructions. We only
  // handle arguments with a specified alignment in this way; if the alignment
  // is unspecified, the actual alignment of the argument is target-specific.
  bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();

  // An end user of a pointer argument is a load or store instruction.
  // Returns std::nullopt if this load or store is not based on the argument.
  // Returns true if we can promote the instruction, false otherwise.
  auto HandleEndUser = [&](auto *I, Type *Ty,
                           bool GuaranteedToExecute) -> std::optional<bool> {
    // Don't promote volatile or atomic instructions.
    if (!I->isSimple())
      return false;

    Value *Ptr = I->getPointerOperand();
    APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
    Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                 /* AllowNonInbounds */ true);
    if (Ptr != Arg)
      return std::nullopt;

    if (Offset.getSignificantBits() >= 64)
      return false;

    TypeSize Size = DL.getTypeStoreSize(Ty);
    // Don't try to promote scalable types.
    if (Size.isScalable())
      return false;

    // If this is a recursive function and one of the types is a pointer,
    // then promoting it might lead to recursive promotion.
    if (IsRecursive && Ty->isPointerTy())
      return false;

    int64_t Off = Offset.getSExtValue();
    auto Pair = ArgParts.try_emplace(
        Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
    ArgPart &Part = Pair.first->second;
    bool OffsetNotSeenBefore = Pair.second;

    // We limit promotion to only promoting up to a fixed number of elements
    // of the aggregate.
    if (MaxElements > 0 && ArgParts.size() > MaxElements) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "more than " << MaxElements << " parts\n");
      return false;
    }

    // For now, we only support loading/storing one specific type at a given
    // offset.
    if (Part.Ty != Ty) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "accessed as both " << *Part.Ty << " and " << *Ty
                        << " at offset " << Off << "\n");
      return false;
    }

    // If this instruction is not guaranteed to execute, and we haven't seen a
    // load or store at this offset before (or it had lower alignment), then we
    // need to remember that requirement.
    // Note that skipping instructions of previously seen offsets is only
    // correct because we only allow a single type for a given offset, which
    // also means that the number of accessed bytes will be the same.
    if (!GuaranteedToExecute &&
        (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
      // We won't be able to prove dereferenceability for negative offsets.
      if (Off < 0)
        return false;

      // If the offset is not aligned, an aligned base pointer won't help.
      if (!isAligned(I->getAlign(), Off))
        return false;

      NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
      NeededAlign = std::max(NeededAlign, I->getAlign());
    }

    Part.Alignment = std::max(Part.Alignment, I->getAlign());
    return true;
  };
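
  // As an illustrative example of the bookkeeping above, given these uses of
  // the argument %arg (an IR sketch, not from the original source):
  //   %v0 = load i32, ptr %arg
  //   %p4 = getelementptr inbounds i8, ptr %arg, i64 4
  //   %v1 = load float, ptr %p4
  // HandleEndUser records ArgParts = {0 -> i32, 4 -> float} once the constant
  // offsets have been stripped and accumulated.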

  // Look for loads and stores that are guaranteed to execute on entry.
  for (Instruction &I : Arg->getParent()->getEntryBlock()) {
    std::optional<bool> Res{};
    if (LoadInst *LI = dyn_cast<LoadInst>(&I))
      Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
    else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
      Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ true);
    if (Res && !*Res)
      return false;

    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  // Now look at all loads of the argument. Remember the load instructions
  // for the aliasing check below.
  SmallVector<const Use *, 16> Worklist;
  SmallPtrSet<const Use *, 16> Visited;
  SmallVector<LoadInst *, 16> Loads;
  auto AppendUses = [&](const Value *V) {
    for (const Use &U : V->uses())
      if (Visited.insert(&U).second)
        Worklist.push_back(&U);
  };
  AppendUses(Arg);
  while (!Worklist.empty()) {
    const Use *U = Worklist.pop_back_val();
    Value *V = U->getUser();
    if (isa<BitCastInst>(V)) {
      AppendUses(V);
      continue;
    }

    if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
      if (!GEP->hasAllConstantIndices())
        return false;
      AppendUses(V);
      continue;
    }

    if (auto *LI = dyn_cast<LoadInst>(V)) {
      if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
        return false;
      Loads.push_back(LI);
      continue;
    }

    // Stores are allowed for byval arguments. Only stores *to* the argument
    // are allowed; all other stores are unknown users.
    auto *SI = dyn_cast<StoreInst>(V);
    if (AreStoresAllowed && SI &&
        U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
      if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ false))
        return false;
      continue;
    }

    // Unknown user.
    LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                      << "unknown user " << *V << "\n");
    return false;
  }

  if (NeededDerefBytes || NeededAlign > 1) {
    // Try to prove the required dereferenceability and alignment.
    if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
                                               NeededDerefBytes)) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "not dereferenceable or aligned\n");
      return false;
    }
  }

  if (ArgParts.empty())
    return true; // No users, this is a dead argument.

  // Sort parts by offset.
  append_range(ArgPartsVec, ArgParts);
  sort(ArgPartsVec, llvm::less_first());

  // Make sure the parts are non-overlapping.
  int64_t Offset = ArgPartsVec[0].first;
  for (const auto &Pair : ArgPartsVec) {
    if (Pair.first < Offset)
      return false; // Overlap with previous part.

    Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
  }
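
  // For example (illustrative): parts {0 -> i32, 2 -> i32} overlap, because
  // the part at offset 0 covers bytes [0, 4), so promotion is rejected;
  // parts {0 -> i32, 4 -> float} are disjoint and acceptable.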

  // If store instructions are allowed, the path from the entry of the function
  // to each load may not be free of instructions that potentially invalidate
  // the load, and this is an admissible situation.
  if (AreStoresAllowed)
    return true;

  // Okay, now we know that the argument is only used by load instructions, and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.
  //
  // Because there could be several/many load instructions, remember which
  // blocks we know to be transparent to the load.
  df_iterator_default_set<BasicBlock *, 16> TranspBlocks;

  for (LoadInst *Load : Loads) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    BasicBlock *BB = Load->getParent();

    MemoryLocation Loc = MemoryLocation::get(Load);
    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))
      return false; // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block.
    for (BasicBlock *P : predecessors(BB)) {
      for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
        if (AAR.canBasicBlockModify(*TranspBB, Loc))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}

/// Check if callers and callee agree on how promoted arguments would be
/// passed.
static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
                                  const TargetTransformInfo &TTI) {
  return all_of(F.uses(), [&](const Use &U) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    if (!CB)
      return false;

    const Function *Caller = CB->getCaller();
    const Function *Callee = CB->getCalledFunction();
    return TTI.areTypesABICompatible(Caller, Callee, Types);
  });
}

/// PromoteArguments - This method checks the specified function to see if
/// there are any promotable arguments and if it is safe to promote the
/// function (for example, all callers are direct). If it is safe to promote
/// some arguments, it calls the DoPromotion method.
static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
                                  unsigned MaxElements, bool IsRecursive) {
  // Don't perform argument promotion for naked functions; otherwise we can end
  // up removing parameters that are seemingly 'not used' as they are referred
  // to in the assembly.
  if (F->hasFnAttribute(Attribute::Naked))
    return nullptr;

  // Make sure that it is local to this module.
  if (!F->hasLocalLinkage())
    return nullptr;

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg())
    return nullptr;

  // Don't transform functions that receive inallocas, as the transformation
  // may not be safe depending on the calling convention.
  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
    return nullptr;

  // First check: see if there are any pointer arguments! If not, quick exit.
  SmallVector<Argument *, 16> PointerArgs;
  for (Argument &I : F->args())
    if (I.getType()->isPointerTy())
      PointerArgs.push_back(&I);
  if (PointerArgs.empty())
    return nullptr;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers. Also see if the function
  // is self-recursive.
  for (Use &U : F->uses()) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    // Must be a direct call.
    if (CB == nullptr || !CB->isCallee(&U) ||
        CB->getFunctionType() != F->getFunctionType())
      return nullptr;

    // Can't change the signature of a musttail callee.
    if (CB->isMustTailCall())
      return nullptr;

    if (CB->getFunction() == F)
      IsRecursive = true;
  }

  // Can't change the signature of a musttail caller.
  // FIXME: Support promoting whole chains of musttail functions.
  for (BasicBlock &BB : *F)
    if (BB.getTerminatingMustTailCall())
      return nullptr;

  const DataLayout &DL = F->getParent()->getDataLayout();
  auto &AAR = FAM.getResult<AAManager>(*F);
  const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F);

  // Check to see which arguments are promotable. If an argument is promotable,
  // add it to ArgsToPromote.
  DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
  unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams();
  for (Argument *PtrArg : PointerArgs) {
    // Replace the sret attribute with noalias. This reduces register pressure
    // by avoiding a register copy.
    if (PtrArg->hasStructRetAttr()) {
      unsigned ArgNo = PtrArg->getArgNo();
      F->removeParamAttr(ArgNo, Attribute::StructRet);
      F->addParamAttr(ArgNo, Attribute::NoAlias);
      for (Use &U : F->uses()) {
        CallBase &CB = cast<CallBase>(*U.getUser());
        CB.removeParamAttr(ArgNo, Attribute::StructRet);
        CB.addParamAttr(ArgNo, Attribute::NoAlias);
      }
    }

    // See if we can promote the pointer to its value.
    SmallVector<OffsetAndArgPart, 4> ArgParts;

    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
      SmallVector<Type *, 4> Types;
      for (const auto &Pair : ArgParts)
        Types.push_back(Pair.second.Ty);

      if (areTypesABICompatible(Types, *F, TTI)) {
        NumArgsAfterPromote += ArgParts.size() - 1;
        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
      }
    }
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty())
    return nullptr;

  if (NumArgsAfterPromote > TTI.getMaxNumArgs())
    return nullptr;

  return doPromotion(F, FAM, ArgsToPromote);
}

PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
                                             CGSCCAnalysisManager &AM,
                                             LazyCallGraph &CG,
                                             CGSCCUpdateResult &UR) {
  bool Changed = false, LocalChange;

  // Iterate until we stop promoting from this SCC.
  do {
    LocalChange = false;

    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

    bool IsRecursive = C.size() > 1;
    for (LazyCallGraph::Node &N : C) {
      Function &OldF = N.getFunction();
      Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive);
      if (!NewF)
        continue;
      LocalChange = true;

      // Directly substitute the functions in the call graph. Note that this
      // requires the old function to be completely dead and completely
      // replaced by the new function. It does no call graph updates, it merely
      // swaps out the particular function mapped to a particular node in the
      // graph.
      C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
      FAM.clear(OldF, OldF.getName());
      OldF.eraseFromParent();

      PreservedAnalyses FuncPA;
      FuncPA.preserveSet<CFGAnalyses>();
      for (auto *U : NewF->users()) {
        auto *UserF = cast<CallBase>(U)->getFunction();
        FAM.invalidate(*UserF, FuncPA);
      }
    }

    Changed |= LocalChange;
  } while (LocalChange);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  // We've cleared out analyses for deleted functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We've manually invalidated analyses for functions we've modified.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}
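
// NOTE: The pass is exposed under the name "argpromotion". A minimal way to
// exercise it on a test input (assuming a file test.ll containing an internal
// function whose pointer argument is only loaded) is:
//   opt -passes=argpromotion -S test.ll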