1 //===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass promotes "by reference" arguments to be "by value" arguments. In
10 // practice, this means looking for internal functions that have pointer
11 // arguments. If it can prove, through the use of alias analysis, that an
12 // argument is *only* loaded, then it can pass the value into the function
13 // instead of the address of the value. This can cause recursive simplification
14 // of code and lead to the elimination of allocas (especially in C++ template
15 // code like the STL).
17 // This pass also handles aggregate arguments that are passed into a function,
18 // scalarizing them if the elements of the aggregate are only loaded. Note that
19 // by default it refuses to scalarize aggregates which would require passing in
20 // more than three operands to the function, because passing thousands of
21 // operands for a large array or structure is unprofitable! This limit can be
22 // configured or disabled, however.
24 // Note that this transformation could also be done for arguments that are only
25 // stored to (returning the value instead), but does not currently. This case
26 // would be best handled when and if LLVM begins supporting multiple return
27 // values from functions.
29 //===----------------------------------------------------------------------===//
31 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
32 #include "llvm/ADT/DepthFirstIterator.h"
33 #include "llvm/ADT/None.h"
34 #include "llvm/ADT/Optional.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/ADT/Statistic.h"
39 #include "llvm/ADT/StringExtras.h"
40 #include "llvm/ADT/Twine.h"
41 #include "llvm/Analysis/AliasAnalysis.h"
42 #include "llvm/Analysis/AssumptionCache.h"
43 #include "llvm/Analysis/BasicAliasAnalysis.h"
44 #include "llvm/Analysis/CGSCCPassManager.h"
45 #include "llvm/Analysis/CallGraph.h"
46 #include "llvm/Analysis/CallGraphSCCPass.h"
47 #include "llvm/Analysis/LazyCallGraph.h"
48 #include "llvm/Analysis/Loads.h"
49 #include "llvm/Analysis/MemoryLocation.h"
50 #include "llvm/Analysis/TargetLibraryInfo.h"
51 #include "llvm/Analysis/TargetTransformInfo.h"
52 #include "llvm/IR/Argument.h"
53 #include "llvm/IR/Attributes.h"
54 #include "llvm/IR/BasicBlock.h"
55 #include "llvm/IR/CFG.h"
56 #include "llvm/IR/CallSite.h"
57 #include "llvm/IR/Constants.h"
58 #include "llvm/IR/DataLayout.h"
59 #include "llvm/IR/DerivedTypes.h"
60 #include "llvm/IR/Function.h"
61 #include "llvm/IR/InstrTypes.h"
62 #include "llvm/IR/Instruction.h"
63 #include "llvm/IR/Instructions.h"
64 #include "llvm/IR/Metadata.h"
65 #include "llvm/IR/Module.h"
66 #include "llvm/IR/PassManager.h"
67 #include "llvm/IR/Type.h"
68 #include "llvm/IR/Use.h"
69 #include "llvm/IR/User.h"
70 #include "llvm/IR/Value.h"
71 #include "llvm/Pass.h"
72 #include "llvm/Support/Casting.h"
73 #include "llvm/Support/Debug.h"
74 #include "llvm/Support/raw_ostream.h"
75 #include "llvm/Transforms/IPO.h"
89 #define DEBUG_TYPE "argpromotion"
91 STATISTIC(NumArgumentsPromoted
, "Number of pointer arguments promoted");
92 STATISTIC(NumAggregatesPromoted
, "Number of aggregate arguments promoted");
93 STATISTIC(NumByValArgsPromoted
, "Number of byval arguments promoted");
94 STATISTIC(NumArgumentsDead
, "Number of dead pointer args eliminated");
96 /// A vector used to hold the indices of a single GEP instruction
97 using IndicesVector
= std::vector
<uint64_t>;
99 /// DoPromotion - This method actually performs the promotion of the specified
100 /// arguments, and returns the new function. At this point, we know that it's
103 doPromotion(Function
*F
, SmallPtrSetImpl
<Argument
*> &ArgsToPromote
,
104 SmallPtrSetImpl
<Argument
*> &ByValArgsToTransform
,
105 Optional
<function_ref
<void(CallSite OldCS
, CallSite NewCS
)>>
107 // Start by computing a new prototype for the function, which is the same as
108 // the old function, but has modified arguments.
109 FunctionType
*FTy
= F
->getFunctionType();
110 std::vector
<Type
*> Params
;
112 using ScalarizeTable
= std::set
<std::pair
<Type
*, IndicesVector
>>;
114 // ScalarizedElements - If we are promoting a pointer that has elements
115 // accessed out of it, keep track of which elements are accessed so that we
116 // can add one argument for each.
118 // Arguments that are directly loaded will have a zero element value here, to
119 // handle cases where there are both a direct load and GEP accesses.
120 std::map
<Argument
*, ScalarizeTable
> ScalarizedElements
;
122 // OriginalLoads - Keep track of a representative load instruction from the
123 // original function so that we can tell the alias analysis implementation
124 // what the new GEP/Load instructions we are inserting look like.
125 // We need to keep the original loads for each argument and the elements
126 // of the argument that are accessed.
127 std::map
<std::pair
<Argument
*, IndicesVector
>, LoadInst
*> OriginalLoads
;
129 // Attribute - Keep track of the parameter attributes for the arguments
130 // that we are *not* promoting. For the ones that we do promote, the parameter
131 // attributes are lost
132 SmallVector
<AttributeSet
, 8> ArgAttrVec
;
133 AttributeList PAL
= F
->getAttributes();
135 // First, determine the new argument list
137 for (Function::arg_iterator I
= F
->arg_begin(), E
= F
->arg_end(); I
!= E
;
139 if (ByValArgsToTransform
.count(&*I
)) {
140 // Simple byval argument? Just add all the struct element types.
141 Type
*AgTy
= cast
<PointerType
>(I
->getType())->getElementType();
142 StructType
*STy
= cast
<StructType
>(AgTy
);
143 Params
.insert(Params
.end(), STy
->element_begin(), STy
->element_end());
144 ArgAttrVec
.insert(ArgAttrVec
.end(), STy
->getNumElements(),
146 ++NumByValArgsPromoted
;
147 } else if (!ArgsToPromote
.count(&*I
)) {
148 // Unchanged argument
149 Params
.push_back(I
->getType());
150 ArgAttrVec
.push_back(PAL
.getParamAttributes(ArgNo
));
151 } else if (I
->use_empty()) {
152 // Dead argument (which are always marked as promotable)
155 // There may be remaining metadata uses of the argument for things like
156 // llvm.dbg.value. Replace them with undef.
157 I
->replaceAllUsesWith(UndefValue::get(I
->getType()));
159 // Okay, this is being promoted. This means that the only uses are loads
160 // or GEPs which are only used by loads
162 // In this table, we will track which indices are loaded from the argument
163 // (where direct loads are tracked as no indices).
164 ScalarizeTable
&ArgIndices
= ScalarizedElements
[&*I
];
165 for (User
*U
: I
->users()) {
166 Instruction
*UI
= cast
<Instruction
>(U
);
168 if (LoadInst
*L
= dyn_cast
<LoadInst
>(UI
))
169 SrcTy
= L
->getType();
171 SrcTy
= cast
<GetElementPtrInst
>(UI
)->getSourceElementType();
172 IndicesVector Indices
;
173 Indices
.reserve(UI
->getNumOperands() - 1);
174 // Since loads will only have a single operand, and GEPs only a single
175 // non-index operand, this will record direct loads without any indices,
176 // and gep+loads with the GEP indices.
177 for (User::op_iterator II
= UI
->op_begin() + 1, IE
= UI
->op_end();
179 Indices
.push_back(cast
<ConstantInt
>(*II
)->getSExtValue());
180 // GEPs with a single 0 index can be merged with direct loads
181 if (Indices
.size() == 1 && Indices
.front() == 0)
183 ArgIndices
.insert(std::make_pair(SrcTy
, Indices
));
185 if (LoadInst
*L
= dyn_cast
<LoadInst
>(UI
))
188 // Take any load, we will use it only to update Alias Analysis
189 OrigLoad
= cast
<LoadInst
>(UI
->user_back());
190 OriginalLoads
[std::make_pair(&*I
, Indices
)] = OrigLoad
;
193 // Add a parameter to the function for each element passed in.
194 for (const auto &ArgIndex
: ArgIndices
) {
195 // not allowed to dereference ->begin() if size() is 0
196 Params
.push_back(GetElementPtrInst::getIndexedType(
197 cast
<PointerType
>(I
->getType()->getScalarType())->getElementType(),
199 ArgAttrVec
.push_back(AttributeSet());
200 assert(Params
.back());
203 if (ArgIndices
.size() == 1 && ArgIndices
.begin()->second
.empty())
204 ++NumArgumentsPromoted
;
206 ++NumAggregatesPromoted
;
210 Type
*RetTy
= FTy
->getReturnType();
212 // Construct the new function type using the new arguments.
213 FunctionType
*NFTy
= FunctionType::get(RetTy
, Params
, FTy
->isVarArg());
215 // Create the new function body and insert it into the module.
216 Function
*NF
= Function::Create(NFTy
, F
->getLinkage(), F
->getAddressSpace(),
218 NF
->copyAttributesFrom(F
);
219 NF
->copyMetadata(F
, 0);
222 LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF
<< "\n"
225 // Recompute the parameter attributes list based on the new arguments for
227 NF
->setAttributes(AttributeList::get(F
->getContext(), PAL
.getFnAttributes(),
228 PAL
.getRetAttributes(), ArgAttrVec
));
231 F
->getParent()->getFunctionList().insert(F
->getIterator(), NF
);
234 // Loop over all of the callers of the function, transforming the call sites
235 // to pass in the loaded pointers.
237 SmallVector
<Value
*, 16> Args
;
238 while (!F
->use_empty()) {
239 CallSite
CS(F
->user_back());
240 assert(CS
.getCalledFunction() == F
);
241 Instruction
*Call
= CS
.getInstruction();
242 const AttributeList
&CallPAL
= CS
.getAttributes();
244 // Loop over the operands, inserting GEP and loads in the caller as
246 CallSite::arg_iterator AI
= CS
.arg_begin();
248 for (Function::arg_iterator I
= F
->arg_begin(), E
= F
->arg_end(); I
!= E
;
250 if (!ArgsToPromote
.count(&*I
) && !ByValArgsToTransform
.count(&*I
)) {
251 Args
.push_back(*AI
); // Unmodified argument
252 ArgAttrVec
.push_back(CallPAL
.getParamAttributes(ArgNo
));
253 } else if (ByValArgsToTransform
.count(&*I
)) {
254 // Emit a GEP and load for each element of the struct.
255 Type
*AgTy
= cast
<PointerType
>(I
->getType())->getElementType();
256 StructType
*STy
= cast
<StructType
>(AgTy
);
258 ConstantInt::get(Type::getInt32Ty(F
->getContext()), 0), nullptr};
259 for (unsigned i
= 0, e
= STy
->getNumElements(); i
!= e
; ++i
) {
260 Idxs
[1] = ConstantInt::get(Type::getInt32Ty(F
->getContext()), i
);
261 Value
*Idx
= GetElementPtrInst::Create(
262 STy
, *AI
, Idxs
, (*AI
)->getName() + "." + Twine(i
), Call
);
263 // TODO: Tell AA about the new values?
264 Args
.push_back(new LoadInst(STy
->getElementType(i
), Idx
,
265 Idx
->getName() + ".val", Call
));
266 ArgAttrVec
.push_back(AttributeSet());
268 } else if (!I
->use_empty()) {
269 // Non-dead argument: insert GEPs and loads as appropriate.
270 ScalarizeTable
&ArgIndices
= ScalarizedElements
[&*I
];
271 // Store the Value* version of the indices in here, but declare it now
273 std::vector
<Value
*> Ops
;
274 for (const auto &ArgIndex
: ArgIndices
) {
277 OriginalLoads
[std::make_pair(&*I
, ArgIndex
.second
)];
278 if (!ArgIndex
.second
.empty()) {
279 Ops
.reserve(ArgIndex
.second
.size());
280 Type
*ElTy
= V
->getType();
281 for (auto II
: ArgIndex
.second
) {
282 // Use i32 to index structs, and i64 for others (pointers/arrays).
283 // This satisfies GEP constraints.
285 (ElTy
->isStructTy() ? Type::getInt32Ty(F
->getContext())
286 : Type::getInt64Ty(F
->getContext()));
287 Ops
.push_back(ConstantInt::get(IdxTy
, II
));
288 // Keep track of the type we're currently indexing.
289 if (auto *ElPTy
= dyn_cast
<PointerType
>(ElTy
))
290 ElTy
= ElPTy
->getElementType();
292 ElTy
= cast
<CompositeType
>(ElTy
)->getTypeAtIndex(II
);
294 // And create a GEP to extract those indices.
295 V
= GetElementPtrInst::Create(ArgIndex
.first
, V
, Ops
,
296 V
->getName() + ".idx", Call
);
299 // Since we're replacing a load make sure we take the alignment
300 // of the previous load.
302 new LoadInst(OrigLoad
->getType(), V
, V
->getName() + ".val", Call
);
303 newLoad
->setAlignment(OrigLoad
->getAlignment());
304 // Transfer the AA info too.
306 OrigLoad
->getAAMetadata(AAInfo
);
307 newLoad
->setAAMetadata(AAInfo
);
309 Args
.push_back(newLoad
);
310 ArgAttrVec
.push_back(AttributeSet());
314 // Push any varargs arguments on the list.
315 for (; AI
!= CS
.arg_end(); ++AI
, ++ArgNo
) {
317 ArgAttrVec
.push_back(CallPAL
.getParamAttributes(ArgNo
));
320 SmallVector
<OperandBundleDef
, 1> OpBundles
;
321 CS
.getOperandBundlesAsDefs(OpBundles
);
324 if (InvokeInst
*II
= dyn_cast
<InvokeInst
>(Call
)) {
325 NewCS
= InvokeInst::Create(NF
, II
->getNormalDest(), II
->getUnwindDest(),
326 Args
, OpBundles
, "", Call
);
328 auto *NewCall
= CallInst::Create(NF
, Args
, OpBundles
, "", Call
);
329 NewCall
->setTailCallKind(cast
<CallInst
>(Call
)->getTailCallKind());
332 NewCS
.setCallingConv(CS
.getCallingConv());
334 AttributeList::get(F
->getContext(), CallPAL
.getFnAttributes(),
335 CallPAL
.getRetAttributes(), ArgAttrVec
));
336 NewCS
->setDebugLoc(Call
->getDebugLoc());
338 if (Call
->extractProfTotalWeight(W
))
339 NewCS
->setProfWeight(W
);
343 // Update the callgraph to know that the callsite has been transformed.
345 (*ReplaceCallSite
)(CS
, NewCS
);
347 if (!Call
->use_empty()) {
348 Call
->replaceAllUsesWith(NewCS
.getInstruction());
349 NewCS
->takeName(Call
);
352 // Finally, remove the old call from the program, reducing the use-count of
354 Call
->eraseFromParent();
357 const DataLayout
&DL
= F
->getParent()->getDataLayout();
359 // Since we have now created the new function, splice the body of the old
360 // function right into the new function, leaving the old rotting hulk of the
362 NF
->getBasicBlockList().splice(NF
->begin(), F
->getBasicBlockList());
364 // Loop over the argument list, transferring uses of the old arguments over to
365 // the new arguments, also transferring over the names as well.
366 for (Function::arg_iterator I
= F
->arg_begin(), E
= F
->arg_end(),
367 I2
= NF
->arg_begin();
369 if (!ArgsToPromote
.count(&*I
) && !ByValArgsToTransform
.count(&*I
)) {
370 // If this is an unmodified argument, move the name and users over to the
372 I
->replaceAllUsesWith(&*I2
);
378 if (ByValArgsToTransform
.count(&*I
)) {
379 // In the callee, we create an alloca, and store each of the new incoming
380 // arguments into the alloca.
381 Instruction
*InsertPt
= &NF
->begin()->front();
383 // Just add all the struct element types.
384 Type
*AgTy
= cast
<PointerType
>(I
->getType())->getElementType();
385 Value
*TheAlloca
= new AllocaInst(AgTy
, DL
.getAllocaAddrSpace(), nullptr,
386 I
->getParamAlignment(), "", InsertPt
);
387 StructType
*STy
= cast
<StructType
>(AgTy
);
388 Value
*Idxs
[2] = {ConstantInt::get(Type::getInt32Ty(F
->getContext()), 0),
391 for (unsigned i
= 0, e
= STy
->getNumElements(); i
!= e
; ++i
) {
392 Idxs
[1] = ConstantInt::get(Type::getInt32Ty(F
->getContext()), i
);
393 Value
*Idx
= GetElementPtrInst::Create(
394 AgTy
, TheAlloca
, Idxs
, TheAlloca
->getName() + "." + Twine(i
),
396 I2
->setName(I
->getName() + "." + Twine(i
));
397 new StoreInst(&*I2
++, Idx
, InsertPt
);
400 // Anything that used the arg should now use the alloca.
401 I
->replaceAllUsesWith(TheAlloca
);
402 TheAlloca
->takeName(&*I
);
404 // If the alloca is used in a call, we must clear the tail flag since
405 // the callee now uses an alloca from the caller.
406 for (User
*U
: TheAlloca
->users()) {
407 CallInst
*Call
= dyn_cast
<CallInst
>(U
);
410 Call
->setTailCall(false);
418 // Otherwise, if we promoted this argument, then all users are load
419 // instructions (or GEPs with only load users), and all loads should be
420 // using the new argument that we added.
421 ScalarizeTable
&ArgIndices
= ScalarizedElements
[&*I
];
423 while (!I
->use_empty()) {
424 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(I
->user_back())) {
425 assert(ArgIndices
.begin()->second
.empty() &&
426 "Load element should sort to front!");
427 I2
->setName(I
->getName() + ".val");
428 LI
->replaceAllUsesWith(&*I2
);
429 LI
->eraseFromParent();
430 LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << I
->getName()
431 << "' in function '" << F
->getName() << "'\n");
433 GetElementPtrInst
*GEP
= cast
<GetElementPtrInst
>(I
->user_back());
434 IndicesVector Operands
;
435 Operands
.reserve(GEP
->getNumIndices());
436 for (User::op_iterator II
= GEP
->idx_begin(), IE
= GEP
->idx_end();
438 Operands
.push_back(cast
<ConstantInt
>(*II
)->getSExtValue());
440 // GEPs with a single 0 index can be merged with direct loads
441 if (Operands
.size() == 1 && Operands
.front() == 0)
444 Function::arg_iterator TheArg
= I2
;
445 for (ScalarizeTable::iterator It
= ArgIndices
.begin();
446 It
->second
!= Operands
; ++It
, ++TheArg
) {
447 assert(It
!= ArgIndices
.end() && "GEP not handled??");
450 std::string NewName
= I
->getName();
451 for (unsigned i
= 0, e
= Operands
.size(); i
!= e
; ++i
) {
452 NewName
+= "." + utostr(Operands
[i
]);
455 TheArg
->setName(NewName
);
457 LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg
->getName()
458 << "' of function '" << NF
->getName() << "'\n");
460 // All of the uses must be load instructions. Replace them all with
461 // the argument specified by ArgNo.
462 while (!GEP
->use_empty()) {
463 LoadInst
*L
= cast
<LoadInst
>(GEP
->user_back());
464 L
->replaceAllUsesWith(&*TheArg
);
465 L
->eraseFromParent();
467 GEP
->eraseFromParent();
471 // Increment I2 past all of the arguments added for this promoted pointer.
472 std::advance(I2
, ArgIndices
.size());
475 assert(F
->isDeclaration());
479 /// AllCallersPassInValidPointerForArgument - Return true if we can prove that
480 /// all callees pass in a valid pointer for the specified function argument.
481 static bool allCallersPassInValidPointerForArgument(Argument
*Arg
) {
482 Function
*Callee
= Arg
->getParent();
483 const DataLayout
&DL
= Callee
->getParent()->getDataLayout();
485 unsigned ArgNo
= Arg
->getArgNo();
487 // Look at all call sites of the function. At this point we know we only have
489 for (User
*U
: Callee
->users()) {
491 assert(CS
&& "Should only have direct calls!");
493 if (!isDereferenceablePointer(CS
.getArgument(ArgNo
), DL
))
499 /// Returns true if Prefix is a prefix of longer. That means, Longer has a size
500 /// that is greater than or equal to the size of prefix, and each of the
501 /// elements in Prefix is the same as the corresponding elements in Longer.
503 /// This means it also returns true when Prefix and Longer are equal!
504 static bool isPrefix(const IndicesVector
&Prefix
, const IndicesVector
&Longer
) {
505 if (Prefix
.size() > Longer
.size())
507 return std::equal(Prefix
.begin(), Prefix
.end(), Longer
.begin());
510 /// Checks if Indices, or a prefix of Indices, is in Set.
511 static bool prefixIn(const IndicesVector
&Indices
,
512 std::set
<IndicesVector
> &Set
) {
513 std::set
<IndicesVector
>::iterator Low
;
514 Low
= Set
.upper_bound(Indices
);
515 if (Low
!= Set
.begin())
517 // Low is now the last element smaller than or equal to Indices. This means
518 // it points to a prefix of Indices (possibly Indices itself), if such
521 // This load is safe if any prefix of its operands is safe to load.
522 return Low
!= Set
.end() && isPrefix(*Low
, Indices
);
525 /// Mark the given indices (ToMark) as safe in the given set of indices
526 /// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
527 /// is already a prefix of Indices in Safe, Indices are implicitely marked safe
528 /// already. Furthermore, any indices that Indices is itself a prefix of, are
529 /// removed from Safe (since they are implicitely safe because of Indices now).
530 static void markIndicesSafe(const IndicesVector
&ToMark
,
531 std::set
<IndicesVector
> &Safe
) {
532 std::set
<IndicesVector
>::iterator Low
;
533 Low
= Safe
.upper_bound(ToMark
);
534 // Guard against the case where Safe is empty
535 if (Low
!= Safe
.begin())
537 // Low is now the last element smaller than or equal to Indices. This
538 // means it points to a prefix of Indices (possibly Indices itself), if
539 // such prefix exists.
540 if (Low
!= Safe
.end()) {
541 if (isPrefix(*Low
, ToMark
))
542 // If there is already a prefix of these indices (or exactly these
543 // indices) marked a safe, don't bother adding these indices
546 // Increment Low, so we can use it as a "insert before" hint
550 Low
= Safe
.insert(Low
, ToMark
);
552 // If there we're a prefix of longer index list(s), remove those
553 std::set
<IndicesVector
>::iterator End
= Safe
.end();
554 while (Low
!= End
&& isPrefix(ToMark
, *Low
)) {
555 std::set
<IndicesVector
>::iterator Remove
= Low
;
561 /// isSafeToPromoteArgument - As you might guess from the name of this method,
562 /// it checks to see if it is both safe and useful to promote the argument.
563 /// This method limits promotion of aggregates to only promote up to three
564 /// elements of the aggregate in order to avoid exploding the number of
565 /// arguments passed in.
566 static bool isSafeToPromoteArgument(Argument
*Arg
, bool isByValOrInAlloca
,
567 AAResults
&AAR
, unsigned MaxElements
) {
568 using GEPIndicesSet
= std::set
<IndicesVector
>;
570 // Quick exit for unused arguments
571 if (Arg
->use_empty())
574 // We can only promote this argument if all of the uses are loads, or are GEP
575 // instructions (with constant indices) that are subsequently loaded.
577 // Promoting the argument causes it to be loaded in the caller
578 // unconditionally. This is only safe if we can prove that either the load
579 // would have happened in the callee anyway (ie, there is a load in the entry
580 // block) or the pointer passed in at every call site is guaranteed to be
582 // In the former case, invalid loads can happen, but would have happened
583 // anyway, in the latter case, invalid loads won't happen. This prevents us
584 // from introducing an invalid load that wouldn't have happened in the
587 // This set will contain all sets of indices that are loaded in the entry
588 // block, and thus are safe to unconditionally load in the caller.
590 // This optimization is also safe for InAlloca parameters, because it verifies
591 // that the address isn't captured.
592 GEPIndicesSet SafeToUnconditionallyLoad
;
594 // This set contains all the sets of indices that we are planning to promote.
595 // This makes it possible to limit the number of arguments added.
596 GEPIndicesSet ToPromote
;
598 // If the pointer is always valid, any load with first index 0 is valid.
599 if (isByValOrInAlloca
|| allCallersPassInValidPointerForArgument(Arg
))
600 SafeToUnconditionallyLoad
.insert(IndicesVector(1, 0));
602 // First, iterate the entry block and mark loads of (geps of) arguments as
604 BasicBlock
&EntryBlock
= Arg
->getParent()->front();
605 // Declare this here so we can reuse it
606 IndicesVector Indices
;
607 for (Instruction
&I
: EntryBlock
)
608 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(&I
)) {
609 Value
*V
= LI
->getPointerOperand();
610 if (GetElementPtrInst
*GEP
= dyn_cast
<GetElementPtrInst
>(V
)) {
611 V
= GEP
->getPointerOperand();
613 // This load actually loads (part of) Arg? Check the indices then.
614 Indices
.reserve(GEP
->getNumIndices());
615 for (User::op_iterator II
= GEP
->idx_begin(), IE
= GEP
->idx_end();
617 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(*II
))
618 Indices
.push_back(CI
->getSExtValue());
620 // We found a non-constant GEP index for this argument? Bail out
621 // right away, can't promote this argument at all.
624 // Indices checked out, mark them as safe
625 markIndicesSafe(Indices
, SafeToUnconditionallyLoad
);
628 } else if (V
== Arg
) {
629 // Direct loads are equivalent to a GEP with a single 0 index.
630 markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad
);
634 // Now, iterate all uses of the argument to see if there are any uses that are
635 // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
636 SmallVector
<LoadInst
*, 16> Loads
;
637 IndicesVector Operands
;
638 for (Use
&U
: Arg
->uses()) {
639 User
*UR
= U
.getUser();
641 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(UR
)) {
642 // Don't hack volatile/atomic loads
646 // Direct loads are equivalent to a GEP with a zero index and then a load.
647 Operands
.push_back(0);
648 } else if (GetElementPtrInst
*GEP
= dyn_cast
<GetElementPtrInst
>(UR
)) {
649 if (GEP
->use_empty()) {
650 // Dead GEP's cause trouble later. Just remove them if we run into
652 GEP
->eraseFromParent();
653 // TODO: This runs the above loop over and over again for dead GEPs
654 // Couldn't we just do increment the UI iterator earlier and erase the
656 return isSafeToPromoteArgument(Arg
, isByValOrInAlloca
, AAR
,
660 // Ensure that all of the indices are constants.
661 for (User::op_iterator i
= GEP
->idx_begin(), e
= GEP
->idx_end(); i
!= e
;
663 if (ConstantInt
*C
= dyn_cast
<ConstantInt
>(*i
))
664 Operands
.push_back(C
->getSExtValue());
666 return false; // Not a constant operand GEP!
668 // Ensure that the only users of the GEP are load instructions.
669 for (User
*GEPU
: GEP
->users())
670 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(GEPU
)) {
671 // Don't hack volatile/atomic loads
676 // Other uses than load?
680 return false; // Not a load or a GEP.
683 // Now, see if it is safe to promote this load / loads of this GEP. Loading
684 // is safe if Operands, or a prefix of Operands, is marked as safe.
685 if (!prefixIn(Operands
, SafeToUnconditionallyLoad
))
688 // See if we are already promoting a load with these indices. If not, check
689 // to make sure that we aren't promoting too many elements. If so, nothing
691 if (ToPromote
.find(Operands
) == ToPromote
.end()) {
692 if (MaxElements
> 0 && ToPromote
.size() == MaxElements
) {
693 LLVM_DEBUG(dbgs() << "argpromotion not promoting argument '"
695 << "' because it would require adding more "
696 << "than " << MaxElements
697 << " arguments to the function.\n");
698 // We limit aggregate promotion to only promoting up to a fixed number
699 // of elements of the aggregate.
702 ToPromote
.insert(std::move(Operands
));
707 return true; // No users, this is a dead argument.
709 // Okay, now we know that the argument is only used by load instructions and
710 // it is safe to unconditionally perform all of them. Use alias analysis to
711 // check to see if the pointer is guaranteed to not be modified from entry of
712 // the function to each of the load instructions.
714 // Because there could be several/many load instructions, remember which
715 // blocks we know to be transparent to the load.
716 df_iterator_default_set
<BasicBlock
*, 16> TranspBlocks
;
718 for (LoadInst
*Load
: Loads
) {
719 // Check to see if the load is invalidated from the start of the block to
721 BasicBlock
*BB
= Load
->getParent();
723 MemoryLocation Loc
= MemoryLocation::get(Load
);
724 if (AAR
.canInstructionRangeModRef(BB
->front(), *Load
, Loc
, ModRefInfo::Mod
))
725 return false; // Pointer is invalidated!
727 // Now check every path from the entry block to the load for transparency.
728 // To do this, we perform a depth first search on the inverse CFG from the
730 for (BasicBlock
*P
: predecessors(BB
)) {
731 for (BasicBlock
*TranspBB
: inverse_depth_first_ext(P
, TranspBlocks
))
732 if (AAR
.canBasicBlockModify(*TranspBB
, Loc
))
737 // If the path from the entry of the function to each load is free of
738 // instructions that potentially invalidate the load, we can make the
743 /// Checks if a type could have padding bytes.
744 static bool isDenselyPacked(Type
*type
, const DataLayout
&DL
) {
745 // There is no size information, so be conservative.
746 if (!type
->isSized())
749 // If the alloc size is not equal to the storage size, then there are padding
750 // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
751 if (DL
.getTypeSizeInBits(type
) != DL
.getTypeAllocSizeInBits(type
))
754 if (!isa
<CompositeType
>(type
))
757 // For homogenous sequential types, check for padding within members.
758 if (SequentialType
*seqTy
= dyn_cast
<SequentialType
>(type
))
759 return isDenselyPacked(seqTy
->getElementType(), DL
);
761 // Check for padding within and between elements of a struct.
762 StructType
*StructTy
= cast
<StructType
>(type
);
763 const StructLayout
*Layout
= DL
.getStructLayout(StructTy
);
764 uint64_t StartPos
= 0;
765 for (unsigned i
= 0, E
= StructTy
->getNumElements(); i
< E
; ++i
) {
766 Type
*ElTy
= StructTy
->getElementType(i
);
767 if (!isDenselyPacked(ElTy
, DL
))
769 if (StartPos
!= Layout
->getElementOffsetInBits(i
))
771 StartPos
+= DL
.getTypeAllocSizeInBits(ElTy
);
777 /// Checks if the padding bytes of an argument could be accessed.
778 static bool canPaddingBeAccessed(Argument
*arg
) {
779 assert(arg
->hasByValAttr());
781 // Track all the pointers to the argument to make sure they are not captured.
782 SmallPtrSet
<Value
*, 16> PtrValues
;
783 PtrValues
.insert(arg
);
785 // Track all of the stores.
786 SmallVector
<StoreInst
*, 16> Stores
;
788 // Scan through the uses recursively to make sure the pointer is always used
790 SmallVector
<Value
*, 16> WorkList
;
791 WorkList
.insert(WorkList
.end(), arg
->user_begin(), arg
->user_end());
792 while (!WorkList
.empty()) {
793 Value
*V
= WorkList
.back();
795 if (isa
<GetElementPtrInst
>(V
) || isa
<PHINode
>(V
)) {
796 if (PtrValues
.insert(V
).second
)
797 WorkList
.insert(WorkList
.end(), V
->user_begin(), V
->user_end());
798 } else if (StoreInst
*Store
= dyn_cast
<StoreInst
>(V
)) {
799 Stores
.push_back(Store
);
800 } else if (!isa
<LoadInst
>(V
)) {
805 // Check to make sure the pointers aren't captured
806 for (StoreInst
*Store
: Stores
)
807 if (PtrValues
.count(Store
->getValueOperand()))
813 static bool areFunctionArgsABICompatible(
814 const Function
&F
, const TargetTransformInfo
&TTI
,
815 SmallPtrSetImpl
<Argument
*> &ArgsToPromote
,
816 SmallPtrSetImpl
<Argument
*> &ByValArgsToTransform
) {
817 for (const Use
&U
: F
.uses()) {
818 CallSite
CS(U
.getUser());
819 const Function
*Caller
= CS
.getCaller();
820 const Function
*Callee
= CS
.getCalledFunction();
821 if (!TTI
.areFunctionArgsABICompatible(Caller
, Callee
, ArgsToPromote
) ||
822 !TTI
.areFunctionArgsABICompatible(Caller
, Callee
, ByValArgsToTransform
))
828 /// PromoteArguments - This method checks the specified function to see if there
829 /// are any promotable arguments and if it is safe to promote the function (for
830 /// example, all callers are direct). If safe to promote some arguments, it
831 /// calls the DoPromotion method.
833 promoteArguments(Function
*F
, function_ref
<AAResults
&(Function
&F
)> AARGetter
,
834 unsigned MaxElements
,
835 Optional
<function_ref
<void(CallSite OldCS
, CallSite NewCS
)>>
837 const TargetTransformInfo
&TTI
) {
838 // Don't perform argument promotion for naked functions; otherwise we can end
839 // up removing parameters that are seemingly 'not used' as they are referred
840 // to in the assembly.
841 if(F
->hasFnAttribute(Attribute::Naked
))
844 // Make sure that it is local to this module.
845 if (!F
->hasLocalLinkage())
848 // Don't promote arguments for variadic functions. Adding, removing, or
849 // changing non-pack parameters can change the classification of pack
850 // parameters. Frontends encode that classification at the call site in the
851 // IR, while in the callee the classification is determined dynamically based
852 // on the number of registers consumed so far.
856 // First check: see if there are any pointer arguments! If not, quick exit.
857 SmallVector
<Argument
*, 16> PointerArgs
;
858 for (Argument
&I
: F
->args())
859 if (I
.getType()->isPointerTy())
860 PointerArgs
.push_back(&I
);
861 if (PointerArgs
.empty())
864 // Second check: make sure that all callers are direct callers. We can't
865 // transform functions that have indirect callers. Also see if the function
866 // is self-recursive and check that target features are compatible.
867 bool isSelfRecursive
= false;
868 for (Use
&U
: F
->uses()) {
869 CallSite
CS(U
.getUser());
870 // Must be a direct call.
871 if (CS
.getInstruction() == nullptr || !CS
.isCallee(&U
))
874 // Can't change signature of musttail callee
875 if (CS
.isMustTailCall())
878 if (CS
.getInstruction()->getParent()->getParent() == F
)
879 isSelfRecursive
= true;
882 // Can't change signature of musttail caller
883 // FIXME: Support promoting whole chain of musttail functions
884 for (BasicBlock
&BB
: *F
)
885 if (BB
.getTerminatingMustTailCall())
888 const DataLayout
&DL
= F
->getParent()->getDataLayout();
890 AAResults
&AAR
= AARGetter(*F
);
892 // Check to see which arguments are promotable. If an argument is promotable,
893 // add it to ArgsToPromote.
894 SmallPtrSet
<Argument
*, 8> ArgsToPromote
;
895 SmallPtrSet
<Argument
*, 8> ByValArgsToTransform
;
896 for (Argument
*PtrArg
: PointerArgs
) {
897 Type
*AgTy
= cast
<PointerType
>(PtrArg
->getType())->getElementType();
899 // Replace sret attribute with noalias. This reduces register pressure by
900 // avoiding a register copy.
901 if (PtrArg
->hasStructRetAttr()) {
902 unsigned ArgNo
= PtrArg
->getArgNo();
903 F
->removeParamAttr(ArgNo
, Attribute::StructRet
);
904 F
->addParamAttr(ArgNo
, Attribute::NoAlias
);
905 for (Use
&U
: F
->uses()) {
906 CallSite
CS(U
.getUser());
907 CS
.removeParamAttr(ArgNo
, Attribute::StructRet
);
908 CS
.addParamAttr(ArgNo
, Attribute::NoAlias
);
912 // If this is a byval argument, and if the aggregate type is small, just
913 // pass the elements, which is always safe, if the passed value is densely
914 // packed or if we can prove the padding bytes are never accessed. This does
915 // not apply to inalloca.
916 bool isSafeToPromote
=
917 PtrArg
->hasByValAttr() &&
918 (isDenselyPacked(AgTy
, DL
) || !canPaddingBeAccessed(PtrArg
));
919 if (isSafeToPromote
) {
920 if (StructType
*STy
= dyn_cast
<StructType
>(AgTy
)) {
921 if (MaxElements
> 0 && STy
->getNumElements() > MaxElements
) {
922 LLVM_DEBUG(dbgs() << "argpromotion disable promoting argument '"
924 << "' because it would require adding more"
925 << " than " << MaxElements
926 << " arguments to the function.\n");
930 // If all the elements are single-value types, we can promote it.
931 bool AllSimple
= true;
932 for (const auto *EltTy
: STy
->elements()) {
933 if (!EltTy
->isSingleValueType()) {
939 // Safe to transform, don't even bother trying to "promote" it.
940 // Passing the elements as a scalar will allow sroa to hack on
941 // the new alloca we introduce.
943 ByValArgsToTransform
.insert(PtrArg
);
949 // If the argument is a recursive type and we're in a recursive
950 // function, we could end up infinitely peeling the function argument.
951 if (isSelfRecursive
) {
952 if (StructType
*STy
= dyn_cast
<StructType
>(AgTy
)) {
953 bool RecursiveType
= false;
954 for (const auto *EltTy
: STy
->elements()) {
955 if (EltTy
== PtrArg
->getType()) {
956 RecursiveType
= true;
965 // Otherwise, see if we can promote the pointer to its value.
966 if (isSafeToPromoteArgument(PtrArg
, PtrArg
->hasByValOrInAllocaAttr(), AAR
,
968 ArgsToPromote
.insert(PtrArg
);
971 // No promotable pointer arguments.
972 if (ArgsToPromote
.empty() && ByValArgsToTransform
.empty())
975 if (!areFunctionArgsABICompatible(*F
, TTI
, ArgsToPromote
,
976 ByValArgsToTransform
))
979 return doPromotion(F
, ArgsToPromote
, ByValArgsToTransform
, ReplaceCallSite
);
982 PreservedAnalyses
ArgumentPromotionPass::run(LazyCallGraph::SCC
&C
,
983 CGSCCAnalysisManager
&AM
,
985 CGSCCUpdateResult
&UR
) {
986 bool Changed
= false, LocalChange
;
988 // Iterate until we stop promoting from this SCC.
992 for (LazyCallGraph::Node
&N
: C
) {
993 Function
&OldF
= N
.getFunction();
995 FunctionAnalysisManager
&FAM
=
996 AM
.getResult
<FunctionAnalysisManagerCGSCCProxy
>(C
, CG
).getManager();
997 // FIXME: This lambda must only be used with this function. We should
998 // skip the lambda and just get the AA results directly.
999 auto AARGetter
= [&](Function
&F
) -> AAResults
& {
1000 assert(&F
== &OldF
&& "Called with an unexpected function!");
1001 return FAM
.getResult
<AAManager
>(F
);
1004 const TargetTransformInfo
&TTI
= FAM
.getResult
<TargetIRAnalysis
>(OldF
);
1006 promoteArguments(&OldF
, AARGetter
, MaxElements
, None
, TTI
);
1011 // Directly substitute the functions in the call graph. Note that this
1012 // requires the old function to be completely dead and completely
1013 // replaced by the new function. It does no call graph updates, it merely
1014 // swaps out the particular function mapped to a particular node in the
1016 C
.getOuterRefSCC().replaceNodeFunction(N
, *NewF
);
1017 OldF
.eraseFromParent();
1020 Changed
|= LocalChange
;
1021 } while (LocalChange
);
1024 return PreservedAnalyses::all();
1026 return PreservedAnalyses::none();
1031 /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
1032 struct ArgPromotion
: public CallGraphSCCPass
{
1033 // Pass identification, replacement for typeid
1036 explicit ArgPromotion(unsigned MaxElements
= 3)
1037 : CallGraphSCCPass(ID
), MaxElements(MaxElements
) {
1038 initializeArgPromotionPass(*PassRegistry::getPassRegistry());
1041 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
1042 AU
.addRequired
<AssumptionCacheTracker
>();
1043 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
1044 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
1045 getAAResultsAnalysisUsage(AU
);
1046 CallGraphSCCPass::getAnalysisUsage(AU
);
1049 bool runOnSCC(CallGraphSCC
&SCC
) override
;
1052 using llvm::Pass::doInitialization
;
1054 bool doInitialization(CallGraph
&CG
) override
;
1056 /// The maximum number of elements to expand, or 0 for unlimited.
1057 unsigned MaxElements
;
1060 } // end anonymous namespace
1062 char ArgPromotion::ID
= 0;
1064 INITIALIZE_PASS_BEGIN(ArgPromotion
, "argpromotion",
1065 "Promote 'by reference' arguments to scalars", false,
1067 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
1068 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass
)
1069 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
1070 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass
)
1071 INITIALIZE_PASS_END(ArgPromotion
, "argpromotion",
1072 "Promote 'by reference' arguments to scalars", false, false)
1074 Pass
*llvm::createArgumentPromotionPass(unsigned MaxElements
) {
1075 return new ArgPromotion(MaxElements
);
1078 bool ArgPromotion::runOnSCC(CallGraphSCC
&SCC
) {
1082 // Get the callgraph information that we need to update to reflect our
1084 CallGraph
&CG
= getAnalysis
<CallGraphWrapperPass
>().getCallGraph();
1086 LegacyAARGetter
AARGetter(*this);
1088 bool Changed
= false, LocalChange
;
1090 // Iterate until we stop promoting from this SCC.
1092 LocalChange
= false;
1093 // Attempt to promote arguments from all functions in this SCC.
1094 for (CallGraphNode
*OldNode
: SCC
) {
1095 Function
*OldF
= OldNode
->getFunction();
1099 auto ReplaceCallSite
= [&](CallSite OldCS
, CallSite NewCS
) {
1100 Function
*Caller
= OldCS
.getInstruction()->getParent()->getParent();
1101 CallGraphNode
*NewCalleeNode
=
1102 CG
.getOrInsertFunction(NewCS
.getCalledFunction());
1103 CallGraphNode
*CallerNode
= CG
[Caller
];
1104 CallerNode
->replaceCallEdge(OldCS
, NewCS
, NewCalleeNode
);
1107 const TargetTransformInfo
&TTI
=
1108 getAnalysis
<TargetTransformInfoWrapperPass
>().getTTI(*OldF
);
1109 if (Function
*NewF
= promoteArguments(OldF
, AARGetter
, MaxElements
,
1110 {ReplaceCallSite
}, TTI
)) {
1113 // Update the call graph for the newly promoted function.
1114 CallGraphNode
*NewNode
= CG
.getOrInsertFunction(NewF
);
1115 NewNode
->stealCalledFunctionsFrom(OldNode
);
1116 if (OldNode
->getNumReferences() == 0)
1117 delete CG
.removeFunctionFromModule(OldNode
);
1119 OldF
->setLinkage(Function::ExternalLinkage
);
1121 // And updat ethe SCC we're iterating as well.
1122 SCC
.ReplaceNode(OldNode
, NewNode
);
1125 // Remember that we changed something.
1126 Changed
|= LocalChange
;
1127 } while (LocalChange
);
1132 bool ArgPromotion::doInitialization(CallGraph
&CG
) {
1133 return CallGraphSCCPass::doInitialization(CG
);