1 //===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass promotes "by reference" arguments to be "by value" arguments. In
10 // practice, this means looking for internal functions that have pointer
11 // arguments. If it can prove, through the use of alias analysis, that an
12 // argument is *only* loaded, then it can pass the value into the function
13 // instead of the address of the value. This can cause recursive simplification
14 // of code and lead to the elimination of allocas (especially in C++ template
15 // code like the STL).
17 // This pass also handles aggregate arguments that are passed into a function,
18 // scalarizing them if the elements of the aggregate are only loaded. Note that
19 // by default it refuses to scalarize aggregates which would require passing in
20 // more than three operands to the function, because passing thousands of
21 // operands for a large array or structure is unprofitable! This limit can be
22 // configured or disabled, however.
24 // Note that this transformation could also be done for arguments that are only
25 // stored to (returning the value instead), but does not currently. This case
26 // would be best handled when and if LLVM begins supporting multiple return
27 // values from functions.
29 //===----------------------------------------------------------------------===//
31 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
32 #include "llvm/ADT/DepthFirstIterator.h"
33 #include "llvm/ADT/None.h"
34 #include "llvm/ADT/Optional.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/ADT/Statistic.h"
39 #include "llvm/ADT/StringExtras.h"
40 #include "llvm/ADT/Twine.h"
41 #include "llvm/Analysis/AliasAnalysis.h"
42 #include "llvm/Analysis/AssumptionCache.h"
43 #include "llvm/Analysis/BasicAliasAnalysis.h"
44 #include "llvm/Analysis/CGSCCPassManager.h"
45 #include "llvm/Analysis/CallGraph.h"
46 #include "llvm/Analysis/CallGraphSCCPass.h"
47 #include "llvm/Analysis/LazyCallGraph.h"
48 #include "llvm/Analysis/Loads.h"
49 #include "llvm/Analysis/MemoryLocation.h"
50 #include "llvm/Analysis/TargetLibraryInfo.h"
51 #include "llvm/Analysis/TargetTransformInfo.h"
52 #include "llvm/IR/Argument.h"
53 #include "llvm/IR/Attributes.h"
54 #include "llvm/IR/BasicBlock.h"
55 #include "llvm/IR/CFG.h"
56 #include "llvm/IR/CallSite.h"
57 #include "llvm/IR/Constants.h"
58 #include "llvm/IR/DataLayout.h"
59 #include "llvm/IR/DerivedTypes.h"
60 #include "llvm/IR/Function.h"
61 #include "llvm/IR/IRBuilder.h"
62 #include "llvm/IR/InstrTypes.h"
63 #include "llvm/IR/Instruction.h"
64 #include "llvm/IR/Instructions.h"
65 #include "llvm/IR/Metadata.h"
66 #include "llvm/IR/Module.h"
67 #include "llvm/IR/NoFolder.h"
68 #include "llvm/IR/PassManager.h"
69 #include "llvm/IR/Type.h"
70 #include "llvm/IR/Use.h"
71 #include "llvm/IR/User.h"
72 #include "llvm/IR/Value.h"
73 #include "llvm/Pass.h"
74 #include "llvm/Support/Casting.h"
75 #include "llvm/Support/Debug.h"
76 #include "llvm/Support/raw_ostream.h"
77 #include "llvm/Transforms/IPO.h"
91 #define DEBUG_TYPE "argpromotion"
93 STATISTIC(NumArgumentsPromoted
, "Number of pointer arguments promoted");
94 STATISTIC(NumAggregatesPromoted
, "Number of aggregate arguments promoted");
95 STATISTIC(NumByValArgsPromoted
, "Number of byval arguments promoted");
96 STATISTIC(NumArgumentsDead
, "Number of dead pointer args eliminated");
98 /// A vector used to hold the indices of a single GEP instruction
99 using IndicesVector
= std::vector
<uint64_t>;
101 /// DoPromotion - This method actually performs the promotion of the specified
102 /// arguments, and returns the new function. At this point, we know that it's
105 doPromotion(Function
*F
, SmallPtrSetImpl
<Argument
*> &ArgsToPromote
,
106 SmallPtrSetImpl
<Argument
*> &ByValArgsToTransform
,
107 Optional
<function_ref
<void(CallSite OldCS
, CallSite NewCS
)>>
109 // Start by computing a new prototype for the function, which is the same as
110 // the old function, but has modified arguments.
111 FunctionType
*FTy
= F
->getFunctionType();
112 std::vector
<Type
*> Params
;
114 using ScalarizeTable
= std::set
<std::pair
<Type
*, IndicesVector
>>;
116 // ScalarizedElements - If we are promoting a pointer that has elements
117 // accessed out of it, keep track of which elements are accessed so that we
118 // can add one argument for each.
120 // Arguments that are directly loaded will have a zero element value here, to
121 // handle cases where there are both a direct load and GEP accesses.
122 std::map
<Argument
*, ScalarizeTable
> ScalarizedElements
;
124 // OriginalLoads - Keep track of a representative load instruction from the
125 // original function so that we can tell the alias analysis implementation
126 // what the new GEP/Load instructions we are inserting look like.
127 // We need to keep the original loads for each argument and the elements
128 // of the argument that are accessed.
129 std::map
<std::pair
<Argument
*, IndicesVector
>, LoadInst
*> OriginalLoads
;
131 // Attribute - Keep track of the parameter attributes for the arguments
132 // that we are *not* promoting. For the ones that we do promote, the parameter
133 // attributes are lost
134 SmallVector
<AttributeSet
, 8> ArgAttrVec
;
135 AttributeList PAL
= F
->getAttributes();
137 // First, determine the new argument list
139 for (Function::arg_iterator I
= F
->arg_begin(), E
= F
->arg_end(); I
!= E
;
141 if (ByValArgsToTransform
.count(&*I
)) {
142 // Simple byval argument? Just add all the struct element types.
143 Type
*AgTy
= cast
<PointerType
>(I
->getType())->getElementType();
144 StructType
*STy
= cast
<StructType
>(AgTy
);
145 Params
.insert(Params
.end(), STy
->element_begin(), STy
->element_end());
146 ArgAttrVec
.insert(ArgAttrVec
.end(), STy
->getNumElements(),
148 ++NumByValArgsPromoted
;
149 } else if (!ArgsToPromote
.count(&*I
)) {
150 // Unchanged argument
151 Params
.push_back(I
->getType());
152 ArgAttrVec
.push_back(PAL
.getParamAttributes(ArgNo
));
153 } else if (I
->use_empty()) {
154 // Dead argument (which are always marked as promotable)
157 // There may be remaining metadata uses of the argument for things like
158 // llvm.dbg.value. Replace them with undef.
159 I
->replaceAllUsesWith(UndefValue::get(I
->getType()));
161 // Okay, this is being promoted. This means that the only uses are loads
162 // or GEPs which are only used by loads
164 // In this table, we will track which indices are loaded from the argument
165 // (where direct loads are tracked as no indices).
166 ScalarizeTable
&ArgIndices
= ScalarizedElements
[&*I
];
167 for (User
*U
: I
->users()) {
168 Instruction
*UI
= cast
<Instruction
>(U
);
170 if (LoadInst
*L
= dyn_cast
<LoadInst
>(UI
))
171 SrcTy
= L
->getType();
173 SrcTy
= cast
<GetElementPtrInst
>(UI
)->getSourceElementType();
174 IndicesVector Indices
;
175 Indices
.reserve(UI
->getNumOperands() - 1);
176 // Since loads will only have a single operand, and GEPs only a single
177 // non-index operand, this will record direct loads without any indices,
178 // and gep+loads with the GEP indices.
179 for (User::op_iterator II
= UI
->op_begin() + 1, IE
= UI
->op_end();
181 Indices
.push_back(cast
<ConstantInt
>(*II
)->getSExtValue());
182 // GEPs with a single 0 index can be merged with direct loads
183 if (Indices
.size() == 1 && Indices
.front() == 0)
185 ArgIndices
.insert(std::make_pair(SrcTy
, Indices
));
187 if (LoadInst
*L
= dyn_cast
<LoadInst
>(UI
))
190 // Take any load, we will use it only to update Alias Analysis
191 OrigLoad
= cast
<LoadInst
>(UI
->user_back());
192 OriginalLoads
[std::make_pair(&*I
, Indices
)] = OrigLoad
;
195 // Add a parameter to the function for each element passed in.
196 for (const auto &ArgIndex
: ArgIndices
) {
197 // not allowed to dereference ->begin() if size() is 0
198 Params
.push_back(GetElementPtrInst::getIndexedType(
199 cast
<PointerType
>(I
->getType()->getScalarType())->getElementType(),
201 ArgAttrVec
.push_back(AttributeSet());
202 assert(Params
.back());
205 if (ArgIndices
.size() == 1 && ArgIndices
.begin()->second
.empty())
206 ++NumArgumentsPromoted
;
208 ++NumAggregatesPromoted
;
212 Type
*RetTy
= FTy
->getReturnType();
214 // Construct the new function type using the new arguments.
215 FunctionType
*NFTy
= FunctionType::get(RetTy
, Params
, FTy
->isVarArg());
217 // Create the new function body and insert it into the module.
218 Function
*NF
= Function::Create(NFTy
, F
->getLinkage(), F
->getAddressSpace(),
220 NF
->copyAttributesFrom(F
);
222 // Patch the pointer to LLVM function in debug info descriptor.
223 NF
->setSubprogram(F
->getSubprogram());
224 F
->setSubprogram(nullptr);
226 LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF
<< "\n"
229 // Recompute the parameter attributes list based on the new arguments for
231 NF
->setAttributes(AttributeList::get(F
->getContext(), PAL
.getFnAttributes(),
232 PAL
.getRetAttributes(), ArgAttrVec
));
235 F
->getParent()->getFunctionList().insert(F
->getIterator(), NF
);
238 // Loop over all of the callers of the function, transforming the call sites
239 // to pass in the loaded pointers.
241 SmallVector
<Value
*, 16> Args
;
242 while (!F
->use_empty()) {
243 CallSite
CS(F
->user_back());
244 assert(CS
.getCalledFunction() == F
);
245 Instruction
*Call
= CS
.getInstruction();
246 const AttributeList
&CallPAL
= CS
.getAttributes();
247 IRBuilder
<NoFolder
> IRB(Call
);
249 // Loop over the operands, inserting GEP and loads in the caller as
251 CallSite::arg_iterator AI
= CS
.arg_begin();
253 for (Function::arg_iterator I
= F
->arg_begin(), E
= F
->arg_end(); I
!= E
;
255 if (!ArgsToPromote
.count(&*I
) && !ByValArgsToTransform
.count(&*I
)) {
256 Args
.push_back(*AI
); // Unmodified argument
257 ArgAttrVec
.push_back(CallPAL
.getParamAttributes(ArgNo
));
258 } else if (ByValArgsToTransform
.count(&*I
)) {
259 // Emit a GEP and load for each element of the struct.
260 Type
*AgTy
= cast
<PointerType
>(I
->getType())->getElementType();
261 StructType
*STy
= cast
<StructType
>(AgTy
);
263 ConstantInt::get(Type::getInt32Ty(F
->getContext()), 0), nullptr};
264 for (unsigned i
= 0, e
= STy
->getNumElements(); i
!= e
; ++i
) {
265 Idxs
[1] = ConstantInt::get(Type::getInt32Ty(F
->getContext()), i
);
267 IRB
.CreateGEP(STy
, *AI
, Idxs
, (*AI
)->getName() + "." + Twine(i
));
268 // TODO: Tell AA about the new values?
269 Args
.push_back(IRB
.CreateLoad(STy
->getElementType(i
), Idx
,
270 Idx
->getName() + ".val"));
271 ArgAttrVec
.push_back(AttributeSet());
273 } else if (!I
->use_empty()) {
274 // Non-dead argument: insert GEPs and loads as appropriate.
275 ScalarizeTable
&ArgIndices
= ScalarizedElements
[&*I
];
276 // Store the Value* version of the indices in here, but declare it now
278 std::vector
<Value
*> Ops
;
279 for (const auto &ArgIndex
: ArgIndices
) {
282 OriginalLoads
[std::make_pair(&*I
, ArgIndex
.second
)];
283 if (!ArgIndex
.second
.empty()) {
284 Ops
.reserve(ArgIndex
.second
.size());
285 Type
*ElTy
= V
->getType();
286 for (auto II
: ArgIndex
.second
) {
287 // Use i32 to index structs, and i64 for others (pointers/arrays).
288 // This satisfies GEP constraints.
290 (ElTy
->isStructTy() ? Type::getInt32Ty(F
->getContext())
291 : Type::getInt64Ty(F
->getContext()));
292 Ops
.push_back(ConstantInt::get(IdxTy
, II
));
293 // Keep track of the type we're currently indexing.
294 if (auto *ElPTy
= dyn_cast
<PointerType
>(ElTy
))
295 ElTy
= ElPTy
->getElementType();
297 ElTy
= cast
<CompositeType
>(ElTy
)->getTypeAtIndex(II
);
299 // And create a GEP to extract those indices.
300 V
= IRB
.CreateGEP(ArgIndex
.first
, V
, Ops
, V
->getName() + ".idx");
303 // Since we're replacing a load make sure we take the alignment
304 // of the previous load.
306 IRB
.CreateLoad(OrigLoad
->getType(), V
, V
->getName() + ".val");
307 newLoad
->setAlignment(MaybeAlign(OrigLoad
->getAlignment()));
308 // Transfer the AA info too.
310 OrigLoad
->getAAMetadata(AAInfo
);
311 newLoad
->setAAMetadata(AAInfo
);
313 Args
.push_back(newLoad
);
314 ArgAttrVec
.push_back(AttributeSet());
318 // Push any varargs arguments on the list.
319 for (; AI
!= CS
.arg_end(); ++AI
, ++ArgNo
) {
321 ArgAttrVec
.push_back(CallPAL
.getParamAttributes(ArgNo
));
324 SmallVector
<OperandBundleDef
, 1> OpBundles
;
325 CS
.getOperandBundlesAsDefs(OpBundles
);
328 if (InvokeInst
*II
= dyn_cast
<InvokeInst
>(Call
)) {
329 NewCS
= InvokeInst::Create(NF
, II
->getNormalDest(), II
->getUnwindDest(),
330 Args
, OpBundles
, "", Call
);
332 auto *NewCall
= CallInst::Create(NF
, Args
, OpBundles
, "", Call
);
333 NewCall
->setTailCallKind(cast
<CallInst
>(Call
)->getTailCallKind());
336 NewCS
.setCallingConv(CS
.getCallingConv());
338 AttributeList::get(F
->getContext(), CallPAL
.getFnAttributes(),
339 CallPAL
.getRetAttributes(), ArgAttrVec
));
340 NewCS
->setDebugLoc(Call
->getDebugLoc());
342 if (Call
->extractProfTotalWeight(W
))
343 NewCS
->setProfWeight(W
);
347 // Update the callgraph to know that the callsite has been transformed.
349 (*ReplaceCallSite
)(CS
, NewCS
);
351 if (!Call
->use_empty()) {
352 Call
->replaceAllUsesWith(NewCS
.getInstruction());
353 NewCS
->takeName(Call
);
356 // Finally, remove the old call from the program, reducing the use-count of
358 Call
->eraseFromParent();
361 const DataLayout
&DL
= F
->getParent()->getDataLayout();
363 // Since we have now created the new function, splice the body of the old
364 // function right into the new function, leaving the old rotting hulk of the
366 NF
->getBasicBlockList().splice(NF
->begin(), F
->getBasicBlockList());
368 // Loop over the argument list, transferring uses of the old arguments over to
369 // the new arguments, also transferring over the names as well.
370 for (Function::arg_iterator I
= F
->arg_begin(), E
= F
->arg_end(),
371 I2
= NF
->arg_begin();
373 if (!ArgsToPromote
.count(&*I
) && !ByValArgsToTransform
.count(&*I
)) {
374 // If this is an unmodified argument, move the name and users over to the
376 I
->replaceAllUsesWith(&*I2
);
382 if (ByValArgsToTransform
.count(&*I
)) {
383 // In the callee, we create an alloca, and store each of the new incoming
384 // arguments into the alloca.
385 Instruction
*InsertPt
= &NF
->begin()->front();
387 // Just add all the struct element types.
388 Type
*AgTy
= cast
<PointerType
>(I
->getType())->getElementType();
389 Value
*TheAlloca
= new AllocaInst(AgTy
, DL
.getAllocaAddrSpace(), nullptr,
390 I
->getParamAlignment(), "", InsertPt
);
391 StructType
*STy
= cast
<StructType
>(AgTy
);
392 Value
*Idxs
[2] = {ConstantInt::get(Type::getInt32Ty(F
->getContext()), 0),
395 for (unsigned i
= 0, e
= STy
->getNumElements(); i
!= e
; ++i
) {
396 Idxs
[1] = ConstantInt::get(Type::getInt32Ty(F
->getContext()), i
);
397 Value
*Idx
= GetElementPtrInst::Create(
398 AgTy
, TheAlloca
, Idxs
, TheAlloca
->getName() + "." + Twine(i
),
400 I2
->setName(I
->getName() + "." + Twine(i
));
401 new StoreInst(&*I2
++, Idx
, InsertPt
);
404 // Anything that used the arg should now use the alloca.
405 I
->replaceAllUsesWith(TheAlloca
);
406 TheAlloca
->takeName(&*I
);
408 // If the alloca is used in a call, we must clear the tail flag since
409 // the callee now uses an alloca from the caller.
410 for (User
*U
: TheAlloca
->users()) {
411 CallInst
*Call
= dyn_cast
<CallInst
>(U
);
414 Call
->setTailCall(false);
422 // Otherwise, if we promoted this argument, then all users are load
423 // instructions (or GEPs with only load users), and all loads should be
424 // using the new argument that we added.
425 ScalarizeTable
&ArgIndices
= ScalarizedElements
[&*I
];
427 while (!I
->use_empty()) {
428 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(I
->user_back())) {
429 assert(ArgIndices
.begin()->second
.empty() &&
430 "Load element should sort to front!");
431 I2
->setName(I
->getName() + ".val");
432 LI
->replaceAllUsesWith(&*I2
);
433 LI
->eraseFromParent();
434 LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << I
->getName()
435 << "' in function '" << F
->getName() << "'\n");
437 GetElementPtrInst
*GEP
= cast
<GetElementPtrInst
>(I
->user_back());
438 IndicesVector Operands
;
439 Operands
.reserve(GEP
->getNumIndices());
440 for (User::op_iterator II
= GEP
->idx_begin(), IE
= GEP
->idx_end();
442 Operands
.push_back(cast
<ConstantInt
>(*II
)->getSExtValue());
444 // GEPs with a single 0 index can be merged with direct loads
445 if (Operands
.size() == 1 && Operands
.front() == 0)
448 Function::arg_iterator TheArg
= I2
;
449 for (ScalarizeTable::iterator It
= ArgIndices
.begin();
450 It
->second
!= Operands
; ++It
, ++TheArg
) {
451 assert(It
!= ArgIndices
.end() && "GEP not handled??");
454 std::string NewName
= I
->getName();
455 for (unsigned i
= 0, e
= Operands
.size(); i
!= e
; ++i
) {
456 NewName
+= "." + utostr(Operands
[i
]);
459 TheArg
->setName(NewName
);
461 LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg
->getName()
462 << "' of function '" << NF
->getName() << "'\n");
464 // All of the uses must be load instructions. Replace them all with
465 // the argument specified by ArgNo.
466 while (!GEP
->use_empty()) {
467 LoadInst
*L
= cast
<LoadInst
>(GEP
->user_back());
468 L
->replaceAllUsesWith(&*TheArg
);
469 L
->eraseFromParent();
471 GEP
->eraseFromParent();
475 // Increment I2 past all of the arguments added for this promoted pointer.
476 std::advance(I2
, ArgIndices
.size());
482 /// Return true if we can prove that all callees pass in a valid pointer for the
483 /// specified function argument.
484 static bool allCallersPassValidPointerForArgument(Argument
*Arg
, Type
*Ty
) {
485 Function
*Callee
= Arg
->getParent();
486 const DataLayout
&DL
= Callee
->getParent()->getDataLayout();
488 unsigned ArgNo
= Arg
->getArgNo();
490 // Look at all call sites of the function. At this point we know we only have
492 for (User
*U
: Callee
->users()) {
494 assert(CS
&& "Should only have direct calls!");
496 if (!isDereferenceablePointer(CS
.getArgument(ArgNo
), Ty
, DL
))
/// Returns true if Prefix is a prefix of longer. That means, Longer has a size
/// that is greater than or equal to the size of prefix, and each of the
/// elements in Prefix is the same as the corresponding elements in Longer.
///
/// This means it also returns true when Prefix and Longer are equal!
static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
  if (Prefix.size() > Longer.size())
    return false;
  return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
}
513 /// Checks if Indices, or a prefix of Indices, is in Set.
514 static bool prefixIn(const IndicesVector
&Indices
,
515 std::set
<IndicesVector
> &Set
) {
516 std::set
<IndicesVector
>::iterator Low
;
517 Low
= Set
.upper_bound(Indices
);
518 if (Low
!= Set
.begin())
520 // Low is now the last element smaller than or equal to Indices. This means
521 // it points to a prefix of Indices (possibly Indices itself), if such
524 // This load is safe if any prefix of its operands is safe to load.
525 return Low
!= Set
.end() && isPrefix(*Low
, Indices
);
528 /// Mark the given indices (ToMark) as safe in the given set of indices
529 /// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
530 /// is already a prefix of Indices in Safe, Indices are implicitely marked safe
531 /// already. Furthermore, any indices that Indices is itself a prefix of, are
532 /// removed from Safe (since they are implicitely safe because of Indices now).
533 static void markIndicesSafe(const IndicesVector
&ToMark
,
534 std::set
<IndicesVector
> &Safe
) {
535 std::set
<IndicesVector
>::iterator Low
;
536 Low
= Safe
.upper_bound(ToMark
);
537 // Guard against the case where Safe is empty
538 if (Low
!= Safe
.begin())
540 // Low is now the last element smaller than or equal to Indices. This
541 // means it points to a prefix of Indices (possibly Indices itself), if
542 // such prefix exists.
543 if (Low
!= Safe
.end()) {
544 if (isPrefix(*Low
, ToMark
))
545 // If there is already a prefix of these indices (or exactly these
546 // indices) marked a safe, don't bother adding these indices
549 // Increment Low, so we can use it as a "insert before" hint
553 Low
= Safe
.insert(Low
, ToMark
);
555 // If there we're a prefix of longer index list(s), remove those
556 std::set
<IndicesVector
>::iterator End
= Safe
.end();
557 while (Low
!= End
&& isPrefix(ToMark
, *Low
)) {
558 std::set
<IndicesVector
>::iterator Remove
= Low
;
564 /// isSafeToPromoteArgument - As you might guess from the name of this method,
565 /// it checks to see if it is both safe and useful to promote the argument.
566 /// This method limits promotion of aggregates to only promote up to three
567 /// elements of the aggregate in order to avoid exploding the number of
568 /// arguments passed in.
569 static bool isSafeToPromoteArgument(Argument
*Arg
, Type
*ByValTy
, AAResults
&AAR
,
570 unsigned MaxElements
) {
571 using GEPIndicesSet
= std::set
<IndicesVector
>;
573 // Quick exit for unused arguments
574 if (Arg
->use_empty())
577 // We can only promote this argument if all of the uses are loads, or are GEP
578 // instructions (with constant indices) that are subsequently loaded.
580 // Promoting the argument causes it to be loaded in the caller
581 // unconditionally. This is only safe if we can prove that either the load
582 // would have happened in the callee anyway (ie, there is a load in the entry
583 // block) or the pointer passed in at every call site is guaranteed to be
585 // In the former case, invalid loads can happen, but would have happened
586 // anyway, in the latter case, invalid loads won't happen. This prevents us
587 // from introducing an invalid load that wouldn't have happened in the
590 // This set will contain all sets of indices that are loaded in the entry
591 // block, and thus are safe to unconditionally load in the caller.
592 GEPIndicesSet SafeToUnconditionallyLoad
;
594 // This set contains all the sets of indices that we are planning to promote.
595 // This makes it possible to limit the number of arguments added.
596 GEPIndicesSet ToPromote
;
598 // If the pointer is always valid, any load with first index 0 is valid.
601 SafeToUnconditionallyLoad
.insert(IndicesVector(1, 0));
603 // Whenever a new underlying type for the operand is found, make sure it's
604 // consistent with the GEPs and loads we've already seen and, if necessary,
605 // use it to see if all incoming pointers are valid (which implies the 0-index
607 Type
*BaseTy
= ByValTy
;
608 auto UpdateBaseTy
= [&](Type
*NewBaseTy
) {
610 return BaseTy
== NewBaseTy
;
613 if (allCallersPassValidPointerForArgument(Arg
, BaseTy
)) {
614 assert(SafeToUnconditionallyLoad
.empty());
615 SafeToUnconditionallyLoad
.insert(IndicesVector(1, 0));
621 // First, iterate the entry block and mark loads of (geps of) arguments as
623 BasicBlock
&EntryBlock
= Arg
->getParent()->front();
624 // Declare this here so we can reuse it
625 IndicesVector Indices
;
626 for (Instruction
&I
: EntryBlock
)
627 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(&I
)) {
628 Value
*V
= LI
->getPointerOperand();
629 if (GetElementPtrInst
*GEP
= dyn_cast
<GetElementPtrInst
>(V
)) {
630 V
= GEP
->getPointerOperand();
632 // This load actually loads (part of) Arg? Check the indices then.
633 Indices
.reserve(GEP
->getNumIndices());
634 for (User::op_iterator II
= GEP
->idx_begin(), IE
= GEP
->idx_end();
636 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(*II
))
637 Indices
.push_back(CI
->getSExtValue());
639 // We found a non-constant GEP index for this argument? Bail out
640 // right away, can't promote this argument at all.
643 if (!UpdateBaseTy(GEP
->getSourceElementType()))
646 // Indices checked out, mark them as safe
647 markIndicesSafe(Indices
, SafeToUnconditionallyLoad
);
650 } else if (V
== Arg
) {
651 // Direct loads are equivalent to a GEP with a single 0 index.
652 markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad
);
654 if (BaseTy
&& LI
->getType() != BaseTy
)
657 BaseTy
= LI
->getType();
661 // Now, iterate all uses of the argument to see if there are any uses that are
662 // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
663 SmallVector
<LoadInst
*, 16> Loads
;
664 IndicesVector Operands
;
665 for (Use
&U
: Arg
->uses()) {
666 User
*UR
= U
.getUser();
668 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(UR
)) {
669 // Don't hack volatile/atomic loads
673 // Direct loads are equivalent to a GEP with a zero index and then a load.
674 Operands
.push_back(0);
676 if (!UpdateBaseTy(LI
->getType()))
678 } else if (GetElementPtrInst
*GEP
= dyn_cast
<GetElementPtrInst
>(UR
)) {
679 if (GEP
->use_empty()) {
680 // Dead GEP's cause trouble later. Just remove them if we run into
682 GEP
->eraseFromParent();
683 // TODO: This runs the above loop over and over again for dead GEPs
684 // Couldn't we just do increment the UI iterator earlier and erase the
686 return isSafeToPromoteArgument(Arg
, ByValTy
, AAR
, MaxElements
);
689 if (!UpdateBaseTy(GEP
->getSourceElementType()))
692 // Ensure that all of the indices are constants.
693 for (User::op_iterator i
= GEP
->idx_begin(), e
= GEP
->idx_end(); i
!= e
;
695 if (ConstantInt
*C
= dyn_cast
<ConstantInt
>(*i
))
696 Operands
.push_back(C
->getSExtValue());
698 return false; // Not a constant operand GEP!
700 // Ensure that the only users of the GEP are load instructions.
701 for (User
*GEPU
: GEP
->users())
702 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(GEPU
)) {
703 // Don't hack volatile/atomic loads
708 // Other uses than load?
712 return false; // Not a load or a GEP.
715 // Now, see if it is safe to promote this load / loads of this GEP. Loading
716 // is safe if Operands, or a prefix of Operands, is marked as safe.
717 if (!prefixIn(Operands
, SafeToUnconditionallyLoad
))
720 // See if we are already promoting a load with these indices. If not, check
721 // to make sure that we aren't promoting too many elements. If so, nothing
723 if (ToPromote
.find(Operands
) == ToPromote
.end()) {
724 if (MaxElements
> 0 && ToPromote
.size() == MaxElements
) {
725 LLVM_DEBUG(dbgs() << "argpromotion not promoting argument '"
727 << "' because it would require adding more "
728 << "than " << MaxElements
729 << " arguments to the function.\n");
730 // We limit aggregate promotion to only promoting up to a fixed number
731 // of elements of the aggregate.
734 ToPromote
.insert(std::move(Operands
));
739 return true; // No users, this is a dead argument.
741 // Okay, now we know that the argument is only used by load instructions and
742 // it is safe to unconditionally perform all of them. Use alias analysis to
743 // check to see if the pointer is guaranteed to not be modified from entry of
744 // the function to each of the load instructions.
746 // Because there could be several/many load instructions, remember which
747 // blocks we know to be transparent to the load.
748 df_iterator_default_set
<BasicBlock
*, 16> TranspBlocks
;
750 for (LoadInst
*Load
: Loads
) {
751 // Check to see if the load is invalidated from the start of the block to
753 BasicBlock
*BB
= Load
->getParent();
755 MemoryLocation Loc
= MemoryLocation::get(Load
);
756 if (AAR
.canInstructionRangeModRef(BB
->front(), *Load
, Loc
, ModRefInfo::Mod
))
757 return false; // Pointer is invalidated!
759 // Now check every path from the entry block to the load for transparency.
760 // To do this, we perform a depth first search on the inverse CFG from the
762 for (BasicBlock
*P
: predecessors(BB
)) {
763 for (BasicBlock
*TranspBB
: inverse_depth_first_ext(P
, TranspBlocks
))
764 if (AAR
.canBasicBlockModify(*TranspBB
, Loc
))
769 // If the path from the entry of the function to each load is free of
770 // instructions that potentially invalidate the load, we can make the
775 /// Checks if a type could have padding bytes.
776 static bool isDenselyPacked(Type
*type
, const DataLayout
&DL
) {
777 // There is no size information, so be conservative.
778 if (!type
->isSized())
781 // If the alloc size is not equal to the storage size, then there are padding
782 // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
783 if (DL
.getTypeSizeInBits(type
) != DL
.getTypeAllocSizeInBits(type
))
786 if (!isa
<CompositeType
>(type
))
789 // For homogenous sequential types, check for padding within members.
790 if (SequentialType
*seqTy
= dyn_cast
<SequentialType
>(type
))
791 return isDenselyPacked(seqTy
->getElementType(), DL
);
793 // Check for padding within and between elements of a struct.
794 StructType
*StructTy
= cast
<StructType
>(type
);
795 const StructLayout
*Layout
= DL
.getStructLayout(StructTy
);
796 uint64_t StartPos
= 0;
797 for (unsigned i
= 0, E
= StructTy
->getNumElements(); i
< E
; ++i
) {
798 Type
*ElTy
= StructTy
->getElementType(i
);
799 if (!isDenselyPacked(ElTy
, DL
))
801 if (StartPos
!= Layout
->getElementOffsetInBits(i
))
803 StartPos
+= DL
.getTypeAllocSizeInBits(ElTy
);
809 /// Checks if the padding bytes of an argument could be accessed.
810 static bool canPaddingBeAccessed(Argument
*arg
) {
811 assert(arg
->hasByValAttr());
813 // Track all the pointers to the argument to make sure they are not captured.
814 SmallPtrSet
<Value
*, 16> PtrValues
;
815 PtrValues
.insert(arg
);
817 // Track all of the stores.
818 SmallVector
<StoreInst
*, 16> Stores
;
820 // Scan through the uses recursively to make sure the pointer is always used
822 SmallVector
<Value
*, 16> WorkList
;
823 WorkList
.insert(WorkList
.end(), arg
->user_begin(), arg
->user_end());
824 while (!WorkList
.empty()) {
825 Value
*V
= WorkList
.back();
827 if (isa
<GetElementPtrInst
>(V
) || isa
<PHINode
>(V
)) {
828 if (PtrValues
.insert(V
).second
)
829 WorkList
.insert(WorkList
.end(), V
->user_begin(), V
->user_end());
830 } else if (StoreInst
*Store
= dyn_cast
<StoreInst
>(V
)) {
831 Stores
.push_back(Store
);
832 } else if (!isa
<LoadInst
>(V
)) {
837 // Check to make sure the pointers aren't captured
838 for (StoreInst
*Store
: Stores
)
839 if (PtrValues
.count(Store
->getValueOperand()))
845 static bool areFunctionArgsABICompatible(
846 const Function
&F
, const TargetTransformInfo
&TTI
,
847 SmallPtrSetImpl
<Argument
*> &ArgsToPromote
,
848 SmallPtrSetImpl
<Argument
*> &ByValArgsToTransform
) {
849 for (const Use
&U
: F
.uses()) {
850 CallSite
CS(U
.getUser());
851 const Function
*Caller
= CS
.getCaller();
852 const Function
*Callee
= CS
.getCalledFunction();
853 if (!TTI
.areFunctionArgsABICompatible(Caller
, Callee
, ArgsToPromote
) ||
854 !TTI
.areFunctionArgsABICompatible(Caller
, Callee
, ByValArgsToTransform
))
860 /// PromoteArguments - This method checks the specified function to see if there
861 /// are any promotable arguments and if it is safe to promote the function (for
862 /// example, all callers are direct). If safe to promote some arguments, it
863 /// calls the DoPromotion method.
865 promoteArguments(Function
*F
, function_ref
<AAResults
&(Function
&F
)> AARGetter
,
866 unsigned MaxElements
,
867 Optional
<function_ref
<void(CallSite OldCS
, CallSite NewCS
)>>
869 const TargetTransformInfo
&TTI
) {
870 // Don't perform argument promotion for naked functions; otherwise we can end
871 // up removing parameters that are seemingly 'not used' as they are referred
872 // to in the assembly.
873 if(F
->hasFnAttribute(Attribute::Naked
))
876 // Make sure that it is local to this module.
877 if (!F
->hasLocalLinkage())
880 // Don't promote arguments for variadic functions. Adding, removing, or
881 // changing non-pack parameters can change the classification of pack
882 // parameters. Frontends encode that classification at the call site in the
883 // IR, while in the callee the classification is determined dynamically based
884 // on the number of registers consumed so far.
888 // Don't transform functions that receive inallocas, as the transformation may
889 // not be safe depending on calling convention.
890 if (F
->getAttributes().hasAttrSomewhere(Attribute::InAlloca
))
893 // First check: see if there are any pointer arguments! If not, quick exit.
894 SmallVector
<Argument
*, 16> PointerArgs
;
895 for (Argument
&I
: F
->args())
896 if (I
.getType()->isPointerTy())
897 PointerArgs
.push_back(&I
);
898 if (PointerArgs
.empty())
901 // Second check: make sure that all callers are direct callers. We can't
902 // transform functions that have indirect callers. Also see if the function
903 // is self-recursive and check that target features are compatible.
904 bool isSelfRecursive
= false;
905 for (Use
&U
: F
->uses()) {
906 CallSite
CS(U
.getUser());
907 // Must be a direct call.
908 if (CS
.getInstruction() == nullptr || !CS
.isCallee(&U
))
911 // Can't change signature of musttail callee
912 if (CS
.isMustTailCall())
915 if (CS
.getInstruction()->getParent()->getParent() == F
)
916 isSelfRecursive
= true;
919 // Can't change signature of musttail caller
920 // FIXME: Support promoting whole chain of musttail functions
921 for (BasicBlock
&BB
: *F
)
922 if (BB
.getTerminatingMustTailCall())
925 const DataLayout
&DL
= F
->getParent()->getDataLayout();
927 AAResults
&AAR
= AARGetter(*F
);
929 // Check to see which arguments are promotable. If an argument is promotable,
930 // add it to ArgsToPromote.
931 SmallPtrSet
<Argument
*, 8> ArgsToPromote
;
932 SmallPtrSet
<Argument
*, 8> ByValArgsToTransform
;
933 for (Argument
*PtrArg
: PointerArgs
) {
934 Type
*AgTy
= cast
<PointerType
>(PtrArg
->getType())->getElementType();
936 // Replace sret attribute with noalias. This reduces register pressure by
937 // avoiding a register copy.
938 if (PtrArg
->hasStructRetAttr()) {
939 unsigned ArgNo
= PtrArg
->getArgNo();
940 F
->removeParamAttr(ArgNo
, Attribute::StructRet
);
941 F
->addParamAttr(ArgNo
, Attribute::NoAlias
);
942 for (Use
&U
: F
->uses()) {
943 CallSite
CS(U
.getUser());
944 CS
.removeParamAttr(ArgNo
, Attribute::StructRet
);
945 CS
.addParamAttr(ArgNo
, Attribute::NoAlias
);
949 // If this is a byval argument, and if the aggregate type is small, just
950 // pass the elements, which is always safe, if the passed value is densely
951 // packed or if we can prove the padding bytes are never accessed.
952 bool isSafeToPromote
=
953 PtrArg
->hasByValAttr() &&
954 (isDenselyPacked(AgTy
, DL
) || !canPaddingBeAccessed(PtrArg
));
955 if (isSafeToPromote
) {
956 if (StructType
*STy
= dyn_cast
<StructType
>(AgTy
)) {
957 if (MaxElements
> 0 && STy
->getNumElements() > MaxElements
) {
958 LLVM_DEBUG(dbgs() << "argpromotion disable promoting argument '"
960 << "' because it would require adding more"
961 << " than " << MaxElements
962 << " arguments to the function.\n");
966 // If all the elements are single-value types, we can promote it.
967 bool AllSimple
= true;
968 for (const auto *EltTy
: STy
->elements()) {
969 if (!EltTy
->isSingleValueType()) {
975 // Safe to transform, don't even bother trying to "promote" it.
976 // Passing the elements as a scalar will allow sroa to hack on
977 // the new alloca we introduce.
979 ByValArgsToTransform
.insert(PtrArg
);
985 // If the argument is a recursive type and we're in a recursive
986 // function, we could end up infinitely peeling the function argument.
987 if (isSelfRecursive
) {
988 if (StructType
*STy
= dyn_cast
<StructType
>(AgTy
)) {
989 bool RecursiveType
= false;
990 for (const auto *EltTy
: STy
->elements()) {
991 if (EltTy
== PtrArg
->getType()) {
992 RecursiveType
= true;
1001 // Otherwise, see if we can promote the pointer to its value.
1003 PtrArg
->hasByValAttr() ? PtrArg
->getParamByValType() : nullptr;
1004 if (isSafeToPromoteArgument(PtrArg
, ByValTy
, AAR
, MaxElements
))
1005 ArgsToPromote
.insert(PtrArg
);
1008 // No promotable pointer arguments.
1009 if (ArgsToPromote
.empty() && ByValArgsToTransform
.empty())
1012 if (!areFunctionArgsABICompatible(*F
, TTI
, ArgsToPromote
,
1013 ByValArgsToTransform
))
1016 return doPromotion(F
, ArgsToPromote
, ByValArgsToTransform
, ReplaceCallSite
);
1019 PreservedAnalyses
ArgumentPromotionPass::run(LazyCallGraph::SCC
&C
,
1020 CGSCCAnalysisManager
&AM
,
1022 CGSCCUpdateResult
&UR
) {
1023 bool Changed
= false, LocalChange
;
1025 // Iterate until we stop promoting from this SCC.
1027 LocalChange
= false;
1029 for (LazyCallGraph::Node
&N
: C
) {
1030 Function
&OldF
= N
.getFunction();
1032 FunctionAnalysisManager
&FAM
=
1033 AM
.getResult
<FunctionAnalysisManagerCGSCCProxy
>(C
, CG
).getManager();
1034 // FIXME: This lambda must only be used with this function. We should
1035 // skip the lambda and just get the AA results directly.
1036 auto AARGetter
= [&](Function
&F
) -> AAResults
& {
1037 assert(&F
== &OldF
&& "Called with an unexpected function!");
1038 return FAM
.getResult
<AAManager
>(F
);
1041 const TargetTransformInfo
&TTI
= FAM
.getResult
<TargetIRAnalysis
>(OldF
);
1043 promoteArguments(&OldF
, AARGetter
, MaxElements
, None
, TTI
);
1048 // Directly substitute the functions in the call graph. Note that this
1049 // requires the old function to be completely dead and completely
1050 // replaced by the new function. It does no call graph updates, it merely
1051 // swaps out the particular function mapped to a particular node in the
1053 C
.getOuterRefSCC().replaceNodeFunction(N
, *NewF
);
1054 OldF
.eraseFromParent();
1057 Changed
|= LocalChange
;
1058 } while (LocalChange
);
1061 return PreservedAnalyses::all();
1063 return PreservedAnalyses::none();
1068 /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
1069 struct ArgPromotion
: public CallGraphSCCPass
{
1070 // Pass identification, replacement for typeid
1073 explicit ArgPromotion(unsigned MaxElements
= 3)
1074 : CallGraphSCCPass(ID
), MaxElements(MaxElements
) {
1075 initializeArgPromotionPass(*PassRegistry::getPassRegistry());
1078 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
1079 AU
.addRequired
<AssumptionCacheTracker
>();
1080 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
1081 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
1082 getAAResultsAnalysisUsage(AU
);
1083 CallGraphSCCPass::getAnalysisUsage(AU
);
1086 bool runOnSCC(CallGraphSCC
&SCC
) override
;
1089 using llvm::Pass::doInitialization
;
1091 bool doInitialization(CallGraph
&CG
) override
;
1093 /// The maximum number of elements to expand, or 0 for unlimited.
1094 unsigned MaxElements
;
1097 } // end anonymous namespace
1099 char ArgPromotion::ID
= 0;
1101 INITIALIZE_PASS_BEGIN(ArgPromotion
, "argpromotion",
1102 "Promote 'by reference' arguments to scalars", false,
1104 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
1105 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass
)
1106 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
1107 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass
)
1108 INITIALIZE_PASS_END(ArgPromotion
, "argpromotion",
1109 "Promote 'by reference' arguments to scalars", false, false)
1111 Pass
*llvm::createArgumentPromotionPass(unsigned MaxElements
) {
1112 return new ArgPromotion(MaxElements
);
1115 bool ArgPromotion::runOnSCC(CallGraphSCC
&SCC
) {
1119 // Get the callgraph information that we need to update to reflect our
1121 CallGraph
&CG
= getAnalysis
<CallGraphWrapperPass
>().getCallGraph();
1123 LegacyAARGetter
AARGetter(*this);
1125 bool Changed
= false, LocalChange
;
1127 // Iterate until we stop promoting from this SCC.
1129 LocalChange
= false;
1130 // Attempt to promote arguments from all functions in this SCC.
1131 for (CallGraphNode
*OldNode
: SCC
) {
1132 Function
*OldF
= OldNode
->getFunction();
1136 auto ReplaceCallSite
= [&](CallSite OldCS
, CallSite NewCS
) {
1137 Function
*Caller
= OldCS
.getInstruction()->getParent()->getParent();
1138 CallGraphNode
*NewCalleeNode
=
1139 CG
.getOrInsertFunction(NewCS
.getCalledFunction());
1140 CallGraphNode
*CallerNode
= CG
[Caller
];
1141 CallerNode
->replaceCallEdge(*cast
<CallBase
>(OldCS
.getInstruction()),
1142 *cast
<CallBase
>(NewCS
.getInstruction()),
1146 const TargetTransformInfo
&TTI
=
1147 getAnalysis
<TargetTransformInfoWrapperPass
>().getTTI(*OldF
);
1148 if (Function
*NewF
= promoteArguments(OldF
, AARGetter
, MaxElements
,
1149 {ReplaceCallSite
}, TTI
)) {
1152 // Update the call graph for the newly promoted function.
1153 CallGraphNode
*NewNode
= CG
.getOrInsertFunction(NewF
);
1154 NewNode
->stealCalledFunctionsFrom(OldNode
);
1155 if (OldNode
->getNumReferences() == 0)
1156 delete CG
.removeFunctionFromModule(OldNode
);
1158 OldF
->setLinkage(Function::ExternalLinkage
);
1160 // And updat ethe SCC we're iterating as well.
1161 SCC
.ReplaceNode(OldNode
, NewNode
);
1164 // Remember that we changed something.
1165 Changed
|= LocalChange
;
1166 } while (LocalChange
);
1171 bool ArgPromotion::doInitialization(CallGraph
&CG
) {
1172 return CallGraphSCCPass::doInitialization(CG
);