//===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities useful for promoting indirect call sites to
// direct call sites.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <cassert>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "call-promotion-utils"

25 /// Fix-up phi nodes in an invoke instruction's normal destination.
27 /// After versioning an invoke instruction, values coming from the original
28 /// block will now be coming from the "merge" block. For example, in the code
32 /// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
35 /// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
38 /// %t2 = phi i32 [ %t0, %then_bb ], [ %t1, %else_bb ]
42 /// %t3 = phi i32 [ %x, %orig_bb ], ...
44 /// "orig_bb" is no longer a predecessor of "normal_dst", so the phi nodes in
45 /// "normal_dst" must be fixed to refer to "merge_bb":
48 /// %t3 = phi i32 [ %x, %merge_bb ], ...
50 static void fixupPHINodeForNormalDest(InvokeInst
*Invoke
, BasicBlock
*OrigBlock
,
51 BasicBlock
*MergeBlock
) {
52 for (PHINode
&Phi
: Invoke
->getNormalDest()->phis()) {
53 int Idx
= Phi
.getBasicBlockIndex(OrigBlock
);
56 Phi
.setIncomingBlock(Idx
, MergeBlock
);
60 /// Fix-up phi nodes in an invoke instruction's unwind destination.
62 /// After versioning an invoke instruction, values coming from the original
63 /// block will now be coming from either the "then" block or the "else" block.
64 /// For example, in the code below:
67 /// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
70 /// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
73 /// %t3 = phi i32 [ %x, %orig_bb ], ...
75 /// "orig_bb" is no longer a predecessor of "unwind_dst", so the phi nodes in
76 /// "unwind_dst" must be fixed to refer to "then_bb" and "else_bb":
79 /// %t3 = phi i32 [ %x, %then_bb ], [ %x, %else_bb ], ...
81 static void fixupPHINodeForUnwindDest(InvokeInst
*Invoke
, BasicBlock
*OrigBlock
,
82 BasicBlock
*ThenBlock
,
83 BasicBlock
*ElseBlock
) {
84 for (PHINode
&Phi
: Invoke
->getUnwindDest()->phis()) {
85 int Idx
= Phi
.getBasicBlockIndex(OrigBlock
);
88 auto *V
= Phi
.getIncomingValue(Idx
);
89 Phi
.setIncomingBlock(Idx
, ThenBlock
);
90 Phi
.addIncoming(V
, ElseBlock
);
94 /// Create a phi node for the returned value of a call or invoke instruction.
96 /// After versioning a call or invoke instruction that returns a value, we have
97 /// to merge the value of the original and new instructions. We do this by
98 /// creating a phi node and replacing uses of the original instruction with this
101 /// For example, if \p OrigInst is defined in "else_bb" and \p NewInst is
102 /// defined in "then_bb", we create the following phi node:
104 /// ; Uses of the original instruction are replaced by uses of the phi node.
105 /// %t0 = phi i32 [ %orig_inst, %else_bb ], [ %new_inst, %then_bb ],
107 static void createRetPHINode(Instruction
*OrigInst
, Instruction
*NewInst
,
108 BasicBlock
*MergeBlock
, IRBuilder
<> &Builder
) {
110 if (OrigInst
->getType()->isVoidTy() || OrigInst
->use_empty())
113 Builder
.SetInsertPoint(&MergeBlock
->front());
114 PHINode
*Phi
= Builder
.CreatePHI(OrigInst
->getType(), 0);
115 SmallVector
<User
*, 16> UsersToUpdate(OrigInst
->users());
116 for (User
*U
: UsersToUpdate
)
117 U
->replaceUsesOfWith(OrigInst
, Phi
);
118 Phi
->addIncoming(OrigInst
, OrigInst
->getParent());
119 Phi
->addIncoming(NewInst
, NewInst
->getParent());
122 /// Cast a call or invoke instruction to the given type.
124 /// When promoting a call site, the return type of the call site might not match
125 /// that of the callee. If this is the case, we have to cast the returned value
126 /// to the correct type. The location of the cast depends on if we have a call
127 /// or invoke instruction.
129 /// For example, if the call instruction below requires a bitcast after
133 /// %t0 = call i32 @func()
136 /// The bitcast is placed after the call instruction:
139 /// ; Uses of the original return value are replaced by uses of the bitcast.
140 /// %t0 = call i32 @func()
141 /// %t1 = bitcast i32 %t0 to ...
144 /// A similar transformation is performed for invoke instructions. However,
145 /// since invokes are terminating, a new block is created for the bitcast. For
146 /// example, if the invoke instruction below requires a bitcast after promotion:
149 /// %t0 = invoke i32 @func() to label %normal_dst unwind label %unwind_dst
151 /// The edge between the original block and the invoke's normal destination is
152 /// split, and the bitcast is placed there:
155 /// %t0 = invoke i32 @func() to label %split_bb unwind label %unwind_dst
158 /// ; Uses of the original return value are replaced by uses of the bitcast.
159 /// %t1 = bitcast i32 %t0 to ...
160 /// br label %normal_dst
162 static void createRetBitCast(CallBase
&CB
, Type
*RetTy
, CastInst
**RetBitCast
) {
164 // Save the users of the calling instruction. These uses will be changed to
165 // use the bitcast after we create it.
166 SmallVector
<User
*, 16> UsersToUpdate(CB
.users());
168 // Determine an appropriate location to create the bitcast for the return
169 // value. The location depends on if we have a call or invoke instruction.
170 Instruction
*InsertBefore
= nullptr;
171 if (auto *Invoke
= dyn_cast
<InvokeInst
>(&CB
))
173 &SplitEdge(Invoke
->getParent(), Invoke
->getNormalDest())->front();
175 InsertBefore
= &*std::next(CB
.getIterator());
177 // Bitcast the return value to the correct type.
178 auto *Cast
= CastInst::CreateBitOrPointerCast(&CB
, RetTy
, "", InsertBefore
);
182 // Replace all the original uses of the calling instruction with the bitcast.
183 for (User
*U
: UsersToUpdate
)
184 U
->replaceUsesOfWith(&CB
, Cast
);
187 /// Predicate and clone the given call site.
189 /// This function creates an if-then-else structure at the location of the call
190 /// site. The "if" condition compares the call site's called value to the given
191 /// callee. The original call site is moved into the "else" block, and a clone
192 /// of the call site is placed in the "then" block. The cloned instruction is
195 /// For example, the call instruction below:
198 /// %t0 = call i32 %ptr()
201 /// Is replace by the following:
204 /// %cond = icmp eq i32 ()* %ptr, @func
205 /// br i1 %cond, %then_bb, %else_bb
208 /// ; The clone of the original call instruction is placed in the "then"
209 /// ; block. It is not yet promoted.
210 /// %t1 = call i32 %ptr()
214 /// ; The original call instruction is moved to the "else" block.
215 /// %t0 = call i32 %ptr()
219 /// ; Uses of the original call instruction are replaced by uses of the phi
221 /// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
224 /// A similar transformation is performed for invoke instructions. However,
225 /// since invokes are terminating, more work is required. For example, the
226 /// invoke instruction below:
229 /// %t0 = invoke %ptr() to label %normal_dst unwind label %unwind_dst
231 /// Is replace by the following:
234 /// %cond = icmp eq i32 ()* %ptr, @func
235 /// br i1 %cond, %then_bb, %else_bb
238 /// ; The clone of the original invoke instruction is placed in the "then"
239 /// ; block, and its normal destination is set to the "merge" block. It is
240 /// ; not yet promoted.
241 /// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
244 /// ; The original invoke instruction is moved into the "else" block, and
245 /// ; its normal destination is set to the "merge" block.
246 /// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
249 /// ; Uses of the original invoke instruction are replaced by uses of the
250 /// ; phi node, and the merge block branches to the normal destination.
251 /// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
254 /// An indirect musttail call is processed slightly differently in that:
255 /// 1. No merge block needed for the orginal and the cloned callsite, since
256 /// either one ends the flow. No phi node is needed either.
257 /// 2. The return statement following the original call site is duplicated too
258 /// and placed immediately after the cloned call site per the IR convention.
260 /// For example, the musttail call instruction below:
263 /// %t0 = musttail call i32 %ptr()
266 /// Is replaced by the following:
269 /// %cond = icmp eq i32 ()* %ptr, @func
270 /// br i1 %cond, %then_bb, %orig_bb
273 /// ; The clone of the original call instruction is placed in the "then"
274 /// ; block. It is not yet promoted.
275 /// %t1 = musttail call i32 %ptr()
279 /// ; The original call instruction stays in its original block.
280 /// %t0 = musttail call i32 %ptr()
282 static CallBase
&versionCallSite(CallBase
&CB
, Value
*Callee
,
283 MDNode
*BranchWeights
) {
285 IRBuilder
<> Builder(&CB
);
286 CallBase
*OrigInst
= &CB
;
287 BasicBlock
*OrigBlock
= OrigInst
->getParent();
289 // Create the compare. The called value and callee must have the same type to
291 if (CB
.getCalledOperand()->getType() != Callee
->getType())
292 Callee
= Builder
.CreateBitCast(Callee
, CB
.getCalledOperand()->getType());
293 auto *Cond
= Builder
.CreateICmpEQ(CB
.getCalledOperand(), Callee
);
295 if (OrigInst
->isMustTailCall()) {
296 // Create an if-then structure. The original instruction stays in its block,
297 // and a clone of the original instruction is placed in the "then" block.
298 Instruction
*ThenTerm
=
299 SplitBlockAndInsertIfThen(Cond
, &CB
, false, BranchWeights
);
300 BasicBlock
*ThenBlock
= ThenTerm
->getParent();
301 ThenBlock
->setName("if.true.direct_targ");
302 CallBase
*NewInst
= cast
<CallBase
>(OrigInst
->clone());
303 NewInst
->insertBefore(ThenTerm
);
305 // Place a clone of the optional bitcast after the new call site.
306 Value
*NewRetVal
= NewInst
;
307 auto Next
= OrigInst
->getNextNode();
308 if (auto *BitCast
= dyn_cast_or_null
<BitCastInst
>(Next
)) {
309 assert(BitCast
->getOperand(0) == OrigInst
&&
310 "bitcast following musttail call must use the call");
311 auto NewBitCast
= BitCast
->clone();
312 NewBitCast
->replaceUsesOfWith(OrigInst
, NewInst
);
313 NewBitCast
->insertBefore(ThenTerm
);
314 NewRetVal
= NewBitCast
;
315 Next
= BitCast
->getNextNode();
318 // Place a clone of the return instruction after the new call site.
319 ReturnInst
*Ret
= dyn_cast_or_null
<ReturnInst
>(Next
);
320 assert(Ret
&& "musttail call must precede a ret with an optional bitcast");
321 auto NewRet
= Ret
->clone();
322 if (Ret
->getReturnValue())
323 NewRet
->replaceUsesOfWith(Ret
->getReturnValue(), NewRetVal
);
324 NewRet
->insertBefore(ThenTerm
);
326 // A return instructions is terminating, so we don't need the terminator
327 // instruction just created.
328 ThenTerm
->eraseFromParent();
333 // Create an if-then-else structure. The original instruction is moved into
334 // the "else" block, and a clone of the original instruction is placed in the
336 Instruction
*ThenTerm
= nullptr;
337 Instruction
*ElseTerm
= nullptr;
338 SplitBlockAndInsertIfThenElse(Cond
, &CB
, &ThenTerm
, &ElseTerm
, BranchWeights
);
339 BasicBlock
*ThenBlock
= ThenTerm
->getParent();
340 BasicBlock
*ElseBlock
= ElseTerm
->getParent();
341 BasicBlock
*MergeBlock
= OrigInst
->getParent();
343 ThenBlock
->setName("if.true.direct_targ");
344 ElseBlock
->setName("if.false.orig_indirect");
345 MergeBlock
->setName("if.end.icp");
347 CallBase
*NewInst
= cast
<CallBase
>(OrigInst
->clone());
348 OrigInst
->moveBefore(ElseTerm
);
349 NewInst
->insertBefore(ThenTerm
);
351 // If the original call site is an invoke instruction, we have extra work to
352 // do since invoke instructions are terminating. We have to fix-up phi nodes
353 // in the invoke's normal and unwind destinations.
354 if (auto *OrigInvoke
= dyn_cast
<InvokeInst
>(OrigInst
)) {
355 auto *NewInvoke
= cast
<InvokeInst
>(NewInst
);
357 // Invoke instructions are terminating, so we don't need the terminator
358 // instructions that were just created.
359 ThenTerm
->eraseFromParent();
360 ElseTerm
->eraseFromParent();
362 // Branch from the "merge" block to the original normal destination.
363 Builder
.SetInsertPoint(MergeBlock
);
364 Builder
.CreateBr(OrigInvoke
->getNormalDest());
366 // Fix-up phi nodes in the original invoke's normal and unwind destinations.
367 fixupPHINodeForNormalDest(OrigInvoke
, OrigBlock
, MergeBlock
);
368 fixupPHINodeForUnwindDest(OrigInvoke
, MergeBlock
, ThenBlock
, ElseBlock
);
370 // Now set the normal destinations of the invoke instructions to be the
372 OrigInvoke
->setNormalDest(MergeBlock
);
373 NewInvoke
->setNormalDest(MergeBlock
);
376 // Create a phi node for the returned value of the call site.
377 createRetPHINode(OrigInst
, NewInst
, MergeBlock
, Builder
);
382 bool llvm::isLegalToPromote(const CallBase
&CB
, Function
*Callee
,
383 const char **FailureReason
) {
384 assert(!CB
.getCalledFunction() && "Only indirect call sites can be promoted");
386 auto &DL
= Callee
->getParent()->getDataLayout();
388 // Check the return type. The callee's return value type must be bitcast
389 // compatible with the call site's type.
390 Type
*CallRetTy
= CB
.getType();
391 Type
*FuncRetTy
= Callee
->getReturnType();
392 if (CallRetTy
!= FuncRetTy
)
393 if (!CastInst::isBitOrNoopPointerCastable(FuncRetTy
, CallRetTy
, DL
)) {
395 *FailureReason
= "Return type mismatch";
399 // The number of formal arguments of the callee.
400 unsigned NumParams
= Callee
->getFunctionType()->getNumParams();
402 // The number of actual arguments in the call.
403 unsigned NumArgs
= CB
.arg_size();
405 // Check the number of arguments. The callee and call site must agree on the
406 // number of arguments.
407 if (NumArgs
!= NumParams
&& !Callee
->isVarArg()) {
409 *FailureReason
= "The number of arguments mismatch";
413 // Check the argument types. The callee's formal argument types must be
414 // bitcast compatible with the corresponding actual argument types of the call
417 for (; I
< NumParams
; ++I
) {
418 Type
*FormalTy
= Callee
->getFunctionType()->getFunctionParamType(I
);
419 Type
*ActualTy
= CB
.getArgOperand(I
)->getType();
420 if (FormalTy
== ActualTy
)
422 if (!CastInst::isBitOrNoopPointerCastable(ActualTy
, FormalTy
, DL
)) {
424 *FailureReason
= "Argument type mismatch";
427 // Make sure that the callee and call agree on byval/inalloca. The types do
428 // not have to match.
430 if (Callee
->hasParamAttribute(I
, Attribute::ByVal
) !=
431 CB
.getAttributes().hasParamAttr(I
, Attribute::ByVal
)) {
433 *FailureReason
= "byval mismatch";
436 if (Callee
->hasParamAttribute(I
, Attribute::InAlloca
) !=
437 CB
.getAttributes().hasParamAttr(I
, Attribute::InAlloca
)) {
439 *FailureReason
= "inalloca mismatch";
443 for (; I
< NumArgs
; I
++) {
444 // Vararg functions can have more arguments than parameters.
445 assert(Callee
->isVarArg());
446 if (CB
.paramHasAttr(I
, Attribute::StructRet
)) {
448 *FailureReason
= "SRet arg to vararg function";
456 CallBase
&llvm::promoteCall(CallBase
&CB
, Function
*Callee
,
457 CastInst
**RetBitCast
) {
458 assert(!CB
.getCalledFunction() && "Only indirect call sites can be promoted");
460 // Set the called function of the call site to be the given callee (but don't
462 CB
.setCalledOperand(Callee
);
464 // Since the call site will no longer be direct, we must clear metadata that
465 // is only appropriate for indirect calls. This includes !prof and !callees
467 CB
.setMetadata(LLVMContext::MD_prof
, nullptr);
468 CB
.setMetadata(LLVMContext::MD_callees
, nullptr);
470 // If the function type of the call site matches that of the callee, no
471 // additional work is required.
472 if (CB
.getFunctionType() == Callee
->getFunctionType())
475 // Save the return types of the call site and callee.
476 Type
*CallSiteRetTy
= CB
.getType();
477 Type
*CalleeRetTy
= Callee
->getReturnType();
479 // Change the function type of the call site the match that of the callee.
480 CB
.mutateFunctionType(Callee
->getFunctionType());
482 // Inspect the arguments of the call site. If an argument's type doesn't
483 // match the corresponding formal argument's type in the callee, bitcast it
484 // to the correct type.
485 auto CalleeType
= Callee
->getFunctionType();
486 auto CalleeParamNum
= CalleeType
->getNumParams();
488 LLVMContext
&Ctx
= Callee
->getContext();
489 const AttributeList
&CallerPAL
= CB
.getAttributes();
490 // The new list of argument attributes.
491 SmallVector
<AttributeSet
, 4> NewArgAttrs
;
492 bool AttributeChanged
= false;
494 for (unsigned ArgNo
= 0; ArgNo
< CalleeParamNum
; ++ArgNo
) {
495 auto *Arg
= CB
.getArgOperand(ArgNo
);
496 Type
*FormalTy
= CalleeType
->getParamType(ArgNo
);
497 Type
*ActualTy
= Arg
->getType();
498 if (FormalTy
!= ActualTy
) {
499 auto *Cast
= CastInst::CreateBitOrPointerCast(Arg
, FormalTy
, "", &CB
);
500 CB
.setArgOperand(ArgNo
, Cast
);
502 // Remove any incompatible attributes for the argument.
503 AttrBuilder
ArgAttrs(CallerPAL
.getParamAttrs(ArgNo
));
504 ArgAttrs
.remove(AttributeFuncs::typeIncompatible(FormalTy
));
506 // We may have a different byval/inalloca type.
507 if (ArgAttrs
.getByValType())
508 ArgAttrs
.addByValAttr(Callee
->getParamByValType(ArgNo
));
509 if (ArgAttrs
.getInAllocaType())
510 ArgAttrs
.addInAllocaAttr(Callee
->getParamInAllocaType(ArgNo
));
512 NewArgAttrs
.push_back(AttributeSet::get(Ctx
, ArgAttrs
));
513 AttributeChanged
= true;
515 NewArgAttrs
.push_back(CallerPAL
.getParamAttrs(ArgNo
));
518 // If the return type of the call site doesn't match that of the callee, cast
519 // the returned value to the appropriate type.
520 // Remove any incompatible return value attribute.
521 AttrBuilder
RAttrs(CallerPAL
, AttributeList::ReturnIndex
);
522 if (!CallSiteRetTy
->isVoidTy() && CallSiteRetTy
!= CalleeRetTy
) {
523 createRetBitCast(CB
, CallSiteRetTy
, RetBitCast
);
524 RAttrs
.remove(AttributeFuncs::typeIncompatible(CalleeRetTy
));
525 AttributeChanged
= true;
528 // Set the new callsite attribute.
529 if (AttributeChanged
)
530 CB
.setAttributes(AttributeList::get(Ctx
, CallerPAL
.getFnAttrs(),
531 AttributeSet::get(Ctx
, RAttrs
),
537 CallBase
&llvm::promoteCallWithIfThenElse(CallBase
&CB
, Function
*Callee
,
538 MDNode
*BranchWeights
) {
540 // Version the indirect call site. If the called value is equal to the given
541 // callee, 'NewInst' will be executed, otherwise the original call site will
543 CallBase
&NewInst
= versionCallSite(CB
, Callee
, BranchWeights
);
545 // Promote 'NewInst' so that it directly calls the desired function.
546 return promoteCall(NewInst
, Callee
);
549 bool llvm::tryPromoteCall(CallBase
&CB
) {
550 assert(!CB
.getCalledFunction());
551 Module
*M
= CB
.getCaller()->getParent();
552 const DataLayout
&DL
= M
->getDataLayout();
553 Value
*Callee
= CB
.getCalledOperand();
555 LoadInst
*VTableEntryLoad
= dyn_cast
<LoadInst
>(Callee
);
556 if (!VTableEntryLoad
)
557 return false; // Not a vtable entry load.
558 Value
*VTableEntryPtr
= VTableEntryLoad
->getPointerOperand();
559 APInt
VTableOffset(DL
.getTypeSizeInBits(VTableEntryPtr
->getType()), 0);
560 Value
*VTableBasePtr
= VTableEntryPtr
->stripAndAccumulateConstantOffsets(
561 DL
, VTableOffset
, /* AllowNonInbounds */ true);
562 LoadInst
*VTablePtrLoad
= dyn_cast
<LoadInst
>(VTableBasePtr
);
564 return false; // Not a vtable load.
565 Value
*Object
= VTablePtrLoad
->getPointerOperand();
566 APInt
ObjectOffset(DL
.getTypeSizeInBits(Object
->getType()), 0);
567 Value
*ObjectBase
= Object
->stripAndAccumulateConstantOffsets(
568 DL
, ObjectOffset
, /* AllowNonInbounds */ true);
569 if (!(isa
<AllocaInst
>(ObjectBase
) && ObjectOffset
== 0))
570 // Not an Alloca or the offset isn't zero.
573 // Look for the vtable pointer store into the object by the ctor.
574 BasicBlock::iterator
BBI(VTablePtrLoad
);
575 Value
*VTablePtr
= FindAvailableLoadedValue(
576 VTablePtrLoad
, VTablePtrLoad
->getParent(), BBI
, 0, nullptr, nullptr);
578 return false; // No vtable found.
579 APInt
VTableOffsetGVBase(DL
.getTypeSizeInBits(VTablePtr
->getType()), 0);
580 Value
*VTableGVBase
= VTablePtr
->stripAndAccumulateConstantOffsets(
581 DL
, VTableOffsetGVBase
, /* AllowNonInbounds */ true);
582 GlobalVariable
*GV
= dyn_cast
<GlobalVariable
>(VTableGVBase
);
583 if (!(GV
&& GV
->isConstant() && GV
->hasDefinitiveInitializer()))
584 // Not in the form of a global constant variable with an initializer.
587 Constant
*VTableGVInitializer
= GV
->getInitializer();
588 APInt VTableGVOffset
= VTableOffsetGVBase
+ VTableOffset
;
589 if (!(VTableGVOffset
.getActiveBits() <= 64))
590 return false; // Out of range.
591 Constant
*Ptr
= getPointerAtOffset(VTableGVInitializer
,
592 VTableGVOffset
.getZExtValue(),
595 return false; // No constant (function) pointer found.
596 Function
*DirectCallee
= dyn_cast
<Function
>(Ptr
->stripPointerCasts());
598 return false; // No function pointer found.
600 if (!isLegalToPromote(CB
, DirectCallee
))
604 promoteCall(CB
, DirectCallee
);