1 //===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// This file implements utilities useful for promoting indirect call sites to
// direct call sites.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Transforms/Utils/CallPromotionUtils.h"
15 #include "llvm/Analysis/CtxProfAnalysis.h"
16 #include "llvm/Analysis/Loads.h"
17 #include "llvm/Analysis/TypeMetadataUtils.h"
18 #include "llvm/IR/AttributeMask.h"
19 #include "llvm/IR/Constant.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/Instructions.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/ProfileData/PGOCtxProfReader.h"
25 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
29 #define DEBUG_TYPE "call-promotion-utils"
31 /// Fix-up phi nodes in an invoke instruction's normal destination.
33 /// After versioning an invoke instruction, values coming from the original
34 /// block will now be coming from the "merge" block. For example, in the code
38 /// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
41 /// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
44 /// %t2 = phi i32 [ %t0, %then_bb ], [ %t1, %else_bb ]
48 /// %t3 = phi i32 [ %x, %orig_bb ], ...
50 /// "orig_bb" is no longer a predecessor of "normal_dst", so the phi nodes in
51 /// "normal_dst" must be fixed to refer to "merge_bb":
54 /// %t3 = phi i32 [ %x, %merge_bb ], ...
56 static void fixupPHINodeForNormalDest(InvokeInst
*Invoke
, BasicBlock
*OrigBlock
,
57 BasicBlock
*MergeBlock
) {
58 for (PHINode
&Phi
: Invoke
->getNormalDest()->phis()) {
59 int Idx
= Phi
.getBasicBlockIndex(OrigBlock
);
62 Phi
.setIncomingBlock(Idx
, MergeBlock
);
66 /// Fix-up phi nodes in an invoke instruction's unwind destination.
68 /// After versioning an invoke instruction, values coming from the original
69 /// block will now be coming from either the "then" block or the "else" block.
70 /// For example, in the code below:
73 /// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
76 /// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
79 /// %t3 = phi i32 [ %x, %orig_bb ], ...
81 /// "orig_bb" is no longer a predecessor of "unwind_dst", so the phi nodes in
82 /// "unwind_dst" must be fixed to refer to "then_bb" and "else_bb":
85 /// %t3 = phi i32 [ %x, %then_bb ], [ %x, %else_bb ], ...
87 static void fixupPHINodeForUnwindDest(InvokeInst
*Invoke
, BasicBlock
*OrigBlock
,
88 BasicBlock
*ThenBlock
,
89 BasicBlock
*ElseBlock
) {
90 for (PHINode
&Phi
: Invoke
->getUnwindDest()->phis()) {
91 int Idx
= Phi
.getBasicBlockIndex(OrigBlock
);
94 auto *V
= Phi
.getIncomingValue(Idx
);
95 Phi
.setIncomingBlock(Idx
, ThenBlock
);
96 Phi
.addIncoming(V
, ElseBlock
);
100 /// Create a phi node for the returned value of a call or invoke instruction.
102 /// After versioning a call or invoke instruction that returns a value, we have
103 /// to merge the value of the original and new instructions. We do this by
104 /// creating a phi node and replacing uses of the original instruction with this
107 /// For example, if \p OrigInst is defined in "else_bb" and \p NewInst is
108 /// defined in "then_bb", we create the following phi node:
110 /// ; Uses of the original instruction are replaced by uses of the phi node.
111 /// %t0 = phi i32 [ %orig_inst, %else_bb ], [ %new_inst, %then_bb ],
113 static void createRetPHINode(Instruction
*OrigInst
, Instruction
*NewInst
,
114 BasicBlock
*MergeBlock
, IRBuilder
<> &Builder
) {
116 if (OrigInst
->getType()->isVoidTy() || OrigInst
->use_empty())
119 Builder
.SetInsertPoint(MergeBlock
, MergeBlock
->begin());
120 PHINode
*Phi
= Builder
.CreatePHI(OrigInst
->getType(), 0);
121 SmallVector
<User
*, 16> UsersToUpdate(OrigInst
->users());
122 for (User
*U
: UsersToUpdate
)
123 U
->replaceUsesOfWith(OrigInst
, Phi
);
124 Phi
->addIncoming(OrigInst
, OrigInst
->getParent());
125 Phi
->addIncoming(NewInst
, NewInst
->getParent());
128 /// Cast a call or invoke instruction to the given type.
130 /// When promoting a call site, the return type of the call site might not match
131 /// that of the callee. If this is the case, we have to cast the returned value
132 /// to the correct type. The location of the cast depends on if we have a call
133 /// or invoke instruction.
135 /// For example, if the call instruction below requires a bitcast after
139 /// %t0 = call i32 @func()
142 /// The bitcast is placed after the call instruction:
145 /// ; Uses of the original return value are replaced by uses of the bitcast.
146 /// %t0 = call i32 @func()
147 /// %t1 = bitcast i32 %t0 to ...
150 /// A similar transformation is performed for invoke instructions. However,
151 /// since invokes are terminating, a new block is created for the bitcast. For
152 /// example, if the invoke instruction below requires a bitcast after promotion:
155 /// %t0 = invoke i32 @func() to label %normal_dst unwind label %unwind_dst
157 /// The edge between the original block and the invoke's normal destination is
158 /// split, and the bitcast is placed there:
161 /// %t0 = invoke i32 @func() to label %split_bb unwind label %unwind_dst
164 /// ; Uses of the original return value are replaced by uses of the bitcast.
165 /// %t1 = bitcast i32 %t0 to ...
166 /// br label %normal_dst
168 static void createRetBitCast(CallBase
&CB
, Type
*RetTy
, CastInst
**RetBitCast
) {
170 // Save the users of the calling instruction. These uses will be changed to
171 // use the bitcast after we create it.
172 SmallVector
<User
*, 16> UsersToUpdate(CB
.users());
174 // Determine an appropriate location to create the bitcast for the return
175 // value. The location depends on if we have a call or invoke instruction.
176 BasicBlock::iterator InsertBefore
;
177 if (auto *Invoke
= dyn_cast
<InvokeInst
>(&CB
))
179 SplitEdge(Invoke
->getParent(), Invoke
->getNormalDest())->begin();
181 InsertBefore
= std::next(CB
.getIterator());
183 // Bitcast the return value to the correct type.
184 auto *Cast
= CastInst::CreateBitOrPointerCast(&CB
, RetTy
, "", InsertBefore
);
188 // Replace all the original uses of the calling instruction with the bitcast.
189 for (User
*U
: UsersToUpdate
)
190 U
->replaceUsesOfWith(&CB
, Cast
);
193 /// Predicate and clone the given call site.
195 /// This function creates an if-then-else structure at the location of the call
196 /// site. The "if" condition is specified by `Cond`.
197 /// The original call site is moved into the "else" block, and a clone of the
198 /// call site is placed in the "then" block. The cloned instruction is returned.
200 /// For example, the call instruction below:
203 /// %t0 = call i32 %ptr()
206 /// Is replace by the following:
210 /// br i1 %cond, %then_bb, %else_bb
213 /// ; The clone of the original call instruction is placed in the "then"
214 /// ; block. It is not yet promoted.
215 /// %t1 = call i32 %ptr()
219 /// ; The original call instruction is moved to the "else" block.
220 /// %t0 = call i32 %ptr()
224 /// ; Uses of the original call instruction are replaced by uses of the phi
226 /// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
229 /// A similar transformation is performed for invoke instructions. However,
230 /// since invokes are terminating, more work is required. For example, the
231 /// invoke instruction below:
234 /// %t0 = invoke %ptr() to label %normal_dst unwind label %unwind_dst
236 /// Is replace by the following:
240 /// br i1 %cond, %then_bb, %else_bb
243 /// ; The clone of the original invoke instruction is placed in the "then"
244 /// ; block, and its normal destination is set to the "merge" block. It is
245 /// ; not yet promoted.
246 /// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
249 /// ; The original invoke instruction is moved into the "else" block, and
250 /// ; its normal destination is set to the "merge" block.
251 /// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
254 /// ; Uses of the original invoke instruction are replaced by uses of the
255 /// ; phi node, and the merge block branches to the normal destination.
256 /// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
259 /// An indirect musttail call is processed slightly differently in that:
260 /// 1. No merge block needed for the orginal and the cloned callsite, since
261 /// either one ends the flow. No phi node is needed either.
262 /// 2. The return statement following the original call site is duplicated too
263 /// and placed immediately after the cloned call site per the IR convention.
265 /// For example, the musttail call instruction below:
268 /// %t0 = musttail call i32 %ptr()
271 /// Is replaced by the following:
275 /// br i1 %cond, %then_bb, %orig_bb
278 /// ; The clone of the original call instruction is placed in the "then"
279 /// ; block. It is not yet promoted.
280 /// %t1 = musttail call i32 %ptr()
284 /// ; The original call instruction stays in its original block.
285 /// %t0 = musttail call i32 %ptr()
287 static CallBase
&versionCallSiteWithCond(CallBase
&CB
, Value
*Cond
,
288 MDNode
*BranchWeights
) {
290 IRBuilder
<> Builder(&CB
);
291 CallBase
*OrigInst
= &CB
;
292 BasicBlock
*OrigBlock
= OrigInst
->getParent();
294 if (OrigInst
->isMustTailCall()) {
295 // Create an if-then structure. The original instruction stays in its block,
296 // and a clone of the original instruction is placed in the "then" block.
297 Instruction
*ThenTerm
=
298 SplitBlockAndInsertIfThen(Cond
, &CB
, false, BranchWeights
);
299 BasicBlock
*ThenBlock
= ThenTerm
->getParent();
300 ThenBlock
->setName("if.true.direct_targ");
301 CallBase
*NewInst
= cast
<CallBase
>(OrigInst
->clone());
302 NewInst
->insertBefore(ThenTerm
);
304 // Place a clone of the optional bitcast after the new call site.
305 Value
*NewRetVal
= NewInst
;
306 auto Next
= OrigInst
->getNextNode();
307 if (auto *BitCast
= dyn_cast_or_null
<BitCastInst
>(Next
)) {
308 assert(BitCast
->getOperand(0) == OrigInst
&&
309 "bitcast following musttail call must use the call");
310 auto NewBitCast
= BitCast
->clone();
311 NewBitCast
->replaceUsesOfWith(OrigInst
, NewInst
);
312 NewBitCast
->insertBefore(ThenTerm
);
313 NewRetVal
= NewBitCast
;
314 Next
= BitCast
->getNextNode();
317 // Place a clone of the return instruction after the new call site.
318 ReturnInst
*Ret
= dyn_cast_or_null
<ReturnInst
>(Next
);
319 assert(Ret
&& "musttail call must precede a ret with an optional bitcast");
320 auto NewRet
= Ret
->clone();
321 if (Ret
->getReturnValue())
322 NewRet
->replaceUsesOfWith(Ret
->getReturnValue(), NewRetVal
);
323 NewRet
->insertBefore(ThenTerm
);
325 // A return instructions is terminating, so we don't need the terminator
326 // instruction just created.
327 ThenTerm
->eraseFromParent();
332 // Create an if-then-else structure. The original instruction is moved into
333 // the "else" block, and a clone of the original instruction is placed in the
335 Instruction
*ThenTerm
= nullptr;
336 Instruction
*ElseTerm
= nullptr;
337 SplitBlockAndInsertIfThenElse(Cond
, &CB
, &ThenTerm
, &ElseTerm
, BranchWeights
);
338 BasicBlock
*ThenBlock
= ThenTerm
->getParent();
339 BasicBlock
*ElseBlock
= ElseTerm
->getParent();
340 BasicBlock
*MergeBlock
= OrigInst
->getParent();
342 ThenBlock
->setName("if.true.direct_targ");
343 ElseBlock
->setName("if.false.orig_indirect");
344 MergeBlock
->setName("if.end.icp");
346 CallBase
*NewInst
= cast
<CallBase
>(OrigInst
->clone());
347 OrigInst
->moveBefore(ElseTerm
);
348 NewInst
->insertBefore(ThenTerm
);
350 // If the original call site is an invoke instruction, we have extra work to
351 // do since invoke instructions are terminating. We have to fix-up phi nodes
352 // in the invoke's normal and unwind destinations.
353 if (auto *OrigInvoke
= dyn_cast
<InvokeInst
>(OrigInst
)) {
354 auto *NewInvoke
= cast
<InvokeInst
>(NewInst
);
356 // Invoke instructions are terminating, so we don't need the terminator
357 // instructions that were just created.
358 ThenTerm
->eraseFromParent();
359 ElseTerm
->eraseFromParent();
361 // Branch from the "merge" block to the original normal destination.
362 Builder
.SetInsertPoint(MergeBlock
);
363 Builder
.CreateBr(OrigInvoke
->getNormalDest());
365 // Fix-up phi nodes in the original invoke's normal and unwind destinations.
366 fixupPHINodeForNormalDest(OrigInvoke
, OrigBlock
, MergeBlock
);
367 fixupPHINodeForUnwindDest(OrigInvoke
, MergeBlock
, ThenBlock
, ElseBlock
);
369 // Now set the normal destinations of the invoke instructions to be the
371 OrigInvoke
->setNormalDest(MergeBlock
);
372 NewInvoke
->setNormalDest(MergeBlock
);
375 // Create a phi node for the returned value of the call site.
376 createRetPHINode(OrigInst
, NewInst
, MergeBlock
, Builder
);
381 // Predicate and clone the given call site using condition `CB.callee ==
382 // Callee`. See the comment `versionCallSiteWithCond` for the transformation.
383 CallBase
&llvm::versionCallSite(CallBase
&CB
, Value
*Callee
,
384 MDNode
*BranchWeights
) {
386 IRBuilder
<> Builder(&CB
);
388 // Create the compare. The called value and callee must have the same type to
390 if (CB
.getCalledOperand()->getType() != Callee
->getType())
391 Callee
= Builder
.CreateBitCast(Callee
, CB
.getCalledOperand()->getType());
392 auto *Cond
= Builder
.CreateICmpEQ(CB
.getCalledOperand(), Callee
);
394 return versionCallSiteWithCond(CB
, Cond
, BranchWeights
);
397 bool llvm::isLegalToPromote(const CallBase
&CB
, Function
*Callee
,
398 const char **FailureReason
) {
399 assert(!CB
.getCalledFunction() && "Only indirect call sites can be promoted");
401 auto &DL
= Callee
->getDataLayout();
403 // Check the return type. The callee's return value type must be bitcast
404 // compatible with the call site's type.
405 Type
*CallRetTy
= CB
.getType();
406 Type
*FuncRetTy
= Callee
->getReturnType();
407 if (CallRetTy
!= FuncRetTy
)
408 if (!CastInst::isBitOrNoopPointerCastable(FuncRetTy
, CallRetTy
, DL
)) {
410 *FailureReason
= "Return type mismatch";
414 // The number of formal arguments of the callee.
415 unsigned NumParams
= Callee
->getFunctionType()->getNumParams();
417 // The number of actual arguments in the call.
418 unsigned NumArgs
= CB
.arg_size();
420 // Check the number of arguments. The callee and call site must agree on the
421 // number of arguments.
422 if (NumArgs
!= NumParams
&& !Callee
->isVarArg()) {
424 *FailureReason
= "The number of arguments mismatch";
428 // Check the argument types. The callee's formal argument types must be
429 // bitcast compatible with the corresponding actual argument types of the call
432 for (; I
< NumParams
; ++I
) {
433 // Make sure that the callee and call agree on byval/inalloca. The types do
434 // not have to match.
435 if (Callee
->hasParamAttribute(I
, Attribute::ByVal
) !=
436 CB
.getAttributes().hasParamAttr(I
, Attribute::ByVal
)) {
438 *FailureReason
= "byval mismatch";
441 if (Callee
->hasParamAttribute(I
, Attribute::InAlloca
) !=
442 CB
.getAttributes().hasParamAttr(I
, Attribute::InAlloca
)) {
444 *FailureReason
= "inalloca mismatch";
448 Type
*FormalTy
= Callee
->getFunctionType()->getFunctionParamType(I
);
449 Type
*ActualTy
= CB
.getArgOperand(I
)->getType();
450 if (FormalTy
== ActualTy
)
452 if (!CastInst::isBitOrNoopPointerCastable(ActualTy
, FormalTy
, DL
)) {
454 *FailureReason
= "Argument type mismatch";
458 // MustTail call needs stricter type match. See
459 // Verifier::verifyMustTailCall().
460 if (CB
.isMustTailCall()) {
461 PointerType
*PF
= dyn_cast
<PointerType
>(FormalTy
);
462 PointerType
*PA
= dyn_cast
<PointerType
>(ActualTy
);
463 if (!PF
|| !PA
|| PF
->getAddressSpace() != PA
->getAddressSpace()) {
465 *FailureReason
= "Musttail call Argument type mismatch";
470 for (; I
< NumArgs
; I
++) {
471 // Vararg functions can have more arguments than parameters.
472 assert(Callee
->isVarArg());
473 if (CB
.paramHasAttr(I
, Attribute::StructRet
)) {
475 *FailureReason
= "SRet arg to vararg function";
483 CallBase
&llvm::promoteCall(CallBase
&CB
, Function
*Callee
,
484 CastInst
**RetBitCast
) {
485 assert(!CB
.getCalledFunction() && "Only indirect call sites can be promoted");
487 // Set the called function of the call site to be the given callee (but don't
489 CB
.setCalledOperand(Callee
);
491 // Since the call site will no longer be direct, we must clear metadata that
492 // is only appropriate for indirect calls. This includes !prof and !callees
494 CB
.setMetadata(LLVMContext::MD_prof
, nullptr);
495 CB
.setMetadata(LLVMContext::MD_callees
, nullptr);
497 // If the function type of the call site matches that of the callee, no
498 // additional work is required.
499 if (CB
.getFunctionType() == Callee
->getFunctionType())
502 // Save the return types of the call site and callee.
503 Type
*CallSiteRetTy
= CB
.getType();
504 Type
*CalleeRetTy
= Callee
->getReturnType();
506 // Change the function type of the call site the match that of the callee.
507 CB
.mutateFunctionType(Callee
->getFunctionType());
509 // Inspect the arguments of the call site. If an argument's type doesn't
510 // match the corresponding formal argument's type in the callee, bitcast it
511 // to the correct type.
512 auto CalleeType
= Callee
->getFunctionType();
513 auto CalleeParamNum
= CalleeType
->getNumParams();
515 LLVMContext
&Ctx
= Callee
->getContext();
516 const AttributeList
&CallerPAL
= CB
.getAttributes();
517 // The new list of argument attributes.
518 SmallVector
<AttributeSet
, 4> NewArgAttrs
;
519 bool AttributeChanged
= false;
521 for (unsigned ArgNo
= 0; ArgNo
< CalleeParamNum
; ++ArgNo
) {
522 auto *Arg
= CB
.getArgOperand(ArgNo
);
523 Type
*FormalTy
= CalleeType
->getParamType(ArgNo
);
524 Type
*ActualTy
= Arg
->getType();
525 if (FormalTy
!= ActualTy
) {
527 CastInst::CreateBitOrPointerCast(Arg
, FormalTy
, "", CB
.getIterator());
528 CB
.setArgOperand(ArgNo
, Cast
);
530 // Remove any incompatible attributes for the argument.
531 AttrBuilder
ArgAttrs(Ctx
, CallerPAL
.getParamAttrs(ArgNo
));
532 ArgAttrs
.remove(AttributeFuncs::typeIncompatible(
533 FormalTy
, CallerPAL
.getParamAttrs(ArgNo
)));
535 // We may have a different byval/inalloca type.
536 if (ArgAttrs
.getByValType())
537 ArgAttrs
.addByValAttr(Callee
->getParamByValType(ArgNo
));
538 if (ArgAttrs
.getInAllocaType())
539 ArgAttrs
.addInAllocaAttr(Callee
->getParamInAllocaType(ArgNo
));
541 NewArgAttrs
.push_back(AttributeSet::get(Ctx
, ArgAttrs
));
542 AttributeChanged
= true;
544 NewArgAttrs
.push_back(CallerPAL
.getParamAttrs(ArgNo
));
547 // If the return type of the call site doesn't match that of the callee, cast
548 // the returned value to the appropriate type.
549 // Remove any incompatible return value attribute.
550 AttrBuilder
RAttrs(Ctx
, CallerPAL
.getRetAttrs());
551 if (!CallSiteRetTy
->isVoidTy() && CallSiteRetTy
!= CalleeRetTy
) {
552 createRetBitCast(CB
, CallSiteRetTy
, RetBitCast
);
554 AttributeFuncs::typeIncompatible(CalleeRetTy
, CallerPAL
.getRetAttrs()));
555 AttributeChanged
= true;
558 // Set the new callsite attribute.
559 if (AttributeChanged
)
560 CB
.setAttributes(AttributeList::get(Ctx
, CallerPAL
.getFnAttrs(),
561 AttributeSet::get(Ctx
, RAttrs
),
567 CallBase
&llvm::promoteCallWithIfThenElse(CallBase
&CB
, Function
*Callee
,
568 MDNode
*BranchWeights
) {
570 // Version the indirect call site. If the called value is equal to the given
571 // callee, 'NewInst' will be executed, otherwise the original call site will
573 CallBase
&NewInst
= versionCallSite(CB
, Callee
, BranchWeights
);
575 // Promote 'NewInst' so that it directly calls the desired function.
576 return promoteCall(NewInst
, Callee
);
579 CallBase
*llvm::promoteCallWithIfThenElse(CallBase
&CB
, Function
&Callee
,
580 PGOContextualProfile
&CtxProf
) {
581 assert(CB
.isIndirectCall());
582 if (!CtxProf
.isFunctionKnown(Callee
))
584 auto &Caller
= *CB
.getFunction();
585 auto *CSInstr
= CtxProfAnalysis::getCallsiteInstrumentation(CB
);
588 const uint64_t CSIndex
= CSInstr
->getIndex()->getZExtValue();
590 CallBase
&DirectCall
= promoteCall(
591 versionCallSite(CB
, &Callee
, /*BranchWeights=*/nullptr), &Callee
);
592 CSInstr
->moveBefore(&CB
);
593 const auto NewCSID
= CtxProf
.allocateNextCallsiteIndex(Caller
);
594 auto *NewCSInstr
= cast
<InstrProfCallsite
>(CSInstr
->clone());
595 NewCSInstr
->setIndex(NewCSID
);
596 NewCSInstr
->setCallee(&Callee
);
597 NewCSInstr
->insertBefore(&DirectCall
);
598 auto &DirectBB
= *DirectCall
.getParent();
599 auto &IndirectBB
= *CB
.getParent();
601 assert((CtxProfAnalysis::getBBInstrumentation(IndirectBB
) == nullptr) &&
602 "The ICP direct BB is new, it shouldn't have instrumentation");
603 assert((CtxProfAnalysis::getBBInstrumentation(DirectBB
) == nullptr) &&
604 "The ICP indirect BB is new, it shouldn't have instrumentation");
606 // Allocate counters for the new basic blocks.
607 const uint32_t DirectID
= CtxProf
.allocateNextCounterIndex(Caller
);
608 const uint32_t IndirectID
= CtxProf
.allocateNextCounterIndex(Caller
);
610 CtxProfAnalysis::getBBInstrumentation(Caller
.getEntryBlock());
611 auto *DirectBBIns
= cast
<InstrProfCntrInstBase
>(EntryBBIns
->clone());
612 DirectBBIns
->setIndex(DirectID
);
613 DirectBBIns
->insertInto(&DirectBB
, DirectBB
.getFirstInsertionPt());
615 auto *IndirectBBIns
= cast
<InstrProfCntrInstBase
>(EntryBBIns
->clone());
616 IndirectBBIns
->setIndex(IndirectID
);
617 IndirectBBIns
->insertInto(&IndirectBB
, IndirectBB
.getFirstInsertionPt());
619 const GlobalValue::GUID CalleeGUID
= AssignGUIDPass::getGUID(Callee
);
620 const uint32_t NewCountersSize
= IndirectID
+ 1;
622 auto ProfileUpdater
= [&](PGOCtxProfContext
&Ctx
) {
623 assert(Ctx
.guid() == AssignGUIDPass::getGUID(Caller
));
624 assert(NewCountersSize
- 2 == Ctx
.counters().size());
625 // All the ctx-es belonging to a function must have the same size counters.
626 Ctx
.resizeCounters(NewCountersSize
);
628 // Maybe in this context, the indirect callsite wasn't observed at all. That
629 // would make both direct and indirect BBs cold - which is what we already
630 // have from resising the counters.
631 if (!Ctx
.hasCallsite(CSIndex
))
633 auto &CSData
= Ctx
.callsite(CSIndex
);
635 uint64_t TotalCount
= 0;
636 for (const auto &[_
, V
] : CSData
)
637 TotalCount
+= V
.getEntrycount();
638 uint64_t DirectCount
= 0;
639 // If we called the direct target, update the DirectCount. If we didn't, we
640 // still want to update the indirect BB (to which the TotalCount goes, in
642 if (auto It
= CSData
.find(CalleeGUID
); It
!= CSData
.end()) {
643 assert(CalleeGUID
== It
->second
.guid());
644 DirectCount
= It
->second
.getEntrycount();
645 // This direct target needs to be moved to this caller under the
646 // newly-allocated callsite index.
647 assert(Ctx
.callsites().count(NewCSID
) == 0);
648 Ctx
.ingestContext(NewCSID
, std::move(It
->second
));
649 CSData
.erase(CalleeGUID
);
652 assert(TotalCount
>= DirectCount
);
653 uint64_t IndirectCount
= TotalCount
- DirectCount
;
654 // The ICP's effect is as-if the direct BB would have been taken DirectCount
655 // times, and the indirect BB, IndirectCount times
656 Ctx
.counters()[DirectID
] = DirectCount
;
657 Ctx
.counters()[IndirectID
] = IndirectCount
;
660 CtxProf
.update(ProfileUpdater
, Caller
);
664 CallBase
&llvm::promoteCallWithVTableCmp(CallBase
&CB
, Instruction
*VPtr
,
666 ArrayRef
<Constant
*> AddressPoints
,
667 MDNode
*BranchWeights
) {
668 assert(!AddressPoints
.empty() && "Caller should guarantee");
669 IRBuilder
<> Builder(&CB
);
670 SmallVector
<Value
*, 2> ICmps
;
671 for (auto &AddressPoint
: AddressPoints
)
672 ICmps
.push_back(Builder
.CreateICmpEQ(VPtr
, AddressPoint
));
674 // TODO: Perform tree height reduction if the number of ICmps is high.
675 Value
*Cond
= Builder
.CreateOr(ICmps
);
677 // Version the indirect call site. If Cond is true, 'NewInst' will be
678 // executed, otherwise the original call site will be executed.
679 CallBase
&NewInst
= versionCallSiteWithCond(CB
, Cond
, BranchWeights
);
681 // Promote 'NewInst' so that it directly calls the desired function.
682 return promoteCall(NewInst
, Callee
);
685 bool llvm::tryPromoteCall(CallBase
&CB
) {
686 assert(!CB
.getCalledFunction());
687 Module
*M
= CB
.getCaller()->getParent();
688 const DataLayout
&DL
= M
->getDataLayout();
689 Value
*Callee
= CB
.getCalledOperand();
691 LoadInst
*VTableEntryLoad
= dyn_cast
<LoadInst
>(Callee
);
692 if (!VTableEntryLoad
)
693 return false; // Not a vtable entry load.
694 Value
*VTableEntryPtr
= VTableEntryLoad
->getPointerOperand();
695 APInt
VTableOffset(DL
.getIndexTypeSizeInBits(VTableEntryPtr
->getType()), 0);
696 Value
*VTableBasePtr
= VTableEntryPtr
->stripAndAccumulateConstantOffsets(
697 DL
, VTableOffset
, /* AllowNonInbounds */ true);
698 LoadInst
*VTablePtrLoad
= dyn_cast
<LoadInst
>(VTableBasePtr
);
700 return false; // Not a vtable load.
701 Value
*Object
= VTablePtrLoad
->getPointerOperand();
702 APInt
ObjectOffset(DL
.getIndexTypeSizeInBits(Object
->getType()), 0);
703 Value
*ObjectBase
= Object
->stripAndAccumulateConstantOffsets(
704 DL
, ObjectOffset
, /* AllowNonInbounds */ true);
705 if (!(isa
<AllocaInst
>(ObjectBase
) && ObjectOffset
== 0))
706 // Not an Alloca or the offset isn't zero.
709 // Look for the vtable pointer store into the object by the ctor.
710 BasicBlock::iterator
BBI(VTablePtrLoad
);
711 Value
*VTablePtr
= FindAvailableLoadedValue(
712 VTablePtrLoad
, VTablePtrLoad
->getParent(), BBI
, 0, nullptr, nullptr);
713 if (!VTablePtr
|| !VTablePtr
->getType()->isPointerTy())
714 return false; // No vtable found.
715 APInt
VTableOffsetGVBase(DL
.getIndexTypeSizeInBits(VTablePtr
->getType()), 0);
716 Value
*VTableGVBase
= VTablePtr
->stripAndAccumulateConstantOffsets(
717 DL
, VTableOffsetGVBase
, /* AllowNonInbounds */ true);
718 GlobalVariable
*GV
= dyn_cast
<GlobalVariable
>(VTableGVBase
);
719 if (!(GV
&& GV
->isConstant() && GV
->hasDefinitiveInitializer()))
720 // Not in the form of a global constant variable with an initializer.
723 APInt VTableGVOffset
= VTableOffsetGVBase
+ VTableOffset
;
724 if (!(VTableGVOffset
.getActiveBits() <= 64))
725 return false; // Out of range.
727 Function
*DirectCallee
= nullptr;
728 std::tie(DirectCallee
, std::ignore
) =
729 getFunctionAtVTableOffset(GV
, VTableGVOffset
.getZExtValue(), *M
);
731 return false; // No function pointer found.
733 if (!isLegalToPromote(CB
, DirectCallee
))
737 promoteCall(CB
, DirectCallee
);