1 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements inline cost analysis.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/InlineCost.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/AssumptionCache.h"
21 #include "llvm/Analysis/BlockFrequencyInfo.h"
22 #include "llvm/Analysis/CodeMetrics.h"
23 #include "llvm/Analysis/ConstantFolding.h"
24 #include "llvm/Analysis/CFG.h"
25 #include "llvm/Analysis/InstructionSimplify.h"
26 #include "llvm/Analysis/ProfileSummaryInfo.h"
27 #include "llvm/Analysis/TargetTransformInfo.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/Config/llvm-config.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/CallingConv.h"
32 #include "llvm/IR/DataLayout.h"
33 #include "llvm/IR/GetElementPtrTypeIterator.h"
34 #include "llvm/IR/GlobalAlias.h"
35 #include "llvm/IR/InstVisitor.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Operator.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/raw_ostream.h"
41 using namespace llvm;
43 #define DEBUG_TYPE "inline-cost"
45 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
47 static cl::opt<int> InlineThreshold(
48 "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
49 cl::desc("Control the amount of inlining to perform (default = 225)"));
51 static cl::opt<int> HintThreshold(
52 "inlinehint-threshold", cl::Hidden, cl::init(325),
53 cl::desc("Threshold for inlining functions with inline hint"));
55 static cl::opt<int>
56 ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
57 cl::init(45),
58 cl::desc("Threshold for inlining cold callsites"));
60 // We introduce this threshold to help performance of instrumentation-based
61 // PGO before we actually hook up the inliner with analysis passes such as BPI
62 // and BFI.
63 static cl::opt<int> ColdThreshold(
64 "inlinecold-threshold", cl::Hidden, cl::init(45),
65 cl::desc("Threshold for inlining functions with cold attribute"));
67 static cl::opt<int>
68 HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
69 cl::ZeroOrMore,
70 cl::desc("Threshold for hot callsites "));
72 static cl::opt<int> LocallyHotCallSiteThreshold(
73 "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
74 cl::desc("Threshold for locally hot callsites "));
76 static cl::opt<int> ColdCallSiteRelFreq(
77 "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
78 cl::desc("Maxmimum block frequency, expressed as a percentage of caller's "
79 "entry frequency, for a callsite to be cold in the absence of "
80 "profile information."));
82 static cl::opt<int> HotCallSiteRelFreq(
83 "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
84 cl::desc("Minimum block frequency, expressed as a multiple of caller's "
85 "entry frequency, for a callsite to be hot in the absence of "
86 "profile information."));
88 static cl::opt<bool> OptComputeFullInlineCost(
89 "inline-cost-full", cl::Hidden, cl::init(false),
90 cl::desc("Compute the full inline cost of a call site even when the cost "
91 "exceeds the threshold."));
93 namespace {
95 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
96 typedef InstVisitor<CallAnalyzer, bool> Base;
97 friend class InstVisitor<CallAnalyzer, bool>;
99 /// The TargetTransformInfo available for this compilation.
100 const TargetTransformInfo &TTI;
102 /// Getter for the cache of @llvm.assume intrinsics.
103 std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
105 /// Getter for BlockFrequencyInfo
106 Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;
108 /// Profile summary information.
109 ProfileSummaryInfo *PSI;
111 /// The called function.
112 Function &F;
114 // Cache the DataLayout since we use it a lot.
115 const DataLayout &DL;
117 /// The OptimizationRemarkEmitter available for this compilation.
118 OptimizationRemarkEmitter *ORE;
120 /// The candidate callsite being analyzed. Please do not use this to do
121 /// analysis in the caller function; we want the inline cost query to be
122 /// easily cacheable. Instead, use the cover function paramHasAttr.
123 CallSite CandidateCS;
125 /// Tunable parameters that control the analysis.
126 const InlineParams &Params;
128 int Threshold;
129 int Cost;
130 bool ComputeFullInlineCost;
132 bool IsCallerRecursive;
133 bool IsRecursiveCall;
134 bool ExposesReturnsTwice;
135 bool HasDynamicAlloca;
136 bool ContainsNoDuplicateCall;
137 bool HasReturn;
138 bool HasIndirectBr;
139 bool HasUninlineableIntrinsic;
140 bool InitsVargArgs;
142 /// Number of bytes allocated statically by the callee.
143 uint64_t AllocatedSize;
144 unsigned NumInstructions, NumVectorInstructions;
145 int VectorBonus, TenPercentVectorBonus;
146 // Bonus to be applied when the callee has only one reachable basic block.
147 int SingleBBBonus;
149 /// While we walk the potentially-inlined instructions, we build up and
150 /// maintain a mapping of simplified values specific to this callsite. The
151 /// idea is to propagate any special information we have about arguments to
152 /// this call through the inlinable section of the function, and account for
153 /// likely simplifications post-inlining. The most important aspect we track
154 /// is CFG altering simplifications -- when we prove a basic block dead, that
155 /// can cause dramatic shifts in the cost of inlining a function.
156 DenseMap<Value *, Constant *> SimplifiedValues;
158 /// Keep track of the values which map back (through function arguments) to
159 /// allocas on the caller stack which could be simplified through SROA.
160 DenseMap<Value *, Value *> SROAArgValues;
162 /// The mapping of caller Alloca values to their accumulated cost savings. If
163 /// we have to disable SROA for one of the allocas, this tells us how much
164 /// cost must be added.
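/// For example, if an alloca-derived pointer feeds two simple loads, the
/// corresponding entry accumulates 2 * InstrCost of expected savings; if SROA
/// is later disabled for that alloca, the same amount is added back to Cost.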
165 DenseMap<Value *, int> SROAArgCosts;
167 /// Keep track of values which map to a pointer base and constant offset.
168 DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
170 /// Keep track of dead blocks due to the constant arguments.
171 SetVector<BasicBlock *> DeadBlocks;
173 /// The mapping of the blocks to their known unique successors due to the
174 /// constant arguments.
175 DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
177 /// Model the elimination of repeated loads that is expected to happen
178 /// whenever we simplify away the stores that would otherwise cause them to be
179 /// loads.
180 bool EnableLoadElimination;
181 SmallPtrSet<Value *, 16> LoadAddrSet;
182 int LoadEliminationCost;
184 // Custom simplification helper routines.
185 bool isAllocaDerivedArg(Value *V);
186 bool lookupSROAArgAndCost(Value *V, Value *&Arg,
187 DenseMap<Value *, int>::iterator &CostIt);
188 void disableSROA(DenseMap<Value *, int>::iterator CostIt);
189 void disableSROA(Value *V);
190 void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
191 void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
192 int InstructionCost);
193 void disableLoadElimination();
194 bool isGEPFree(GetElementPtrInst &GEP);
195 bool canFoldInboundsGEP(GetElementPtrInst &I);
196 bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
197 bool simplifyCallSite(Function *F, CallSite CS);
198 template <typename Callable>
199 bool simplifyInstruction(Instruction &I, Callable Evaluate);
200 ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
202 /// Return true if the given argument to the function being considered for
203 /// inlining has the given attribute set either at the call site or the
204 /// function declaration. Primarily used to inspect call site specific
205 /// attributes since these can be more precise than the ones on the callee
206 /// itself.
207 bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
209 /// Return true if the given value is known non null within the callee if
210 /// inlined through this particular callsite.
211 bool isKnownNonNullInCallee(Value *V);
213 /// Update Threshold based on callsite properties such as callee
214 /// attributes and callee hotness for PGO builds. The Callee is explicitly
215 /// passed to support analyzing indirect calls whose target is inferred by
216 /// analysis.
217 void updateThreshold(CallSite CS, Function &Callee);
219 /// Return true if size growth is allowed when inlining the callee at CS.
220 bool allowSizeGrowth(CallSite CS);
222 /// Return true if \p CS is a cold callsite.
223 bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
225 /// Return a higher threshold if \p CS is a hot callsite.
226 Optional<int> getHotCallSiteThreshold(CallSite CS,
227 BlockFrequencyInfo *CallerBFI);
229 // Custom analysis routines.
230 InlineResult analyzeBlock(BasicBlock *BB,
231 SmallPtrSetImpl<const Value *> &EphValues);
233 // Disable several entry points to the visitor so we don't accidentally use
234 // them by declaring but not defining them here.
235 void visit(Module *);
236 void visit(Module &);
237 void visit(Function *);
238 void visit(Function &);
239 void visit(BasicBlock *);
240 void visit(BasicBlock &);
242 // Provide base case for our instruction visit.
243 bool visitInstruction(Instruction &I);
245 // Our visit overrides.
246 bool visitAlloca(AllocaInst &I);
247 bool visitPHI(PHINode &I);
248 bool visitGetElementPtr(GetElementPtrInst &I);
249 bool visitBitCast(BitCastInst &I);
250 bool visitPtrToInt(PtrToIntInst &I);
251 bool visitIntToPtr(IntToPtrInst &I);
252 bool visitCastInst(CastInst &I);
253 bool visitUnaryInstruction(UnaryInstruction &I);
254 bool visitCmpInst(CmpInst &I);
255 bool visitSub(BinaryOperator &I);
256 bool visitBinaryOperator(BinaryOperator &I);
257 bool visitLoad(LoadInst &I);
258 bool visitStore(StoreInst &I);
259 bool visitExtractValue(ExtractValueInst &I);
260 bool visitInsertValue(InsertValueInst &I);
261 bool visitCallSite(CallSite CS);
262 bool visitReturnInst(ReturnInst &RI);
263 bool visitBranchInst(BranchInst &BI);
264 bool visitSelectInst(SelectInst &SI);
265 bool visitSwitchInst(SwitchInst &SI);
266 bool visitIndirectBrInst(IndirectBrInst &IBI);
267 bool visitResumeInst(ResumeInst &RI);
268 bool visitCleanupReturnInst(CleanupReturnInst &RI);
269 bool visitCatchReturnInst(CatchReturnInst &RI);
270 bool visitUnreachableInst(UnreachableInst &I);
272 public:
273 CallAnalyzer(const TargetTransformInfo &TTI,
274 std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
275 Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
276 ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
277 Function &Callee, CallSite CSArg, const InlineParams &Params)
278 : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
279 PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
280 CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
281 Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
282 Params.ComputeFullInlineCost || ORE),
283 IsCallerRecursive(false), IsRecursiveCall(false),
284 ExposesReturnsTwice(false), HasDynamicAlloca(false),
285 ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
286 HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
287 NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
288 SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
289 NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
290 NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
291 NumInstructionsSimplified(0), SROACostSavings(0),
292 SROACostSavingsLost(0) {}
294 InlineResult analyzeCall(CallSite CS);
296 int getThreshold() { return Threshold; }
297 int getCost() { return Cost; }
299 // Keep a bunch of stats about the cost savings found so we can print them
300 // out when debugging.
301 unsigned NumConstantArgs;
302 unsigned NumConstantOffsetPtrArgs;
303 unsigned NumAllocaArgs;
304 unsigned NumConstantPtrCmps;
305 unsigned NumConstantPtrDiffs;
306 unsigned NumInstructionsSimplified;
307 unsigned SROACostSavings;
308 unsigned SROACostSavingsLost;
310 void dump();
313 } // namespace
315 /// Test whether the given value is an Alloca-derived function argument.
316 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
317 return SROAArgValues.count(V);
320 /// Lookup the SROA-candidate argument and cost iterator which V maps to.
321 /// Returns false if V does not map to a SROA-candidate.
322 bool CallAnalyzer::lookupSROAArgAndCost(
323 Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
324 if (SROAArgValues.empty() || SROAArgCosts.empty())
325 return false;
327 DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
328 if (ArgIt == SROAArgValues.end())
329 return false;
331 Arg = ArgIt->second;
332 CostIt = SROAArgCosts.find(Arg);
333 return CostIt != SROAArgCosts.end();
336 /// Disable SROA for the candidate marked by this cost iterator.
338 /// This marks the candidate as no longer viable for SROA, and adds the cost
339 /// savings associated with it back into the inline cost measurement.
340 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
341 // If we're no longer able to perform SROA we need to undo its cost savings
342 // and prevent subsequent analysis.
343 Cost += CostIt->second;
344 SROACostSavings -= CostIt->second;
345 SROACostSavingsLost += CostIt->second;
346 SROAArgCosts.erase(CostIt);
347 disableLoadElimination();
350 /// If 'V' maps to a SROA candidate, disable SROA for it.
351 void CallAnalyzer::disableSROA(Value *V) {
352 Value *SROAArg;
353 DenseMap<Value *, int>::iterator CostIt;
354 if (lookupSROAArgAndCost(V, SROAArg, CostIt))
355 disableSROA(CostIt);
358 /// Accumulate the given cost for a particular SROA candidate.
359 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
360 int InstructionCost) {
361 CostIt->second += InstructionCost;
362 SROACostSavings += InstructionCost;
365 void CallAnalyzer::disableLoadElimination() {
366 if (EnableLoadElimination) {
367 Cost += LoadEliminationCost;
368 LoadEliminationCost = 0;
369 EnableLoadElimination = false;
373 /// Accumulate a constant GEP offset into an APInt if possible.
375 /// Returns false if unable to compute the offset for any reason. Respects any
376 /// simplified values known during the analysis of this callsite.
377 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
378 unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
379 assert(IntPtrWidth == Offset.getBitWidth());
381 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
382 GTI != GTE; ++GTI) {
383 ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
384 if (!OpC)
385 if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
386 OpC = dyn_cast<ConstantInt>(SimpleOp);
387 if (!OpC)
388 return false;
389 if (OpC->isZero())
390 continue;
392 // Handle a struct index, which adds its field offset to the pointer.
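// For example, indexing field 1 of a struct { i32, i64 } typically adds that
// field's layout offset (8 bytes under common data layouts) to Offset.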
393 if (StructType *STy = GTI.getStructTypeOrNull()) {
394 unsigned ElementIdx = OpC->getZExtValue();
395 const StructLayout *SL = DL.getStructLayout(STy);
396 Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
397 continue;
400 APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
401 Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
403 return true;
406 /// Use TTI to check whether a GEP is free.
408 /// Respects any simplified values known during the analysis of this callsite.
409 bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
410 SmallVector<Value *, 4> Operands;
411 Operands.push_back(GEP.getOperand(0));
412 for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
413 if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
414 Operands.push_back(SimpleOp);
415 else
416 Operands.push_back(*I);
417 return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
420 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
421 // Check whether inlining will turn a dynamic alloca into a static
422 // alloca and handle that case.
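// For example, an 'alloca i32, i64 %n' whose %n operand has been simplified to
// the constant 16 is accounted as a static 64-byte allocation after inlining.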
423 if (I.isArrayAllocation()) {
424 Constant *Size = SimplifiedValues.lookup(I.getArraySize());
425 if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
426 Type *Ty = I.getAllocatedType();
427 AllocatedSize = SaturatingMultiplyAdd(
428 AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
429 return Base::visitAlloca(I);
433 // Accumulate the allocated size.
434 if (I.isStaticAlloca()) {
435 Type *Ty = I.getAllocatedType();
436 AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
439 // We will happily inline static alloca instructions.
440 if (I.isStaticAlloca())
441 return Base::visitAlloca(I);
443 // FIXME: This is overly conservative. Dynamic allocas are inefficient for
444 // a variety of reasons, and so we would like to not inline them into
445 // functions which don't currently have a dynamic alloca. This simply
446 // disables inlining altogether in the presence of a dynamic alloca.
447 HasDynamicAlloca = true;
448 return false;
451 bool CallAnalyzer::visitPHI(PHINode &I) {
452 // FIXME: We need to propagate SROA *disabling* through phi nodes, even
453 // though we don't want to propagate its bonuses. The idea is to disable
454 // SROA if it *might* be used in an inappropriate manner.
456 // Phi nodes are always zero-cost.
457 // FIXME: Pointer sizes may differ between different address spaces, so do we
458 // need to use correct address space in the call to getPointerSizeInBits here?
459 // Or could we skip the getPointerSizeInBits call completely? As far as I can
460 // see the ZeroOffset is used as a dummy value, so we can probably use any
461 // bit width for the ZeroOffset?
462 APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
463 bool CheckSROA = I.getType()->isPointerTy();
465 // Track the constant or pointer with constant offset we've seen so far.
466 Constant *FirstC = nullptr;
467 std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
468 Value *FirstV = nullptr;
470 for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
471 BasicBlock *Pred = I.getIncomingBlock(i);
472 // If the incoming block is dead, skip the incoming block.
473 if (DeadBlocks.count(Pred))
474 continue;
475 // If the parent block of phi is not the known successor of the incoming
476 // block, skip the incoming block.
477 BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
478 if (KnownSuccessor && KnownSuccessor != I.getParent())
479 continue;
481 Value *V = I.getIncomingValue(i);
482 // If the incoming value is this phi itself, skip the incoming value.
483 if (&I == V)
484 continue;
486 Constant *C = dyn_cast<Constant>(V);
487 if (!C)
488 C = SimplifiedValues.lookup(V);
490 std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
491 if (!C && CheckSROA)
492 BaseAndOffset = ConstantOffsetPtrs.lookup(V);
494 if (!C && !BaseAndOffset.first)
495 // The incoming value is neither a constant nor a pointer with constant
496 // offset, exit early.
497 return true;
499 if (FirstC) {
500 if (FirstC == C)
501 // If we've seen a constant incoming value before and it is the same
502 // constant we see this time, continue checking the next incoming value.
503 continue;
504 // Otherwise early exit because we either see a different constant or saw
505 // a constant before but we have a pointer with constant offset this time.
506 return true;
509 if (FirstV) {
510 // The same logic as above, but check pointer with constant offset here.
511 if (FirstBaseAndOffset == BaseAndOffset)
512 continue;
513 return true;
516 if (C) {
517 // This is the 1st time we've seen a constant, record it.
518 FirstC = C;
519 continue;
522 // The remaining case is that this is the 1st time we've seen a pointer with
523 // constant offset, record it.
524 FirstV = V;
525 FirstBaseAndOffset = BaseAndOffset;
528 // Check if we can map phi to a constant.
529 if (FirstC) {
530 SimplifiedValues[&I] = FirstC;
531 return true;
534 // Check if we can map phi to a pointer with constant offset.
535 if (FirstBaseAndOffset.first) {
536 ConstantOffsetPtrs[&I] = FirstBaseAndOffset;
538 Value *SROAArg;
539 DenseMap<Value *, int>::iterator CostIt;
540 if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
541 SROAArgValues[&I] = SROAArg;
544 return true;
547 /// Check we can fold GEPs of constant-offset call site argument pointers.
548 /// This requires target data and inbounds GEPs.
550 /// \return true if the specified GEP can be folded.
551 bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
552 // Check if we have a base + offset for the pointer.
553 std::pair<Value *, APInt> BaseAndOffset =
554 ConstantOffsetPtrs.lookup(I.getPointerOperand());
555 if (!BaseAndOffset.first)
556 return false;
558 // Check if the offset of this GEP is constant, and if so accumulate it
559 // into Offset.
560 if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
561 return false;
563 // Add the result as a new mapping to Base + Offset.
564 ConstantOffsetPtrs[&I] = BaseAndOffset;
566 return true;
569 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
570 Value *SROAArg;
571 DenseMap<Value *, int>::iterator CostIt;
572 bool SROACandidate =
573 lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);
575 // Lambda to check whether a GEP's indices are all constant.
576 auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
577 for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
578 if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
579 return false;
580 return true;
583 if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
584 if (SROACandidate)
585 SROAArgValues[&I] = SROAArg;
587 // Constant GEPs are modeled as free.
588 return true;
591 // Variable GEPs will require math and will disable SROA.
592 if (SROACandidate)
593 disableSROA(CostIt);
594 return isGEPFree(I);
597 /// Simplify \p I if its operands are constants and update SimplifiedValues.
598 /// \p Evaluate is a callable specific to instruction type that evaluates the
599 /// instruction when all the operands are constants.
600 template <typename Callable>
601 bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
602 SmallVector<Constant *, 2> COps;
603 for (Value *Op : I.operands()) {
604 Constant *COp = dyn_cast<Constant>(Op);
605 if (!COp)
606 COp = SimplifiedValues.lookup(Op);
607 if (!COp)
608 return false;
609 COps.push_back(COp);
611 auto *C = Evaluate(COps);
612 if (!C)
613 return false;
614 SimplifiedValues[&I] = C;
615 return true;
618 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
619 // Propagate constants through bitcasts.
620 if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
621 return ConstantExpr::getBitCast(COps[0], I.getType());
623 return true;
625 // Track base/offsets through casts
626 std::pair<Value *, APInt> BaseAndOffset =
627 ConstantOffsetPtrs.lookup(I.getOperand(0));
628 // Casts don't change the offset, just wrap it up.
629 if (BaseAndOffset.first)
630 ConstantOffsetPtrs[&I] = BaseAndOffset;
632 // Also look for SROA candidates here.
633 Value *SROAArg;
634 DenseMap<Value *, int>::iterator CostIt;
635 if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
636 SROAArgValues[&I] = SROAArg;
638 // Bitcasts are always zero cost.
639 return true;
642 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
643 // Propagate constants through ptrtoint.
644 if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
645 return ConstantExpr::getPtrToInt(COps[0], I.getType());
647 return true;
649 // Track base/offset pairs when converted to a plain integer provided the
650 // integer is large enough to represent the pointer.
651 unsigned IntegerSize = I.getType()->getScalarSizeInBits();
652 unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
653 if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
654 std::pair<Value *, APInt> BaseAndOffset =
655 ConstantOffsetPtrs.lookup(I.getOperand(0));
656 if (BaseAndOffset.first)
657 ConstantOffsetPtrs[&I] = BaseAndOffset;
660 // This is really weird. Technically, ptrtoint will disable SROA. However,
661 // unless that ptrtoint is *used* somewhere in the live basic blocks after
662 // inlining, it will be nuked, and SROA should proceed. All of the uses which
663 // would block SROA would also block SROA if applied directly to a pointer,
664 // and so we can just add the integer in here. The only places where SROA is
665 // preserved either cannot fire on an integer, or won't in-and-of themselves
666 // disable SROA (ext) w/o some later use that we would see and disable.
667 Value *SROAArg;
668 DenseMap<Value *, int>::iterator CostIt;
669 if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
670 SROAArgValues[&I] = SROAArg;
672 return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
675 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
676 // Propagate constants through inttoptr.
677 if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
678 return ConstantExpr::getIntToPtr(COps[0], I.getType());
680 return true;
682 // Track base/offset pairs when round-tripped through a pointer without
683 // modifications provided the integer is not too large.
684 Value *Op = I.getOperand(0);
685 unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
686 if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
687 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
688 if (BaseAndOffset.first)
689 ConstantOffsetPtrs[&I] = BaseAndOffset;
692 // "Propagate" SROA here in the same manner as we do for ptrtoint above.
693 Value *SROAArg;
694 DenseMap<Value *, int>::iterator CostIt;
695 if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
696 SROAArgValues[&I] = SROAArg;
698 return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
701 bool CallAnalyzer::visitCastInst(CastInst &I) {
702 // Propagate constants through casts.
703 if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
704 return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
706 return true;
708 // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
709 disableSROA(I.getOperand(0));
711 // If this is a floating-point cast, and the target says this operation
712 // is expensive, this may eventually become a library call. Treat the cost
713 // as such.
714 switch (I.getOpcode()) {
715 case Instruction::FPTrunc:
716 case Instruction::FPExt:
717 case Instruction::UIToFP:
718 case Instruction::SIToFP:
719 case Instruction::FPToUI:
720 case Instruction::FPToSI:
721 if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
722 Cost += InlineConstants::CallPenalty;
723 default:
724 break;
727 return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
730 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
731 Value *Operand = I.getOperand(0);
732 if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
733 return ConstantFoldInstOperands(&I, COps[0], DL);
735 return true;
737 // Disable any SROA on the argument to arbitrary unary operators.
738 disableSROA(Operand);
740 return false;
743 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
744 return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
747 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
748 // Does the *call site* have the NonNull attribute set on an argument? We
749 // use the attribute on the call site to memoize any analysis done in the
750 // caller. This will also trip if the callee function has a non-null
751 // parameter attribute, but that's a less interesting case because hopefully
752 // the callee would already have been simplified based on that.
753 if (Argument *A = dyn_cast<Argument>(V))
754 if (paramHasAttr(A, Attribute::NonNull))
755 return true;
757 // Is this an alloca in the caller? This is distinct from the attribute case
758 // above because attributes aren't updated within the inliner itself and we
759 // always want to catch the alloca derived case.
760 if (isAllocaDerivedArg(V))
761 // We can actually predict the result of comparisons between an
762 // alloca-derived value and null. Note that this fires regardless of
763 // SROA firing.
764 return true;
766 return false;
769 bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
770 // If the normal destination of the invoke or the parent block of the call
771 // site is unreachable-terminated, there is little point in inlining this
772 // unless there is literally zero cost.
773 // FIXME: Note that it is possible that an unreachable-terminated block has a
774 // hot entry. For example, in the scenario below inlining hot_call_X() may be
775 // beneficial:
776 // main() {
777 // hot_call_1();
778 // ...
779 // hot_call_N()
780 // exit(0);
781 // }
782 // For now, we are not handling this corner case here as it is rare in real
783 // code. In the future, we should elaborate this based on BPI and BFI in more
784 // general threshold-adjusting heuristics in updateThreshold().
785 Instruction *Instr = CS.getInstruction();
786 if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
787 if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
788 return false;
789 } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
790 return false;
792 return true;
795 bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
796 // If global profile summary is available, then callsite's coldness is
797 // determined based on that.
798 if (PSI && PSI->hasProfileSummary())
799 return PSI->isColdCallSite(CS, CallerBFI);
801 // Otherwise we need BFI to be available.
802 if (!CallerBFI)
803 return false;
805 // Determine if the callsite is cold relative to caller's entry. We could
806 // potentially cache the computation of scaled entry frequency, but the added
807 // complexity is not worth it unless this scaling shows up high in the
808 // profiles.
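// For example, with the default cold-callsite-rel-freq of 2, a callsite whose
// block frequency is below 2% of the caller's entry frequency is treated as
// cold when no profile summary is available.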
809 const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
810 auto CallSiteBB = CS.getInstruction()->getParent();
811 auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
812 auto CallerEntryFreq =
813 CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
814 return CallSiteFreq < CallerEntryFreq * ColdProb;
817 Optional<int>
818 CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
819 BlockFrequencyInfo *CallerBFI) {
821 // If global profile summary is available, then callsite's hotness is
822 // determined based on that.
823 if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI))
824 return Params.HotCallSiteThreshold;
826 // Otherwise we need BFI to be available and to have a locally hot callsite
827 // threshold.
828 if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
829 return None;
831 // Determine if the callsite is hot relative to caller's entry. We could
832 // potentially cache the computation of scaled entry frequency, but the added
833 // complexity is not worth it unless this scaling shows up high in the
834 // profiles.
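// For example, with the default hot-callsite-rel-freq of 60, the callsite's
// block must execute at least 60 times as often as the caller's entry block
// to receive the locally hot threshold.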
835 auto CallSiteBB = CS.getInstruction()->getParent();
836 auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
837 auto CallerEntryFreq = CallerBFI->getEntryFreq();
838 if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
839 return Params.LocallyHotCallSiteThreshold;
841 // Otherwise treat it normally.
842 return None;
845 void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
846 // If no size growth is allowed for this inlining, set Threshold to 0.
847 if (!allowSizeGrowth(CS)) {
848 Threshold = 0;
849 return;
852 Function *Caller = CS.getCaller();
854 // return min(A, B) if B is valid.
855 auto MinIfValid = [](int A, Optional<int> B) {
856 return B ? std::min(A, B.getValue()) : A;
859 // return max(A, B) if B is valid.
860 auto MaxIfValid = [](int A, Optional<int> B) {
861 return B ? std::max(A, B.getValue()) : A;
864 // Various bonus percentages. These are multiplied by Threshold to get the
865 // bonus values.
866 // SingleBBBonus: This bonus is applied if the callee has a single reachable
867 // basic block at the given callsite context. This is speculatively applied
868 // and withdrawn if more than one basic block is seen.
870 // Vector bonuses: We want to more aggressively inline vector-dense kernels
871 // and apply this bonus based on the percentage of vector instructions. A
872 // bonus is applied if the vector instructions exceed 50% and half that amount
873 // is applied if it exceeds 10%. Note that these bonuses are somewhat
874 // arbitrary and evolved over time by accident as much as because they are
875 // principled bonuses.
876 // FIXME: It would be nice to base the bonus values on something more
877 // scientific.
879 // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
880 // of the last call to a static function as inlining such functions is
881 // guaranteed to reduce code size.
883 // These bonus percentages may be set to 0 based on properties of the caller
884 // and the callsite.
885 int SingleBBBonusPercent = 50;
886 int VectorBonusPercent = 150;
887 int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
889 // Lambda to set all the above bonus and bonus percentages to 0.
890 auto DisallowAllBonuses = [&]() {
891 SingleBBBonusPercent = 0;
892 VectorBonusPercent = 0;
893 LastCallToStaticBonus = 0;
896 // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
897 // and reduce the threshold if the caller has the necessary attribute.
898 if (Caller->optForMinSize()) {
899 Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
900 // For minsize, we want to disable the single BB bonus and the vector
901 // bonuses, but not the last-call-to-static bonus. Inlining the last call to
902 // a static function will, at the minimum, eliminate the parameter setup and
903 // call/return instructions.
904 SingleBBBonusPercent = 0;
905 VectorBonusPercent = 0;
906 } else if (Caller->optForSize())
907 Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
909 // Adjust the threshold based on inlinehint attribute and profile based
910 // hotness information if the caller does not have MinSize attribute.
911 if (!Caller->optForMinSize()) {
912 if (Callee.hasFnAttribute(Attribute::InlineHint))
913 Threshold = MaxIfValid(Threshold, Params.HintThreshold);
915 // FIXME: After switching to the new passmanager, simplify the logic below
916 // by checking only the callsite hotness/coldness as we will reliably
917 // have local profile information.
919 // Callsite hotness and coldness can be determined if sample profile is
920 // used (which adds hotness metadata to calls) or if caller's
921 // BlockFrequencyInfo is available.
922 BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
923 auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI);
924 if (!Caller->optForSize() && HotCallSiteThreshold) {
925 LLVM_DEBUG(dbgs() << "Hot callsite.\n");
926 // FIXME: This should update the threshold only if it exceeds the
927 // current threshold, but AutoFDO + ThinLTO currently relies on this
928 // behavior to prevent inlining of hot callsites during ThinLTO
929 // compile phase.
930 Threshold = HotCallSiteThreshold.getValue();
931 } else if (isColdCallSite(CS, CallerBFI)) {
932 LLVM_DEBUG(dbgs() << "Cold callsite.\n");
933 // Do not apply bonuses for a cold callsite including the
934 // LastCallToStatic bonus. While this bonus might result in code size
935 // reduction, it can cause the size of a non-cold caller to increase
936 // preventing it from being inlined.
937 DisallowAllBonuses();
938 Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
939 } else if (PSI) {
940 // Use callee's global profile information only if we have no way of
941 // determining this via callsite information.
942 if (PSI->isFunctionEntryHot(&Callee)) {
943 LLVM_DEBUG(dbgs() << "Hot callee.\n");
944 // If callsite hotness cannot be determined, we may still know
945 // that the callee is hot and treat it as a weaker hint for threshold
946 // increase.
947 Threshold = MaxIfValid(Threshold, Params.HintThreshold);
948 } else if (PSI->isFunctionEntryCold(&Callee)) {
949 LLVM_DEBUG(dbgs() << "Cold callee.\n");
950 // Do not apply bonuses for a cold callee including the
951 // LastCallToStatic bonus. While this bonus might result in code size
952 // reduction, it can cause the size of a non-cold caller to increase
953 // preventing it from being inlined.
954 DisallowAllBonuses();
955 Threshold = MinIfValid(Threshold, Params.ColdThreshold);
960 // Finally, take the target-specific inlining threshold multiplier into
961 // account.
962 Threshold *= TTI.getInliningThresholdMultiplier();
964 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
965 VectorBonus = Threshold * VectorBonusPercent / 100;
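// For example, with the default threshold of 225, a target multiplier of 1,
// and no attribute or profile adjustments, this yields
// SingleBBBonus = 225 * 50 / 100 = 112 and VectorBonus = 225 * 150 / 100 = 337.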
967 bool OnlyOneCallAndLocalLinkage =
968 F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
969 // If there is only one call of the function, and it has internal linkage,
970 // the cost of inlining it drops dramatically. It may seem odd to update
971 // Cost in updateThreshold, but the bonus depends on the logic in this method.
972 if (OnlyOneCallAndLocalLinkage)
973 Cost -= LastCallToStaticBonus;
976 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
977 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
978 // First try to handle simplified comparisons.
979 if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
980 return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
982 return true;
984 if (I.getOpcode() == Instruction::FCmp)
985 return false;
987 // Otherwise look for a comparison between constant offset pointers with
988 // a common base.
989 Value *LHSBase, *RHSBase;
990 APInt LHSOffset, RHSOffset;
991 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
992 if (LHSBase) {
993 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
994 if (RHSBase && LHSBase == RHSBase) {
995 // We have common bases, fold the icmp to a constant based on the
996 // offsets.
997 Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
998 Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
999 if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
1000 SimplifiedValues[&I] = C;
1001 ++NumConstantPtrCmps;
1002 return true;
1007 // If the comparison is an equality comparison with null, we can simplify it
1008 // if we know the value (argument) can't be null.
1009 if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
1010 isKnownNonNullInCallee(I.getOperand(0))) {
1011 bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
1012 SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
1013 : ConstantInt::getFalse(I.getType());
1014 return true;
1016 // Finally check for SROA candidates in comparisons.
1017 Value *SROAArg;
1018 DenseMap<Value *, int>::iterator CostIt;
1019 if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
1020 if (isa<ConstantPointerNull>(I.getOperand(1))) {
1021 accumulateSROACost(CostIt, InlineConstants::InstrCost);
1022 return true;
1025 disableSROA(CostIt);
1028 return false;
1031 bool CallAnalyzer::visitSub(BinaryOperator &I) {
1032 // Try to handle a special case: we can fold computing the difference of two
1033 // constant-related pointers.
1034 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1035 Value *LHSBase, *RHSBase;
1036 APInt LHSOffset, RHSOffset;
1037 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1038 if (LHSBase) {
1039 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1040 if (RHSBase && LHSBase == RHSBase) {
1041 // We have common bases, fold the subtract to a constant based on the
1042 // offsets.
1043 Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1044 Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1045 if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
1046 SimplifiedValues[&I] = C;
1047 ++NumConstantPtrDiffs;
1048 return true;
1053 // Otherwise, fall back to the generic logic for simplifying and handling
1054 // instructions.
1055 return Base::visitSub(I);
1058 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
1059 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1060 Constant *CLHS = dyn_cast<Constant>(LHS);
1061 if (!CLHS)
1062 CLHS = SimplifiedValues.lookup(LHS);
1063 Constant *CRHS = dyn_cast<Constant>(RHS);
1064 if (!CRHS)
1065 CRHS = SimplifiedValues.lookup(RHS);
1067 Value *SimpleV = nullptr;
1068 if (auto FI = dyn_cast<FPMathOperator>(&I))
1069 SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
1070 CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
1071 else
1072 SimpleV =
1073 SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
1075 if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
1076 SimplifiedValues[&I] = C;
1078 if (SimpleV)
1079 return true;
1081 // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
1082 disableSROA(LHS);
1083 disableSROA(RHS);
1085 // If the instruction is floating point, and the target says this operation
1086 // is expensive, this may eventually become a library call. Treat the cost
1087 // as such.
1088 if (I.getType()->isFloatingPointTy() &&
1089 TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
1090 Cost += InlineConstants::CallPenalty;
1092 return false;
1095 bool CallAnalyzer::visitLoad(LoadInst &I) {
1096 Value *SROAArg;
1097 DenseMap<Value *, int>::iterator CostIt;
1098 if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
1099 if (I.isSimple()) {
1100 accumulateSROACost(CostIt, InlineConstants::InstrCost);
1101 return true;
1104 disableSROA(CostIt);
1107 // If the data is already loaded from this address and hasn't been clobbered
1108 // by any stores or calls, this load is likely to be redundant and can be
1109 // eliminated.
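// For example, a second unordered load from an address already in LoadAddrSet
// is credited InstrCost of expected savings in LoadEliminationCost; that credit
// is withdrawn if a store or clobbering call is seen later.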
1110 if (EnableLoadElimination &&
1111 !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
1112 LoadEliminationCost += InlineConstants::InstrCost;
1113 return true;
1116 return false;
1119 bool CallAnalyzer::visitStore(StoreInst &I) {
1120 Value *SROAArg;
1121 DenseMap<Value *, int>::iterator CostIt;
1122 if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
1123 if (I.isSimple()) {
1124 accumulateSROACost(CostIt, InlineConstants::InstrCost);
1125 return true;
1128 disableSROA(CostIt);
1131 // The store can potentially clobber loads and prevent repeated loads from
1132 // being eliminated.
1133 // FIXME:
1134 // 1. We can probably keep an initial set of eliminatable loads subtracted
1135 // from the cost even when we finally see a store. We just need to disable
1136 // *further* accumulation of elimination savings.
1137 // 2. We should probably at some point thread MemorySSA for the callee into
1138 // this and then use that to actually compute *really* precise savings.
1139 disableLoadElimination();
1140 return false;
1143 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
1144 // Constant folding for extract value is trivial.
1145 if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1146 return ConstantExpr::getExtractValue(COps[0], I.getIndices());
1148 return true;
1150 // SROA can look through these but give them a cost.
1151 return false;
1154 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
1155 // Constant folding for insert value is trivial.
1156 if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1157 return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
1158 /*InsertedValueOperand*/ COps[1],
1159 I.getIndices());
1161 return true;
1163 // SROA can look through these but give them a cost.
1164 return false;
1167 /// Try to simplify a call site.
1169 /// Takes a concrete function and callsite and tries to actually simplify it by
1170 /// analyzing the arguments and call itself with instsimplify. Returns true if
1171 /// it has simplified the callsite to some other entity (a constant), making it
1172 /// free.
1173 bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
1174 // FIXME: Using the instsimplify logic directly for this is inefficient
1175 // because we have to continually rebuild the argument list even when no
1176 // simplifications can be performed. Until that is fixed with remapping
1177 // inside of instsimplify, directly constant fold calls here.
1178 if (!canConstantFoldCallTo(CS, F))
1179 return false;
1181 // Try to re-map the arguments to constants.
1182 SmallVector<Constant *, 4> ConstantArgs;
1183 ConstantArgs.reserve(CS.arg_size());
1184 for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
1185 ++I) {
1186 Constant *C = dyn_cast<Constant>(*I);
1187 if (!C)
1188 C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
1189 if (!C)
1190 return false; // This argument doesn't map to a constant.
1192 ConstantArgs.push_back(C);
1194 if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
1195 SimplifiedValues[CS.getInstruction()] = C;
1196 return true;
1199 return false;
1202 bool CallAnalyzer::visitCallSite(CallSite CS) {
1203 if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
1204 !F.hasFnAttribute(Attribute::ReturnsTwice)) {
1205 // This aborts the entire analysis.
1206 ExposesReturnsTwice = true;
1207 return false;
1209 if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
1210 ContainsNoDuplicateCall = true;
1212 if (Function *F = CS.getCalledFunction()) {
1213 // When we have a concrete function, first try to simplify it directly.
1214 if (simplifyCallSite(F, CS))
1215 return true;
1217 // Next check if it is an intrinsic we know about.
1218 // FIXME: Lift this into part of the InstVisitor.
1219 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
1220 switch (II->getIntrinsicID()) {
1221 default:
1222 if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
1223 disableLoadElimination();
1224 return Base::visitCallSite(CS);
1226 case Intrinsic::load_relative:
1227 // This is normally lowered to 4 LLVM instructions.
1228 Cost += 3 * InlineConstants::InstrCost;
1229 return false;
1231 case Intrinsic::memset:
1232 case Intrinsic::memcpy:
1233 case Intrinsic::memmove:
1234 disableLoadElimination();
1235 // SROA can usually chew through these intrinsics, but they aren't free.
1236 return false;
1237 case Intrinsic::icall_branch_funnel:
1238 case Intrinsic::localescape:
1239 HasUninlineableIntrinsic = true;
1240 return false;
1241 case Intrinsic::vastart:
1242 InitsVargArgs = true;
1243 return false;
1247 if (F == CS.getInstruction()->getFunction()) {
1248 // This flag will fully abort the analysis, so don't bother with anything
1249 // else.
1250 IsRecursiveCall = true;
1251 return false;
1254 if (TTI.isLoweredToCall(F)) {
1255 // We account for the average 1 instruction per call argument setup
1256 // here.
1257 Cost += CS.arg_size() * InlineConstants::InstrCost;
1259 // Everything other than inline ASM will also have a significant cost
1260 // merely from making the call.
1261 if (!isa<InlineAsm>(CS.getCalledValue()))
1262 Cost += InlineConstants::CallPenalty;
1265 if (!CS.onlyReadsMemory())
1266 disableLoadElimination();
1267 return Base::visitCallSite(CS);
1270 // Otherwise we're in a very special case -- an indirect function call. See
1271 // if we can be particularly clever about this.
1272 Value *Callee = CS.getCalledValue();
1274 // First, pay the price of the argument setup. We account for the average
1275 // 1 instruction per call argument setup here.
1276 Cost += CS.arg_size() * InlineConstants::InstrCost;
1278 // Next, check if this happens to be an indirect function call to a known
1279 // function in this inline context. If not, we've done all we can.
1280 Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
1281 if (!F) {
1282 if (!CS.onlyReadsMemory())
1283 disableLoadElimination();
1284 return Base::visitCallSite(CS);
1287 // If we have a constant that we are calling as a function, we can peer
1288 // through it and see the function target. This happens not infrequently
1289 // during devirtualization and so we want to give it a hefty bonus for
1290 // inlining, but cap that bonus in the event that inlining wouldn't pan
1291 // out. Pretend to inline the function, with a custom threshold.
1292 auto IndirectCallParams = Params;
1293 IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
1294 CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
1295 IndirectCallParams);
1296 if (CA.analyzeCall(CS)) {
1297 // We were able to inline the indirect call! Subtract the cost from the
1298 // threshold to get the bonus we want to apply, but don't go below zero.
1299 Cost -= std::max(0, CA.getThreshold() - CA.getCost());
1302 if (!F->onlyReadsMemory())
1303 disableLoadElimination();
1304 return Base::visitCallSite(CS);
1307 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
1308 // At least one return instruction will be free after inlining.
1309 bool Free = !HasReturn;
1310 HasReturn = true;
1311 return Free;
1314 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
1315 // We model unconditional branches as essentially free -- they really
1316 // shouldn't exist at all, but handling them makes the behavior of the
1317 // inliner more regular and predictable. Interestingly, conditional branches
1318 // which will fold away are also free.
1319 return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
1320 dyn_cast_or_null<ConstantInt>(
1321 SimplifiedValues.lookup(BI.getCondition()));
1324 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
1325 bool CheckSROA = SI.getType()->isPointerTy();
1326 Value *TrueVal = SI.getTrueValue();
1327 Value *FalseVal = SI.getFalseValue();
1329 Constant *TrueC = dyn_cast<Constant>(TrueVal);
1330 if (!TrueC)
1331 TrueC = SimplifiedValues.lookup(TrueVal);
1332 Constant *FalseC = dyn_cast<Constant>(FalseVal);
1333 if (!FalseC)
1334 FalseC = SimplifiedValues.lookup(FalseVal);
1335 Constant *CondC =
1336 dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
1338 if (!CondC) {
1339 // Select C, X, X => X
1340 if (TrueC == FalseC && TrueC) {
1341 SimplifiedValues[&SI] = TrueC;
1342 return true;
1345 if (!CheckSROA)
1346 return Base::visitSelectInst(SI);
1348 std::pair<Value *, APInt> TrueBaseAndOffset =
1349 ConstantOffsetPtrs.lookup(TrueVal);
1350 std::pair<Value *, APInt> FalseBaseAndOffset =
1351 ConstantOffsetPtrs.lookup(FalseVal);
1352 if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
1353 ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
1355 Value *SROAArg;
1356 DenseMap<Value *, int>::iterator CostIt;
1357 if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
1358 SROAArgValues[&SI] = SROAArg;
1359 return true;
1362 return Base::visitSelectInst(SI);
1365 // Select condition is a constant.
1366 Value *SelectedV = CondC->isAllOnesValue()
1367 ? TrueVal
1368 : (CondC->isNullValue()) ? FalseVal : nullptr;
1369 if (!SelectedV) {
1370 // Condition is a vector constant that is not all 1s or all 0s. If all
1371 // operands are constants, ConstantExpr::getSelect() can handle the cases
1372 // such as select vectors.
1373 if (TrueC && FalseC) {
1374 if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
1375 SimplifiedValues[&SI] = C;
1376 return true;
1379 return Base::visitSelectInst(SI);
1382 // Condition is either all 1s or all 0s. SI can be simplified.
1383 if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
1384 SimplifiedValues[&SI] = SelectedC;
1385 return true;
1388 if (!CheckSROA)
1389 return true;
1391 std::pair<Value *, APInt> BaseAndOffset =
1392 ConstantOffsetPtrs.lookup(SelectedV);
1393 if (BaseAndOffset.first) {
1394 ConstantOffsetPtrs[&SI] = BaseAndOffset;
1396 Value *SROAArg;
1397 DenseMap<Value *, int>::iterator CostIt;
1398 if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
1399 SROAArgValues[&SI] = SROAArg;
1402 return true;
1405 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
1406 // We model unconditional switches as free, see the comments on handling
1407 // branches.
1408 if (isa<ConstantInt>(SI.getCondition()))
1409 return true;
1410 if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
1411 if (isa<ConstantInt>(V))
1412 return true;
1414 // Assume the most general case where the switch is lowered into
1415 // either a jump table, bit test, or a balanced binary tree consisting of
1416 // case clusters without merging adjacent clusters with the same
1417 // destination. We do not consider the switches that are lowered with a mix
1418 // of jump table/bit test/binary search tree. The cost of the switch is
1419 // proportional to the size of the tree or the size of jump table range.
1421 // NB: We convert large switches which are just used to initialize large phi
1422 // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
1423 // inlining those. It will prevent inlining in cases where the optimization
1424 // does not (yet) fire.
1426 // Maximum valid cost increase in this function.
1427 int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
1429 // Exit early for a large switch, assuming one case needs at least one
1430 // instruction.
1431 // FIXME: This is not true for a bit test, but ignore such case for now to
1432 // save compile-time.
1433 int64_t CostLowerBound =
1434 std::min((int64_t)CostUpperBound,
1435 (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
1437 if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
1438 Cost = CostLowerBound;
1439 return false;
1442 unsigned JumpTableSize = 0;
1443 unsigned NumCaseCluster =
1444 TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
1446 // If suitable for a jump table, consider the cost for the table size and
1447 // branch to destination.
1448 if (JumpTableSize) {
1449 int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
1450 4 * InlineConstants::InstrCost;
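// For example, a jump table covering 16 entries is charged 16 * InstrCost for
// the table plus the fixed 4 * InstrCost above, i.e. 20 * InstrCost added to
// the running Cost (capped at CostUpperBound).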
1452 Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
1453 return false;
1456 // Considering forming a binary search, we should find the number of nodes
1457 // which is same as the number of comparisons when lowered. For a given
1458 // number of clusters, n, we can define a recursive function, f(n), to find
1459 // the number of nodes in the tree. The recursion is :
1460 // f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
1461 // and f(n) = n, when n <= 3.
1462 // This will lead to a binary tree where the leaf should be either f(2) or f(3)
1463 // when n > 3. So, the number of comparisons from leaves should be n, while
1464 // the number of non-leaf should be :
1465 // 2^(log2(n) - 1) - 1
1466 // = 2^log2(n) * 2^-1 - 1
1467 // = n / 2 - 1.
1468 // Considering comparisons from leaf and non-leaf nodes, we can estimate the
1469 // number of comparisons in a simple closed form :
1470 // n + n / 2 - 1 = n * 3 / 2 - 1
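// For example, 8 case clusters give 3 * 8 / 2 - 1 = 11 expected comparisons,
// charged below as 11 * 2 * InstrCost (one compare and one conditional branch
// per comparison).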
1471 if (NumCaseCluster <= 3) {
1472 // Suppose a comparison includes one compare and one conditional branch.
1473 Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
1474 return false;
1477 int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
1478 int64_t SwitchCost =
1479 ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
1481 Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
1482 return false;
1485 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
1486 // We never want to inline functions that contain an indirectbr. This is
1487 // incorrect because all the blockaddresses (in static global initializers
1488 // for example) would be referring to the original function, and this
1489 // indirect jump would jump from the inlined copy of the function into the
1490 // original function which is extremely undefined behavior.
1491 // FIXME: This logic isn't really right; we can safely inline functions with
1492 // indirectbr's as long as no other function or global references the
1493 // blockaddress of a block within the current function.
1494 HasIndirectBr = true;
1495 return false;
1498 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
1499 // FIXME: It's not clear that a single instruction is an accurate model for
1500 // the inline cost of a resume instruction.
1501 return false;
1502 }
1504 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
1505 // FIXME: It's not clear that a single instruction is an accurate model for
1506 // the inline cost of a cleanupret instruction.
1507 return false;
1508 }
1510 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
1511 // FIXME: It's not clear that a single instruction is an accurate model for
1512 // the inline cost of a catchret instruction.
1513 return false;
1514 }
1516 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
1517 // FIXME: It might be reasonable to discount the cost of instructions leading
1518 // to unreachable as they have the lowest possible impact on both runtime and
1519 // code size.
1520 return true; // No actual code is needed for unreachable.
1521 }
1523 bool CallAnalyzer::visitInstruction(Instruction &I) {
1524 // Some instructions are free. All of the free intrinsics can also be
1525 // handled by SROA, etc.
1526 if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
1527 return true;
1529 // We found something we don't understand or can't handle. Mark any SROA-able
1530 // values in the operand list as no longer viable.
1531 for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
1532 disableSROA(*OI);
1534 return false;
1535 }
1537 /// Analyze a basic block for its contribution to the inline cost.
1539 /// This method walks the analyzer over every instruction in the given basic
1540 /// block and accounts for their cost during inlining at this callsite. It
1541 /// aborts early if the threshold has been exceeded or an impossible to inline
1542 /// construct has been detected. It returns false if inlining is no longer
1543 /// viable, and true if inlining remains viable.
1544 InlineResult
1545 CallAnalyzer::analyzeBlock(BasicBlock *BB,
1546 SmallPtrSetImpl<const Value *> &EphValues) {
1547 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
1548 // FIXME: Currently, the number of instructions in a function, regardless of
1549 // our ability to simplify them during inlining to constants or dead code,
1550 // is actually used by the vector bonus heuristic. As long as that's true,
1551 // we have to special case debug intrinsics here to prevent differences in
1552 // inlining due to debug symbols. Eventually, the number of unsimplified
1553 // instructions shouldn't factor into the cost computation, but until then,
1554 // hack around it here.
1555 if (isa<DbgInfoIntrinsic>(I))
1556 continue;
1558 // Skip ephemeral values.
1559 if (EphValues.count(&*I))
1560 continue;
1562 ++NumInstructions;
1563 if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
1564 ++NumVectorInstructions;
1566 // If the instruction simplified to a constant, there is no cost to this
1567 // instruction. Visit the instructions using our InstVisitor to account for
1568 // all of the per-instruction logic. The visit tree returns true if we
1569 // consumed the instruction in any way, and false if the instruction's base
1570 // cost should count against inlining.
1571 if (Base::visit(&*I))
1572 ++NumInstructionsSimplified;
1573 else
1574 Cost += InlineConstants::InstrCost;
1576 using namespace ore;
1577 // If visiting this instruction detected an uninlinable pattern, abort.
1578 InlineResult IR;
1579 if (IsRecursiveCall)
1580 IR = "recursive";
1581 else if (ExposesReturnsTwice)
1582 IR = "exposes returns twice";
1583 else if (HasDynamicAlloca)
1584 IR = "dynamic alloca";
1585 else if (HasIndirectBr)
1586 IR = "indirect branch";
1587 else if (HasUninlineableIntrinsic)
1588 IR = "uninlinable intrinsic";
1589 else if (InitsVargArgs)
1590 IR = "varargs";
1591 if (!IR) {
1592 if (ORE)
1593 ORE->emit([&]() {
1594 return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1595 CandidateCS.getInstruction())
1596 << NV("Callee", &F) << " has uninlinable pattern ("
1597 << NV("InlineResult", IR.message)
1598 << ") and cost is not fully computed";
1599 });
1600 return IR;
1601 }
1603 // If the caller is a recursive function then we don't want to inline
1604 // functions which allocate a lot of stack space because it would increase
1605 // the caller stack usage dramatically.
1606 if (IsCallerRecursive &&
1607 AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
1608 InlineResult IR = "recursive and allocates too much stack space";
1609 if (ORE)
1610 ORE->emit([&]() {
1611 return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1612 CandidateCS.getInstruction())
1613 << NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
1614 << ". Cost is not fully computed";
1615 });
1616 return IR;
1617 }
1619 // Check if we've passed the maximum possible threshold so we don't spin in
1620 // huge basic blocks that will never inline.
1621 if (Cost >= Threshold && !ComputeFullInlineCost)
1622 return false;
1623 }
1625 return true;
1626 }
1628 /// Compute the base pointer and cumulative constant offsets for V.
1630 /// This strips all constant offsets off of V, leaving it the base pointer, and
1631 /// accumulates the total constant offset applied in the returned constant. It
1632 /// returns null if V is not a pointer, and returns the constant '0' if there are
1633 /// no constant offsets applied.
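/// For example, for an inbounds GEP that indexes four elements into an i32
/// array, V is rewritten to the array's base pointer and the returned constant
/// holds the byte offset 16.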
1634 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
1635 if (!V->getType()->isPointerTy())
1636 return nullptr;
1638 unsigned AS = V->getType()->getPointerAddressSpace();
1639 unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
1640 APInt Offset = APInt::getNullValue(IntPtrWidth);
1642 // Even though we don't look through PHI nodes, we could be called on an
1643 // instruction in an unreachable block, which may be on a cycle.
1644 SmallPtrSet<Value *, 4> Visited;
1645 Visited.insert(V);
1646 do {
1647 if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
1648 if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
1649 return nullptr;
1650 V = GEP->getPointerOperand();
1651 } else if (Operator::getOpcode(V) == Instruction::BitCast) {
1652 V = cast<Operator>(V)->getOperand(0);
1653 } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1654 if (GA->isInterposable())
1655 break;
1656 V = GA->getAliasee();
1657 } else {
1658 break;
1659 }
1660 assert(V->getType()->isPointerTy() && "Unexpected operand type!");
1661 } while (Visited.insert(V).second);
1663 Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
1664 return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
1665 }
1667 /// Find dead blocks due to deleted CFG edges during inlining.
1669 /// If we know the successor of the current block, \p CurrBB, has to be \p
1670 /// NextBB, the other successors of \p CurrBB are dead if these successors have
1671 /// no live incoming CFG edges. If one block is found to be dead, we can
1672 /// continue growing the dead block list by checking the successors of the dead
1673 /// blocks to see if all their incoming edges are dead or not.
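/// For example, if CurrBB ends in a conditional branch whose condition was
/// simplified to a constant, NextBB is its only live successor; the other
/// successor becomes dead once it has no remaining live predecessors, and its
/// own successors are then examined in the same way.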
1674 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
1675 auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
1676 // A CFG edge is dead if the predecessor is dead or the predecessor has a
1677 // known successor which is not the one under examination.
1678 return (DeadBlocks.count(Pred) ||
1679 (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
1680 };
1682 auto IsNewlyDead = [&](BasicBlock *BB) {
1683 // If all the edges to a block are dead, the block is also dead.
1684 return (!DeadBlocks.count(BB) &&
1685 llvm::all_of(predecessors(BB),
1686 [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
1687 };
1689 for (BasicBlock *Succ : successors(CurrBB)) {
1690 if (Succ == NextBB || !IsNewlyDead(Succ))
1691 continue;
1692 SmallVector<BasicBlock *, 4> NewDead;
1693 NewDead.push_back(Succ);
1694 while (!NewDead.empty()) {
1695 BasicBlock *Dead = NewDead.pop_back_val();
1696 if (DeadBlocks.insert(Dead))
1697 // Continue growing the dead block lists.
1698 for (BasicBlock *S : successors(Dead))
1699 if (IsNewlyDead(S))
1700 NewDead.push_back(S);
1701 }
1702 }
1703 }
1705 /// Analyze a call site for potential inlining.
1707 /// Returns true if inlining this call is viable, and false if it is not
1708 /// viable. It computes the cost and adjusts the threshold based on numerous
1709 /// factors and heuristics. If this method returns false but the computed cost
1710 /// is below the computed threshold, then inlining was forcibly disabled by
1711 /// some artifact of the routine.
1712 InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
1713 ++NumCallsAnalyzed;
1715 // Perform some tweaks to the cost and threshold based on the direct
1716 // callsite information.
1718 // We want to more aggressively inline vector-dense kernels, so up the
1719 // threshold, and we'll lower it if the % of vector instructions gets too
1720 // low. Note that these bonuses are somewhat arbitrary and evolved over time
1721 // by accident as much as because they are principled bonuses.
1723 // FIXME: It would be nice to remove all such bonuses. At least it would be
1724 // nice to base the bonus values on something more scientific.
1725 assert(NumInstructions == 0);
1726 assert(NumVectorInstructions == 0);
1728 // Update the threshold based on callsite properties
1729 updateThreshold(CS, F);
1731 // Speculatively apply all possible bonuses to Threshold. If cost exceeds
1732 // this Threshold at any point, then since cost cannot decrease, we can stop
1733 // processing the rest of the function body.
1734 Threshold += (SingleBBBonus + VectorBonus);
1736 // Give out bonuses for the callsite, as the instructions setting them up
1737 // will be gone after inlining.
1738 Cost -= getCallsiteCost(CS, DL);
1740 // If this function uses the coldcc calling convention, prefer not to inline
1741 // it.
1742 if (F.getCallingConv() == CallingConv::Cold)
1743 Cost += InlineConstants::ColdccPenalty;
1745 // Check if we're done. This can happen due to bonuses and penalties.
1746 if (Cost >= Threshold && !ComputeFullInlineCost)
1747 return "high cost";
1749 if (F.empty())
1750 return true;
1752 Function *Caller = CS.getInstruction()->getFunction();
1753 // Check if the caller function is recursive itself.
1754 for (User *U : Caller->users()) {
1755 CallSite Site(U);
1756 if (!Site)
1757 continue;
1758 Instruction *I = Site.getInstruction();
1759 if (I->getFunction() == Caller) {
1760 IsCallerRecursive = true;
1761 break;
1762 }
1763 }
1765 // Populate our simplified values by mapping from function arguments to call
1766 // arguments with known important simplifications.
1767 CallSite::arg_iterator CAI = CS.arg_begin();
1768 for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1769 FAI != FAE; ++FAI, ++CAI) {
1770 assert(CAI != CS.arg_end());
1771 if (Constant *C = dyn_cast<Constant>(CAI))
1772 SimplifiedValues[&*FAI] = C;
1774 Value *PtrArg = *CAI;
1775 if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1776 ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1778 // We can SROA any pointer arguments derived from alloca instructions.
1779 if (isa<AllocaInst>(PtrArg)) {
1780 SROAArgValues[&*FAI] = PtrArg;
1781 SROAArgCosts[PtrArg] = 0;
1782 }
1783 }
1784 }
1785 NumConstantArgs = SimplifiedValues.size();
1786 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1787 NumAllocaArgs = SROAArgValues.size();
1789 // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1790 // the ephemeral values multiple times (and they're completely determined by
1791 // the callee, so this is purely duplicate work).
1792 SmallPtrSet<const Value *, 32> EphValues;
1793 CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1795 // The worklist of live basic blocks in the callee *after* inlining. We avoid
1796 // adding basic blocks of the callee which can be proven to be dead for this
1797 // particular call site in order to get more accurate cost estimates. This
1798 // requires a somewhat heavyweight iteration pattern: we need to walk the
1799 // basic blocks in a breadth-first order as we insert live successors. To
1800 // accomplish this, and because we exit early once we cross our threshold, we
1801 // use a small-size optimized SetVector.
1802 typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1803 SmallPtrSet<BasicBlock *, 16>>
1804 BBSetVector;
1805 BBSetVector BBWorklist;
1806 BBWorklist.insert(&F.getEntryBlock());
1807 bool SingleBB = true;
1808 // Note that we *must not* cache the size, this loop grows the worklist.
1809 for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1810 // Bail out the moment we cross the threshold. This means we'll under-count
1811 // the cost, but only when undercounting doesn't matter.
1812 if (Cost >= Threshold && !ComputeFullInlineCost)
1813 break;
1815 BasicBlock *BB = BBWorklist[Idx];
1816 if (BB->empty())
1817 continue;
1819 // Disallow inlining a blockaddress. A blockaddress only has defined
1820 // behavior for an indirect branch in the same function, and we do not
1821 // currently support inlining indirect branches. But, the inliner may not
1822 // see an indirect branch that ends up being dead code at a particular call
1823 // site. If the blockaddress escapes the function, e.g., via a global
1824 // variable, inlining may lead to an invalid cross-function reference.
1825 if (BB->hasAddressTaken())
1826 return "blockaddress";
1828 // Analyze the cost of this block. If we blow through the threshold, this
1829 // returns false, and we can bail out.
1830 InlineResult IR = analyzeBlock(BB, EphValues);
1831 if (!IR)
1832 return IR;
1834 TerminatorInst *TI = BB->getTerminator();
1836 // Add in the live successors by first checking whether we have a terminator
1837 // that may be simplified based on the values simplified by this call.
1838 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1839 if (BI->isConditional()) {
1840 Value *Cond = BI->getCondition();
1841 if (ConstantInt *SimpleCond =
1842 dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1843 BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
1844 BBWorklist.insert(NextBB);
1845 KnownSuccessors[BB] = NextBB;
1846 findDeadBlocks(BB, NextBB);
1847 continue;
1848 }
1849 }
1850 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1851 Value *Cond = SI->getCondition();
1852 if (ConstantInt *SimpleCond =
1853 dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1854 BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
1855 BBWorklist.insert(NextBB);
1856 KnownSuccessors[BB] = NextBB;
1857 findDeadBlocks(BB, NextBB);
1858 continue;
1859 }
1860 }
1862 // If we're unable to select a particular successor, just count all of
1863 // them.
1864 for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1865 ++TIdx)
1866 BBWorklist.insert(TI->getSuccessor(TIdx));
1868 // If we had any successors at this point, then post-inlining is likely to
1869 // have them as well. Note that we assume any basic blocks which existed
1870 // due to branches or switches which folded above will also fold after
1871 // inlining.
1872 if (SingleBB && TI->getNumSuccessors() > 1) {
1873 // Take off the bonus we applied to the threshold.
1874 Threshold -= SingleBBBonus;
1875 SingleBB = false;
1876 }
1877 }
1879 bool OnlyOneCallAndLocalLinkage =
1880 F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
1881 // If this is a noduplicate call, we can still inline as long as
1882 // inlining this would cause the removal of the caller (so the instruction
1883 // is not actually duplicated, just moved).
1884 if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1885 return "noduplicate";
1887 // We applied the maximum possible vector bonus at the beginning. Now,
1888 // subtract the excess bonus, if any, from the Threshold before
1889 // comparing against Cost.
1890 if (NumVectorInstructions <= NumInstructions / 10)
1891 Threshold -= VectorBonus;
1892 else if (NumVectorInstructions <= NumInstructions / 2)
1893 Threshold -= VectorBonus/2;
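// Note that clamping the threshold to at least 1 in the comparison below means
// a call site whose final cost is zero or negative is still treated as
// profitable even when the threshold has been driven to zero or below.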
1895 return Cost < std::max(1, Threshold);
1896 }
1898 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1899 /// Dump stats about this call's analysis.
1900 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1901 #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
1902 DEBUG_PRINT_STAT(NumConstantArgs);
1903 DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1904 DEBUG_PRINT_STAT(NumAllocaArgs);
1905 DEBUG_PRINT_STAT(NumConstantPtrCmps);
1906 DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1907 DEBUG_PRINT_STAT(NumInstructionsSimplified);
1908 DEBUG_PRINT_STAT(NumInstructions);
1909 DEBUG_PRINT_STAT(SROACostSavings);
1910 DEBUG_PRINT_STAT(SROACostSavingsLost);
1911 DEBUG_PRINT_STAT(LoadEliminationCost);
1912 DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1913 DEBUG_PRINT_STAT(Cost);
1914 DEBUG_PRINT_STAT(Threshold);
1915 #undef DEBUG_PRINT_STAT
1916 }
1917 #endif
1919 /// Test that there are no attribute conflicts between Caller and Callee
1920 /// that prevent inlining.
1921 static bool functionsHaveCompatibleAttributes(Function *Caller,
1922 Function *Callee,
1923 TargetTransformInfo &TTI) {
1924 return TTI.areInlineCompatible(Caller, Callee) &&
1925 AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1926 }
1928 int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
1929 int Cost = 0;
1930 for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
1931 if (CS.isByValArgument(I)) {
1932 // We approximate the number of loads and stores needed by dividing the
1933 // size of the byval type by the target's pointer size.
1934 PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1935 unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1936 unsigned AS = PTy->getAddressSpace();
1937 unsigned PointerSize = DL.getPointerSizeInBits(AS);
1938 // Ceiling division.
1939 unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1941 // If it generates more than 8 stores it is likely to be expanded as an
1942 // inline memcpy so we take that as an upper bound. Otherwise we assume
1943 // one load and one store per word copied.
1944 // FIXME: The maxStoresPerMemcpy setting from the target should be used
1945 // here instead of a magic number of 8, but it's not available via
1946 // DataLayout.
1947 NumStores = std::min(NumStores, 8U);
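// For example, a 256-bit byval aggregate copied with 64-bit pointers needs
// ceil(256 / 64) = 4 words, so the estimate below charges 2 * 4 * InstrCost
// for the loads and stores.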
1949 Cost += 2 * NumStores * InlineConstants::InstrCost;
1950 } else {
1951 // For non-byval arguments subtract off one instruction per call
1952 // argument.
1953 Cost += InlineConstants::InstrCost;
1954 }
1955 }
1956 // The call instruction also disappears after inlining.
1957 Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
1958 return Cost;
1959 }
1961 InlineCost llvm::getInlineCost(
1962 CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
1963 std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1964 Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1965 ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1966 return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
1967 GetAssumptionCache, GetBFI, PSI, ORE);
1968 }
1970 InlineCost llvm::getInlineCost(
1971 CallSite CS, Function *Callee, const InlineParams &Params,
1972 TargetTransformInfo &CalleeTTI,
1973 std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1974 Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1975 ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1977 // Cannot inline indirect calls.
1978 if (!Callee)
1979 return llvm::InlineCost::getNever("indirect call");
1981 // Never inline calls with byval arguments that do not have the alloca
1982 // address space. Since byval arguments can be replaced with a copy to an
1983 // alloca, the inlined code would need to be adjusted to handle that the
1984 // argument is in the alloca address space (so it is a little bit complicated
1985 // to solve).
1986 unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
1987 for (unsigned I = 0, E = CS.arg_size(); I != E; ++I)
1988 if (CS.isByValArgument(I)) {
1989 PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1990 if (PTy->getAddressSpace() != AllocaAS)
1991 return llvm::InlineCost::getNever("byval arguments without alloca"
1992 " address space");
1993 }
1995 // Calls to functions with always-inline attributes should be inlined
1996 // whenever possible.
1997 if (CS.hasFnAttr(Attribute::AlwaysInline)) {
1998 if (isInlineViable(*Callee))
1999 return llvm::InlineCost::getAlways("always inline attribute");
2000 return llvm::InlineCost::getNever("inapplicable always inline attribute");
2001 }
2003 // Never inline functions with conflicting attributes (unless callee has
2004 // always-inline attribute).
2005 Function *Caller = CS.getCaller();
2006 if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
2007 return llvm::InlineCost::getNever("conflicting attributes");
2009 // Don't inline this call if the caller has the optnone attribute.
2010 if (Caller->hasFnAttribute(Attribute::OptimizeNone))
2011 return llvm::InlineCost::getNever("optnone attribute");
2013 // Don't inline a function that treats null pointer as valid into a caller
2014 // that does not have this attribute.
2015 if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2016 return llvm::InlineCost::getNever("nullptr definitions incompatible");
2018 // Don't inline functions which can be interposed at link-time.
2019 if (Callee->isInterposable())
2020 return llvm::InlineCost::getNever("interposable");
2022 // Don't inline functions marked noinline.
2023 if (Callee->hasFnAttribute(Attribute::NoInline))
2024 return llvm::InlineCost::getNever("noinline function attribute");
2026 // Don't inline call sites marked noinline.
2027 if (CS.isNoInline())
2028 return llvm::InlineCost::getNever("noinline call site attribute");
2030 LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2031 << "... (caller:" << Caller->getName() << ")\n");
2033 CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
2034 Params);
2035 InlineResult ShouldInline = CA.analyzeCall(CS);
2037 LLVM_DEBUG(CA.dump());
2039 // Check if there was a reason to force inlining or no inlining.
2040 if (!ShouldInline && CA.getCost() < CA.getThreshold())
2041 return InlineCost::getNever(ShouldInline.message);
2042 if (ShouldInline && CA.getCost() >= CA.getThreshold())
2043 return InlineCost::getAlways("empty function");
2045 return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
2046 }
2048 bool llvm::isInlineViable(Function &F) {
2049 bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2050 for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
2051 // Disallow inlining of functions which contain indirect branches or
2052 // blockaddresses.
2053 if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
2054 return false;
2056 for (auto &II : *BI) {
2057 CallSite CS(&II);
2058 if (!CS)
2059 continue;
2061 // Disallow recursive calls.
2062 if (&F == CS.getCalledFunction())
2063 return false;
2065 // Disallow calls which expose returns-twice to a function not previously
2066 // attributed as such.
2067 if (!ReturnsTwice && CS.isCall() &&
2068 cast<CallInst>(CS.getInstruction())->canReturnTwice())
2069 return false;
2071 if (CS.getCalledFunction())
2072 switch (CS.getCalledFunction()->getIntrinsicID()) {
2073 default:
2074 break;
2075 // Disallow inlining of @llvm.icall.branch.funnel because the current
2076 // backend can't separate call targets from call arguments.
2077 case llvm::Intrinsic::icall_branch_funnel:
2078 // Disallow inlining functions that call @llvm.localescape. Doing this
2079 // correctly would require major changes to the inliner.
2080 case llvm::Intrinsic::localescape:
2081 // Disallow inlining of functions that initialize VarArgs with va_start.
2082 case llvm::Intrinsic::vastart:
2083 return false;
2084 }
2085 }
2086 }
2088 return true;
2089 }
2091 // APIs to create InlineParams based on command line flags and/or other
2092 // parameters.
2094 InlineParams llvm::getInlineParams(int Threshold) {
2095 InlineParams Params;
2097 // This field is the threshold to use for a callee by default. This is
2098 // derived from one or more of:
2099 // * optimization or size-optimization levels,
2100 // * a value passed to createFunctionInliningPass function, or
2101 // * the -inline-threshold flag.
2102 // If the -inline-threshold flag is explicitly specified, that is used
2103 // irrespective of anything else.
2104 if (InlineThreshold.getNumOccurrences() > 0)
2105 Params.DefaultThreshold = InlineThreshold;
2106 else
2107 Params.DefaultThreshold = Threshold;
2109 // Set the HintThreshold knob from the -inlinehint-threshold.
2110 Params.HintThreshold = HintThreshold;
2112 // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
2113 Params.HotCallSiteThreshold = HotCallSiteThreshold;
2115 // If the -locally-hot-callsite-threshold is explicitly specified, use it to
2116 // populate LocallyHotCallSiteThreshold. Later, we populate
2117 // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
2118 // we know that optimization level is O3 (in the getInlineParams variant that
2119 // takes the opt and size levels).
2120 // FIXME: Remove this check (and make the assignment unconditional) after
2121 // addressing size regression issues at O2.
2122 if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
2123 Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2125 // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
2126 Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
2128 // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
2129 // -inlinehint-threshold commandline option is not explicitly given. If that
2130 // option is present, then its value applies even for callees with size and
2131 // minsize attributes.
2132 // If the -inline-threshold is not specified, set the ColdThreshold from the
2133 // -inlinecold-threshold even if it is not explicitly passed. If
2134 // -inline-threshold is specified, then -inlinecold-threshold needs to be
2135 // explicitly specified to set the ColdThreshold knob.
2136 if (InlineThreshold.getNumOccurrences() == 0) {
2137 Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
2138 Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
2139 Params.ColdThreshold = ColdThreshold;
2140 } else if (ColdThreshold.getNumOccurrences() > 0) {
2141 Params.ColdThreshold = ColdThreshold;
2142 }
2143 return Params;
2144 }
2146 InlineParams llvm::getInlineParams() {
2147 return getInlineParams(InlineThreshold);
2148 }
2150 // Compute the default threshold for inlining based on the opt level and the
2151 // size opt level.
2152 static int computeThresholdFromOptLevels(unsigned OptLevel,
2153 unsigned SizeOptLevel) {
2154 if (OptLevel > 2)
2155 return InlineConstants::OptAggressiveThreshold;
2156 if (SizeOptLevel == 1) // -Os
2157 return InlineConstants::OptSizeThreshold;
2158 if (SizeOptLevel == 2) // -Oz
2159 return InlineConstants::OptMinSizeThreshold;
2160 return InlineThreshold;
2161 }
2163 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
2164 auto Params =
2165 getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
2166 // At O3, use the value of -locally-hot-callsite-threshold option to populate
2167 // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
2168 // when it is specified explicitly.
2169 if (OptLevel > 2)
2170 Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2171 return Params;
2172 }