lib/Analysis/TargetTransformInfo.cpp

   1 //===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "llvm/Analysis/TargetTransformInfo.h"
  10 #include "llvm/Analysis/TargetTransformInfoImpl.h"
  11 #include "llvm/IR/CallSite.h"
  12 #include "llvm/IR/CFG.h"
  13 #include "llvm/IR/DataLayout.h"
  14 #include "llvm/IR/Instruction.h"
  15 #include "llvm/IR/Instructions.h"
  16 #include "llvm/IR/IntrinsicInst.h"
  17 #include "llvm/IR/Module.h"
  18 #include "llvm/IR/Operator.h"
  19 #include "llvm/IR/PatternMatch.h"
  20 #include "llvm/Support/CommandLine.h"
  21 #include "llvm/Support/ErrorHandling.h"
  22 #include "llvm/Analysis/CFG.h"
  23 #include "llvm/Analysis/LoopIterator.h"
  24 #include <utility>
  25
  26 using namespace llvm;
  27 using namespace PatternMatch;
  28
  29 #define DEBUG_TYPE "tti"
  30
  31 static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                          // Boolean command-line option named
                                          // "costmodel-reduxcost". Its initial
                                          // value is false and it is flagged
                                          // cl::Hidden.
  32                                      cl::Hidden,
  33                                      cl::desc("Recognize reduction patterns."));
  34
  35 namespace {
  36 /// No-op implementation of the TTI interface using the utility base
  37 /// classes.
  38 ///
  39 /// This is used when no target specific information is available.
  40 struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
       // The constructor only hands DL to the CRTP base; this struct declares
       // no members or overrides of its own.
  41   explicit NoTTIImpl(const DataLayout &DL)
  42       : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
  43 };
  44 } // end anonymous namespace
  45
  46 bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
  47   // A loop containing irreducible control flow cannot be converted to a
  48   // hardware loop, so that is the single condition tested here.
  49   LoopBlocksRPO BlockOrder(L);
  50   BlockOrder.perform(&LI);
  51   const bool HasIrreducibleCFG =
  52       containsIrreducibleCFG<const BasicBlock *>(BlockOrder, LI);
  53   return !HasIrreducibleCFG;
  54 }
  55
  56 bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
  57                                                LoopInfo &LI, DominatorTree &DT,
  58                                                bool ForceNestedLoop,
  59                                                bool ForceHardwareLoopPHI) {
       // Visits the exiting blocks of L in the order getExitingBlocks() returns
       // them and stops at the first one that passes every filter below. For
       // that block ExitBranch, ExitBlock and ExitCount are recorded. The return
       // value is simply whether ExitBlock is non-null afterwards.
       // NOTE(review): ExitBlock is not reset in this function, so the result
       // presumably relies on it being null on entry -- confirm.
  60   SmallVector<BasicBlock *, 4> ExitingBlocks;
  61   L->getExitingBlocks(ExitingBlocks);
  62
  63   for (BasicBlock *BB : ExitingBlocks) {
  64     // If we pass the updated counter back through a phi, we need to know
  65     // which latch the updated value will be coming from.
  66     if (!L->isLoopLatch(BB)) {
  67       if (ForceHardwareLoopPHI || CounterInReg)
  68         continue;
  69     }
  70
         // Reject exits whose count SCEV cannot compute, is the constant zero,
         // or is not invariant in L.
  71     const SCEV *EC = SE.getExitCount(L, BB);
  72     if (isa<SCEVCouldNotCompute>(EC))
  73       continue;
  74     if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
  75       if (ConstEC->getValue()->isZero())
  76         continue;
  77     } else if (!SE.isLoopInvariant(EC, L))
  78       continue;
  79
         // The exit count must fit in CountType.
  80     if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
  81       continue;
  82
  83     // If this exiting block is contained in a nested loop, it is not eligible
  84     // for insertion of the branch-and-decrement since the inner loop would
  85     // end up messing up the value in the CTR.
  86     if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop)
  87       continue;
  88
  89     // We now have a loop-invariant count of loop iterations (which is not the
  90     // constant zero) for which we know that this loop will not exit via this
  91     // existing block.
  92
  93     // We need to make sure that this block will run on every loop iteration.
  94     // For this to be true, we must dominate all blocks with backedges. Such
  95     // blocks are in-loop predecessors to the header block.
  96     bool NotAlways = false;
  97     for (BasicBlock *Pred : predecessors(L->getHeader())) {
           // Predecessors outside the loop are entry edges, not backedges.
  98       if (!L->contains(Pred))
  99         continue;
 100
 101       if (!DT.dominates(BB, Pred)) {
 102         NotAlways = true;
 103         break;
 104       }
 105     }
 106
 107     if (NotAlways)
 108       continue;
 109
 110     // Make sure this block ends with a conditional branch.
 111     Instruction *TI = BB->getTerminator();
 112     if (!TI)
 113       continue;
 114
 115     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
 116       if (!BI->isConditional())
 117         continue;
 118
 119       ExitBranch = BI;
 120     } else
 121       continue;
 122
 123     // Note that this block may not be the loop latch block, even if the loop
 124     // has a latch block.
 125     ExitBlock = BB;
 126     ExitCount = EC;
 127     break;
 128   }
 129
 130   if (!ExitBlock)
 131     return false;
 132   return true;
 133 }
 134
 135 TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
         // Builds a NoTTIImpl from DL, wraps it in a heap-allocated Model and
         // stores that in TTIImpl.
 136     : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
 137
 138 TargetTransformInfo::~TargetTransformInfo() {} // Empty body: no explicit cleanup is written here.
 139
 140 TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
         // Move construction: TTIImpl is move-initialized from Arg.TTIImpl.
 141     : TTIImpl(std::move(Arg.TTIImpl)) {}
 142
 143 TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
       // Move assignment: TTIImpl is move-assigned from RHS.TTIImpl. There is no
       // explicit self-assignment check.
 144   TTIImpl = std::move(RHS.TTIImpl);
 145   return *this;
 146 }
 147
 148 int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
 149                                           Type *OpTy) const {
       // Delegates to TTIImpl with the same arguments; the assert rejects a
       // negative cost before the value is returned.
 150   int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
 151   assert(Cost >= 0 && "TTI should not produce negative costs!");
 152   return Cost;
 153 }
 154
 155 int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs,
 156                                      const User *U) const {
       // Overload taking a function type and argument count. Delegates to
       // TTIImpl and asserts the cost is non-negative.
 157   int Cost = TTIImpl->getCallCost(FTy, NumArgs, U);
 158   assert(Cost >= 0 && "TTI should not produce negative costs!");
 159   return Cost;
 160 }
 161
 162 int TargetTransformInfo::getCallCost(const Function *F,
 163                                      ArrayRef<const Value *> Arguments,
 164                                      const User *U) const {
       // Overload taking the callee and its argument values. Delegates to
       // TTIImpl and asserts the cost is non-negative.
 165   int Cost = TTIImpl->getCallCost(F, Arguments, U);
 166   assert(Cost >= 0 && "TTI should not produce negative costs!");
 167   return Cost;
 168 }
 169
 170 unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
       // Pure forwarder: returns whatever the implementation in TTIImpl reports.
 171   return TTIImpl->getInliningThresholdMultiplier();
 172 }
 173
 174 int TargetTransformInfo::getInlinerVectorBonusPercent() const {
       // Pure forwarder: returns whatever the implementation in TTIImpl reports.
 175   return TTIImpl->getInlinerVectorBonusPercent();
 176 }
 177
 178 int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
 179                                     ArrayRef<const Value *> Operands) const {
       // Forwarded as-is. Unlike most cost queries in this file, the result is
       // not asserted to be non-negative here.
 180   return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
 181 }
 182
 183 int TargetTransformInfo::getExtCost(const Instruction *I,
 184                                     const Value *Src) const {
       // Forwarded as-is; no non-negativity assert is applied to the result.
 185   return TTIImpl->getExtCost(I, Src);
 186 }
 187
 188 int TargetTransformInfo::getIntrinsicCost(
 189     Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments,
 190     const User *U) const {
       // Delegates to TTIImpl with the same arguments and asserts the reported
       // cost is non-negative.
 191   int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U);
 192   assert(Cost >= 0 && "TTI should not produce negative costs!");
 193   return Cost;
 194 }
 195
 196 unsigned
 197 TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
 198                                                       unsigned &JTSize) const {
       // Pure forwarder. JTSize is passed through by reference, so any value the
       // implementation writes to it is visible to the caller.
 199   return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
 200 }
 201
 202 int TargetTransformInfo::getUserCost(const User *U,
 203     ArrayRef<const Value *> Operands) const {
       // Delegates to TTIImpl and asserts the reported cost is non-negative.
 204   int Cost = TTIImpl->getUserCost(U, Operands);
 205   assert(Cost >= 0 && "TTI should not produce negative costs!");
 206   return Cost;
 207 }
 208
 209 bool TargetTransformInfo::hasBranchDivergence() const {
       // Pure forwarder to the implementation held in TTIImpl.
 210   return TTIImpl->hasBranchDivergence();
 211 }
 212
 213 bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
       // Pure forwarder: V is passed unchanged to the implementation.
 214   return TTIImpl->isSourceOfDivergence(V);
 215 }
 216
 217 bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
       // Pure forwarder: V is passed unchanged to the implementation.
 218   return TTIImpl->isAlwaysUniform(V);
 219 }
 220
 221 unsigned TargetTransformInfo::getFlatAddressSpace() const {
       // Pure forwarder to the implementation held in TTIImpl.
 222   return TTIImpl->getFlatAddressSpace();
 223 }
 224
 225 bool TargetTransformInfo::collectFlatAddressOperands(
 226   SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const  {
       // Pure forwarder. OpIndexes is handed to the implementation by reference.
 227   return TTIImpl->collectFlatAddressOperands(OpIndexes, IID);
 228 }
 229
 230 bool TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
 231   IntrinsicInst *II, Value *OldV, Value *NewV) const {
       // Pure forwarder: all three pointers go unchanged to the implementation.
 232   return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
 233 }
 234
 235 bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
       // Pure forwarder: F is passed unchanged to the implementation.
 236   return TTIImpl->isLoweredToCall(F);
 237 }
 238
 239 bool TargetTransformInfo::isHardwareLoopProfitable(
 240   Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
 241   TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
       // Pure forwarder. HWLoopInfo is passed by non-const reference, so the
       // implementation is able to modify it.
 242   return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
 243 }
 244
 245 void TargetTransformInfo::getUnrollingPreferences(
 246     Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
       // This function returns void; the `return` merely forwards the call. UP is
       // passed by non-const reference to the implementation.
 247   return TTIImpl->getUnrollingPreferences(L, SE, UP);
 248 }
 249
 250 bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
       // Pure forwarder: Imm is passed unchanged to the implementation.
 251   return TTIImpl->isLegalAddImmediate(Imm);
 252 }
 253
 254 bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
       // Pure forwarder: Imm is passed unchanged to the implementation.
 255   return TTIImpl->isLegalICmpImmediate(Imm);
 256 }
 257
 258 bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
 259                                                 int64_t BaseOffset,
 260                                                 bool HasBaseReg,
 261                                                 int64_t Scale,
 262                                                 unsigned AddrSpace,
 263                                                 Instruction *I) const {
       // Pure forwarder: all seven arguments are passed through in the same
       // order.
 264   return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
 265                                         Scale, AddrSpace, I);
 266 }
 267
 268 bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
       // Pure forwarder: the comparison itself is done by the implementation.
 269   return TTIImpl->isLSRCostLess(C1, C2);
 270 }
 271
 272 bool TargetTransformInfo::canMacroFuseCmp() const {
       // Pure forwarder to the implementation held in TTIImpl.
 273   return TTIImpl->canMacroFuseCmp();
 274 }
 275
 276 bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI,
 277                                      ScalarEvolution *SE, LoopInfo *LI,
 278                                      DominatorTree *DT, AssumptionCache *AC,
 279                                      TargetLibraryInfo *LibInfo) const {
       // Pure forwarder. BI is a pointer-to-pointer handed on unchanged.
       // NOTE(review): presumably an out-parameter filled by the implementation
       // -- confirm against the header.
 280   return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
 281 }
 282
 283 bool TargetTransformInfo::shouldFavorPostInc() const {
       // Pure forwarder to the implementation held in TTIImpl.
 284   return TTIImpl->shouldFavorPostInc();
 285 }
 286
 287 bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const {
       // Pure forwarder: L is passed unchanged to the implementation.
 288   return TTIImpl->shouldFavorBackedgeIndex(L);
 289 }
 290
 291 bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
 292                                              MaybeAlign Alignment) const {
       // Pure forwarder: type and alignment go unchanged to the implementation.
 293   return TTIImpl->isLegalMaskedStore(DataType, Alignment);
 294 }
 295
 296 bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
 297                                             MaybeAlign Alignment) const {
       // Pure forwarder: type and alignment go unchanged to the implementation.
 298   return TTIImpl->isLegalMaskedLoad(DataType, Alignment);
 299 }
 300
 301 bool TargetTransformInfo::isLegalNTStore(Type *DataType,
 302                                          Align Alignment) const {
       // Pure forwarder. Alignment is an Align here, whereas the masked
       // load/store queries in this file take a MaybeAlign.
 303   return TTIImpl->isLegalNTStore(DataType, Alignment);
 304 }
 305
 306 bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
       // Pure forwarder: type and alignment go unchanged to the implementation.
 307   return TTIImpl->isLegalNTLoad(DataType, Alignment);
 308 }
 309
 310 bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
       // Pure forwarder: DataType is passed unchanged to the implementation.
 311   return TTIImpl->isLegalMaskedGather(DataType);
 312 }
 313
 314 bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
       // Pure forwarder: DataType is passed unchanged to the implementation.
 315   return TTIImpl->isLegalMaskedScatter(DataType);
 316 }
 317
 318 bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
       // Pure forwarder: DataType is passed unchanged to the implementation.
 319   return TTIImpl->isLegalMaskedCompressStore(DataType);
 320 }
 321
 322 bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
       // Pure forwarder: DataType is passed unchanged to the implementation.
 323   return TTIImpl->isLegalMaskedExpandLoad(DataType);
 324 }
 325
 326 bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
       // Pure forwarder: both arguments go unchanged to the implementation.
 327   return TTIImpl->hasDivRemOp(DataType, IsSigned);
 328 }
 329
 330 bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
 331                                              unsigned AddrSpace) const {
       // Pure forwarder: both arguments go unchanged to the implementation.
 332   return TTIImpl->hasVolatileVariant(I, AddrSpace);
 333 }
 334
 335 bool TargetTransformInfo::prefersVectorizedAddressing() const {
       // Pure forwarder to the implementation held in TTIImpl.
 336   return TTIImpl->prefersVectorizedAddressing();
 337 }
 338
 339 int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
 340                                               int64_t BaseOffset,
 341                                               bool HasBaseReg,
 342                                               int64_t Scale,
 343                                               unsigned AddrSpace) const {
       // Delegates to TTIImpl with the same arguments. The assert below means a
       // negative value is never accepted from the implementation.
 344   int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
 345                                            Scale, AddrSpace);
 346   assert(Cost >= 0 && "TTI should not produce negative costs!");
 347   return Cost;
 348 }
 349
 350 bool TargetTransformInfo::LSRWithInstrQueries() const {
       // Pure forwarder to the implementation held in TTIImpl.
 351   return TTIImpl->LSRWithInstrQueries();
 352 }
 353
 354 bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
       // Pure forwarder: both types are passed in the same order.
 355   return TTIImpl->isTruncateFree(Ty1, Ty2);
 356 }
 357
 358 bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
       // Pure forwarder: I is passed unchanged to the implementation.
 359   return TTIImpl->isProfitableToHoist(I);
 360 }
 361
 362 bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); } // Pure forwarder to TTIImpl.
 363
 364 bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
       // Pure forwarder: Ty is passed unchanged to the implementation.
 365   return TTIImpl->isTypeLegal(Ty);
 366 }
 367
 368 bool TargetTransformInfo::shouldBuildLookupTables() const {
       // Pure forwarder to the implementation held in TTIImpl.
 369   return TTIImpl->shouldBuildLookupTables();
 370 }
 371 bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
       // Pure forwarder: C is passed unchanged to the implementation.
 372   return TTIImpl->shouldBuildLookupTablesForConstant(C);
 373 }
 374
 375 bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
       // Pure forwarder: F is passed by reference to the implementation.
 376   return TTIImpl->useColdCCForColdCall(F);
 377 }
 378
 379 unsigned TargetTransformInfo::
 380 getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
       // Pure forwarder: the type and both flags go unchanged to the
       // implementation.
 381   return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
 382 }
 383
 384 unsigned TargetTransformInfo::
 385 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
 386                                  unsigned VF) const {
       // Pure forwarder: Args and VF go unchanged to the implementation.
 387   return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
 388 }
 389
 390 bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
       // Pure forwarder to the implementation held in TTIImpl.
 391   return TTIImpl->supportsEfficientVectorElementLoadStore();
 392 }
 393
 394 bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
       // Pure forwarder: the flag is passed unchanged to the implementation.
 395   return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
 396 }
 397
 398 TargetTransformInfo::MemCmpExpansionOptions
 399 TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
       // Pure forwarder: returns the options object produced by the
       // implementation for the given flags.
 400   return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
 401 }
 402
 403 bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
       // Pure forwarder to the implementation held in TTIImpl.
 404   return TTIImpl->enableInterleavedAccessVectorization();
 405 }
 406
 407 bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
       // Pure forwarder to the implementation held in TTIImpl.
 408   return TTIImpl->enableMaskedInterleavedAccessVectorization();
 409 }
 410
 411 bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
       // Pure forwarder to the implementation held in TTIImpl.
 412   return TTIImpl->isFPVectorizationPotentiallyUnsafe();
 413 }
 414
 415 bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
 416                                                          unsigned BitWidth,
 417                                                          unsigned AddressSpace,
 418                                                          unsigned Alignment,
 419                                                          bool *Fast) const {
       // Pure forwarder. Fast is a pointer handed on unchanged.
       // NOTE(review): presumably an optional out-flag written by the
       // implementation -- confirm against the header.
 420   return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
 421                                                  Alignment, Fast);
 422 }
 423
 424 TargetTransformInfo::PopcntSupportKind
 425 TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
       // Pure forwarder: the bit width is passed unchanged to the implementation.
 426   return TTIImpl->getPopcntSupport(IntTyWidthInBit);
 427 }
 428
 429 bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
       // Pure forwarder: Ty is passed unchanged to the implementation.
 430   return TTIImpl->haveFastSqrt(Ty);
 431 }
 432
 433 bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
       // Pure forwarder: Ty is passed unchanged to the implementation.
 434   return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
 435 }
 436
 437 int TargetTransformInfo::getFPOpCost(Type *Ty) const {
       // Delegates to TTIImpl and asserts the reported cost is non-negative.
 438   int Cost = TTIImpl->getFPOpCost(Ty);
 439   assert(Cost >= 0 && "TTI should not produce negative costs!");
 440   return Cost;
 441 }
 442
 443 int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
 444                                                const APInt &Imm,
 445                                                Type *Ty) const {
       // Delegates to TTIImpl with the same arguments and asserts the reported
       // cost is non-negative.
 446   int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
 447   assert(Cost >= 0 && "TTI should not produce negative costs!");
 448   return Cost;
 449 }
 450
 451 int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
       // Overload with only the immediate and its type. Delegates to TTIImpl and
       // asserts the cost is non-negative.
 452   int Cost = TTIImpl->getIntImmCost(Imm, Ty);
 453   assert(Cost >= 0 && "TTI should not produce negative costs!");
 454   return Cost;
 455 }
 456
 457 int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
 458                                        const APInt &Imm, Type *Ty) const {
       // Overload keyed by an instruction opcode and operand index. Delegates to
       // TTIImpl and asserts the cost is non-negative.
 459   int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
 460   assert(Cost >= 0 && "TTI should not produce negative costs!");
 461   return Cost;
 462 }
 463
 464 int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
 465                                        const APInt &Imm, Type *Ty) const {
       // Overload keyed by an intrinsic ID and operand index. Delegates to
       // TTIImpl and asserts the cost is non-negative.
 466   int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
 467   assert(Cost >= 0 && "TTI should not produce negative costs!");
 468   return Cost;
 469 }
 470
 471 unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const {
       // Pure forwarder: ClassID is passed unchanged to the implementation.
 472   return TTIImpl->getNumberOfRegisters(ClassID);
 473 }
 474
 475 unsigned TargetTransformInfo::getRegisterClassForType(bool Vector, Type *Ty) const {
       // Pure forwarder: both arguments go unchanged to the implementation.
 476   return TTIImpl->getRegisterClassForType(Vector, Ty);
 477 }
 478
 479 const char* TargetTransformInfo::getRegisterClassName(unsigned ClassID) const {
       // Pure forwarder: the returned pointer is whatever the implementation
       // supplies; this wrapper neither copies nor owns it.
 480   return TTIImpl->getRegisterClassName(ClassID);
 481 }
 482
 483 unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
       // Pure forwarder: the Vector flag is passed unchanged to the
       // implementation.
 484   return TTIImpl->getRegisterBitWidth(Vector);
 485 }
 486
 487 unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
       // Pure forwarder to the implementation held in TTIImpl.
 488   return TTIImpl->getMinVectorRegisterBitWidth();
 489 }
 490
 491 bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
       // Pure forwarder: OptSize is passed unchanged to the implementation.
 492   return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
 493 }
 494
 495 unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
       // Pure forwarder: ElemWidth is passed unchanged to the implementation.
 496   return TTIImpl->getMinimumVF(ElemWidth);
 497 }
 498
 499 bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
 500     const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
       // Pure forwarder. AllowPromotionWithoutCommonHeader is passed by non-const
       // reference, so the implementation is able to set it.
 501   return TTIImpl->shouldConsiderAddressTypePromotion(
 502       I, AllowPromotionWithoutCommonHeader);
 503 }
 504
 505 unsigned TargetTransformInfo::getCacheLineSize() const {
       // Pure forwarder to the implementation held in TTIImpl.
 506   return TTIImpl->getCacheLineSize();
 507 }
 508
 509 llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level)
 510   const {
       // Pure forwarder: the Optional produced by the implementation is returned
       // as-is.
 511   return TTIImpl->getCacheSize(Level);
 512 }
 513
 514 llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
 515   CacheLevel Level) const {
       // Pure forwarder: the Optional produced by the implementation is returned
       // as-is.
 516   return TTIImpl->getCacheAssociativity(Level);
 517 }
 518
 519 unsigned TargetTransformInfo::getPrefetchDistance() const {
       // Pure forwarder to the implementation held in TTIImpl.
 520   return TTIImpl->getPrefetchDistance();
 521 }
 522
 523 unsigned TargetTransformInfo::getMinPrefetchStride() const {
       // Pure forwarder to the implementation held in TTIImpl.
 524   return TTIImpl->getMinPrefetchStride();
 525 }
 526
 527 unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
       // Pure forwarder to the implementation held in TTIImpl.
 528   return TTIImpl->getMaxPrefetchIterationsAhead();
 529 }
 530
 531 unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
       // Pure forwarder: VF is passed unchanged to the implementation.
 532   return TTIImpl->getMaxInterleaveFactor(VF);
 533 }
 534
 535 TargetTransformInfo::OperandValueKind
 536 TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
       // Classifies V into an OperandValueKind (the return value) and reports via
       // OpProps whether it is known to be a power of two. OpProps is always
       // written: it starts as OP_None and is only raised to OP_PowerOf2 by the
       // constant checks below. This function does not use TTIImpl.
 537   OperandValueKind OpInfo = OK_AnyValue;
 538   OpProps = OP_None;
 539
       // A scalar integer constant is answered immediately.
 540   if (auto *CI = dyn_cast<ConstantInt>(V)) {
 541     if (CI->getValue().isPowerOf2())
 542       OpProps = OP_PowerOf2;
 543     return OK_UniformConstantValue;
 544   }
 545
 546   // A broadcast shuffle creates a uniform value.
 547   // TODO: Add support for non-zero index broadcasts.
 548   // TODO: Add support for different source vector width.
 549   if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
 550     if (ShuffleInst->isZeroEltSplat())
 551       OpInfo = OK_UniformValue;
 552
       // Splat is null when getSplatValue() finds no splatted value for V.
 553   const Value *Splat = getSplatValue(V);
 554
 555   // Check for a splat of a constant or for a non uniform vector of constants
 556   // and check if the constant(s) are all powers of two.
 557   if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
 558     OpInfo = OK_NonUniformConstantValue;
 559     if (Splat) {
 560       OpInfo = OK_UniformConstantValue;
 561       if (auto *CI = dyn_cast<ConstantInt>(Splat))
 562         if (CI->getValue().isPowerOf2())
 563           OpProps = OP_PowerOf2;
 564     } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
           // Assume power-of-two until an element proves otherwise. An element
           // that is not a ConstantInt, or is not a power of two, resets the
           // property and ends the scan.
 565       OpProps = OP_PowerOf2;
 566       for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
 567         if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
 568           if (CI->getValue().isPowerOf2())
 569             continue;
 570         OpProps = OP_None;
 571         break;
 572       }
 573     }
 574   }
 575
 576   // Check for a splat of a uniform value. This is not loop aware, so return
 577   // true only for the obviously uniform cases (argument, globalvalue)
 578   if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
 579     OpInfo = OK_UniformValue;
 580
 581   return OpInfo;
 582 }
 583
 584 int TargetTransformInfo::getArithmeticInstrCost(
 585     unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
 586     OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
 587     OperandValueProperties Opd2PropInfo,
 588     ArrayRef<const Value *> Args) const {
       // Delegates to TTIImpl with all arguments in the same order and asserts
       // the reported cost is non-negative.
 589   int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
 590                                              Opd1PropInfo, Opd2PropInfo, Args);
 591   assert(Cost >= 0 && "TTI should not produce negative costs!");
 592   return Cost;
 593 }
 594
 595 int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
 596                                         Type *SubTp) const {
       // Delegates to TTIImpl and asserts the reported cost is non-negative.
 597   int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
 598   assert(Cost >= 0 && "TTI should not produce negative costs!");
 599   return Cost;
 600 }
 601
 602 int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
 603                                  Type *Src, const Instruction *I) const {
       // I may be null. When it is supplied, its opcode must equal Opcode (first
       // assert). The cost from TTIImpl must be non-negative (second assert).
 604   assert ((I == nullptr || I->getOpcode() == Opcode) &&
 605           "Opcode should reflect passed instruction.");
 606   int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
 607   assert(Cost >= 0 && "TTI should not produce negative costs!");
 608   return Cost;
 609 }
 610
 611 int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
 612                                                   VectorType *VecTy,
 613                                                   unsigned Index) const {
       // Delegates to TTIImpl and asserts the reported cost is non-negative.
 614   int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
 615   assert(Cost >= 0 && "TTI should not produce negative costs!");
 616   return Cost;
 617 }
 618
 619 int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
       // Delegates to TTIImpl and asserts the reported cost is non-negative.
 620   int Cost = TTIImpl->getCFInstrCost(Opcode);
 621   assert(Cost >= 0 && "TTI should not produce negative costs!");
 622   return Cost;
 623 }
 624
 625 int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 626                                  Type *CondTy, const Instruction *I) const {
       // I may be null. When it is supplied, its opcode must equal Opcode (first
       // assert). The cost from TTIImpl must be non-negative (second assert).
 627   assert ((I == nullptr || I->getOpcode() == Opcode) &&
 628           "Opcode should reflect passed instruction.");
 629   int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
 630   assert(Cost >= 0 && "TTI should not produce negative costs!");
 631   return Cost;
 632 }
 633
 634 int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
 635                                             unsigned Index) const {
       // Delegates to TTIImpl and asserts the reported cost is non-negative.
 636   int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
 637   assert(Cost >= 0 && "TTI should not produce negative costs!");
 638   return Cost;
 639 }
 640
 641 int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
 642                                          unsigned Alignment,
 643                                          unsigned AddressSpace,
 644                                          const Instruction *I) const {
       // I may be null. When it is supplied, its opcode must equal Opcode (first
       // assert). The cost from TTIImpl must be non-negative (second assert).
 645   assert ((I == nullptr || I->getOpcode() == Opcode) &&
 646           "Opcode should reflect passed instruction.");
 647   int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
 648   assert(Cost >= 0 && "TTI should not produce negative costs!");
 649   return Cost;
 650 }
 651
 652 int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
 653                                                unsigned Alignment,
 654                                                unsigned AddressSpace) const {
       // Delegates to TTIImpl and asserts the reported cost is non-negative.
 655   int Cost =
 656       TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
 657   assert(Cost >= 0 && "TTI should not produce negative costs!");
 658   return Cost;
 659 }
 660
 661 int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
 662                                                 Value *Ptr, bool VariableMask,
 663                                                 unsigned Alignment) const {
       // Delegates to TTIImpl and asserts the reported cost is non-negative.
 664   int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
 665                                              Alignment);
 666   assert(Cost >= 0 && "TTI should not produce negative costs!");
 667   return Cost;
 668 }
 669
 670 int TargetTransformInfo::getInterleavedMemoryOpCost(
 671     unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
 672     unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
 673     bool UseMaskForGaps) const {
       // Delegates to TTIImpl with all eight arguments in the same order and
       // asserts the reported cost is non-negative.
 674   int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
 675                                                  Alignment, AddressSpace,
 676                                                  UseMaskForCond,
 677                                                  UseMaskForGaps);
 678   assert(Cost >= 0 && "TTI should not produce negative costs!");
 679   return Cost;
 680 }
 681
 682 int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
 683                                     ArrayRef<Type *> Tys, FastMathFlags FMF,
 684                                     unsigned ScalarizationCostPassed) const {
       // Overload taking argument types. Delegates to TTIImpl and asserts the
       // reported cost is non-negative.
 685   int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
 686                                             ScalarizationCostPassed);
 687   assert(Cost >= 0 && "TTI should not produce negative costs!");
 688   return Cost;
 689 }
 690
 691 int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
 692            ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
       // Overload taking argument values and a VF. Delegates to TTIImpl and
       // asserts the reported cost is non-negative.
 693   int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
 694   assert(Cost >= 0 && "TTI should not produce negative costs!");
 695   return Cost;
 696 }
 697
 698 int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
 699                                           ArrayRef<Type *> Tys) const {
       // Delegates to TTIImpl and asserts the reported cost is non-negative.
 700   int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
 701   assert(Cost >= 0 && "TTI should not produce negative costs!");
 702   return Cost;
 703 }
 704
 705 unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
       // Pure forwarder: Tp is passed unchanged to the implementation.
 706   return TTIImpl->getNumberOfParts(Tp);
 707 }
 708
 709 int TargetTransformInfo::getAddressComputationCost(Type *Tp,
 710                                                    ScalarEvolution *SE,
 711                                                    const SCEV *Ptr) const {
       // Delegates to TTIImpl and asserts the reported cost is non-negative.
 712   int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
 713   assert(Cost >= 0 && "TTI should not produce negative costs!");
 714   return Cost;
 715 }
 716
 717 int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
       // Delegates to TTIImpl and asserts the reported cost is non-negative.
 718   int Cost = TTIImpl->getMemcpyCost(I);
 719   assert(Cost >= 0 && "TTI should not produce negative costs!");
 720   return Cost;
 721 }
 722
 723 int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
 724                                                     bool IsPairwiseForm) const {
       // Delegates to TTIImpl and asserts the reported cost is non-negative.
 725   int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
 726   assert(Cost >= 0 && "TTI should not produce negative costs!");
 727   return Cost;
 728 }
 729
 730 int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
 731                                                 bool IsPairwiseForm,
 732                                                 bool IsUnsigned) const {
       // Delegates to TTIImpl and asserts the reported cost is non-negative.
 733   int Cost =
 734       TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
 735   assert(Cost >= 0 && "TTI should not produce negative costs!");
 736   return Cost;
 737 }
 738
 739 unsigned
 740 TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
       // Pure forwarder. The result is unsigned, so no non-negativity assert is
       // used here.
 741   return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
 742 }
 743
 744 bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
 745                                              MemIntrinsicInfo &Info) const {
       // Pure forwarder. Info is passed by non-const reference, so the
       // implementation is able to fill it in.
 746   return TTIImpl->getTgtMemIntrinsic(Inst, Info);
 747 }
 748
 749 unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
       // Pure forwarder to the implementation held in TTIImpl.
 750   return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
 751 }
 752
 753 Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
 754     IntrinsicInst *Inst, Type *ExpectedType) const {
       // Pure forwarder: the Value pointer from the implementation is returned
       // unchanged.
 755   return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
 756 }
 757
 758 Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context,
 759                                                      Value *Length,
 760                                                      unsigned SrcAlign,
 761                                                      unsigned DestAlign) const {
       // Pure forwarder: the Type pointer from the implementation is returned
       // unchanged.
 762   return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
 763                                             DestAlign);
 764 }
 765
 766 void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
 767     SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
 768     unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
       // Nothing is returned. OpsOut is handed to the implementation by non-const
       // reference along with the remaining arguments.
 769   TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
 770                                              SrcAlign, DestAlign);
 771 }
 772
 773 bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
 774                                               const Function *Callee) const {
 775   return TTIImpl->areInlineCompatible(Caller, Callee);
 776 }
 777
 778 bool TargetTransformInfo::areFunctionArgsABICompatible(
 779     const Function *Caller, const Function *Callee,
 780     SmallPtrSetImpl<Argument *> &Args) const {
 781   return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
 782 }
 783
 784 bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
 785                                              Type *Ty) const {
 786   return TTIImpl->isIndexedLoadLegal(Mode, Ty);
 787 }
 788
 789 bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
 790                                               Type *Ty) const {
 791   return TTIImpl->isIndexedStoreLegal(Mode, Ty);
 792 }
 793
 794 unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
 795   return TTIImpl->getLoadStoreVecRegBitWidth(AS);
 796 }
 797
 798 bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
 799   return TTIImpl->isLegalToVectorizeLoad(LI);
 800 }
 801
 802 bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
 803   return TTIImpl->isLegalToVectorizeStore(SI);
 804 }
 805
 806 bool TargetTransformInfo::isLegalToVectorizeLoadChain(
 807     unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
 808   return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
 809                                               AddrSpace);
 810 }
 811
 812 bool TargetTransformInfo::isLegalToVectorizeStoreChain(
 813     unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
 814   return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
 815                                                AddrSpace);
 816 }
 817
 818 unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
 819                                                   unsigned LoadSize,
 820                                                   unsigned ChainSizeInBytes,
 821                                                   VectorType *VecTy) const {
 822   return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
 823 }
 824
 825 unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
 826                                                    unsigned StoreSize,
 827                                                    unsigned ChainSizeInBytes,
 828                                                    VectorType *VecTy) const {
 829   return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
 830 }
 831
 832 bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
 833                                                 Type *Ty, ReductionFlags Flags) const {
 834   return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
 835 }
 836
 837 bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
 838   return TTIImpl->shouldExpandReduction(II);
 839 }
 840
 841 unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
 842   return TTIImpl->getGISelRematGlobalCost();
 843 }
 844
 845 int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
 846   return TTIImpl->getInstructionLatency(I);
 847 }
 848
 849 static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
 850                                      unsigned Level) {
 851   // We don't need a shuffle if we just want to have element 0 in position 0 of
 852   // the vector.
 853   if (!SI && Level == 0 && IsLeft)
 854     return true;
 855   else if (!SI)
 856     return false;
 857
 858   SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
 859
 860   // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
 861   // we look at the left or right side.
 862   for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
 863     Mask[i] = val;
 864
 865   SmallVector<int, 16> ActualMask = SI->getShuffleMask();
 866   return Mask == ActualMask;
 867 }
 868
 869 namespace {
 870 /// Kind of the reduction data.
 871 enum ReductionKind {
 872   RK_None,           /// Not a reduction.
 873   RK_Arithmetic,     /// Binary reduction data.
 874   RK_MinMax,         /// Min/max reduction data.
 875   RK_UnsignedMinMax, /// Unsigned min/max reduction data.
 876 };
 877 /// Contains opcode + LHS/RHS parts of the reduction operations.
 878 struct ReductionData {
 879   ReductionData() = delete;
 880   ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
 881       : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
 882     assert(Kind != RK_None && "expected binary or min/max reduction only.");
 883   }
 884   unsigned Opcode = 0;
 885   Value *LHS = nullptr;
 886   Value *RHS = nullptr;
 887   ReductionKind Kind = RK_None;
 888   bool hasSameData(ReductionData &RD) const {
 889     return Kind == RD.Kind && Opcode == RD.Opcode;
 890   }
 891 };
 892 } // namespace
 893
 894 static Optional<ReductionData> getReductionData(Instruction *I) {
 895   Value *L, *R;
 896   if (m_BinOp(m_Value(L), m_Value(R)).match(I))
 897     return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
 898   if (auto *SI = dyn_cast<SelectInst>(I)) {
 899     if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
 900         m_SMax(m_Value(L), m_Value(R)).match(SI) ||
 901         m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
 902         m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
 903         m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
 904         m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
 905       auto *CI = cast<CmpInst>(SI->getCondition());
 906       return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
 907     }
 908     if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
 909         m_UMax(m_Value(L), m_Value(R)).match(SI)) {
 910       auto *CI = cast<CmpInst>(SI->getCondition());
 911       return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
 912     }
 913   }
 914   return llvm::None;
 915 }
 916
 917 static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
 918                                                    unsigned Level,
 919                                                    unsigned NumLevels) {
 920   // Match one level of pairwise operations.
 921   // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
 922   //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
 923   // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
 924   //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
 925   // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
 926   if (!I)
 927     return RK_None;
 928
 929   assert(I->getType()->isVectorTy() && "Expecting a vector type");
 930
 931   Optional<ReductionData> RD = getReductionData(I);
 932   if (!RD)
 933     return RK_None;
 934
 935   ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
 936   if (!LS && Level)
 937     return RK_None;
 938   ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
 939   if (!RS && Level)
 940     return RK_None;
 941
 942   // On level 0 we can omit one shufflevector instruction.
 943   if (!Level && !RS && !LS)
 944     return RK_None;
 945
 946   // Shuffle inputs must match.
 947   Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
 948   Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
 949   Value *NextLevelOp = nullptr;
 950   if (NextLevelOpR && NextLevelOpL) {
 951     // If we have two shuffles their operands must match.
 952     if (NextLevelOpL != NextLevelOpR)
 953       return RK_None;
 954
 955     NextLevelOp = NextLevelOpL;
 956   } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
 957     // On the first level we can omit the shufflevector <0, undef,...>. So the
 958     // input to the other shufflevector <1, undef> must match with one of the
 959     // inputs to the current binary operation.
 960     // Example:
 961     //  %NextLevelOpL = shufflevector %R, <1, undef ...>
 962     //  %BinOp        = fadd          %NextLevelOpL, %R
 963     if (NextLevelOpL && NextLevelOpL != RD->RHS)
 964       return RK_None;
 965     else if (NextLevelOpR && NextLevelOpR != RD->LHS)
 966       return RK_None;
 967
 968     NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
 969   } else
 970     return RK_None;
 971
 972   // Check that the next levels binary operation exists and matches with the
 973   // current one.
 974   if (Level + 1 != NumLevels) {
 975     Optional<ReductionData> NextLevelRD =
 976         getReductionData(cast<Instruction>(NextLevelOp));
 977     if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
 978       return RK_None;
 979   }
 980
 981   // Shuffle mask for pairwise operation must match.
 982   if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
 983     if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
 984       return RK_None;
 985   } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
 986     if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
 987       return RK_None;
 988   } else {
 989     return RK_None;
 990   }
 991
 992   if (++Level == NumLevels)
 993     return RD->Kind;
 994
 995   // Match next level.
 996   return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
 997                                        NumLevels);
 998 }
 999
1000 static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
1001                                             unsigned &Opcode, Type *&Ty) {
1002   if (!EnableReduxCost)
1003     return RK_None;
1004
1005   // Need to extract the first element.
1006   ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
1007   unsigned Idx = ~0u;
1008   if (CI)
1009     Idx = CI->getZExtValue();
1010   if (Idx != 0)
1011     return RK_None;
1012
1013   auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
1014   if (!RdxStart)
1015     return RK_None;
1016   Optional<ReductionData> RD = getReductionData(RdxStart);
1017   if (!RD)
1018     return RK_None;
1019
1020   Type *VecTy = RdxStart->getType();
1021   unsigned NumVecElems = VecTy->getVectorNumElements();
1022   if (!isPowerOf2_32(NumVecElems))
1023     return RK_None;
1024
1025   // We look for a sequence of shuffle,shuffle,add triples like the following
1026   // that builds a pairwise reduction tree.
1027   //
1028   //  (X0, X1, X2, X3)
1029   //   (X0 + X1, X2 + X3, undef, undef)
1030   //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
1031   //
1032   // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
1033   //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
1034   // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
1035   //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
1036   // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
1037   // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
1038   //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
1039   // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
1040   //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1041   // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
1042   // %r = extractelement <4 x float> %bin.rdx8, i32 0
1043   if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
1044       RK_None)
1045     return RK_None;
1046
1047   Opcode = RD->Opcode;
1048   Ty = VecTy;
1049
1050   return RD->Kind;
1051 }
1052
1053 static std::pair<Value *, ShuffleVectorInst *>
1054 getShuffleAndOtherOprd(Value *L, Value *R) {
1055   ShuffleVectorInst *S = nullptr;
1056
1057   if ((S = dyn_cast<ShuffleVectorInst>(L)))
1058     return std::make_pair(R, S);
1059
1060   S = dyn_cast<ShuffleVectorInst>(R);
1061   return std::make_pair(L, S);
1062 }
1063
1064 static ReductionKind
1065 matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
1066                               unsigned &Opcode, Type *&Ty) {
1067   if (!EnableReduxCost)
1068     return RK_None;
1069
1070   // Need to extract the first element.
1071   ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
1072   unsigned Idx = ~0u;
1073   if (CI)
1074     Idx = CI->getZExtValue();
1075   if (Idx != 0)
1076     return RK_None;
1077
1078   auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
1079   if (!RdxStart)
1080     return RK_None;
1081   Optional<ReductionData> RD = getReductionData(RdxStart);
1082   if (!RD)
1083     return RK_None;
1084
1085   Type *VecTy = ReduxRoot->getOperand(0)->getType();
1086   unsigned NumVecElems = VecTy->getVectorNumElements();
1087   if (!isPowerOf2_32(NumVecElems))
1088     return RK_None;
1089
1090   // We look for a sequence of shuffles and adds like the following matching one
1091   // fadd, shuffle vector pair at a time.
1092   //
1093   // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
1094   //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1095   // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
1096   // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
1097   //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1098   // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
1099   // %r = extractelement <4 x float> %bin.rdx8, i32 0
1100
1101   unsigned MaskStart = 1;
1102   Instruction *RdxOp = RdxStart;
1103   SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
1104   unsigned NumVecElemsRemain = NumVecElems;
1105   while (NumVecElemsRemain - 1) {
1106     // Check for the right reduction operation.
1107     if (!RdxOp)
1108       return RK_None;
1109     Optional<ReductionData> RDLevel = getReductionData(RdxOp);
1110     if (!RDLevel || !RDLevel->hasSameData(*RD))
1111       return RK_None;
1112
1113     Value *NextRdxOp;
1114     ShuffleVectorInst *Shuffle;
1115     std::tie(NextRdxOp, Shuffle) =
1116         getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);
1117
1118     // Check the current reduction operation and the shuffle use the same value.
1119     if (Shuffle == nullptr)
1120       return RK_None;
1121     if (Shuffle->getOperand(0) != NextRdxOp)
1122       return RK_None;
1123
1124     // Check that shuffle masks matches.
1125     for (unsigned j = 0; j != MaskStart; ++j)
1126       ShuffleMask[j] = MaskStart + j;
1127     // Fill the rest of the mask with -1 for undef.
1128     std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
1129
1130     SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
1131     if (ShuffleMask != Mask)
1132       return RK_None;
1133
1134     RdxOp = dyn_cast<Instruction>(NextRdxOp);
1135     NumVecElemsRemain /= 2;
1136     MaskStart *= 2;
1137   }
1138
1139   Opcode = RD->Opcode;
1140   Ty = VecTy;
1141   return RD->Kind;
1142 }
1143
1144 int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
1145   switch (I->getOpcode()) {
1146   case Instruction::GetElementPtr:
1147     return getUserCost(I);
1148
1149   case Instruction::Ret:
1150   case Instruction::PHI:
1151   case Instruction::Br: {
1152     return getCFInstrCost(I->getOpcode());
1153   }
1154   case Instruction::Add:
1155   case Instruction::FAdd:
1156   case Instruction::Sub:
1157   case Instruction::FSub:
1158   case Instruction::Mul:
1159   case Instruction::FMul:
1160   case Instruction::UDiv:
1161   case Instruction::SDiv:
1162   case Instruction::FDiv:
1163   case Instruction::URem:
1164   case Instruction::SRem:
1165   case Instruction::FRem:
1166   case Instruction::Shl:
1167   case Instruction::LShr:
1168   case Instruction::AShr:
1169   case Instruction::And:
1170   case Instruction::Or:
1171   case Instruction::Xor: {
1172     TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
1173     TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
1174     Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
1175     Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
1176     SmallVector<const Value *, 2> Operands(I->operand_values());
1177     return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
1178                                   Op1VP, Op2VP, Operands);
1179   }
1180   case Instruction::FNeg: {
1181     TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
1182     TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
1183     Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
1184     Op2VK = OK_AnyValue;
1185     Op2VP = OP_None;
1186     SmallVector<const Value *, 2> Operands(I->operand_values());
1187     return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
1188                                   Op1VP, Op2VP, Operands);
1189   }
1190   case Instruction::Select: {
1191     const SelectInst *SI = cast<SelectInst>(I);
1192     Type *CondTy = SI->getCondition()->getType();
1193     return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
1194   }
1195   case Instruction::ICmp:
1196   case Instruction::FCmp: {
1197     Type *ValTy = I->getOperand(0)->getType();
1198     return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
1199   }
1200   case Instruction::Store: {
1201     const StoreInst *SI = cast<StoreInst>(I);
1202     Type *ValTy = SI->getValueOperand()->getType();
1203     return getMemoryOpCost(I->getOpcode(), ValTy,
1204                                 SI->getAlignment(),
1205                                 SI->getPointerAddressSpace(), I);
1206   }
1207   case Instruction::Load: {
1208     const LoadInst *LI = cast<LoadInst>(I);
1209     return getMemoryOpCost(I->getOpcode(), I->getType(),
1210                                 LI->getAlignment(),
1211                                 LI->getPointerAddressSpace(), I);
1212   }
1213   case Instruction::ZExt:
1214   case Instruction::SExt:
1215   case Instruction::FPToUI:
1216   case Instruction::FPToSI:
1217   case Instruction::FPExt:
1218   case Instruction::PtrToInt:
1219   case Instruction::IntToPtr:
1220   case Instruction::SIToFP:
1221   case Instruction::UIToFP:
1222   case Instruction::Trunc:
1223   case Instruction::FPTrunc:
1224   case Instruction::BitCast:
1225   case Instruction::AddrSpaceCast: {
1226     Type *SrcTy = I->getOperand(0)->getType();
1227     return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
1228   }
1229   case Instruction::ExtractElement: {
1230     const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
1231     ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
1232     unsigned Idx = -1;
1233     if (CI)
1234       Idx = CI->getZExtValue();
1235
1236     // Try to match a reduction sequence (series of shufflevector and vector
1237     // adds followed by a extractelement).
1238     unsigned ReduxOpCode;
1239     Type *ReduxType;
1240
1241     switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
1242     case RK_Arithmetic:
1243       return getArithmeticReductionCost(ReduxOpCode, ReduxType,
1244                                              /*IsPairwiseForm=*/false);
1245     case RK_MinMax:
1246       return getMinMaxReductionCost(
1247           ReduxType, CmpInst::makeCmpResultType(ReduxType),
1248           /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
1249     case RK_UnsignedMinMax:
1250       return getMinMaxReductionCost(
1251           ReduxType, CmpInst::makeCmpResultType(ReduxType),
1252           /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
1253     case RK_None:
1254       break;
1255     }
1256
1257     switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
1258     case RK_Arithmetic:
1259       return getArithmeticReductionCost(ReduxOpCode, ReduxType,
1260                                              /*IsPairwiseForm=*/true);
1261     case RK_MinMax:
1262       return getMinMaxReductionCost(
1263           ReduxType, CmpInst::makeCmpResultType(ReduxType),
1264           /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
1265     case RK_UnsignedMinMax:
1266       return getMinMaxReductionCost(
1267           ReduxType, CmpInst::makeCmpResultType(ReduxType),
1268           /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
1269     case RK_None:
1270       break;
1271     }
1272
1273     return getVectorInstrCost(I->getOpcode(),
1274                                    EEI->getOperand(0)->getType(), Idx);
1275   }
1276   case Instruction::InsertElement: {
1277     const InsertElementInst * IE = cast<InsertElementInst>(I);
1278     ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
1279     unsigned Idx = -1;
1280     if (CI)
1281       Idx = CI->getZExtValue();
1282     return getVectorInstrCost(I->getOpcode(),
1283                                    IE->getType(), Idx);
1284   }
1285   case Instruction::ExtractValue:
1286     return 0; // Model all ExtractValue nodes as free.
1287   case Instruction::ShuffleVector: {
1288     const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
1289     Type *Ty = Shuffle->getType();
1290     Type *SrcTy = Shuffle->getOperand(0)->getType();
1291
1292     // TODO: Identify and add costs for insert subvector, etc.
1293     int SubIndex;
1294     if (Shuffle->isExtractSubvectorMask(SubIndex))
1295       return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);
1296
1297     if (Shuffle->changesLength())
1298       return -1;
1299
1300     if (Shuffle->isIdentity())
1301       return 0;
1302
1303     if (Shuffle->isReverse())
1304       return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);
1305
1306     if (Shuffle->isSelect())
1307       return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr);
1308
1309     if (Shuffle->isTranspose())
1310       return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr);
1311
1312     if (Shuffle->isZeroEltSplat())
1313       return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr);
1314
1315     if (Shuffle->isSingleSource())
1316       return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr);
1317
1318     return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
1319   }
1320   case Instruction::Call:
1321     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1322       SmallVector<Value *, 4> Args(II->arg_operands());
1323
1324       FastMathFlags FMF;
1325       if (auto *FPMO = dyn_cast<FPMathOperator>(II))
1326         FMF = FPMO->getFastMathFlags();
1327
1328       return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
1329                                         Args, FMF);
1330     }
1331     return -1;
1332   default:
1333     // We don't have any information on this instruction.
1334     return -1;
1335   }
1336 }
1337
1338 TargetTransformInfo::Concept::~Concept() {}
1339
// Default construction uses getDefaultTTI as the callback that produces the
// analysis result.
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
1341
// Construct with a caller-supplied callback. The callback is taken by value
// and moved into the member, and is later invoked by run() for each function.
TargetIRAnalysis::TargetIRAnalysis(
    std::function<Result(const Function &)> TTICallback)
    : TTICallback(std::move(TTICallback)) {}
1345
// Produce the result for \p F by invoking the stored callback. The analysis
// manager argument is unnamed and unused.
TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
                                               FunctionAnalysisManager &) {
  return TTICallback(F);
}
1350
// Out-of-line definition of the static TargetIRAnalysis::Key member.
AnalysisKey TargetIRAnalysis::Key;
1352
1353 TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
1354   return Result(F.getParent()->getDataLayout());
1355 }
1356
// Register the basic pass. It is registered under the name "tti" with the
// description "Target Transform Information".
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
                "Target Transform Information", false, true)
// Out-of-line definition of the pass's static ID member, initialised to 0.
char TargetTransformInfoWrapperPass::ID = 0;
1361
// Intentionally empty out-of-line definition.
// NOTE(review): presumably the usual LLVM vtable anchor — confirm against the
// class declaration in the header.
void TargetTransformInfoWrapperPass::anchor() {}
1363
1364 TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
1365     : ImmutablePass(ID) {
1366   initializeTargetTransformInfoWrapperPassPass(
1367       *PassRegistry::getPassRegistry());
1368 }
1369
1370 TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
1371     TargetIRAnalysis TIRA)
1372     : ImmutablePass(ID), TIRA(std::move(TIRA)) {
1373   initializeTargetTransformInfoWrapperPassPass(
1374       *PassRegistry::getPassRegistry());
1375 }
1376
1377 TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
1378   FunctionAnalysisManager DummyFAM;
1379   TTI = TIRA.run(F, DummyFAM);
1380   return *TTI;
1381 }
1382
1383 ImmutablePass *
1384 llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
1385   return new TargetTransformInfoWrapperPass(std::move(TIRA));
1386 }