llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp

   1 //===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// \file
  10 /// This file defines the WebAssembly-specific TargetTransformInfo
  11 /// implementation.
  12 ///
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "WebAssemblyTargetTransformInfo.h"
  16 #include "llvm/CodeGen/CostTable.h"
  17 #include "llvm/Support/Debug.h"
  18 using namespace llvm;
  19
  20 #define DEBUG_TYPE "wasmtti"
  21
  22 TargetTransformInfo::PopcntSupportKind
  23 WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
  24   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  25   return TargetTransformInfo::PSK_FastHardware;
  26 }
  27
  28 unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  29   unsigned Result = BaseT::getNumberOfRegisters(ClassID);
  30
  31   // For SIMD, use at least 16 registers, as a rough guess.
  32   bool Vector = (ClassID == 1);
  33   if (Vector)
  34     Result = std::max(Result, 16u);
  35
  36   return Result;
  37 }
  38
  39 TypeSize WebAssemblyTTIImpl::getRegisterBitWidth(
  40     TargetTransformInfo::RegisterKind K) const {
  41   switch (K) {
  42   case TargetTransformInfo::RGK_Scalar:
  43     return TypeSize::getFixed(64);
  44   case TargetTransformInfo::RGK_FixedWidthVector:
  45     return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
  46   case TargetTransformInfo::RGK_ScalableVector:
  47     return TypeSize::getScalable(0);
  48   }
  49
  50   llvm_unreachable("Unsupported register kind");
  51 }
  52
  53 InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
  54     unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
  55     TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
  56     TTI::OperandValueProperties Opd1PropInfo,
  57     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
  58     const Instruction *CxtI) {
  59
  60   InstructionCost Cost =
  61       BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
  62           Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
  63
  64   if (auto *VTy = dyn_cast<VectorType>(Ty)) {
  65     switch (Opcode) {
  66     case Instruction::LShr:
  67     case Instruction::AShr:
  68     case Instruction::Shl:
  69       // SIMD128's shifts currently only accept a scalar shift count. For each
  70       // element, we'll need to extract, op, insert. The following is a rough
  71       // approxmation.
  72       if (Opd2Info != TTI::OK_UniformValue &&
  73           Opd2Info != TTI::OK_UniformConstantValue)
  74         Cost =
  75             cast<FixedVectorType>(VTy)->getNumElements() *
  76             (TargetTransformInfo::TCC_Basic +
  77              getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
  78              TargetTransformInfo::TCC_Basic);
  79       break;
  80     }
  81   }
  82   return Cost;
  83 }
  84
  85 InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode,
  86                                                        Type *Val,
  87                                                        unsigned Index) {
  88   InstructionCost Cost =
  89       BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index);
  90
  91   // SIMD128's insert/extract currently only take constant indices.
  92   if (Index == -1u)
  93     return Cost + 25 * TargetTransformInfo::TCC_Expensive;
  94
  95   return Cost;
  96 }
  97
  98 bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
  99                                              const Function *Callee) const {
 100   // Allow inlining only when the Callee has a subset of the Caller's
 101   // features. In principle, we should be able to inline regardless of any
 102   // features because WebAssembly supports features at module granularity, not
 103   // function granularity, but without this restriction it would be possible for
 104   // a module to "forget" about features if all the functions that used them
 105   // were inlined.
 106   const TargetMachine &TM = getTLI()->getTargetMachine();
 107
 108   const FeatureBitset &CallerBits =
 109       TM.getSubtargetImpl(*Caller)->getFeatureBits();
 110   const FeatureBitset &CalleeBits =
 111       TM.getSubtargetImpl(*Callee)->getFeatureBits();
 112
 113   return (CallerBits & CalleeBits) == CalleeBits;
 114 }
 115
 116 void WebAssemblyTTIImpl::getUnrollingPreferences(
 117     Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
 118     OptimizationRemarkEmitter *ORE) const {
 119   // Scan the loop: don't unroll loops with calls. This is a standard approach
 120   // for most (all?) targets.
 121   for (BasicBlock *BB : L->blocks())
 122     for (Instruction &I : *BB)
 123       if (isa<CallInst>(I) || isa<InvokeInst>(I))
 124         if (const Function *F = cast<CallBase>(I).getCalledFunction())
 125           if (isLoweredToCall(F))
 126             return;
 127
 128   // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
 129   // the various microarchitectures that use the BasicTTI implementation and
 130   // has been selected through heuristics across multiple cores and runtimes.
 131   UP.Partial = UP.Runtime = UP.UpperBound = true;
 132   UP.PartialThreshold = 30;
 133
 134   // Avoid unrolling when optimizing for size.
 135   UP.OptSizeThreshold = 0;
 136   UP.PartialOptSizeThreshold = 0;
 137
 138   // Set number of instructions optimized when "back edge"
 139   // becomes "fall through" to default value of 2.
 140   UP.BEInsns = 2;
 141 }