1 //===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file defines the WebAssembly-specific TargetTransformInfo
13 //===----------------------------------------------------------------------===//
15 #include "WebAssemblyTargetTransformInfo.h"
16 #include "llvm/CodeGen/CostTable.h"
17 #include "llvm/Support/Debug.h"
20 #define DEBUG_TYPE "wasmtti"
22 TargetTransformInfo::PopcntSupportKind
23 WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth
) const {
24 assert(isPowerOf2_32(TyWidth
) && "Ty width must be power of 2");
25 return TargetTransformInfo::PSK_FastHardware
;
28 unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID
) const {
29 unsigned Result
= BaseT::getNumberOfRegisters(ClassID
);
31 // For SIMD, use at least 16 registers, as a rough guess.
32 bool Vector
= (ClassID
== 1);
34 Result
= std::max(Result
, 16u);
39 TypeSize
WebAssemblyTTIImpl::getRegisterBitWidth(
40 TargetTransformInfo::RegisterKind K
) const {
42 case TargetTransformInfo::RGK_Scalar
:
43 return TypeSize::getFixed(64);
44 case TargetTransformInfo::RGK_FixedWidthVector
:
45 return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
46 case TargetTransformInfo::RGK_ScalableVector
:
47 return TypeSize::getScalable(0);
50 llvm_unreachable("Unsupported register kind");
53 InstructionCost
WebAssemblyTTIImpl::getArithmeticInstrCost(
54 unsigned Opcode
, Type
*Ty
, TTI::TargetCostKind CostKind
,
55 TTI::OperandValueKind Opd1Info
, TTI::OperandValueKind Opd2Info
,
56 TTI::OperandValueProperties Opd1PropInfo
,
57 TTI::OperandValueProperties Opd2PropInfo
, ArrayRef
<const Value
*> Args
,
58 const Instruction
*CxtI
) {
60 InstructionCost Cost
=
61 BasicTTIImplBase
<WebAssemblyTTIImpl
>::getArithmeticInstrCost(
62 Opcode
, Ty
, CostKind
, Opd1Info
, Opd2Info
, Opd1PropInfo
, Opd2PropInfo
);
64 if (auto *VTy
= dyn_cast
<VectorType
>(Ty
)) {
66 case Instruction::LShr
:
67 case Instruction::AShr
:
68 case Instruction::Shl
:
69 // SIMD128's shifts currently only accept a scalar shift count. For each
70 // element, we'll need to extract, op, insert. The following is a rough
72 if (Opd2Info
!= TTI::OK_UniformValue
&&
73 Opd2Info
!= TTI::OK_UniformConstantValue
)
75 cast
<FixedVectorType
>(VTy
)->getNumElements() *
76 (TargetTransformInfo::TCC_Basic
+
77 getArithmeticInstrCost(Opcode
, VTy
->getElementType(), CostKind
) +
78 TargetTransformInfo::TCC_Basic
);
// Cost hook for vector instructions (the existing comment below refers to
// insert/extract). The base estimate comes from
// BasicTTIImplBase::getVectorInstrCost called with the same Opcode, Val and
// Index.
// NOTE(review): the rest of the parameter list (the declarations of Val and
// Index) and the condition guarding the penalty return are not visible in
// this view. Presumably the penalty applies only when the index is not a
// constant -- confirm against the header and the full source.
85 InstructionCost
WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode
,
88 InstructionCost Cost
=
89 BasicTTIImplBase::getVectorInstrCost(Opcode
, Val
, Index
);
91 // SIMD128's insert/extract currently only take constant indices.
// Penalised result: the base cost plus 25 times TCC_Expensive.
93 return Cost
+ 25 * TargetTransformInfo::TCC_Expensive
;
98 bool WebAssemblyTTIImpl::areInlineCompatible(const Function
*Caller
,
99 const Function
*Callee
) const {
100 // Allow inlining only when the Callee has a subset of the Caller's
101 // features. In principle, we should be able to inline regardless of any
102 // features because WebAssembly supports features at module granularity, not
103 // function granularity, but without this restriction it would be possible for
104 // a module to "forget" about features if all the functions that used them
106 const TargetMachine
&TM
= getTLI()->getTargetMachine();
108 const FeatureBitset
&CallerBits
=
109 TM
.getSubtargetImpl(*Caller
)->getFeatureBits();
110 const FeatureBitset
&CalleeBits
=
111 TM
.getSubtargetImpl(*Callee
)->getFeatureBits();
113 return (CallerBits
& CalleeBits
) == CalleeBits
;
// Fills in the loop-unrolling preferences UP for loop L.
// The visible portion first walks every instruction of every block of L and
// looks for CallInst/InvokeInst instructions whose called function is known
// (getCalledFunction() is non-null) and for which isLoweredToCall(F) holds.
// It then enables partial, runtime and upper-bound unrolling, sets the partial
// threshold, and zeroes both size-optimisation thresholds.
// SE and ORE are not referenced in the visible portion.
116 void WebAssemblyTTIImpl::getUnrollingPreferences(
117 Loop
*L
, ScalarEvolution
&SE
, TTI::UnrollingPreferences
&UP
,
118 OptimizationRemarkEmitter
*ORE
) const {
119 // Scan the loop: don't unroll loops with calls. This is a standard approach
120 // for most (all?) targets.
121 for (BasicBlock
*BB
: L
->blocks())
122 for (Instruction
&I
: *BB
)
123 if (isa
<CallInst
>(I
) || isa
<InvokeInst
>(I
))
124 if (const Function
*F
= cast
<CallBase
>(I
).getCalledFunction())
125 if (isLoweredToCall(F
))
// NOTE(review): the statement executed when such a call is found is not
// visible in this view; the comment above suggests the function bails out
// without touching UP -- confirm against the full source.
128 // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
129 // the various microarchitectures that use the BasicTTI implementation and
130 // has been selected through heuristics across multiple cores and runtimes.
131 UP
.Partial
= UP
.Runtime
= UP
.UpperBound
= true;
132 UP
.PartialThreshold
= 30;
134 // Avoid unrolling when optimizing for size.
135 UP
.OptSizeThreshold
= 0;
136 UP
.PartialOptSizeThreshold
= 0;
138 // Set number of instructions optimized when "back edge"
139 // becomes "fall through" to default value of 2.
// NOTE(review): the assignment this comment describes, and the end of the
// function, lie beyond the visible lines.