//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"

namespace llvm {

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
    switch (Opcode) {
    default:
      // By default, just classify everything as 'basic'.
      return TTI::TCC_Basic;

    case Instruction::GetElementPtr:
      llvm_unreachable("Use getGEPCost for GEP operations!");

    case Instruction::BitCast:
      assert(OpTy && "Cast instructions must provide the operand type");
      if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return TTI::TCC_Free;

      // Otherwise, the default basic cost is used.
      return TTI::TCC_Basic;

    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      return TTI::TCC_Expensive;

    case Instruction::IntToPtr: {
      // An inttoptr cast is free so long as the input is a legal integer type
      // which doesn't contain values outside the range of a pointer.
      unsigned OpSize = OpTy->getScalarSizeInBits();
      if (DL.isLegalInteger(OpSize) &&
          OpSize <= DL.getPointerTypeSizeInBits(Ty))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::PtrToInt: {
      // A ptrtoint cast is free so long as the result is large enough to store
      // the pointer, and a legal integer type.
      unsigned DestSize = Ty->getScalarSizeInBits();
      if (DL.isLegalInteger(DestSize) &&
          DestSize >= DL.getPointerTypeSizeInBits(OpTy))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::Trunc:
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
        return TTI::TCC_Free;

      return TTI::TCC_Basic;
    }
  }

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
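    // For example, a GEP whose indices are all ConstantInts is treated as
    // TCC_Free here, while any non-constant index drops it to TCC_Basic.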
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) {
    JTSize = 0;
    return SI.getNumCases();
  }

  int getExtCost(const Instruction *I, const Value *Src) {
    return TTI::TCC_Basic;
  }

  unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) {
    assert(FTy && "FunctionType must be provided to this routine.");

    // The target-independent implementation just measures the size of the
    // function by approximating that each argument will take on average one
    // instruction to prepare.
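    // For example, under this model a call that passes three arguments costs
    // 4 * TCC_Basic: one unit per argument plus one for the call itself.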

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = FTy->getNumParams();

    return TTI::TCC_Basic * (NumArgs + 1);
  }

  unsigned getInliningThresholdMultiplier() { return 1; }

  int getInlinerVectorBonusPercent() { return 150; }

  unsigned getMemcpyCost(const Instruction *I) {
    return TTI::TCC_Expensive;
  }

  bool hasBranchDivergence() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    return -1;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                        Value *OldV, Value *NewV) const {
    return false;
  }

  bool isLoweredToCall(const Function *F) {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) {
    return false;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &) {}

  bool isLegalAddImmediate(int64_t Imm) { return false; }

  bool isLegalICmpImmediate(int64_t Imm) { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
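    // For example, [reg] and [reg + reg] pass this check, while a mode with a
    // global base, a nonzero constant offset, or a scale other than 1 does not.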
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool canMacroFuseCmp() { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) {
    return false;
  }

  bool shouldFavorPostInc() const { return false; }

  bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }

  bool isLegalMaskedStore(Type *DataType) { return false; }

  bool isLegalMaskedLoad(Type *DataType) { return false; }

  bool isLegalNTStore(Type *DataType, llvm::Align Alignment) {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
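    // For example, a 16-byte store with 16-byte (or larger) alignment is
    // accepted here, while the same store with only 8-byte alignment is not.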
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, llvm::Align Alignment) {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalMaskedScatter(Type *DataType) { return false; }

  bool isLegalMaskedGather(Type *DataType) { return false; }

  bool isLegalMaskedCompressStore(Type *DataType) { return false; }

  bool isLegalMaskedExpandLoad(Type *DataType) { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }

  bool prefersVectorizedAddressing() { return true; }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                              Scale, AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }

  bool isProfitableToHoist(Instruction *I) { return true; }

  bool useAA() { return false; }

  bool isTypeLegal(Type *Ty) { return false; }

  unsigned getJumpBufAlignment() { return 0; }

  unsigned getJumpBufSize() { return 0; }

  bool shouldBuildLookupTables() { return true; }
  bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }

  bool useColdCCForColdCall(Function &F) { return false; }

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
    return 0;
  }

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) { return 0; }

  bool supportsEfficientVectorElementLoadStore() { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableInterleavedAccessVectorization() { return false; }

  bool enableMaskedInterleavedAccessVectorization() { return false; }

  bool isFPVectorizationPotentiallyUnsafe() { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth,
                                      unsigned AddressSpace,
                                      unsigned Alignment,
                                      bool *Fast) { return false; }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) { return false; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }

  unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty) {
    return 0;
  }

  unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }

  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                         Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(bool Vector) { return 8; }

  unsigned getRegisterBitWidth(bool Vector) const { return 32; }

  unsigned getMinVectorRegisterBitWidth() { return 128; }

  bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }

  unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader) {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() { return 0; }

  llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned> getCacheAssociativity(
      TargetTransformInfo::CacheLevel Level) {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() { return 0; }

  unsigned getMinPrefetchStride() { return 1; }

  unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                  TTI::OperandValueKind Opd1Info,
                                  TTI::OperandValueKind Opd2Info,
                                  TTI::OperandValueProperties Opd1PropInfo,
                                  TTI::OperandValueProperties Opd2PropInfo,
                                  ArrayRef<const Value *> Args) {
    return 1;
  }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
                          Type *SubTp) {
    return 1;
  }

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                            const Instruction *I) { return 1; }

  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                    VectorType *VecTy, unsigned Index) {
    return 1;
  }

  unsigned getCFInstrCost(unsigned Opcode) { return 1; }

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                              const Instruction *I) {
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    return 1;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace, const Instruction *I) {
    return 1;
  }

  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                 unsigned AddressSpace) {
    return 1;
  }

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                                  bool VariableMask,
                                  unsigned Alignment) {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                      unsigned Factor,
                                      ArrayRef<unsigned> Indices,
                                      unsigned Alignment, unsigned AddressSpace,
                                      bool UseMaskForCond = false,
                                      bool UseMaskForGaps = false) {
    return 1;
  }

  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                 ArrayRef<Type *> Tys, FastMathFlags FMF,
                                 unsigned ScalarizationCostPassed) {
    return 1;
  }

  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                 ArrayRef<Value *> Args, FastMathFlags FMF,
                                 unsigned VF) {
    return 1;
  }

  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
    return 1;
  }

  unsigned getNumberOfParts(Type *Tp) { return 0; }

  unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                     const SCEV *) {
    return 0;
  }

  unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }

  unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }

  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign, unsigned DestAlign) const {
    return Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
      OpsOut.push_back(Type::getInt8Ty(Context));
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const {
    return true;
  }

  unsigned getGISelRematGlobalCost() const {
    return 1;
  }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
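  // For example, an i32 constant 7 needs 3 bits (isSigned = false), while an
  // i32 constant -8 also needs 3 bits but sets isSigned = true.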
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector we need to pick the max between the min
      // required sizes of the elements.
      auto *VT = cast<VectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the total vector width divided by the
      // number of elements in the vector.
      unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // In case one element is signed then all the vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an int constant element.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
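///
/// For illustration only (the class and overrides below are hypothetical, not
/// part of LLVM): a concrete implementation derives from this base, passes
/// itself as the template parameter, and overrides only the hooks it cares
/// about, inheriting the conservative defaults for everything else.
/// \code
///   class MyTargetTTIImpl
///       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
///     using BaseT = TargetTransformInfoImplCRTPBase<MyTargetTTIImpl>;
///
///   public:
///     explicit MyTargetTTIImpl(const DataLayout &DL) : BaseT(DL) {}
///     unsigned getNumberOfRegisters(bool Vector) { return Vector ? 32 : 16; }
///   };
/// \endcode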
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getCallCost;

  unsigned getCallCost(const Function *F, int NumArgs, const User *U) {
    assert(F && "A concrete function must be provided to this routine.");

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = F->arg_size();

    if (Intrinsic::ID IID = F->getIntrinsicID()) {
      FunctionType *FTy = F->getFunctionType();
      SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
      return static_cast<T *>(this)
          ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U);
    }

    if (!static_cast<T *>(this)->isLoweredToCall(F))
      return TTI::TCC_Basic; // Give a basic cost if it will be lowered
                             // directly.

    return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs, U);
  }

  unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
                       const User *U) {
    // Simply delegate to generic handling of the call.
    // FIXME: We should use instsimplify or something else to catch calls which
    // will constant fold with these arguments.
    return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U);
  }

  using BaseT::getGEPCost;

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    // TODO: will remove this when pointers have an opaque type.
    assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
               PointeeType &&
           "explicit pointee type doesn't match operand's pointee type");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the basis, therefore TargetType is a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> ParamTys, const User *U) {
    switch (IID) {
    default:
      // Intrinsics rarely (if ever) have normal argument setup constraints.
      // Model them as having a basic instruction cost.
      return TTI::TCC_Basic;

    // TODO: other libc intrinsics.
    case Intrinsic::memcpy:
      return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));

    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_param:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return TTI::TCC_Free;
    }
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<const Value *> Arguments, const User *U) {
    // Delegate to the generic intrinsic handling code. This mostly provides an
    // opportunity for targets to (for example) special case the cost of
    // certain intrinsics based on constants used as arguments.
    SmallVector<Type *, 8> ParamTys;
    ParamTys.reserve(Arguments.size());
    for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
      ParamTys.push_back(Arguments[Idx]->getType());
    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U);
  }

  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
    if (isa<PHINode>(U))
      return TTI::TCC_Free; // Model all PHI nodes as free.

    if (isa<ExtractValueInst>(U))
      return TTI::TCC_Free; // Model all ExtractValue nodes as free.

    // Static alloca doesn't generate target instructions.
    if (auto *A = dyn_cast<AllocaInst>(U))
      if (A->isStaticAlloca())
        return TTI::TCC_Free;

    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
      return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                GEP->getPointerOperand(),
                                                Operands.drop_front());
    }

    if (auto CS = ImmutableCallSite(U)) {
      const Function *F = CS.getCalledFunction();
      if (!F) {
        // Just use the called value type.
        Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
        return static_cast<T *>(this)
            ->getCallCost(cast<FunctionType>(FTy), CS.arg_size(), U);
      }

      SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
      return static_cast<T *>(this)->getCallCost(F, Arguments, U);
    }

    if (isa<SExtInst>(U) || isa<ZExtInst>(U) || isa<FPExtInst>(U))
      // The old behaviour of generally treating extensions of icmp to be free
      // has been removed. A target that needs it should override getUserCost().
      return static_cast<T *>(this)->getExtCost(cast<Instruction>(U),
                                                Operands.back());

    return static_cast<T *>(this)->getOperationCost(
        Operator::getOpcode(U), U->getType(),
        U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
  }

  int getInstructionLatency(const Instruction *I) {
    SmallVector<const Value *, 4> Operands(I->value_op_begin(),
                                           I->value_op_end());
    if (getUserCost(I, Operands) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag; we use the value type
      // to decide its latency.
      if (StructType *StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};

} // end namespace llvm

#endif