1 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
15 #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLowering.h"
23 enum NodeType
: unsigned {
24 // Start the numbering from where ISD NodeType finishes.
25 FIRST_NUMBER
= ISD::BUILTIN_OP_END
,
38 PrintConvergentCallUni
,
65 LoadV2
= ISD::FIRST_TARGET_MEMORY_OPCODE
,
79 StoreParamS32
, // to sext and store a <32bit value, not used currently
80 StoreParamU32
, // to zext and store a <32bit value, not used currently
100 Tex1DArrayFloatFloatLevel
,
101 Tex1DArrayFloatFloatGrad
,
104 Tex1DArrayS32FloatLevel
,
105 Tex1DArrayS32FloatGrad
,
108 Tex1DArrayU32FloatLevel
,
109 Tex1DArrayU32FloatGrad
,
112 Tex2DFloatFloatLevel
,
123 Tex2DArrayFloatFloat
,
124 Tex2DArrayFloatFloatLevel
,
125 Tex2DArrayFloatFloatGrad
,
128 Tex2DArrayS32FloatLevel
,
129 Tex2DArrayS32FloatGrad
,
132 Tex2DArrayU32FloatLevel
,
133 Tex2DArrayU32FloatGrad
,
136 Tex3DFloatFloatLevel
,
147 TexCubeFloatFloatLevel
,
149 TexCubeS32FloatLevel
,
151 TexCubeU32FloatLevel
,
152 TexCubeArrayFloatFloat
,
153 TexCubeArrayFloatFloatLevel
,
154 TexCubeArrayS32Float
,
155 TexCubeArrayS32FloatLevel
,
156 TexCubeArrayU32Float
,
157 TexCubeArrayU32FloatLevel
,
170 TexUnified1DFloatS32
,
171 TexUnified1DFloatFloat
,
172 TexUnified1DFloatFloatLevel
,
173 TexUnified1DFloatFloatGrad
,
175 TexUnified1DS32Float
,
176 TexUnified1DS32FloatLevel
,
177 TexUnified1DS32FloatGrad
,
179 TexUnified1DU32Float
,
180 TexUnified1DU32FloatLevel
,
181 TexUnified1DU32FloatGrad
,
182 TexUnified1DArrayFloatS32
,
183 TexUnified1DArrayFloatFloat
,
184 TexUnified1DArrayFloatFloatLevel
,
185 TexUnified1DArrayFloatFloatGrad
,
186 TexUnified1DArrayS32S32
,
187 TexUnified1DArrayS32Float
,
188 TexUnified1DArrayS32FloatLevel
,
189 TexUnified1DArrayS32FloatGrad
,
190 TexUnified1DArrayU32S32
,
191 TexUnified1DArrayU32Float
,
192 TexUnified1DArrayU32FloatLevel
,
193 TexUnified1DArrayU32FloatGrad
,
194 TexUnified2DFloatS32
,
195 TexUnified2DFloatFloat
,
196 TexUnified2DFloatFloatLevel
,
197 TexUnified2DFloatFloatGrad
,
199 TexUnified2DS32Float
,
200 TexUnified2DS32FloatLevel
,
201 TexUnified2DS32FloatGrad
,
203 TexUnified2DU32Float
,
204 TexUnified2DU32FloatLevel
,
205 TexUnified2DU32FloatGrad
,
206 TexUnified2DArrayFloatS32
,
207 TexUnified2DArrayFloatFloat
,
208 TexUnified2DArrayFloatFloatLevel
,
209 TexUnified2DArrayFloatFloatGrad
,
210 TexUnified2DArrayS32S32
,
211 TexUnified2DArrayS32Float
,
212 TexUnified2DArrayS32FloatLevel
,
213 TexUnified2DArrayS32FloatGrad
,
214 TexUnified2DArrayU32S32
,
215 TexUnified2DArrayU32Float
,
216 TexUnified2DArrayU32FloatLevel
,
217 TexUnified2DArrayU32FloatGrad
,
218 TexUnified3DFloatS32
,
219 TexUnified3DFloatFloat
,
220 TexUnified3DFloatFloatLevel
,
221 TexUnified3DFloatFloatGrad
,
223 TexUnified3DS32Float
,
224 TexUnified3DS32FloatLevel
,
225 TexUnified3DS32FloatGrad
,
227 TexUnified3DU32Float
,
228 TexUnified3DU32FloatLevel
,
229 TexUnified3DU32FloatGrad
,
230 TexUnifiedCubeFloatFloat
,
231 TexUnifiedCubeFloatFloatLevel
,
232 TexUnifiedCubeS32Float
,
233 TexUnifiedCubeS32FloatLevel
,
234 TexUnifiedCubeU32Float
,
235 TexUnifiedCubeU32FloatLevel
,
236 TexUnifiedCubeArrayFloatFloat
,
237 TexUnifiedCubeArrayFloatFloatLevel
,
238 TexUnifiedCubeArrayS32Float
,
239 TexUnifiedCubeArrayS32FloatLevel
,
240 TexUnifiedCubeArrayU32Float
,
241 TexUnifiedCubeArrayU32FloatLevel
,
242 Tld4UnifiedR2DFloatFloat
,
243 Tld4UnifiedG2DFloatFloat
,
244 Tld4UnifiedB2DFloatFloat
,
245 Tld4UnifiedA2DFloatFloat
,
246 Tld4UnifiedR2DS64Float
,
247 Tld4UnifiedG2DS64Float
,
248 Tld4UnifiedB2DS64Float
,
249 Tld4UnifiedA2DS64Float
,
250 Tld4UnifiedR2DU64Float
,
251 Tld4UnifiedG2DU64Float
,
252 Tld4UnifiedB2DU64Float
,
253 Tld4UnifiedA2DU64Float
,
255 // Surface intrinsics
272 Suld1DArrayV2I8Clamp
,
273 Suld1DArrayV2I16Clamp
,
274 Suld1DArrayV2I32Clamp
,
275 Suld1DArrayV2I64Clamp
,
276 Suld1DArrayV4I8Clamp
,
277 Suld1DArrayV4I16Clamp
,
278 Suld1DArrayV4I32Clamp
,
296 Suld2DArrayV2I8Clamp
,
297 Suld2DArrayV2I16Clamp
,
298 Suld2DArrayV2I32Clamp
,
299 Suld2DArrayV2I64Clamp
,
300 Suld2DArrayV4I8Clamp
,
301 Suld2DArrayV4I16Clamp
,
302 Suld2DArrayV4I32Clamp
,
333 Suld1DArrayV2I16Trap
,
334 Suld1DArrayV2I32Trap
,
335 Suld1DArrayV2I64Trap
,
337 Suld1DArrayV4I16Trap
,
338 Suld1DArrayV4I32Trap
,
357 Suld2DArrayV2I16Trap
,
358 Suld2DArrayV2I32Trap
,
359 Suld2DArrayV2I64Trap
,
361 Suld2DArrayV4I16Trap
,
362 Suld2DArrayV4I32Trap
,
393 Suld1DArrayV2I16Zero
,
394 Suld1DArrayV2I32Zero
,
395 Suld1DArrayV2I64Zero
,
397 Suld1DArrayV4I16Zero
,
398 Suld1DArrayV4I32Zero
,
417 Suld2DArrayV2I16Zero
,
418 Suld2DArrayV2I32Zero
,
419 Suld2DArrayV2I64Zero
,
421 Suld2DArrayV4I16Zero
,
422 Suld2DArrayV4I32Zero
,
// Forward declaration only. In the visible part of this header the type is
// used solely by reference (constructor parameter and the STI member), so the
// full class definition is not required here.
438 class NVPTXSubtarget
;
440 //===--------------------------------------------------------------------===//
441 // TargetLowering Implementation
442 //===--------------------------------------------------------------------===//
443 class NVPTXTargetLowering
: public TargetLowering
{
/// Constructor taking the NVPTX target machine \p TM and subtarget \p STI.
/// Declared explicit; the definition is not part of this header.
/// NOTE(review): presumably these initialise the nvTM and STI members declared
/// further down in this class -- confirm in the implementation file.
445 explicit NVPTXTargetLowering(const NVPTXTargetMachine
&TM
,
446 const NVPTXSubtarget
&STI
);
/// Override of the inherited LowerOperation hook. Receives the node value
/// \p Op and the \p DAG it belongs to, and returns an SDValue. Declared here
/// only; the body lives outside this header.
447 SDValue
LowerOperation(SDValue Op
, SelectionDAG
&DAG
) const override
;
/// Target-specific helper (not an override) that takes \p Op and the \p DAG
/// and returns an SDValue. Declared here only.
/// NOTE(review): presumably handles global-address nodes, going by the name --
/// confirm in the implementation file.
449 SDValue
LowerGlobalAddress(SDValue Op
, SelectionDAG
&DAG
) const;
/// Override of the inherited hook; returns a C string for \p Opcode.
/// NOTE(review): presumably maps the NodeType enumerators declared in this
/// header to printable names -- confirm in the implementation file.
451 const char *getTargetNodeName(unsigned Opcode
) const override
;
/// Override of the inherited hook. Takes the call instruction \p I and the
/// intrinsic id \p Intrinsic; \p Info is a non-const reference, so it is
/// presumably filled in as an out-parameter (TODO confirm). Returns bool.
/// Declared here only.
453 bool getTgtMemIntrinsic(IntrinsicInfo
&Info
, const CallInst
&I
,
455 unsigned Intrinsic
) const override
;
457 /// getFunctionParamOptimizedAlign - Since function arguments are passed via
458 /// .param space, we may want to increase their alignment in a way that
459 /// ensures that we can effectively vectorize their loads & stores. We can
460 /// increase alignment only if the function has internal or private
461 /// linkage, as for other linkage types callers may already rely on default
462 /// alignment. To allow using 128-bit vectorized loads/stores, this function
463 /// ensures that alignment is 16 or greater.
///
/// \param F     the function whose parameter is being aligned.
/// \param ArgTy the IR type of that parameter.
/// \param DL    the data layout passed in by the caller.
/// \returns the chosen alignment as an Align value.
464 Align
getFunctionParamOptimizedAlign(const Function
*F
, Type
*ArgTy
,
465 const DataLayout
&DL
) const;
467 /// Helper for computing alignment of a device function byval parameter.
///
/// \param F     the function that owns the parameter.
/// \param ArgTy the IR type of the byval parameter.
/// \param DL    the data layout passed in by the caller.
/// \returns the computed alignment as an Align value.
468 Align
getFunctionByValParamAlign(const Function
*F
, Type
*ArgTy
,
470 const DataLayout
&DL
) const;
472 // Helper for getting a function parameter name. The name is composed from
473 // its index and the function name. A negative index corresponds to the special
474 // parameter (unsized array) used for passing variable arguments.
// Returns the name by value as a std::string.
475 std::string
getParamName(const Function
*F
, int Idx
) const;
477 /// isLegalAddressingMode - Return true if the addressing mode represented
478 /// by AM is legal for this target, for a load/store of the specified type.
479 /// Used to guide target specific optimizations, like loop strength
480 /// reduction (LoopStrengthReduce.cpp) and memory optimization for
481 /// address mode (CodeGenPrepare.cpp).
///
/// \p I is optional and defaults to nullptr. Override of the inherited hook;
/// declared here only.
482 bool isLegalAddressingMode(const DataLayout
&DL
, const AddrMode
&AM
, Type
*Ty
,
484 Instruction
*I
= nullptr) const override
;
486 bool isTruncateFree(Type
*SrcTy
, Type
*DstTy
) const override
{
487 // Truncating 64-bit to 32-bit is free in SASS.
488 if (!SrcTy
->isIntegerTy() || !DstTy
->isIntegerTy())
490 return SrcTy
->getPrimitiveSizeInBits() == 64 &&
491 DstTy
->getPrimitiveSizeInBits() == 32;
494 EVT
getSetCCResultType(const DataLayout
&DL
, LLVMContext
&Ctx
,
495 EVT VT
) const override
{
497 return EVT::getVectorVT(Ctx
, MVT::i1
, VT
.getVectorNumElements());
/// Override of the inherited hook; classifies the constraint string
/// \p Constraint and returns a ConstraintType. Declared here only.
/// NOTE(review): presumably an inline-asm operand constraint, given the
/// neighbouring getRegForInlineAsmConstraint declaration -- confirm.
501 ConstraintType
getConstraintType(StringRef Constraint
) const override
;
/// Override of the inherited hook. For the constraint string \p Constraint
/// and value type \p VT, returns a pair of an unsigned value and a register
/// class pointer. Declared here only.
/// NOTE(review): the unsigned member is presumably a register number, with 0
/// meaning "any register in the class" -- confirm against TargetLowering.
502 std::pair
<unsigned, const TargetRegisterClass
*>
503 getRegForInlineAsmConstraint(const TargetRegisterInfo
*TRI
,
504 StringRef Constraint
, MVT VT
) const override
;
/// Override of the inherited hook. Takes the incoming \p Chain, the calling
/// convention, the read-only list of incoming-argument descriptors \p Ins,
/// the source location \p dl and the \p DAG. \p InVals is a mutable vector,
/// so it is presumably where the lowered argument values are appended
/// (TODO confirm). Returns an SDValue; declared here only.
506 SDValue
LowerFormalArguments(SDValue Chain
, CallingConv::ID CallConv
,
508 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
509 const SDLoc
&dl
, SelectionDAG
&DAG
,
510 SmallVectorImpl
<SDValue
> &InVals
) const override
;
/// Override of the inherited hook. \p CLI carries the call description and
/// \p InVals is a mutable vector, presumably receiving the call's result
/// values (TODO confirm). Returns an SDValue; declared here only.
512 SDValue
LowerCall(CallLoweringInfo
&CLI
,
513 SmallVectorImpl
<SDValue
> &InVals
) const override
;
516 getPrototype(const DataLayout
&DL
, Type
*, const ArgListTy
&,
517 const SmallVectorImpl
<ISD::OutputArg
> &, MaybeAlign retAlignment
,
518 std::optional
<std::pair
<unsigned, const APInt
&>> VAInfo
,
519 const CallBase
&CB
, unsigned UniqueCallSite
) const;
/// Override of the inherited hook. Takes the \p Chain, calling convention,
/// the \p isVarArg flag, the read-only outgoing-value descriptors \p Outs and
/// values \p OutVals, the source location \p dl and the \p DAG. Returns an
/// SDValue; declared here only.
521 SDValue
LowerReturn(SDValue Chain
, CallingConv::ID CallConv
, bool isVarArg
,
522 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
523 const SmallVectorImpl
<SDValue
> &OutVals
, const SDLoc
&dl
,
524 SelectionDAG
&DAG
) const override
;
/// Override of the inherited hook. Takes operand \p Op and the constraint
/// string \p Constraint; returns nothing. \p Ops is a mutable std::vector, so
/// results are presumably appended there (TODO confirm). Declared here only.
526 void LowerAsmOperandForConstraint(SDValue Op
, StringRef Constraint
,
527 std::vector
<SDValue
> &Ops
,
528 SelectionDAG
&DAG
) const override
;
// Non-owning pointer to the (const) NVPTX target machine.
// NOTE(review): presumably set from the constructor's TM argument -- confirm.
530 const NVPTXTargetMachine
*nvTM
;
532 // PTX always uses 32-bit shift amounts
533 MVT
getScalarShiftAmountTy(const DataLayout
&, EVT
) const override
{
/// Override of the inherited hook; returns the LegalizeTypeAction to use for
/// the vector type \p VT. Declared here only.
537 TargetLoweringBase::LegalizeTypeAction
538 getPreferredVectorAction(MVT VT
) const override
;
540 // Get the degree of precision we want from 32-bit floating point division
// operations. The returned level is one of:
543 // 0 - Use ptx div.approx
544 // 1 - Use ptx div.full (approximate, but less so than div.approx)
545 // 2 - Use IEEE-compliant div instructions, if available.
546 int getDivF32Level() const;
548 // Get whether we should use a precise or approximate 32-bit floating point
// square-root instruction.
// NOTE(review): the original sentence was cut off after "floating point";
// "square-root instruction" is inferred from the function name -- confirm.
// Going by the name, true presumably selects the precise form.
550 bool usePrecSqrtF32() const;
552 // Get whether we should use instructions that flush floating-point denormals
553 // to sign-preserving zero.
// The answer is queried per machine function (MF).
554 bool useF32FTZ(const MachineFunction
&MF
) const;
/// Override of the inherited hook. Takes \p Operand, the \p DAG, an
/// \p Enabled setting and a \p Reciprocal flag, and returns an SDValue.
/// \p ExtraSteps and \p UseOneConst are non-const references and so may be
/// written by the implementation (TODO confirm). Declared here only.
556 SDValue
getSqrtEstimate(SDValue Operand
, SelectionDAG
&DAG
, int Enabled
,
557 int &ExtraSteps
, bool &UseOneConst
,
558 bool Reciprocal
) const override
;
/// Override of the inherited hook; unconditionally returns 2.
/// NOTE(review): presumably the minimum number of divisions sharing a divisor
/// before they are rewritten as multiplications by one reciprocal -- confirm
/// against the TargetLowering documentation.
560 unsigned combineRepeatedFPDivisors() const override
{ return 2; }
// Target-specific query (not an override) taking the machine function and an
// optimisation level; returns bool. Declared here only.
// NOTE(review): presumably reports whether fused multiply-add formation is
// permitted for MF at OptLevel -- confirm in the implementation file.
562 bool allowFMA(MachineFunction
&MF
, CodeGenOptLevel OptLevel
) const;
// Target-specific query (not an override) taking the machine function;
// returns bool. Declared here only.
// NOTE(review): presumably reports whether unsafe floating-point
// transformations are permitted for MF -- confirm in the implementation file.
563 bool allowUnsafeFPMath(MachineFunction
&MF
) const;
565 bool isFMAFasterThanFMulAndFAdd(const MachineFunction
&MF
,
566 EVT
) const override
{
/// Override of the inherited hook; returns true unconditionally, and the
/// value type \p VT is not consulted.
570 bool enableAggressiveFMAFusion(EVT VT
) const override
{ return true; }
572 // The default is to transform llvm.ctlz(x, false) (where false indicates that
573 // x == 0 is not undefined behavior) into a branch that checks whether x is 0
574 // and avoids calling ctlz in that case. We have a dedicated ctlz
575 // instruction, so we say that ctlz is cheap to speculate.
// Returns true unconditionally; Ty is not consulted.
576 bool isCheapToSpeculateCtlz(Type
*Ty
) const override
{ return true; }
578 AtomicExpansionKind
shouldCastAtomicLoadInIR(LoadInst
*LI
) const override
{
579 return AtomicExpansionKind::None
;
582 AtomicExpansionKind
shouldCastAtomicStoreInIR(StoreInst
*SI
) const override
{
583 return AtomicExpansionKind::None
;
587 shouldExpandAtomicRMWInIR(AtomicRMWInst
*AI
) const override
;
589 bool aggressivelyPreferBuildVectorSources(EVT VecVT
) const override
{
590 // There's rarely any point of packing something into a vector type if we
591 // already have the source data.
// Reference member, so it must be bound when the object is constructed and
// cannot be reseated afterwards.
596 const NVPTXSubtarget
&STI
; // cache the subtarget here
// Helper returning an SDValue for the parameter index idx; the trailing EVT
// parameter is unnamed in this declaration. Declared here only.
// NOTE(review): presumably builds a symbol node naming the idx-th parameter,
// possibly via getParamName -- confirm in the implementation file.
597 SDValue
getParamSymbol(SelectionDAG
&DAG
, int idx
, EVT
) const;
// Lowering helpers for vector nodes. Each takes the node value Op and the
// DAG, and returns an SDValue; all are declared here only.
// NOTE(review): each presumably handles the ISD opcode spelled in its name and
// is dispatched from LowerOperation -- confirm in the implementation file.
599 SDValue
LowerBUILD_VECTOR(SDValue Op
, SelectionDAG
&DAG
) const;
600 SDValue
LowerCONCAT_VECTORS(SDValue Op
, SelectionDAG
&DAG
) const;
601 SDValue
LowerEXTRACT_VECTOR_ELT(SDValue Op
, SelectionDAG
&DAG
) const;
602 SDValue
LowerINSERT_VECTOR_ELT(SDValue Op
, SelectionDAG
&DAG
) const;
603 SDValue
LowerVECTOR_SHUFFLE(SDValue Op
, SelectionDAG
&DAG
) const;
// Lowering helpers for FROUND. Each takes Op and the DAG and returns an
// SDValue; declared here only.
// NOTE(review): LowerFROUND32 / LowerFROUND64 are presumably the f32 and f64
// cases selected by LowerFROUND -- confirm in the implementation file.
605 SDValue
LowerFROUND(SDValue Op
, SelectionDAG
&DAG
) const;
606 SDValue
LowerFROUND32(SDValue Op
, SelectionDAG
&DAG
) const;
607 SDValue
LowerFROUND64(SDValue Op
, SelectionDAG
&DAG
) const;
// Lowering helpers for loads. Each takes Op and the DAG and returns an
// SDValue; declared here only.
// NOTE(review): LowerLOADi1 is presumably the special case for i1-typed
// loads -- confirm in the implementation file.
609 SDValue
LowerLOAD(SDValue Op
, SelectionDAG
&DAG
) const;
610 SDValue
LowerLOADi1(SDValue Op
, SelectionDAG
&DAG
) const;
// Lowering helpers for stores. Each takes Op and the DAG and returns an
// SDValue; declared here only.
// NOTE(review): LowerSTOREi1 and LowerSTOREVector are presumably the i1 and
// vector-typed special cases of LowerSTORE -- confirm in the implementation
// file.
612 SDValue
LowerSTORE(SDValue Op
, SelectionDAG
&DAG
) const;
613 SDValue
LowerSTOREi1(SDValue Op
, SelectionDAG
&DAG
) const;
614 SDValue
LowerSTOREVector(SDValue Op
, SelectionDAG
&DAG
) const;
// Lowering helpers for multi-part shifts, one per direction. Each takes Op
// and the DAG and returns an SDValue; declared here only.
// NOTE(review): presumably handle the *_PARTS shift nodes that operate on a
// value split into two halves -- confirm in the implementation file.
616 SDValue
LowerShiftRightParts(SDValue Op
, SelectionDAG
&DAG
) const;
617 SDValue
LowerShiftLeftParts(SDValue Op
, SelectionDAG
&DAG
) const;
// Lowering helper taking Op and the DAG and returning an SDValue; declared
// here only.
// NOTE(review): presumably handles select nodes, going by the name -- confirm.
619 SDValue
LowerSelect(SDValue Op
, SelectionDAG
&DAG
) const;
// Lowering helpers for variadic-argument support. Each takes Op and the DAG
// and returns an SDValue; declared here only.
// NOTE(review): presumably handle the VAARG and VASTART nodes respectively --
// confirm in the implementation file.
621 SDValue
LowerVAARG(SDValue Op
, SelectionDAG
&DAG
) const;
622 SDValue
LowerVASTART(SDValue Op
, SelectionDAG
&DAG
) const;
/// Override of the inherited hook. Takes node \p N and the \p DAG; returns
/// nothing. \p Results is a mutable vector, so replacement values are
/// presumably appended there (TODO confirm). Declared here only.
624 void ReplaceNodeResults(SDNode
*N
, SmallVectorImpl
<SDValue
> &Results
,
625 SelectionDAG
&DAG
) const override
;
/// Override of the inherited hook. Takes node \p N and the combiner state
/// \p DCI, and returns an SDValue. Declared here only.
/// NOTE(review): presumably an empty SDValue signals "no change", as is
/// conventional for this hook -- confirm against TargetLowering.
626 SDValue
PerformDAGCombine(SDNode
*N
, DAGCombinerInfo
&DCI
) const override
;
// Helper returning an Align value. Inputs are the callee value, the call
// site CB (a pointer), the argument's IR type Ty, an index Idx and the data
// layout. Declared here only.
// NOTE(review): presumably Idx selects which argument (or the return value)
// of the call the alignment is for, and CB may be null -- confirm in the
// implementation file.
628 Align
getArgumentAlignment(SDValue Callee
, const CallBase
*CB
, Type
*Ty
,
629 unsigned Idx
, const DataLayout
&DL
) const;