1 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
15 #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLowering.h"
23 enum NodeType
: unsigned {
24 // Start the numbering from where ISD NodeType finishes.
25 FIRST_NUMBER
= ISD::BUILTIN_OP_END
,
38 PrintConvergentCallUni
,
66 LoadV2
= ISD::FIRST_TARGET_MEMORY_OPCODE
,
80 StoreParamS32
, // to sext and store a <32bit value, not used currently
81 StoreParamU32
, // to zext and store a <32bit value, not used currently
100 Tex1DArrayFloatFloat
,
101 Tex1DArrayFloatFloatLevel
,
102 Tex1DArrayFloatFloatGrad
,
105 Tex1DArrayS32FloatLevel
,
106 Tex1DArrayS32FloatGrad
,
109 Tex1DArrayU32FloatLevel
,
110 Tex1DArrayU32FloatGrad
,
113 Tex2DFloatFloatLevel
,
124 Tex2DArrayFloatFloat
,
125 Tex2DArrayFloatFloatLevel
,
126 Tex2DArrayFloatFloatGrad
,
129 Tex2DArrayS32FloatLevel
,
130 Tex2DArrayS32FloatGrad
,
133 Tex2DArrayU32FloatLevel
,
134 Tex2DArrayU32FloatGrad
,
137 Tex3DFloatFloatLevel
,
148 TexCubeFloatFloatLevel
,
150 TexCubeS32FloatLevel
,
152 TexCubeU32FloatLevel
,
153 TexCubeArrayFloatFloat
,
154 TexCubeArrayFloatFloatLevel
,
155 TexCubeArrayS32Float
,
156 TexCubeArrayS32FloatLevel
,
157 TexCubeArrayU32Float
,
158 TexCubeArrayU32FloatLevel
,
171 TexUnified1DFloatS32
,
172 TexUnified1DFloatFloat
,
173 TexUnified1DFloatFloatLevel
,
174 TexUnified1DFloatFloatGrad
,
176 TexUnified1DS32Float
,
177 TexUnified1DS32FloatLevel
,
178 TexUnified1DS32FloatGrad
,
180 TexUnified1DU32Float
,
181 TexUnified1DU32FloatLevel
,
182 TexUnified1DU32FloatGrad
,
183 TexUnified1DArrayFloatS32
,
184 TexUnified1DArrayFloatFloat
,
185 TexUnified1DArrayFloatFloatLevel
,
186 TexUnified1DArrayFloatFloatGrad
,
187 TexUnified1DArrayS32S32
,
188 TexUnified1DArrayS32Float
,
189 TexUnified1DArrayS32FloatLevel
,
190 TexUnified1DArrayS32FloatGrad
,
191 TexUnified1DArrayU32S32
,
192 TexUnified1DArrayU32Float
,
193 TexUnified1DArrayU32FloatLevel
,
194 TexUnified1DArrayU32FloatGrad
,
195 TexUnified2DFloatS32
,
196 TexUnified2DFloatFloat
,
197 TexUnified2DFloatFloatLevel
,
198 TexUnified2DFloatFloatGrad
,
200 TexUnified2DS32Float
,
201 TexUnified2DS32FloatLevel
,
202 TexUnified2DS32FloatGrad
,
204 TexUnified2DU32Float
,
205 TexUnified2DU32FloatLevel
,
206 TexUnified2DU32FloatGrad
,
207 TexUnified2DArrayFloatS32
,
208 TexUnified2DArrayFloatFloat
,
209 TexUnified2DArrayFloatFloatLevel
,
210 TexUnified2DArrayFloatFloatGrad
,
211 TexUnified2DArrayS32S32
,
212 TexUnified2DArrayS32Float
,
213 TexUnified2DArrayS32FloatLevel
,
214 TexUnified2DArrayS32FloatGrad
,
215 TexUnified2DArrayU32S32
,
216 TexUnified2DArrayU32Float
,
217 TexUnified2DArrayU32FloatLevel
,
218 TexUnified2DArrayU32FloatGrad
,
219 TexUnified3DFloatS32
,
220 TexUnified3DFloatFloat
,
221 TexUnified3DFloatFloatLevel
,
222 TexUnified3DFloatFloatGrad
,
224 TexUnified3DS32Float
,
225 TexUnified3DS32FloatLevel
,
226 TexUnified3DS32FloatGrad
,
228 TexUnified3DU32Float
,
229 TexUnified3DU32FloatLevel
,
230 TexUnified3DU32FloatGrad
,
231 TexUnifiedCubeFloatFloat
,
232 TexUnifiedCubeFloatFloatLevel
,
233 TexUnifiedCubeS32Float
,
234 TexUnifiedCubeS32FloatLevel
,
235 TexUnifiedCubeU32Float
,
236 TexUnifiedCubeU32FloatLevel
,
237 TexUnifiedCubeArrayFloatFloat
,
238 TexUnifiedCubeArrayFloatFloatLevel
,
239 TexUnifiedCubeArrayS32Float
,
240 TexUnifiedCubeArrayS32FloatLevel
,
241 TexUnifiedCubeArrayU32Float
,
242 TexUnifiedCubeArrayU32FloatLevel
,
243 TexUnifiedCubeFloatFloatGrad
,
244 TexUnifiedCubeS32FloatGrad
,
245 TexUnifiedCubeU32FloatGrad
,
246 TexUnifiedCubeArrayFloatFloatGrad
,
247 TexUnifiedCubeArrayS32FloatGrad
,
248 TexUnifiedCubeArrayU32FloatGrad
,
249 Tld4UnifiedR2DFloatFloat
,
250 Tld4UnifiedG2DFloatFloat
,
251 Tld4UnifiedB2DFloatFloat
,
252 Tld4UnifiedA2DFloatFloat
,
253 Tld4UnifiedR2DS64Float
,
254 Tld4UnifiedG2DS64Float
,
255 Tld4UnifiedB2DS64Float
,
256 Tld4UnifiedA2DS64Float
,
257 Tld4UnifiedR2DU64Float
,
258 Tld4UnifiedG2DU64Float
,
259 Tld4UnifiedB2DU64Float
,
260 Tld4UnifiedA2DU64Float
,
262 // Surface intrinsics
279 Suld1DArrayV2I8Clamp
,
280 Suld1DArrayV2I16Clamp
,
281 Suld1DArrayV2I32Clamp
,
282 Suld1DArrayV2I64Clamp
,
283 Suld1DArrayV4I8Clamp
,
284 Suld1DArrayV4I16Clamp
,
285 Suld1DArrayV4I32Clamp
,
303 Suld2DArrayV2I8Clamp
,
304 Suld2DArrayV2I16Clamp
,
305 Suld2DArrayV2I32Clamp
,
306 Suld2DArrayV2I64Clamp
,
307 Suld2DArrayV4I8Clamp
,
308 Suld2DArrayV4I16Clamp
,
309 Suld2DArrayV4I32Clamp
,
340 Suld1DArrayV2I16Trap
,
341 Suld1DArrayV2I32Trap
,
342 Suld1DArrayV2I64Trap
,
344 Suld1DArrayV4I16Trap
,
345 Suld1DArrayV4I32Trap
,
364 Suld2DArrayV2I16Trap
,
365 Suld2DArrayV2I32Trap
,
366 Suld2DArrayV2I64Trap
,
368 Suld2DArrayV4I16Trap
,
369 Suld2DArrayV4I32Trap
,
400 Suld1DArrayV2I16Zero
,
401 Suld1DArrayV2I32Zero
,
402 Suld1DArrayV2I64Zero
,
404 Suld1DArrayV4I16Zero
,
405 Suld1DArrayV4I32Zero
,
424 Suld2DArrayV2I16Zero
,
425 Suld2DArrayV2I32Zero
,
426 Suld2DArrayV2I64Zero
,
428 Suld2DArrayV4I16Zero
,
429 Suld2DArrayV4I32Zero
,
445 class NVPTXSubtarget
;
447 //===--------------------------------------------------------------------===//
448 // TargetLowering Implementation
449 //===--------------------------------------------------------------------===//
450 class NVPTXTargetLowering
: public TargetLowering
{
452 explicit NVPTXTargetLowering(const NVPTXTargetMachine
&TM
,
453 const NVPTXSubtarget
&STI
);
454 SDValue
LowerOperation(SDValue Op
, SelectionDAG
&DAG
) const override
;
456 SDValue
LowerGlobalAddress(SDValue Op
, SelectionDAG
&DAG
) const;
458 const char *getTargetNodeName(unsigned Opcode
) const override
;
460 bool getTgtMemIntrinsic(IntrinsicInfo
&Info
, const CallInst
&I
,
462 unsigned Intrinsic
) const override
;
464 /// getFunctionParamOptimizedAlign - since function arguments are passed via
465 /// .param space, we may want to increase their alignment in a way that
466 /// ensures that we can effectively vectorize their loads & stores. We can
467 /// increase alignment only if the function has internal or has private
468 /// linkage as for other linkage types callers may already rely on default
469 /// alignment. To allow using 128-bit vectorized loads/stores, this function
470 /// ensures that alignment is 16 or greater.
471 Align
getFunctionParamOptimizedAlign(const Function
*F
, Type
*ArgTy
,
472 const DataLayout
&DL
) const;
474 /// Helper for computing alignment of a device function byval parameter.
475 Align
getFunctionByValParamAlign(const Function
*F
, Type
*ArgTy
,
477 const DataLayout
&DL
) const;
479 // Helper for getting a function parameter name. Name is composed from
480 // its index and the function name. Negative index corresponds to special
481 // parameter (unsized array) used for passing variable arguments.
482 std::string
getParamName(const Function
*F
, int Idx
) const;
484 /// isLegalAddressingMode - Return true if the addressing mode represented
485 /// by AM is legal for this target, for a load/store of the specified type
486 /// Used to guide target specific optimizations, like loop strength
487 /// reduction (LoopStrengthReduce.cpp) and memory optimization for
488 /// address mode (CodeGenPrepare.cpp)
489 bool isLegalAddressingMode(const DataLayout
&DL
, const AddrMode
&AM
, Type
*Ty
,
491 Instruction
*I
= nullptr) const override
;
493 bool isTruncateFree(Type
*SrcTy
, Type
*DstTy
) const override
{
494 // Truncating 64-bit to 32-bit is free in SASS.
495 if (!SrcTy
->isIntegerTy() || !DstTy
->isIntegerTy())
497 return SrcTy
->getPrimitiveSizeInBits() == 64 &&
498 DstTy
->getPrimitiveSizeInBits() == 32;
501 EVT
getSetCCResultType(const DataLayout
&DL
, LLVMContext
&Ctx
,
502 EVT VT
) const override
{
504 return EVT::getVectorVT(Ctx
, MVT::i1
, VT
.getVectorNumElements());
508 ConstraintType
getConstraintType(StringRef Constraint
) const override
;
509 std::pair
<unsigned, const TargetRegisterClass
*>
510 getRegForInlineAsmConstraint(const TargetRegisterInfo
*TRI
,
511 StringRef Constraint
, MVT VT
) const override
;
513 SDValue
LowerFormalArguments(SDValue Chain
, CallingConv::ID CallConv
,
515 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
516 const SDLoc
&dl
, SelectionDAG
&DAG
,
517 SmallVectorImpl
<SDValue
> &InVals
) const override
;
519 SDValue
LowerCall(CallLoweringInfo
&CLI
,
520 SmallVectorImpl
<SDValue
> &InVals
) const override
;
522 SDValue
LowerDYNAMIC_STACKALLOC(SDValue Op
, SelectionDAG
&DAG
) const;
525 getPrototype(const DataLayout
&DL
, Type
*, const ArgListTy
&,
526 const SmallVectorImpl
<ISD::OutputArg
> &, MaybeAlign retAlignment
,
527 std::optional
<std::pair
<unsigned, const APInt
&>> VAInfo
,
528 const CallBase
&CB
, unsigned UniqueCallSite
) const;
530 SDValue
LowerReturn(SDValue Chain
, CallingConv::ID CallConv
, bool isVarArg
,
531 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
532 const SmallVectorImpl
<SDValue
> &OutVals
, const SDLoc
&dl
,
533 SelectionDAG
&DAG
) const override
;
535 void LowerAsmOperandForConstraint(SDValue Op
, StringRef Constraint
,
536 std::vector
<SDValue
> &Ops
,
537 SelectionDAG
&DAG
) const override
;
539 const NVPTXTargetMachine
*nvTM
;
541 // PTX always uses 32-bit shift amounts
542 MVT
getScalarShiftAmountTy(const DataLayout
&, EVT
) const override
{
546 TargetLoweringBase::LegalizeTypeAction
547 getPreferredVectorAction(MVT VT
) const override
;
549 // Get the degree of precision we want from 32-bit floating point division
552 // 0 - Use ptx div.approx
553 // 1 - Use ptx.div.full (approximate, but less so than div.approx)
554 // 2 - Use IEEE-compliant div instructions, if available.
555 int getDivF32Level() const;
557 // Get whether we should use a precise or approximate 32-bit floating point
559 bool usePrecSqrtF32() const;
561 // Get whether we should use instructions that flush floating-point denormals
562 // to sign-preserving zero.
563 bool useF32FTZ(const MachineFunction
&MF
) const;
565 SDValue
getSqrtEstimate(SDValue Operand
, SelectionDAG
&DAG
, int Enabled
,
566 int &ExtraSteps
, bool &UseOneConst
,
567 bool Reciprocal
) const override
;
569 unsigned combineRepeatedFPDivisors() const override
{ return 2; }
571 bool allowFMA(MachineFunction
&MF
, CodeGenOptLevel OptLevel
) const;
572 bool allowUnsafeFPMath(MachineFunction
&MF
) const;
574 bool isFMAFasterThanFMulAndFAdd(const MachineFunction
&MF
,
575 EVT
) const override
{
579 bool enableAggressiveFMAFusion(EVT VT
) const override
{ return true; }
581 // The default is to transform llvm.ctlz(x, false) (where false indicates that
582 // x == 0 is not undefined behavior) into a branch that checks whether x is 0
583 // and avoids calling ctlz in that case. We have a dedicated ctlz
584 // instruction, so we say that ctlz is cheap to speculate.
585 bool isCheapToSpeculateCtlz(Type
*Ty
) const override
{ return true; }
587 AtomicExpansionKind
shouldCastAtomicLoadInIR(LoadInst
*LI
) const override
{
588 return AtomicExpansionKind::None
;
591 AtomicExpansionKind
shouldCastAtomicStoreInIR(StoreInst
*SI
) const override
{
592 return AtomicExpansionKind::None
;
596 shouldExpandAtomicRMWInIR(AtomicRMWInst
*AI
) const override
;
598 bool aggressivelyPreferBuildVectorSources(EVT VecVT
) const override
{
599 // There's rarely any point of packing something into a vector type if we
600 // already have the source data.
605 const NVPTXSubtarget
&STI
; // cache the subtarget here
606 SDValue
getParamSymbol(SelectionDAG
&DAG
, int idx
, EVT
) const;
608 SDValue
LowerBUILD_VECTOR(SDValue Op
, SelectionDAG
&DAG
) const;
609 SDValue
LowerCONCAT_VECTORS(SDValue Op
, SelectionDAG
&DAG
) const;
610 SDValue
LowerEXTRACT_VECTOR_ELT(SDValue Op
, SelectionDAG
&DAG
) const;
611 SDValue
LowerINSERT_VECTOR_ELT(SDValue Op
, SelectionDAG
&DAG
) const;
612 SDValue
LowerVECTOR_SHUFFLE(SDValue Op
, SelectionDAG
&DAG
) const;
614 SDValue
LowerFROUND(SDValue Op
, SelectionDAG
&DAG
) const;
615 SDValue
LowerFROUND32(SDValue Op
, SelectionDAG
&DAG
) const;
616 SDValue
LowerFROUND64(SDValue Op
, SelectionDAG
&DAG
) const;
618 SDValue
LowerINT_TO_FP(SDValue Op
, SelectionDAG
&DAG
) const;
619 SDValue
LowerFP_TO_INT(SDValue Op
, SelectionDAG
&DAG
) const;
621 SDValue
LowerLOAD(SDValue Op
, SelectionDAG
&DAG
) const;
622 SDValue
LowerLOADi1(SDValue Op
, SelectionDAG
&DAG
) const;
624 SDValue
LowerSTORE(SDValue Op
, SelectionDAG
&DAG
) const;
625 SDValue
LowerSTOREi1(SDValue Op
, SelectionDAG
&DAG
) const;
626 SDValue
LowerSTOREVector(SDValue Op
, SelectionDAG
&DAG
) const;
628 SDValue
LowerShiftRightParts(SDValue Op
, SelectionDAG
&DAG
) const;
629 SDValue
LowerShiftLeftParts(SDValue Op
, SelectionDAG
&DAG
) const;
631 SDValue
LowerSelect(SDValue Op
, SelectionDAG
&DAG
) const;
633 SDValue
LowerVAARG(SDValue Op
, SelectionDAG
&DAG
) const;
634 SDValue
LowerVASTART(SDValue Op
, SelectionDAG
&DAG
) const;
636 void ReplaceNodeResults(SDNode
*N
, SmallVectorImpl
<SDValue
> &Results
,
637 SelectionDAG
&DAG
) const override
;
638 SDValue
PerformDAGCombine(SDNode
*N
, DAGCombinerInfo
&DCI
) const override
;
640 Align
getArgumentAlignment(const CallBase
*CB
, Type
*Ty
, unsigned Idx
,
641 const DataLayout
&DL
) const;