1 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
15 #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLowering.h"
/// Target-specific SelectionDAG opcodes for NVPTX, stored as `unsigned`.
///
/// Only two enumerators carry an explicit value: FIRST_NUMBER is pinned to
/// ISD::BUILTIN_OP_END and LoadV2 is pinned to
/// ISD::FIRST_TARGET_MEMORY_OPCODE. Every other enumerator takes the
/// previous value plus one.
/// NOTE(review): because numbering is implicit, inserting or reordering an
/// entry shifts the value of every enumerator after it.
23 enum NodeType
: unsigned {
24 // Start the numbering from where ISD NodeType finishes.
25 FIRST_NUMBER
= ISD::BUILTIN_OP_END
,
38 PrintConvergentCallUni
,
// Numbering restarts here at ISD::FIRST_TARGET_MEMORY_OPCODE.
62 LoadV2
= ISD::FIRST_TARGET_MEMORY_OPCODE
,
76 StoreParamS32
, // to sext and store a <32bit value, not used currently
77 StoreParamU32
, // to zext and store a <32bit value, not used currently
// Tex* opcode family (presumably texture fetches -- confirm against the
// instruction selector).
97 Tex1DArrayFloatFloatLevel
,
98 Tex1DArrayFloatFloatGrad
,
101 Tex1DArrayS32FloatLevel
,
102 Tex1DArrayS32FloatGrad
,
105 Tex1DArrayU32FloatLevel
,
106 Tex1DArrayU32FloatGrad
,
109 Tex2DFloatFloatLevel
,
120 Tex2DArrayFloatFloat
,
121 Tex2DArrayFloatFloatLevel
,
122 Tex2DArrayFloatFloatGrad
,
125 Tex2DArrayS32FloatLevel
,
126 Tex2DArrayS32FloatGrad
,
129 Tex2DArrayU32FloatLevel
,
130 Tex2DArrayU32FloatGrad
,
133 Tex3DFloatFloatLevel
,
144 TexCubeFloatFloatLevel
,
146 TexCubeS32FloatLevel
,
148 TexCubeU32FloatLevel
,
149 TexCubeArrayFloatFloat
,
150 TexCubeArrayFloatFloatLevel
,
151 TexCubeArrayS32Float
,
152 TexCubeArrayS32FloatLevel
,
153 TexCubeArrayU32Float
,
154 TexCubeArrayU32FloatLevel
,
// TexUnified* opcode family.
167 TexUnified1DFloatS32
,
168 TexUnified1DFloatFloat
,
169 TexUnified1DFloatFloatLevel
,
170 TexUnified1DFloatFloatGrad
,
172 TexUnified1DS32Float
,
173 TexUnified1DS32FloatLevel
,
174 TexUnified1DS32FloatGrad
,
176 TexUnified1DU32Float
,
177 TexUnified1DU32FloatLevel
,
178 TexUnified1DU32FloatGrad
,
179 TexUnified1DArrayFloatS32
,
180 TexUnified1DArrayFloatFloat
,
181 TexUnified1DArrayFloatFloatLevel
,
182 TexUnified1DArrayFloatFloatGrad
,
183 TexUnified1DArrayS32S32
,
184 TexUnified1DArrayS32Float
,
185 TexUnified1DArrayS32FloatLevel
,
186 TexUnified1DArrayS32FloatGrad
,
187 TexUnified1DArrayU32S32
,
188 TexUnified1DArrayU32Float
,
189 TexUnified1DArrayU32FloatLevel
,
190 TexUnified1DArrayU32FloatGrad
,
191 TexUnified2DFloatS32
,
192 TexUnified2DFloatFloat
,
193 TexUnified2DFloatFloatLevel
,
194 TexUnified2DFloatFloatGrad
,
196 TexUnified2DS32Float
,
197 TexUnified2DS32FloatLevel
,
198 TexUnified2DS32FloatGrad
,
200 TexUnified2DU32Float
,
201 TexUnified2DU32FloatLevel
,
202 TexUnified2DU32FloatGrad
,
203 TexUnified2DArrayFloatS32
,
204 TexUnified2DArrayFloatFloat
,
205 TexUnified2DArrayFloatFloatLevel
,
206 TexUnified2DArrayFloatFloatGrad
,
207 TexUnified2DArrayS32S32
,
208 TexUnified2DArrayS32Float
,
209 TexUnified2DArrayS32FloatLevel
,
210 TexUnified2DArrayS32FloatGrad
,
211 TexUnified2DArrayU32S32
,
212 TexUnified2DArrayU32Float
,
213 TexUnified2DArrayU32FloatLevel
,
214 TexUnified2DArrayU32FloatGrad
,
215 TexUnified3DFloatS32
,
216 TexUnified3DFloatFloat
,
217 TexUnified3DFloatFloatLevel
,
218 TexUnified3DFloatFloatGrad
,
220 TexUnified3DS32Float
,
221 TexUnified3DS32FloatLevel
,
222 TexUnified3DS32FloatGrad
,
224 TexUnified3DU32Float
,
225 TexUnified3DU32FloatLevel
,
226 TexUnified3DU32FloatGrad
,
227 TexUnifiedCubeFloatFloat
,
228 TexUnifiedCubeFloatFloatLevel
,
229 TexUnifiedCubeS32Float
,
230 TexUnifiedCubeS32FloatLevel
,
231 TexUnifiedCubeU32Float
,
232 TexUnifiedCubeU32FloatLevel
,
233 TexUnifiedCubeArrayFloatFloat
,
234 TexUnifiedCubeArrayFloatFloatLevel
,
235 TexUnifiedCubeArrayS32Float
,
236 TexUnifiedCubeArrayS32FloatLevel
,
237 TexUnifiedCubeArrayU32Float
,
238 TexUnifiedCubeArrayU32FloatLevel
,
// Tld4Unified* opcode family; the R/G/B/A letter varies per entry.
239 Tld4UnifiedR2DFloatFloat
,
240 Tld4UnifiedG2DFloatFloat
,
241 Tld4UnifiedB2DFloatFloat
,
242 Tld4UnifiedA2DFloatFloat
,
243 Tld4UnifiedR2DS64Float
,
244 Tld4UnifiedG2DS64Float
,
245 Tld4UnifiedB2DS64Float
,
246 Tld4UnifiedA2DS64Float
,
247 Tld4UnifiedR2DU64Float
,
248 Tld4UnifiedG2DU64Float
,
249 Tld4UnifiedB2DU64Float
,
250 Tld4UnifiedA2DU64Float
,
252 // Surface intrinsics
// Suld* entries are suffixed Clamp, Trap or Zero.
269 Suld1DArrayV2I8Clamp
,
270 Suld1DArrayV2I16Clamp
,
271 Suld1DArrayV2I32Clamp
,
272 Suld1DArrayV2I64Clamp
,
273 Suld1DArrayV4I8Clamp
,
274 Suld1DArrayV4I16Clamp
,
275 Suld1DArrayV4I32Clamp
,
293 Suld2DArrayV2I8Clamp
,
294 Suld2DArrayV2I16Clamp
,
295 Suld2DArrayV2I32Clamp
,
296 Suld2DArrayV2I64Clamp
,
297 Suld2DArrayV4I8Clamp
,
298 Suld2DArrayV4I16Clamp
,
299 Suld2DArrayV4I32Clamp
,
330 Suld1DArrayV2I16Trap
,
331 Suld1DArrayV2I32Trap
,
332 Suld1DArrayV2I64Trap
,
334 Suld1DArrayV4I16Trap
,
335 Suld1DArrayV4I32Trap
,
354 Suld2DArrayV2I16Trap
,
355 Suld2DArrayV2I32Trap
,
356 Suld2DArrayV2I64Trap
,
358 Suld2DArrayV4I16Trap
,
359 Suld2DArrayV4I32Trap
,
390 Suld1DArrayV2I16Zero
,
391 Suld1DArrayV2I32Zero
,
392 Suld1DArrayV2I64Zero
,
394 Suld1DArrayV4I16Zero
,
395 Suld1DArrayV4I32Zero
,
414 Suld2DArrayV2I16Zero
,
415 Suld2DArrayV2I32Zero
,
416 Suld2DArrayV2I64Zero
,
418 Suld2DArrayV4I16Zero
,
419 Suld2DArrayV4I32Zero
,
435 class NVPTXSubtarget
;
437 //===--------------------------------------------------------------------===//
438 // TargetLowering Implementation
439 //===--------------------------------------------------------------------===//
440 class NVPTXTargetLowering
: public TargetLowering
{
/// Constructor taking the NVPTX target machine and subtarget by const
/// reference. Declaration only; the body is not part of this header.
/// NOTE(review): presumably these arguments seed the nvTM and STI members
/// declared further down -- confirm in the implementation file.
442 explicit NVPTXTargetLowering(const NVPTXTargetMachine
&TM
,
443 const NVPTXSubtarget
&STI
);
/// TargetLowering override: receives one DAG node \p Op plus the DAG and
/// returns the value that replaces it. Declaration only.
/// NOTE(review): presumably dispatches to the Lower* helpers declared later
/// in this class -- confirm in the implementation file.
444 SDValue
LowerOperation(SDValue Op
, SelectionDAG
&DAG
) const override
;
/// Non-virtual helper with the same (node, DAG) -> SDValue shape as
/// LowerOperation. Declaration only.
/// NOTE(review): presumably handles global-address nodes -- confirm.
446 SDValue
LowerGlobalAddress(SDValue Op
, SelectionDAG
&DAG
) const;
/// TargetLowering override returning a C string for \p Opcode.
/// Declaration only.
/// NOTE(review): the result for an opcode that is not a NodeType entry is
/// not visible here -- check for a null return before dereferencing.
448 const char *getTargetNodeName(unsigned Opcode
) const override
;
/// TargetLowering override. \p Info is a non-const reference, so the
/// implementation can fill it in for call \p I to intrinsic \p Intrinsic.
/// Declaration only.
/// NOTE(review): an `override` must match the base-class parameter list
/// exactly; upstream LLVM's hook also takes a MachineFunction reference --
/// confirm this list against TargetLowering.h.
450 bool getTgtMemIntrinsic(IntrinsicInfo
&Info
, const CallInst
&I
,
452 unsigned Intrinsic
) const override
;
454 /// isLegalAddressingMode - Return true if the addressing mode represented
455 /// by AM is legal for this target, for a load/store of the specified type.
456 /// Used to guide target specific optimizations, like loop strength
457 /// reduction (LoopStrengthReduce.cpp) and memory optimization for
458 /// address mode (CodeGenPrepare.cpp).
///
/// \p I is optional and defaults to nullptr. Declaration only.
/// NOTE(review): upstream LLVM's hook has an address-space argument between
/// \p Ty and \p I -- confirm this parameter list against TargetLowering.h.
459 bool isLegalAddressingMode(const DataLayout
&DL
, const AddrMode
&AM
, Type
*Ty
,
461 Instruction
*I
= nullptr) const override
;
463 bool isTruncateFree(Type
*SrcTy
, Type
*DstTy
) const override
{
464 // Truncating 64-bit to 32-bit is free in SASS.
465 if (!SrcTy
->isIntegerTy() || !DstTy
->isIntegerTy())
467 return SrcTy
->getPrimitiveSizeInBits() == 64 &&
468 DstTy
->getPrimitiveSizeInBits() == 32;
471 EVT
getSetCCResultType(const DataLayout
&DL
, LLVMContext
&Ctx
,
472 EVT VT
) const override
{
474 return EVT::getVectorVT(Ctx
, MVT::i1
, VT
.getVectorNumElements());
/// TargetLowering override: classifies the inline-asm constraint string
/// \p Constraint. Declaration only.
478 ConstraintType
getConstraintType(StringRef Constraint
) const override
;
/// TargetLowering override: maps \p Constraint and \p VT to a pair of
/// (unsigned, register class pointer). Declaration only.
/// NOTE(review): the meaning of the unsigned half and whether the class
/// pointer may be null are not visible here -- confirm in TargetLowering.h.
479 std::pair
<unsigned, const TargetRegisterClass
*>
480 getRegForInlineAsmConstraint(const TargetRegisterInfo
*TRI
,
481 StringRef Constraint
, MVT VT
) const override
;
/// TargetLowering override for incoming arguments. \p Ins is read-only;
/// \p InVals is a non-const reference, so the implementation can append to
/// it. Returns an SDValue. Declaration only.
/// NOTE(review): upstream LLVM's hook has a `bool isVarArg` parameter after
/// \p CallConv (compare LowerReturn in this class) -- confirm this
/// parameter list against TargetLowering.h.
483 SDValue
LowerFormalArguments(SDValue Chain
, CallingConv::ID CallConv
,
485 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
486 const SDLoc
&dl
, SelectionDAG
&DAG
,
487 SmallVectorImpl
<SDValue
> &InVals
) const override
;
/// TargetLowering override for outgoing calls. Both \p CLI and \p InVals
/// are non-const references, so the implementation can modify them.
/// Returns an SDValue. Declaration only.
489 SDValue
LowerCall(CallLoweringInfo
&CLI
,
490 SmallVectorImpl
<SDValue
> &InVals
) const override
;
/// Non-virtual helper returning a std::string built from the data layout,
/// a type, an argument list, the output arguments, \p retAlignment and the
/// call site \p CS. Three parameters are unnamed in this declaration.
/// NOTE(review): presumably produces the textual call prototype emitted for
/// a call -- confirm the exact format in the implementation file.
492 std::string
getPrototype(const DataLayout
&DL
, Type
*, const ArgListTy
&,
493 const SmallVectorImpl
<ISD::OutputArg
> &,
494 unsigned retAlignment
,
495 ImmutableCallSite CS
) const;
/// TargetLowering override for function returns. \p Outs and \p OutVals are
/// both read-only inputs. Returns an SDValue. Declaration only.
497 SDValue
LowerReturn(SDValue Chain
, CallingConv::ID CallConv
, bool isVarArg
,
498 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
499 const SmallVectorImpl
<SDValue
> &OutVals
, const SDLoc
&dl
,
500 SelectionDAG
&DAG
) const override
;
/// TargetLowering override for one inline-asm operand. Returns nothing;
/// \p Constraint and \p Ops are non-const references, so results can only
/// come back through them. Declaration only.
502 void LowerAsmOperandForConstraint(SDValue Op
, std::string
&Constraint
,
503 std::vector
<SDValue
> &Ops
,
504 SelectionDAG
&DAG
) const override
;
/// Raw pointer to a const NVPTXTargetMachine.
/// NOTE(review): presumably non-owning and set from the constructor's TM
/// argument -- confirm in the implementation file.
506 const NVPTXTargetMachine
*nvTM
;
508 // PTX always uses 32-bit shift amounts
509 MVT
getScalarShiftAmountTy(const DataLayout
&, EVT
) const override
{
/// TargetLoweringBase override: picks the legalization action for the
/// machine value type \p VT. Declaration only.
513 TargetLoweringBase::LegalizeTypeAction
514 getPreferredVectorAction(MVT VT
) const override
;
516 // Get the degree of precision we want from 32-bit floating point division
// operations. The returned level is one of:
519 // 0 - Use ptx div.approx
520 // 1 - Use ptx div.full (approximate, but less so than div.approx)
521 // 2 - Use IEEE-compliant div instructions, if available.
522 int getDivF32Level() const;
524 // Get whether we should use a precise or approximate 32-bit floating point
// sqrt instruction. Returns true for the precise form.
526 bool usePrecSqrtF32() const;
528 // Get whether we should use instructions that flush floating-point denormals
529 // to sign-preserving zero.
// The answer is computed per function: the query takes the MachineFunction.
530 bool useF32FTZ(const MachineFunction
&MF
) const;
/// TargetLowering override returning an SDValue estimate for \p Operand.
/// \p ExtraSteps and \p UseOneConst are non-const references, so the
/// implementation can also report results through them. Declaration only.
/// NOTE(review): the meaning of \p Enabled and of an empty return value is
/// defined by the base class -- confirm in TargetLowering.h.
532 SDValue
getSqrtEstimate(SDValue Operand
, SelectionDAG
&DAG
, int Enabled
,
533 int &ExtraSteps
, bool &UseOneConst
,
534 bool Reciprocal
) const override
;
536 unsigned combineRepeatedFPDivisors() const override
{ return 2; }
/// Non-virtual query: whether FMA may be used for function \p MF at
/// optimization level \p OptLevel. Declaration only.
538 bool allowFMA(MachineFunction
&MF
, CodeGenOpt::Level OptLevel
) const;
/// Non-virtual query: whether unsafe FP math is allowed for function \p MF.
/// Declaration only.
539 bool allowUnsafeFPMath(MachineFunction
&MF
) const;
541 bool isFMAFasterThanFMulAndFAdd(EVT
) const override
{ return true; }
543 bool enableAggressiveFMAFusion(EVT VT
) const override
{ return true; }
545 // The default is to transform llvm.ctlz(x, false) (where false indicates that
546 // x == 0 is not undefined behavior) into a branch that checks whether x is 0
547 // and avoids calling ctlz in that case. We have a dedicated ctlz
548 // instruction, so we say that ctlz is cheap to speculate.
549 bool isCheapToSpeculateCtlz() const override
{ return true; }
// Reference member: it must be bound when the object is constructed and
// cannot be reseated afterwards.
552 const NVPTXSubtarget
&STI
; // cache the subtarget here
/// Helper returning an SDValue for index \p idx; the trailing EVT parameter
/// is unnamed in this declaration. Declaration only.
/// NOTE(review): presumably builds the symbol node for the idx-th function
/// parameter; whether a negative idx is meaningful is not visible here --
/// confirm in the implementation file.
553 SDValue
getParamSymbol(SelectionDAG
&DAG
, int idx
, EVT
) const;
// Vector lowering helpers. Each takes the node to lower and the DAG and
// returns the replacement value. Declarations only.
// NOTE(review): presumably reached from LowerOperation for the opcode in
// each name -- confirm in the implementation file.
555 SDValue
LowerBUILD_VECTOR(SDValue Op
, SelectionDAG
&DAG
) const;
556 SDValue
LowerCONCAT_VECTORS(SDValue Op
, SelectionDAG
&DAG
) const;
557 SDValue
LowerEXTRACT_VECTOR_ELT(SDValue Op
, SelectionDAG
&DAG
) const;
// FROUND lowering helpers, all with the same (node, DAG) -> SDValue shape.
// Declarations only.
// NOTE(review): presumably LowerFROUND selects between the 32 and 64
// variants by operand width -- confirm in the implementation file.
559 SDValue
LowerFROUND(SDValue Op
, SelectionDAG
&DAG
) const;
560 SDValue
LowerFROUND32(SDValue Op
, SelectionDAG
&DAG
) const;
561 SDValue
LowerFROUND64(SDValue Op
, SelectionDAG
&DAG
) const;
// Load lowering helpers, both (node, DAG) -> SDValue. Declarations only.
// NOTE(review): presumably LowerLOADi1 is the i1-typed special case of
// LowerLOAD -- confirm in the implementation file.
563 SDValue
LowerLOAD(SDValue Op
, SelectionDAG
&DAG
) const;
564 SDValue
LowerLOADi1(SDValue Op
, SelectionDAG
&DAG
) const;
// Store lowering helpers, all (node, DAG) -> SDValue. Declarations only.
// NOTE(review): presumably LowerSTORE forwards to the i1 and vector
// variants by stored type -- confirm in the implementation file.
566 SDValue
LowerSTORE(SDValue Op
, SelectionDAG
&DAG
) const;
567 SDValue
LowerSTOREi1(SDValue Op
, SelectionDAG
&DAG
) const;
568 SDValue
LowerSTOREVector(SDValue Op
, SelectionDAG
&DAG
) const;
// Helpers for the two "shift parts" directions, both (node, DAG) -> SDValue.
// Declarations only.
// NOTE(review): presumably these handle the *_PARTS shift nodes used for
// values split across two registers -- confirm in the implementation file.
570 SDValue
LowerShiftRightParts(SDValue Op
, SelectionDAG
&DAG
) const;
571 SDValue
LowerShiftLeftParts(SDValue Op
, SelectionDAG
&DAG
) const;
// Select lowering helper, (node, DAG) -> SDValue. Declaration only.
// NOTE(review): which select forms need custom handling is not visible
// here -- confirm in the implementation file.
573 SDValue
LowerSelect(SDValue Op
, SelectionDAG
&DAG
) const;
/// TargetLowering override. Returns nothing; \p Results is a non-const
/// reference, so replacement values for \p N can only come back through it.
/// Declaration only.
575 void ReplaceNodeResults(SDNode
*N
, SmallVectorImpl
<SDValue
> &Results
,
576 SelectionDAG
&DAG
) const override
;
/// TargetLowering override: target-specific DAG combine for node \p N.
/// Returns an SDValue. Declaration only.
/// NOTE(review): the meaning of an empty return value is defined by the
/// base class -- confirm in TargetLowering.h.
577 SDValue
PerformDAGCombine(SDNode
*N
, DAGCombinerInfo
&DCI
) const override
;
/// Non-virtual helper returning an unsigned alignment for argument \p Idx of
/// type \p Ty at call site \p CS, using data layout \p DL. Declaration only.
/// NOTE(review): the unit (presumably bytes) and how \p Callee is used are
/// not visible here -- confirm in the implementation file.
579 unsigned getArgumentAlignment(SDValue Callee
, ImmutableCallSite CS
, Type
*Ty
,
580 unsigned Idx
, const DataLayout
&DL
) const;