[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / Target / NVPTX / NVPTXISelLowering.h
blobef645fc1e541f86f2fc026d2976bbe4bc64b2b3a
1 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
10 // selection DAG.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
15 #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
17 #include "NVPTX.h"
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLowering.h"
21 namespace llvm {
// Opcodes for the NVPTX-specific SelectionDAG nodes.
22 namespace NVPTXISD {
// Only FIRST_NUMBER and LoadV2 carry explicit values; every other enumerator
// is one greater than the entry before it, so the order of this list fixes
// each opcode's number.
23 enum NodeType : unsigned {
24 // Start the numbering from where ISD NodeType finishes.
25 FIRST_NUMBER = ISD::BUILTIN_OP_END,
26 Wrapper,
27 CALL,
28 RET_FLAG,
29 LOAD_PARAM,
30 DeclareParam,
31 DeclareScalarParam,
32 DeclareRetParam,
33 DeclareRet,
34 DeclareScalarRet,
35 PrintCall,
36 PrintConvergentCall,
37 PrintCallUni,
38 PrintConvergentCallUni,
39 CallArgBegin,
40 CallArg,
41 LastCallArg,
42 CallArgEnd,
43 CallVoid,
44 CallVal,
45 CallSymbol,
46 Prototype,
47 MoveParam,
48 PseudoUseParam,
49 RETURN,
50 CallSeqBegin,
51 CallSeqEnd,
52 CallPrototype,
53 ProxyReg,
54 FUN_SHFL_CLAMP,
55 FUN_SHFR_CLAMP,
56 MUL_WIDE_SIGNED,
57 MUL_WIDE_UNSIGNED,
58 IMAD,
59 SETP_F16X2,
60 Dummy,
// The numbering restarts here: LoadV2 and every entry after it count upward
// from ISD::FIRST_TARGET_MEMORY_OPCODE.
// NOTE(review): presumably everything below is a node that references
// memory -- confirm against the ISD opcode documentation before adding a
// non-memory opcode after this point.
62 LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
63 LoadV4,
64 LDGV2, // LDG.v2
65 LDGV4, // LDG.v4
66 LDUV2, // LDU.v2
67 LDUV4, // LDU.v4
68 StoreV2,
69 StoreV4,
70 LoadParam,
71 LoadParamV2,
72 LoadParamV4,
73 StoreParam,
74 StoreParamV2,
75 StoreParamV4,
76 StoreParamS32, // to sext and store a <32bit value, not used currently
77 StoreParamU32, // to zext and store a <32bit value, not used currently
78 StoreRetval,
79 StoreRetvalV2,
80 StoreRetvalV4,
82 // Texture intrinsics
83 Tex1DFloatS32,
84 Tex1DFloatFloat,
85 Tex1DFloatFloatLevel,
86 Tex1DFloatFloatGrad,
87 Tex1DS32S32,
88 Tex1DS32Float,
89 Tex1DS32FloatLevel,
90 Tex1DS32FloatGrad,
91 Tex1DU32S32,
92 Tex1DU32Float,
93 Tex1DU32FloatLevel,
94 Tex1DU32FloatGrad,
95 Tex1DArrayFloatS32,
96 Tex1DArrayFloatFloat,
97 Tex1DArrayFloatFloatLevel,
98 Tex1DArrayFloatFloatGrad,
99 Tex1DArrayS32S32,
100 Tex1DArrayS32Float,
101 Tex1DArrayS32FloatLevel,
102 Tex1DArrayS32FloatGrad,
103 Tex1DArrayU32S32,
104 Tex1DArrayU32Float,
105 Tex1DArrayU32FloatLevel,
106 Tex1DArrayU32FloatGrad,
107 Tex2DFloatS32,
108 Tex2DFloatFloat,
109 Tex2DFloatFloatLevel,
110 Tex2DFloatFloatGrad,
111 Tex2DS32S32,
112 Tex2DS32Float,
113 Tex2DS32FloatLevel,
114 Tex2DS32FloatGrad,
115 Tex2DU32S32,
116 Tex2DU32Float,
117 Tex2DU32FloatLevel,
118 Tex2DU32FloatGrad,
119 Tex2DArrayFloatS32,
120 Tex2DArrayFloatFloat,
121 Tex2DArrayFloatFloatLevel,
122 Tex2DArrayFloatFloatGrad,
123 Tex2DArrayS32S32,
124 Tex2DArrayS32Float,
125 Tex2DArrayS32FloatLevel,
126 Tex2DArrayS32FloatGrad,
127 Tex2DArrayU32S32,
128 Tex2DArrayU32Float,
129 Tex2DArrayU32FloatLevel,
130 Tex2DArrayU32FloatGrad,
131 Tex3DFloatS32,
132 Tex3DFloatFloat,
133 Tex3DFloatFloatLevel,
134 Tex3DFloatFloatGrad,
135 Tex3DS32S32,
136 Tex3DS32Float,
137 Tex3DS32FloatLevel,
138 Tex3DS32FloatGrad,
139 Tex3DU32S32,
140 Tex3DU32Float,
141 Tex3DU32FloatLevel,
142 Tex3DU32FloatGrad,
143 TexCubeFloatFloat,
144 TexCubeFloatFloatLevel,
145 TexCubeS32Float,
146 TexCubeS32FloatLevel,
147 TexCubeU32Float,
148 TexCubeU32FloatLevel,
149 TexCubeArrayFloatFloat,
150 TexCubeArrayFloatFloatLevel,
151 TexCubeArrayS32Float,
152 TexCubeArrayS32FloatLevel,
153 TexCubeArrayU32Float,
154 TexCubeArrayU32FloatLevel,
155 Tld4R2DFloatFloat,
156 Tld4G2DFloatFloat,
157 Tld4B2DFloatFloat,
158 Tld4A2DFloatFloat,
159 Tld4R2DS64Float,
160 Tld4G2DS64Float,
161 Tld4B2DS64Float,
162 Tld4A2DS64Float,
163 Tld4R2DU64Float,
164 Tld4G2DU64Float,
165 Tld4B2DU64Float,
166 Tld4A2DU64Float,
// TexUnified* / Tld4Unified*: the same list of variants as the Tex* / Tld4*
// entries above, in the same order, under a "Unified" prefix.
167 TexUnified1DFloatS32,
168 TexUnified1DFloatFloat,
169 TexUnified1DFloatFloatLevel,
170 TexUnified1DFloatFloatGrad,
171 TexUnified1DS32S32,
172 TexUnified1DS32Float,
173 TexUnified1DS32FloatLevel,
174 TexUnified1DS32FloatGrad,
175 TexUnified1DU32S32,
176 TexUnified1DU32Float,
177 TexUnified1DU32FloatLevel,
178 TexUnified1DU32FloatGrad,
179 TexUnified1DArrayFloatS32,
180 TexUnified1DArrayFloatFloat,
181 TexUnified1DArrayFloatFloatLevel,
182 TexUnified1DArrayFloatFloatGrad,
183 TexUnified1DArrayS32S32,
184 TexUnified1DArrayS32Float,
185 TexUnified1DArrayS32FloatLevel,
186 TexUnified1DArrayS32FloatGrad,
187 TexUnified1DArrayU32S32,
188 TexUnified1DArrayU32Float,
189 TexUnified1DArrayU32FloatLevel,
190 TexUnified1DArrayU32FloatGrad,
191 TexUnified2DFloatS32,
192 TexUnified2DFloatFloat,
193 TexUnified2DFloatFloatLevel,
194 TexUnified2DFloatFloatGrad,
195 TexUnified2DS32S32,
196 TexUnified2DS32Float,
197 TexUnified2DS32FloatLevel,
198 TexUnified2DS32FloatGrad,
199 TexUnified2DU32S32,
200 TexUnified2DU32Float,
201 TexUnified2DU32FloatLevel,
202 TexUnified2DU32FloatGrad,
203 TexUnified2DArrayFloatS32,
204 TexUnified2DArrayFloatFloat,
205 TexUnified2DArrayFloatFloatLevel,
206 TexUnified2DArrayFloatFloatGrad,
207 TexUnified2DArrayS32S32,
208 TexUnified2DArrayS32Float,
209 TexUnified2DArrayS32FloatLevel,
210 TexUnified2DArrayS32FloatGrad,
211 TexUnified2DArrayU32S32,
212 TexUnified2DArrayU32Float,
213 TexUnified2DArrayU32FloatLevel,
214 TexUnified2DArrayU32FloatGrad,
215 TexUnified3DFloatS32,
216 TexUnified3DFloatFloat,
217 TexUnified3DFloatFloatLevel,
218 TexUnified3DFloatFloatGrad,
219 TexUnified3DS32S32,
220 TexUnified3DS32Float,
221 TexUnified3DS32FloatLevel,
222 TexUnified3DS32FloatGrad,
223 TexUnified3DU32S32,
224 TexUnified3DU32Float,
225 TexUnified3DU32FloatLevel,
226 TexUnified3DU32FloatGrad,
227 TexUnifiedCubeFloatFloat,
228 TexUnifiedCubeFloatFloatLevel,
229 TexUnifiedCubeS32Float,
230 TexUnifiedCubeS32FloatLevel,
231 TexUnifiedCubeU32Float,
232 TexUnifiedCubeU32FloatLevel,
233 TexUnifiedCubeArrayFloatFloat,
234 TexUnifiedCubeArrayFloatFloatLevel,
235 TexUnifiedCubeArrayS32Float,
236 TexUnifiedCubeArrayS32FloatLevel,
237 TexUnifiedCubeArrayU32Float,
238 TexUnifiedCubeArrayU32FloatLevel,
239 Tld4UnifiedR2DFloatFloat,
240 Tld4UnifiedG2DFloatFloat,
241 Tld4UnifiedB2DFloatFloat,
242 Tld4UnifiedA2DFloatFloat,
243 Tld4UnifiedR2DS64Float,
244 Tld4UnifiedG2DS64Float,
245 Tld4UnifiedB2DS64Float,
246 Tld4UnifiedA2DS64Float,
247 Tld4UnifiedR2DU64Float,
248 Tld4UnifiedG2DU64Float,
249 Tld4UnifiedB2DU64Float,
250 Tld4UnifiedA2DU64Float,
252 // Surface intrinsics
// The Suld entries come in three parallel groups that differ only in their
// suffix: Clamp, Trap and Zero. This first group is the Clamp one.
// NOTE(review): the suffix presumably selects the out-of-bounds handling
// mode of the surface load -- confirm against the PTX suld documentation.
253 Suld1DI8Clamp,
254 Suld1DI16Clamp,
255 Suld1DI32Clamp,
256 Suld1DI64Clamp,
257 Suld1DV2I8Clamp,
258 Suld1DV2I16Clamp,
259 Suld1DV2I32Clamp,
260 Suld1DV2I64Clamp,
261 Suld1DV4I8Clamp,
262 Suld1DV4I16Clamp,
263 Suld1DV4I32Clamp,
265 Suld1DArrayI8Clamp,
266 Suld1DArrayI16Clamp,
267 Suld1DArrayI32Clamp,
268 Suld1DArrayI64Clamp,
269 Suld1DArrayV2I8Clamp,
270 Suld1DArrayV2I16Clamp,
271 Suld1DArrayV2I32Clamp,
272 Suld1DArrayV2I64Clamp,
273 Suld1DArrayV4I8Clamp,
274 Suld1DArrayV4I16Clamp,
275 Suld1DArrayV4I32Clamp,
277 Suld2DI8Clamp,
278 Suld2DI16Clamp,
279 Suld2DI32Clamp,
280 Suld2DI64Clamp,
281 Suld2DV2I8Clamp,
282 Suld2DV2I16Clamp,
283 Suld2DV2I32Clamp,
284 Suld2DV2I64Clamp,
285 Suld2DV4I8Clamp,
286 Suld2DV4I16Clamp,
287 Suld2DV4I32Clamp,
289 Suld2DArrayI8Clamp,
290 Suld2DArrayI16Clamp,
291 Suld2DArrayI32Clamp,
292 Suld2DArrayI64Clamp,
293 Suld2DArrayV2I8Clamp,
294 Suld2DArrayV2I16Clamp,
295 Suld2DArrayV2I32Clamp,
296 Suld2DArrayV2I64Clamp,
297 Suld2DArrayV4I8Clamp,
298 Suld2DArrayV4I16Clamp,
299 Suld2DArrayV4I32Clamp,
301 Suld3DI8Clamp,
302 Suld3DI16Clamp,
303 Suld3DI32Clamp,
304 Suld3DI64Clamp,
305 Suld3DV2I8Clamp,
306 Suld3DV2I16Clamp,
307 Suld3DV2I32Clamp,
308 Suld3DV2I64Clamp,
309 Suld3DV4I8Clamp,
310 Suld3DV4I16Clamp,
311 Suld3DV4I32Clamp,
// Trap group: same Suld variants, same order, with the Trap suffix.
313 Suld1DI8Trap,
314 Suld1DI16Trap,
315 Suld1DI32Trap,
316 Suld1DI64Trap,
317 Suld1DV2I8Trap,
318 Suld1DV2I16Trap,
319 Suld1DV2I32Trap,
320 Suld1DV2I64Trap,
321 Suld1DV4I8Trap,
322 Suld1DV4I16Trap,
323 Suld1DV4I32Trap,
325 Suld1DArrayI8Trap,
326 Suld1DArrayI16Trap,
327 Suld1DArrayI32Trap,
328 Suld1DArrayI64Trap,
329 Suld1DArrayV2I8Trap,
330 Suld1DArrayV2I16Trap,
331 Suld1DArrayV2I32Trap,
332 Suld1DArrayV2I64Trap,
333 Suld1DArrayV4I8Trap,
334 Suld1DArrayV4I16Trap,
335 Suld1DArrayV4I32Trap,
337 Suld2DI8Trap,
338 Suld2DI16Trap,
339 Suld2DI32Trap,
340 Suld2DI64Trap,
341 Suld2DV2I8Trap,
342 Suld2DV2I16Trap,
343 Suld2DV2I32Trap,
344 Suld2DV2I64Trap,
345 Suld2DV4I8Trap,
346 Suld2DV4I16Trap,
347 Suld2DV4I32Trap,
349 Suld2DArrayI8Trap,
350 Suld2DArrayI16Trap,
351 Suld2DArrayI32Trap,
352 Suld2DArrayI64Trap,
353 Suld2DArrayV2I8Trap,
354 Suld2DArrayV2I16Trap,
355 Suld2DArrayV2I32Trap,
356 Suld2DArrayV2I64Trap,
357 Suld2DArrayV4I8Trap,
358 Suld2DArrayV4I16Trap,
359 Suld2DArrayV4I32Trap,
361 Suld3DI8Trap,
362 Suld3DI16Trap,
363 Suld3DI32Trap,
364 Suld3DI64Trap,
365 Suld3DV2I8Trap,
366 Suld3DV2I16Trap,
367 Suld3DV2I32Trap,
368 Suld3DV2I64Trap,
369 Suld3DV4I8Trap,
370 Suld3DV4I16Trap,
371 Suld3DV4I32Trap,
// Zero group: same Suld variants, same order, with the Zero suffix.
373 Suld1DI8Zero,
374 Suld1DI16Zero,
375 Suld1DI32Zero,
376 Suld1DI64Zero,
377 Suld1DV2I8Zero,
378 Suld1DV2I16Zero,
379 Suld1DV2I32Zero,
380 Suld1DV2I64Zero,
381 Suld1DV4I8Zero,
382 Suld1DV4I16Zero,
383 Suld1DV4I32Zero,
385 Suld1DArrayI8Zero,
386 Suld1DArrayI16Zero,
387 Suld1DArrayI32Zero,
388 Suld1DArrayI64Zero,
389 Suld1DArrayV2I8Zero,
390 Suld1DArrayV2I16Zero,
391 Suld1DArrayV2I32Zero,
392 Suld1DArrayV2I64Zero,
393 Suld1DArrayV4I8Zero,
394 Suld1DArrayV4I16Zero,
395 Suld1DArrayV4I32Zero,
397 Suld2DI8Zero,
398 Suld2DI16Zero,
399 Suld2DI32Zero,
400 Suld2DI64Zero,
401 Suld2DV2I8Zero,
402 Suld2DV2I16Zero,
403 Suld2DV2I32Zero,
404 Suld2DV2I64Zero,
405 Suld2DV4I8Zero,
406 Suld2DV4I16Zero,
407 Suld2DV4I32Zero,
409 Suld2DArrayI8Zero,
410 Suld2DArrayI16Zero,
411 Suld2DArrayI32Zero,
412 Suld2DArrayI64Zero,
413 Suld2DArrayV2I8Zero,
414 Suld2DArrayV2I16Zero,
415 Suld2DArrayV2I32Zero,
416 Suld2DArrayV2I64Zero,
417 Suld2DArrayV4I8Zero,
418 Suld2DArrayV4I16Zero,
419 Suld2DArrayV4I32Zero,
421 Suld3DI8Zero,
422 Suld3DI16Zero,
423 Suld3DI32Zero,
424 Suld3DI64Zero,
425 Suld3DV2I8Zero,
426 Suld3DV2I16Zero,
427 Suld3DV2I32Zero,
428 Suld3DV2I64Zero,
429 Suld3DV4I8Zero,
430 Suld3DV4I16Zero,
431 Suld3DV4I32Zero
432 };
433 } // namespace NVPTXISD
// Forward declaration: this header only refers to NVPTXSubtarget through
// `const NVPTXSubtarget &`, so the full definition is not needed here.
435 class NVPTXSubtarget;
437 //===--------------------------------------------------------------------===//
438 // TargetLowering Implementation
439 //===--------------------------------------------------------------------===//
/// NVPTX's subclass of TargetLowering. Apart from a handful of small inline
/// overrides, the members are only declared here; their definitions are not
/// part of this header.
440 class NVPTXTargetLowering : public TargetLowering {
441 public:
/// Construct the lowering object from the target machine \p TM and the
/// subtarget \p STI.
442 explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
443 const NVPTXSubtarget &STI);
444 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
446 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
448 const char *getTargetNodeName(unsigned Opcode) const override;
450 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
451 MachineFunction &MF,
452 unsigned Intrinsic) const override;
454 /// isLegalAddressingMode - Return true if the addressing mode represented
455 /// by AM is legal for this target, for a load/store of the specified type.
456 /// Used to guide target specific optimizations, like loop strength
457 /// reduction (LoopStrengthReduce.cpp) and memory optimization for
458 /// address mode (CodeGenPrepare.cpp)
459 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
460 unsigned AS,
461 Instruction *I = nullptr) const override;
/// Report a truncate as free only when both types are integer types, the
/// source is exactly 64 bits wide and the destination exactly 32 bits wide.
/// Every other pair, including any non-integer type, returns false.
463 bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
464 // Truncating 64-bit to 32-bit is free in SASS.
465 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
466 return false;
467 return SrcTy->getPrimitiveSizeInBits() == 64 &&
468 DstTy->getPrimitiveSizeInBits() == 32;
469 }
/// Comparison results are i1: a scalar \p VT yields MVT::i1, a vector \p VT
/// yields a vector of i1 with the same number of elements. \p DL is unused.
471 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
472 EVT VT) const override {
473 if (VT.isVector())
474 return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
475 return MVT::i1;
476 }
478 ConstraintType getConstraintType(StringRef Constraint) const override;
479 std::pair<unsigned, const TargetRegisterClass *>
480 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
481 StringRef Constraint, MVT VT) const override;
483 SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
484 bool isVarArg,
485 const SmallVectorImpl<ISD::InputArg> &Ins,
486 const SDLoc &dl, SelectionDAG &DAG,
487 SmallVectorImpl<SDValue> &InVals) const override;
489 SDValue LowerCall(CallLoweringInfo &CLI,
490 SmallVectorImpl<SDValue> &InVals) const override;
492 std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
493 const SmallVectorImpl<ISD::OutputArg> &,
494 unsigned retAlignment,
495 ImmutableCallSite CS) const;
497 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
498 const SmallVectorImpl<ISD::OutputArg> &Outs,
499 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
500 SelectionDAG &DAG) const override;
502 void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
503 std::vector<SDValue> &Ops,
504 SelectionDAG &DAG) const override;
// Public pointer to the target machine.
// NOTE(review): presumably set by the constructor from its TM argument --
// the assignment is not visible in this header; confirm in the .cpp file.
506 const NVPTXTargetMachine *nvTM;
508 // PTX always uses 32-bit shift amounts
// Both parameters are ignored; the result is always MVT::i32.
509 MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
510 return MVT::i32;
511 }
513 TargetLoweringBase::LegalizeTypeAction
514 getPreferredVectorAction(MVT VT) const override;
516 // Get the degree of precision we want from 32-bit floating point division
517 // operations.
518 //
519 // 0 - Use ptx div.approx
520 // 1 - Use ptx div.full (approximate, but less so than div.approx)
521 // 2 - Use IEEE-compliant div instructions, if available.
522 int getDivF32Level() const;
524 // Get whether we should use a precise or approximate 32-bit floating point
525 // sqrt instruction.
526 bool usePrecSqrtF32() const;
528 // Get whether we should use instructions that flush floating-point denormals
529 // to sign-preserving zero.
530 bool useF32FTZ(const MachineFunction &MF) const;
532 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
533 int &ExtraSteps, bool &UseOneConst,
534 bool Reciprocal) const override;
// Always returns 2. See TargetLowering::combineRepeatedFPDivisors for how the
// base class interprets this threshold.
536 unsigned combineRepeatedFPDivisors() const override { return 2; }
538 bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
539 bool allowUnsafeFPMath(MachineFunction &MF) const;
// Unconditionally true, for every value type.
541 bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
// Unconditionally true; VT is not consulted.
543 bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
545 // The default is to transform llvm.ctlz(x, false) (where false indicates that
546 // x == 0 is not undefined behavior) into a branch that checks whether x is 0
547 // and avoids calling ctlz in that case. We have a dedicated ctlz
548 // instruction, so we say that ctlz is cheap to speculate.
549 bool isCheapToSpeculateCtlz() const override { return true; }
551 private:
552 const NVPTXSubtarget &STI; // cache the subtarget here
553 SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
// Per-operation lowering helpers, all sharing the (Op, DAG) signature.
// NOTE(review): presumably dispatched from LowerOperation -- confirm in the
// implementation file.
555 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
556 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
557 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
559 SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
560 SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
561 SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
563 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
564 SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
566 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
567 SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
568 SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
570 SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
571 SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
573 SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
575 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
576 SelectionDAG &DAG) const override;
577 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
579 unsigned getArgumentAlignment(SDValue Callee, ImmutableCallSite CS, Type *Ty,
580 unsigned Idx, const DataLayout &DL) const;
581 };
582 } // namespace llvm
584 #endif