1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 def immFloat0 : PatLeaf<(fpimm), [{
10 float f = (float)N->getValueAPF().convertToFloat();
14 def immFloat1 : PatLeaf<(fpimm), [{
15 float f = (float)N->getValueAPF().convertToFloat();
19 def immDouble0 : PatLeaf<(fpimm), [{
20 double d = (double)N->getValueAPF().convertToDouble();
24 def immDouble1 : PatLeaf<(fpimm), [{
25 double d = (double)N->getValueAPF().convertToDouble();
31 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
34 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
37 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
41 // A node that will be replaced with the current PTX version.
43 SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
44 return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
46 // (i32 0) will be XForm'ed to the currently used PTX version.
47 dag version = (PTXVerXform (i32 0));
51 // Generates list of n sequential register names.
52 // E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
// Built recursively: for a non-zero n, the result is the list produced for
// n-1 with the string prefix # (n-1) appended.
53 class RegSeq<int n, string prefix> {
54 list<string> ret = !if(n, !listconcat(RegSeq<!add(n,-1), prefix>.ret,
55 [prefix # !add(n, -1)]),
// Supplies the list of threadmask_imm values to iterate over when the shfl
// variants are instantiated: [0,1] when sync is set, only [0] otherwise.
59 class THREADMASK_INFO<bit sync> {
60 list<bit> ret = !if(sync, [0,1], [0]);
63 //-----------------------------------
64 // Synchronization and shuffle functions
65 //-----------------------------------
66 let isConvergent = 1 in {
// int_nvvm_barrier0: no operands.
67 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
69 [(int_nvvm_barrier0)]>;
// int_nvvm_barrier_n: one 32-bit register operand.
70 def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
72 [(int_nvvm_barrier_n Int32Regs:$src1)]>;
// int_nvvm_barrier: two 32-bit register operands, printed as
// "bar.sync $src1, $src2".
73 def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
74 "bar.sync \t$src1, $src2;",
75 [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// Reducing barriers. Each takes a 32-bit $pred, turns it into the predicate
// register %p1 with setp.ne.u32 against 0, and feeds %p1 to a bar.red
// instruction on barrier 0.
// popc form: bar.red.popc.u32 writes $dst directly.
76 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
78 ".reg .pred \t%p1; \n\t",
79 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
80 "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
82 [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
// and form: the reduction lands in predicate %p2, which selp.u32 converts
// into 1 or 0 in $dst.
83 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
85 ".reg .pred \t%p1; \n\t",
86 ".reg .pred \t%p2; \n\t",
87 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
88 "bar.red.and.pred \t%p2, 0, %p1; \n\t",
89 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
91 [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
// or form: same shape as the and form, using bar.red.or.pred.
92 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
94 ".reg .pred \t%p1; \n\t",
95 ".reg .pred \t%p2; \n\t",
96 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
97 "bar.red.or.pred \t%p2, 0, %p1; \n\t",
98 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
100 [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// bar.sync with an immediate operand. Unlike the definitions below, this one
// carries no Requires clause.
102 def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
103 [(int_nvvm_bar_sync imm:$i)]>;
// bar.warp.sync: _I takes an immediate, _R takes a 32-bit register.
// Both require hasPTX60 and hasSM30.
105 def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
106 [(int_nvvm_bar_warp_sync imm:$i)]>,
107 Requires<[hasPTX60, hasSM30]>;
108 def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
109 [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
110 Requires<[hasPTX60, hasSM30]>;
// barrier.sync with a single operand: _I immediate, _R register.
// Both require hasPTX60 and hasSM30.
112 def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
113 [(int_nvvm_barrier_sync imm:$i)]>,
114 Requires<[hasPTX60, hasSM30]>;
115 def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
116 [(int_nvvm_barrier_sync Int32Regs:$i)]>,
117 Requires<[hasPTX60, hasSM30]>;
// barrier.sync with an explicit count (int_nvvm_barrier_sync_cnt).
// Four variants cover every register/immediate combination of the operands;
// in the suffix the first letter describes $id and the second $cnt
// (R = Int32Regs, I = i32imm). All require hasPTX60 and hasSM30.
119 def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
120 "barrier.sync \t$id, $cnt;",
121 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
122 Requires<[hasPTX60, hasSM30]>;
123 def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
124 "barrier.sync \t$id, $cnt;",
125 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
126 Requires<[hasPTX60, hasSM30]>;
127 def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
128 "barrier.sync \t$id, $cnt;",
129 [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
130 Requires<[hasPTX60, hasSM30]>;
131 def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
132 "barrier.sync \t$id, $cnt;",
133 [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
134 Requires<[hasPTX60, hasSM30]>;
// Template for a single shfl instruction variant.
//   sync           - selects the "shfl.sync." spelling and the "sync_"
//                    intrinsic name, and appends the $threadmask operand.
//   mode           - shuffle mode string supplied by the instantiation site.
//   reg            - "i32" or "f32"; picks Int32Regs or Float32Regs for rc.
//   return_pred    - adds an Int1Regs $pred output, printed after "|".
//   offset_imm, mask_imm, threadmask_imm
//                  - choose i32imm instead of Int32Regs for that operand.
// The base NVPTXInst is created with empty operand lists and a "?" asm
// string; the real values are installed with `let` below.
136 class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
137 bit offset_imm, bit mask_imm, bit threadmask_imm>
138 : NVPTXInst<(outs), (ins), "?", []> {
139 NVPTXRegClass rc = !cond(
140 !eq(reg, "i32"): Int32Regs,
141 !eq(reg, "f32"): Float32Regs);
// The intrinsic record is looked up by its assembled name.
142 string IntrName = "int_nvvm_shfl_"
143 # !if(sync, "sync_", "")
146 # !if(return_pred, "p", "");
147 Intrinsic Intr = !cast<Intrinsic>(IntrName);
// Input operands are concatenated from per-operand dags so that each one can
// independently be a register or an immediate.
148 let InOperandList = !con(
150 !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
153 !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
154 !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
156 let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
157 let AsmString = "shfl."
158 # !if(sync, "sync.", "")
161 # !if(return_pred, "|$pred", "") # ", "
162 # "$src, $offset, $mask"
163 # !if(sync, ", $threadmask", "")
167 !foreach(tmp, OutOperandList,
169 !subst(i32imm, imm, tmp))),
170 (set !foreach(tmp, InOperandList,
172 !subst(i32imm, imm, tmp))))
// Instantiate SHFL_INSTR for every combination of sync, mode, register type,
// predicate return and register/immediate operand kinds. threadmask_imm only
// takes the value 1 for sync variants (see THREADMASK_INFO). Non-sync
// variants additionally require hasSHFL.
176 foreach sync = [0, 1] in {
177 foreach mode = ["up", "down", "bfly", "idx"] in {
178 foreach regclass = ["i32", "f32"] in {
179 foreach return_pred = [0, 1] in {
180 foreach offset_imm = [0, 1] in {
181 foreach mask_imm = [0, 1] in {
182 foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
183 def : SHFL_INSTR<sync, mode, regclass, return_pred,
184 offset_imm, mask_imm, threadmask_imm>,
185 Requires<!if(sync, [hasSM30], [hasSM30, hasSHFL])>;
194 // vote.{all,any,uni,ballot}
// One anonymous instruction per defm: takes a predicate register and writes
// a result of class `regclass`. `mode` is the text that follows "vote." in
// the emitted instruction. Requires hasPTX60 and hasSM30.
195 multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
196 def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
197 "vote." # mode # " \t$dest, $pred;",
198 [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
199 Requires<[hasPTX60, hasSM30]>;
// all/any/uni produce a predicate; ballot produces a 32-bit value.
202 defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
203 defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
204 defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
205 defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
207 // vote.sync.{all,any,uni,ballot}
// Same shape as VOTE with an extra $mask operand, which the intrinsic takes
// first and the instruction prints last. Two variants per defm:
//   i - $mask is an immediate (i32imm)
//   r - $mask is a 32-bit register
// Both require hasPTX60 and hasSM30.
208 multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
209 def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
210 "vote.sync." # mode # " \t$dest, $pred, $mask;",
211 [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
212 Requires<[hasPTX60, hasSM30]>;
213 def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
214 "vote.sync." # mode #" \t$dest, $pred, $mask;",
215 [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
216 Requires<[hasPTX60, hasSM30]>;
// all/any/uni produce a predicate; ballot produces a 32-bit value.
219 defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
220 defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
221 defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
222 defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
// match.any.sync: four variants per defm covering register/immediate forms of
// both operands. In the suffix the first letter describes $value
// (i = ImmOp immediate, r = regclass register) and the second describes
// $mask (i = i32imm, r = Int32Regs). The intrinsic takes the mask first; the
// instruction prints "$dest, $value, $mask". All require hasPTX60 and hasSM70.
224 multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
226 def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
227 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
228 [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
229 Requires<[hasPTX60, hasSM70]>;
230 def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
231 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
232 [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
233 Requires<[hasPTX60, hasSM70]>;
234 def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
235 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
236 [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
237 Requires<[hasPTX60, hasSM70]>;
238 def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
239 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
240 [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
241 Requires<[hasPTX60, hasSM70]>;
244 defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
246 defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
// match.all.sync with a predicate result: every variant has two outputs,
// $dest (regclass) and $pred (Int1Regs), printed as "$dest|$pred".
// Suffix letters: first = $value kind (i = ImmOp, r = regclass register),
// second = $mask kind (i = i32imm, r = Int32Regs).
// All variants require hasPTX60 and hasSM70.
249 multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
251 def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
252 (ins i32imm:$mask, ImmOp:$value),
253 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
254 [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
255 Requires<[hasPTX60, hasSM70]>;
256 def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
257 (ins Int32Regs:$mask, ImmOp:$value),
258 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
259 [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
260 Requires<[hasPTX60, hasSM70]>;
261 def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
262 (ins i32imm:$mask, regclass:$value),
263 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
264 [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
265 Requires<[hasPTX60, hasSM70]>;
266 def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
267 (ins Int32Regs:$mask, regclass:$value),
268 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
269 [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
270 Requires<[hasPTX60, hasSM70]>;
272 defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
274 defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
277 } // isConvergent = 1
279 //-----------------------------------
280 // Explicit Memory Fence Functions
281 //-----------------------------------
// Operand-less instruction wrapper for the membar intrinsics. StrOp is the
// instruction text and IntOP the intrinsic it is associated with.
282 class MEMBAR<string StrOp, Intrinsic IntOP> :
283 NVPTXInst<(outs), (ins),
// One definition per membar level: cta, gl and sys.
286 def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
287 def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
288 def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
291 //-----------------------------------
293 //-----------------------------------
295 // Map min(1.0, max(0.0, x)) to sat(x)
296 // Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
298 // NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
299 // Same story for fmax, fmin.
// f32: fmin(1.0, fmax(0.0, a)) in all four operand orders (constant first or
// second in either call) is selected as CVT_f32_f32 with CvtSAT.
301 def : Pat<(int_nvvm_fmin_f immFloat1,
302 (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
303 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
304 def : Pat<(int_nvvm_fmin_f immFloat1,
305 (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
306 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
307 def : Pat<(int_nvvm_fmin_f
308 (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
309 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
310 def : Pat<(int_nvvm_fmin_f
311 (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
312 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// f64: the same four operand orders, selected as CVT_f64_f64 with CvtSAT.
314 def : Pat<(int_nvvm_fmin_d immDouble1,
315 (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
316 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
317 def : Pat<(int_nvvm_fmin_d immDouble1,
318 (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
319 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
320 def : Pat<(int_nvvm_fmin_d
321 (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
322 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
323 def : Pat<(int_nvvm_fmin_d
324 (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
325 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
328 // We need a full string for OpcStr here because we need to deal with special cases.
// One-source instruction: $dst = IntOP($src0). Result and source register
// classes are given separately.
330 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
331 NVPTXRegClass src_regclass, Intrinsic IntOP>
332 : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
334 [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
// Two-source instruction: $dst = IntOP($src0, $src1).
336 // We need a full string for OpcStr here because we need to deal with the case
337 // like INT_PTX_NATIVE_POWR_F.
338 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
339 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
340 : NVPTXInst<(outs t_regclass:$dst),
341 (ins s0_regclass:$src0, s1_regclass:$src1),
343 [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
// Three-source instruction: $dst = IntOP($src0, $src1, $src2).
345 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
346 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
347 NVPTXRegClass s2_regclass, Intrinsic IntOP>
348 : NVPTXInst<(outs t_regclass:$dst),
349 (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
351 [(set t_regclass:$dst,
352 (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
// prmt.b32: three 32-bit integer sources, 32-bit integer result.
358 def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
359 Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
// f32 min/max, each with a plain and an .ftz form.
365 def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
366 Float32Regs, Float32Regs, int_nvvm_fmin_f>;
367 def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
368 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
370 def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
371 Float32Regs, Float32Regs, int_nvvm_fmax_f>;
372 def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
373 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
// f64 min/max (no .ftz form is defined here).
375 def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
376 Float64Regs, Float64Regs, int_nvvm_fmin_d>;
377 def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
378 Float64Regs, Float64Regs, int_nvvm_fmax_d>;
// mul.hi for signed/unsigned 32-bit operands.
385 def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
386 Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
387 def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
388 Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
// mul.hi for signed/unsigned 64-bit operands.
390 def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
391 Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
392 def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
393 Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
// f32 multiply with an explicit rounding modifier (rn, rz, rm, rp), each in
// an .ftz and a plain form. The modifier in the def name, the asm string and
// the intrinsic name always agree.
395 def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
396 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
397 def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
398 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
399 def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
400 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
401 def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
402 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
403 def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
404 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
405 def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
406 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
407 def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
408 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
409 def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
410 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
// f64 multiply with an explicit rounding modifier.
412 def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
413 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
414 def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
415 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
416 def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
417 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
418 def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
419 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
// mul24.lo for signed/unsigned 32-bit operands.
421 def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
422 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
423 def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
424 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
// f32 approximate divide, .ftz and plain forms.
430 def INT_NVVM_DIV_APPROX_FTZ_F
431 : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
432 Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
433 def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
434 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
// f32 divide with an explicit rounding modifier (rn, rz, rm, rp), each in an
// .ftz and a plain form.
436 def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
437 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
438 def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
439 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
440 def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
441 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
442 def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
443 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
444 def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
445 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
446 def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
447 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
448 def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
449 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
450 def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
451 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
// f64 divide with an explicit rounding modifier.
453 def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
454 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
455 def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
456 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
457 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
458 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
459 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
460 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
// sad for signed/unsigned 32-bit operands; three sources.
466 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
467 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
468 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
469 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
// floor: selected as a same-type CVT with the CvtRMI mode operand
// (CvtRMI_FTZ for the ftz intrinsic).
475 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
476 (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
477 def : Pat<(int_nvvm_floor_f Float32Regs:$a),
478 (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
479 def : Pat<(int_nvvm_floor_d Float64Regs:$a),
480 (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
// ceil: same-type CVT with CvtRPI / CvtRPI_FTZ.
482 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
483 (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
484 def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
485 (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
486 def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
487 (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
// fabs: real instructions (abs.*) rather than CVT patterns.
493 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
494 Float32Regs, int_nvvm_fabs_ftz_f>;
495 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
496 Float32Regs, int_nvvm_fabs_f>;
498 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
499 Float64Regs, int_nvvm_fabs_d>;
// round: same-type CVT with CvtRNI / CvtRNI_FTZ.
505 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
506 (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
507 def : Pat<(int_nvvm_round_f Float32Regs:$a),
508 (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
509 def : Pat<(int_nvvm_round_d Float64Regs:$a),
510 (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
// trunc: same-type CVT with CvtRZI / CvtRZI_FTZ.
516 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
517 (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
518 def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
519 (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
520 def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
521 (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
// saturate: same-type CVT with CvtSAT / CvtSAT_FTZ.
527 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
528 (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
529 def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
530 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
531 def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
532 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// ex2.approx: f32 (.ftz and plain) and f64 forms.
538 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
539 Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
540 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
541 Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
542 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
543 Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
// lg2.approx: f32 (.ftz and plain) and f64 forms.
545 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
546 Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
547 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
548 Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
549 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
550 Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
// sin.approx / cos.approx: f32 only, .ftz and plain forms.
556 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
557 Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
558 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
559 Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
561 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
562 Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
563 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
564 Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
// f32 fused multiply-add with an explicit rounding modifier (rn, rz, rm, rp),
// each in an .ftz and a plain form. Three sources, one result.
570 def INT_NVVM_FMA_RN_FTZ_F
571 : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
572 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
573 def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
574 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
575 def INT_NVVM_FMA_RZ_FTZ_F
576 : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
577 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
578 def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
579 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
580 def INT_NVVM_FMA_RM_FTZ_F
581 : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
582 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
583 def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
584 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
585 def INT_NVVM_FMA_RP_FTZ_F
586 : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
587 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
588 def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
589 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
// f64 fused multiply-add with an explicit rounding modifier.
591 def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
592 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
593 def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
594 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
595 def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
596 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
597 def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
598 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
// f32 reciprocal with an explicit rounding modifier (rn, rz, rm, rp), each in
// an .ftz and a plain form.
604 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
605 Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
606 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
607 Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
608 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
609 Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
610 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
611 Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
612 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
613 Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
614 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
615 Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
616 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
617 Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
618 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
619 Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
// f64 reciprocal with an explicit rounding modifier.
621 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
622 Float64Regs, int_nvvm_rcp_rn_d>;
623 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
624 Float64Regs, int_nvvm_rcp_rz_d>;
625 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
626 Float64Regs, int_nvvm_rcp_rm_d>;
627 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
628 Float64Regs, int_nvvm_rcp_rp_d>;
// f64 approximate reciprocal; only the .ftz form is defined here.
630 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
631 Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
// f32 square root with an explicit rounding modifier (rn, rz, rm, rp), each
// in an .ftz and a plain form, followed by the two approximate forms.
637 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
638 Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
639 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
640 Float32Regs, int_nvvm_sqrt_rn_f>;
641 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
642 Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
643 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
644 Float32Regs, int_nvvm_sqrt_rz_f>;
645 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
646 Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
647 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
648 Float32Regs, int_nvvm_sqrt_rm_f>;
649 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
650 Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
651 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
652 Float32Regs, int_nvvm_sqrt_rp_f>;
653 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
654 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
655 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
656 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
// f64 square root with an explicit rounding modifier.
658 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
659 Float64Regs, int_nvvm_sqrt_rn_d>;
660 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
661 Float64Regs, int_nvvm_sqrt_rz_d>;
662 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
663 Float64Regs, int_nvvm_sqrt_rm_d>;
664 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
665 Float64Regs, int_nvvm_sqrt_rp_d>;
667 // nvvm_sqrt intrinsic
// int_nvvm_sqrt_f is mapped onto one of the instructions above depending on
// the predicates: do_SQRTF32_RN selects the rn forms, doF32FTZ selects the
// .ftz forms, and the last pattern (no predicate) uses sqrt.approx.f32.
668 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
669 (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
670 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
671 (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
672 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
673 (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
674 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
675 (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
// rsqrt.approx: f32 (.ftz and plain) and f64 forms.
681 def INT_NVVM_RSQRT_APPROX_FTZ_F
682 : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
683 int_nvvm_rsqrt_approx_ftz_f>;
684 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
685 Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
686 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
687 Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
// f32 add with an explicit rounding modifier (rn, rz, rm, rp), each in an
// .ftz and a plain form.
693 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
694 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
695 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
696 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
697 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
698 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
699 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
700 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
701 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
702 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
703 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
704 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
705 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
706 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
707 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
708 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
// f64 add with an explicit rounding modifier.
710 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
711 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
712 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
713 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
714 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
715 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
716 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
717 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
// Conversion intrinsics are selected as CVT_<dst>_<src> instructions; the
// second operand is the conversion mode named after the intrinsic's suffix.
// d2f: f64 -> f32, with and without _FTZ.
723 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
724 (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
725 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
726 (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
727 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
728 (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
729 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
730 (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
731 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
732 (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
733 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
734 (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
735 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
736 (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
737 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
738 (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
// d2i: f64 -> s32, using the *I conversion modes.
740 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
741 (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
742 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
743 (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
744 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
745 (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
746 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
747 (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
// d2ui: f64 -> u32.
749 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
750 (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
751 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
752 (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
753 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
754 (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
755 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
756 (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
// i2d: s32 -> f64.
758 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
759 (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
760 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
761 (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
762 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
763 (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
764 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
765 (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
// ui2d: u32 -> f64.
767 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
768 (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
769 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
770 (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
771 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
772 (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
773 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
774 (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
// int_nvvm_f2i_*: selected to CVT_s32_f32 on the Float32Regs operand. Each
// rounding suffix appears twice: the plain intrinsic uses Cvt<MODE>I and the
// *_ftz intrinsic uses Cvt<MODE>I_FTZ.
776 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
777 (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
778 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
779 (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
780 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
781 (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
782 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
783 (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
784 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
785 (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
786 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
787 (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
788 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
789 (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
790 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
791 (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
// int_nvvm_f2ui_*: same eight suffix/mode pairings as the f2i group above,
// but the selected instruction is CVT_u32_f32.
793 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
794 (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
795 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
796 (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
797 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
798 (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
799 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
800 (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
801 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
802 (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
803 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
804 (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
805 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
806 (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
807 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
808 (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
// int_nvvm_i2f_{rn,rz,rm,rp}: selected to CVT_f32_s32 on the Int32Regs
// operand, with the CvtRN / CvtRZ / CvtRM / CvtRP mode operand that matches
// the intrinsic suffix.
810 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
811 (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
812 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
813 (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
814 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
815 (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
816 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
817 (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
// int_nvvm_ui2f_{rn,rz,rm,rp}: same shape as the i2f group above, but the
// selected instruction is CVT_f32_u32.
819 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
820 (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
821 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
822 (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
823 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
824 (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
825 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
826 (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
// Instruction for int_nvvm_lohi_i2d, built from F_MATH_2. The asm string emits
// a single mov.b64 that packs the pair ($src0, $src1) into $dst. The class
// arguments are Float64Regs followed by two Int32Regs.
// NOTE(review): presumably that is the result class followed by the two
// source classes -- confirm against the F_MATH_2 definition.
828 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
829 Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
// Instruction for int_nvvm_d2i_lo, built from F_MATH_1 with Int32Regs and
// Float64Regs as class arguments. The visible asm fragments declare a scratch
// 32-bit register %temp, then unpack $src0 with mov.b64 into the pair
// {$dst, %temp}, so $dst receives the first element and %temp is discarded.
// NOTE(review): per the name the first element is the low word -- confirm
// against the PTX mov (unpack) semantics.
831 def INT_NVVM_D2I_LO : F_MATH_1<
833 ".reg .b32 %temp; \n\t",
834 "mov.b64 \t{$dst, %temp}, $src0;\n\t",
836 Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
// Instruction for int_nvvm_d2i_hi. Same shape as INT_NVVM_D2I_LO, but the
// unpack target is {%temp, $dst}, so $dst receives the second element of the
// pair instead of the first.
837 def INT_NVVM_D2I_HI : F_MATH_1<
839 ".reg .b32 %temp; \n\t",
840 "mov.b64 \t{%temp, $dst}, $src0;\n\t",
842 Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
// int_nvvm_f2ll_*: selected to CVT_s64_f32 on the Float32Regs operand. The
// plain intrinsics use Cvt<MODE>I and the *_ftz intrinsics use Cvt<MODE>I_FTZ,
// for MODE in RN, RZ, RM, RP.
844 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
845 (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
846 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
847 (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
848 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
849 (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
850 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
851 (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
852 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
853 (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
854 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
855 (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
856 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
857 (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
858 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
859 (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
// int_nvvm_f2ull_*: same eight suffix/mode pairings as the f2ll group above,
// but the selected instruction is CVT_u64_f32.
861 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
862 (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
863 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
864 (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
865 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
866 (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
867 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
868 (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
869 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
870 (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
871 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
872 (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
873 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
874 (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
875 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
876 (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
// int_nvvm_d2ll_{rn,rz,rm,rp}: selected to CVT_s64_f64 on the Float64Regs
// operand, using the CvtRNI / CvtRZI / CvtRMI / CvtRPI mode operands.
878 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
879 (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
880 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
881 (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
882 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
883 (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
884 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
885 (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
// int_nvvm_d2ull_{rn,rz,rm,rp}: same shape as the d2ll group above, but the
// selected instruction is CVT_u64_f64.
887 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
888 (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
889 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
890 (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
891 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
892 (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
893 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
894 (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
// int_nvvm_ll2f_{rn,rz,rm,rp}: selected to CVT_f32_s64 on the Int64Regs
// operand, with the CvtRN / CvtRZ / CvtRM / CvtRP mode operand that matches
// the intrinsic suffix.
896 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
897 (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
898 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
899 (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
900 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
901 (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
902 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
903 (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
// int_nvvm_ull2f_{rn,rz,rm,rp}: same shape as the ll2f group above, but the
// selected instruction is CVT_f32_u64.
905 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
906 (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
907 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
908 (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
909 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
910 (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
911 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
912 (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
// int_nvvm_ll2d_{rn,rz,rm,rp}: selected to CVT_f64_s64 on the Int64Regs
// operand, with the CvtRN / CvtRZ / CvtRM / CvtRP mode operand that matches
// the intrinsic suffix.
914 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
915 (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
916 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
917 (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
918 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
919 (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
920 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
921 (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
// int_nvvm_ull2d_{rn,rz,rm,rp}: same shape as the ll2d group above, but the
// selected instruction is CVT_f64_u64.
923 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
924 (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
925 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
926 (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
927 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
928 (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
929 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
930 (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
// int_nvvm_f2h_rn / int_nvvm_f2h_rn_ftz: a two-instruction selection. The
// Float32Regs operand first goes through CVT_f16_f32 (mode CvtRN, or CvtRN_FTZ
// for the _ftz intrinsic), and that result is wrapped in BITCONVERT_16_F2I.
// NOTE(review): the wrapper presumably moves the f16 value into an integer
// register because the intrinsic returns an integer -- confirm against the
// intrinsic's declared type.
933 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
934 (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
935 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
936 (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
// Bitcast intrinsics, each built from F_MATH_1 with a plain register move as
// the asm string. The 32-bit pair (f2i / i2f) emits mov.b32 between
// Float32Regs and Int32Regs.
942 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
943 Float32Regs, int_nvvm_bitcast_f2i>;
944 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
945 Int32Regs, int_nvvm_bitcast_i2f>;
// The 64-bit pair (ll2d / d2ll) emits mov.b64 between Float64Regs and
// Int64Regs.
947 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
948 Int64Regs, int_nvvm_bitcast_ll2d>;
949 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
950 Float64Regs, int_nvvm_bitcast_d2ll>;
// Common base for the fns.b32 instruction variants.
//   ins      - input operand dag; it must name its operands $mask, $base and
//              $offset, because the asm string refers to them by those names.
//   Operands - the dag whose result is assigned to $dst in the pattern.
// The result is always Int32Regs:$dst. The instruction is guarded by the
// hasPTX60 and hasSM30 predicates.
956 class INT_FNS_MBO<dag ins, dag Operands>
957 : NVPTXInst<(outs Int32Regs:$dst), ins,
958 "fns.b32 \t$dst, $mask, $base, $offset;",
959 [(set Int32Regs:$dst, Operands )]>,
960 Requires<[hasPTX60, hasSM30]>;
// The eight register/immediate combinations of int_nvvm_fns. The three-letter
// suffix gives the kind of $mask, $base and $offset in that order: 'r' is an
// Int32Regs operand and 'i' is an i32imm operand matched with `imm` in the
// pattern.
962 def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
963 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
964 def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
965 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
966 def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
967 (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
968 def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
969 (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
970 def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
971 (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
972 def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
973 (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
974 def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
975 (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
976 def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
977 (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
979 //-----------------------------------
981 //-----------------------------------
// PatFrag wrappers that forward `ops` and `frag` unchanged and supply a fixed
// third PatFrag argument: AS_match.global, AS_match.shared or AS_match.generic
// respectively.
// NOTE(review): AS_match is defined outside these lines. It presumably holds
// the ChkMemSDNodeAddressSpace(...) address-space checks seen near the top of
// the file -- confirm.
983 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
984 : PatFrag<ops, frag, AS_match.global>;
985 class ATOMIC_SHARED_CHK <dag ops, dag frag>
986 : PatFrag<ops, frag, AS_match.shared>;
987 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
988 : PatFrag<ops, frag, AS_match.generic>;
// Two-operand atomic (address + one value) for one pointer register class.
//   ptrclass - register class of $addr.
//   regclass - register class of the value operand $b and of the result $dst.
//   SpaceStr, OpcStr, TypeStr - concatenated after "atom", in that order, to
//              form the mnemonic, e.g. "atom" # ".global" # ".add" # ".u32".
//   IntOp    - PatFrag matched as (IntOp $addr, $b).
//   IMMType  - operand type of $b in the `imm` variant.
//   IMM      - SDNode used to match $b in the `imm` variant.
//   Pred     - predicate list.
// NOTE(review): no use of Pred is visible in the lines shown here -- confirm
// how it is applied against the full definition.
// Two records are defined: `reg` (value in a register) and `imm` (value as an
// immediate).
990 multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
991 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
992 Operand IMMType, SDNode IMM, list<Predicate> Pred> {
993 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
994 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
995 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
// The trailing "" argument to !strconcat below appends an empty string, so
// the resulting asm text is the same as for `reg`.
997 def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
998 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
999 [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
// Instantiates F_ATOMIC_2_imp twice, once with Int32Regs as the pointer class
// (records prefixed `p32`) and once with Int64Regs (`p64`). All other
// arguments are forwarded unchanged. Pred defaults to the empty list.
1002 multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1003 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
1004 list<Predicate> Pred = []> {
1005 defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1006 IntOp, IMMType, IMM, Pred>;
1007 defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1008 IntOp, IMMType, IMM, Pred>;
// Two-operand atomic that negates its value operand before the atomic op.
// The visible asm fragments:
//   1. declare a scratch register `temp` of type .s<TypeStr>;
//   2. neg.s<TypeStr> temp, $b;
//   3. atom<SpaceStr><OpcStr>.u<TypeStr> $dst, [$addr], temp.
// TypeStr is therefore expected to be a bare width such as "32" or "64",
// since the ".s" / ".u" prefixes are added here.
// Only a `reg` record is visible. IMMType is accepted but not referenced in
// the lines shown.
// NOTE(review): no use of Pred is visible here either -- confirm against the
// full definition.
1011 // has 2 operands, neg the second one
1012 multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1013 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1014 Operand IMMType, list<Predicate> Pred> {
1015 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1018 ".reg \t.s", TypeStr, " temp; \n\t",
1019 "neg.s", TypeStr, " \ttemp, $b; \n\t",
1020 "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
1022 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
// Instantiates F_ATOMIC_2_NEG_imp twice, once with Int32Regs as the pointer
// class (`p32`) and once with Int64Regs (`p64`). All other arguments are
// forwarded unchanged. Pred defaults to the empty list.
1025 multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
1026 string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
1027 list<Predicate> Pred = []> {
1028 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1029 IntOp, IMMType, Pred> ;
1030 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1031 IntOp, IMMType, Pred> ;
// Three-operand atomic (address + two values $b and $c) for one pointer
// register class. The mnemonic is "atom" # SpaceStr # OpcStr # TypeStr and the
// pattern is (IntOp $addr, $b, $c). Four records cover the register/immediate
// combinations of the two value operands:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediates use IMMType as the operand type and are matched with the `imm`
// node directly; unlike F_ATOMIC_2_imp there is no IMM parameter.
// NOTE(review): no use of Pred is visible in the lines shown -- confirm
// against the full definition.
1035 multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1036 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1037 Operand IMMType, list<Predicate> Pred> {
1038 def reg : NVPTXInst<(outs regclass:$dst),
1039 (ins ptrclass:$addr, regclass:$b, regclass:$c),
1040 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1041 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
1044 def imm1 : NVPTXInst<(outs regclass:$dst),
1045 (ins ptrclass:$addr, IMMType:$b, regclass:$c),
1046 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1047 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
// The trailing "" argument to !strconcat below appends an empty string; the
// asm text is the same as in the other variants.
1050 def imm2 : NVPTXInst<(outs regclass:$dst),
1051 (ins ptrclass:$addr, regclass:$b, IMMType:$c),
1052 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
1053 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
1056 def imm3 : NVPTXInst<(outs regclass:$dst),
1057 (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
1058 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1059 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
// Instantiates F_ATOMIC_3_imp twice, once with Int32Regs as the pointer class
// (`p32`) and once with Int64Regs (`p64`). All other arguments are forwarded
// unchanged. Pred defaults to the empty list.
1062 multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1063 string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
1064 defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1065 IntOp, IMMType, Pred>;
1066 defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1067 IntOp, IMMType, Pred>;
// Address-space-qualified fragments for atomic add. The suffix selects the
// wrapper class: _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK. The 32- and 64-bit integer forms wrap
// atomic_load_add_32 / atomic_load_add_64.
1072 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1073 (atomic_load_add_32 node:$a, node:$b)>;
1074 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1075 (atomic_load_add_32 node:$a, node:$b)>;
1076 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1077 (atomic_load_add_32 node:$a, node:$b)>;
1078 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1079 (atomic_load_add_64 node:$a, node:$b)>;
1080 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1081 (atomic_load_add_64 node:$a, node:$b)>;
1082 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1083 (atomic_load_add_64 node:$a, node:$b)>;
// The fragments without a width in their name wrap atomic_load_fadd, i.e. the
// floating-point add node, despite being named atomic_load_add_*.
1084 def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1085 (atomic_load_fadd node:$a, node:$b)>;
1086 def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1087 (atomic_load_fadd node:$a, node:$b)>;
1088 def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1089 (atomic_load_fadd node:$a, node:$b)>;
// atom.add instantiations via F_ATOMIC_2. Name scheme is
// INT_PTX_ATOM_ADD_<space>_<type>: G passes ".global", S passes ".shared",
// GEN passes "" (no state-space qualifier in the mnemonic).
// The *_GEN_*_USE_G variants match the same *_gen fragment as the GEN variant
// but print ".global".
// NOTE(review): no predicate argument separates GEN from GEN_*_USE_G in the
// lines shown -- confirm how selection between the two is intended to work.
//
// 32-bit integer add (.u32, i32imm immediates).
1091 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
1092 atomic_load_add_32_g, i32imm, imm>;
1093 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
1094 atomic_load_add_32_s, i32imm, imm>;
1095 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
1096 atomic_load_add_32_gen, i32imm, imm>;
1097 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1098 ".add", atomic_load_add_32_gen, i32imm, imm>;
// 64-bit integer add (.u64, i64imm immediates).
1100 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
1101 atomic_load_add_64_g, i64imm, imm>;
1102 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
1103 atomic_load_add_64_s, i64imm, imm>;
1104 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
1105 atomic_load_add_64_gen, i64imm, imm>;
1106 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1107 ".add", atomic_load_add_64_gen, i64imm, imm>;
// f32 add (.f32, f32imm operands matched with fpimm). These use the fadd-based
// fragments and pass no predicate list.
1109 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
1110 atomic_load_add_g, f32imm, fpimm>;
1111 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
1112 atomic_load_add_s, f32imm, fpimm>;
1113 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
1114 atomic_load_add_gen, f32imm, fpimm>;
// f64 add (.f64, f64imm). Same fadd-based fragments as the f32 group, but
// these pass [hasAtomAddF64] as the predicate list.
1116 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
1117 atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
1118 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
1119 atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
1120 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
1121 atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
// Address-space-qualified fragments for atomic subtract, wrapping
// atomic_load_sub_32 / atomic_load_sub_64. The suffix selects the wrapper
// class: _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK.
1125 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1126 (atomic_load_sub_32 node:$a, node:$b)>;
1127 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1128 (atomic_load_sub_32 node:$a, node:$b)>;
1129 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1130 (atomic_load_sub_32 node:$a, node:$b)>;
1131 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1132 (atomic_load_sub_64 node:$a, node:$b)>;
1133 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1134 (atomic_load_sub_64 node:$a, node:$b)>;
1135 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1136 (atomic_load_sub_64 node:$a, node:$b)>;
// Atomic subtract instantiations via F_ATOMIC_2_NEG. The opcode string is
// ".add", not a subtract: F_ATOMIC_2_NEG_imp negates the value operand and
// then emits atom...add. TypeStr is the bare width ("32" / "64") because that
// multiclass adds the ".s" / ".u" prefixes itself.
// The *_GEN_*_USE_G variants match the *_gen fragment but print ".global".
// NOTE(review): no predicate argument separates GEN from GEN_*_USE_G in the
// lines shown -- confirm how selection between the two is intended to work.
1138 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
1139 atomic_load_sub_32_g, i32imm>;
1140 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
1141 atomic_load_sub_64_g, i64imm>;
1142 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
1143 atomic_load_sub_32_gen, i32imm>;
1144 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
1145 ".add", atomic_load_sub_32_gen, i32imm>;
1146 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
1147 atomic_load_sub_32_s, i32imm>;
1148 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
1149 atomic_load_sub_64_s, i64imm>;
1150 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
1151 atomic_load_sub_64_gen, i64imm>;
1152 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
1153 ".add", atomic_load_sub_64_gen, i64imm>;
// Address-space-qualified fragments for atomic swap, wrapping
// atomic_swap_32 / atomic_swap_64. The suffix selects the wrapper class:
// _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK.
1157 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1158 (atomic_swap_32 node:$a, node:$b)>;
1159 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1160 (atomic_swap_32 node:$a, node:$b)>;
1161 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1162 (atomic_swap_32 node:$a, node:$b)>;
1163 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1164 (atomic_swap_64 node:$a, node:$b)>;
1165 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1166 (atomic_swap_64 node:$a, node:$b)>;
1167 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1168 (atomic_swap_64 node:$a, node:$b)>;
// Atomic swap instantiations via F_ATOMIC_2: opcode string ".exch" with the
// untyped ".b32" / ".b64" type strings. G / S / GEN pass ".global" /
// ".shared" / "" as the space string.
// The *_GEN_*_USE_G variants match the *_gen fragment but print ".global".
// NOTE(review): no predicate argument separates GEN from GEN_*_USE_G in the
// lines shown -- confirm how selection between the two is intended to work.
1170 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
1171 atomic_swap_32_g, i32imm, imm>;
1172 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
1173 atomic_swap_32_s, i32imm, imm>;
1174 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
1175 atomic_swap_32_gen, i32imm, imm>;
1176 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1177 ".exch", atomic_swap_32_gen, i32imm, imm>;
1178 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
1179 atomic_swap_64_g, i64imm, imm>;
1180 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
1181 atomic_swap_64_s, i64imm, imm>;
1182 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
1183 atomic_swap_64_gen, i64imm, imm>;
1184 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1185 ".exch", atomic_swap_64_gen, i64imm, imm>;
// Address-space-qualified fragments for atomic max. The suffix selects the
// wrapper class: _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK.
// These wrap atomic_load_max_32 / atomic_load_max_64.
1189 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1190 , (atomic_load_max_32 node:$a, node:$b)>;
1191 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1192 (atomic_load_max_32 node:$a, node:$b)>;
1193 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1194 (atomic_load_max_32 node:$a, node:$b)>;
1195 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1196 , (atomic_load_max_64 node:$a, node:$b)>;
1197 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1198 (atomic_load_max_64 node:$a, node:$b)>;
1199 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1200 (atomic_load_max_64 node:$a, node:$b)>;
// These wrap atomic_load_umax_32 / atomic_load_umax_64.
1201 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1202 (atomic_load_umax_32 node:$a, node:$b)>;
1203 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1204 (atomic_load_umax_32 node:$a, node:$b)>;
1205 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1206 (atomic_load_umax_32 node:$a, node:$b)>;
1207 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1208 (atomic_load_umax_64 node:$a, node:$b)>;
1209 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1210 (atomic_load_umax_64 node:$a, node:$b)>;
1211 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1212 (atomic_load_umax_64 node:$a, node:$b)>;
// Atomic max instantiations via F_ATOMIC_2, all with opcode string ".max".
// The LOAD_MAX_* records use the atomic_load_max_* fragments with ".s32" /
// ".s64" type strings. The LOAD_UMAX_* records use the atomic_load_umax_*
// fragments with ".u32" / ".u64".
// G / S / GEN pass ".global" / ".shared" / "" as the space string. The
// *_GEN_*_USE_G variants match the *_gen fragment but print ".global".
// NOTE(review): no predicate argument separates GEN from GEN_*_USE_G in the
// lines shown -- confirm how selection between the two is intended to work.
1214 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1215 ".max", atomic_load_max_32_g, i32imm, imm>;
1216 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1217 ".max", atomic_load_max_32_s, i32imm, imm>;
1218 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
1219 atomic_load_max_32_gen, i32imm, imm>;
1220 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1221 ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
1222 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1223 ".max", atomic_load_max_64_g, i64imm, imm>;
1224 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1225 ".max", atomic_load_max_64_s, i64imm, imm>;
1226 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
1227 atomic_load_max_64_gen, i64imm, imm>;
1228 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1229 ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
1230 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1231 ".max", atomic_load_umax_32_g, i32imm, imm>;
1232 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1233 ".max", atomic_load_umax_32_s, i32imm, imm>;
1234 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
1235 atomic_load_umax_32_gen, i32imm, imm>;
1236 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1237 ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
1238 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1239 ".max", atomic_load_umax_64_g, i64imm, imm>;
1240 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1241 ".max", atomic_load_umax_64_s, i64imm, imm>;
1242 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
1243 atomic_load_umax_64_gen, i64imm, imm>;
1244 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1245 ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
// Address-space-qualified fragments for atomic min. The suffix selects the
// wrapper class: _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK.
// These wrap atomic_load_min_32 / atomic_load_min_64.
1249 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1250 (atomic_load_min_32 node:$a, node:$b)>;
1251 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1252 (atomic_load_min_32 node:$a, node:$b)>;
1253 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1254 (atomic_load_min_32 node:$a, node:$b)>;
1255 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1256 (atomic_load_min_64 node:$a, node:$b)>;
1257 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1258 (atomic_load_min_64 node:$a, node:$b)>;
1259 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1260 (atomic_load_min_64 node:$a, node:$b)>;
// These wrap atomic_load_umin_32 / atomic_load_umin_64.
1261 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1262 (atomic_load_umin_32 node:$a, node:$b)>;
1263 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1264 (atomic_load_umin_32 node:$a, node:$b)>;
1265 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1266 (atomic_load_umin_32 node:$a, node:$b)>;
1267 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1268 (atomic_load_umin_64 node:$a, node:$b)>;
1269 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1270 (atomic_load_umin_64 node:$a, node:$b)>;
1271 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1272 (atomic_load_umin_64 node:$a, node:$b)>;
// Atomic min instantiations via F_ATOMIC_2, all with opcode string ".min".
// The LOAD_MIN_* records use the atomic_load_min_* fragments with ".s32" /
// ".s64" type strings. The LOAD_UMIN_* records use the atomic_load_umin_*
// fragments with ".u32" / ".u64".
// G / S / GEN pass ".global" / ".shared" / "" as the space string. The
// *_GEN_*_USE_G variants match the *_gen fragment but print ".global".
// NOTE(review): no predicate argument separates GEN from GEN_*_USE_G in the
// lines shown -- confirm how selection between the two is intended to work.
1274 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1275 ".min", atomic_load_min_32_g, i32imm, imm>;
1276 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1277 ".min", atomic_load_min_32_s, i32imm, imm>;
1278 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
1279 atomic_load_min_32_gen, i32imm, imm>;
1280 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1281 ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
1282 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1283 ".min", atomic_load_min_64_g, i64imm, imm>;
1284 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1285 ".min", atomic_load_min_64_s, i64imm, imm>;
1286 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
1287 atomic_load_min_64_gen, i64imm, imm>;
1288 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1289 ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
1290 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1291 ".min", atomic_load_umin_32_g, i32imm, imm>;
1292 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1293 ".min", atomic_load_umin_32_s, i32imm, imm>;
1294 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
1295 atomic_load_umin_32_gen, i32imm, imm>;
1296 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1297 ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
1298 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1299 ".min", atomic_load_umin_64_g, i64imm, imm>;
1300 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1301 ".min", atomic_load_umin_64_s, i64imm, imm>;
1302 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
1303 atomic_load_umin_64_gen, i64imm, imm>;
1304 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1305 ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
1307 // atom_inc atom_dec
// Address-space-qualified fragments for atomic inc / dec. Unlike the other
// atomic fragments in this section, these wrap the NVVM intrinsics
// int_nvvm_atomic_load_inc_32 / int_nvvm_atomic_load_dec_32 rather than
// atomic_* nodes. Only 32-bit forms are defined here.
// The suffix selects the wrapper class: _g -> ATOMIC_GLOBAL_CHK,
// _s -> ATOMIC_SHARED_CHK, _gen -> ATOMIC_GENERIC_CHK.
1309 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1310 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1311 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1312 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1313 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1314 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1315 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1316 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1317 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1318 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1319 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1320 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
// Atomic inc / dec instantiations via F_ATOMIC_2: opcode strings ".inc" and
// ".dec", type string ".u32", i32imm immediates. G / S / GEN pass ".global" /
// ".shared" / "" as the space string.
// The *_GEN_32_USE_G variants match the *_gen fragment but print ".global".
// NOTE(review): no predicate argument separates GEN from GEN_32_USE_G in the
// lines shown -- confirm how selection between the two is intended to work.
1322 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
1323 atomic_load_inc_32_g, i32imm, imm>;
1324 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
1325 atomic_load_inc_32_s, i32imm, imm>;
1326 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
1327 atomic_load_inc_32_gen, i32imm, imm>;
1328 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1329 ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1330 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
1331 atomic_load_dec_32_g, i32imm, imm>;
1332 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
1333 atomic_load_dec_32_s, i32imm, imm>;
1334 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
1335 atomic_load_dec_32_gen, i32imm, imm>;
1336 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1337 ".dec", atomic_load_dec_32_gen, i32imm, imm>;
// Address-space-qualified fragments for atomic and, wrapping
// atomic_load_and_32 / atomic_load_and_64. The suffix selects the wrapper
// class: _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK.
1341 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1342 (atomic_load_and_32 node:$a, node:$b)>;
1343 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1344 (atomic_load_and_32 node:$a, node:$b)>;
1345 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1346 (atomic_load_and_32 node:$a, node:$b)>;
1347 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1348 (atomic_load_and_64 node:$a, node:$b)>;
1349 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1350 (atomic_load_and_64 node:$a, node:$b)>;
1351 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1352 (atomic_load_and_64 node:$a, node:$b)>;
// Atomic and instantiations via F_ATOMIC_2: opcode string ".and" with the
// untyped ".b32" / ".b64" type strings. G / S / GEN pass ".global" /
// ".shared" / "" as the space string.
// The *_GEN_*_USE_G variants match the *_gen fragment but print ".global".
// NOTE(review): no predicate argument separates GEN from GEN_*_USE_G in the
// lines shown -- confirm how selection between the two is intended to work.
1354 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
1355 atomic_load_and_32_g, i32imm, imm>;
1356 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
1357 atomic_load_and_32_s, i32imm, imm>;
1358 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
1359 atomic_load_and_32_gen, i32imm, imm>;
1360 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1361 ".and", atomic_load_and_32_gen, i32imm, imm>;
1362 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
1363 atomic_load_and_64_g, i64imm, imm>;
1364 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
1365 atomic_load_and_64_s, i64imm, imm>;
1366 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
1367 atomic_load_and_64_gen, i64imm, imm>;
1368 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1369 ".and", atomic_load_and_64_gen, i64imm, imm>;
// Address-space-qualified fragments for atomic or, wrapping
// atomic_load_or_32 / atomic_load_or_64. The suffix selects the wrapper
// class: _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK.
1373 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1374 (atomic_load_or_32 node:$a, node:$b)>;
1375 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1376 (atomic_load_or_32 node:$a, node:$b)>;
1377 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1378 (atomic_load_or_32 node:$a, node:$b)>;
1379 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1380 (atomic_load_or_64 node:$a, node:$b)>;
1381 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1382 (atomic_load_or_64 node:$a, node:$b)>;
1383 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1384 (atomic_load_or_64 node:$a, node:$b)>;
// Atomic or instantiations via F_ATOMIC_2: opcode string ".or" with the
// untyped ".b32" / ".b64" type strings. G / S / GEN pass ".global" /
// ".shared" / "" as the space string. In this group the S variants are listed
// after the GEN variants.
// The *_GEN_*_USE_G variants match the *_gen fragment but print ".global".
// NOTE(review): no predicate argument separates GEN from GEN_*_USE_G in the
// lines shown -- confirm how selection between the two is intended to work.
1386 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
1387 atomic_load_or_32_g, i32imm, imm>;
1388 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
1389 atomic_load_or_32_gen, i32imm, imm>;
1390 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1391 ".or", atomic_load_or_32_gen, i32imm, imm>;
1392 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
1393 atomic_load_or_32_s, i32imm, imm>;
1394 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
1395 atomic_load_or_64_g, i64imm, imm>;
1396 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
1397 atomic_load_or_64_gen, i64imm, imm>;
1398 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1399 ".or", atomic_load_or_64_gen, i64imm, imm>;
1400 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
1401 atomic_load_or_64_s, i64imm, imm>;
// Address-space-qualified fragments for atomic xor, wrapping
// atomic_load_xor_32 / atomic_load_xor_64. The suffix selects the wrapper
// class: _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK.
1405 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1406 (atomic_load_xor_32 node:$a, node:$b)>;
1407 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1408 (atomic_load_xor_32 node:$a, node:$b)>;
1409 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1410 (atomic_load_xor_32 node:$a, node:$b)>;
1411 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1412 (atomic_load_xor_64 node:$a, node:$b)>;
1413 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1414 (atomic_load_xor_64 node:$a, node:$b)>;
1415 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1416 (atomic_load_xor_64 node:$a, node:$b)>;
// Atomic xor instantiations via F_ATOMIC_2: opcode string ".xor" with the
// untyped ".b32" / ".b64" type strings. G / S / GEN pass ".global" /
// ".shared" / "" as the space string.
// The *_GEN_*_USE_G variants match the *_gen fragment but print ".global".
// NOTE(review): no predicate argument separates GEN from GEN_*_USE_G in the
// lines shown -- confirm how selection between the two is intended to work.
1418 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
1419 atomic_load_xor_32_g, i32imm, imm>;
1420 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
1421 atomic_load_xor_32_s, i32imm, imm>;
1422 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
1423 atomic_load_xor_32_gen, i32imm, imm>;
1424 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1425 ".xor", atomic_load_xor_32_gen, i32imm, imm>;
1426 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
1427 atomic_load_xor_64_g, i64imm, imm>;
1428 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
1429 atomic_load_xor_64_s, i64imm, imm>;
1430 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
1431 atomic_load_xor_64_gen, i64imm, imm>;
1432 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1433 ".xor", atomic_load_xor_64_gen, i64imm, imm>;
// Address-space-qualified wrappers around atomic_cmp_swap_{32,64}. Unlike the
// two-operand atomics these take three operands ($a, $b, $c), all forwarded
// unchanged to the underlying fragment.
// NOTE(review): ATOMIC_*_CHK are defined elsewhere in this file; presumably
// they check the address space of the memory node -- confirm.
1437 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1438 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1439 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1440 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1441 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1442 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1443 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1444 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1445 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1446 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1447 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1448 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
// atom.cas instruction variants: F_ATOMIC_3 instantiated for the global,
// shared and generic compare-and-swap fragments in .b32 and .b64 forms.
// Only the immediate operand type (i32imm/i64imm) is passed here; no
// immediate node argument is given, unlike the F_ATOMIC_2 instantiations.
// The *_USE_G variants pair the generic fragment with ".global".
1450 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
1451 atomic_cmp_swap_32_g, i32imm>;
1452 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
1453 atomic_cmp_swap_32_s, i32imm>;
1454 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
1455 atomic_cmp_swap_32_gen, i32imm>;
1456 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
1457 ".cas", atomic_cmp_swap_32_gen, i32imm>;
1458 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
1459 atomic_cmp_swap_64_g, i64imm>;
1460 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
1461 atomic_cmp_swap_64_s, i64imm>;
1462 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
1463 atomic_cmp_swap_64_gen, i64imm>;
1464 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
1465 ".cas", atomic_cmp_swap_64_gen, i64imm>;
1467 // Support for scoped atomic operations. Matches
1468 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1469 // and converts it into the appropriate instruction.
1470 // NOTE: not all possible combinations are implemented
1471 // 'space' is limited to generic as it's the only one needed to support CUDA.
1472 // 'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Common instruction record for the two- and three-operand scoped atomics.
// It has a single output $result in `regclass`, takes the caller-supplied
// `ins` dag as inputs, and its selection pattern sets $result from `Operands`.
// NOTE(review): AsmStr and Preds are parameters of this class; presumably
// they become the asm string and the Requires<> list -- confirm.
1473 class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
1474 dag ins, dag Operands>
1475 : NVPTXInst<(outs regclass:$result), ins,
1477 [(set regclass:$result, Operands)]>,
1480 // Define instruction variants for all addressing modes.
// Emits four anonymous ATOM23_impl records for a two-operand atomic `Intr`:
// the address $src in a 32-bit or 64-bit integer register, combined with
// the value $b either in a `regclass` register or as an `ImmType` immediate.
1481 multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
1482 NVPTXRegClass regclass, Operand ImmType,
1483 SDNode Imm, ValueType ImmTy,
1484 list<Predicate> Preds> {
// The register/register forms follow this AddedComplexity = 1 scope opener.
1485 let AddedComplexity = 1 in {
1486 def : ATOM23_impl<AsmStr, regclass, Preds,
1487 (ins Int32Regs:$src, regclass:$b),
1488 (Intr Int32Regs:$src, regclass:$b)>;
1489 def : ATOM23_impl<AsmStr, regclass, Preds,
1490 (ins Int64Regs:$src, regclass:$b),
1491 (Intr Int64Regs:$src, regclass:$b)>;
1493 // tablegen can't infer argument types from Intrinsic (though it can
1494 // from Instruction) so we have to enforce specific type on
1495 // immediates via explicit cast to ImmTy.
1496 def : ATOM23_impl<AsmStr, regclass, Preds,
1497 (ins Int32Regs:$src, ImmType:$b),
1498 (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
1499 def : ATOM23_impl<AsmStr, regclass, Preds,
1500 (ins Int64Regs:$src, ImmType:$b),
1501 (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
// Three-operand counterpart of ATOM2P_impl. For each of the 32-bit and 64-bit
// address register forms it emits every register/immediate combination of
// $b and $c: reg/reg, imm/reg, reg/imm and imm/imm. Immediates are cast to
// ImmTy explicitly in the patterns.
1504 multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
1505 NVPTXRegClass regclass, Operand ImmType,
1506 SDNode Imm, ValueType ImmTy,
1507 list<Predicate> Preds> {
1508 // Variants for register/immediate permutations of $b and $c
// Both operands in registers.
1509 let AddedComplexity = 2 in {
1510 def : ATOM23_impl<AsmStr, regclass, Preds,
1511 (ins Int32Regs:$src, regclass:$b, regclass:$c),
1512 (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
1513 def : ATOM23_impl<AsmStr, regclass, Preds,
1514 (ins Int64Regs:$src, regclass:$b, regclass:$c),
1515 (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
// Exactly one of $b / $c is an immediate.
1517 let AddedComplexity = 1 in {
1518 def : ATOM23_impl<AsmStr, regclass, Preds,
1519 (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1520 (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1521 def : ATOM23_impl<AsmStr, regclass, Preds,
1522 (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1523 (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1524 def : ATOM23_impl<AsmStr, regclass, Preds,
1525 (ins Int32Regs:$src, regclass:$b, ImmType:$c),
1526 (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1527 def : ATOM23_impl<AsmStr, regclass, Preds,
1528 (ins Int64Regs:$src, regclass:$b, ImmType:$c),
1529 (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
// Both $b and $c are immediates.
1531 def : ATOM23_impl<AsmStr, regclass, Preds,
1532 (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
1533 (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1534 def : ATOM23_impl<AsmStr, regclass, Preds,
1535 (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
1536 (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1539 // Constructs intrinsic name and instruction asm strings.
// Builds the strings for a two-operand scoped atomic and forwards them to
// ATOM2P_impl:
//  - asm:  "atom[.<space>][.<scope>].<op>.<type> \t$result, [$src], $b;"
//          where the space suffix is dropped for "gen" and the scope suffix
//          is dropped for "gpu";
//  - name: "int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>]" where the scope
//          suffix is dropped only when ScopeStr is empty.
1540 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
1541 string ScopeStr, string SpaceStr,
1542 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1543 ValueType ImmTy, list<Predicate> Preds> {
1544 defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1545 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1546 # "." # OpStr # "." # TypeStr
1547 # " \t$result, [$src], $b;",
1549 "int_nvvm_atomic_" # OpStr
1550 # "_" # SpaceStr # "_" # IntTypeStr
1551 # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1552 regclass, ImmType, Imm, ImmTy, Preds>;
// Three-operand counterpart of ATOM2N_impl: builds the same asm prefix and
// intrinsic name, but the asm operand list is "$result, [$src], $b, $c" and
// the result is forwarded to ATOM3P_impl.
1554 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
1555 string ScopeStr, string SpaceStr,
1556 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1557 ValueType ImmTy, list<Predicate> Preds> {
1558 defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1559 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1560 # "." # OpStr # "." # TypeStr
1561 # " \t$result, [$src], $b, $c;",
1563 "int_nvvm_atomic_" # OpStr
1564 # "_" # SpaceStr # "_" # IntTypeStr
1565 # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1566 regclass, ImmType, Imm, ImmTy, Preds>;
1569 // Constructs variants for different address spaces.
1570 // For now we only need variants for generic space pointers.
// The single "_gen_" instantiation passes "gen" as SpaceStr to ATOM2N_impl.
1571 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
1572 string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1573 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1574 defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1575 regclass, ImmType, Imm, ImmTy, Preds>;
// Address-space fan-out for three-operand atomics. Only the generic space is
// instantiated: "_gen_" passes "gen" as SpaceStr to ATOM3N_impl.
1577 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
1578 string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1579 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1580 defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1581 regclass, ImmType, Imm, ImmTy, Preds>;
1584 // Constructs variants for different scopes of atomic op.
// Instantiates ATOM2A_impl once for scope "cta" and once for scope "sys";
// both append hasAtomScope to the caller's predicate list.
1585 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
1586 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1587 ValueType ImmTy, list<Predicate> Preds> {
1588 // .gpu scope is default and is currently covered by existing
1589 // atomics w/o explicitly specified scope.
1590 defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1591 regclass, ImmType, Imm, ImmTy,
1592 !listconcat(Preds,[hasAtomScope])>;
1593 defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1594 regclass, ImmType, Imm, ImmTy,
1595 !listconcat(Preds,[hasAtomScope])>;
// Scope fan-out for three-operand atomics: instantiates ATOM3A_impl for the
// "cta" and "sys" scopes, appending hasAtomScope to the predicate list.
1597 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
1598 NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
1599 list<Predicate> Preds> {
1600 // No need to define ".gpu"-scoped atomics. They do the same thing
1601 // as the regular, non-scoped atomics defined elsewhere.
1602 defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1603 regclass, ImmType, Imm, ImmTy,
1604 !listconcat(Preds,[hasAtomScope])>;
1605 defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1606 regclass, ImmType, Imm, ImmTy,
1607 !listconcat(Preds,[hasAtomScope])>;
// Scoped atomic variants for an add-style op: s32, u32 and u64 integer forms
// (intrinsic type tag "i") plus f32 and f64 floating-point forms (tag "f",
// using fpimm immediates). The integer forms pass an empty predicate list.
// NOTE(review): the predicate lists of the f32/f64 entries are not covered by
// this comment -- check them before relying on the float variants.
1611 multiclass ATOM2_add_impl<string OpStr> {
1612 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1613 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1614 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
1615 defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
1617 defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
1621 // atom.{and,or,xor}
// b32 and b64 scoped variants; the b64 form additionally requires
// hasAtomBitwise64.
1622 multiclass ATOM2_bitwise_impl<string OpStr> {
1623 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1624 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
1625 [hasAtomBitwise64]>;
// Scoped b32 and b64 variants for an exchange-style op; neither adds extra
// predicates beyond those appended by ATOM2S_impl.
1629 multiclass ATOM2_exch_impl<string OpStr> {
1630 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1631 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
// Scoped signed/unsigned 32-bit and 64-bit variants for a min/max-style op.
// The 32-bit forms pass an empty predicate list.
// NOTE(review): the predicate lists of the s64/u64 entries are not covered by
// this comment -- check them before relying on the 64-bit variants.
1635 multiclass ATOM2_minmax_impl<string OpStr> {
1636 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1637 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1638 defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
1640 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
// Scoped variants for an inc/dec-style op; only the u32 form is defined.
1645 multiclass ATOM2_incdec_impl<string OpStr> {
1646 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
// Scoped three-operand (compare-and-swap style) variants in b32 and b64,
// built through ATOM3S_impl with no extra predicates.
1650 multiclass ATOM3_cas_impl<string OpStr> {
1651 defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1652 defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
// Top-level instantiations of the scoped atomics, one per operation. The
// string argument is the OpStr used in both the asm mnemonic and the
// intrinsic name.
1655 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
1656 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
1657 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
1658 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
1659 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
1660 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
1661 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
1662 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
1663 defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
1664 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
1666 //-----------------------------------
1667 // Support for ldu on sm_20 or later
1668 //-----------------------------------
1670 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
1671 // read-only in a kernel.
// Scalar "ldu.global.<TyStr>" instruction forms, one per address operand
// kind: 32-bit register (areg), 64-bit register (areg64), imemAny (avar),
// MEMri (ari) and MEMri64 (ari64). TyStr supplies the type suffix together
// with the operand text. All forms have an empty pattern list and require
// hasLDU.
1675 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1676 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1677 !strconcat("ldu.global.", TyStr),
1678 []>, Requires<[hasLDU]>;
1679 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1680 !strconcat("ldu.global.", TyStr),
1681 []>, Requires<[hasLDU]>;
1682 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1683 !strconcat("ldu.global.", TyStr),
1684 []>, Requires<[hasLDU]>;
1685 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1686 !strconcat("ldu.global.", TyStr),
1687 []>, Requires<[hasLDU]>;
1688 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1689 !strconcat("ldu.global.", TyStr),
1690 []>, Requires<[hasLDU]>;
// Scalar ldu instantiations per value type. Note that the i8 form loads u8
// into Int16Regs, the f16/f16x2 forms use the untyped b16/b32 suffixes, and
// the p32/p64 (pointer) forms reuse the u32/u64 suffixes.
1693 defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1694 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1695 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1696 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1697 defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
1698 defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
1699 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1700 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1701 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1702 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1706 // Elementized vector ldu
// Two-element form: each record has two outputs ($dst1, $dst2) in `regclass`
// and an empty pattern list. Five address variants are defined: _areg32,
// _areg64, _ari32, _ari64 and _avar. Unlike the scalar LDU_G records, no
// Requires<> clause appears on these definitions.
1707 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1708 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1709 (ins Int32Regs:$src),
1710 !strconcat("ldu.global.", TyStr), []>;
1711 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1712 (ins Int64Regs:$src),
1713 !strconcat("ldu.global.", TyStr), []>;
1714 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1716 !strconcat("ldu.global.", TyStr), []>;
1717 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1719 !strconcat("ldu.global.", TyStr), []>;
1720 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1722 !strconcat("ldu.global.", TyStr), []>;
// Four-element elementized ldu: each record has four outputs ($dst1..$dst4)
// in `regclass`, an empty pattern list, and one of five address operand
// kinds (Int32Regs, Int64Regs, MEMri, MEMri64, imemAny).
1725 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1726 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1727 regclass:$dst4), (ins Int32Regs:$src),
1728 !strconcat("ldu.global.", TyStr), []>;
1729 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1730 regclass:$dst4), (ins Int64Regs:$src),
1731 !strconcat("ldu.global.", TyStr), []>;
1732 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1733 regclass:$dst4), (ins MEMri:$src),
1734 !strconcat("ldu.global.", TyStr), []>;
1735 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1736 regclass:$dst4), (ins MEMri64:$src),
1737 !strconcat("ldu.global.", TyStr), []>;
1738 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1739 regclass:$dst4), (ins imemAny:$src),
1740 !strconcat("ldu.global.", TyStr), []>;
// Vector ldu instantiations. The TyStr argument carries the vector width
// ("v2"/"v4"), the element type suffix and the brace-enclosed destination
// list. v2 forms exist for i8, i16, i32, f16, f16x2, f32, i64 and f64; v4
// forms for i8, i16, i32, f16, f16x2 and f32.
1743 defm INT_PTX_LDU_G_v2i8_ELE
1744 : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1745 defm INT_PTX_LDU_G_v2i16_ELE
1746 : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1747 defm INT_PTX_LDU_G_v2i32_ELE
1748 : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1749 defm INT_PTX_LDU_G_v2f16_ELE
1750 : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1751 defm INT_PTX_LDU_G_v2f16x2_ELE
1752 : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1753 defm INT_PTX_LDU_G_v2f32_ELE
1754 : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1755 defm INT_PTX_LDU_G_v2i64_ELE
1756 : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1757 defm INT_PTX_LDU_G_v2f64_ELE
1758 : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1759 defm INT_PTX_LDU_G_v4i8_ELE
1760 : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1761 defm INT_PTX_LDU_G_v4i16_ELE
1762 : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1764 defm INT_PTX_LDU_G_v4i32_ELE
1765 : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1767 defm INT_PTX_LDU_G_v4f16_ELE
1768 : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1770 defm INT_PTX_LDU_G_v4f16x2_ELE
1771 : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1773 defm INT_PTX_LDU_G_v4f32_ELE
1774 : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1778 //-----------------------------------
1779 // Support for ldg on sm_35 or later
1780 //-----------------------------------
1782 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
1783 // non-coherent texture cache, and therefore the values read must be read-only
1784 // during the lifetime of the kernel.
// Scalar "ld.global.nc.<TyStr>" instruction forms, one per address operand
// kind: 32-bit register (areg), 64-bit register (areg64), imemAny (avar),
// MEMri (ari) and MEMri64 (ari64). All forms have an empty pattern list and
// require hasLDG.
1786 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
1787 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1788 !strconcat("ld.global.nc.", TyStr),
1789 []>, Requires<[hasLDG]>;
1790 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1791 !strconcat("ld.global.nc.", TyStr),
1792 []>, Requires<[hasLDG]>;
1793 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1794 !strconcat("ld.global.nc.", TyStr),
1795 []>, Requires<[hasLDG]>;
1796 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1797 !strconcat("ld.global.nc.", TyStr),
1798 []>, Requires<[hasLDG]>;
1799 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1800 !strconcat("ld.global.nc.", TyStr),
1801 []>, Requires<[hasLDG]>;
// Scalar ldg instantiations per value type, mirroring the ldu set: the i8
// form loads u8 into Int16Regs, f16/f16x2 use b16/b32, and the p32/p64
// (pointer) forms reuse u32/u64.
1804 defm INT_PTX_LDG_GLOBAL_i8
1805 : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
1806 defm INT_PTX_LDG_GLOBAL_i16
1807 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
1808 defm INT_PTX_LDG_GLOBAL_i32
1809 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1810 defm INT_PTX_LDG_GLOBAL_i64
1811 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1812 defm INT_PTX_LDG_GLOBAL_f16
1813 : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
1814 defm INT_PTX_LDG_GLOBAL_f16x2
1815 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
1816 defm INT_PTX_LDG_GLOBAL_f32
1817 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
1818 defm INT_PTX_LDG_GLOBAL_f64
1819 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
1820 defm INT_PTX_LDG_GLOBAL_p32
1821 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1822 defm INT_PTX_LDG_GLOBAL_p64
1823 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1827 // Elementized vector ldg
// Two-element form: each record has two outputs ($dst1, $dst2) in `regclass`
// and an empty pattern list. Five address variants are defined: _areg32,
// _areg64, _ari32, _ari64 and _avar. Unlike the scalar LDG_G records, no
// Requires<> clause appears on these definitions.
1828 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1829 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1830 (ins Int32Regs:$src),
1831 !strconcat("ld.global.nc.", TyStr), []>;
1832 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1833 (ins Int64Regs:$src),
1834 !strconcat("ld.global.nc.", TyStr), []>;
1835 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1837 !strconcat("ld.global.nc.", TyStr), []>;
1838 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1840 !strconcat("ld.global.nc.", TyStr), []>;
1841 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1843 !strconcat("ld.global.nc.", TyStr), []>;
// Four-element elementized ldg: each record has four outputs ($dst1..$dst4)
// in `regclass`, an empty pattern list, and one of five address operand
// kinds (Int32Regs, Int64Regs, MEMri, MEMri64, imemAny).
1846 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1847 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1848 regclass:$dst4), (ins Int32Regs:$src),
1849 !strconcat("ld.global.nc.", TyStr), []>;
1850 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1851 regclass:$dst4), (ins Int64Regs:$src),
1852 !strconcat("ld.global.nc.", TyStr), []>;
1853 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1854 regclass:$dst4), (ins MEMri:$src),
1855 !strconcat("ld.global.nc.", TyStr), []>;
1856 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1857 regclass:$dst4), (ins MEMri64:$src),
1858 !strconcat("ld.global.nc.", TyStr), []>;
1859 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1860 regclass:$dst4), (ins imemAny:$src),
1861 !strconcat("ld.global.nc.", TyStr), []>;
1864 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ldg instantiations. The TyStr argument carries the vector width
// ("v2"/"v4"), the element type suffix and the brace-enclosed destination
// list. The 8-bit element forms (v2i8, v4i8) load into Int16Regs.
1865 defm INT_PTX_LDG_G_v2i8_ELE
1866 : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1867 defm INT_PTX_LDG_G_v2i16_ELE
1868 : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1869 defm INT_PTX_LDG_G_v2i32_ELE
1870 : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1871 defm INT_PTX_LDG_G_v2f16_ELE
1872 : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1873 defm INT_PTX_LDG_G_v2f16x2_ELE
1874 : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1875 defm INT_PTX_LDG_G_v2f32_ELE
1876 : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1877 defm INT_PTX_LDG_G_v2i64_ELE
1878 : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1879 defm INT_PTX_LDG_G_v2f64_ELE
1880 : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1881 defm INT_PTX_LDG_G_v4i8_ELE
1882 : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1883 defm INT_PTX_LDG_G_v4i16_ELE
1884 : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1885 defm INT_PTX_LDG_G_v4i32_ELE
1886 : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
1887 defm INT_PTX_LDG_G_v4f16_ELE
1888 : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
1889 defm INT_PTX_LDG_G_v4f16x2_ELE
1890 : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
1891 defm INT_PTX_LDG_G_v4f32_ELE
1892 : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
// "cvta.<Str>" conversions selected for intrinsic `Intrin`:
//  _yes      32-bit source -> 32-bit result (cvta.<Str>.u32)
//  _yes_64   64-bit source -> 64-bit result (cvta.<Str>.u64)
//  _yes_6432 32-bit source -> 64-bit result: the source is first widened into
//            a scratch .b64 register with cvt.u64.u32 and then converted with
//            cvta.<Str>.u64. Only this form requires useShortPtr.
1895 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
1896 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1897 !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
1898 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1899 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1900 !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
1901 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1902 def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
1903 "{{ .reg .b64 %tmp;\n\t"
1904 #" cvt.u64.u32 \t%tmp, $src;\n\t"
1905 #" cvta." # Str # ".u64 \t$result, %tmp; }}",
1906 [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
1907 Requires<[useShortPtr]>;
// "cvta.to.<Str>" conversions selected for intrinsic `Intrin`:
//  _yes      32-bit source -> 32-bit result (cvta.to.<Str>.u32)
//  _yes_64   64-bit source -> 64-bit result (cvta.to.<Str>.u64)
//  _yes_3264 64-bit source -> 32-bit result: the source is converted with
//            cvta.to.<Str>.u64 into a scratch .b64 register and then narrowed
//            with cvt.u32.u64. Only this form requires useShortPtr.
1910 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
1911 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1912 !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
1913 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1914 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1915 !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
1916 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1917 def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
1918 "{{ .reg .b64 %tmp;\n\t"
1919 #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
1920 #" cvt.u32.u64 \t$result, %tmp; }}",
1921 [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
1922 Requires<[useShortPtr]>;
// Address-space conversion instantiations. cvta_* map the
// int_nvvm_ptr_<space>_to_gen intrinsics through NG_TO_G; cvta_to_* map the
// int_nvvm_ptr_gen_to_<space> intrinsics through G_TO_NG. The constant-space
// intrinsics use the PTX space name "const".
1925 defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
1926 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
1927 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
1928 defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
1930 defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
1931 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
1932 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
1933 defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
1936 // nvvm.ptr.gen.to.param
// Selected as a plain register move (mov.u32 / mov.u64) of the source
// pointer, in 32-bit and 64-bit forms.
1937 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
1938 (ins Int32Regs:$src),
1939 "mov.u32 \t$result, $src;",
1940 [(set Int32Regs:$result,
1941 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
1942 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
1943 (ins Int64Regs:$src),
1944 "mov.u64 \t$result, $src;",
1945 [(set Int64Regs:$result,
1946 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
1949 // nvvm.move intrinsics
// One register-to-register mov per int_nvvm_move_* intrinsic. Integer forms
// use the untyped mov.b16/b32/b64, floating-point forms use mov.f32/f64, and
// the pointer forms use mov.u32/u64 (both pointer widths match the same
// int_nvvm_move_ptr intrinsic, distinguished by register class).
1950 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
1951 "mov.b16 \t$r, $s;",
1953 (int_nvvm_move_i16 Int16Regs:$s))]>;
1954 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1955 "mov.b32 \t$r, $s;",
1957 (int_nvvm_move_i32 Int32Regs:$s))]>;
1958 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1959 "mov.b64 \t$r, $s;",
1961 (int_nvvm_move_i64 Int64Regs:$s))]>;
1962 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
1963 "mov.f32 \t$r, $s;",
1964 [(set Float32Regs:$r,
1965 (int_nvvm_move_float Float32Regs:$s))]>;
1966 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
1967 "mov.f64 \t$r, $s;",
1968 [(set Float64Regs:$r,
1969 (int_nvvm_move_double Float64Regs:$s))]>;
1970 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1971 "mov.u32 \t$r, $s;",
1973 (int_nvvm_move_ptr Int32Regs:$s))]>;
1974 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1975 "mov.u64 \t$r, $s;",
1977 (int_nvvm_move_ptr Int64Regs:$s))]>;
1979 // @TODO: Are these actually needed, or will we always just see symbols
1980 // copied to registers first?
1981 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
1982 "mov.u32 \t$r, $s;",
1984 (int_nvvm_move_ptr texternalsym:$s))]>;
1985 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
1986 "mov.u64 \t$r, $s;",
1988 (int_nvvm_move_ptr texternalsym:$s))]>;*/
1991 // MoveParam %r1, param
1992 // ptr_local_to_gen %r2, %r1
1993 // ptr_gen_to_local %r3, %r2
1997 // @TODO: Revisit this. There is a type
1998 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
1999 // instructions are not currently defined. However, we can use the ptr
2000 // variants and the asm printer will do the right thing.
// Fold the round trip gen_to_local(local_to_gen(MoveParam sym)) into a single
// pointer move of the symbol: nvvm_move_ptr64 for an i64 result and
// nvvm_move_ptr32 for an i32 result.
2001 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2002 (MoveParam texternalsym:$src)))),
2003 (nvvm_move_ptr64 texternalsym:$src)>;
2004 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2005 (MoveParam texternalsym:$src)))),
2006 (nvvm_move_ptr32 texternalsym:$src)>;
2009 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
2010 "mov.u64 \t$result, $src;", []>;
2012 //-----------------------------------
2013 // Compiler Error Warn
2014 // - Just ignore them in codegen
2015 //-----------------------------------
// Each record matches int_nvvm_compiler_warn / int_nvvm_compiler_error with a
// 32-bit or 64-bit register operand and emits only a PTX comment line; the
// operand $a does not appear in the asm string.
2017 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2018 "// llvm.nvvm.compiler.warn()",
2019 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
2020 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2021 "// llvm.nvvm.compiler.warn()",
2022 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
2023 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2024 "// llvm.nvvm.compiler.error()",
2025 [(int_nvvm_compiler_error Int32Regs:$a)]>;
2026 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2027 "// llvm.nvvm.compiler.error()",
2028 [(int_nvvm_compiler_error Int64Regs:$a)]>;
// isspacep.<space> predicates: each takes a 32-bit or 64-bit address in $a
// and produces an Int1Regs result $d, selected from the matching
// int_nvvm_isspacep_<space> intrinsic. Only the const variants carry a
// Requires<[hasPTX31]> clause; global, local and shared are unconditional.
2033 def ISSPACEP_CONST_32
2034 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2035 "isspacep.const \t$d, $a;",
2036 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
2037 Requires<[hasPTX31]>;
2038 def ISSPACEP_CONST_64
2039 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2040 "isspacep.const \t$d, $a;",
2041 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
2042 Requires<[hasPTX31]>;
2043 def ISSPACEP_GLOBAL_32
2044 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2045 "isspacep.global \t$d, $a;",
2046 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
2047 def ISSPACEP_GLOBAL_64
2048 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2049 "isspacep.global \t$d, $a;",
2050 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
2051 def ISSPACEP_LOCAL_32
2052 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2053 "isspacep.local \t$d, $a;",
2054 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
2055 def ISSPACEP_LOCAL_64
2056 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2057 "isspacep.local \t$d, $a;",
2058 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
2059 def ISSPACEP_SHARED_32
2060 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2061 "isspacep.shared \t$d, $a;",
2062 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
2063 def ISSPACEP_SHARED_64
2064 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2065 "isspacep.shared \t$d, $a;",
2066 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2069 // Special register reads
// MOV_SPECIAL copies a SpecialRegs operand into a 32-bit register with
// mov.b32 and has no selection pattern of its own. The patterns below map
// each int_nvvm_read_ptx_sreg_envregN intrinsic (N = 0..31) onto MOV_SPECIAL
// with the corresponding ENVREGN register.
2070 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
2071 (ins SpecialRegs:$r),
2072 "mov.b32 \t$d, $r;", []>;
2074 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
2075 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
2076 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
2077 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
2078 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
2079 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
2080 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
2081 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
2082 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
2083 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
2084 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
2085 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2086 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2087 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2088 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2089 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2090 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2091 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2092 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2093 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2094 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2095 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2096 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2097 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2098 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2099 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2100 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2101 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2102 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2103 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2104 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2105 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2108 // rotate builtin support
// int_nvvm_rotate_b32 has two selection paths:
//  - with hasHWROT32, ROTATE_B32_HW_{IMM,REG} emit shf.l.wrap.b32 with $src
//    used as both data inputs and the amount as an immediate or a register;
//  - with noHWROT32, the patterns below fall back to ROT32imm_sw (immediate
//    amount, with SUB_FRM_32 applied to the amount for the third operand)
//    and ROTL32reg_sw (register amount).
// NOTE(review): ROT32imm_sw, ROTL32reg_sw and SUB_FRM_32 are defined
// elsewhere; their exact semantics are not visible here.
2110 def ROTATE_B32_HW_IMM
2111 : NVPTXInst<(outs Int32Regs:$dst),
2112 (ins Int32Regs:$src, i32imm:$amt),
2113 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2114 [(set Int32Regs:$dst,
2115 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2116 Requires<[hasHWROT32]> ;
2118 def ROTATE_B32_HW_REG
2119 : NVPTXInst<(outs Int32Regs:$dst),
2120 (ins Int32Regs:$src, Int32Regs:$amt),
2121 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2122 [(set Int32Regs:$dst,
2123 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2124 Requires<[hasHWROT32]> ;
2126 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2127 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2128 Requires<[noHWROT32]> ;
2130 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2131 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2132 Requires<[noHWROT32]> ;
// Helpers that move between one 64-bit register and two 32-bit registers.
// All are declared with hasSideEffects = 0.
2134 let hasSideEffects = 0 in {
// GET_LO_INT64: unpack $src with a vector mov.b64 into {$dst, %dummy}; the
// first element is kept and the scratch register %dummy receives the other.
2135 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2136 !strconcat("{{\n\t",
2137 ".reg .b32 %dummy;\n\t",
2138 "mov.b64 \t{$dst,%dummy}, $src;\n\t",
// GET_HI_INT64: same unpack, but $dst takes the second element.
2142 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2143 !strconcat("{{\n\t",
2144 ".reg .b32 %dummy;\n\t",
2145 "mov.b64 \t{%dummy,$dst}, $src;\n\t",
// Builds a 64-bit register from two 32-bit registers: mov.b64 packs the
// vector {$lo, $hi} into $dst. No selection pattern is attached ([]).
2150 let hasSideEffects = 0 in {
2152 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2153 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
// int_nvvm_swap_lo_hi_b64: repack $src with its halves exchanged. The value
// from GET_HI_INT64 is passed as the first PACK_TWO_INT32 operand and the
// value from GET_LO_INT64 as the second.
2156 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2157 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2158 (GET_LO_INT64 Int64Regs:$src))> ;
2160 // Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
// the definitions below are declared with hasSideEffects = 0.
//
// Four variants: left (shf.l.wrap.b32) and right (shf.r.wrap.b32), each with
// an immediate or a register shift amount. All take $lo and $hi as data
// operands, carry an empty pattern list (they are only referenced explicitly
// from other patterns), and require hasHWROT32.
2162 let hasSideEffects = 0 in {
// Left funnel shift, immediate amount.
2163 def SHF_L_WRAP_B32_IMM
2164 : NVPTXInst<(outs Int32Regs:$dst),
2165 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2166 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2167 Requires<[hasHWROT32]>;
// Left funnel shift, register amount.
2169 def SHF_L_WRAP_B32_REG
2170 : NVPTXInst<(outs Int32Regs:$dst),
2171 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2172 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2173 Requires<[hasHWROT32]>;
// Right funnel shift, immediate amount.
2175 def SHF_R_WRAP_B32_IMM
2176 : NVPTXInst<(outs Int32Regs:$dst),
2177 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2178 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2179 Requires<[hasHWROT32]>;
// Right funnel shift, register amount.
2181 def SHF_R_WRAP_B32_REG
2182 : NVPTXInst<(outs Int32Regs:$dst),
2183 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2184 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2185 Requires<[hasHWROT32]>;
2188 // HW version of rotate 64
// Under hasHWROT32, each 64-bit rotate is expressed with two 32-bit funnel
// shifts over the GET_LO_INT64 / GET_HI_INT64 halves of $src. The two shifts
// in a pattern use the same amount but take the halves in opposite order.
//
// Rotate left, immediate amount.
2189 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2191 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2192 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2193 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2194 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2195 Requires<[hasHWROT32]>;
// Rotate left, register amount.
2197 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2199 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2200 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2201 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2202 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2203 Requires<[hasHWROT32]>;
// Rotate right, immediate amount.
2206 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2208 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2209 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2210 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2211 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2212 Requires<[hasHWROT32]>;
// Rotate right, register amount.
2214 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2216 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2217 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2218 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2219 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2220 Requires<[hasHWROT32]>;
2222 // SW version of rotate 64
// Fallback selection of int_nvvm_rotate_b64 with an immediate amount when
// noHWROT32 holds. ROT64imm_sw receives the amount and its complement; the
// complement must come from the 64-bit transform SUB_FRM_64 (as in the
// rotate_right_b64 fallback, which passes the same pair in reverse order).
// Using the 32-bit SUB_FRM_32 here would pair the amount with the wrong
// complementary shift for a 64-bit value.
2223 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2224 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
2225 Requires<[noHWROT32]>;
// Remaining noHWROT32 fallbacks for the 64-bit rotates.
// Rotate left, register amount: handed to ROTL64reg_sw.
2226 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2227 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2228 Requires<[noHWROT32]>;
// Rotate right, immediate amount: reuses ROT64imm_sw with the two amount
// operands given as (SUB_FRM_64 of amt, amt).
2229 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2230 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2231 Requires<[noHWROT32]>;
// Rotate right, register amount: handed to ROTR64reg_sw.
2232 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2233 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2234 Requires<[noHWROT32]>;
2237 //-----------------------------------
2238 // Texture Intrinsics
2239 //-----------------------------------
2241 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must
2242 // also be defined in NVPTXReplaceImageHandles.cpp
2244 // texmode_independent
2245 let IsTex = 1, IsTexModeUnified = 0 in {
2246 // Texture fetch instructions using handles
// 1D texture fetches (tex.1d, tex.level.1d, tex.grad.1d).
// Every variant writes four 32-bit components ($r, $g, $b, $a) and addresses
// the texture through the 64-bit handle operands $t and $s plus a single
// coordinate $x. The .level forms append $lod; the .grad forms append the
// one-element vectors {$gradx} and {$grady}.
//
// Float32Regs results; Int32Regs coordinate.
2248 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2249 Float32Regs:$b, Float32Regs:$a),
2250 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2251 "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
// Float32Regs results; Float32Regs coordinate.
2254 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2255 Float32Regs:$b, Float32Regs:$a),
2256 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2257 "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2259 def TEX_1D_F32_F32_LEVEL
2260 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2261 Float32Regs:$b, Float32Regs:$a),
2262 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
2263 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2264 "[$t, $s, \\{$x\\}], $lod;",
2266 def TEX_1D_F32_F32_GRAD
2267 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2268 Float32Regs:$b, Float32Regs:$a),
2269 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2270 Float32Regs:$gradx, Float32Regs:$grady),
2271 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2272 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Int32Regs results, .s32 destination type.
2275 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2276 Int32Regs:$b, Int32Regs:$a),
2277 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2278 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2281 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2282 Int32Regs:$b, Int32Regs:$a),
2283 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2284 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2286 def TEX_1D_S32_F32_LEVEL
2287 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2288 Int32Regs:$b, Int32Regs:$a),
2289 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2291 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2292 "[$t, $s, \\{$x\\}], $lod;",
2294 def TEX_1D_S32_F32_GRAD
2295 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2296 Int32Regs:$b, Int32Regs:$a),
2297 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2298 Float32Regs:$gradx, Float32Regs:$grady),
2299 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2300 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Int32Regs results, .u32 destination type.
2303 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2304 Int32Regs:$b, Int32Regs:$a),
2305 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2306 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2309 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2310 Int32Regs:$b, Int32Regs:$a),
2311 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2312 "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2314 def TEX_1D_U32_F32_LEVEL
2315 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2316 Int32Regs:$b, Int32Regs:$a),
2317 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2319 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2320 "[$t, $s, \\{$x\\}], $lod;",
2322 def TEX_1D_U32_F32_GRAD
2323 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2324 Int32Regs:$b, Int32Regs:$a),
2325 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2326 Float32Regs:$gradx, Float32Regs:$grady),
2327 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2328 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// 1D array texture fetches (tex.a1d, tex.level.a1d, tex.grad.a1d).
// Same operand layout as the 1D fetches, with an extra Int32Regs operand $l
// placed first in the coordinate vector: {$l, $x}. Results are four 32-bit
// components; .level forms append $lod and .grad forms append {$gradx} and
// {$grady}.
//
// Float32Regs results.
2331 def TEX_1D_ARRAY_F32_S32
2332 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2333 Float32Regs:$b, Float32Regs:$a),
2334 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2335 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2336 "[$t, $s, \\{$l, $x\\}];",
2338 def TEX_1D_ARRAY_F32_F32
2339 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2340 Float32Regs:$b, Float32Regs:$a),
2341 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2342 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2343 "[$t, $s, \\{$l, $x\\}];",
2345 def TEX_1D_ARRAY_F32_F32_LEVEL
2346 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2347 Float32Regs:$b, Float32Regs:$a),
2348 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2350 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2351 "[$t, $s, \\{$l, $x\\}], $lod;",
2353 def TEX_1D_ARRAY_F32_F32_GRAD
2354 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2355 Float32Regs:$b, Float32Regs:$a),
2356 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2357 Float32Regs:$gradx, Float32Regs:$grady),
2358 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2359 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Int32Regs results, .s32 destination type.
2361 def TEX_1D_ARRAY_S32_S32
2362 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2363 Int32Regs:$b, Int32Regs:$a),
2364 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2365 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2366 "[$t, $s, \\{$l, $x\\}];",
2368 def TEX_1D_ARRAY_S32_F32
2369 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2370 Int32Regs:$b, Int32Regs:$a),
2371 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2372 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2373 "[$t, $s, \\{$l, $x\\}];",
2375 def TEX_1D_ARRAY_S32_F32_LEVEL
2376 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2377 Int32Regs:$b, Int32Regs:$a),
2378 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2380 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2381 "[$t, $s, \\{$l, $x\\}], $lod;",
2383 def TEX_1D_ARRAY_S32_F32_GRAD
2384 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2385 Int32Regs:$b, Int32Regs:$a),
2386 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2387 Float32Regs:$gradx, Float32Regs:$grady),
2388 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2389 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Int32Regs results, .u32 destination type.
2391 def TEX_1D_ARRAY_U32_S32
2392 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2393 Int32Regs:$b, Int32Regs:$a),
2394 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2395 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2396 "[$t, $s, \\{$l, $x\\}];",
2398 def TEX_1D_ARRAY_U32_F32
2399 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2400 Int32Regs:$b, Int32Regs:$a),
2401 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2402 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2403 "[$t, $s, \\{$l, $x\\}];",
2405 def TEX_1D_ARRAY_U32_F32_LEVEL
2406 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2407 Int32Regs:$b, Int32Regs:$a),
2408 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2410 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2411 "[$t, $s, \\{$l, $x\\}], $lod;",
2413 def TEX_1D_ARRAY_U32_F32_GRAD
2414 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2415 Int32Regs:$b, Int32Regs:$a),
2416 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2417 Float32Regs:$gradx, Float32Regs:$grady),
2418 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2419 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// 2D texture fetches (tex.2d, tex.level.2d, tex.grad.2d).
// Coordinates are the two-element vector {$x, $y}; results are four 32-bit
// components. The .level forms append $lod; the .grad forms append the
// two-element vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
//
// Float32Regs results.
2423 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2424 Float32Regs:$b, Float32Regs:$a),
2425 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2426 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2427 "[$t, $s, \\{$x, $y\\}];",
2430 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2431 Float32Regs:$b, Float32Regs:$a),
2432 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2433 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2434 "[$t, $s, \\{$x, $y\\}];",
2436 def TEX_2D_F32_F32_LEVEL
2437 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2438 Float32Regs:$b, Float32Regs:$a),
2439 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2441 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2442 "[$t, $s, \\{$x, $y\\}], $lod;",
2444 def TEX_2D_F32_F32_GRAD
2445 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2446 Float32Regs:$b, Float32Regs:$a),
2447 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2448 Float32Regs:$gradx0, Float32Regs:$gradx1,
2449 Float32Regs:$grady0, Float32Regs:$grady1),
2450 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2451 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2452 "\\{$grady0, $grady1\\};",
// Int32Regs results, .s32 destination type.
2455 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2456 Int32Regs:$b, Int32Regs:$a),
2457 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2458 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2459 "[$t, $s, \\{$x, $y\\}];",
2462 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2463 Int32Regs:$b, Int32Regs:$a),
2464 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2465 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2466 "[$t, $s, \\{$x, $y\\}];",
2468 def TEX_2D_S32_F32_LEVEL
2469 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2470 Int32Regs:$b, Int32Regs:$a),
2471 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2473 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2474 "[$t, $s, \\{$x, $y\\}], $lod;",
2476 def TEX_2D_S32_F32_GRAD
2477 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2478 Int32Regs:$b, Int32Regs:$a),
2479 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2480 Float32Regs:$gradx0, Float32Regs:$gradx1,
2481 Float32Regs:$grady0, Float32Regs:$grady1),
2482 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2483 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2484 "\\{$grady0, $grady1\\};",
// Int32Regs results, .u32 destination type.
2487 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2488 Int32Regs:$b, Int32Regs:$a),
2489 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2490 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2491 "[$t, $s, \\{$x, $y\\}];",
2494 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2495 Int32Regs:$b, Int32Regs:$a),
2496 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2497 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2498 "[$t, $s, \\{$x, $y\\}];",
2500 def TEX_2D_U32_F32_LEVEL
2501 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2502 Int32Regs:$b, Int32Regs:$a),
2503 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2505 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2506 "[$t, $s, \\{$x, $y\\}], $lod;",
2508 def TEX_2D_U32_F32_GRAD
2509 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2510 Int32Regs:$b, Int32Regs:$a),
2511 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2512 Float32Regs:$gradx0, Float32Regs:$gradx1,
2513 Float32Regs:$grady0, Float32Regs:$grady1),
2514 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2515 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2516 "\\{$grady0, $grady1\\};",
// 2D array texture fetches (tex.a2d, tex.level.a2d, tex.grad.a2d).
// The coordinate vector is written with four elements, {$l, $x, $y, $y}:
// $l comes first and $y is repeated in the last slot (presumably padding,
// since only three distinct values are supplied). Results are four 32-bit
// components; .level forms append $lod and .grad forms append
// {$gradx0, $gradx1} and {$grady0, $grady1}.
//
// Float32Regs results.
2519 def TEX_2D_ARRAY_F32_S32
2520 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2521 Float32Regs:$b, Float32Regs:$a),
2522 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2524 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2525 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2527 def TEX_2D_ARRAY_F32_F32
2528 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2529 Float32Regs:$b, Float32Regs:$a),
2530 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2532 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2533 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2535 def TEX_2D_ARRAY_F32_F32_LEVEL
2536 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2537 Float32Regs:$b, Float32Regs:$a),
2538 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2539 Float32Regs:$y, Float32Regs:$lod),
2540 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2541 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2543 def TEX_2D_ARRAY_F32_F32_GRAD
2544 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2545 Float32Regs:$b, Float32Regs:$a),
2546 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2547 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2548 Float32Regs:$grady0, Float32Regs:$grady1),
2549 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2550 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2551 "\\{$grady0, $grady1\\};",
// Int32Regs results, .s32 destination type.
2553 def TEX_2D_ARRAY_S32_S32
2554 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2555 Int32Regs:$b, Int32Regs:$a),
2556 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2558 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2559 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2561 def TEX_2D_ARRAY_S32_F32
2562 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2563 Int32Regs:$b, Int32Regs:$a),
2564 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2566 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2567 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2569 def TEX_2D_ARRAY_S32_F32_LEVEL
2570 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2571 Int32Regs:$b, Int32Regs:$a),
2572 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2573 Float32Regs:$y, Float32Regs:$lod),
2574 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2575 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2577 def TEX_2D_ARRAY_S32_F32_GRAD
2578 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2579 Int32Regs:$b, Int32Regs:$a),
2580 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2582 Float32Regs:$gradx0, Float32Regs:$gradx1,
2583 Float32Regs:$grady0, Float32Regs:$grady1),
2584 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2585 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2586 "\\{$grady0, $grady1\\};",
// Int32Regs results, .u32 destination type.
2588 def TEX_2D_ARRAY_U32_S32
2589 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2590 Int32Regs:$b, Int32Regs:$a),
2591 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2593 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2594 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2596 def TEX_2D_ARRAY_U32_F32
2597 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2598 Int32Regs:$b, Int32Regs:$a),
2599 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2601 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2602 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2604 def TEX_2D_ARRAY_U32_F32_LEVEL
2605 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2606 Int32Regs:$b, Int32Regs:$a),
2607 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2608 Float32Regs:$y, Float32Regs:$lod),
2609 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2610 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2612 def TEX_2D_ARRAY_U32_F32_GRAD
2613 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2614 Int32Regs:$b, Int32Regs:$a),
2615 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2617 Float32Regs:$gradx0, Float32Regs:$gradx1,
2618 Float32Regs:$grady0, Float32Regs:$grady1),
2619 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2620 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2621 "\\{$grady0, $grady1\\};",
// 3D texture fetches (tex.3d, tex.level.3d, tex.grad.3d).
// The coordinate vector is written with four elements, {$x, $y, $z, $z},
// repeating $z in the last slot (presumably padding). Results are four
// 32-bit components. The .level forms append $lod; the .grad forms append
// two four-element vectors that likewise repeat their third element:
// {$gradx0, $gradx1, $gradx2, $gradx2} and
// {$grady0, $grady1, $grady2, $grady2}.
//
// Float32Regs results.
2625 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2626 Float32Regs:$b, Float32Regs:$a),
2627 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2629 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2630 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2633 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2634 Float32Regs:$b, Float32Regs:$a),
2635 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2637 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2638 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2640 def TEX_3D_F32_F32_LEVEL
2641 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2642 Float32Regs:$b, Float32Regs:$a),
2643 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2644 Float32Regs:$z, Float32Regs:$lod),
2645 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2646 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2648 def TEX_3D_F32_F32_GRAD
2649 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2650 Float32Regs:$b, Float32Regs:$a),
2651 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2653 Float32Regs:$gradx0, Float32Regs:$gradx1,
2654 Float32Regs:$gradx2, Float32Regs:$grady0,
2655 Float32Regs:$grady1, Float32Regs:$grady2),
2656 "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2657 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2658 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2659 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Int32Regs results, .s32 destination type.
2662 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2663 Int32Regs:$b, Int32Regs:$a),
2664 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2666 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2667 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2670 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2671 Int32Regs:$b, Int32Regs:$a),
2672 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2674 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2675 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2677 def TEX_3D_S32_F32_LEVEL
2678 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2679 Int32Regs:$b, Int32Regs:$a),
2680 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2681 Float32Regs:$z, Float32Regs:$lod),
2682 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2683 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2685 def TEX_3D_S32_F32_GRAD
2686 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2687 Int32Regs:$b, Int32Regs:$a),
2688 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2690 Float32Regs:$gradx0, Float32Regs:$gradx1,
2691 Float32Regs:$gradx2, Float32Regs:$grady0,
2692 Float32Regs:$grady1, Float32Regs:$grady2),
2693 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2694 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2695 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2696 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Int32Regs results, .u32 destination type.
2699 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2700 Int32Regs:$b, Int32Regs:$a),
2701 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2703 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2704 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2707 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2708 Int32Regs:$b, Int32Regs:$a),
2709 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2711 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2712 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2714 def TEX_3D_U32_F32_LEVEL
2715 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2716 Int32Regs:$b, Int32Regs:$a),
2717 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2718 Float32Regs:$z, Float32Regs:$lod),
2719 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2720 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2722 def TEX_3D_U32_F32_GRAD
2723 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2724 Int32Regs:$b, Int32Regs:$a),
2725 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2727 Float32Regs:$gradx0, Float32Regs:$gradx1,
2728 Float32Regs:$gradx2, Float32Regs:$grady0,
2729 Float32Regs:$grady1, Float32Regs:$grady2),
2730 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2731 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2732 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2733 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Cube texture fetches (tex.cube, tex.level.cube).
// Coordinates are always Float32Regs and are written as the four-element
// vector {$x, $y, $z, $z}, repeating $z in the last slot. Only plain and
// .level forms are defined in this group; results are four 32-bit components
// in Float32Regs (f32) or Int32Regs (s32 / u32).
2736 def TEX_CUBE_F32_F32
2737 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2738 Float32Regs:$b, Float32Regs:$a),
2739 (ins Int64Regs:$t, Int64Regs:$s,
2740 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2741 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2742 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2744 def TEX_CUBE_F32_F32_LEVEL
2745 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2746 Float32Regs:$b, Float32Regs:$a),
2747 (ins Int64Regs:$t, Int64Regs:$s,
2748 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2750 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2751 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2753 def TEX_CUBE_S32_F32
2754 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2755 Int32Regs:$b, Int32Regs:$a),
2756 (ins Int64Regs:$t, Int64Regs:$s,
2757 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2758 "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2759 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2761 def TEX_CUBE_S32_F32_LEVEL
2762 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2763 Int32Regs:$b, Int32Regs:$a),
2764 (ins Int64Regs:$t, Int64Regs:$s,
2765 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2767 "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2768 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2770 def TEX_CUBE_U32_F32
2771 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2772 Int32Regs:$b, Int32Regs:$a),
2773 (ins Int64Regs:$t, Int64Regs:$s,
2774 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2775 "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2776 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2778 def TEX_CUBE_U32_F32_LEVEL
2779 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2780 Int32Regs:$b, Int32Regs:$a),
2781 (ins Int64Regs:$t, Int64Regs:$s,
2782 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2784 "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2785 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Cube array texture fetches (tex.acube, tex.level.acube).
// The Int32Regs operand $l is placed first in the coordinate vector, followed
// by the three Float32Regs coordinates: {$l, $x, $y, $z}. All four slots hold
// distinct operands, so nothing is repeated here. Only plain and .level forms
// are defined in this group.
2788 def TEX_CUBE_ARRAY_F32_F32
2789 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2790 Float32Regs:$b, Float32Regs:$a),
2791 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2792 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2793 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2794 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2796 def TEX_CUBE_ARRAY_F32_F32_LEVEL
2797 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2798 Float32Regs:$b, Float32Regs:$a),
2799 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2800 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2802 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2803 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2805 def TEX_CUBE_ARRAY_S32_F32
2806 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2807 Int32Regs:$b, Int32Regs:$a),
2808 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2809 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2810 "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2811 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2813 def TEX_CUBE_ARRAY_S32_F32_LEVEL
2814 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2815 Int32Regs:$b, Int32Regs:$a),
2816 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2817 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2819 "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2820 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2822 def TEX_CUBE_ARRAY_U32_F32
2823 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2824 Int32Regs:$b, Int32Regs:$a),
2825 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2826 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2827 "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2828 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2830 def TEX_CUBE_ARRAY_U32_F32_LEVEL
2831 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2832 Int32Regs:$b, Int32Regs:$a),
2833 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2834 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2836 "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2837 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
// tld4 instructions on 2D textures.
// One definition per component selector (.r, .g, .b, .a) and per result type
// (f32 in Float32Regs; s32 and u32 in Int32Regs). Each takes the handle
// operands $t and $s with Float32Regs coordinates {$x, $y} and writes four
// 32-bit values $v0..$v3.
//
// Float32Regs results.
2840 def TLD4_R_2D_F32_F32
2841 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2842 Float32Regs:$v2, Float32Regs:$v3),
2843 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2844 "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2845 "[$t, $s, \\{$x, $y\\}];",
2847 def TLD4_G_2D_F32_F32
2848 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2849 Float32Regs:$v2, Float32Regs:$v3),
2850 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2851 "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2852 "[$t, $s, \\{$x, $y\\}];",
2854 def TLD4_B_2D_F32_F32
2855 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2856 Float32Regs:$v2, Float32Regs:$v3),
2857 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2858 "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2859 "[$t, $s, \\{$x, $y\\}];",
2861 def TLD4_A_2D_F32_F32
2862 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2863 Float32Regs:$v2, Float32Regs:$v3),
2864 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2865 "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2866 "[$t, $s, \\{$x, $y\\}];",
// Int32Regs results, .s32 destination type.
2868 def TLD4_R_2D_S32_F32
2869 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2870 Int32Regs:$v2, Int32Regs:$v3),
2871 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2872 "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2873 "[$t, $s, \\{$x, $y\\}];",
2875 def TLD4_G_2D_S32_F32
2876 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2877 Int32Regs:$v2, Int32Regs:$v3),
2878 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2879 "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2880 "[$t, $s, \\{$x, $y\\}];",
2882 def TLD4_B_2D_S32_F32
2883 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2884 Int32Regs:$v2, Int32Regs:$v3),
2885 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2886 "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2887 "[$t, $s, \\{$x, $y\\}];",
2889 def TLD4_A_2D_S32_F32
2890 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2891 Int32Regs:$v2, Int32Regs:$v3),
2892 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2893 "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2894 "[$t, $s, \\{$x, $y\\}];",
// Int32Regs results, .u32 destination type.
2896 def TLD4_R_2D_U32_F32
2897 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2898 Int32Regs:$v2, Int32Regs:$v3),
2899 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2900 "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2901 "[$t, $s, \\{$x, $y\\}];",
2903 def TLD4_G_2D_U32_F32
2904 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2905 Int32Regs:$v2, Int32Regs:$v3),
2906 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2907 "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2908 "[$t, $s, \\{$x, $y\\}];",
2910 def TLD4_B_2D_U32_F32
2911 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2912 Int32Regs:$v2, Int32Regs:$v3),
2913 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2914 "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2915 "[$t, $s, \\{$x, $y\\}];",
2917 def TLD4_A_2D_U32_F32
2918 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2919 Int32Regs:$v2, Int32Regs:$v3),
2920 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2921 "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2922 "[$t, $s, \\{$x, $y\\}];",
2928 let IsTex = 1, IsTexModeUnified = 1 in {
2929 // Texture fetch instructions using handles
// Unified-mode 1D texture fetches (enclosing scope sets IsTexModeUnified = 1).
// These emit the same tex.1d / tex.level.1d / tex.grad.1d mnemonics as the
// TEX_1D_* definitions but take a single 64-bit handle operand $t; there is
// no $s operand and the address is written as [$t, {$x}]. The .level forms
// append $lod; the .grad forms append {$gradx} and {$grady}.
//
// Float32Regs results.
2930 def TEX_UNIFIED_1D_F32_S32
2931 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2932 Float32Regs:$b, Float32Regs:$a),
2933 (ins Int64Regs:$t, Int32Regs:$x),
2934 "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2936 def TEX_UNIFIED_1D_F32_F32
2937 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2938 Float32Regs:$b, Float32Regs:$a),
2939 (ins Int64Regs:$t, Float32Regs:$x),
2940 "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2942 def TEX_UNIFIED_1D_F32_F32_LEVEL
2943 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2944 Float32Regs:$b, Float32Regs:$a),
2945 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
2946 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2947 "[$t, \\{$x\\}], $lod;",
2949 def TEX_UNIFIED_1D_F32_F32_GRAD
2950 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2951 Float32Regs:$b, Float32Regs:$a),
2952 (ins Int64Regs:$t, Float32Regs:$x,
2953 Float32Regs:$gradx, Float32Regs:$grady),
2954 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2955 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Int32Regs results, .s32 destination type.
2957 def TEX_UNIFIED_1D_S32_S32
2958 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2959 Int32Regs:$b, Int32Regs:$a),
2960 (ins Int64Regs:$t, Int32Regs:$x),
2961 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2963 def TEX_UNIFIED_1D_S32_F32
2964 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2965 Int32Regs:$b, Int32Regs:$a),
2966 (ins Int64Regs:$t, Float32Regs:$x),
2967 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2969 def TEX_UNIFIED_1D_S32_F32_LEVEL
2970 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2971 Int32Regs:$b, Int32Regs:$a),
2972 (ins Int64Regs:$t, Float32Regs:$x,
2974 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2975 "[$t, \\{$x\\}], $lod;",
2977 def TEX_UNIFIED_1D_S32_F32_GRAD
2978 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2979 Int32Regs:$b, Int32Regs:$a),
2980 (ins Int64Regs:$t, Float32Regs:$x,
2981 Float32Regs:$gradx, Float32Regs:$grady),
2982 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2983 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Int32Regs results, .u32 destination type.
2985 def TEX_UNIFIED_1D_U32_S32
2986 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2987 Int32Regs:$b, Int32Regs:$a),
2988 (ins Int64Regs:$t, Int32Regs:$x),
2989 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2991 def TEX_UNIFIED_1D_U32_F32
2992 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2993 Int32Regs:$b, Int32Regs:$a),
2994 (ins Int64Regs:$t, Float32Regs:$x),
2995 "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2997 def TEX_UNIFIED_1D_U32_F32_LEVEL
2998 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2999 Int32Regs:$b, Int32Regs:$a),
3000 (ins Int64Regs:$t, Float32Regs:$x,
3002 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3003 "[$t, \\{$x\\}], $lod;",
3005 def TEX_UNIFIED_1D_U32_F32_GRAD
3006 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3007 Int32Regs:$b, Int32Regs:$a),
3008 (ins Int64Regs:$t, Float32Regs:$x,
3009 Float32Regs:$gradx, Float32Regs:$grady),
3010 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3011 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Unified-mode 1D-array (`a1d`) texture fetches with a `.v4.f32` result: four
// Float32Regs outputs ($r, $g, $b, $a). The coordinate vector is `{$l, $x}`,
// where `$l` is an Int32Regs operand (presumably the array layer index).
// Integer-coordinate form: `$x` is Int32Regs (`.f32.s32`).
3014 def TEX_UNIFIED_1D_ARRAY_F32_S32
3015 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3016 Float32Regs:$b, Float32Regs:$a),
3017 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3018 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3019 "[$t, \\{$l, $x\\}];",
// Float-coordinate form: `$x` is Float32Regs (`.f32.f32`).
3021 def TEX_UNIFIED_1D_ARRAY_F32_F32
3022 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3023 Float32Regs:$b, Float32Regs:$a),
3024 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3025 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3026 "[$t, \\{$l, $x\\}];",
// `tex.level` form: the asm string appends a `$lod` operand after the address.
3028 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3029 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3030 Float32Regs:$b, Float32Regs:$a),
3031 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3033 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3034 "[$t, \\{$l, $x\\}], $lod;",
// `tex.grad` form: `$gradx` and `$grady` (Float32Regs) are emitted as two
// brace-wrapped operands after the address.
3036 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3037 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3038 Float32Regs:$b, Float32Regs:$a),
3039 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3040 Float32Regs:$gradx, Float32Regs:$grady),
3041 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3042 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Unified-mode 1D-array (`a1d`) texture fetches with a `.v4.s32` result: four
// Int32Regs outputs ($r, $g, $b, $a). The coordinate vector is `{$l, $x}`,
// where `$l` is an Int32Regs operand (presumably the array layer index).
// Integer-coordinate form: `$x` is Int32Regs (`.s32.s32`).
3044 def TEX_UNIFIED_1D_ARRAY_S32_S32
3045 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3046 Int32Regs:$b, Int32Regs:$a),
3047 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3048 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3049 "[$t, \\{$l, $x\\}];",
// Float-coordinate form: `$x` is Float32Regs (`.s32.f32`).
3051 def TEX_UNIFIED_1D_ARRAY_S32_F32
3052 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3053 Int32Regs:$b, Int32Regs:$a),
3054 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3055 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3056 "[$t, \\{$l, $x\\}];",
// `tex.level` form: the asm string appends a `$lod` operand after the address.
3058 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3059 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3060 Int32Regs:$b, Int32Regs:$a),
3061 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3063 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3064 "[$t, \\{$l, $x\\}], $lod;",
// `tex.grad` form: `$gradx` and `$grady` (Float32Regs) are emitted as two
// brace-wrapped operands after the address.
3066 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3067 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3068 Int32Regs:$b, Int32Regs:$a),
3069 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3070 Float32Regs:$gradx, Float32Regs:$grady),
3071 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3072 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Unified-mode 1D-array (`a1d`) texture fetches with a `.v4.u32` result: four
// Int32Regs outputs ($r, $g, $b, $a). The coordinate vector is `{$l, $x}`,
// where `$l` is an Int32Regs operand (presumably the array layer index).
// Integer-coordinate form: `$x` is Int32Regs (`.u32.s32`).
3074 def TEX_UNIFIED_1D_ARRAY_U32_S32
3075 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3076 Int32Regs:$b, Int32Regs:$a),
3077 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3078 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3079 "[$t, \\{$l, $x\\}];",
// Float-coordinate form: `$x` is Float32Regs (`.u32.f32`).
3081 def TEX_UNIFIED_1D_ARRAY_U32_F32
3082 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3083 Int32Regs:$b, Int32Regs:$a),
3084 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3085 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3086 "[$t, \\{$l, $x\\}];",
// `tex.level` form: the asm string appends a `$lod` operand after the address.
3088 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3089 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3090 Int32Regs:$b, Int32Regs:$a),
3091 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3093 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3094 "[$t, \\{$l, $x\\}], $lod;",
// `tex.grad` form: `$gradx` and `$grady` (Float32Regs) are emitted as two
// brace-wrapped operands after the address.
3096 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3097 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3098 Int32Regs:$b, Int32Regs:$a),
3099 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3100 Float32Regs:$gradx, Float32Regs:$grady),
3101 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3102 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Unified-mode 2D texture fetches with a `.v4.f32` result: four Float32Regs
// outputs ($r, $g, $b, $a). The coordinate vector is `{$x, $y}`; `$t` is an
// Int64Regs operand (presumably the texture handle).
// Integer-coordinate form: `$x`, `$y` are Int32Regs (`.f32.s32`).
3105 def TEX_UNIFIED_2D_F32_S32
3106 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3107 Float32Regs:$b, Float32Regs:$a),
3108 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3109 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3110 "[$t, \\{$x, $y\\}];",
// Float-coordinate form: `$x`, `$y` are Float32Regs (`.f32.f32`).
3112 def TEX_UNIFIED_2D_F32_F32
3113 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3114 Float32Regs:$b, Float32Regs:$a),
3115 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3116 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3117 "[$t, \\{$x, $y\\}];",
// `tex.level` form: the asm string appends a `$lod` operand after the address.
3119 def TEX_UNIFIED_2D_F32_F32_LEVEL
3120 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3121 Float32Regs:$b, Float32Regs:$a),
3122 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3124 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3125 "[$t, \\{$x, $y\\}], $lod;",
// `tex.grad` form: two 2-element gradient vectors, `{$gradx0, $gradx1}` and
// `{$grady0, $grady1}`, follow the address.
3127 def TEX_UNIFIED_2D_F32_F32_GRAD
3128 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3129 Float32Regs:$b, Float32Regs:$a),
3130 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3131 Float32Regs:$gradx0, Float32Regs:$gradx1,
3132 Float32Regs:$grady0, Float32Regs:$grady1),
3133 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3134 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3135 "\\{$grady0, $grady1\\};",
// Unified-mode 2D texture fetches with a `.v4.s32` result: four Int32Regs
// outputs ($r, $g, $b, $a). The coordinate vector is `{$x, $y}`; `$t` is an
// Int64Regs operand (presumably the texture handle).
// Integer-coordinate form: `$x`, `$y` are Int32Regs (`.s32.s32`).
3137 def TEX_UNIFIED_2D_S32_S32
3138 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3139 Int32Regs:$b, Int32Regs:$a),
3140 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3141 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3142 "[$t, \\{$x, $y\\}];",
// Float-coordinate form: `$x`, `$y` are Float32Regs (`.s32.f32`).
3144 def TEX_UNIFIED_2D_S32_F32
3145 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3146 Int32Regs:$b, Int32Regs:$a),
3147 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3148 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3149 "[$t, \\{$x, $y\\}];",
// `tex.level` form: the asm string appends a `$lod` operand after the address.
3151 def TEX_UNIFIED_2D_S32_F32_LEVEL
3152 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3153 Int32Regs:$b, Int32Regs:$a),
3154 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3156 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3157 "[$t, \\{$x, $y\\}], $lod;",
// `tex.grad` form: two 2-element gradient vectors, `{$gradx0, $gradx1}` and
// `{$grady0, $grady1}`, follow the address.
3159 def TEX_UNIFIED_2D_S32_F32_GRAD
3160 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3161 Int32Regs:$b, Int32Regs:$a),
3162 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3163 Float32Regs:$gradx0, Float32Regs:$gradx1,
3164 Float32Regs:$grady0, Float32Regs:$grady1),
3165 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3166 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3167 "\\{$grady0, $grady1\\};",
// Unified-mode 2D texture fetches with a `.v4.u32` result: four Int32Regs
// outputs ($r, $g, $b, $a). The coordinate vector is `{$x, $y}`; `$t` is an
// Int64Regs operand (presumably the texture handle).
// Integer-coordinate form: `$x`, `$y` are Int32Regs (`.u32.s32`).
3169 def TEX_UNIFIED_2D_U32_S32
3170 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3171 Int32Regs:$b, Int32Regs:$a),
3172 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3173 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3174 "[$t, \\{$x, $y\\}];",
// Float-coordinate form: `$x`, `$y` are Float32Regs (`.u32.f32`).
3176 def TEX_UNIFIED_2D_U32_F32
3177 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3178 Int32Regs:$b, Int32Regs:$a),
3179 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3180 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3181 "[$t, \\{$x, $y\\}];",
// `tex.level` form: the asm string appends a `$lod` operand after the address.
3183 def TEX_UNIFIED_2D_U32_F32_LEVEL
3184 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3185 Int32Regs:$b, Int32Regs:$a),
3186 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3188 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3189 "[$t, \\{$x, $y\\}], $lod;",
// `tex.grad` form: two 2-element gradient vectors, `{$gradx0, $gradx1}` and
// `{$grady0, $grady1}`, follow the address.
3191 def TEX_UNIFIED_2D_U32_F32_GRAD
3192 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3193 Int32Regs:$b, Int32Regs:$a),
3194 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3195 Float32Regs:$gradx0, Float32Regs:$gradx1,
3196 Float32Regs:$grady0, Float32Regs:$grady1),
3197 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3198 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3199 "\\{$grady0, $grady1\\};",
// Unified-mode 2D-array (`a2d`) texture fetches with a `.v4.f32` result: four
// Float32Regs outputs ($r, $g, $b, $a). The asm string emits a four-element
// coordinate vector `{$l, $x, $y, $y}`: `$y` is deliberately listed twice, so
// the fourth slot carries no new operand (presumably padding — confirm
// against the PTX `tex` specification).
// Integer-coordinate form (`.f32.s32`).
3202 def TEX_UNIFIED_2D_ARRAY_F32_S32
3203 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3204 Float32Regs:$b, Float32Regs:$a),
3205 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3207 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3208 "[$t, \\{$l, $x, $y, $y\\}];",
// Float-coordinate form (`.f32.f32`).
3210 def TEX_UNIFIED_2D_ARRAY_F32_F32
3211 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3212 Float32Regs:$b, Float32Regs:$a),
3213 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3215 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3216 "[$t, \\{$l, $x, $y, $y\\}];",
// `tex.level` form: `$lod` (Float32Regs) is appended after the address.
3218 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3219 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3220 Float32Regs:$b, Float32Regs:$a),
3221 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3222 Float32Regs:$y, Float32Regs:$lod),
3223 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3224 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// `tex.grad` form: two 2-element gradient vectors, `{$gradx0, $gradx1}` and
// `{$grady0, $grady1}`, follow the address.
3226 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3227 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3228 Float32Regs:$b, Float32Regs:$a),
3229 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3230 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
3231 Float32Regs:$grady0, Float32Regs:$grady1),
3232 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3233 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3234 "\\{$grady0, $grady1\\};",
// Unified-mode 2D-array (`a2d`) texture fetches with a `.v4.s32` result: four
// Int32Regs outputs ($r, $g, $b, $a). The asm string emits a four-element
// coordinate vector `{$l, $x, $y, $y}`: `$y` is deliberately listed twice, so
// the fourth slot carries no new operand (presumably padding — confirm
// against the PTX `tex` specification).
// Integer-coordinate form (`.s32.s32`).
3236 def TEX_UNIFIED_2D_ARRAY_S32_S32
3237 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3238 Int32Regs:$b, Int32Regs:$a),
3239 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3241 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3242 "[$t, \\{$l, $x, $y, $y\\}];",
// Float-coordinate form (`.s32.f32`).
3244 def TEX_UNIFIED_2D_ARRAY_S32_F32
3245 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3246 Int32Regs:$b, Int32Regs:$a),
3247 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3249 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3250 "[$t, \\{$l, $x, $y, $y\\}];",
// `tex.level` form: `$lod` (Float32Regs) is appended after the address.
3252 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3253 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3254 Int32Regs:$b, Int32Regs:$a),
3255 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3256 Float32Regs:$y, Float32Regs:$lod),
3257 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3258 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// `tex.grad` form: two 2-element gradient vectors, `{$gradx0, $gradx1}` and
// `{$grady0, $grady1}`, follow the address.
3260 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3261 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3262 Int32Regs:$b, Int32Regs:$a),
3263 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3265 Float32Regs:$gradx0, Float32Regs:$gradx1,
3266 Float32Regs:$grady0, Float32Regs:$grady1),
3267 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3268 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3269 "\\{$grady0, $grady1\\};",
// Unified-mode 2D-array (`a2d`) texture fetches with a `.v4.u32` result: four
// Int32Regs outputs ($r, $g, $b, $a). The asm string emits a four-element
// coordinate vector `{$l, $x, $y, $y}`: `$y` is deliberately listed twice, so
// the fourth slot carries no new operand (presumably padding — confirm
// against the PTX `tex` specification).
// Integer-coordinate form (`.u32.s32`).
3271 def TEX_UNIFIED_2D_ARRAY_U32_S32
3272 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3273 Int32Regs:$b, Int32Regs:$a),
3274 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3276 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3277 "[$t, \\{$l, $x, $y, $y\\}];",
// Float-coordinate form (`.u32.f32`).
3279 def TEX_UNIFIED_2D_ARRAY_U32_F32
3280 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3281 Int32Regs:$b, Int32Regs:$a),
3282 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3284 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3285 "[$t, \\{$l, $x, $y, $y\\}];",
// `tex.level` form: `$lod` (Float32Regs) is appended after the address.
3287 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3288 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3289 Int32Regs:$b, Int32Regs:$a),
3290 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3291 Float32Regs:$y, Float32Regs:$lod),
3292 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3293 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// `tex.grad` form: two 2-element gradient vectors, `{$gradx0, $gradx1}` and
// `{$grady0, $grady1}`, follow the address.
3295 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3296 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3297 Int32Regs:$b, Int32Regs:$a),
3298 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3300 Float32Regs:$gradx0, Float32Regs:$gradx1,
3301 Float32Regs:$grady0, Float32Regs:$grady1),
3302 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3303 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3304 "\\{$grady0, $grady1\\};",
// Unified-mode 3D texture fetches with a `.v4.f32` result: four Float32Regs
// outputs ($r, $g, $b, $a). The asm string emits a four-element coordinate
// vector `{$x, $y, $z, $z}`: `$z` is deliberately listed twice, so the fourth
// slot carries no new operand (presumably padding — confirm against the PTX
// `tex` specification).
// Integer-coordinate form (`.f32.s32`).
3307 def TEX_UNIFIED_3D_F32_S32
3308 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3309 Float32Regs:$b, Float32Regs:$a),
3310 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3312 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3313 "[$t, \\{$x, $y, $z, $z\\}];",
// Float-coordinate form (`.f32.f32`).
3315 def TEX_UNIFIED_3D_F32_F32
3316 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3317 Float32Regs:$b, Float32Regs:$a),
3318 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3320 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3321 "[$t, \\{$x, $y, $z, $z\\}];",
// `tex.level` form: `$lod` (Float32Regs) is appended after the address.
3323 def TEX_UNIFIED_3D_F32_F32_LEVEL
3324 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3325 Float32Regs:$b, Float32Regs:$a),
3326 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3327 Float32Regs:$z, Float32Regs:$lod),
3328 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3329 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// `tex.grad` form: each gradient has three operands but is emitted as a
// four-element vector with the last one repeated (`$gradx2`, `$grady2`).
3331 def TEX_UNIFIED_3D_F32_F32_GRAD
3332 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3333 Float32Regs:$b, Float32Regs:$a),
3334 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3336 Float32Regs:$gradx0, Float32Regs:$gradx1,
3337 Float32Regs:$gradx2, Float32Regs:$grady0,
3338 Float32Regs:$grady1, Float32Regs:$grady2),
3339 "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3340 "[$t, \\{$x, $y, $z, $z\\}], "
3341 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3342 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Unified-mode 3D texture fetches with a `.v4.s32` result: four Int32Regs
// outputs ($r, $g, $b, $a). The asm string emits a four-element coordinate
// vector `{$x, $y, $z, $z}`: `$z` is deliberately listed twice, so the fourth
// slot carries no new operand (presumably padding — confirm against the PTX
// `tex` specification).
// Integer-coordinate form (`.s32.s32`).
3344 def TEX_UNIFIED_3D_S32_S32
3345 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3346 Int32Regs:$b, Int32Regs:$a),
3347 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3349 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3350 "[$t, \\{$x, $y, $z, $z\\}];",
// Float-coordinate form (`.s32.f32`).
3352 def TEX_UNIFIED_3D_S32_F32
3353 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3354 Int32Regs:$b, Int32Regs:$a),
3355 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3357 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3358 "[$t, \\{$x, $y, $z, $z\\}];",
// `tex.level` form: `$lod` (Float32Regs) is appended after the address.
3360 def TEX_UNIFIED_3D_S32_F32_LEVEL
3361 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3362 Int32Regs:$b, Int32Regs:$a),
3363 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3364 Float32Regs:$z, Float32Regs:$lod),
3365 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3366 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// `tex.grad` form: each gradient has three operands but is emitted as a
// four-element vector with the last one repeated (`$gradx2`, `$grady2`).
3368 def TEX_UNIFIED_3D_S32_F32_GRAD
3369 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3370 Int32Regs:$b, Int32Regs:$a),
3371 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3373 Float32Regs:$gradx0, Float32Regs:$gradx1,
3374 Float32Regs:$gradx2, Float32Regs:$grady0,
3375 Float32Regs:$grady1, Float32Regs:$grady2),
3376 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3377 "[$t, \\{$x, $y, $z, $z\\}], "
3378 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3379 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Unified-mode 3D texture fetches with a `.v4.u32` result: four Int32Regs
// outputs ($r, $g, $b, $a). The asm string emits a four-element coordinate
// vector `{$x, $y, $z, $z}`: `$z` is deliberately listed twice, so the fourth
// slot carries no new operand (presumably padding — confirm against the PTX
// `tex` specification).
// Integer-coordinate form (`.u32.s32`).
3381 def TEX_UNIFIED_3D_U32_S32
3382 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3383 Int32Regs:$b, Int32Regs:$a),
3384 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3386 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3387 "[$t, \\{$x, $y, $z, $z\\}];",
// Float-coordinate form (`.u32.f32`).
3389 def TEX_UNIFIED_3D_U32_F32
3390 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3391 Int32Regs:$b, Int32Regs:$a),
3392 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3394 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3395 "[$t, \\{$x, $y, $z, $z\\}];",
// `tex.level` form: `$lod` (Float32Regs) is appended after the address.
3397 def TEX_UNIFIED_3D_U32_F32_LEVEL
3398 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3399 Int32Regs:$b, Int32Regs:$a),
3400 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3401 Float32Regs:$z, Float32Regs:$lod),
3402 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3403 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// `tex.grad` form: each gradient has three operands but is emitted as a
// four-element vector with the last one repeated (`$gradx2`, `$grady2`).
3405 def TEX_UNIFIED_3D_U32_F32_GRAD
3406 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3407 Int32Regs:$b, Int32Regs:$a),
3408 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3410 Float32Regs:$gradx0, Float32Regs:$gradx1,
3411 Float32Regs:$gradx2, Float32Regs:$grady0,
3412 Float32Regs:$grady1, Float32Regs:$grady2),
3413 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3414 "[$t, \\{$x, $y, $z, $z\\}], "
3415 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3416 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Unified-mode cube-map texture fetches with a `.v4.f32` result: four
// Float32Regs outputs ($r, $g, $b, $a). Coordinates are Float32Regs and are
// emitted as a four-element vector `{$x, $y, $z, $z}` (`$z` listed twice).
// Plain fetch.
3419 def TEX_UNIFIED_CUBE_F32_F32
3420 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3421 Float32Regs:$b, Float32Regs:$a),
3423 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3424 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3425 "[$t, \\{$x, $y, $z, $z\\}];",
// `tex.level` form: the asm string appends a `$lod` operand after the address.
3427 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
3428 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3429 Float32Regs:$b, Float32Regs:$a),
3431 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3433 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3434 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Unified-mode cube-map texture fetches with a `.v4.s32` result: four
// Int32Regs outputs ($r, $g, $b, $a). Coordinates are Float32Regs and are
// emitted as a four-element vector `{$x, $y, $z, $z}` (`$z` listed twice).
// Plain fetch.
3436 def TEX_UNIFIED_CUBE_S32_F32
3437 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3438 Int32Regs:$b, Int32Regs:$a),
3440 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3441 "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3442 "[$t, \\{$x, $y, $z, $z\\}];",
// `tex.level` form: the asm string appends a `$lod` operand after the address.
3444 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
3445 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3446 Int32Regs:$b, Int32Regs:$a),
3448 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3450 "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3451 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Unified-mode cube-map texture fetches with a `.v4.u32` result: four
// Int32Regs outputs ($r, $g, $b, $a). Coordinates are Float32Regs and are
// emitted as a four-element vector `{$x, $y, $z, $z}` (`$z` listed twice).
// Plain fetch.
3453 def TEX_UNIFIED_CUBE_U32_F32
3454 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3455 Int32Regs:$b, Int32Regs:$a),
3457 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3458 "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3459 "[$t, \\{$x, $y, $z, $z\\}];",
// `tex.level` form: the asm string appends a `$lod` operand after the address.
3461 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
3462 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3463 Int32Regs:$b, Int32Regs:$a),
3465 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3467 "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3468 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Unified-mode cube-map-array (`acube`) texture fetches with a `.v4.f32`
// result: four Float32Regs outputs ($r, $g, $b, $a). The coordinate vector is
// `{$l, $x, $y, $z}`: an Int32Regs `$l` (presumably the array layer index)
// followed by three Float32Regs coordinates, with no repeated element.
// Plain fetch.
3471 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
3472 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3473 Float32Regs:$b, Float32Regs:$a),
3474 (ins Int64Regs:$t, Int32Regs:$l,
3475 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3476 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3477 "[$t, \\{$l, $x, $y, $z\\}];",
// `tex.level` form: the asm string appends a `$lod` operand after the address.
3479 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3480 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3481 Float32Regs:$b, Float32Regs:$a),
3482 (ins Int64Regs:$t, Int32Regs:$l,
3483 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3485 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3486 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// Unified-mode cube-map-array (`acube`) texture fetches with a `.v4.s32`
// result: four Int32Regs outputs ($r, $g, $b, $a). The coordinate vector is
// `{$l, $x, $y, $z}`: an Int32Regs `$l` (presumably the array layer index)
// followed by three Float32Regs coordinates, with no repeated element.
// Plain fetch.
3488 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
3489 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3490 Int32Regs:$b, Int32Regs:$a),
3491 (ins Int64Regs:$t, Int32Regs:$l,
3492 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3493 "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3494 "[$t, \\{$l, $x, $y, $z\\}];",
// `tex.level` form: the asm string appends a `$lod` operand after the address.
3496 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3497 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3498 Int32Regs:$b, Int32Regs:$a),
3499 (ins Int64Regs:$t, Int32Regs:$l,
3500 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3502 "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3503 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// Unified-mode cube-map-array (`acube`) texture fetches with a `.v4.u32`
// result: four Int32Regs outputs ($r, $g, $b, $a). The coordinate vector is
// `{$l, $x, $y, $z}`: an Int32Regs `$l` (presumably the array layer index)
// followed by three Float32Regs coordinates, with no repeated element.
// Plain fetch.
3505 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
3506 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3507 Int32Regs:$b, Int32Regs:$a),
3508 (ins Int64Regs:$t, Int32Regs:$l,
3509 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3510 "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3511 "[$t, \\{$l, $x, $y, $z\\}];",
// `tex.level` form: the asm string appends a `$lod` operand after the address.
3513 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3514 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3515 Int32Regs:$b, Int32Regs:$a),
3516 (ins Int64Regs:$t, Int32Regs:$l,
3517 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3519 "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3520 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// Unified-mode `tld4` 2D instructions with a `.v4.f32` result: four
// Float32Regs outputs ($v0..$v3) and Float32Regs coordinates `{$x, $y}`.
// The four records differ only in the component selector that follows
// `tld4.` in the asm string (`r`, `g`, `b`, `a`).
// Selector `r`.
3523 def TLD4_UNIFIED_R_2D_F32_F32
3524 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3525 Float32Regs:$v2, Float32Regs:$v3),
3526 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3527 "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3528 "[$t, \\{$x, $y\\}];",
// Selector `g`.
3530 def TLD4_UNIFIED_G_2D_F32_F32
3531 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3532 Float32Regs:$v2, Float32Regs:$v3),
3533 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3534 "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3535 "[$t, \\{$x, $y\\}];",
// Selector `b`.
3537 def TLD4_UNIFIED_B_2D_F32_F32
3538 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3539 Float32Regs:$v2, Float32Regs:$v3),
3540 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3541 "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3542 "[$t, \\{$x, $y\\}];",
// Selector `a`.
3544 def TLD4_UNIFIED_A_2D_F32_F32
3545 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3546 Float32Regs:$v2, Float32Regs:$v3),
3547 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3548 "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3549 "[$t, \\{$x, $y\\}];",
// Unified-mode `tld4` 2D instructions with a `.v4.s32` result: four
// Int32Regs outputs ($v0..$v3) and Float32Regs coordinates `{$x, $y}`.
// The four records differ only in the component selector that follows
// `tld4.` in the asm string (`r`, `g`, `b`, `a`).
// Selector `r`.
3551 def TLD4_UNIFIED_R_2D_S32_F32
3552 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3553 Int32Regs:$v2, Int32Regs:$v3),
3554 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3555 "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3556 "[$t, \\{$x, $y\\}];",
// Selector `g`.
3558 def TLD4_UNIFIED_G_2D_S32_F32
3559 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3560 Int32Regs:$v2, Int32Regs:$v3),
3561 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3562 "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3563 "[$t, \\{$x, $y\\}];",
// Selector `b`.
3565 def TLD4_UNIFIED_B_2D_S32_F32
3566 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3567 Int32Regs:$v2, Int32Regs:$v3),
3568 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3569 "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3570 "[$t, \\{$x, $y\\}];",
// Selector `a`.
3572 def TLD4_UNIFIED_A_2D_S32_F32
3573 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3574 Int32Regs:$v2, Int32Regs:$v3),
3575 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3576 "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3577 "[$t, \\{$x, $y\\}];",
// Unified-mode `tld4` 2D instructions with a `.v4.u32` result: four
// Int32Regs outputs ($v0..$v3) and Float32Regs coordinates `{$x, $y}`.
// The four records differ only in the component selector that follows
// `tld4.` in the asm string (`r`, `g`, `b`, `a`).
// Selector `r`.
3579 def TLD4_UNIFIED_R_2D_U32_F32
3580 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3581 Int32Regs:$v2, Int32Regs:$v3),
3582 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3583 "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3584 "[$t, \\{$x, $y\\}];",
// Selector `g`.
3586 def TLD4_UNIFIED_G_2D_U32_F32
3587 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3588 Int32Regs:$v2, Int32Regs:$v3),
3589 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3590 "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3591 "[$t, \\{$x, $y\\}];",
// Selector `b`.
3593 def TLD4_UNIFIED_B_2D_U32_F32
3594 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3595 Int32Regs:$v2, Int32Regs:$v3),
3596 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3597 "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3598 "[$t, \\{$x, $y\\}];",
// Selector `a`.
3600 def TLD4_UNIFIED_A_2D_U32_F32
3601 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3602 Int32Regs:$v2, Int32Regs:$v3),
3603 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3604 "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3605 "[$t, \\{$x, $y\\}];",
3611 //=== Surface load instructions
// Scalar 1D surface loads (`suld.b.1d`) with the `.clamp` modifier. `$s` is an
// Int64Regs operand placed first in the bracketed address (presumably the
// surface handle); `$x` is an Int32Regs coordinate. One record per element
// width: b8, b16, b32, b64.
// b8 element; note the result register class is Int16Regs, not an 8-bit class.
3614 def SULD_1D_I8_CLAMP
3615 : NVPTXInst<(outs Int16Regs:$r),
3616 (ins Int64Regs:$s, Int32Regs:$x),
3617 "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
// b16 element, Int16Regs result.
3619 def SULD_1D_I16_CLAMP
3620 : NVPTXInst<(outs Int16Regs:$r),
3621 (ins Int64Regs:$s, Int32Regs:$x),
3622 "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
// b32 element, Int32Regs result.
3624 def SULD_1D_I32_CLAMP
3625 : NVPTXInst<(outs Int32Regs:$r),
3626 (ins Int64Regs:$s, Int32Regs:$x),
3627 "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
// b64 element, Int64Regs result.
3629 def SULD_1D_I64_CLAMP
3630 : NVPTXInst<(outs Int64Regs:$r),
3631 (ins Int64Regs:$s, Int32Regs:$x),
3632 "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
// Scalar 1D-array surface loads (`suld.b.a1d`) with the `.clamp` modifier.
// The coordinate vector is `{$l, $x}`, both Int32Regs (`$l` is presumably the
// array layer index). One record per element width: b8, b16, b32, b64.
// b8 element; note the result register class is Int16Regs, not an 8-bit class.
3635 def SULD_1D_ARRAY_I8_CLAMP
3636 : NVPTXInst<(outs Int16Regs:$r),
3637 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3638 "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// b16 element, Int16Regs result.
3640 def SULD_1D_ARRAY_I16_CLAMP
3641 : NVPTXInst<(outs Int16Regs:$r),
3642 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3643 "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// b32 element, Int32Regs result.
3645 def SULD_1D_ARRAY_I32_CLAMP
3646 : NVPTXInst<(outs Int32Regs:$r),
3647 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3648 "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// b64 element, Int64Regs result.
3650 def SULD_1D_ARRAY_I64_CLAMP
3651 : NVPTXInst<(outs Int64Regs:$r),
3652 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3653 "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// Scalar 2D surface loads (`suld.b.2d`) with the `.clamp` modifier. The
// coordinate vector is `{$x, $y}`, both Int32Regs. One record per element
// width: b8, b16, b32, b64.
// b8 element; note the result register class is Int16Regs, not an 8-bit class.
3656 def SULD_2D_I8_CLAMP
3657 : NVPTXInst<(outs Int16Regs:$r),
3658 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3659 "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// b16 element, Int16Regs result.
3661 def SULD_2D_I16_CLAMP
3662 : NVPTXInst<(outs Int16Regs:$r),
3663 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3664 "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// b32 element, Int32Regs result.
3666 def SULD_2D_I32_CLAMP
3667 : NVPTXInst<(outs Int32Regs:$r),
3668 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3669 "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// b64 element, Int64Regs result.
3671 def SULD_2D_I64_CLAMP
3672 : NVPTXInst<(outs Int64Regs:$r),
3673 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3674 "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// Scalar 2D-array surface loads (`suld.b.a2d`) with the `.clamp` modifier.
// Three Int32Regs coordinates are declared ($l, $x, $y) but the asm string
// emits a four-element vector `{$l, $x, $y, $y}` with `$y` listed twice.
// One record per element width: b8, b16, b32, b64.
// b8 element; note the result register class is Int16Regs, not an 8-bit class.
3677 def SULD_2D_ARRAY_I8_CLAMP
3678 : NVPTXInst<(outs Int16Regs:$r),
3679 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3680 "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// b16 element, Int16Regs result.
3682 def SULD_2D_ARRAY_I16_CLAMP
3683 : NVPTXInst<(outs Int16Regs:$r),
3684 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3685 "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// b32 element, Int32Regs result.
3687 def SULD_2D_ARRAY_I32_CLAMP
3688 : NVPTXInst<(outs Int32Regs:$r),
3689 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3690 "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// b64 element, Int64Regs result.
3692 def SULD_2D_ARRAY_I64_CLAMP
3693 : NVPTXInst<(outs Int64Regs:$r),
3694 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3695 "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// Scalar 3D surface loads (`suld.b.3d`) with the `.clamp` modifier. Three
// Int32Regs coordinates are declared ($x, $y, $z) but the asm string emits a
// four-element vector `{$x, $y, $z, $z}` with `$z` listed twice.
// One record per element width: b8, b16, b32, b64.
// b8 element; note the result register class is Int16Regs, not an 8-bit class.
3698 def SULD_3D_I8_CLAMP
3699 : NVPTXInst<(outs Int16Regs:$r),
3700 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3701 "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// b16 element, Int16Regs result.
3703 def SULD_3D_I16_CLAMP
3704 : NVPTXInst<(outs Int16Regs:$r),
3705 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3706 "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// b32 element, Int32Regs result.
3708 def SULD_3D_I32_CLAMP
3709 : NVPTXInst<(outs Int32Regs:$r),
3710 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3711 "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// b64 element, Int64Regs result.
3713 def SULD_3D_I64_CLAMP
3714 : NVPTXInst<(outs Int64Regs:$r),
3715 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3716 "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Two-element (`.v2`) 1D surface loads (`suld.b.1d`) with the `.clamp`
// modifier. Each record has two outputs ($r, $g) and an Int32Regs coordinate
// `$x`. One record per element width: b8, b16, b32, b64.
// b8 elements; both results use Int16Regs, not an 8-bit class.
3721 def SULD_1D_V2I8_CLAMP
3722 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3723 (ins Int64Regs:$s, Int32Regs:$x),
3724 "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// b16 elements, Int16Regs results.
3726 def SULD_1D_V2I16_CLAMP
3727 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3728 (ins Int64Regs:$s, Int32Regs:$x),
3729 "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// b32 elements, Int32Regs results.
3731 def SULD_1D_V2I32_CLAMP
3732 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3733 (ins Int64Regs:$s, Int32Regs:$x),
3734 "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// b64 elements, Int64Regs results.
3736 def SULD_1D_V2I64_CLAMP
3737 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3738 (ins Int64Regs:$s, Int32Regs:$x),
3739 "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// Two-element (`.v2`) 1D-array surface loads (`suld.b.a1d`) with the `.clamp`
// modifier. Each record has two outputs ($r, $g) and the coordinate vector
// `{$l, $x}`, both Int32Regs. One record per element width: b8, b16, b32, b64.
// b8 elements; both results use Int16Regs, not an 8-bit class.
3742 def SULD_1D_ARRAY_V2I8_CLAMP
3743 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3744 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3745 "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// b16 elements, Int16Regs results.
3747 def SULD_1D_ARRAY_V2I16_CLAMP
3748 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3749 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3750 "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// b32 elements, Int32Regs results.
3752 def SULD_1D_ARRAY_V2I32_CLAMP
3753 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3754 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3755 "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// b64 elements, Int64Regs results.
3757 def SULD_1D_ARRAY_V2I64_CLAMP
3758 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3759 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3760 "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// Two-element (`.v2`) 2D surface loads (`suld.b.2d`) with the `.clamp`
// modifier. Each record has two outputs ($r, $g) and the coordinate vector
// `{$x, $y}`, both Int32Regs. One record per element width: b8, b16, b32, b64.
// b8 elements; both results use Int16Regs, not an 8-bit class.
3763 def SULD_2D_V2I8_CLAMP
3764 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3765 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3766 "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// b16 elements, Int16Regs results.
3768 def SULD_2D_V2I16_CLAMP
3769 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3770 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3771 "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// b32 elements, Int32Regs results.
3773 def SULD_2D_V2I32_CLAMP
3774 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3775 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3776 "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// b64 elements, Int64Regs results.
3778 def SULD_2D_V2I64_CLAMP
3779 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3780 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3781 "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// Two-element (`.v2`) 2D-array surface loads (`suld.b.a2d`) with the `.clamp`
// modifier. Each record has two outputs ($r, $g). Three Int32Regs coordinates
// are declared ($l, $x, $y) but the asm string emits `{$l, $x, $y, $y}` with
// `$y` listed twice. One record per element width: b8, b16, b32, b64.
// b8 elements; both results use Int16Regs, not an 8-bit class.
3784 def SULD_2D_ARRAY_V2I8_CLAMP
3785 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3786 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3787 "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
3788 "[$s, \\{$l, $x, $y, $y\\}];",
// b16 elements, Int16Regs results.
3790 def SULD_2D_ARRAY_V2I16_CLAMP
3791 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3792 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3793 "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
3794 "[$s, \\{$l, $x, $y, $y\\}];",
// b32 elements, Int32Regs results.
3796 def SULD_2D_ARRAY_V2I32_CLAMP
3797 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3798 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3799 "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
3800 "[$s, \\{$l, $x, $y, $y\\}];",
// b64 elements, Int64Regs results.
3802 def SULD_2D_ARRAY_V2I64_CLAMP
3803 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3804 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3805 "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
3806 "[$s, \\{$l, $x, $y, $y\\}];",
// Two-element (`.v2`) 3D surface loads (`suld.b.3d`) with the `.clamp`
// modifier. Each record has two outputs ($r, $g). Three Int32Regs coordinates
// are declared ($x, $y, $z) but the asm string emits `{$x, $y, $z, $z}` with
// `$z` listed twice. One record per element width: b8, b16, b32, b64.
// b8 elements; both results use Int16Regs, not an 8-bit class.
3809 def SULD_3D_V2I8_CLAMP
3810 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3811 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3812 "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// b16 elements, Int16Regs results.
3814 def SULD_3D_V2I16_CLAMP
3815 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3816 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3817 "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// b32 elements, Int32Regs results.
3819 def SULD_3D_V2I32_CLAMP
3820 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3821 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3822 "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// b64 elements, Int64Regs results.
3824 def SULD_3D_V2I64_CLAMP
3825 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3826 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3827 "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Four-element (`.v4`) 1D surface loads (`suld.b.1d`) with the `.clamp`
// modifier. Each record has four outputs ($r, $g, $b, $a) and an Int32Regs
// coordinate `$x`. Only b8, b16 and b32 element widths appear in this group.
// b8 elements; all four results use Int16Regs, not an 8-bit class.
3832 def SULD_1D_V4I8_CLAMP
3833 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3834 (ins Int64Regs:$s, Int32Regs:$x),
3835 "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// b16 elements, Int16Regs results.
3837 def SULD_1D_V4I16_CLAMP
3838 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3839 (ins Int64Regs:$s, Int32Regs:$x),
3840 "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// b32 elements, Int32Regs results.
3842 def SULD_1D_V4I32_CLAMP
3843 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3844 (ins Int64Regs:$s, Int32Regs:$x),
3845 "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// Four-element (`.v4`) 1D-array surface loads (`suld.b.a1d`) with the
// `.clamp` modifier. Each record has four outputs ($r, $g, $b, $a) and the
// coordinate vector `{$l, $x}`, both Int32Regs. Only b8, b16 and b32 element
// widths appear in this group.
// b8 elements; all four results use Int16Regs, not an 8-bit class.
3848 def SULD_1D_ARRAY_V4I8_CLAMP
3849 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3850 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3851 "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3852 "[$s, \\{$l, $x\\}];",
// b16 elements, Int16Regs results.
3854 def SULD_1D_ARRAY_V4I16_CLAMP
3855 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3856 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3857 "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3858 "[$s, \\{$l, $x\\}];",
// b32 elements, Int32Regs results.
3860 def SULD_1D_ARRAY_V4I32_CLAMP
3861 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3862 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3863 "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3864 "[$s, \\{$l, $x\\}];",
// Four-element (`.v4`) 2D surface loads (`suld.b.2d`) with the `.clamp`
// modifier. Each record has four outputs ($r, $g, $b, $a) and the coordinate
// vector `{$x, $y}`, both Int32Regs. Only b8, b16 and b32 element widths
// appear in this group.
// b8 elements; all four results use Int16Regs, not an 8-bit class.
3867 def SULD_2D_V4I8_CLAMP
3868 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3869 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3870 "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// b16 elements, Int16Regs results.
3872 def SULD_2D_V4I16_CLAMP
3873 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3874 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3875 "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// b32 elements, Int32Regs results.
3877 def SULD_2D_V4I32_CLAMP
3878 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3879 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3880 "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// Four-element (`.v4`) 2D-array surface loads (`suld.b.a2d`) with the
// `.clamp` modifier. Each record has four outputs ($r, $g, $b, $a). Three
// Int32Regs coordinates are declared ($l, $x, $y) but the asm string emits
// `{$l, $x, $y, $y}` with `$y` listed twice. Only b8, b16 and b32 element
// widths appear in this group.
// b8 elements; all four results use Int16Regs, not an 8-bit class.
3883 def SULD_2D_ARRAY_V4I8_CLAMP
3884 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3885 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3886 "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3887 "[$s, \\{$l, $x, $y, $y\\}];",
// b16 elements, Int16Regs results.
3889 def SULD_2D_ARRAY_V4I16_CLAMP
3890 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3891 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3892 "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3893 "[$s, \\{$l, $x, $y, $y\\}];",
// b32 elements, Int32Regs results.
3895 def SULD_2D_ARRAY_V4I32_CLAMP
3896 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3897 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3898 "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3899 "[$s, \\{$l, $x, $y, $y\\}];",
3903 def SULD_3D_V4I8_CLAMP
3904 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3905 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3906 "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3907 "[$s, \\{$x, $y, $z, $z\\}];",
3909 def SULD_3D_V4I16_CLAMP
3910 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3911 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3912 "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3913 "[$s, \\{$x, $y, $z, $z\\}];",
3915 def SULD_3D_V4I32_CLAMP
3916 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3917 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3918 "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3919 "[$s, \\{$x, $y, $z, $z\\}];",
3927 : NVPTXInst<(outs Int16Regs:$r),
3928 (ins Int64Regs:$s, Int32Regs:$x),
3929 "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
3931 def SULD_1D_I16_TRAP
3932 : NVPTXInst<(outs Int16Regs:$r),
3933 (ins Int64Regs:$s, Int32Regs:$x),
3934 "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
3936 def SULD_1D_I32_TRAP
3937 : NVPTXInst<(outs Int32Regs:$r),
3938 (ins Int64Regs:$s, Int32Regs:$x),
3939 "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
3941 def SULD_1D_I64_TRAP
3942 : NVPTXInst<(outs Int64Regs:$r),
3943 (ins Int64Regs:$s, Int32Regs:$x),
3944 "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
3947 def SULD_1D_ARRAY_I8_TRAP
3948 : NVPTXInst<(outs Int16Regs:$r),
3949 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3950 "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3952 def SULD_1D_ARRAY_I16_TRAP
3953 : NVPTXInst<(outs Int16Regs:$r),
3954 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3955 "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3957 def SULD_1D_ARRAY_I32_TRAP
3958 : NVPTXInst<(outs Int32Regs:$r),
3959 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3960 "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3962 def SULD_1D_ARRAY_I64_TRAP
3963 : NVPTXInst<(outs Int64Regs:$r),
3964 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3965 "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3969 : NVPTXInst<(outs Int16Regs:$r),
3970 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3971 "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3973 def SULD_2D_I16_TRAP
3974 : NVPTXInst<(outs Int16Regs:$r),
3975 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3976 "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3978 def SULD_2D_I32_TRAP
3979 : NVPTXInst<(outs Int32Regs:$r),
3980 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3981 "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3983 def SULD_2D_I64_TRAP
3984 : NVPTXInst<(outs Int64Regs:$r),
3985 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3986 "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3989 def SULD_2D_ARRAY_I8_TRAP
3990 : NVPTXInst<(outs Int16Regs:$r),
3991 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3992 "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3994 def SULD_2D_ARRAY_I16_TRAP
3995 : NVPTXInst<(outs Int16Regs:$r),
3996 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3997 "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3999 def SULD_2D_ARRAY_I32_TRAP
4000 : NVPTXInst<(outs Int32Regs:$r),
4001 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4002 "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4004 def SULD_2D_ARRAY_I64_TRAP
4005 : NVPTXInst<(outs Int64Regs:$r),
4006 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4007 "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4011 : NVPTXInst<(outs Int16Regs:$r),
4012 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4013 "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4015 def SULD_3D_I16_TRAP
4016 : NVPTXInst<(outs Int16Regs:$r),
4017 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4018 "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4020 def SULD_3D_I32_TRAP
4021 : NVPTXInst<(outs Int32Regs:$r),
4022 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4023 "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4025 def SULD_3D_I64_TRAP
4026 : NVPTXInst<(outs Int64Regs:$r),
4027 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4028 "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4033 def SULD_1D_V2I8_TRAP
4034 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4035 (ins Int64Regs:$s, Int32Regs:$x),
4036 "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4038 def SULD_1D_V2I16_TRAP
4039 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4040 (ins Int64Regs:$s, Int32Regs:$x),
4041 "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4043 def SULD_1D_V2I32_TRAP
4044 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4045 (ins Int64Regs:$s, Int32Regs:$x),
4046 "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4048 def SULD_1D_V2I64_TRAP
4049 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4050 (ins Int64Regs:$s, Int32Regs:$x),
4051 "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4054 def SULD_1D_ARRAY_V2I8_TRAP
4055 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4056 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4057 "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4059 def SULD_1D_ARRAY_V2I16_TRAP
4060 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4061 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4062 "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4064 def SULD_1D_ARRAY_V2I32_TRAP
4065 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4066 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4067 "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4069 def SULD_1D_ARRAY_V2I64_TRAP
4070 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4071 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4072 "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4075 def SULD_2D_V2I8_TRAP
4076 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4077 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4078 "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4080 def SULD_2D_V2I16_TRAP
4081 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4082 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4083 "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4085 def SULD_2D_V2I32_TRAP
4086 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4087 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4088 "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4090 def SULD_2D_V2I64_TRAP
4091 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4092 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4093 "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4096 def SULD_2D_ARRAY_V2I8_TRAP
4097 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4098 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4099 "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
4100 "[$s, \\{$l, $x, $y, $y\\}];",
4102 def SULD_2D_ARRAY_V2I16_TRAP
4103 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4104 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4105 "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
4106 "[$s, \\{$l, $x, $y, $y\\}];",
4108 def SULD_2D_ARRAY_V2I32_TRAP
4109 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4110 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4111 "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
4112 "[$s, \\{$l, $x, $y, $y\\}];",
4114 def SULD_2D_ARRAY_V2I64_TRAP
4115 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4116 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4117 "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
4118 "[$s, \\{$l, $x, $y, $y\\}];",
4121 def SULD_3D_V2I8_TRAP
4122 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4123 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4124 "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4126 def SULD_3D_V2I16_TRAP
4127 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4128 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4129 "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4131 def SULD_3D_V2I32_TRAP
4132 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4133 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4134 "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4136 def SULD_3D_V2I64_TRAP
4137 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4138 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4139 "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4144 def SULD_1D_V4I8_TRAP
4145 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4146 (ins Int64Regs:$s, Int32Regs:$x),
4147 "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4149 def SULD_1D_V4I16_TRAP
4150 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4151 (ins Int64Regs:$s, Int32Regs:$x),
4152 "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4154 def SULD_1D_V4I32_TRAP
4155 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4156 (ins Int64Regs:$s, Int32Regs:$x),
4157 "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4160 def SULD_1D_ARRAY_V4I8_TRAP
4161 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4162 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4163 "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4164 "[$s, \\{$l, $x\\}];",
4166 def SULD_1D_ARRAY_V4I16_TRAP
4167 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4168 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4169 "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4170 "[$s, \\{$l, $x\\}];",
4172 def SULD_1D_ARRAY_V4I32_TRAP
4173 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4174 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4175 "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4176 "[$s, \\{$l, $x\\}];",
4179 def SULD_2D_V4I8_TRAP
4180 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4181 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4182 "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4184 def SULD_2D_V4I16_TRAP
4185 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4186 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4187 "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4189 def SULD_2D_V4I32_TRAP
4190 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4191 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4192 "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4195 def SULD_2D_ARRAY_V4I8_TRAP
4196 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4197 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4198 "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4199 "[$s, \\{$l, $x, $y, $y\\}];",
4201 def SULD_2D_ARRAY_V4I16_TRAP
4202 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4203 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4204 "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4205 "[$s, \\{$l, $x, $y, $y\\}];",
4207 def SULD_2D_ARRAY_V4I32_TRAP
4208 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4209 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4210 "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4211 "[$s, \\{$l, $x, $y, $y\\}];",
4215 def SULD_3D_V4I8_TRAP
4216 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4217 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4218 "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4219 "[$s, \\{$x, $y, $z, $z\\}];",
4221 def SULD_3D_V4I16_TRAP
4222 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4223 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4224 "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4225 "[$s, \\{$x, $y, $z, $z\\}];",
4227 def SULD_3D_V4I32_TRAP
4228 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4229 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4230 "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4231 "[$s, \\{$x, $y, $z, $z\\}];",
4238 : NVPTXInst<(outs Int16Regs:$r),
4239 (ins Int64Regs:$s, Int32Regs:$x),
4240 "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
4242 def SULD_1D_I16_ZERO
4243 : NVPTXInst<(outs Int16Regs:$r),
4244 (ins Int64Regs:$s, Int32Regs:$x),
4245 "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
4247 def SULD_1D_I32_ZERO
4248 : NVPTXInst<(outs Int32Regs:$r),
4249 (ins Int64Regs:$s, Int32Regs:$x),
4250 "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
4252 def SULD_1D_I64_ZERO
4253 : NVPTXInst<(outs Int64Regs:$r),
4254 (ins Int64Regs:$s, Int32Regs:$x),
4255 "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
4258 def SULD_1D_ARRAY_I8_ZERO
4259 : NVPTXInst<(outs Int16Regs:$r),
4260 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4261 "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4263 def SULD_1D_ARRAY_I16_ZERO
4264 : NVPTXInst<(outs Int16Regs:$r),
4265 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4266 "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4268 def SULD_1D_ARRAY_I32_ZERO
4269 : NVPTXInst<(outs Int32Regs:$r),
4270 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4271 "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4273 def SULD_1D_ARRAY_I64_ZERO
4274 : NVPTXInst<(outs Int64Regs:$r),
4275 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4276 "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4280 : NVPTXInst<(outs Int16Regs:$r),
4281 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4282 "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4284 def SULD_2D_I16_ZERO
4285 : NVPTXInst<(outs Int16Regs:$r),
4286 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4287 "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4289 def SULD_2D_I32_ZERO
4290 : NVPTXInst<(outs Int32Regs:$r),
4291 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4292 "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4294 def SULD_2D_I64_ZERO
4295 : NVPTXInst<(outs Int64Regs:$r),
4296 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4297 "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4300 def SULD_2D_ARRAY_I8_ZERO
4301 : NVPTXInst<(outs Int16Regs:$r),
4302 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4303 "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4305 def SULD_2D_ARRAY_I16_ZERO
4306 : NVPTXInst<(outs Int16Regs:$r),
4307 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4308 "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4310 def SULD_2D_ARRAY_I32_ZERO
4311 : NVPTXInst<(outs Int32Regs:$r),
4312 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4313 "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4315 def SULD_2D_ARRAY_I64_ZERO
4316 : NVPTXInst<(outs Int64Regs:$r),
4317 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4318 "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4322 : NVPTXInst<(outs Int16Regs:$r),
4323 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4324 "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4326 def SULD_3D_I16_ZERO
4327 : NVPTXInst<(outs Int16Regs:$r),
4328 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4329 "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4331 def SULD_3D_I32_ZERO
4332 : NVPTXInst<(outs Int32Regs:$r),
4333 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4334 "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4336 def SULD_3D_I64_ZERO
4337 : NVPTXInst<(outs Int64Regs:$r),
4338 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4339 "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4344 def SULD_1D_V2I8_ZERO
4345 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4346 (ins Int64Regs:$s, Int32Regs:$x),
4347 "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4349 def SULD_1D_V2I16_ZERO
4350 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4351 (ins Int64Regs:$s, Int32Regs:$x),
4352 "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4354 def SULD_1D_V2I32_ZERO
4355 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4356 (ins Int64Regs:$s, Int32Regs:$x),
4357 "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4359 def SULD_1D_V2I64_ZERO
4360 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4361 (ins Int64Regs:$s, Int32Regs:$x),
4362 "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4365 def SULD_1D_ARRAY_V2I8_ZERO
4366 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4367 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4368 "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4370 def SULD_1D_ARRAY_V2I16_ZERO
4371 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4372 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4373 "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4375 def SULD_1D_ARRAY_V2I32_ZERO
4376 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4377 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4378 "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4380 def SULD_1D_ARRAY_V2I64_ZERO
4381 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4382 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4383 "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4386 def SULD_2D_V2I8_ZERO
4387 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4388 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4389 "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4391 def SULD_2D_V2I16_ZERO
4392 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4393 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4394 "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4396 def SULD_2D_V2I32_ZERO
4397 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4398 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4399 "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4401 def SULD_2D_V2I64_ZERO
4402 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4403 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4404 "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4407 def SULD_2D_ARRAY_V2I8_ZERO
4408 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4409 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4410 "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4411 "[$s, \\{$l, $x, $y, $y\\}];",
4413 def SULD_2D_ARRAY_V2I16_ZERO
4414 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4415 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4416 "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4417 "[$s, \\{$l, $x, $y, $y\\}];",
4419 def SULD_2D_ARRAY_V2I32_ZERO
4420 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4421 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4422 "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4423 "[$s, \\{$l, $x, $y, $y\\}];",
4425 def SULD_2D_ARRAY_V2I64_ZERO
4426 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4427 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4428 "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4429 "[$s, \\{$l, $x, $y, $y\\}];",
4432 def SULD_3D_V2I8_ZERO
4433 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4434 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4435 "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4437 def SULD_3D_V2I16_ZERO
4438 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4439 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4440 "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4442 def SULD_3D_V2I32_ZERO
4443 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4444 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4445 "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4447 def SULD_3D_V2I64_ZERO
4448 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4449 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4450 "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4455 def SULD_1D_V4I8_ZERO
4456 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4457 (ins Int64Regs:$s, Int32Regs:$x),
4458 "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4460 def SULD_1D_V4I16_ZERO
4461 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4462 (ins Int64Regs:$s, Int32Regs:$x),
4463 "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4465 def SULD_1D_V4I32_ZERO
4466 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4467 (ins Int64Regs:$s, Int32Regs:$x),
4468 "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4471 def SULD_1D_ARRAY_V4I8_ZERO
4472 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4473 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4474 "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4475 "[$s, \\{$l, $x\\}];",
4477 def SULD_1D_ARRAY_V4I16_ZERO
4478 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4479 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4480 "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4481 "[$s, \\{$l, $x\\}];",
4483 def SULD_1D_ARRAY_V4I32_ZERO
4484 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4485 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4486 "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4487 "[$s, \\{$l, $x\\}];",
4490 def SULD_2D_V4I8_ZERO
4491 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4492 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4493 "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4495 def SULD_2D_V4I16_ZERO
4496 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4497 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4498 "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4500 def SULD_2D_V4I32_ZERO
4501 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4502 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4503 "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4506 def SULD_2D_ARRAY_V4I8_ZERO
4507 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4508 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4509 "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4510 "[$s, \\{$l, $x, $y, $y\\}];",
4512 def SULD_2D_ARRAY_V4I16_ZERO
4513 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4514 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4515 "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4516 "[$s, \\{$l, $x, $y, $y\\}];",
4518 def SULD_2D_ARRAY_V4I32_ZERO
4519 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4520 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4521 "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4522 "[$s, \\{$l, $x, $y, $y\\}];",
4526 def SULD_3D_V4I8_ZERO
4527 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4528 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4529 "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4530 "[$s, \\{$x, $y, $z, $z\\}];",
4532 def SULD_3D_V4I16_ZERO
4533 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4534 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4535 "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4536 "[$s, \\{$x, $y, $z, $z\\}];",
4538 def SULD_3D_V4I32_ZERO
4539 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4540 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4541 "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4542 "[$s, \\{$x, $y, $z, $z\\}];",
4546 //-----------------------------------
4547 // Texture Query Intrinsics
4548 //-----------------------------------
4550 let IsSurfTexQuery = 1 in {
4551 def TXQ_CHANNEL_ORDER
4552 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4553 "txq.channel_order.b32 \t$d, [$a];",
4555 def TXQ_CHANNEL_DATA_TYPE
4556 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4557 "txq.channel_data_type.b32 \t$d, [$a];",
4560 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4561 "txq.width.b32 \t$d, [$a];",
4564 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4565 "txq.height.b32 \t$d, [$a];",
4568 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4569 "txq.depth.b32 \t$d, [$a];",
4572 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4573 "txq.array_size.b32 \t$d, [$a];",
4576 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4577 "txq.num_samples.b32 \t$d, [$a];",
4579 def TXQ_NUM_MIPMAP_LEVELS
4580 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4581 "txq.num_mipmap_levels.b32 \t$d, [$a];",
// Selection patterns for the texture query intrinsics.
//
// Every pattern below has the same shape: the intrinsic applied to the
// Int64Regs operand $a is selected to the matching TXQ_* instruction, which
// receives the same $a operand. The helper class captures that shape once so
// each pattern is just an (intrinsic, instruction) pair.
class TexQueryPat<Intrinsic Query, NVPTXInst Inst>
  : Pat<(Query Int64Regs:$a), (Inst Int64Regs:$a)>;

def : TexQueryPat<int_nvvm_txq_channel_order,     TXQ_CHANNEL_ORDER>;
def : TexQueryPat<int_nvvm_txq_channel_data_type, TXQ_CHANNEL_DATA_TYPE>;
def : TexQueryPat<int_nvvm_txq_width,             TXQ_WIDTH>;
def : TexQueryPat<int_nvvm_txq_height,            TXQ_HEIGHT>;
def : TexQueryPat<int_nvvm_txq_depth,             TXQ_DEPTH>;
def : TexQueryPat<int_nvvm_txq_array_size,        TXQ_ARRAY_SIZE>;
def : TexQueryPat<int_nvvm_txq_num_samples,       TXQ_NUM_SAMPLES>;
def : TexQueryPat<int_nvvm_txq_num_mipmap_levels, TXQ_NUM_MIPMAP_LEVELS>;
4603 //-----------------------------------
4604 // Surface Query Intrinsics
4605 //-----------------------------------
4607 let IsSurfTexQuery = 1 in {
4608 def SUQ_CHANNEL_ORDER
4609 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4610 "suq.channel_order.b32 \t$d, [$a];",
4612 def SUQ_CHANNEL_DATA_TYPE
4613 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4614 "suq.channel_data_type.b32 \t$d, [$a];",
4617 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4618 "suq.width.b32 \t$d, [$a];",
4621 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4622 "suq.height.b32 \t$d, [$a];",
4625 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4626 "suq.depth.b32 \t$d, [$a];",
4629 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4630 "suq.array_size.b32 \t$d, [$a];",
// Selection patterns for the surface query intrinsics.
//
// Each pattern maps an int_nvvm_suq_* intrinsic applied to the Int64Regs
// operand $a onto the matching SUQ_* instruction with that same operand. The
// helper class spells the common shape out once; the defs list only the
// (intrinsic, instruction) pairs.
class SurfQueryPat<Intrinsic Query, NVPTXInst Inst>
  : Pat<(Query Int64Regs:$a), (Inst Int64Regs:$a)>;

def : SurfQueryPat<int_nvvm_suq_channel_order,     SUQ_CHANNEL_ORDER>;
def : SurfQueryPat<int_nvvm_suq_channel_data_type, SUQ_CHANNEL_DATA_TYPE>;
def : SurfQueryPat<int_nvvm_suq_width,             SUQ_WIDTH>;
def : SurfQueryPat<int_nvvm_suq_height,            SUQ_HEIGHT>;
def : SurfQueryPat<int_nvvm_suq_depth,             SUQ_DEPTH>;
def : SurfQueryPat<int_nvvm_suq_array_size,        SUQ_ARRAY_SIZE>;
4648 //===- Handle Query -------------------------------------------------------===//
4650 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
4652 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4653 "istypep.samplerref \t$d, $a;",
4654 [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4656 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4657 "istypep.surfref \t$d, $a;",
4658 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4660 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4661 "istypep.texref \t$d, $a;",
4662 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4664 //===- Surface Stores -----------------------------------------------------===//
4669 def SUST_B_1D_B8_CLAMP
4671 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4672 "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4674 def SUST_B_1D_B16_CLAMP
4676 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4677 "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4679 def SUST_B_1D_B32_CLAMP
4681 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4682 "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4684 def SUST_B_1D_B64_CLAMP
4686 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4687 "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4689 def SUST_B_1D_V2B8_CLAMP
4691 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4692 "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4694 def SUST_B_1D_V2B16_CLAMP
4696 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4697 "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4699 def SUST_B_1D_V2B32_CLAMP
4701 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4702 "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4704 def SUST_B_1D_V2B64_CLAMP
4706 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4707 "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4709 def SUST_B_1D_V4B8_CLAMP
4711 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4712 Int16Regs:$b, Int16Regs:$a),
4713 "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4715 def SUST_B_1D_V4B16_CLAMP
4717 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4718 Int16Regs:$b, Int16Regs:$a),
4719 "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4721 def SUST_B_1D_V4B32_CLAMP
4723 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4724 Int32Regs:$b, Int32Regs:$a),
4725 "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4729 def SUST_B_1D_ARRAY_B8_CLAMP
4731 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4732 "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4734 def SUST_B_1D_ARRAY_B16_CLAMP
4736 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4737 "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4739 def SUST_B_1D_ARRAY_B32_CLAMP
4741 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4742 "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4744 def SUST_B_1D_ARRAY_B64_CLAMP
4746 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4747 "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4749 def SUST_B_1D_ARRAY_V2B8_CLAMP
4751 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4753 "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4755 def SUST_B_1D_ARRAY_V2B16_CLAMP
4757 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4759 "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4761 def SUST_B_1D_ARRAY_V2B32_CLAMP
4763 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4765 "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4767 def SUST_B_1D_ARRAY_V2B64_CLAMP
4769 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4771 "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4773 def SUST_B_1D_ARRAY_V4B8_CLAMP
4775 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4776 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4777 "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
4778 "\\{$r, $g, $b, $a\\};",
4780 def SUST_B_1D_ARRAY_V4B16_CLAMP
4782 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4783 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4784 "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
4785 "\\{$r, $g, $b, $a\\};",
4787 def SUST_B_1D_ARRAY_V4B32_CLAMP
4789 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4790 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4791 "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
4792 "\\{$r, $g, $b, $a\\};",
// sust.b, 2D form, .clamp suffix -- one record per element layout
// (B8/B16/B32/B64, V2B8..V2B64, V4B8..V4B32; no V4B64 record).
// Operands: $s is Int64Regs, the coordinates $x and $y are Int32Regs, and the
// stored values are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).  Where the
// declaration is shown, b8/b16 values use Int16Regs, b32 uses Int32Regs and
// b64 uses Int64Regs.  The asm string prints the coordinates as {$x, $y}.
4796 def SUST_B_2D_B8_CLAMP
4798 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4799 "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4801 def SUST_B_2D_B16_CLAMP
4803 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4804 "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4806 def SUST_B_2D_B32_CLAMP
4808 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4809 "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4811 def SUST_B_2D_B64_CLAMP
4813 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4814 "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4816 def SUST_B_2D_V2B8_CLAMP
4818 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4820 "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4822 def SUST_B_2D_V2B16_CLAMP
4824 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4826 "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4828 def SUST_B_2D_V2B32_CLAMP
4830 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4832 "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4834 def SUST_B_2D_V2B64_CLAMP
4836 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4838 "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4840 def SUST_B_2D_V4B8_CLAMP
4842 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4843 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4844 "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
4845 "\\{$r, $g, $b, $a\\};",
4847 def SUST_B_2D_V4B16_CLAMP
4849 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4850 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4851 "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
4852 "\\{$r, $g, $b, $a\\};",
4854 def SUST_B_2D_V4B32_CLAMP
4856 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4857 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4858 "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
4859 "\\{$r, $g, $b, $a\\};",
// sust.b, 2D-array form ("a2d"), .clamp suffix.
// Operands: $s is Int64Regs; $idx, $x and $y are Int32Regs; the stored values
// are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).
// Every asm string prints a four-entry coordinate list with $y written twice:
// {$idx, $x, $y, $y}.
// NOTE(review): presumably PTX wants a 4-element coordinate vector for this
// geometry and ignores the last element -- confirm against the PTX ISA
// description of sust before treating the duplicate $y as a typo.
4863 def SUST_B_2D_ARRAY_B8_CLAMP
4865 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4867 "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4869 def SUST_B_2D_ARRAY_B16_CLAMP
4871 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4873 "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4875 def SUST_B_2D_ARRAY_B32_CLAMP
4877 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4879 "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4881 def SUST_B_2D_ARRAY_B64_CLAMP
4883 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4885 "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4887 def SUST_B_2D_ARRAY_V2B8_CLAMP
4889 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4890 Int16Regs:$r, Int16Regs:$g),
4891 "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4894 def SUST_B_2D_ARRAY_V2B16_CLAMP
4896 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4897 Int16Regs:$r, Int16Regs:$g),
4898 "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4901 def SUST_B_2D_ARRAY_V2B32_CLAMP
4903 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4904 Int32Regs:$r, Int32Regs:$g),
4905 "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4908 def SUST_B_2D_ARRAY_V2B64_CLAMP
4910 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4911 Int64Regs:$r, Int64Regs:$g),
4912 "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4915 def SUST_B_2D_ARRAY_V4B8_CLAMP
4917 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4918 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4919 "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4920 "\\{$r, $g, $b, $a\\};",
4922 def SUST_B_2D_ARRAY_V4B16_CLAMP
4924 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4925 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4926 "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4927 "\\{$r, $g, $b, $a\\};",
4929 def SUST_B_2D_ARRAY_V4B32_CLAMP
4931 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4932 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4933 "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4934 "\\{$r, $g, $b, $a\\};",
// sust.b, 3D form, .clamp suffix.
// Operands: $s is Int64Regs; $x, $y and $z are Int32Regs; the stored values
// are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).
// Every asm string prints a four-entry coordinate list with $z written twice:
// {$x, $y, $z, $z}.
// NOTE(review): presumably PTX wants a 4-element coordinate vector for this
// geometry and ignores the last element -- confirm against the PTX ISA
// description of sust before treating the duplicate $z as a typo.
4938 def SUST_B_3D_B8_CLAMP
4940 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4942 "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4944 def SUST_B_3D_B16_CLAMP
4946 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4948 "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4950 def SUST_B_3D_B32_CLAMP
4952 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4954 "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4956 def SUST_B_3D_B64_CLAMP
4958 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4960 "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4962 def SUST_B_3D_V2B8_CLAMP
4964 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4965 Int16Regs:$r, Int16Regs:$g),
4966 "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4969 def SUST_B_3D_V2B16_CLAMP
4971 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4972 Int16Regs:$r, Int16Regs:$g),
4973 "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4976 def SUST_B_3D_V2B32_CLAMP
4978 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4979 Int32Regs:$r, Int32Regs:$g),
4980 "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4983 def SUST_B_3D_V2B64_CLAMP
4985 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4986 Int64Regs:$r, Int64Regs:$g),
4987 "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4990 def SUST_B_3D_V4B8_CLAMP
4992 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4993 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4994 "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4995 "\\{$r, $g, $b, $a\\};",
4997 def SUST_B_3D_V4B16_CLAMP
4999 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5000 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5001 "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5002 "\\{$r, $g, $b, $a\\};",
5004 def SUST_B_3D_V4B32_CLAMP
5006 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5007 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5008 "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5009 "\\{$r, $g, $b, $a\\};",
// sust.b, 1D form, .trap suffix -- one record per element layout
// (B8/B16/B32/B64, V2B8..V2B64, V4B8..V4B32; no V4B64 record).
// Operands: $s is Int64Regs, the single coordinate $x is Int32Regs, and the
// stored values are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).  b8/b16 values use
// Int16Regs, b32 uses Int32Regs and b64 uses Int64Regs.
// The asm string prints the coordinate as {$x}.
5014 def SUST_B_1D_B8_TRAP
5016 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5017 "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5019 def SUST_B_1D_B16_TRAP
5021 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5022 "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5024 def SUST_B_1D_B32_TRAP
5026 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5027 "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5029 def SUST_B_1D_B64_TRAP
5031 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5032 "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
5034 def SUST_B_1D_V2B8_TRAP
5036 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5037 "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5039 def SUST_B_1D_V2B16_TRAP
5041 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5042 "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5044 def SUST_B_1D_V2B32_TRAP
5046 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5047 "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5049 def SUST_B_1D_V2B64_TRAP
5051 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5052 "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5054 def SUST_B_1D_V4B8_TRAP
5056 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5057 Int16Regs:$b, Int16Regs:$a),
5058 "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5060 def SUST_B_1D_V4B16_TRAP
5062 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5063 Int16Regs:$b, Int16Regs:$a),
5064 "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5066 def SUST_B_1D_V4B32_TRAP
5068 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5069 Int32Regs:$b, Int32Regs:$a),
5070 "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.b, 1D-array form ("a1d"), .trap suffix -- one record per element
// layout (B8/B16/B32/B64, V2B8..V2B64, V4B8..V4B32; no V4B64 record).
// Operands: $s is Int64Regs, the coordinates $idx and $x are Int32Regs, and
// the stored values are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).  Where the
// declaration is shown, b8/b16 values use Int16Regs, b32 uses Int32Regs and
// b64 uses Int64Regs.  The asm string prints the coordinates as {$idx, $x}.
5074 def SUST_B_1D_ARRAY_B8_TRAP
5076 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5077 "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5079 def SUST_B_1D_ARRAY_B16_TRAP
5081 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5082 "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5084 def SUST_B_1D_ARRAY_B32_TRAP
5086 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5087 "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5089 def SUST_B_1D_ARRAY_B64_TRAP
5091 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5092 "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5094 def SUST_B_1D_ARRAY_V2B8_TRAP
5096 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5098 "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5100 def SUST_B_1D_ARRAY_V2B16_TRAP
5102 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5104 "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5106 def SUST_B_1D_ARRAY_V2B32_TRAP
5108 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5110 "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5112 def SUST_B_1D_ARRAY_V2B64_TRAP
5114 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5116 "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5118 def SUST_B_1D_ARRAY_V4B8_TRAP
5120 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5121 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5122 "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5123 "\\{$r, $g, $b, $a\\};",
5125 def SUST_B_1D_ARRAY_V4B16_TRAP
5127 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5128 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5129 "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5130 "\\{$r, $g, $b, $a\\};",
5132 def SUST_B_1D_ARRAY_V4B32_TRAP
5134 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5135 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5136 "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5137 "\\{$r, $g, $b, $a\\};",
// sust.b, 2D form, .trap suffix -- one record per element layout
// (B8/B16/B32/B64, V2B8..V2B64, V4B8..V4B32; no V4B64 record).
// Operands: $s is Int64Regs, the coordinates $x and $y are Int32Regs, and the
// stored values are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).  Where the
// declaration is shown, b8/b16 values use Int16Regs, b32 uses Int32Regs and
// b64 uses Int64Regs.  The asm string prints the coordinates as {$x, $y}.
5141 def SUST_B_2D_B8_TRAP
5143 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5144 "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5146 def SUST_B_2D_B16_TRAP
5148 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5149 "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5151 def SUST_B_2D_B32_TRAP
5153 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5154 "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5156 def SUST_B_2D_B64_TRAP
5158 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5159 "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5161 def SUST_B_2D_V2B8_TRAP
5163 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5165 "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5167 def SUST_B_2D_V2B16_TRAP
5169 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5171 "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5173 def SUST_B_2D_V2B32_TRAP
5175 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5177 "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5179 def SUST_B_2D_V2B64_TRAP
5181 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5183 "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5185 def SUST_B_2D_V4B8_TRAP
5187 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5188 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5189 "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5190 "\\{$r, $g, $b, $a\\};",
5192 def SUST_B_2D_V4B16_TRAP
5194 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5195 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5196 "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5197 "\\{$r, $g, $b, $a\\};",
5199 def SUST_B_2D_V4B32_TRAP
5201 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5202 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5203 "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5204 "\\{$r, $g, $b, $a\\};",
// sust.b, 2D-array form ("a2d"), .trap suffix.
// Operands: $s is Int64Regs; $idx, $x and $y are Int32Regs; the stored values
// are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).
// Every asm string prints a four-entry coordinate list with $y written twice:
// {$idx, $x, $y, $y}.
// NOTE(review): presumably PTX wants a 4-element coordinate vector for this
// geometry and ignores the last element -- confirm against the PTX ISA
// description of sust before treating the duplicate $y as a typo.
5208 def SUST_B_2D_ARRAY_B8_TRAP
5210 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5212 "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5214 def SUST_B_2D_ARRAY_B16_TRAP
5216 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5218 "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5220 def SUST_B_2D_ARRAY_B32_TRAP
5222 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5224 "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5226 def SUST_B_2D_ARRAY_B64_TRAP
5228 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5230 "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5232 def SUST_B_2D_ARRAY_V2B8_TRAP
5234 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5235 Int16Regs:$r, Int16Regs:$g),
5236 "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5239 def SUST_B_2D_ARRAY_V2B16_TRAP
5241 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5242 Int16Regs:$r, Int16Regs:$g),
5243 "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5246 def SUST_B_2D_ARRAY_V2B32_TRAP
5248 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5249 Int32Regs:$r, Int32Regs:$g),
5250 "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5253 def SUST_B_2D_ARRAY_V2B64_TRAP
5255 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5256 Int64Regs:$r, Int64Regs:$g),
5257 "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5260 def SUST_B_2D_ARRAY_V4B8_TRAP
5262 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5263 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5264 "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5265 "\\{$r, $g, $b, $a\\};",
5267 def SUST_B_2D_ARRAY_V4B16_TRAP
5269 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5270 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5271 "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5272 "\\{$r, $g, $b, $a\\};",
5274 def SUST_B_2D_ARRAY_V4B32_TRAP
5276 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5277 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5278 "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5279 "\\{$r, $g, $b, $a\\};",
// sust.b, 3D form, .trap suffix.
// Operands: $s is Int64Regs; $x, $y and $z are Int32Regs; the stored values
// are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).
// Every asm string prints a four-entry coordinate list with $z written twice:
// {$x, $y, $z, $z}.
// NOTE(review): presumably PTX wants a 4-element coordinate vector for this
// geometry and ignores the last element -- confirm against the PTX ISA
// description of sust before treating the duplicate $z as a typo.
5283 def SUST_B_3D_B8_TRAP
5285 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5287 "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5289 def SUST_B_3D_B16_TRAP
5291 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5293 "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5295 def SUST_B_3D_B32_TRAP
5297 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5299 "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5301 def SUST_B_3D_B64_TRAP
5303 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5305 "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5307 def SUST_B_3D_V2B8_TRAP
5309 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5310 Int16Regs:$r, Int16Regs:$g),
5311 "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5314 def SUST_B_3D_V2B16_TRAP
5316 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5317 Int16Regs:$r, Int16Regs:$g),
5318 "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5321 def SUST_B_3D_V2B32_TRAP
5323 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5324 Int32Regs:$r, Int32Regs:$g),
5325 "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5328 def SUST_B_3D_V2B64_TRAP
5330 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5331 Int64Regs:$r, Int64Regs:$g),
5332 "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5335 def SUST_B_3D_V4B8_TRAP
5337 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5338 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5339 "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5340 "\\{$r, $g, $b, $a\\};",
5342 def SUST_B_3D_V4B16_TRAP
5344 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5345 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5346 "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5347 "\\{$r, $g, $b, $a\\};",
5349 def SUST_B_3D_V4B32_TRAP
5351 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5352 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5353 "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5354 "\\{$r, $g, $b, $a\\};",
// sust.b, 1D form, .zero suffix -- one record per element layout
// (B8/B16/B32/B64, V2B8..V2B64, V4B8..V4B32; no V4B64 record).
// Operands: $s is Int64Regs, the single coordinate $x is Int32Regs, and the
// stored values are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).  b8/b16 values use
// Int16Regs, b32 uses Int32Regs and b64 uses Int64Regs.
// The asm string prints the coordinate as {$x}.
5359 def SUST_B_1D_B8_ZERO
5361 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5362 "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
5364 def SUST_B_1D_B16_ZERO
5366 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5367 "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
5369 def SUST_B_1D_B32_ZERO
5371 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5372 "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
5374 def SUST_B_1D_B64_ZERO
5376 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5377 "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
5379 def SUST_B_1D_V2B8_ZERO
5381 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5382 "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5384 def SUST_B_1D_V2B16_ZERO
5386 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5387 "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5389 def SUST_B_1D_V2B32_ZERO
5391 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5392 "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5394 def SUST_B_1D_V2B64_ZERO
5396 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5397 "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5399 def SUST_B_1D_V4B8_ZERO
5401 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5402 Int16Regs:$b, Int16Regs:$a),
5403 "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5405 def SUST_B_1D_V4B16_ZERO
5407 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5408 Int16Regs:$b, Int16Regs:$a),
5409 "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5411 def SUST_B_1D_V4B32_ZERO
5413 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5414 Int32Regs:$b, Int32Regs:$a),
5415 "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.b, 1D-array form ("a1d"), .zero suffix -- one record per element
// layout (B8/B16/B32/B64, V2B8..V2B64, V4B8..V4B32; no V4B64 record).
// Operands: $s is Int64Regs, the coordinates $idx and $x are Int32Regs, and
// the stored values are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).  Where the
// declaration is shown, b8/b16 values use Int16Regs, b32 uses Int32Regs and
// b64 uses Int64Regs.  The asm string prints the coordinates as {$idx, $x}.
5419 def SUST_B_1D_ARRAY_B8_ZERO
5421 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5422 "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5424 def SUST_B_1D_ARRAY_B16_ZERO
5426 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5427 "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5429 def SUST_B_1D_ARRAY_B32_ZERO
5431 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5432 "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5434 def SUST_B_1D_ARRAY_B64_ZERO
5436 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5437 "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5439 def SUST_B_1D_ARRAY_V2B8_ZERO
5441 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5443 "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5445 def SUST_B_1D_ARRAY_V2B16_ZERO
5447 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5449 "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5451 def SUST_B_1D_ARRAY_V2B32_ZERO
5453 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5455 "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5457 def SUST_B_1D_ARRAY_V2B64_ZERO
5459 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5461 "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5463 def SUST_B_1D_ARRAY_V4B8_ZERO
5465 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5466 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5467 "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
5468 "\\{$r, $g, $b, $a\\};",
5470 def SUST_B_1D_ARRAY_V4B16_ZERO
5472 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5473 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5474 "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
5475 "\\{$r, $g, $b, $a\\};",
5477 def SUST_B_1D_ARRAY_V4B32_ZERO
5479 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5480 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5481 "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
5482 "\\{$r, $g, $b, $a\\};",
// sust.b, 2D form, .zero suffix -- one record per element layout
// (B8/B16/B32/B64, V2B8..V2B64, V4B8..V4B32; no V4B64 record).
// Operands: $s is Int64Regs, the coordinates $x and $y are Int32Regs, and the
// stored values are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).  Where the
// declaration is shown, b8/b16 values use Int16Regs, b32 uses Int32Regs and
// b64 uses Int64Regs.  The asm string prints the coordinates as {$x, $y}.
5486 def SUST_B_2D_B8_ZERO
5488 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5489 "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5491 def SUST_B_2D_B16_ZERO
5493 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5494 "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5496 def SUST_B_2D_B32_ZERO
5498 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5499 "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5501 def SUST_B_2D_B64_ZERO
5503 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5504 "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5506 def SUST_B_2D_V2B8_ZERO
5508 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5510 "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5512 def SUST_B_2D_V2B16_ZERO
5514 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5516 "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5518 def SUST_B_2D_V2B32_ZERO
5520 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5522 "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5524 def SUST_B_2D_V2B64_ZERO
5526 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5528 "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5530 def SUST_B_2D_V4B8_ZERO
5532 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5533 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5534 "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
5535 "\\{$r, $g, $b, $a\\};",
5537 def SUST_B_2D_V4B16_ZERO
5539 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5540 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5541 "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
5542 "\\{$r, $g, $b, $a\\};",
5544 def SUST_B_2D_V4B32_ZERO
5546 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5547 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5548 "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
5549 "\\{$r, $g, $b, $a\\};",
// sust.b, 2D-array form ("a2d"), .zero suffix.
// Operands: $s is Int64Regs; $idx, $x and $y are Int32Regs; the stored values
// are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).
// Every asm string prints a four-entry coordinate list with $y written twice:
// {$idx, $x, $y, $y}.
// NOTE(review): presumably PTX wants a 4-element coordinate vector for this
// geometry and ignores the last element -- confirm against the PTX ISA
// description of sust before treating the duplicate $y as a typo.
5553 def SUST_B_2D_ARRAY_B8_ZERO
5555 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5557 "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5559 def SUST_B_2D_ARRAY_B16_ZERO
5561 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5563 "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5565 def SUST_B_2D_ARRAY_B32_ZERO
5567 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5569 "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5571 def SUST_B_2D_ARRAY_B64_ZERO
5573 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5575 "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5577 def SUST_B_2D_ARRAY_V2B8_ZERO
5579 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5580 Int16Regs:$r, Int16Regs:$g),
5581 "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5584 def SUST_B_2D_ARRAY_V2B16_ZERO
5586 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5587 Int16Regs:$r, Int16Regs:$g),
5588 "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5591 def SUST_B_2D_ARRAY_V2B32_ZERO
5593 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5594 Int32Regs:$r, Int32Regs:$g),
5595 "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5598 def SUST_B_2D_ARRAY_V2B64_ZERO
5600 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5601 Int64Regs:$r, Int64Regs:$g),
5602 "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5605 def SUST_B_2D_ARRAY_V4B8_ZERO
5607 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5608 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5609 "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5610 "\\{$r, $g, $b, $a\\};",
5612 def SUST_B_2D_ARRAY_V4B16_ZERO
5614 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5615 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5616 "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5617 "\\{$r, $g, $b, $a\\};",
5619 def SUST_B_2D_ARRAY_V4B32_ZERO
5621 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5622 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5623 "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5624 "\\{$r, $g, $b, $a\\};",
// sust.b, 3D form, .zero suffix.
// Operands: $s is Int64Regs; $x, $y and $z are Int32Regs; the stored values
// are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).
// Every asm string prints a four-entry coordinate list with $z written twice:
// {$x, $y, $z, $z}.
// NOTE(review): presumably PTX wants a 4-element coordinate vector for this
// geometry and ignores the last element -- confirm against the PTX ISA
// description of sust before treating the duplicate $z as a typo.
5628 def SUST_B_3D_B8_ZERO
5630 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5632 "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5634 def SUST_B_3D_B16_ZERO
5636 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5638 "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5640 def SUST_B_3D_B32_ZERO
5642 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5644 "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5646 def SUST_B_3D_B64_ZERO
5648 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5650 "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5652 def SUST_B_3D_V2B8_ZERO
5654 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5655 Int16Regs:$r, Int16Regs:$g),
5656 "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5659 def SUST_B_3D_V2B16_ZERO
5661 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5662 Int16Regs:$r, Int16Regs:$g),
5663 "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5666 def SUST_B_3D_V2B32_ZERO
5668 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5669 Int32Regs:$r, Int32Regs:$g),
5670 "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5673 def SUST_B_3D_V2B64_ZERO
5675 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5676 Int64Regs:$r, Int64Regs:$g),
5677 "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5680 def SUST_B_3D_V4B8_ZERO
5682 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5683 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5684 "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5685 "\\{$r, $g, $b, $a\\};",
5687 def SUST_B_3D_V4B16_ZERO
5689 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5690 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5691 "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5692 "\\{$r, $g, $b, $a\\};",
5694 def SUST_B_3D_V4B32_ZERO
5696 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5697 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5698 "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5699 "\\{$r, $g, $b, $a\\};",
// sust.p (rather than sust.b), 1D form, .trap suffix.
// This group has nine records: B8/B16/B32, V2B8/V2B16/V2B32 and
// V4B8/V4B16/V4B32 -- no 64-bit element records appear here.
// Operands: $s is Int64Regs, the single coordinate $x is Int32Regs, and the
// stored values are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).  b8/b16 values use
// Int16Regs and b32 uses Int32Regs.  The asm string prints the coordinate as {$x}.
5706 def SUST_P_1D_B8_TRAP
5708 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5709 "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5711 def SUST_P_1D_B16_TRAP
5713 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5714 "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5716 def SUST_P_1D_B32_TRAP
5718 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5719 "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5721 def SUST_P_1D_V2B8_TRAP
5723 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5724 "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5726 def SUST_P_1D_V2B16_TRAP
5728 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5729 "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5731 def SUST_P_1D_V2B32_TRAP
5733 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5734 "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5736 def SUST_P_1D_V4B8_TRAP
5738 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5739 Int16Regs:$b, Int16Regs:$a),
5740 "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5742 def SUST_P_1D_V4B16_TRAP
5744 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5745 Int16Regs:$b, Int16Regs:$a),
5746 "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5748 def SUST_P_1D_V4B32_TRAP
5750 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5751 Int32Regs:$b, Int32Regs:$a),
5752 "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.p (rather than sust.b), 1D-array form ("a1d"), .trap suffix.
// This group has nine records: B8/B16/B32, V2B8/V2B16/V2B32 and
// V4B8/V4B16/V4B32 -- no 64-bit element records appear here.
// Operands: $s is Int64Regs, the coordinates $idx and $x are Int32Regs, and
// the stored values are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).
// The asm string prints the coordinates as {$idx, $x}.
5756 def SUST_P_1D_ARRAY_B8_TRAP
5758 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5759 "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5761 def SUST_P_1D_ARRAY_B16_TRAP
5763 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5764 "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5766 def SUST_P_1D_ARRAY_B32_TRAP
5768 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5769 "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5771 def SUST_P_1D_ARRAY_V2B8_TRAP
5773 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5775 "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5777 def SUST_P_1D_ARRAY_V2B16_TRAP
5779 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5781 "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5783 def SUST_P_1D_ARRAY_V2B32_TRAP
5785 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5787 "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5789 def SUST_P_1D_ARRAY_V4B8_TRAP
5791 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5792 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5793 "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5794 "\\{$r, $g, $b, $a\\};",
5796 def SUST_P_1D_ARRAY_V4B16_TRAP
5798 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5799 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5800 "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5801 "\\{$r, $g, $b, $a\\};",
5803 def SUST_P_1D_ARRAY_V4B32_TRAP
5805 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5806 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5807 "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5808 "\\{$r, $g, $b, $a\\};",
// sust.p (rather than sust.b), 2D form, .trap suffix.
// This group has nine records: B8/B16/B32, V2B8/V2B16/V2B32 and
// V4B8/V4B16/V4B32 -- no 64-bit element records appear here.
// Operands: $s is Int64Regs, the coordinates $x and $y are Int32Regs, and the
// stored values are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).
// The asm string prints the coordinates as {$x, $y}.
5812 def SUST_P_2D_B8_TRAP
5814 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5815 "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5817 def SUST_P_2D_B16_TRAP
5819 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5820 "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5822 def SUST_P_2D_B32_TRAP
5824 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5825 "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5827 def SUST_P_2D_V2B8_TRAP
5829 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5831 "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5833 def SUST_P_2D_V2B16_TRAP
5835 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5837 "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5839 def SUST_P_2D_V2B32_TRAP
5841 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5843 "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5845 def SUST_P_2D_V4B8_TRAP
5847 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5848 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5849 "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5850 "\\{$r, $g, $b, $a\\};",
5852 def SUST_P_2D_V4B16_TRAP
5854 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5855 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5856 "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5857 "\\{$r, $g, $b, $a\\};",
5859 def SUST_P_2D_V4B32_TRAP
5861 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5862 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5863 "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5864 "\\{$r, $g, $b, $a\\};",
// sust.p (rather than sust.b), 2D-array form ("a2d"), .trap suffix.
// This group has nine records: B8/B16/B32, V2B8/V2B16/V2B32 and
// V4B8/V4B16/V4B32 -- no 64-bit element records appear here.
// Operands: $s is Int64Regs; $idx, $x and $y are Int32Regs; the stored values
// are $r, or $r,$g (v2), or $r,$g,$b,$a (v4).
// Every asm string prints a four-entry coordinate list with $y written twice:
// {$idx, $x, $y, $y}.
// NOTE(review): presumably PTX wants a 4-element coordinate vector for this
// geometry and ignores the last element -- confirm against the PTX ISA
// description of sust before treating the duplicate $y as a typo.
5868 def SUST_P_2D_ARRAY_B8_TRAP
5870 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5872 "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5874 def SUST_P_2D_ARRAY_B16_TRAP
5876 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5878 "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5880 def SUST_P_2D_ARRAY_B32_TRAP
5882 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5884 "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5886 def SUST_P_2D_ARRAY_V2B8_TRAP
5888 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5889 Int16Regs:$r, Int16Regs:$g),
5890 "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5893 def SUST_P_2D_ARRAY_V2B16_TRAP
5895 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5896 Int16Regs:$r, Int16Regs:$g),
5897 "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5900 def SUST_P_2D_ARRAY_V2B32_TRAP
5902 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5903 Int32Regs:$r, Int32Regs:$g),
5904 "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5907 def SUST_P_2D_ARRAY_V4B8_TRAP
5909 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5910 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5911 "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5912 "\\{$r, $g, $b, $a\\};",
5914 def SUST_P_2D_ARRAY_V4B16_TRAP
5916 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5917 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5918 "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5919 "\\{$r, $g, $b, $a\\};",
5921 def SUST_P_2D_ARRAY_V4B32_TRAP
5923 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5924 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5925 "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5926 "\\{$r, $g, $b, $a\\};",
// sust.p stores using the 3d geometry, .trap variant.
// Operands: $s (Int64Regs), then $x, $y, $z (all Int32Regs), then the data
// registers.  The scalar forms store {$r}; the v2/v4 forms store {$r, $g} /
// {$r, $g, $b, $a}.
// NOTE(review): the coordinate vector is printed as {$x, $y, $z, $z}, i.e. $z
// is repeated to fill a fourth slot.  Presumably the instruction wants a
// 4-element coordinate vector and ignores the last element -- confirm against
// the PTX ISA before treating the duplicate as a typo.
5930 def SUST_P_3D_B8_TRAP
5932 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5934 "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5936 def SUST_P_3D_B16_TRAP
5938 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5940 "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5942 def SUST_P_3D_B32_TRAP
5944 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5946 "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5948 def SUST_P_3D_V2B8_TRAP
5950 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5951 Int16Regs:$r, Int16Regs:$g),
5952 "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5955 def SUST_P_3D_V2B16_TRAP
5957 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5958 Int16Regs:$r, Int16Regs:$g),
5959 "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5962 def SUST_P_3D_V2B32_TRAP
5964 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5965 Int32Regs:$r, Int32Regs:$g),
5966 "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5969 def SUST_P_3D_V4B8_TRAP
5971 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5972 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5973 "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5974 "\\{$r, $g, $b, $a\\};",
5976 def SUST_P_3D_V4B16_TRAP
5978 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5979 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5980 "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5981 "\\{$r, $g, $b, $a\\};",
5983 def SUST_P_3D_V4B32_TRAP
5985 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5986 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5987 "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5988 "\\{$r, $g, $b, $a\\};",
5992 // Surface store instruction selection patterns.
5993 // These are written as standalone Pat records rather than inside the
5994 // instruction definitions because TableGen reports type errors for the inline form (cause not yet determined).
// .clamp variant, 1d geometry.
// Each anonymous Pat selects int_nvvm_sust_b_1d_<type>_clamp to the matching
// SUST_B_1D_<TYPE>_CLAMP instruction, forwarding the operands unchanged and in
// the same order: $s (Int64Regs), $x (Int32Regs), then the data registers.
// i8 and i16 data are both matched in Int16Regs; i32 uses Int32Regs and i64
// uses Int64Regs.  The v4 forms cover i8/i16/i32 only.
5997 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
5998 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5999 (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6001 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
6002 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6003 (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6005 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
6006 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6007 (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6009 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
6010 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6011 (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6013 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
6014 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6015 (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
6016 Int16Regs:$r, Int16Regs:$g)>;
6018 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
6019 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6020 (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
6021 Int16Regs:$r, Int16Regs:$g)>;
6023 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
6024 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6025 (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
6026 Int32Regs:$r, Int32Regs:$g)>;
6028 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
6029 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6030 (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
6031 Int64Regs:$r, Int64Regs:$g)>;
6033 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
6034 Int64Regs:$s, Int32Regs:$x,
6035 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6036 (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
6037 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6039 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
6040 Int64Regs:$s, Int32Regs:$x,
6041 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6042 (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
6043 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6045 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
6046 Int64Regs:$s, Int32Regs:$x,
6047 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6048 (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
6049 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// .clamp variant, 1d array geometry.
// Each anonymous Pat selects int_nvvm_sust_b_1d_array_<type>_clamp to the
// matching SUST_B_1D_ARRAY_<TYPE>_CLAMP instruction with the operands in the
// same order: $s (Int64Regs), $l and $x (Int32Regs), then the data registers.
// NOTE(review): $l is presumably the array layer index -- confirm against the
// intrinsic definition.
6053 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
6054 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6055 (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6058 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
6059 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6060 (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6063 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
6064 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6065 (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6068 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
6069 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6070 (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6073 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
6074 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6075 (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6076 Int16Regs:$r, Int16Regs:$g)>;
6078 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
6079 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6080 (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6081 Int16Regs:$r, Int16Regs:$g)>;
6083 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
6084 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6085 (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6086 Int32Regs:$r, Int32Regs:$g)>;
6088 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
6089 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6090 (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6091 Int64Regs:$r, Int64Regs:$g)>;
6093 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
6094 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6095 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6096 (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6097 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6099 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
6100 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6101 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6102 (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6103 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6105 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
6106 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6107 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6108 (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6109 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// .clamp variant, 2d geometry.
// Each anonymous Pat selects int_nvvm_sust_b_2d_<type>_clamp to the matching
// SUST_B_2D_<TYPE>_CLAMP instruction with the operands in the same order:
// $s (Int64Regs), $x and $y (Int32Regs), then the data registers.
6113 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
6114 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6115 (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6118 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
6119 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6120 (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6123 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
6124 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6125 (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6128 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
6129 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6130 (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6133 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
6134 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6135 (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6136 Int16Regs:$r, Int16Regs:$g)>;
6138 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
6139 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6140 (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6141 Int16Regs:$r, Int16Regs:$g)>;
6143 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
6144 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6145 (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6146 Int32Regs:$r, Int32Regs:$g)>;
6148 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
6149 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6150 (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6151 Int64Regs:$r, Int64Regs:$g)>;
6153 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
6154 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6155 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6156 (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6157 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6159 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
6160 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6161 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6162 (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6163 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6165 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
6166 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6167 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6168 (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6169 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// .clamp variant, 2d array geometry.
// Each anonymous Pat selects int_nvvm_sust_b_2d_array_<type>_clamp to the
// matching SUST_B_2D_ARRAY_<TYPE>_CLAMP instruction with the operands in the
// same order: $s (Int64Regs), $l, $x and $y (Int32Regs), then the data
// registers.
// NOTE(review): $l is presumably the array layer index -- confirm against the
// intrinsic definition.
6173 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
6174 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6175 (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
6176 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6179 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
6180 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6181 (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
6182 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6185 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
6186 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6187 (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
6188 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6191 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
6192 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6193 (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
6194 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6197 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
6198 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6199 Int16Regs:$r, Int16Regs:$g),
6200 (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
6201 Int32Regs:$x, Int32Regs:$y,
6202 Int16Regs:$r, Int16Regs:$g)>;
6204 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
6205 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6206 Int16Regs:$r, Int16Regs:$g),
6207 (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
6208 Int32Regs:$x, Int32Regs:$y,
6209 Int16Regs:$r, Int16Regs:$g)>;
6211 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
6212 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6214 (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6215 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6217 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
6218 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6220 (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
6221 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6223 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
6224 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6225 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6226 (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
6227 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6228 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6230 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
6231 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6232 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6233 (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
6234 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6235 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6237 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
6238 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6239 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6240 (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6241 Int32Regs:$x, Int32Regs:$y,
6242 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// .clamp variant, 3d geometry.
// Each anonymous Pat selects int_nvvm_sust_b_3d_<type>_clamp to the matching
// SUST_B_3D_<TYPE>_CLAMP instruction with the operands in the same order:
// $s (Int64Regs), $x, $y and $z (Int32Regs), then the data registers.
6246 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
6247 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6249 (SUST_B_3D_B8_CLAMP Int64Regs:$s,
6250 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6253 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
6254 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6256 (SUST_B_3D_B16_CLAMP Int64Regs:$s,
6257 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6260 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
6261 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6263 (SUST_B_3D_B32_CLAMP Int64Regs:$s,
6264 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6267 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
6268 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6270 (SUST_B_3D_B64_CLAMP Int64Regs:$s,
6271 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6274 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
6275 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6276 Int16Regs:$r, Int16Regs:$g),
6277 (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
6278 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6279 Int16Regs:$r, Int16Regs:$g)>;
6281 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
6282 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6283 Int16Regs:$r, Int16Regs:$g),
6284 (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
6285 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6286 Int16Regs:$r, Int16Regs:$g)>;
6288 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
6289 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6290 Int32Regs:$r, Int32Regs:$g),
6291 (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
6292 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6293 Int32Regs:$r, Int32Regs:$g)>;
6295 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
6296 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6297 Int64Regs:$r, Int64Regs:$g),
6298 (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
6299 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6300 Int64Regs:$r, Int64Regs:$g)>;
6302 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
6303 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6304 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6305 (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
6306 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6307 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6309 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
6310 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6311 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6312 (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
6313 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6314 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6316 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
6317 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6318 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6319 (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
6320 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6321 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// .trap variant, 1d geometry.
// Each anonymous Pat selects int_nvvm_sust_b_1d_<type>_trap to the matching
// SUST_B_1D_<TYPE>_TRAP instruction, forwarding the operands unchanged and in
// the same order: $s (Int64Regs), $x (Int32Regs), then the data registers.
// i8 and i16 data are both matched in Int16Regs; i32 uses Int32Regs and i64
// uses Int64Regs.  The v4 forms cover i8/i16/i32 only.
6325 def : Pat<(int_nvvm_sust_b_1d_i8_trap
6326 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6327 (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6329 def : Pat<(int_nvvm_sust_b_1d_i16_trap
6330 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6331 (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6333 def : Pat<(int_nvvm_sust_b_1d_i32_trap
6334 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6335 (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6337 def : Pat<(int_nvvm_sust_b_1d_i64_trap
6338 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6339 (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6341 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
6342 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6343 (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6344 Int16Regs:$r, Int16Regs:$g)>;
6346 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
6347 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6348 (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6349 Int16Regs:$r, Int16Regs:$g)>;
6351 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
6352 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6353 (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6354 Int32Regs:$r, Int32Regs:$g)>;
6356 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
6357 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6358 (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
6359 Int64Regs:$r, Int64Regs:$g)>;
6361 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
6362 Int64Regs:$s, Int32Regs:$x,
6363 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6364 (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6365 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6367 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
6368 Int64Regs:$s, Int32Regs:$x,
6369 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6370 (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6371 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6373 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
6374 Int64Regs:$s, Int32Regs:$x,
6375 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6376 (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6377 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// .trap variant, 1d array geometry.
// Each anonymous Pat selects int_nvvm_sust_b_1d_array_<type>_trap to the
// matching SUST_B_1D_ARRAY_<TYPE>_TRAP instruction with the operands in the
// same order: $s (Int64Regs), $l and $x (Int32Regs), then the data registers.
// NOTE(review): $l is presumably the array layer index -- confirm against the
// intrinsic definition.
6381 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
6382 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6383 (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6386 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
6387 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6388 (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6391 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
6392 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6393 (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6396 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
6397 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6398 (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6401 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6402 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6403 (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6404 Int16Regs:$r, Int16Regs:$g)>;
6406 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
6407 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6408 (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6409 Int16Regs:$r, Int16Regs:$g)>;
6411 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
6412 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6413 (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6414 Int32Regs:$r, Int32Regs:$g)>;
6416 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
6417 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6418 (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6419 Int64Regs:$r, Int64Regs:$g)>;
6421 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
6422 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6423 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6424 (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6425 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6427 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6428 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6429 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6430 (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6431 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6433 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6434 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6435 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6436 (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6437 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// .trap variant, 2d geometry.
// Each anonymous Pat selects int_nvvm_sust_b_2d_<type>_trap to the matching
// SUST_B_2D_<TYPE>_TRAP instruction with the operands in the same order:
// $s (Int64Regs), $x and $y (Int32Regs), then the data registers.
6441 def : Pat<(int_nvvm_sust_b_2d_i8_trap
6442 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6443 (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6446 def : Pat<(int_nvvm_sust_b_2d_i16_trap
6447 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6448 (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6451 def : Pat<(int_nvvm_sust_b_2d_i32_trap
6452 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6453 (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6456 def : Pat<(int_nvvm_sust_b_2d_i64_trap
6457 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6458 (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6461 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6462 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6463 (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6464 Int16Regs:$r, Int16Regs:$g)>;
6466 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6467 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6468 (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6469 Int16Regs:$r, Int16Regs:$g)>;
6471 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6472 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6473 (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6474 Int32Regs:$r, Int32Regs:$g)>;
6476 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6477 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6478 (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6479 Int64Regs:$r, Int64Regs:$g)>;
6481 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6482 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6483 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6484 (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6485 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6487 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6488 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6489 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6490 (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6491 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6493 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6494 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6495 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6496 (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6497 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// .trap variant, 2d array geometry.
// Each anonymous Pat selects int_nvvm_sust_b_2d_array_<type>_trap to the
// matching SUST_B_2D_ARRAY_<TYPE>_TRAP instruction with the operands in the
// same order: $s (Int64Regs), $l, $x and $y (Int32Regs), then the data
// registers.
// NOTE(review): $l is presumably the array layer index -- confirm against the
// intrinsic definition.
6501 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6502 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6503 (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6504 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6507 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6508 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6509 (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6510 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6513 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6514 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6515 (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6516 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6519 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6520 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6521 (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6522 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6525 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6526 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6527 Int16Regs:$r, Int16Regs:$g),
6528 (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6529 Int32Regs:$x, Int32Regs:$y,
6530 Int16Regs:$r, Int16Regs:$g)>;
6532 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6533 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6534 Int16Regs:$r, Int16Regs:$g),
6535 (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6536 Int32Regs:$x, Int32Regs:$y,
6537 Int16Regs:$r, Int16Regs:$g)>;
6539 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6540 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6542 (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6543 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6545 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6546 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6548 (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6549 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6551 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6552 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6553 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6554 (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6555 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6556 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6558 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6559 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6560 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6561 (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6562 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6563 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6565 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6566 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6567 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6568 (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6569 Int32Regs:$x, Int32Regs:$y,
6570 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// .trap variant, 3d geometry.
// Each anonymous Pat selects int_nvvm_sust_b_3d_<type>_trap to the matching
// SUST_B_3D_<TYPE>_TRAP instruction with the operands in the same order:
// $s (Int64Regs), $x, $y and $z (Int32Regs), then the data registers.
6574 def : Pat<(int_nvvm_sust_b_3d_i8_trap
6575 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6577 (SUST_B_3D_B8_TRAP Int64Regs:$s,
6578 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6581 def : Pat<(int_nvvm_sust_b_3d_i16_trap
6582 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6584 (SUST_B_3D_B16_TRAP Int64Regs:$s,
6585 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6588 def : Pat<(int_nvvm_sust_b_3d_i32_trap
6589 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6591 (SUST_B_3D_B32_TRAP Int64Regs:$s,
6592 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6595 def : Pat<(int_nvvm_sust_b_3d_i64_trap
6596 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6598 (SUST_B_3D_B64_TRAP Int64Regs:$s,
6599 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6602 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6603 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6604 Int16Regs:$r, Int16Regs:$g),
6605 (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6606 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6607 Int16Regs:$r, Int16Regs:$g)>;
6609 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6610 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6611 Int16Regs:$r, Int16Regs:$g),
6612 (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6613 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6614 Int16Regs:$r, Int16Regs:$g)>;
6616 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6617 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6618 Int32Regs:$r, Int32Regs:$g),
6619 (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6620 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6621 Int32Regs:$r, Int32Regs:$g)>;
6623 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6624 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6625 Int64Regs:$r, Int64Regs:$g),
6626 (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6627 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6628 Int64Regs:$r, Int64Regs:$g)>;
6630 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6631 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6632 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6633 (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6634 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6635 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6637 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6638 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6639 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6640 (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6641 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6642 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6644 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6645 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6646 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6647 (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6648 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6649 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// .zero variant, 1d geometry.
// Each anonymous Pat selects int_nvvm_sust_b_1d_<type>_zero to the matching
// SUST_B_1D_<TYPE>_ZERO instruction, forwarding the operands unchanged and in
// the same order: $s (Int64Regs), $x (Int32Regs), then the data registers.
// i8 and i16 data are both matched in Int16Regs; i32 uses Int32Regs and i64
// uses Int64Regs.  The v4 forms cover i8/i16/i32 only.
6653 def : Pat<(int_nvvm_sust_b_1d_i8_zero
6654 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6655 (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6657 def : Pat<(int_nvvm_sust_b_1d_i16_zero
6658 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6659 (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6661 def : Pat<(int_nvvm_sust_b_1d_i32_zero
6662 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6663 (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6665 def : Pat<(int_nvvm_sust_b_1d_i64_zero
6666 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6667 (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6669 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6670 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6671 (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6672 Int16Regs:$r, Int16Regs:$g)>;
6674 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6675 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6676 (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6677 Int16Regs:$r, Int16Regs:$g)>;
6679 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6680 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6681 (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6682 Int32Regs:$r, Int32Regs:$g)>;
6684 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6685 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6686 (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6687 Int64Regs:$r, Int64Regs:$g)>;
6689 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6690 Int64Regs:$s, Int32Regs:$x,
6691 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6692 (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6693 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6695 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6696 Int64Regs:$s, Int32Regs:$x,
6697 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6698 (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6699 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6701 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6702 Int64Regs:$s, Int32Regs:$x,
6703 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6704 (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6705 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// .zero variant, 1d array geometry.
// Each anonymous Pat selects int_nvvm_sust_b_1d_array_<type>_zero to the
// matching SUST_B_1D_ARRAY_<TYPE>_ZERO instruction with the operands in the
// same order: $s (Int64Regs), $l and $x (Int32Regs), then the data registers.
// NOTE(review): $l is presumably the array layer index -- confirm against the
// intrinsic definition.
6709 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6710 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6711 (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6714 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6715 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6716 (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6719 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6720 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6721 (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6724 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6725 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6726 (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6729 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6730 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6731 (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6732 Int16Regs:$r, Int16Regs:$g)>;
6734 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6735 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6736 (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6737 Int16Regs:$r, Int16Regs:$g)>;
6739 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6740 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6741 (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6742 Int32Regs:$r, Int32Regs:$g)>;
6744 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6745 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6746 (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6747 Int64Regs:$r, Int64Regs:$g)>;
6749 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6750 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6751 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6752 (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6753 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6755 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6756 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6757 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6758 (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6759 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6761 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6762 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6763 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6764 (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6765 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for int_nvvm_sust_b_2d_*_zero.
// Each Pat selects the same-named SUST_B_2D_*_ZERO instruction with the
// operands in intrinsic order: $s (Int64Regs), $x and $y (Int32Regs), then the
// value registers. 8- and 16-bit values use Int16Regs, 32-bit values use
// Int32Regs and 64-bit values use Int64Regs.
// NOTE(review): the four scalar patterns (i8/i16/i32/i64) stop after "$y,";
// the line that supplies the value operand and closes the Pat is not visible
// in this listing -- confirm against the upstream file.
6769 def : Pat<(int_nvvm_sust_b_2d_i8_zero
6770 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6771 (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6774 def : Pat<(int_nvvm_sust_b_2d_i16_zero
6775 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6776 (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6779 def : Pat<(int_nvvm_sust_b_2d_i32_zero
6780 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6781 (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6784 def : Pat<(int_nvvm_sust_b_2d_i64_zero
6785 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6786 (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Two-element vector stores: values in $r, $g.
6789 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
6790 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6791 (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6792 Int16Regs:$r, Int16Regs:$g)>;
6794 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
6795 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6796 (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6797 Int16Regs:$r, Int16Regs:$g)>;
6799 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
6800 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6801 (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6802 Int32Regs:$r, Int32Regs:$g)>;
6804 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
6805 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6806 (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6807 Int64Regs:$r, Int64Regs:$g)>;
// Four-element vector stores: values in $r, $g, $b, $a.
6809 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
6810 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6811 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6812 (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6813 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6815 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
6816 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6817 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6818 (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6819 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6821 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
6822 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6823 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6824 (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6825 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for int_nvvm_sust_b_2d_array_*_zero.
// Each Pat selects the same-named SUST_B_2D_ARRAY_*_ZERO instruction with the
// operands in intrinsic order: $s (Int64Regs), $l, $x and $y (Int32Regs), then
// the value registers. 8- and 16-bit values use Int16Regs, 32-bit values use
// Int32Regs and 64-bit values use Int64Regs.
// NOTE(review): the four scalar patterns (i8/i16/i32/i64) stop after "$y,",
// and the intrinsic side of the v2i32/v2i64 patterns stops after "$r,"; the
// continuation lines are not visible in this listing -- confirm against the
// upstream file.
6829 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
6830 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6831 (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
6832 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6835 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
6836 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6837 (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
6838 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6841 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
6842 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6843 (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
6844 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6847 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
6848 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6849 (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
6850 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Two-element vector stores: values in $r, $g.
6853 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
6854 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6855 Int16Regs:$r, Int16Regs:$g),
6856 (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
6857 Int32Regs:$x, Int32Regs:$y,
6858 Int16Regs:$r, Int16Regs:$g)>;
6860 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
6861 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6862 Int16Regs:$r, Int16Regs:$g),
6863 (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
6864 Int32Regs:$x, Int32Regs:$y,
6865 Int16Regs:$r, Int16Regs:$g)>;
6867 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
6868 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6870 (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
6871 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6873 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
6874 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6876 (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
6877 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Four-element vector stores: values in $r, $g, $b, $a.
6879 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
6880 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6881 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6882 (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
6883 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6884 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6886 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
6887 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6888 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6889 (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
6890 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6891 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6893 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
6894 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6895 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6896 (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
6897 Int32Regs:$x, Int32Regs:$y,
6898 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for int_nvvm_sust_b_3d_*_zero.
// Each Pat selects the same-named SUST_B_3D_*_ZERO instruction with the
// operands in intrinsic order: $s (Int64Regs), $x, $y and $z (Int32Regs), then
// the value registers. In the vector patterns 8- and 16-bit values use
// Int16Regs, 32-bit values use Int32Regs and 64-bit values use Int64Regs.
// NOTE(review): in the four scalar patterns (i8/i16/i32/i64) both the
// intrinsic side and the instruction side stop after "$z,"; the lines naming
// the value operand are not visible in this listing -- confirm against the
// upstream file.
6902 def : Pat<(int_nvvm_sust_b_3d_i8_zero
6903 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6905 (SUST_B_3D_B8_ZERO Int64Regs:$s,
6906 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6909 def : Pat<(int_nvvm_sust_b_3d_i16_zero
6910 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6912 (SUST_B_3D_B16_ZERO Int64Regs:$s,
6913 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6916 def : Pat<(int_nvvm_sust_b_3d_i32_zero
6917 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6919 (SUST_B_3D_B32_ZERO Int64Regs:$s,
6920 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6923 def : Pat<(int_nvvm_sust_b_3d_i64_zero
6924 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6926 (SUST_B_3D_B64_ZERO Int64Regs:$s,
6927 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Two-element vector stores: values in $r, $g.
6930 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6931 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6932 Int16Regs:$r, Int16Regs:$g),
6933 (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
6934 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6935 Int16Regs:$r, Int16Regs:$g)>;
6937 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6938 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6939 Int16Regs:$r, Int16Regs:$g),
6940 (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
6941 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6942 Int16Regs:$r, Int16Regs:$g)>;
6944 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6945 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6946 Int32Regs:$r, Int32Regs:$g),
6947 (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
6948 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6949 Int32Regs:$r, Int32Regs:$g)>;
6951 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
6952 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6953 Int64Regs:$r, Int64Regs:$g),
6954 (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
6955 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6956 Int64Regs:$r, Int64Regs:$g)>;
// Four-element vector stores: values in $r, $g, $b, $a.
6958 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
6959 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6960 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6961 (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
6962 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6963 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6965 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
6966 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6967 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6968 (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
6969 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6970 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6972 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
6973 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6974 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6975 (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
6976 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6977 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for int_nvvm_sust_p_1d_*_trap.
// Each Pat selects the same-named SUST_P_1D_*_TRAP instruction and passes the
// operands through unchanged: $s (Int64Regs), $x (Int32Regs), then the value
// registers $r[, $g[, $b, $a]].
// Only 8-, 16- and 32-bit element types appear in this group; 8- and 16-bit
// values use Int16Regs and 32-bit values use Int32Regs.
// NOTE(review): "_trap" presumably names the out-of-range handling mode of
// the PTX sust instruction -- confirm against the PTX ISA documentation.
6982 def : Pat<(int_nvvm_sust_p_1d_i8_trap
6983 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6984 (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6986 def : Pat<(int_nvvm_sust_p_1d_i16_trap
6987 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6988 (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6990 def : Pat<(int_nvvm_sust_p_1d_i32_trap
6991 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6992 (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
// Two-element vector stores: values in $r, $g.
6994 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
6995 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6996 (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6997 Int16Regs:$r, Int16Regs:$g)>;
6999 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
7000 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7001 (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
7002 Int16Regs:$r, Int16Regs:$g)>;
7004 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
7005 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7006 (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
7007 Int32Regs:$r, Int32Regs:$g)>;
// Four-element vector stores: values in $r, $g, $b, $a.
7009 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
7010 Int64Regs:$s, Int32Regs:$x,
7011 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7012 (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
7013 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7015 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
7016 Int64Regs:$s, Int32Regs:$x,
7017 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7018 (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
7019 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7021 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
7022 Int64Regs:$s, Int32Regs:$x,
7023 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7024 (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
7025 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for int_nvvm_sust_p_1d_array_*_trap.
// Each Pat selects the same-named SUST_P_1D_ARRAY_*_TRAP instruction with the
// operands in intrinsic order: $s (Int64Regs), $l and $x (Int32Regs), then the
// value registers. 8- and 16-bit values use Int16Regs and 32-bit values use
// Int32Regs; no 64-bit variant appears in this group.
// NOTE(review): the three scalar patterns (i8/i16/i32) stop after "$x,"; the
// line that supplies the value operand and closes the Pat is not visible in
// this listing -- confirm against the upstream file.
7029 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
7030 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7031 (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7034 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
7035 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7036 (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7039 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
7040 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
7041 (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Two-element vector stores: values in $r, $g.
7044 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
7045 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7046 (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7047 Int16Regs:$r, Int16Regs:$g)>;
7049 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
7050 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7051 (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7052 Int16Regs:$r, Int16Regs:$g)>;
7054 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
7055 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7056 (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7057 Int32Regs:$r, Int32Regs:$g)>;
// Four-element vector stores: values in $r, $g, $b, $a.
7059 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
7060 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7061 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7062 (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7063 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7065 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
7066 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7067 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7068 (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7069 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7071 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
7072 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7073 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7074 (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7075 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for int_nvvm_sust_p_2d_*_trap.
// Each Pat selects the same-named SUST_P_2D_*_TRAP instruction with the
// operands in intrinsic order: $s (Int64Regs), $x and $y (Int32Regs), then the
// value registers. 8- and 16-bit values use Int16Regs and 32-bit values use
// Int32Regs; no 64-bit variant appears in this group.
// NOTE(review): the three scalar patterns (i8/i16/i32) stop after "$y,"; the
// line that supplies the value operand and closes the Pat is not visible in
// this listing -- confirm against the upstream file.
7079 def : Pat<(int_nvvm_sust_p_2d_i8_trap
7080 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7081 (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7084 def : Pat<(int_nvvm_sust_p_2d_i16_trap
7085 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7086 (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7089 def : Pat<(int_nvvm_sust_p_2d_i32_trap
7090 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7091 (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Two-element vector stores: values in $r, $g.
7094 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
7095 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7096 (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7097 Int16Regs:$r, Int16Regs:$g)>;
7099 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
7100 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7101 (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7102 Int16Regs:$r, Int16Regs:$g)>;
7104 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
7105 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
7106 (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7107 Int32Regs:$r, Int32Regs:$g)>;
// Four-element vector stores: values in $r, $g, $b, $a.
7109 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
7110 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7111 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7112 (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7113 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7115 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
7116 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7117 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7118 (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7119 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7121 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
7122 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7123 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7124 (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7125 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for int_nvvm_sust_p_2d_array_*_trap.
// Each Pat selects the same-named SUST_P_2D_ARRAY_*_TRAP instruction with the
// operands in intrinsic order: $s (Int64Regs), $l, $x and $y (Int32Regs), then
// the value registers. 8- and 16-bit values use Int16Regs and 32-bit values
// use Int32Regs; no 64-bit variant appears in this group.
// NOTE(review): the three scalar patterns (i8/i16/i32) stop after "$y,", and
// the intrinsic side of the v2i32 pattern stops after "$r,"; the continuation
// lines are not visible in this listing -- confirm against the upstream file.
7129 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
7130 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7131 (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
7132 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7135 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
7136 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7137 (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
7138 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7141 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
7142 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7143 (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
7144 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Two-element vector stores: values in $r, $g.
7147 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
7148 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7149 Int16Regs:$r, Int16Regs:$g),
7150 (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
7151 Int32Regs:$x, Int32Regs:$y,
7152 Int16Regs:$r, Int16Regs:$g)>;
7154 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
7155 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7156 Int16Regs:$r, Int16Regs:$g),
7157 (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
7158 Int32Regs:$x, Int32Regs:$y,
7159 Int16Regs:$r, Int16Regs:$g)>;
7161 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
7162 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
7164 (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
7165 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
// Four-element vector stores: values in $r, $g, $b, $a.
7167 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
7168 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7169 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7170 (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
7171 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7172 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7174 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
7175 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7176 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7177 (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
7178 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7179 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7181 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
7182 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7183 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7184 (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
7185 Int32Regs:$x, Int32Regs:$y,
7186 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for int_nvvm_sust_p_3d_*_trap.
// Each Pat selects the same-named SUST_P_3D_*_TRAP instruction with the
// operands in intrinsic order: $s (Int64Regs), $x, $y and $z (Int32Regs), then
// the value registers. In the vector patterns 8- and 16-bit values use
// Int16Regs and 32-bit values use Int32Regs; no 64-bit variant appears here.
// NOTE(review): in the three scalar patterns (i8/i16/i32) both the intrinsic
// side and the instruction side stop after "$z,"; the lines naming the value
// operand are not visible in this listing -- confirm against the upstream file.
7190 def : Pat<(int_nvvm_sust_p_3d_i8_trap
7191 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7193 (SUST_P_3D_B8_TRAP Int64Regs:$s,
7194 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7197 def : Pat<(int_nvvm_sust_p_3d_i16_trap
7198 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7200 (SUST_P_3D_B16_TRAP Int64Regs:$s,
7201 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7204 def : Pat<(int_nvvm_sust_p_3d_i32_trap
7205 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7207 (SUST_P_3D_B32_TRAP Int64Regs:$s,
7208 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Two-element vector stores: values in $r, $g.
7211 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
7212 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7213 Int16Regs:$r, Int16Regs:$g),
7214 (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
7215 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7216 Int16Regs:$r, Int16Regs:$g)>;
7218 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
7219 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7220 Int16Regs:$r, Int16Regs:$g),
7221 (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
7222 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7223 Int16Regs:$r, Int16Regs:$g)>;
7225 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
7226 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7227 Int32Regs:$r, Int32Regs:$g),
7228 (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
7229 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7230 Int32Regs:$r, Int32Regs:$g)>;
// Four-element vector stores: values in $r, $g, $b, $a.
7232 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
7233 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7234 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7235 (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
7236 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7237 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7239 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
7240 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7241 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7242 (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
7243 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7244 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7246 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
7247 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7248 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7249 (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
7250 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7251 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7253 //-----------------------------------
7254 // Read Special Registers
7255 //-----------------------------------
// Instruction template for reading a 64-bit special register.
//   regname - register name without the leading '%'; it is spliced into the
//             asm string, giving "mov.u64 \t$d, %<regname>;".
//   intop   - intrinsic matched by the selection pattern. It is matched with
//             no operands and its result is placed in $d (Int64Regs).
7257 class PTX_READ_SREG_R64<string regname, Intrinsic intop>
7258 : NVPTXInst<(outs Int64Regs:$d), (ins),
7259 !strconcat("mov.u64 \t$d, %", regname, ";"),
7260 [(set Int64Regs:$d, (intop))]>;
// Instruction template for reading a 32-bit special register.
//   regname - register name without the leading '%'; it is spliced into the
//             asm string, giving "mov.u32 \t$d, %<regname>;".
//   intop   - intrinsic matched by the selection pattern. It is matched with
//             no operands and its result is placed in $d (Int32Regs).
7262 class PTX_READ_SREG_R32<string regname, Intrinsic intop>
7263 : NVPTXInst<(outs Int32Regs:$d), (ins),
7264 !strconcat("mov.u32 \t$d, %", regname, ";"),
7265 [(set Int32Regs:$d, (intop))]>;
7267 // TODO: Add instructions that read the vector form of special registers.
// One instruction per special register. Each def instantiates a
// PTX_READ_SREG_R32/R64 template, pairing the PTX register name (written
// without the '%') with the int_nvvm_read_ptx_sreg_* intrinsic it is selected
// for. Every read is 32-bit except clock64.

// %tid.{x,y,z,w}
7269 def INT_PTX_SREG_TID_X :
7270 PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
7271 def INT_PTX_SREG_TID_Y :
7272 PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
7273 def INT_PTX_SREG_TID_Z :
7274 PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
7275 def INT_PTX_SREG_TID_W :
7276 PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
// %ntid.{x,y,z,w}
7278 def INT_PTX_SREG_NTID_X :
7279 PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
7280 def INT_PTX_SREG_NTID_Y :
7281 PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
7282 def INT_PTX_SREG_NTID_Z :
7283 PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
7284 def INT_PTX_SREG_NTID_W :
7285 PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
// %laneid, %warpid, %nwarpid
7287 def INT_PTX_SREG_LANEID :
7288 PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
7289 def INT_PTX_SREG_WARPID :
7290 PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
7291 def INT_PTX_SREG_NWARPID :
7292 PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
// %ctaid.{x,y,z,w}
7294 def INT_PTX_SREG_CTAID_X :
7295 PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
7296 def INT_PTX_SREG_CTAID_Y :
7297 PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
7298 def INT_PTX_SREG_CTAID_Z :
7299 PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
7300 def INT_PTX_SREG_CTAID_W :
7301 PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
// %nctaid.{x,y,z,w}
7303 def INT_PTX_SREG_NCTAID_X :
7304 PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
7305 def INT_PTX_SREG_NCTAID_Y :
7306 PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
7307 def INT_PTX_SREG_NCTAID_Z :
7308 PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
7309 def INT_PTX_SREG_NCTAID_W :
7310 PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
// %smid, %nsmid, %gridid
7312 def INT_PTX_SREG_SMID :
7313 PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
7314 def INT_PTX_SREG_NSMID :
7315 PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
7316 def INT_PTX_SREG_GRIDID :
7317 PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
// %lanemask_{eq,le,lt,ge,gt}
7319 def INT_PTX_SREG_LANEMASK_EQ :
7320 PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
7321 def INT_PTX_SREG_LANEMASK_LE :
7322 PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
7323 def INT_PTX_SREG_LANEMASK_LT :
7324 PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
7325 def INT_PTX_SREG_LANEMASK_GE :
7326 PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
7327 def INT_PTX_SREG_LANEMASK_GT :
7328 PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
// %clock (32-bit) and %clock64 (the only 64-bit read in this group)
7330 def INT_PTX_SREG_CLOCK :
7331 PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
7332 def INT_PTX_SREG_CLOCK64 :
7333 PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
// %pm0 .. %pm3
7335 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
7336 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
7337 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
7338 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
7340 // TODO: It would be nice to use PTX_READ_SREG here, but it doesn't
7341 // handle the constant.
// The PTX_READ_SREG_R32 template always emits a '%' in front of the register
// name, whereas the asm string below uses the bare name WARP_SZ, so this
// instruction is spelled out by hand. It takes no inputs and its pattern
// matches int_nvvm_read_ptx_sreg_warpsize, placing the result in $dst.
7342 def INT_PTX_SREG_WARPSIZE :
7343 NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
7344 [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
7346 // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
7347 // In addition to target-independent fields provided by WMMA_REGS, it adds
7348 // the fields commonly used to implement specific PTX instruction -- register
7349 // types and names, constraints, parts of assembly, etc.
// The class is re-derived from an existing WMMA_REGS record `r` by passing its
// geom, frag and ptx_elt_type back to WMMA_REGS.
// NOTE(review): `geom`, `frag`, `ptx_elt_type` and `regs` are used below but
// not declared here; presumably they are fields inherited from WMMA_REGS --
// confirm against its definition.
7350 class WMMA_REGINFO<WMMA_REGS r>
7351 : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
7352 // NVPTX register types used to carry fragment data.
// f16 maps to Float16x2Regs and f32 to Float32Regs; every integer, sub-byte
// and b1 element type maps to Int32Regs.
7353 NVPTXRegClass regclass = !cond(
7354 !eq(ptx_elt_type, "f16") : Float16x2Regs,
7355 !eq(ptx_elt_type, "f32") : Float32Regs,
7356 !eq(ptx_elt_type, "s32") : Int32Regs,
7357 !eq(ptx_elt_type, "s8") : Int32Regs,
7358 !eq(ptx_elt_type, "u8") : Int32Regs,
7359 !eq(ptx_elt_type, "s4") : Int32Regs,
7360 !eq(ptx_elt_type, "u4") : Int32Regs,
7361 !eq(ptx_elt_type, "b1") : Int32Regs);
7363 // Instruction input/output arguments for the fragment.
// One `regclass` entry is produced for each element of `regs`.
7364 list<NVPTXRegClass> ptx_regs = !foreach(tmp, regs, regclass);
7366 // List of register names for the fragment -- ["ra0", "ra1",...]
// The name prefix is "r" followed by `frag`; the count is the size of ptx_regs.
7367 list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
7369 // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
// Built from the first name, then ", $<name>" appended for each remaining one.
// NOTE(review): the line that terminates this expression is not visible in
// this listing -- confirm against the upstream file.
7370 string regstring = "{{$" # !head(reg_names)
7371 # !foldl("", !tail(reg_names), a, b,
7372 !strconcat(a, ", $", b))
7375 // Predicates for particular fragment variant. Technically those are
7376 // per-instruction predicates, but currently all fragments that can be used in
7377 // a given instruction are subject to the same constraints, so an instruction
7378 // can use predicates from any of its fragments. If/when this is no
7379 // longer the case, we can concat all per-fragment predicates to enforce that
7380 // all fragments of the instruction are viable.
// The clauses are keyed on (geom, ptx_elt_type), except the last one, which
// is keyed on geom alone. There is no catch-all clause.
7381 list<Predicate> Predicates = !cond(
7382 // fp16 -> fp16/fp32 @ m16n16k16
7383 !and(!eq(geom, "m16n16k16"),
7384 !or(!eq(ptx_elt_type, "f16"),
7385 !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],
7387 // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
7388 !and(!or(!eq(geom, "m8n32k16"),
7389 !eq(geom, "m32n8k16")),
7390 !or(!eq(ptx_elt_type, "f16"),
7391 !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],
7393 // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
7394 !and(!or(!eq(geom,"m16n16k16"),
7395 !eq(geom,"m8n32k16"),
7396 !eq(geom,"m32n8k16")),
7397 !or(!eq(ptx_elt_type, "u8"),
7398 !eq(ptx_elt_type, "s8"),
7399 !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],
7401 // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1)
7402 !or(!eq(geom,"m8n8k128"),
7403 !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63]);
7405 // template DAGs for instruction inputs/output.
// Both dags pair each entry of ptx_regs with the same-index name in
// reg_names; only the operator (outs vs. ins) differs.
7406 dag Outs = !dag(outs, ptx_regs, reg_names);
7407 dag Ins = !dag(ins, ptx_regs, reg_names);
7410 // Convert dag of arguments into a dag to match given intrinsic.
//   Intr - intrinsic that replaces the `ins` operator of the dag.
//   Ins  - instruction-input dag to convert.
// The result is exposed as the `ret` field.
7411 class BuildPatternI<Intrinsic Intr, dag Ins> {
7412 // Build a dag pattern that matches the intrinsic call.
// Substitutions are applied innermost first: ins -> Intr, MEMri -> ADDRri,
// MEMri64 -> ADDRri64, imem -> ADDRvar.
// NOTE(review): the ADDR* names are presumably the match-side counterparts of
// the MEM*/imem operand classes -- confirm against their definitions.
7413 dag ret = !foreach(tmp, Ins,
7414 !subst(imem, ADDRvar,
7415 !subst(MEMri64, ADDRri64,
7416 !subst(MEMri, ADDRri,
7417 !subst(ins, Intr, tmp)))));
7420 // Same as above, but uses PatFrag instead of an Intrinsic.
//   Intr - PatFrag that replaces the `ins` operator of the dag.
//   Ins  - instruction-input dag to convert.
// The result is exposed as the `ret` field.
7421 class BuildPatternPF<PatFrag Intr, dag Ins> {
7422 // Build a dag pattern that matches the PatFrag.
// Substitutions are applied innermost first: ins -> Intr, MEMri -> ADDRri,
// MEMri64 -> ADDRri64, imem -> ADDRvar.
7423 dag ret = !foreach(tmp, Ins,
7424 !subst(imem, ADDRvar,
7425 !subst(MEMri64, ADDRri64,
7426 !subst(MEMri, ADDRri,
7427 !subst(ins, Intr, tmp)))));
7430 // Common WMMA-related fields used for building patterns for all MMA instructions.
//   _Intr - name of the intrinsic record, resolved with !cast below.
//   _Args - list of input dags that are concatenated into Args.
// The base NVPTXInst is created with empty operand lists, a "?" asm string and
// no pattern; the subclasses visible in this file override OutOperandList,
// InOperandList and AsmString with `let`.
7431 class WMMA_INSTR<string _Intr, list<dag> _Args>
7432 : NVPTXInst<(outs), (ins), "?", []> {
7433 Intrinsic Intr = !cast<Intrinsic>(_Intr);
7434 // Concatenate all arguments into a single dag.
7435 dag Args = !foldl((ins), _Args, a, b, !con(a,b));
7436 // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
// NOTE(review): Intr is already declared as Intrinsic above, so the !cast on
// the next line looks redundant -- confirm before simplifying.
7437 dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
7441 // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
// Template parameters visible here:
//   Frag       - fragment description; supplies Predicates, Outs and
//                ptx_elt_type.
//   Layout     - passed to WMMA_NAME_LDST to pick the intrinsic record.
//   Space      - address-space suffix; ".shared" and ".global" select the
//                matching AS_match predicate, anything else selects generic.
//   WithStride - when set, adds the Int32Regs:$ldm operand and ", $ldm" to
//                the asm string.
// NOTE(review): SrcOp is used below but its declaration, and several lines of
// the asm string, are not visible in this listing -- confirm against the
// upstream file.
7444 class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
7446 : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
7447 [!con((ins SrcOp:$src),
7448 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7449 Requires<Frag.Predicates> {
7450 // Load/store intrinsics are overloaded on pointer's address space.
7451 // To match the right intrinsic, we need to build AS-constrained PatFrag.
7452 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
7453 dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
7454 // Build PatFrag that only matches particular address space.
// The fragment body is PFOperands with its `ops` operator replaced by Intr.
7455 PatFrag IntrFrag = PatFrag<PFOperands,
7456 !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7457 !cond(!eq(Space, ".shared"): AS_match.shared,
7458 !eq(Space, ".global"): AS_match.global,
7459 1: AS_match.generic)>;
7460 // Build AS-constrained pattern.
// This replaces the intrinsic-based pattern pre-built in WMMA_INSTR.
7461 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
// Outputs are the fragment registers; inputs are Args plus a trailing
// MmaCode:$ptx operand.
7463 let OutOperandList = Frag.Outs;
7464 let InOperandList = !con(Args, (ins MmaCode:$ptx));
7465 let AsmString = "wmma.load."
7472 # "." # Frag.ptx_elt_type # " \t"
7475 # !if(WithStride, ", $ldm", "")
7480 // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
// Template parameters:
//   Frag       - fragment description; supplies Predicates, regs, reg_names
//                and ptx_elt_type.
//   Layout     - passed to WMMA_NAME_LDST to pick the intrinsic record.
//   Space      - address-space suffix; ".shared" and ".global" select the
//                matching AS_match predicate, anything else selects generic.
//   WithStride - when set, adds the Int32Regs:$ldm operand and ", $ldm" to
//                the asm string.
//   DstOp      - operand class used for the $dst address operand.
// NOTE(review): one line of the argument list (between $dst and the stride
// operand) and several lines of the asm string are not visible in this
// listing -- confirm against the upstream file.
7482 class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
7483 bit WithStride, DAGOperand DstOp>
7484 : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
7485 [!con((ins DstOp:$dst),
7487 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7488 Requires<Frag.Predicates> {
7490 // Load/store intrinsics are overloaded on pointer's address space.
7491 // To match the right intrinsic, we need to build AS-constrained PatFrag.
7492 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
// Shape: $dst, then one `node` per entry of Frag.regs named after
// Frag.reg_names, then $ldm when WithStride is set.
7493 dag PFOperands = !con((ops node:$dst),
7494 !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
7495 !if(WithStride, (ops node:$ldm), (ops)));
7496 // Build PatFrag that only matches particular address space.
// The fragment body is PFOperands with its `ops` operator replaced by Intr.
7497 PatFrag IntrFrag = PatFrag<PFOperands,
7498 !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7499 !cond(!eq(Space, ".shared"): AS_match.shared,
7500 !eq(Space, ".global"): AS_match.global,
7501 1: AS_match.generic)>;
7502 // Build AS-constrained pattern.
// This replaces the intrinsic-based pattern pre-built in WMMA_INSTR.
7503 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
// A store defines no outputs; inputs are Args plus a trailing MmaCode:$ptx.
7505 let InOperandList = !con(Args, (ins MmaCode:$ptx));
7506 let OutOperandList = (outs);
7507 let AsmString = "wmma.store.d.sync"
7512 # "." # Frag.ptx_elt_type
7515 # !if(WithStride, ", $ldm", "")
7519 // Create all load/store variants
// Every anonymous def created inside the defset is collected into the list
// MMA_LDSTs. The loops enumerate layout x stride x address space x address
// operand class, and instantiate WMMA_LOAD for each fragment in
// NVVM_MMA_OPS.all_ld_ops and WMMA_STORE_D for each in all_st_ops.
// NOTE(review): the `foreach _ = NVVM_MMA_SUPPORTED<...>.ret` loops ignore
// their loop variable; presumably .ret is empty for unsupported combinations
// so the def is skipped -- confirm against NVVM_MMA_SUPPORTED.
// NOTE(review): the closing braces of these loops are not visible in this
// listing.
7520 defset list<WMMA_INSTR> MMA_LDSTs = {
7521 foreach layout = ["row", "col"] in {
7522 foreach stride = [0, 1] in {
7523 foreach space = [".global", ".shared", ""] in {
7524 foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
7525 foreach frag = NVVM_MMA_OPS.all_ld_ops in
7526 foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
7527 def : WMMA_LOAD<WMMA_REGINFO<frag>, layout, space, stride, addr>;
7528 foreach frag = NVVM_MMA_OPS.all_st_ops in
7529 foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
7530 def : WMMA_STORE_D<WMMA_REGINFO<frag>, layout, space, stride, addr>;
// Instruction template for wmma.mma.
//   FragA, FragB, FragC - input fragments; their Ins dags form the argument
//                         list passed to WMMA_INSTR.
//   FragD               - result fragment; its Outs dag is the output list.
//   ALayout, BLayout    - passed to WMMA_NAME_MMA to pick the intrinsic record.
//   Satfinite           - when non-zero, ".satfinite" is added to the asm
//                         string.
// NOTE(review): parts of TypeList and of the asm string are not visible in
// this listing -- confirm against the upstream file.
7538 class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
7539 WMMA_REGINFO FragC, WMMA_REGINFO FragD,
7540 string ALayout, string BLayout, int Satfinite>
7541 : WMMA_INSTR<WMMA_NAME_MMA<ALayout, BLayout, Satfinite, FragA, FragB, FragC, FragD>.record,
7542 [FragA.Ins, FragB.Ins, FragC.Ins]>,
7543 // Requires does not seem to have effect on Instruction w/o Patterns.
7544 // We set it here anyways and propagate to the Pat<> we construct below.
7545 Requires<FragA.Predicates> {
7546 let OutOperandList = FragD.Outs;
7547 let InOperandList = !con(Args, (ins MmaCode:$ptx));
// Type suffix of the mnemonic: when D is s32 it starts with ".s32" followed
// by the A and B element types; otherwise it is ".<D type>.<C type>".
7548 string TypeList = !cond(
7549 !eq(FragD.ptx_elt_type, "s32") : ".s32"
7550 # "." # FragA.ptx_elt_type
7551 # "." # FragB.ptx_elt_type
7553 1: "." # FragD.ptx_elt_type # "." # FragC.ptx_elt_type,
7555 let AsmString = "wmma.mma"
// b1 fragments get the ".xor.popc" qualifier.
7556 # !if(!eq(FragA.ptx_elt_type, "b1"), ".xor.popc", "")
7563 # !if(Satfinite, ".satfinite", "") # "\n\t\t"
// Register lists are emitted in the order D, A, B, C, one per line.
7564 # FragD.regstring # ",\n\t\t"
7565 # FragA.regstring # ",\n\t\t"
7566 # FragB.regstring # ",\n\t\t"
7567 # FragC.regstring # ";";
// Create all wmma.mma variants and collect them in the list MMAs.
// The loops enumerate A layout x B layout x satfinite x operation. Each `op`
// is indexed as op[0]..op[3], which are passed as WMMA_MMA's FragA, FragB,
// FragC and FragD in that order.
// NOTE(review): the `foreach _ = NVVM_MMA_SUPPORTED<...>.ret` loop ignores its
// loop variable; presumably .ret is empty for unsupported combinations so the
// def is skipped -- confirm against NVVM_MMA_SUPPORTED.
// NOTE(review): the closing braces of these loops are not visible in this
// listing.
7570 defset list<WMMA_INSTR> MMAs = {
7571 foreach layout_a = ["row", "col"] in {
7572 foreach layout_b = ["row", "col"] in {
7573 foreach satf = [0, 1] in {
7574 foreach op = NVVM_MMA_OPS.all_mma_ops in {
7575 foreach _ = NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret in {
7576 def : WMMA_MMA<WMMA_REGINFO<op[0]>,
7577 WMMA_REGINFO<op[1]>,
7578 WMMA_REGINFO<op[2]>,
7579 WMMA_REGINFO<op[3]>,
7580 layout_a, layout_b, satf>;
7589 // Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
7590 // dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
7591 // the instruction record.
// Selection pattern for one WMMA instruction record `wi`:
//   source - wi.IntrinsicPattern;
//   result - wi.Args with its `ins` operator replaced by `wi`, joined by !con
//            with a further dag;
//   predicates - taken from wi.Predicates.
// NOTE(review): the second argument of !con is on a line that is not visible
// in this listing; per the comment above it presumably carries ptx.version --
// confirm against the upstream file.
7592 class WMMA_PAT<WMMA_INSTR wi>
7593 : Pat<wi.IntrinsicPattern,
7594 !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
7596 Requires<wi.Predicates>;
7598 // Build intrinsic->instruction patterns for all MMA instructions.
// One anonymous WMMA_PAT is emitted for every record in MMAs followed by every
// record in MMA_LDSTs.
7599 foreach mma = !listconcat(MMAs, MMA_LDSTs) in
7600 def : WMMA_PAT<mma>;