1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 def immFloat0 : PatLeaf<(fpimm), [{
10 float f = (float)N->getValueAPF().convertToFloat();
14 def immFloat1 : PatLeaf<(fpimm), [{
15 float f = (float)N->getValueAPF().convertToFloat();
19 def immDouble0 : PatLeaf<(fpimm), [{
20 double d = (double)N->getValueAPF().convertToDouble();
24 def immDouble1 : PatLeaf<(fpimm), [{
25 double d = (double)N->getValueAPF().convertToDouble();
31 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
34 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
37 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
41 // A node that will be replaced with the current PTX version.
43 SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
44 return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
46 // (i32 0) will be XForm'ed to the currently used PTX version.
47 dag version = (PTXVerXform (i32 0));
51 // Generates list of n sequential register names.
52 // E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
53 class RegSeq<int n, string prefix> {
54 list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
55 [prefix # !sub(n, 1)]),
59 class THREADMASK_INFO<bit sync> {
60 list<bit> ret = !if(sync, [0, 1], [0]);
63 //-----------------------------------
64 // Synchronization and shuffle functions
65 //-----------------------------------
66 let isConvergent = true in {
67 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
69 [(int_nvvm_barrier0)]>;
70 def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
72 [(int_nvvm_barrier_n Int32Regs:$src1)]>;
// Two-operand barrier: selects int_nvvm_barrier with both operands in
// Int32Regs and prints them, in order, as the operands of `bar.sync`.
73 def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
74 "bar.sync \t$src1, $src2;",
75 [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
76 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
78 ".reg .pred \t%p1; \n\t",
79 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
80 "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
82 [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
83 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
85 ".reg .pred \t%p1; \n\t",
86 ".reg .pred \t%p2; \n\t",
87 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
88 "bar.red.and.pred \t%p2, 0, %p1; \n\t",
89 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
91 [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
92 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
94 ".reg .pred \t%p1; \n\t",
95 ".reg .pred \t%p2; \n\t",
96 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
97 "bar.red.or.pred \t%p2, 0, %p1; \n\t",
98 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
100 [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// Selects int_nvvm_bar_sync when its operand is an immediate and prints it as
// the single operand of `bar.sync`. No predicate list is attached here.
102 def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
103 [(int_nvvm_bar_sync imm:$i)]>;
// int_nvvm_bar_warp_sync -> `bar.warp.sync $i;`.
// _I matches an immediate operand, _R matches an Int32Regs operand; both are
// gated on the predicate list [hasPTX<60>, hasSM<30>].
105 def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
106 [(int_nvvm_bar_warp_sync imm:$i)]>,
107 Requires<[hasPTX<60>, hasSM<30>]>;
108 def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
109 [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
110 Requires<[hasPTX<60>, hasSM<30>]>;
// int_nvvm_barrier_sync -> `barrier.sync $i;`.
// _I matches an immediate operand, _R matches an Int32Regs operand; both are
// gated on the predicate list [hasPTX<60>, hasSM<30>].
112 def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
113 [(int_nvvm_barrier_sync imm:$i)]>,
114 Requires<[hasPTX<60>, hasSM<30>]>;
115 def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
116 [(int_nvvm_barrier_sync Int32Regs:$i)]>,
117 Requires<[hasPTX<60>, hasSM<30>]>;
// int_nvvm_barrier_sync_cnt -> `barrier.sync $id, $cnt;`.
// The two-letter suffix gives the operand kind of ($id, $cnt) in that order:
// R = Int32Regs, I = immediate. All four combinations are provided so that
// either operand can be folded as an immediate, and every variant is gated on
// the predicate list [hasPTX<60>, hasSM<30>].
119 def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
120 "barrier.sync \t$id, $cnt;",
121 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
122 Requires<[hasPTX<60>, hasSM<30>]>;
123 def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
124 "barrier.sync \t$id, $cnt;",
125 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
126 Requires<[hasPTX<60>, hasSM<30>]>;
127 def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
128 "barrier.sync \t$id, $cnt;",
129 [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
130 Requires<[hasPTX<60>, hasSM<30>]>;
131 def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
132 "barrier.sync \t$id, $cnt;",
133 [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
134 Requires<[hasPTX<60>, hasSM<30>]>;
136 class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
137 list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
138 NVPTXInst<(outs), (ins), "barrier.cluster."# variant #";", [(Intr)]>,
// Operand-less cluster barrier instructions built from INT_BARRIER_CLUSTER,
// which prints "barrier.cluster.<variant>;" and matches the given intrinsic.
// `arrive` and `wait` use the class's default predicate list
// ([hasPTX<78>, hasSM<90>]); `arrive.relaxed` overrides it with
// [hasPTX<80>, hasSM<90>].
141 def barrier_cluster_arrive:
142 INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;
143 def barrier_cluster_arrive_relaxed:
144 INT_BARRIER_CLUSTER<"arrive.relaxed",
145 int_nvvm_barrier_cluster_arrive_relaxed, [hasPTX<80>, hasSM<90>]>;
146 def barrier_cluster_wait:
147 INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;
149 // 'aligned' versions of the cluster barrier intrinsics
// Same three operations as the non-aligned defs, with ".aligned" appended to
// the variant string and the *_aligned intrinsics matched instead. As with the
// non-aligned set, only the relaxed variant passes an explicit predicate list
// ([hasPTX<80>, hasSM<90>]); the others use the INT_BARRIER_CLUSTER default.
150 def barrier_cluster_arrive_aligned:
151 INT_BARRIER_CLUSTER<"arrive.aligned", int_nvvm_barrier_cluster_arrive_aligned>;
152 def barrier_cluster_arrive_relaxed_aligned:
153 INT_BARRIER_CLUSTER<"arrive.relaxed.aligned",
154 int_nvvm_barrier_cluster_arrive_relaxed_aligned, [hasPTX<80>, hasSM<90>]>;
155 def barrier_cluster_wait_aligned:
156 INT_BARRIER_CLUSTER<"wait.aligned", int_nvvm_barrier_cluster_wait_aligned>;
158 class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
159 bit offset_imm, bit mask_imm, bit threadmask_imm>
160 : NVPTXInst<(outs), (ins), "?", []> {
161 NVPTXRegClass rc = !cond(
162 !eq(reg, "i32"): Int32Regs,
163 !eq(reg, "f32"): Float32Regs);
164 string IntrName = "int_nvvm_shfl_"
165 # !if(sync, "sync_", "")
168 # !if(return_pred, "p", "");
169 Intrinsic Intr = !cast<Intrinsic>(IntrName);
170 let InOperandList = !con(
172 !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
175 !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
176 !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
178 let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
179 let AsmString = "shfl."
180 # !if(sync, "sync.", "")
183 # !if(return_pred, "|$pred", "") # ", "
184 # "$src, $offset, $mask"
185 # !if(sync, ", $threadmask", "")
189 !foreach(tmp, OutOperandList,
191 !subst(i32imm, imm, tmp))),
192 (set !foreach(tmp, InOperandList,
194 !subst(i32imm, imm, tmp))))
// Emits one anonymous SHFL_INSTR record for every combination of:
//   sync           - plain vs. sync form
//   mode           - up / down / bfly / idx
//   regclass       - "i32" or "f32"
//   return_pred    - whether the record also produces a predicate result
//   offset_imm     - offset as immediate vs. register
//   mask_imm       - mask as immediate vs. register
//   threadmask_imm - threadmask as immediate vs. register
// THREADMASK_INFO<sync>.ret is [0, 1] for the sync form and [0] otherwise, so
// the threadmask dimension is only expanded for sync shuffles.
// Sync records require [hasSM<30>, hasPTX<60>]; the others require
// [hasSM<30>, hasSHFL].
198 foreach sync = [false, true] in {
199 foreach mode = ["up", "down", "bfly", "idx"] in {
200 foreach regclass = ["i32", "f32"] in {
201 foreach return_pred = [false, true] in {
202 foreach offset_imm = [false, true] in {
203 foreach mask_imm = [false, true] in {
204 foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
205 def : SHFL_INSTR<sync, mode, regclass, return_pred,
206 offset_imm, mask_imm, threadmask_imm>,
207 Requires<!if(sync, [hasSM<30>, hasPTX<60>], [hasSM<30>, hasSHFL])>;
216 // vote.{all,any,uni,ballot}
// VOTE parameters:
//   regclass - register class of the result ($dest)
//   mode     - text appended after "vote." in the assembly string
//   IntOp    - intrinsic matched; it takes a single Int1Regs predicate
// The multiclass emits one anonymous instruction gated on
// [hasPTX<60>, hasSM<30>].
217 multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
218 def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
219 "vote." # mode # " \t$dest, $pred;",
220 [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
221 Requires<[hasPTX<60>, hasSM<30>]>;
// all/any/uni produce a predicate (Int1Regs); ballot produces a 32-bit value
// (Int32Regs).
224 defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
225 defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
226 defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
227 defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
229 // vote.sync.{all,any,uni,ballot}
// VOTE_SYNC parameters have the same meaning as in VOTE, but the matched
// intrinsic takes (mask, pred). Two instructions are emitted per
// instantiation:
//   i - $mask is an immediate
//   r - $mask is in Int32Regs
// Note the operand order differs between the pattern (mask, pred) and the
// printed assembly ($dest, $pred, $mask). Both variants are gated on
// [hasPTX<60>, hasSM<30>].
230 multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
231 def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
232 "vote.sync." # mode # " \t$dest, $pred, $mask;",
233 [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
234 Requires<[hasPTX<60>, hasSM<30>]>;
235 def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
236 "vote.sync." # mode #" \t$dest, $pred, $mask;",
237 [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
238 Requires<[hasPTX<60>, hasSM<30>]>;
// all/any/uni produce a predicate (Int1Regs); ballot produces a 32-bit value
// (Int32Regs).
241 defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
242 defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
243 defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
244 defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
246 multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
248 def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value),
249 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
250 [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
251 Requires<[hasPTX<60>, hasSM<70>]>;
252 def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value),
253 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
254 [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
255 Requires<[hasPTX<60>, hasSM<70>]>;
256 def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value),
257 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
258 [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>,
259 Requires<[hasPTX<60>, hasSM<70>]>;
260 def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value),
261 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
262 [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
263 Requires<[hasPTX<60>, hasSM<70>]>;
266 defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
268 defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
271 multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
273 def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
274 (ins i32imm:$mask, ImmOp:$value),
275 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
276 [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
277 Requires<[hasPTX<60>, hasSM<70>]>;
278 def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
279 (ins Int32Regs:$mask, ImmOp:$value),
280 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
281 [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
282 Requires<[hasPTX<60>, hasSM<70>]>;
283 def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
284 (ins i32imm:$mask, regclass:$value),
285 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
286 [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
287 Requires<[hasPTX<60>, hasSM<70>]>;
288 def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
289 (ins Int32Regs:$mask, regclass:$value),
290 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
291 [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
292 Requires<[hasPTX<60>, hasSM<70>]>;
294 defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
296 defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
// REDUX_SYNC parameters:
//   BinOp   - operation name placed after "redux.sync."
//   PTXType - type suffix placed after the operation name
//   Intrin  - intrinsic matched; it takes (src, mask), both in Int32Regs
// Emits one anonymous instruction with an Int32Regs result, gated on
// [hasPTX<70>, hasSM<80>]. Only register operands are matched; there is no
// immediate-mask variant in this multiclass.
299 multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
300 def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
301 "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
302 [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
303 Requires<[hasPTX<70>, hasSM<80>]>;
// The unsigned min/max use the "u32" suffix, add and the signed min/max use
// "s32", and the bitwise operations use "b32".
306 defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
307 defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
308 defm REDUX_SYNC_ADD : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
309 defm REDUX_SYNC_MIN : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
310 defm REDUX_SYNC_MAX : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
311 defm REDUX_SYNC_AND : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
312 defm REDUX_SYNC_XOR : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
313 defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;
315 } // isConvergent = true
317 //-----------------------------------
318 // Explicit Memory Fence Functions
319 //-----------------------------------
320 class MEMBAR<string StrOp, Intrinsic IntOP> :
321 NVPTXInst<(outs), (ins),
// Fence instructions built from the MEMBAR class. Each def passes the complete
// assembly string (including the trailing ';') and the intrinsic to match.
// The three membar.* defs attach no predicate list at this level.
324 def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
325 def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
326 def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
// fence.sc.cluster is additionally gated on [hasPTX<78>, hasSM<90>].
328 def INT_FENCE_SC_CLUSTER:
329 MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
330 Requires<[hasPTX<78>, hasSM<90>]>;
332 //-----------------------------------
333 // Async Copy Functions
334 //-----------------------------------
// CP_ASYNC_MBARRIER_ARRIVE parameters:
//   NoInc     - "" or ".noinc", inserted after "cp.async.mbarrier.arrive"
//   AddrSpace - "" or ".shared", inserted after NoInc
//   Intrin    - intrinsic matched; it takes the address as its only operand
// _32 / _64 select on the register class of $addr (Int32Regs / Int64Regs).
// The ".b64" in the assembly string is fixed and does not follow the address
// width. Both variants are gated on [hasPTX<70>, hasSM<80>].
336 multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
337 def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
338 !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
339 [(Intrin Int32Regs:$addr)]>,
340 Requires<[hasPTX<70>, hasSM<80>]>;
341 def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
342 !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
343 [(Intrin Int64Regs:$addr)]>,
344 Requires<[hasPTX<70>, hasSM<80>]>;
// Four instantiations: {default, .noinc} x {no address-space suffix, .shared}.
347 defm CP_ASYNC_MBARRIER_ARRIVE :
348 CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
349 defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
350 CP_ASYNC_MBARRIER_ARRIVE<"", ".shared", int_nvvm_cp_async_mbarrier_arrive_shared>;
351 defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
352 CP_ASYNC_MBARRIER_ARRIVE<".noinc", "", int_nvvm_cp_async_mbarrier_arrive_noinc>;
353 defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
354 CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
// CP_ASYNC_SHARED_GLOBAL_I parameters:
//   cc      - text placed after "cp.async." (instantiated below with "ca"/"cg")
//   cpsize  - copy size, printed as a literal in the assembly string
//   Intrin  - intrinsic matched by the two-operand forms (dst, src)
//   IntrinS - intrinsic matched by the forms that also take src_size
// Record suffixes:
//   _32 / _64     - $dst and $src both in Int32Regs / Int64Regs
//   _32s / _64s   - as above plus $src_size in Int32Regs
//   _32si / _64si - as above plus $src_size as an immediate
// $src_size is a 32-bit operand regardless of the address width. Every
// variant is gated on [hasPTX<70>, hasSM<80>].
356 multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin, Intrinsic IntrinS> {
357 def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
358 !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"),
359 [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
360 Requires<[hasPTX<70>, hasSM<80>]>;
361 def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
362 !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"),
363 [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
364 Requires<[hasPTX<70>, hasSM<80>]>;
365 // Variant with src_size parameter
366 def _32s : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size),
367 !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
368 [(IntrinS Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size)]>,
369 Requires<[hasPTX<70>, hasSM<80>]>;
370 def _32si: NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size),
371 !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
372 [(IntrinS Int32Regs:$dst, Int32Regs:$src, imm:$src_size)]>,
373 Requires<[hasPTX<70>, hasSM<80>]>;
374 def _64s : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size),
375 !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
376 [(IntrinS Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size)]>,
377 Requires<[hasPTX<70>, hasSM<80>]>;
378 def _64si: NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size),
379 !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
380 [(IntrinS Int64Regs:$dst, Int64Regs:$src, imm:$src_size)]>,
381 Requires<[hasPTX<70>, hasSM<80>]>;
// Instantiations: "ca" with sizes 4, 8 and 16; "cg" with size 16 only. Each
// pairs the base intrinsic with its "_s" counterpart for the src_size forms.
384 defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
385 CP_ASYNC_SHARED_GLOBAL_I<"ca", "4", int_nvvm_cp_async_ca_shared_global_4,
386 int_nvvm_cp_async_ca_shared_global_4_s>;
388 defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
389 CP_ASYNC_SHARED_GLOBAL_I<"ca", "8", int_nvvm_cp_async_ca_shared_global_8,
390 int_nvvm_cp_async_ca_shared_global_8_s>;
392 defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
393 CP_ASYNC_SHARED_GLOBAL_I<"ca", "16", int_nvvm_cp_async_ca_shared_global_16,
394 int_nvvm_cp_async_ca_shared_global_16_s>;
396 defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
397 CP_ASYNC_SHARED_GLOBAL_I<"cg", "16", int_nvvm_cp_async_cg_shared_global_16,
398 int_nvvm_cp_async_cg_shared_global_16_s>;
// Group management for cp.async, all gated on [hasPTX<70>, hasSM<80>].
// commit_group and wait_all take no operands.
400 def CP_ASYNC_COMMIT_GROUP :
401 NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>,
402 Requires<[hasPTX<70>, hasSM<80>]>;
// wait_group matches its operand with `timm`, i.e. only as a target
// immediate; there is no register form of this instruction here.
404 def CP_ASYNC_WAIT_GROUP :
405 NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
406 [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
407 Requires<[hasPTX<70>, hasSM<80>]>;
409 def CP_ASYNC_WAIT_ALL :
410 NVPTXInst<(outs), (ins), "cp.async.wait_all;",
411 [(int_nvvm_cp_async_wait_all)]>,
412 Requires<[hasPTX<70>, hasSM<80>]>;
414 // cp.async.bulk variants of the commit/wait group
// All three are gated on [hasPTX<80>, hasSM<90>]. The two wait_group forms
// match their count with `timm`, so only an immediate operand is accepted.
415 def CP_ASYNC_BULK_COMMIT_GROUP :
416 NVPTXInst<(outs), (ins), "cp.async.bulk.commit_group;",
417 [(int_nvvm_cp_async_bulk_commit_group)]>,
418 Requires<[hasPTX<80>, hasSM<90>]>;
420 def CP_ASYNC_BULK_WAIT_GROUP :
421 NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group $n;",
422 [(int_nvvm_cp_async_bulk_wait_group (i32 timm:$n))]>,
423 Requires<[hasPTX<80>, hasSM<90>]>;
425 def CP_ASYNC_BULK_WAIT_GROUP_READ :
426 NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group.read $n;",
427 [(int_nvvm_cp_async_bulk_wait_group_read (i32 timm:$n))]>,
428 Requires<[hasPTX<80>, hasSM<90>]>;
430 //-----------------------------------
431 // MBarrier Functions
432 //-----------------------------------
// MBARRIER_INIT parameters:
//   AddrSpace - "" or ".shared", inserted after "mbarrier.init"
//   Intrin    - intrinsic matched; it takes (addr, count)
// _32 / _64 select on the register class of $addr; $count is always in
// Int32Regs. Neither variant produces a result. Both are gated on
// [hasPTX<70>, hasSM<80>].
434 multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
435 def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
436 !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
437 [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
438 Requires<[hasPTX<70>, hasSM<80>]>;
439 def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
440 !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
441 [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
442 Requires<[hasPTX<70>, hasSM<80>]>;
445 defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
446 defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared",
447 int_nvvm_mbarrier_init_shared>;
// MBARRIER_INVAL parameters:
//   AddrSpace - "" or ".shared", inserted after "mbarrier.inval"
//   Intrin    - intrinsic matched; it takes the address as its only operand
// _32 / _64 select on the register class of $addr. Neither variant produces a
// result. Both are gated on [hasPTX<70>, hasSM<80>].
449 multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
450 def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
451 !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
452 [(Intrin Int32Regs:$addr)]>,
453 Requires<[hasPTX<70>, hasSM<80>]>;
454 def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
455 !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
456 [(Intrin Int64Regs:$addr)]>,
457 Requires<[hasPTX<70>, hasSM<80>]>;
460 defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
461 defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
462 int_nvvm_mbarrier_inval_shared>;
// MBARRIER_ARRIVE parameters:
//   AddrSpace - "" or ".shared", inserted after "mbarrier.arrive"
//   Intrin    - intrinsic matched; it takes the address as its only operand
// _32 / _64 select on the register class of $addr. The result ($state) is
// always in Int64Regs, independent of the address width. Both variants are
// gated on [hasPTX<70>, hasSM<80>].
464 multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
465 def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
466 !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
467 [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
468 Requires<[hasPTX<70>, hasSM<80>]>;
469 def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
470 !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
471 [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
472 Requires<[hasPTX<70>, hasSM<80>]>;
475 defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
476 defm MBARRIER_ARRIVE_SHARED :
477 MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
// MBARRIER_ARRIVE_NOCOMPLETE parameters:
//   AddrSpace - "" or ".shared", inserted after "mbarrier.arrive.noComplete"
//   Intrin    - intrinsic matched; it takes (addr, count)
// _32 / _64 select on the register class of $addr. $count is always in
// Int32Regs and the result ($state) is always in Int64Regs. Both variants are
// gated on [hasPTX<70>, hasSM<80>].
479 multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
480 def _32 : NVPTXInst<(outs Int64Regs:$state),
481 (ins Int32Regs:$addr, Int32Regs:$count),
482 !strconcat("mbarrier.arrive.noComplete", AddrSpace,
483 ".b64 $state, [$addr], $count;"),
484 [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
485 Requires<[hasPTX<70>, hasSM<80>]>;
486 def _64 : NVPTXInst<(outs Int64Regs:$state),
487 (ins Int64Regs:$addr, Int32Regs:$count),
488 !strconcat("mbarrier.arrive.noComplete", AddrSpace,
489 ".b64 $state, [$addr], $count;"),
490 [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
491 Requires<[hasPTX<70>, hasSM<80>]>;
494 defm MBARRIER_ARRIVE_NOCOMPLETE :
495 MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
496 defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
497 MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;
// MBARRIER_ARRIVE_DROP parameters:
//   AddrSpace - "" or ".shared", inserted after "mbarrier.arrive_drop"
//   Intrin    - intrinsic matched; it takes the address as its only operand
// _32 / _64 select on the register class of $addr. The result ($state) is
// always in Int64Regs. Both variants are gated on [hasPTX<70>, hasSM<80>].
499 multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
500 def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
501 !strconcat("mbarrier.arrive_drop", AddrSpace,
502 ".b64 $state, [$addr];"),
503 [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
504 Requires<[hasPTX<70>, hasSM<80>]>;
505 def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
506 !strconcat("mbarrier.arrive_drop", AddrSpace,
507 ".b64 $state, [$addr];"),
508 [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
509 Requires<[hasPTX<70>, hasSM<80>]>;
512 defm MBARRIER_ARRIVE_DROP :
513 MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
514 defm MBARRIER_ARRIVE_DROP_SHARED :
515 MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
// MBARRIER_ARRIVE_DROP_NOCOMPLETE parameters:
//   AddrSpace - "" or ".shared", inserted after
//               "mbarrier.arrive_drop.noComplete"
//   Intrin    - intrinsic matched; it takes (addr, count)
// _32 / _64 select on the register class of $addr. $count is always in
// Int32Regs and the result ($state) is always in Int64Regs. Both variants are
// gated on [hasPTX<70>, hasSM<80>].
517 multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
518 def _32 : NVPTXInst<(outs Int64Regs:$state),
519 (ins Int32Regs:$addr, Int32Regs:$count),
520 !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
521 ".b64 $state, [$addr], $count;"),
522 [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
523 Requires<[hasPTX<70>, hasSM<80>]>;
524 def _64 : NVPTXInst<(outs Int64Regs:$state),
525 (ins Int64Regs:$addr, Int32Regs:$count),
526 !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
527 ".b64 $state, [$addr], $count;"),
528 [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
529 Requires<[hasPTX<70>, hasSM<80>]>;
532 defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
533 MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
534 defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
535 MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
536 int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
// MBARRIER_TEST_WAIT parameters:
//   AddrSpace - "" or ".shared", inserted after "mbarrier.test_wait"
//   Intrin    - intrinsic matched; it takes (addr, state)
// _32 / _64 select on the register class of $addr. $state is always in
// Int64Regs and the result ($res) is a predicate in Int1Regs. Both variants
// are gated on [hasPTX<70>, hasSM<80>].
538 multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
539 def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
540 !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
541 [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
542 Requires<[hasPTX<70>, hasSM<80>]>;
543 def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
544 !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
545 [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
546 Requires<[hasPTX<70>, hasSM<80>]>;
549 defm MBARRIER_TEST_WAIT :
550 MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
551 defm MBARRIER_TEST_WAIT_SHARED :
552 MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
// MBARRIER_PENDING_COUNT takes the intrinsic to match and defines an
// instruction with a 64-bit $state input (Int64Regs) and a 32-bit $res output
// (Int32Regs). Unlike the other mbarrier instructions in this file it has no
// address operand, so there are no _32/_64 or .shared variants. Gated on
// [hasPTX<70>, hasSM<80>].
554 class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
555 NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
556 "mbarrier.pending_count.b64 $res, $state;",
557 [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
558 Requires<[hasPTX<70>, hasSM<80>]>;
// The def deliberately reuses the class name.
560 def MBARRIER_PENDING_COUNT :
561 MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
563 //-----------------------------------
565 //-----------------------------------
567 // Map min(1.0, max(0.0, x)) to sat(x)
568 // Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
570 // max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
571 // Same story for fmax, fmin.
// f32: rewrite fmin(1.0, fmax(0.0, a)) into CVT_f32_f32 with the CvtSAT
// modifier. The four patterns enumerate both operand orders of the outer
// fmin and of the inner fmax, since the constant may appear on either side
// of each call. Only the "fmax inside fmin" nesting is matched.
573 def : Pat<(int_nvvm_fmin_f immFloat1,
574 (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
575 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
576 def : Pat<(int_nvvm_fmin_f immFloat1,
577 (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
578 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
579 def : Pat<(int_nvvm_fmin_f
580 (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
581 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
582 def : Pat<(int_nvvm_fmin_f
583 (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
584 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// f64: rewrite fmin(1.0, fmax(0.0, a)) into CVT_f64_f64 with the CvtSAT
// modifier. The four patterns enumerate both operand orders of the outer
// fmin and of the inner fmax, since the constant may appear on either side
// of each call. Only the "fmax inside fmin" nesting is matched.
586 def : Pat<(int_nvvm_fmin_d immDouble1,
587 (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
588 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
589 def : Pat<(int_nvvm_fmin_d immDouble1,
590 (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
591 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
592 def : Pat<(int_nvvm_fmin_d
593 (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
594 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
595 def : Pat<(int_nvvm_fmin_d
596 (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
597 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
600 // We need a full string for OpcStr here because we need to deal with case like
602 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
603 NVPTXRegClass src_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
604 : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
606 [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
609 // We need a full string for OpcStr here because we need to deal with the case
610 // like INT_PTX_NATIVE_POWR_F.
611 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
612 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP,
613 list<Predicate> Preds = []>
614 : NVPTXInst<(outs t_regclass:$dst),
615 (ins s0_regclass:$src0, s1_regclass:$src1),
617 [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
620 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
621 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
622 NVPTXRegClass s2_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
623 : NVPTXInst<(outs t_regclass:$dst),
624 (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
626 [(set t_regclass:$dst,
627 (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>,
// int_nvvm_prmt -> `prmt.b32 $dst, $src0, $src1, $src2;`. The result and all
// three sources are in Int32Regs; no predicate list is passed to F_MATH_3.
634 def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
635 Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
// f32 min variants. Each def passes F_MATH_2 the full assembly string, the
// result and source register classes (all Float32Regs) and the intrinsic.
// The plain and .ftz forms pass no predicate list; the .NaN forms pass
// [hasPTX<70>, hasSM<80>]; every .xorsign.abs form passes
// [hasPTX<72>, hasSM<86>].
641 def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
642 Float32Regs, Float32Regs, int_nvvm_fmin_f>;
643 def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
644 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
645 def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
646 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f,
647 [hasPTX<70>, hasSM<80>]>;
648 def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
649 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f,
650 [hasPTX<70>, hasSM<80>]>;
651 def INT_NVVM_FMIN_XORSIGN_ABS_F :
652 F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
653 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_xorsign_abs_f,
654 [hasPTX<72>, hasSM<86>]>;
655 def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F :
656 F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
657 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_xorsign_abs_f,
658 [hasPTX<72>, hasSM<86>]>;
659 def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F :
660 F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
661 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_xorsign_abs_f,
662 [hasPTX<72>, hasSM<86>]>;
663 def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F :
664 F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
665 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_xorsign_abs_f,
666 [hasPTX<72>, hasSM<86>]>;
// f32 max variants, mirroring the f32 min defs one for one. Each def passes
// F_MATH_2 the full assembly string, the register classes (all Float32Regs)
// and the intrinsic. The plain and .ftz forms pass no predicate list; the
// .NaN forms pass [hasPTX<70>, hasSM<80>]; every .xorsign.abs form passes
// [hasPTX<72>, hasSM<86>].
668 def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
669 Float32Regs, Float32Regs, int_nvvm_fmax_f>;
670 def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
671 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
672 def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
673 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
674 [hasPTX<70>, hasSM<80>]>;
675 def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
676 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
677 [hasPTX<70>, hasSM<80>]>;
678 def INT_NVVM_FMAX_XORSIGN_ABS_F :
679 F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
680 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f,
681 [hasPTX<72>, hasSM<86>]>;
682 def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F :
683 F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
684 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f,
685 [hasPTX<72>, hasSM<86>]>;
686 def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F :
687 F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
688 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f,
689 [hasPTX<72>, hasSM<86>]>;
690 def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
691 F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
692 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
693 [hasPTX<72>, hasSM<86>]>;
// f64 min/max. Only the plain forms are defined here (no ftz / NaN /
// xorsign.abs variants), all operands are in Float64Regs, and no predicate
// list is passed to F_MATH_2.
695 def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
696 Float64Regs, Float64Regs, int_nvvm_fmin_d>;
697 def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
698 Float64Regs, Float64Regs, int_nvvm_fmax_d>;
701 // Min Max f16, f16x2, bf16, bf16x2
704 class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
705 list<Predicate> Preds = [hasPTX<70>, hasSM<80>]> {
708 NVPTXRegClass RegClass = RC;
709 list<Predicate> Predicates = Preds;
712 multiclass MIN_MAX<string IntName> {
714 MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16,
715 int_nvvm_fmax_f16), Int16Regs>,
716 MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16,
717 int_nvvm_fmax_ftz_f16), Int16Regs>,
718 MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16,
719 int_nvvm_fmax_nan_f16), Int16Regs>,
720 MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"),
721 int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>,
722 MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"),
723 int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
724 Int16Regs, [hasPTX<72>, hasSM<86>]>,
725 MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"),
726 int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
727 Int16Regs, [hasPTX<72>, hasSM<86>]>,
728 MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
729 int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
730 Int16Regs, [hasPTX<72>, hasSM<86>]>,
731 MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
732 int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
733 int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>,
734 MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2,
735 int_nvvm_fmax_f16x2), Int32Regs>,
736 MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"),
737 int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>,
738 MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"),
739 int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>,
740 MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"),
741 int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>,
742 MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
743 int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
744 Int32Regs, [hasPTX<72>, hasSM<86>]>,
745 MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
746 int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2),
747 Int32Regs, [hasPTX<72>, hasSM<86>]>,
748 MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
749 int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2),
750 Int32Regs, [hasPTX<72>, hasSM<86>]>,
751 MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
752 int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
753 int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
754 Int32Regs, [hasPTX<72>, hasSM<86>]>,
755 MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"),
756 int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>,
757 MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16,
758 int_nvvm_fmax_nan_bf16), Int16Regs>,
759 MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"),
760 int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
761 Int16Regs, [hasPTX<72>, hasSM<86>]>,
762 MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"),
763 int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16),
764 Int16Regs, [hasPTX<72>, hasSM<86>]>,
765 MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2,
766 int_nvvm_fmax_bf16x2), Int32Regs>,
767 MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"),
768 int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>,
769 MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
770 int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
771 Int32Regs, [hasPTX<72>, hasSM<86>]>,
772 MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
773 int_nvvm_fmin_nan_xorsign_abs_bf16x2,
774 int_nvvm_fmax_nan_xorsign_abs_bf16x2),
775 Int32Regs, [hasPTX<72>, hasSM<86>]>] in {
776 def P.Variant : F_MATH_2<!strconcat(
777 IntName, !subst("_", ".", P.Variant), " \t$dst, $src0, $src1;"),
778 P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
// Instantiate the MIN_MAX multiclass once per operation name: "min" and "max".
// NOTE(review): "INT_NVVM_FMAN" is presumably a typo for "INT_NVVM_FMAX" (it is
// the MIN_MAX<"max"> instantiation, paired with INT_NVVM_FMIN). Renaming it
// would change the prefix of every record this defm generates, so confirm that
// nothing refers to the current spelling before correcting it.
782 defm INT_NVVM_FMIN : MIN_MAX<"min">;
783 defm INT_NVVM_FMAN : MIN_MAX<"max">;
// mul.hi: one F_MATH_2 instruction per int_nvvm_mulhi_* intrinsic, in signed and
// unsigned flavours for 16-, 32- and 64-bit integer registers. The result and
// both sources always use the same register class.
789 def INT_NVVM_MULHI_S : F_MATH_2<"mul.hi.s16 \t$dst, $src0, $src1;", Int16Regs,
790 Int16Regs, Int16Regs, int_nvvm_mulhi_s>;
791 def INT_NVVM_MULHI_US : F_MATH_2<"mul.hi.u16 \t$dst, $src0, $src1;", Int16Regs,
792 Int16Regs, Int16Regs, int_nvvm_mulhi_us>;
793 def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
794 Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
795 def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
796 Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
797 def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
798 Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
799 def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
800 Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
// Floating-point multiply with an explicit rounding modifier (rn, rz, rm, rp).
// f32: each rounding mode is defined twice, with and without the .ftz modifier.
802 def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
803 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
804 def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
805 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
806 def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
807 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
808 def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
809 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
810 def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
811 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
812 def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
813 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
814 def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
815 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
816 def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
817 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
// f64: the same four rounding modes; no .ftz variants are defined here.
819 def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
820 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
821 def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
822 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
823 def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
824 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
825 def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
826 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
// mul24.lo: signed and unsigned forms, both operating on 32-bit registers and
// selected for int_nvvm_mul24_i / int_nvvm_mul24_ui respectively.
828 def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
829 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
830 def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
831 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
// Floating-point divide.
// f32 approximate forms (div.approx), with and without .ftz.
837 def INT_NVVM_DIV_APPROX_FTZ_F
838 : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
839 Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
840 def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
841 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
// f32 forms with an explicit rounding modifier (rn, rz, rm, rp), each with and
// without .ftz.
843 def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
844 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
845 def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
846 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
847 def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
848 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
849 def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
850 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
851 def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
852 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
853 def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
854 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
855 def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
856 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
857 def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
858 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
// f64 forms: the four rounding modes only (no approx or .ftz variants here).
860 def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
861 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
862 def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
863 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
864 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
865 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
866 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
867 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
// sad: three-source instructions built on F_MATH_3, one per int_nvvm_sad_*
// intrinsic, in signed/unsigned 16-, 32- and 64-bit forms. The destination and
// all three sources share one register class.
873 def INT_NVVM_SAD_S : F_MATH_3<"sad.s16 \t$dst, $src0, $src1, $src2;",
874 Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_s>;
875 def INT_NVVM_SAD_US : F_MATH_3<"sad.u16 \t$dst, $src0, $src1, $src2;",
876 Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_us>;
877 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
878 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
879 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
880 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
881 def INT_NVVM_SAD_LL : F_MATH_3<"sad.s64 \t$dst, $src0, $src1, $src2;",
882 Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ll>;
883 def INT_NVVM_SAD_ULL : F_MATH_3<"sad.u64 \t$dst, $src0, $src1, $src2;",
884 Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ull>;
// floor: no dedicated instruction is defined; each intrinsic is rewritten to a
// same-type CVT instruction carrying the CvtRMI mode operand (CvtRMI_FTZ for
// the ftz intrinsic).
890 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
891 (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
892 def : Pat<(int_nvvm_floor_f Float32Regs:$a),
893 (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
894 def : Pat<(int_nvvm_floor_d Float64Regs:$a),
895 (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
// ceil: same scheme as floor, using the CvtRPI / CvtRPI_FTZ mode operands.
897 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
898 (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
899 def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
900 (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
901 def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
902 (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
// abs for f32 (with and without .ftz) and f64, one single-source F_MATH_1
// instruction per int_nvvm_fabs_* intrinsic.
908 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
909 Float32Regs, int_nvvm_fabs_ftz_f>;
910 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
911 Float32Regs, int_nvvm_fabs_f>;
913 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
914 Float64Regs, int_nvvm_fabs_d>;
917 // Abs, Neg bf16, bf16x2
// Scalar bf16 values are carried in Int16Regs and packed bf16x2 values in
// Int32Regs. All four instructions are gated on [hasPTX<70>, hasSM<80>].
920 def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs,
921 Int16Regs, int_nvvm_abs_bf16, [hasPTX<70>, hasSM<80>]>;
922 def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs,
923 Int32Regs, int_nvvm_abs_bf16x2, [hasPTX<70>, hasSM<80>]>;
924 def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs,
925 Int16Regs, int_nvvm_neg_bf16, [hasPTX<70>, hasSM<80>]>;
926 def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs,
927 Int32Regs, int_nvvm_neg_bf16x2, [hasPTX<70>, hasSM<80>]>;
// round / trunc / saturate are all lowered to same-type CVT instructions; only
// the mode operand differs between the three groups.
// round: CvtRNI (CvtRNI_FTZ for the ftz intrinsic).
933 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
934 (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
935 def : Pat<(int_nvvm_round_f Float32Regs:$a),
936 (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
937 def : Pat<(int_nvvm_round_d Float64Regs:$a),
938 (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
// trunc: CvtRZI (CvtRZI_FTZ for the ftz intrinsic).
944 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
945 (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
946 def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
947 (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
948 def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
949 (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
// saturate: CvtSAT (CvtSAT_FTZ for the ftz intrinsic).
955 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
956 (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
957 def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
958 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
959 def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
960 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// ex2.approx: f32 (with and without .ftz), f64, and the half-precision forms.
// The f16 / f16x2 variants use Int16Regs / Int32Regs and are the only ones here
// with explicit predicates: [hasPTX<70>, hasSM<75>].
966 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
967 Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
968 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
969 Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
970 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
971 Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
972 def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
973 Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
974 def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
975 Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;
// lg2.approx: f32 (with and without .ftz) and f64 only.
977 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
978 Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
979 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
980 Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
981 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
982 Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
// sin.approx / cos.approx: f32 only, each with and without the .ftz modifier.
988 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
989 Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
990 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
991 Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
993 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
994 Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
995 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
996 Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
// Describes one fma variant: V is the variant suffix string, I the intrinsic to
// match, RC the register class used for the operands, and Preds an optional
// predicate list (empty by default). FMA_INST reads these values back through
// the record's Variant / Intr / RegClass / Predicates fields.
1002 class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
1003 list<Predicate> Preds = []> {
1006 NVPTXRegClass RegClass = RC;
1007 list<Predicate> Predicates = Preds;
// Generates one F_MATH_3 instruction per FMA_TUPLE listed below. The assembly
// mnemonic is "fma" followed by the tuple's Variant with every "_" replaced by
// "." (e.g. "_rn_ftz_f32" yields "fma.rn.ftz.f32"); the same register class is
// used for the result and all three sources.
1010 multiclass FMA_INST {
// f64: four rounding modes, no predicates.
1012 FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
1013 FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
1014 FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
1015 FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,
// f32: four rounding modes, each with and without ftz, no predicates.
1017 FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
1018 FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
1019 FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
1020 FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
1021 FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
1022 FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
1023 FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
1024 FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,
// Scalar f16 (Int16Regs): plain/ftz/sat forms need [hasPTX<42>, hasSM<53>];
// the relu forms need [hasPTX<70>, hasSM<80>].
1026 FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
1027 FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs,
1028 [hasPTX<42>, hasSM<53>]>,
1029 FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs,
1030 [hasPTX<42>, hasSM<53>]>,
1031 FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs,
1032 [hasPTX<42>, hasSM<53>]>,
1033 FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs,
1034 [hasPTX<70>, hasSM<80>]>,
1035 FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs,
1036 [hasPTX<70>, hasSM<80>]>,
// Scalar bf16 (Int16Regs), all gated on [hasPTX<70>, hasSM<80>].
// NOTE(review): this list includes ftz and sat forms, whereas the bf16x2 list
// further down only has the plain and relu forms. Verify against the PTX ISA
// that fma.rn.ftz.bf16 / fma.rn.sat.bf16 (and their combinations) are accepted
// mnemonics before relying on these variants.
1038 FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
1039 FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, Int16Regs,
1040 [hasPTX<70>, hasSM<80>]>,
1041 FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, Int16Regs,
1042 [hasPTX<70>, hasSM<80>]>,
1043 FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs,
1044 [hasPTX<70>, hasSM<80>]>,
1045 FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
1046 [hasPTX<70>, hasSM<80>]>,
1047 FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs,
1048 [hasPTX<70>, hasSM<80>]>,
// Packed f16x2 (Int32Regs): same predicate split as the scalar f16 forms.
1050 FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs,
1051 [hasPTX<42>, hasSM<53>]>,
1052 FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs,
1053 [hasPTX<42>, hasSM<53>]>,
1054 FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs,
1055 [hasPTX<42>, hasSM<53>]>,
1056 FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
1057 Int32Regs, [hasPTX<42>, hasSM<53>]>,
1058 FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs,
1059 [hasPTX<70>, hasSM<80>]>,
1060 FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
1061 Int32Regs, [hasPTX<70>, hasSM<80>]>,
// Packed bf16x2 (Int32Regs): plain and relu forms only.
1062 FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
1063 [hasPTX<70>, hasSM<80>]>,
1064 FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
1065 [hasPTX<70>, hasSM<80>]>
// Per-tuple instruction: builds the asm string and forwards the tuple's
// register class, intrinsic and predicates to F_MATH_3.
1068 F_MATH_3<!strconcat("fma",
1069 !subst("_", ".", P.Variant), " \t$dst, $src0, $src1, $src2;"),
1070 P.RegClass, P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
// Instantiate every fma variant under the INT_NVVM_FMA name prefix.
1074 defm INT_NVVM_FMA : FMA_INST;
// Reciprocal (rcp).
// f32 with an explicit rounding modifier, each with and without .ftz.
1080 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
1081 Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
1082 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
1083 Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
1084 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
1085 Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
1086 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
1087 Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
1088 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
1089 Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
1090 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
1091 Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
1092 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
1093 Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
1094 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
1095 Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
// f64 with an explicit rounding modifier.
1097 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
1098 Float64Regs, int_nvvm_rcp_rn_d>;
1099 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
1100 Float64Regs, int_nvvm_rcp_rz_d>;
1101 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
1102 Float64Regs, int_nvvm_rcp_rm_d>;
1103 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
1104 Float64Regs, int_nvvm_rcp_rp_d>;
// Approximate forms: only the .ftz variants are defined, for f32 and f64.
1106 def INT_NVVM_RCP_APPROX_FTZ_F : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
1107 Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
1108 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
1109 Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
// Square root (sqrt).
// f32 with an explicit rounding modifier, each with and without .ftz, followed
// by the two f32 approximate forms.
1115 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
1116 Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
1117 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
1118 Float32Regs, int_nvvm_sqrt_rn_f>;
1119 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
1120 Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
1121 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
1122 Float32Regs, int_nvvm_sqrt_rz_f>;
1123 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
1124 Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
1125 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
1126 Float32Regs, int_nvvm_sqrt_rm_f>;
1127 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
1128 Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
1129 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
1130 Float32Regs, int_nvvm_sqrt_rp_f>;
1131 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
1132 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
1133 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
1134 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
// f64 with an explicit rounding modifier.
1136 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
1137 Float64Regs, int_nvvm_sqrt_rn_d>;
1138 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
1139 Float64Regs, int_nvvm_sqrt_rz_d>;
1140 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
1141 Float64Regs, int_nvvm_sqrt_rm_d>;
1142 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
1143 Float64Regs, int_nvvm_sqrt_rp_d>;
1145 // nvvm_sqrt intrinsic
// int_nvvm_sqrt_f has no instruction of its own: it maps onto one of the four
// f32 instructions above depending on the doF32FTZ and do_SQRTF32_RN
// predicates. The final pattern carries no Requires clause and so covers the
// case where neither predicate holds (approximate, non-ftz).
1146 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1147 (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
1148 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1149 (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
1150 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1151 (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
1152 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1153 (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
// rsqrt.approx: f32 (with and without .ftz) and f64.
1159 def INT_NVVM_RSQRT_APPROX_FTZ_F
1160 : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
1161 int_nvvm_rsqrt_approx_ftz_f>;
1162 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
1163 Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
1164 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
1165 Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
// Floating-point add with an explicit rounding modifier (rn, rz, rm, rp).
// f32: each rounding mode with and without .ftz.
1171 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
1172 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
1173 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
1174 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
1175 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
1176 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
1177 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
1178 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
1179 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
1180 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
1181 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
1182 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
1183 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
1184 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
1185 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
1186 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
// f64: the same four rounding modes, no .ftz variants.
1188 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
1189 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
1190 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
1191 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
1192 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
1193 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
1194 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
1195 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
// Conversion intrinsics involving f64. None of these define a new instruction:
// each pattern rewrites the intrinsic to an existing CVT_<dst>_<src> instruction
// plus a mode operand that encodes the rounding (and, where present, ftz).
// f64 -> f32: CvtRN/RZ/RM/RP, with _FTZ forms for the ftz intrinsics.
1201 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
1202 (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
1203 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
1204 (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
1205 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
1206 (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
1207 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
1208 (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
1209 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
1210 (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
1211 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
1212 (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
1213 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
1214 (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
1215 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
1216 (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
// f64 -> signed i32: uses the integer-rounding modes CvtRNI/RZI/RMI/RPI.
1218 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
1219 (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
1220 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
1221 (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
1222 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
1223 (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
1224 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
1225 (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
// f64 -> unsigned i32.
1227 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
1228 (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
1229 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
1230 (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
1231 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
1232 (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
1233 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
1234 (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
// signed i32 -> f64.
1236 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
1237 (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
1238 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
1239 (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
1240 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
1241 (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
1242 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
1243 (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
// unsigned i32 -> f64.
1245 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
1246 (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
1247 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
1248 (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
1249 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
1250 (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
1251 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
1252 (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
// Conversions between f32 and 32-bit integers, again expressed as patterns onto
// existing CVT instructions with a mode operand.
// f32 -> signed i32: integer-rounding modes, each with an _FTZ counterpart.
1254 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
1255 (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1256 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
1257 (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
1258 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
1259 (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1260 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
1261 (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
1262 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
1263 (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1264 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
1265 (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
1266 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
1267 (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1268 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
1269 (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
// f32 -> unsigned i32.
1271 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
1272 (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1273 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
1274 (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
1275 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
1276 (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1277 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
1278 (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
1279 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
1280 (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1281 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
1282 (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
1283 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
1284 (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1285 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
1286 (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
// signed i32 -> f32.
1288 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
1289 (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
1290 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
1291 (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
1292 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
1293 (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
1294 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
1295 (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
// unsigned i32 -> f32.
1297 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
1298 (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
1299 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
1300 (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
1301 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
1302 (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
1303 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
1304 (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
// f32 -> reduced-precision conversions. Rounding is rn or rz, optionally
// combined with relu (the CvtRN_RELU / CvtRZ_RELU mode operands).
// Two f32 sources -> one bf16x2 result.
1306 def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
1307 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
1308 def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
1309 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
1310 def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
1311 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
1312 def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
1313 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
// Two f32 sources -> one f16x2 result.
1315 def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
1316 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
1317 def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
1318 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
1319 def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
1320 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
1321 def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
1322 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
// One f32 source -> scalar bf16.
1324 def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
1325 (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
1326 def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
1327 (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
1328 def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
1329 (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
1330 def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
1331 (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;
1334 NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
1335 "cvt.rna.tf32.f32 \t$dest, $a;",
1336 [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;
// Builds a 64-bit float register from two 32-bit integer registers with a
// single mov.b64 whose source is the vector {$src0, $src1}. The doubled braces
// are how a literal brace is written in the asm string.
1338 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
1339 Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
// Extracts one 32-bit half of a 64-bit float register. Both variants declare a
// scratch .b32 register %temp inside a braced scope and unpack with mov.b64:
// D2I_LO keeps the first element of the pair, D2I_HI keeps the second.
1341 def INT_NVVM_D2I_LO : F_MATH_1<
1342 !strconcat("{{\n\t",
1343 ".reg .b32 %temp; \n\t",
1344 "mov.b64 \t{$dst, %temp}, $src0;\n\t",
1346 Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
1347 def INT_NVVM_D2I_HI : F_MATH_1<
1348 !strconcat("{{\n\t",
1349 ".reg .b32 %temp; \n\t",
1350 "mov.b64 \t{%temp, $dst}, $src0;\n\t",
1352 Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
// Floating point -> 64-bit integer conversions, mapped onto CVT instructions
// with integer-rounding mode operands (CvtRNI/RZI/RMI/RPI).
// f32 -> signed i64, each mode with an _FTZ counterpart.
1354 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
1355 (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1356 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
1357 (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
1358 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
1359 (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1360 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
1361 (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
1362 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
1363 (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1364 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
1365 (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
1366 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
1367 (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1368 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
1369 (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
// f32 -> unsigned i64.
1371 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
1372 (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1373 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
1374 (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
1375 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
1376 (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1377 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
1378 (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
1379 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
1380 (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1381 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
1382 (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
1383 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
1384 (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1385 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
1386 (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
// f64 -> signed i64.
1388 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
1389 (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
1390 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
1391 (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
1392 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
1393 (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
1394 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
1395 (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
// f64 -> unsigned i64.
1397 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
1398 (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
1399 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
1400 (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
1401 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
1402 (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
1403 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
1404 (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
// 64-bit integer -> floating point conversions, mapped onto CVT instructions
// with the CvtRN/RZ/RM/RP mode operands.
// signed i64 -> f32.
1406 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
1407 (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
1408 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
1409 (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
1410 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
1411 (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
1412 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
1413 (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
// unsigned i64 -> f32.
1415 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
1416 (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
1417 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
1418 (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
1419 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
1420 (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
1421 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
1422 (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
// signed i64 -> f64.
1424 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
1425 (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
1426 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
1427 (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
1428 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
1429 (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
1430 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
1431 (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
// unsigned i64 -> f64.
1433 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
1434 (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
1435 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
1436 (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
1437 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
1438 (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
1439 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
1440 (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
// f32 -> f16, round-to-nearest only, with and without ftz.
1443 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
1444 (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
1445 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
1446 (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
// Bit-preserving register moves between integer and floating-point register
// classes of the same width: mov.b32 for f32 <-> i32, mov.b64 for f64 <-> i64.
1452 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
1453 Float32Regs, int_nvvm_bitcast_f2i>;
1454 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
1455 Int32Regs, int_nvvm_bitcast_i2f>;
1457 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
1458 Int64Regs, int_nvvm_bitcast_ll2d>;
1459 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
1460 Float64Regs, int_nvvm_bitcast_d2ll>;
// Common shape of every fns.b32 instruction: a 32-bit register result, the asm
// string "fns.b32 $dst, $mask, $base, $offset", and a requirement of
// [hasPTX<60>, hasSM<30>]. The subclass supplies the input operand list (ins)
// and the int_nvvm_fns pattern to match (Operands); both must name their
// operands $mask, $base and $offset for the asm string to resolve.
1466 class INT_FNS_MBO<dag ins, dag Operands>
1467 : NVPTXInst<(outs Int32Regs:$dst), ins,
1468 "fns.b32 \t$dst, $mask, $base, $offset;",
1469 [(set Int32Regs:$dst, Operands )]>,
1470 Requires<[hasPTX<60>, hasSM<30>]>;
// One definition per register/immediate combination of the three operands. The
// three-letter suffix gives the kind of mask, base and offset in that order:
// 'r' = Int32Regs register, 'i' = i32imm immediate.
1472 def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
1473 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1474 def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
1475 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
1476 def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
1477 (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
1478 def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
1479 (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
1480 def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
1481 (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1482 def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
1483 (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
1484 def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
1485 (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
1486 def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
1487 (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
1489 //-----------------------------------
// Atomic Functions
1491 //-----------------------------------
// PatFrag over operands `ops` and fragment `frag` that additionally applies
// the AS_match.global predicate (AS_match is defined outside this excerpt).
1493 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
1494 : PatFrag<ops, frag, AS_match.global>;
// PatFrag over operands `ops` and fragment `frag` that additionally applies
// the AS_match.shared predicate (AS_match is defined outside this excerpt).
1495 class ATOMIC_SHARED_CHK <dag ops, dag frag>
1496 : PatFrag<ops, frag, AS_match.shared>;
// PatFrag over operands `ops` and fragment `frag` that additionally applies
// the AS_match.generic predicate (AS_match is defined outside this excerpt).
1497 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
1498 : PatFrag<ops, frag, AS_match.generic>;
// Two-operand atomic instruction for one pointer width.
//   ptrT / ptrclass - value type and register class of the address operand.
//   regT / regclass - value type and register class of the data operand and
//                     of the result.
//   SpaceStr, OpcStr, TypeStr - appended to "atom" (in that order) to build
//                     the mnemonic.
//   IntOp           - PatFrag matched by the selection pattern.
//   IMMType / IMM   - operand type and DAG node used for the immediate form.
//   Pred            - predicate list; its use is not visible in this excerpt.
// Produces two records: `reg` ($b in a register) and `imm` ($b immediate).
// The extra "" passed to !strconcat in `imm` is an empty string and does not
// alter the resulting asm text.
// NOTE(review): both patterns end in ',' so a continuation (presumably a
// Requires<...> clause) and the closing '}' are expected after them but are
// absent from this listing -- confirm against the complete file.
1500 multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1501 ValueType regT, NVPTXRegClass regclass,
1502 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1503 Operand IMMType, SDNode IMM, list<Predicate> Pred> {
1504 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1505 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
1506 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
1508 def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
1509 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
1510 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
// Instantiates F_ATOMIC_2_imp twice: `p32` with an i32 / Int32Regs address
// and `p64` with an i64 / Int64Regs address. All other arguments are
// forwarded unchanged. Pred defaults to the empty list.
1513 multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1514 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
1515 list<Predicate> Pred = []> {
1516 defm p32 : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1517 IntOp, IMMType, IMM, Pred>;
1518 defm p64 : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1519 IntOp, IMMType, IMM, Pred>;
1522 // Two-operand atomic whose second operand ($b) is negated before use.
// The visible asm fragments declare a scratch register `temp` of type
// ".s" # TypeStr, emit "neg.s<TypeStr> temp, $b", and then issue
// "atom<SpaceStr><OpcStr>.u<TypeStr> $dst, [$addr], temp". TypeStr is
// therefore expected to be a bare width such as "32" or "64", with no leading
// '.' or sign letter.
//   ptrT / ptrclass - value type and register class of the address operand.
//   regT / regclass - value type and register class of $b and of the result.
//   IntOp           - PatFrag matched by the selection pattern.
//   Pred            - predicate list; its use is not visible in this excerpt.
// NOTE(review): the start and end of the asm string expression, the clause
// following the pattern's trailing ',' and the closing '}' are absent from
// this listing -- confirm against the complete file.
1523 multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1524 ValueType regT, NVPTXRegClass regclass,
1525 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1526 list<Predicate> Pred> {
1527 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1530 ".reg \t.s", TypeStr, " temp; \n\t",
1531 "neg.s", TypeStr, " \ttemp, $b; \n\t",
1532 "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
1534 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
// Instantiates F_ATOMIC_2_NEG_imp twice: `p32` with an i32 / Int32Regs
// address and `p64` with an i64 / Int64Regs address. Pred defaults to the
// empty list.
// NOTE(review): the tail of each defm argument list and the closing '}' are
// absent from this listing -- confirm against the complete file.
1537 multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass, string SpaceStr,
1538 string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
1539 defm p32: F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1541 defm p64: F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
// Three-operand atomic instruction ($addr, $b, $c) for one pointer width.
//   ptrT / ptrclass - value type and register class of the address operand.
//   regT / regclass - value type and register class of $b, $c and the result.
//   SpaceStr, OpcStr, TypeStr - appended to "atom" (in that order) to build
//                     the mnemonic.
//   IntOp           - PatFrag matched by the selection pattern.
//   IMMType         - operand type used wherever $b or $c is an immediate;
//                     the patterns always match those with the `imm` node.
//   Pred            - predicate list; its use is not visible in this excerpt.
// Records produced:
//   reg  - $b and $c in registers
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate (the extra "" given to !strconcat is
//          an empty string and does not alter the asm text)
//   imm3 - $b and $c immediates
// NOTE(review): every pattern ends in ',' so a continuation (presumably a
// Requires<...> clause) and the closing '}' are expected but are absent from
// this listing -- confirm against the complete file.
1546 multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1547 ValueType regT, NVPTXRegClass regclass,
1548 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1549 Operand IMMType, list<Predicate> Pred> {
1550 def reg : NVPTXInst<(outs regclass:$dst),
1551 (ins ptrclass:$addr, regclass:$b, regclass:$c),
1552 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1553 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>,
1556 def imm1 : NVPTXInst<(outs regclass:$dst),
1557 (ins ptrclass:$addr, IMMType:$b, regclass:$c),
1558 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1559 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>,
1562 def imm2 : NVPTXInst<(outs regclass:$dst),
1563 (ins ptrclass:$addr, regclass:$b, IMMType:$c),
1564 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
1565 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>,
1568 def imm3 : NVPTXInst<(outs regclass:$dst),
1569 (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
1570 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1571 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
// Instantiates F_ATOMIC_3_imp twice: `p32` with an i32 / Int32Regs address
// and `p64` with an i64 / Int64Regs address. All other arguments are
// forwarded unchanged. Pred defaults to the empty list.
1574 multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1575 string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
1576 defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1577 IntOp, IMMType, Pred>;
1578 defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1579 IntOp, IMMType, Pred>;
// atom_add: address-space-qualified fragments.
// The suffix selects the wrapper: _g -> ATOMIC_GLOBAL_CHK, _s ->
// ATOMIC_SHARED_CHK, _gen -> ATOMIC_GENERIC_CHK.
// The _32 / _64 fragments wrap atomic_load_add_32 / atomic_load_add_64.
// The fragments without a width (atomic_load_add_{g,s,gen}) wrap
// atomic_load_fadd, i.e. they are the floating-point add fragments despite
// the integer-looking name.
1584 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1585 (atomic_load_add_32 node:$a, node:$b)>;
1586 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1587 (atomic_load_add_32 node:$a, node:$b)>;
1588 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1589 (atomic_load_add_32 node:$a, node:$b)>;
1590 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1591 (atomic_load_add_64 node:$a, node:$b)>;
1592 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1593 (atomic_load_add_64 node:$a, node:$b)>;
1594 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1595 (atomic_load_add_64 node:$a, node:$b)>;
1596 def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1597 (atomic_load_fadd node:$a, node:$b)>;
1598 def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1599 (atomic_load_fadd node:$a, node:$b)>;
1600 def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1601 (atomic_load_fadd node:$a, node:$b)>;
// atom_add instructions. F_ATOMIC_2 arguments are, in order: value type,
// register class, SpaceStr, TypeStr, OpcStr (".add"), PatFrag, immediate
// operand type, immediate DAG node, and an optional predicate list.
//   * Integer forms use .u32 / .u64 with i32imm / i64imm and `imm`.
//   * Floating-point forms use .f32 / .f64 with f32imm / f64imm and `fpimm`;
//     F32 and F64 share the atomic_load_add_{g,s,gen} fragments and differ
//     only in value type / register class. The F64 forms additionally
//     require hasAtomAddF64.
// NOTE(review): the *_GEN_*_USE_G variants match the generic-address-space
// fragment but print ".global"; no predicate distinguishing them from the
// plain *_GEN_* variants is visible here -- confirm how (or whether) they
// are selected.
1603 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
1604 atomic_load_add_32_g, i32imm, imm>;
1605 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".add",
1606 atomic_load_add_32_s, i32imm, imm>;
1607 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".add",
1608 atomic_load_add_32_gen, i32imm, imm>;
1609 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1610 ".add", atomic_load_add_32_gen, i32imm, imm>;
1612 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
1613 atomic_load_add_64_g, i64imm, imm>;
1614 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".add",
1615 atomic_load_add_64_s, i64imm, imm>;
1616 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
1617 atomic_load_add_64_gen, i64imm, imm>;
1618 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1619 ".add", atomic_load_add_64_gen, i64imm, imm>;
1621 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
1622 atomic_load_add_g, f32imm, fpimm>;
1623 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
1624 atomic_load_add_s, f32imm, fpimm>;
1625 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<f32, Float32Regs, "", ".f32", ".add",
1626 atomic_load_add_gen, f32imm, fpimm>;
1628 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<f64, Float64Regs, ".global", ".f64", ".add",
1629 atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
1630 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<f64, Float64Regs, ".shared", ".f64", ".add",
1631 atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
1632 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<f64, Float64Regs, "", ".f64", ".add",
1633 atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
// atom_sub: address-space-qualified fragments over atomic_load_sub_32 /
// atomic_load_sub_64 (_g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK).
1637 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1638 (atomic_load_sub_32 node:$a, node:$b)>;
1639 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1640 (atomic_load_sub_32 node:$a, node:$b)>;
1641 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1642 (atomic_load_sub_32 node:$a, node:$b)>;
1643 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1644 (atomic_load_sub_64 node:$a, node:$b)>;
1645 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1646 (atomic_load_sub_64 node:$a, node:$b)>;
1647 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1648 (atomic_load_sub_64 node:$a, node:$b)>;
// atom_sub instructions, built with F_ATOMIC_2_NEG and OpcStr ".add": the
// subtraction fragment is matched, and the emitted code negates $b and then
// performs an atomic add. TypeStr is passed as a bare width ("32" / "64")
// because the multiclass itself prepends ".s" / ".u".
// NOTE(review): the *_GEN_*_USE_G variants match the generic-address-space
// fragment but print ".global"; no predicate distinguishing them from the
// plain *_GEN_* variants is visible here -- confirm how they are selected.
1650 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
1651 atomic_load_sub_32_g>;
1652 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
1653 atomic_load_sub_64_g>;
1654 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<i32, Int32Regs, "", "32", ".add",
1655 atomic_load_sub_32_gen>;
1656 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32",
1657 ".add", atomic_load_sub_32_gen>;
1658 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".shared", "32", ".add",
1659 atomic_load_sub_32_s>;
1660 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".shared", "64", ".add",
1661 atomic_load_sub_64_s>;
1662 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<i64, Int64Regs, "", "64", ".add",
1663 atomic_load_sub_64_gen>;
1664 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64",
1665 ".add", atomic_load_sub_64_gen>;
// atom_swap: address-space-qualified fragments over atomic_swap_32 /
// atomic_swap_64 (_g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK).
1669 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1670 (atomic_swap_32 node:$a, node:$b)>;
1671 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1672 (atomic_swap_32 node:$a, node:$b)>;
1673 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1674 (atomic_swap_32 node:$a, node:$b)>;
1675 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1676 (atomic_swap_64 node:$a, node:$b)>;
1677 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1678 (atomic_swap_64 node:$a, node:$b)>;
1679 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1680 (atomic_swap_64 node:$a, node:$b)>;
// atom_swap instructions: F_ATOMIC_2 with OpcStr ".exch" and TypeStr
// .b32 / .b64; immediates use i32imm / i64imm with `imm`.
// NOTE(review): the *_GEN_*_USE_G variants match the generic-address-space
// fragment but print ".global"; no predicate distinguishing them from the
// plain *_GEN_* variants is visible here -- confirm how they are selected.
1682 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
1683 atomic_swap_32_g, i32imm, imm>;
1684 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".exch",
1685 atomic_swap_32_s, i32imm, imm>;
1686 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".exch",
1687 atomic_swap_32_gen, i32imm, imm>;
1688 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1689 ".exch", atomic_swap_32_gen, i32imm, imm>;
1690 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
1691 atomic_swap_64_g, i64imm, imm>;
1692 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".exch",
1693 atomic_swap_64_s, i64imm, imm>;
1694 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".exch",
1695 atomic_swap_64_gen, i64imm, imm>;
1696 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1697 ".exch", atomic_swap_64_gen, i64imm, imm>;
// atom_max: address-space-qualified fragments over atomic_load_max_{32,64}
// and atomic_load_umax_{32,64} (_g -> ATOMIC_GLOBAL_CHK, _s ->
// ATOMIC_SHARED_CHK, _gen -> ATOMIC_GENERIC_CHK).
1701 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1702 , (atomic_load_max_32 node:$a, node:$b)>;
1703 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1704 (atomic_load_max_32 node:$a, node:$b)>;
1705 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1706 (atomic_load_max_32 node:$a, node:$b)>;
1707 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1708 , (atomic_load_max_64 node:$a, node:$b)>;
1709 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1710 (atomic_load_max_64 node:$a, node:$b)>;
1711 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1712 (atomic_load_max_64 node:$a, node:$b)>;
1713 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1714 (atomic_load_umax_32 node:$a, node:$b)>;
1715 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1716 (atomic_load_umax_32 node:$a, node:$b)>;
1717 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1718 (atomic_load_umax_32 node:$a, node:$b)>;
1719 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1720 (atomic_load_umax_64 node:$a, node:$b)>;
1721 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1722 (atomic_load_umax_64 node:$a, node:$b)>;
1723 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1724 (atomic_load_umax_64 node:$a, node:$b)>;
// atom_max instructions: F_ATOMIC_2 with OpcStr ".max".
//   * MAX  forms use the atomic_load_max_*  fragments with .s32 / .s64.
//   * UMAX forms use the atomic_load_umax_* fragments with .u32 / .u64.
// Every 64-bit form carries the [hasSM<32>] predicate list; the 32-bit forms
// pass no predicates.
// NOTE(review): the *_GEN_*_USE_G variants match the generic-address-space
// fragment but print ".global"; no predicate distinguishing them from the
// plain *_GEN_* variants is visible here -- confirm how they are selected.
1726 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
1727 ".max", atomic_load_max_32_g, i32imm, imm>;
1728 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
1729 ".max", atomic_load_max_32_s, i32imm, imm>;
1730 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".max",
1731 atomic_load_max_32_gen, i32imm, imm>;
1732 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1733 ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
1734 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
1735 ".max", atomic_load_max_64_g, i64imm, imm, [hasSM<32>]>;
1736 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
1737 ".max", atomic_load_max_64_s, i64imm, imm, [hasSM<32>]>;
1738 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".max",
1739 atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
1740 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1741 ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
1742 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1743 ".max", atomic_load_umax_32_g, i32imm, imm>;
1744 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
1745 ".max", atomic_load_umax_32_s, i32imm, imm>;
1746 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".max",
1747 atomic_load_umax_32_gen, i32imm, imm>;
1748 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1749 ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
1750 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1751 ".max", atomic_load_umax_64_g, i64imm, imm, [hasSM<32>]>;
1752 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
1753 ".max", atomic_load_umax_64_s, i64imm, imm, [hasSM<32>]>;
1754 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".max",
1755 atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
1756 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1757 ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_min: address-space-qualified fragments over atomic_load_min_{32,64}
// and atomic_load_umin_{32,64} (_g -> ATOMIC_GLOBAL_CHK, _s ->
// ATOMIC_SHARED_CHK, _gen -> ATOMIC_GENERIC_CHK).
1761 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1762 (atomic_load_min_32 node:$a, node:$b)>;
1763 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1764 (atomic_load_min_32 node:$a, node:$b)>;
1765 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1766 (atomic_load_min_32 node:$a, node:$b)>;
1767 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1768 (atomic_load_min_64 node:$a, node:$b)>;
1769 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1770 (atomic_load_min_64 node:$a, node:$b)>;
1771 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1772 (atomic_load_min_64 node:$a, node:$b)>;
1773 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1774 (atomic_load_umin_32 node:$a, node:$b)>;
1775 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1776 (atomic_load_umin_32 node:$a, node:$b)>;
1777 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1778 (atomic_load_umin_32 node:$a, node:$b)>;
1779 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1780 (atomic_load_umin_64 node:$a, node:$b)>;
1781 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1782 (atomic_load_umin_64 node:$a, node:$b)>;
1783 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1784 (atomic_load_umin_64 node:$a, node:$b)>;
// atom_min instructions: F_ATOMIC_2 with OpcStr ".min".
//   * MIN  forms use the atomic_load_min_*  fragments with .s32 / .s64.
//   * UMIN forms use the atomic_load_umin_* fragments with .u32 / .u64.
// Every 64-bit form carries the [hasSM<32>] predicate list; the 32-bit forms
// pass no predicates.
// NOTE(review): the *_GEN_*_USE_G variants match the generic-address-space
// fragment but print ".global"; no predicate distinguishing them from the
// plain *_GEN_* variants is visible here -- confirm how they are selected.
1786 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
1787 ".min", atomic_load_min_32_g, i32imm, imm>;
1788 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
1789 ".min", atomic_load_min_32_s, i32imm, imm>;
1790 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".min",
1791 atomic_load_min_32_gen, i32imm, imm>;
1792 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1793 ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
1794 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
1795 ".min", atomic_load_min_64_g, i64imm, imm, [hasSM<32>]>;
1796 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
1797 ".min", atomic_load_min_64_s, i64imm, imm, [hasSM<32>]>;
1798 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".min",
1799 atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
1800 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1801 ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
1802 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1803 ".min", atomic_load_umin_32_g, i32imm, imm>;
1804 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
1805 ".min", atomic_load_umin_32_s, i32imm, imm>;
1806 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".min",
1807 atomic_load_umin_32_gen, i32imm, imm>;
1808 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1809 ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
1810 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1811 ".min", atomic_load_umin_64_g, i64imm, imm, [hasSM<32>]>;
1812 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
1813 ".min", atomic_load_umin_64_s, i64imm, imm, [hasSM<32>]>;
1814 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".min",
1815 atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
1816 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1817 ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
1819 // atom_inc / atom_dec: address-space-qualified fragments.
// These fragments wrap the int_nvvm_atomic_load_inc_32 and
// int_nvvm_atomic_load_dec_32 intrinsics (only 32-bit variants are defined
// here). Suffixes: _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK.
1821 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1822 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1823 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1824 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1825 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1826 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1827 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1828 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1829 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1830 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1831 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1832 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
// atom_inc / atom_dec instructions: F_ATOMIC_2 with OpcStr ".inc" / ".dec",
// TypeStr ".u32", and i32imm / `imm` for the immediate form. Only 32-bit
// data variants are defined.
// NOTE(review): the *_GEN_32_USE_G variants match the generic-address-space
// fragment but print ".global"; no predicate distinguishing them from the
// plain *_GEN_32 variants is visible here -- confirm how they are selected.
1834 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
1835 atomic_load_inc_32_g, i32imm, imm>;
1836 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".inc",
1837 atomic_load_inc_32_s, i32imm, imm>;
1838 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".inc",
1839 atomic_load_inc_32_gen, i32imm, imm>;
1840 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1841 ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1842 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
1843 atomic_load_dec_32_g, i32imm, imm>;
1844 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".dec",
1845 atomic_load_dec_32_s, i32imm, imm>;
1846 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".dec",
1847 atomic_load_dec_32_gen, i32imm, imm>;
1848 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1849 ".dec", atomic_load_dec_32_gen, i32imm, imm>;
// atom_and: address-space-qualified fragments over atomic_load_and_32 /
// atomic_load_and_64 (_g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK).
1853 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1854 (atomic_load_and_32 node:$a, node:$b)>;
1855 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1856 (atomic_load_and_32 node:$a, node:$b)>;
1857 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1858 (atomic_load_and_32 node:$a, node:$b)>;
1859 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1860 (atomic_load_and_64 node:$a, node:$b)>;
1861 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1862 (atomic_load_and_64 node:$a, node:$b)>;
1863 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1864 (atomic_load_and_64 node:$a, node:$b)>;
// atom_and instructions: F_ATOMIC_2 with OpcStr ".and" and TypeStr
// .b32 / .b64. The 64-bit forms carry the [hasSM<32>] predicate list; the
// 32-bit forms pass no predicates.
// NOTE(review): the *_GEN_*_USE_G variants match the generic-address-space
// fragment but print ".global"; no predicate distinguishing them from the
// plain *_GEN_* variants is visible here -- confirm how they are selected.
1866 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
1867 atomic_load_and_32_g, i32imm, imm>;
1868 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".and",
1869 atomic_load_and_32_s, i32imm, imm>;
1870 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".and",
1871 atomic_load_and_32_gen, i32imm, imm>;
1872 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1873 ".and", atomic_load_and_32_gen, i32imm, imm>;
1874 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
1875 atomic_load_and_64_g, i64imm, imm, [hasSM<32>]>;
1876 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".and",
1877 atomic_load_and_64_s, i64imm, imm, [hasSM<32>]>;
1878 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".and",
1879 atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
1880 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1881 ".and", atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_or: address-space-qualified fragments over atomic_load_or_32 /
// atomic_load_or_64 (_g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK).
1885 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1886 (atomic_load_or_32 node:$a, node:$b)>;
1887 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1888 (atomic_load_or_32 node:$a, node:$b)>;
1889 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1890 (atomic_load_or_32 node:$a, node:$b)>;
1891 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1892 (atomic_load_or_64 node:$a, node:$b)>;
1893 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1894 (atomic_load_or_64 node:$a, node:$b)>;
1895 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1896 (atomic_load_or_64 node:$a, node:$b)>;
// atom_or instructions: F_ATOMIC_2 with OpcStr ".or" and TypeStr
// .b32 / .b64. The 64-bit forms carry the [hasSM<32>] predicate list; the
// 32-bit forms pass no predicates. Unlike the neighbouring sections, the
// shared-space (_S_) variant is listed last within each width.
// NOTE(review): the *_GEN_*_USE_G variants match the generic-address-space
// fragment but print ".global"; no predicate distinguishing them from the
// plain *_GEN_* variants is visible here -- confirm how they are selected.
1898 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
1899 atomic_load_or_32_g, i32imm, imm>;
1900 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".or",
1901 atomic_load_or_32_gen, i32imm, imm>;
1902 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1903 ".or", atomic_load_or_32_gen, i32imm, imm>;
1904 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".or",
1905 atomic_load_or_32_s, i32imm, imm>;
1906 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
1907 atomic_load_or_64_g, i64imm, imm, [hasSM<32>]>;
1908 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".or",
1909 atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
1910 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1911 ".or", atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
1912 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".or",
1913 atomic_load_or_64_s, i64imm, imm, [hasSM<32>]>;
// atom_xor: address-space-qualified fragments over atomic_load_xor_32 /
// atomic_load_xor_64 (_g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK).
1917 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1918 (atomic_load_xor_32 node:$a, node:$b)>;
1919 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1920 (atomic_load_xor_32 node:$a, node:$b)>;
1921 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1922 (atomic_load_xor_32 node:$a, node:$b)>;
1923 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1924 (atomic_load_xor_64 node:$a, node:$b)>;
1925 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1926 (atomic_load_xor_64 node:$a, node:$b)>;
1927 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1928 (atomic_load_xor_64 node:$a, node:$b)>;
// atom_xor instructions: F_ATOMIC_2 with OpcStr ".xor" and TypeStr
// .b32 / .b64. The 64-bit forms carry the [hasSM<32>] predicate list; the
// 32-bit forms pass no predicates.
// NOTE(review): the *_GEN_*_USE_G variants match the generic-address-space
// fragment but print ".global"; no predicate distinguishing them from the
// plain *_GEN_* variants is visible here -- confirm how they are selected.
1930 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
1931 atomic_load_xor_32_g, i32imm, imm>;
1932 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor",
1933 atomic_load_xor_32_s, i32imm, imm>;
1934 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor",
1935 atomic_load_xor_32_gen, i32imm, imm>;
1936 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1937 ".xor", atomic_load_xor_32_gen, i32imm, imm>;
1938 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
1939 atomic_load_xor_64_g, i64imm, imm, [hasSM<32>]>;
1940 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor",
1941 atomic_load_xor_64_s, i64imm, imm, [hasSM<32>]>;
1942 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
1943 atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
1944 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1945 ".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_cas: three-operand, address-space-qualified fragments over
// atomic_cmp_swap_32 / atomic_cmp_swap_64 (_g -> ATOMIC_GLOBAL_CHK,
// _s -> ATOMIC_SHARED_CHK, _gen -> ATOMIC_GENERIC_CHK).
1949 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1950 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1951 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1952 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1953 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1954 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1955 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1956 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1957 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1958 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1959 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1960 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
// atom_cas instructions: F_ATOMIC_3 with OpcStr ".cas", TypeStr .b32 / .b64,
// and i32imm / i64imm as the immediate operand type. No predicate list is
// passed, so the multiclass default ([]) applies.
// NOTE(review): the *_GEN_*_USE_G variants match the generic-address-space
// fragment but print ".global"; no predicate distinguishing them from the
// plain *_GEN_* variants is visible here -- confirm how they are selected.
1962 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
1963 atomic_cmp_swap_32_g, i32imm>;
1964 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
1965 atomic_cmp_swap_32_s, i32imm>;
1966 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
1967 atomic_cmp_swap_32_gen, i32imm>;
1968 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32",
1969 ".cas", atomic_cmp_swap_32_gen, i32imm>;
1970 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
1971 atomic_cmp_swap_64_g, i64imm>;
1972 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
1973 atomic_cmp_swap_64_s, i64imm>;
1974 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
1975 atomic_cmp_swap_64_gen, i64imm>;
1976 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64",
1977 ".cas", atomic_cmp_swap_64_gen, i64imm>;
1979 // Support for scoped atomic operations. Matches
1980 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1981 // and converts it into the appropriate instruction.
1982 // NOTE: not all possible combinations are implemented.
1983 // 'space' is limited to generic as it's the only one needed to support CUDA.
1984 // 'scope' = 'gpu' is the default and is handled by regular atomic instructions.
//
// ATOM23_impl is the shared instruction class for the 2- and 3-operand
// scoped atomics:
//   AsmStr          - asm string (its use is not visible in this excerpt).
//   regT / regclass - value type and register class of the $result output.
//   Preds           - predicate list (its use is not visible in this excerpt).
//   ins             - input operand list.
//   Operands        - DAG that is matched and assigned to $result.
// NOTE(review): the asm-string argument line and whatever follows the
// pattern's trailing ',' are absent from this listing -- confirm against the
// complete file.
1985 class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds,
1986 dag ins, dag Operands>
1987 : NVPTXInst<(outs regclass:$result), ins,
1989 [(set (regT regclass:$result), Operands)]>,
1992 // Define instruction variants for all addressing modes.
// For a two-operand intrinsic `Intr` this emits four anonymous ATOM23_impl
// records: {32-bit, 64-bit} $src address x {$b in a register, $b immediate}.
//   ImmType / Imm - operand type and DAG node for the immediate form.
//   ImmTy         - value type the immediate is explicitly cast to in the
//                   pattern.
// The register forms are wrapped in `let AddedComplexity = 1`.
// NOTE(review): the closing braces of the `let` and of the multiclass are
// absent from this listing; the immediate forms are assumed to lie outside
// the `let` -- confirm against the complete file.
1993 multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
1994 ValueType regT, NVPTXRegClass regclass, Operand ImmType,
1995 SDNode Imm, ValueType ImmTy,
1996 list<Predicate> Preds> {
1997 let AddedComplexity = 1 in {
1998 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1999 (ins Int32Regs:$src, regclass:$b),
2000 (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
2001 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2002 (ins Int64Regs:$src, regclass:$b),
2003 (Intr (i64 Int64Regs:$src), (regT regclass:$b))>;
2005 // tablegen can't infer argument types from Intrinsic (though it can
2006 // from Instruction) so we have to enforce specific type on
2007 // immediates via explicit cast to ImmTy.
2008 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2009 (ins Int32Regs:$src, ImmType:$b),
2010 (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
2011 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2012 (ins Int64Regs:$src, ImmType:$b),
2013 (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>;
// Three-operand counterpart of ATOM2P_impl: for intrinsic `Intr` this emits
// eight anonymous ATOM23_impl records, covering {32-bit, 64-bit} $src
// address x {reg/reg, imm/reg, reg/imm, imm/imm} for ($b, $c).
//   ImmType / Imm - operand type and DAG node for immediate operands.
//   ImmTy         - value type immediates are explicitly cast to.
// AddedComplexity is 2 for the reg/reg forms and 1 for the forms that follow
// the second `let`.
// NOTE(review): the closing braces of both `let` blocks and of the
// multiclass are absent from this listing, so which records the second `let`
// covers (presumably the single-immediate forms only) must be confirmed
// against the complete file.
2016 multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
2017 ValueType regT, NVPTXRegClass regclass,
2018 Operand ImmType, SDNode Imm, ValueType ImmTy,
2019 list<Predicate> Preds> {
2020 // Variants for register/immediate permutations of $b and $c
2021 let AddedComplexity = 2 in {
2022 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2023 (ins Int32Regs:$src, regclass:$b, regclass:$c),
2024 (Intr (i32 Int32Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
2025 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2026 (ins Int64Regs:$src, regclass:$b, regclass:$c),
2027 (Intr (i64 Int64Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
2029 let AddedComplexity = 1 in {
2030 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2031 (ins Int32Regs:$src, ImmType:$b, regclass:$c),
2032 (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
2033 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2034 (ins Int64Regs:$src, ImmType:$b, regclass:$c),
2035 (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
2036 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2037 (ins Int32Regs:$src, regclass:$b, ImmType:$c),
2038 (Intr (i32 Int32Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
2039 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2040 (ins Int64Regs:$src, regclass:$b, ImmType:$c),
2041 (Intr (i64 Int64Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
2043 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2044 (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
2045 (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
2046 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2047 (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
2048 (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
2051 // Constructs intrinsic name and instruction asm strings.
// Asm string passed to ATOM2P_impl:
//   "atom" [ "." SpaceStr unless SpaceStr == "gen" ]
//          [ "." ScopeStr unless ScopeStr == "gpu" ]
//          "." OpStr "." TypeStr " \t$result, [$src], $b;"
// Intrinsic name:
//   "int_nvvm_atomic_" OpStr "_" SpaceStr "_" IntTypeStr
//          [ "_" ScopeStr unless ScopeStr is empty ]
// Note the two ScopeStr tests differ: the asm string omits the scope for
// "gpu", the intrinsic name omits it only for the empty string.
// NOTE(review): the line that turns the name string into an Intrinsic and
// the closing '}' are absent from this listing -- confirm against the
// complete file.
2052 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
2053 string ScopeStr, string SpaceStr,
2054 ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2055 ValueType ImmTy, list<Predicate> Preds> {
2056 defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2057 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
2058 # "." # OpStr # "." # TypeStr
2059 # " \t$result, [$src], $b;",
2061 "int_nvvm_atomic_" # OpStr
2062 # "_" # SpaceStr # "_" # IntTypeStr
2063 # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
2064 regT, regclass, ImmType, Imm, ImmTy, Preds>;
// Three-operand counterpart of the asm/intrinsic-name builder: same string
// construction, but the operand template is "$result, [$src], $b, $c" and the
// result is passed to ATOM3P_impl.
//   asm:  "atom" [+ "." # SpaceStr unless "gen"] [+ "." # ScopeStr unless
//         "gpu"] + "." # OpStr # "." # TypeStr.
//   name: "int_nvvm_atomic_" # OpStr # "_" # SpaceStr # "_" # IntTypeStr
//         [+ "_" # ScopeStr when ScopeStr is non-empty].
// NOTE(review): the embedded numbering jumps from 2073 to 2075, so the line
// that opens the expression closed by the final "))," is not in this listing.
2066 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
2067 string ScopeStr, string SpaceStr,
2068 ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2069 ValueType ImmTy, list<Predicate> Preds> {
2070 defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2071 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
2072 # "." # OpStr # "." # TypeStr
2073 # " \t$result, [$src], $b, $c;",
2075 "int_nvvm_atomic_" # OpStr
2076 # "_" # SpaceStr # "_" # IntTypeStr
2077 # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
2078 regT, regclass, ImmType, Imm, ImmTy, Preds>;
2081 // Constructs variants for different address spaces.
2082 // For now we only need variants for generic space pointers.
// Address-space layer for two-operand scoped atomics. Instantiates
// ATOM2N_impl exactly once, under the "_gen_" name fragment, with SpaceStr
// fixed to "gen"; all other parameters are forwarded unchanged.
2083 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
2084 string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
2085 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
2086 defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
2087 regT, regclass, ImmType, Imm, ImmTy, Preds>;
// Address-space layer for three-operand scoped atomics. Instantiates
// ATOM3N_impl exactly once, under the "_gen_" name fragment, with SpaceStr
// fixed to "gen"; all other parameters are forwarded unchanged.
2089 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
2090 string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
2091 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
2092 defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
2093 regT, regclass, ImmType, Imm, ImmTy, Preds>;
2096 // Constructs variants for different scopes of atomic op.
// Scope layer for two-operand atomics. Instantiates ATOM2A_impl twice:
//   _cta with ScopeStr "cta", and _sys with ScopeStr "sys".
// Both instantiations append hasAtomScope to the caller-supplied Preds.
2097 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
2098 ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2099 ValueType ImmTy, list<Predicate> Preds> {
2100 // .gpu scope is default and is currently covered by existing
2101 // atomics w/o explicitly specified scope.
2102 defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
2103 regT, regclass, ImmType, Imm, ImmTy,
2104 !listconcat(Preds,[hasAtomScope])>;
2105 defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
2106 regT, regclass, ImmType, Imm, ImmTy,
2107 !listconcat(Preds,[hasAtomScope])>;
// Scope layer for three-operand atomics. Instantiates ATOM3A_impl twice:
//   _cta with ScopeStr "cta", and _sys with ScopeStr "sys".
// Both instantiations append hasAtomScope to the caller-supplied Preds.
2109 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
2110 ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
2111 list<Predicate> Preds> {
2112 // No need to define ".gpu"-scoped atomics. They do the same thing
2113 // as the regular, non-scoped atomics defined elsewhere.
2114 defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
2115 regT, regclass, ImmType, Imm, ImmTy,
2116 !listconcat(Preds,[hasAtomScope])>;
2117 defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
2118 regT, regclass, ImmType, Imm, ImmTy,
2119 !listconcat(Preds,[hasAtomScope])>;
// Type variants of a scoped two-operand atomic named by OpStr: s32, u32, u64
// (IntTypeStr "i", empty predicate list) and f32, f64 (IntTypeStr "f", fpimm
// immediates).
// NOTE(review): the predicate-list arguments that finish the _f32 and _f64
// instantiations (embedded numbers 2128 and 2130) are not in this listing.
2123 multiclass ATOM2_add_impl<string OpStr> {
2124 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
2125 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
2126 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
2127 defm _f32 : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
2129 defm _f64 : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
2133 // atom.{and,or,xor}
// Type variants of a scoped two-operand bitwise atomic: b32 with no extra
// predicate, and b64 which additionally requires hasAtomBitwise64.
2134 multiclass ATOM2_bitwise_impl<string OpStr> {
2135 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2136 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64,
2137 [hasAtomBitwise64]>;
// Type variants of a scoped two-operand exchange atomic: b32 and b64, both
// with an empty predicate list.
2141 multiclass ATOM2_exch_impl<string OpStr> {
2142 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2143 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
// Type variants of a scoped two-operand min/max atomic: s32, u32 (empty
// predicate list), s64 and u64.
// NOTE(review): the predicate-list arguments that finish the _s64 and _u64
// instantiations (embedded numbers 2151 and 2153) are not in this listing.
2147 multiclass ATOM2_minmax_impl<string OpStr> {
2148 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
2149 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
2150 defm _s64 : ATOM2S_impl<OpStr, "i", "s64", i64, Int64Regs, i64imm, imm, i64,
2152 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64,
// Scoped two-operand inc/dec atomic; only a u32 variant is defined here.
2157 multiclass ATOM2_incdec_impl<string OpStr> {
2158 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
// Scoped three-operand atomic (built on ATOM3S_impl): b32 and b64 variants,
// both with an empty predicate list.
2162 multiclass ATOM3_cas_impl<string OpStr> {
2163 defm _b32 : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2164 defm _b64 : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
// Top-level instantiations of the scoped atomics, one per PTX op string:
// add, and/or/xor (bitwise), cas (three-operand), inc/dec, exch, min/max.
2167 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
2168 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
2169 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
2170 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
2171 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
2172 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
2173 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
2174 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
2175 defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
2176 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
2178 //-----------------------------------
2179 // Support for ldu on sm_20 or later
2180 //-----------------------------------
2182 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
2183 // read-only in a kernel.
// Scalar ldu.global loads. Every variant prints "ldu.global." # TyStr, has an
// empty selection-pattern list, and requires hasLDU. The variants differ only
// in the address operand:
//   areg - Int32Regs     areg64 - Int64Regs
//   avar - imemAny
//   ari  - MEMri         ari64  - MEMri64
// TyStr supplies the type suffix and the operand template.
2187 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
2188 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2189 !strconcat("ldu.global.", TyStr),
2190 []>, Requires<[hasLDU]>;
2191 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2192 !strconcat("ldu.global.", TyStr),
2193 []>, Requires<[hasLDU]>;
2194 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2195 !strconcat("ldu.global.", TyStr),
2196 []>, Requires<[hasLDU]>;
2197 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2198 !strconcat("ldu.global.", TyStr),
2199 []>, Requires<[hasLDU]>;
2200 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2201 !strconcat("ldu.global.", TyStr),
2202 []>, Requires<[hasLDU]>;
// Scalar ldu instantiations for u8, u16, u32, u64, f32 and f64.
// Both the u8 and the u16 forms use Int16Regs as the result register class.
2205 defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
2206 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
2207 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
2208 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
2209 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
2210 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
2214 // Elementized vector ldu
// Two-result ldu.global variants (outs $dst1, $dst2). Each prints
// "ldu.global." # TyStr and has an empty selection-pattern list.
// NOTE(review): the (ins ...) lines of _ari32, _ari64 and _avar (embedded
// numbers 2223, 2226, 2229) are not in this listing; presumably they mirror
// the MEMri / MEMri64 / imemAny operands used by VLDU_G_ELE_V4 - confirm.
2215 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2216 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2217 (ins Int32Regs:$src),
2218 !strconcat("ldu.global.", TyStr), []>;
2219 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2220 (ins Int64Regs:$src),
2221 !strconcat("ldu.global.", TyStr), []>;
2222 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2224 !strconcat("ldu.global.", TyStr), []>;
2225 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2227 !strconcat("ldu.global.", TyStr), []>;
2228 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2230 !strconcat("ldu.global.", TyStr), []>;
// Four-result ldu.global variants (outs $dst1..$dst4). Each prints
// "ldu.global." # TyStr and has an empty selection-pattern list. The variants
// differ only in the address operand:
//   _areg32 - Int32Regs   _areg64 - Int64Regs
//   _ari32  - MEMri       _ari64  - MEMri64
//   _avar   - imemAny
2233 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2234 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2235 regclass:$dst4), (ins Int32Regs:$src),
2236 !strconcat("ldu.global.", TyStr), []>;
2237 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2238 regclass:$dst4), (ins Int64Regs:$src),
2239 !strconcat("ldu.global.", TyStr), []>;
2240 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2241 regclass:$dst4), (ins MEMri:$src),
2242 !strconcat("ldu.global.", TyStr), []>;
2243 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2244 regclass:$dst4), (ins MEMri64:$src),
2245 !strconcat("ldu.global.", TyStr), []>;
2246 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2247 regclass:$dst4), (ins imemAny:$src),
2248 !strconcat("ldu.global.", TyStr), []>;
// Vector ldu instantiations. The asm template uses "{{" / "}}" around the
// destination list. v2 forms: u8, u16, u32, f32, u64, f64; v4 forms: u8, u16,
// u32, b16 (v4f16), b32 (v4f16x2), f32.
// NOTE(review): for the v4 forms from v4i16 onward the register-class argument
// sits on a continuation line (embedded numbers 2267, 2270, 2273, 2276, 2279)
// that is not in this listing.
2251 defm INT_PTX_LDU_G_v2i8_ELE
2252 : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2253 defm INT_PTX_LDU_G_v2i16_ELE
2254 : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2255 defm INT_PTX_LDU_G_v2i32_ELE
2256 : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2257 defm INT_PTX_LDU_G_v2f32_ELE
2258 : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2259 defm INT_PTX_LDU_G_v2i64_ELE
2260 : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2261 defm INT_PTX_LDU_G_v2f64_ELE
2262 : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2263 defm INT_PTX_LDU_G_v4i8_ELE
2264 : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2265 defm INT_PTX_LDU_G_v4i16_ELE
2266 : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2268 defm INT_PTX_LDU_G_v4i32_ELE
2269 : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2271 defm INT_PTX_LDU_G_v4f16_ELE
2272 : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2274 defm INT_PTX_LDU_G_v4f16x2_ELE
2275 : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2277 defm INT_PTX_LDU_G_v4f32_ELE
2278 : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2282 //-----------------------------------
2283 // Support for ldg on sm_35 or later
2284 //-----------------------------------
2286 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
2287 // non-coherent texture cache, and therefore the values read must be read-only
2288 // during the lifetime of the kernel.
// Scalar ld.global.nc loads. Every variant prints "ld.global.nc." # TyStr, has
// an empty selection-pattern list, and requires hasLDG. The variants differ
// only in the address operand:
//   areg - Int32Regs     areg64 - Int64Regs
//   avar - imemAny
//   ari  - MEMri         ari64  - MEMri64
2290 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
2291 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2292 !strconcat("ld.global.nc.", TyStr),
2293 []>, Requires<[hasLDG]>;
2294 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2295 !strconcat("ld.global.nc.", TyStr),
2296 []>, Requires<[hasLDG]>;
2297 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2298 !strconcat("ld.global.nc.", TyStr),
2299 []>, Requires<[hasLDG]>;
2300 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2301 !strconcat("ld.global.nc.", TyStr),
2302 []>, Requires<[hasLDG]>;
2303 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2304 !strconcat("ld.global.nc.", TyStr),
2305 []>, Requires<[hasLDG]>;
// Scalar ldg instantiations for u8, u16, u32, u64, f32 and f64.
// Both the u8 and the u16 forms use Int16Regs as the result register class.
2308 defm INT_PTX_LDG_GLOBAL_i8
2309 : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
2310 defm INT_PTX_LDG_GLOBAL_i16
2311 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
2312 defm INT_PTX_LDG_GLOBAL_i32
2313 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
2314 defm INT_PTX_LDG_GLOBAL_i64
2315 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2316 defm INT_PTX_LDG_GLOBAL_f32
2317 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
2318 defm INT_PTX_LDG_GLOBAL_f64
2319 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2323 // Elementized vector ldg
// Two-result ld.global.nc variants (outs $dst1, $dst2). Each prints
// "ld.global.nc." # TyStr and has an empty selection-pattern list.
// NOTE(review): the (ins ...) lines of _ari32, _ari64 and _avar (embedded
// numbers 2332, 2335, 2338) are not in this listing; presumably they mirror
// the MEMri / MEMri64 / imemAny operands used by VLDG_G_ELE_V4 - confirm.
2324 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2325 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2326 (ins Int32Regs:$src),
2327 !strconcat("ld.global.nc.", TyStr), []>;
2328 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2329 (ins Int64Regs:$src),
2330 !strconcat("ld.global.nc.", TyStr), []>;
2331 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2333 !strconcat("ld.global.nc.", TyStr), []>;
2334 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2336 !strconcat("ld.global.nc.", TyStr), []>;
2337 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2339 !strconcat("ld.global.nc.", TyStr), []>;
// Four-result ld.global.nc variants (outs $dst1..$dst4). Each prints
// "ld.global.nc." # TyStr and has an empty selection-pattern list. The
// variants differ only in the address operand:
//   _areg32 - Int32Regs   _areg64 - Int64Regs
//   _ari32  - MEMri       _ari64  - MEMri64
//   _avar   - imemAny
2342 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2343 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2344 regclass:$dst4), (ins Int32Regs:$src),
2345 !strconcat("ld.global.nc.", TyStr), []>;
2346 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2347 regclass:$dst4), (ins Int64Regs:$src),
2348 !strconcat("ld.global.nc.", TyStr), []>;
2349 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2350 regclass:$dst4), (ins MEMri:$src),
2351 !strconcat("ld.global.nc.", TyStr), []>;
2352 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2353 regclass:$dst4), (ins MEMri64:$src),
2354 !strconcat("ld.global.nc.", TyStr), []>;
2355 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2356 regclass:$dst4), (ins imemAny:$src),
2357 !strconcat("ld.global.nc.", TyStr), []>;
2360 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ldg instantiations. v2 forms: u8, u16, u32, f32, u64, f64; v4 forms:
// u8, u16, u32, f32. The u8 and u16 forms both use Int16Regs for results.
2361 defm INT_PTX_LDG_G_v2i8_ELE
2362 : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2363 defm INT_PTX_LDG_G_v2i16_ELE
2364 : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2365 defm INT_PTX_LDG_G_v2i32_ELE
2366 : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2367 defm INT_PTX_LDG_G_v2f32_ELE
2368 : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2369 defm INT_PTX_LDG_G_v2i64_ELE
2370 : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2371 defm INT_PTX_LDG_G_v2f64_ELE
2372 : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2373 defm INT_PTX_LDG_G_v4i8_ELE
2374 : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2375 defm INT_PTX_LDG_G_v4i16_ELE
2376 : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2377 defm INT_PTX_LDG_G_v4i32_ELE
2378 : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
2379 defm INT_PTX_LDG_G_v4f32_ELE
2380 : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
// "cvta.<Str>" conversions selected for intrinsic Intrin:
//   _yes      - 32-bit source and result, "cvta.<Str>.u32".
//   _yes_64   - 64-bit source and result, "cvta.<Str>.u64".
//   _yes_6432 - 32-bit source, 64-bit result; only under useShortPtr. Emits a
//               scoped block that widens $src into a local %tmp with
//               cvt.u64.u32 and then applies "cvta.<Str>.u64" to %tmp.
2383 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
2384 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2385 !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
2386 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2387 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2388 !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
2389 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2390 def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
2391 "{{ .reg .b64 %tmp;\n\t"
2392 #" cvt.u64.u32 \t%tmp, $src;\n\t"
2393 #" cvta." # Str # ".u64 \t$result, %tmp; }}",
2394 [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
2395 Requires<[useShortPtr]>;
// "cvta.to.<Str>" conversions selected for intrinsic Intrin:
//   _yes      - 32-bit source and result, "cvta.to.<Str>.u32".
//   _yes_64   - 64-bit source and result, "cvta.to.<Str>.u64".
//   _yes_3264 - 64-bit source, 32-bit result; only under useShortPtr. Emits a
//               scoped block that applies "cvta.to.<Str>.u64" into a local
//               %tmp and then narrows it with cvt.u32.u64.
2398 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
2399 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2400 !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
2401 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2402 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2403 !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
2404 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2405 def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
2406 "{{ .reg .b64 %tmp;\n\t"
2407 #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
2408 #" cvt.u32.u64 \t$result, %tmp; }}",
2409 [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
2410 Requires<[useShortPtr]>;
// cvta instantiations for the local, shared, global and const spaces: first
// the <space>-to-generic direction (NG_TO_G), then generic-to-<space>
// (G_TO_NG). The "const" asm string pairs with the *_constant_* intrinsics.
2413 defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
2414 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
2415 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
2416 defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
2418 defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
2419 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
2420 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
2421 defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
2424 // nvvm.ptr.gen.to.param
// int_nvvm_ptr_gen_to_param is selected to a plain register move: mov.u32 for
// a 32-bit operand and mov.u64 for a 64-bit operand.
2425 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
2426 (ins Int32Regs:$src),
2427 "mov.u32 \t$result, $src;",
2428 [(set Int32Regs:$result,
2429 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
2430 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
2431 (ins Int64Regs:$src),
2432 "mov.u64 \t$result, $src;",
2433 [(set Int64Regs:$result,
2434 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2437 // nvvm.move intrinsics
// Register-to-register moves for the int_nvvm_move_* intrinsics:
//   i16 / i32 / i64 -> mov.b16 / mov.b32 / mov.b64
//   float / double  -> mov.f32 / mov.f64
//   ptr (32 / 64)   -> mov.u32 / mov.u64
// NOTE(review): for the i16, i32, i64, ptr32 and ptr64 defs the first line of
// the selection pattern (embedded numbers 2440, 2444, 2448, 2460, 2464) is not
// in this listing; presumably it is the "[(set <regclass>:$r," line seen in
// the float and double defs - confirm.
2438 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
2439 "mov.b16 \t$r, $s;",
2441 (int_nvvm_move_i16 Int16Regs:$s))]>;
2442 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2443 "mov.b32 \t$r, $s;",
2445 (int_nvvm_move_i32 Int32Regs:$s))]>;
2446 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2447 "mov.b64 \t$r, $s;",
2449 (int_nvvm_move_i64 Int64Regs:$s))]>;
2450 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
2451 "mov.f32 \t$r, $s;",
2452 [(set Float32Regs:$r,
2453 (int_nvvm_move_float Float32Regs:$s))]>;
2454 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
2455 "mov.f64 \t$r, $s;",
2456 [(set Float64Regs:$r,
2457 (int_nvvm_move_double Float64Regs:$s))]>;
2458 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2459 "mov.u32 \t$r, $s;",
2461 (int_nvvm_move_ptr Int32Regs:$s))]>;
2462 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2463 "mov.u64 \t$r, $s;",
2465 (int_nvvm_move_ptr Int64Regs:$s))]>;
2467 // @TODO: Are these actually needed, or will we always just see symbols
2468 // copied to registers first?
2469 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2470 "mov.u32 \t$r, $s;",
2472 (int_nvvm_move_ptr texternalsym:$s))]>;
2473 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2474 "mov.u64 \t$r, $s;",
2476 (int_nvvm_move_ptr texternalsym:$s))]>;*/
2479 // MoveParam %r1, param
2480 // ptr_local_to_gen %r2, %r1
2481 // ptr_gen_to_local %r3, %r2
2485 // @TODO: Revisit this. There is a type
2486 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2487 // instructions are not currently defined. However, we can use the ptr
2488 // variants and the asm printer will do the right thing.
// Folds gen_to_local(local_to_gen(MoveParam sym)) into a single pointer move
// of the symbol: nvvm_move_ptr64 for an i64 result, nvvm_move_ptr32 for i32.
2489 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2490 (MoveParam texternalsym:$src)))),
2491 (nvvm_move_ptr64 texternalsym:$src)>;
2492 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2493 (MoveParam texternalsym:$src)))),
2494 (nvvm_move_ptr32 texternalsym:$src)>;
2497 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
2498 "mov.u64 \t$result, $src;", []>;
2500 //-----------------------------------
2501 // Compiler error / warning intrinsics
2502 // - Emitted only as PTX comments; otherwise ignored in codegen
2503 //-----------------------------------
// int_nvvm_compiler_warn / int_nvvm_compiler_error, each for a 32-bit and a
// 64-bit operand. The asm string of every variant is only a "//" comment
// naming the intrinsic; the operand $a does not appear in the output.
2505 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2506 "// llvm.nvvm.compiler.warn()",
2507 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
2508 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2509 "// llvm.nvvm.compiler.warn()",
2510 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
2511 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2512 "// llvm.nvvm.compiler.error()",
2513 [(int_nvvm_compiler_error Int32Regs:$a)]>;
2514 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2515 "// llvm.nvvm.compiler.error()",
2516 [(int_nvvm_compiler_error Int64Regs:$a)]>;
// "isspacep.<suffix>" predicates producing an Int1Regs result from a 32-bit
// (_32) or 64-bit (_64) operand, selected for intrinsic Intr. Preds defaults
// to an empty list.
// NOTE(review): each def ends with ">," and its continuation line (embedded
// numbers 2525 and 2529) is not in this listing; presumably that is where
// Preds is applied - confirm.
2521 multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
2522 def _32: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2523 "isspacep." # suffix # "\t$d, $a;",
2524 [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
2526 def _64: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2527 "isspacep." # suffix # "\t$d, $a;",
2528 [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
// isspacep instantiations. const passes hasPTX<31>; shared::cluster passes
// hasPTX<78> and hasSM<90>; global, local and shared use the default Preds.
2532 defm isspace_const : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
2533 defm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
2534 defm isspace_local : ISSPACEP<"local", int_nvvm_isspacep_local>;
2535 defm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
2536 defm isspace_shared_cluster : ISSPACEP<"shared::cluster",
2537 int_nvvm_isspacep_shared_cluster,
2538 [hasPTX<78>, hasSM<90>]>;
2540 // Special register reads
// Copies a SpecialRegs operand into a 32-bit register with mov.b32. It has no
// selection pattern of its own; the envreg Pat records select it.
2541 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
2542 (ins SpecialRegs:$r),
2543 "mov.b32 \t$d, $r;", []>;
// One pattern per environment register: int_nvvm_read_ptx_sreg_envregN is
// selected to (MOV_SPECIAL ENVREGN) for N = 0..31.
2545 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
2546 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
2547 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
2548 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
2549 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
2550 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
2551 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
2552 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
2553 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
2554 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
2555 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
2556 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2557 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2558 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2559 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2560 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2561 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2562 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2563 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2564 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2565 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2566 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2567 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2568 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2569 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2570 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2571 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2572 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2573 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2574 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2575 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2576 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2579 // rotate builtin support
// int_nvvm_rotate_b32 with an immediate amount, under hasHWROT32: a single
// shf.l.wrap.b32 with $src supplied as both source operands.
2581 def ROTATE_B32_HW_IMM
2582 : NVPTXInst<(outs Int32Regs:$dst),
2583 (ins Int32Regs:$src, i32imm:$amt),
2584 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2585 [(set Int32Regs:$dst,
2586 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2587 Requires<[hasHWROT32]> ;
// int_nvvm_rotate_b32 with a register amount, under hasHWROT32: a single
// shf.l.wrap.b32 with $src supplied as both source operands.
2589 def ROTATE_B32_HW_REG
2590 : NVPTXInst<(outs Int32Regs:$dst),
2591 (ins Int32Regs:$src, Int32Regs:$amt),
2592 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2593 [(set Int32Regs:$dst,
2594 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2595 Requires<[hasHWROT32]> ;
// int_nvvm_rotate_b32 under noHWROT32:
//   immediate amount -> ROT32imm_sw with $amt and (SUB_FRM_32 $amt);
//   register amount  -> ROTL32reg_sw.
2597 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2598 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2599 Requires<[noHWROT32]> ;
2601 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2602 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2603 Requires<[noHWROT32]> ;
// Extract one 32-bit half of a 64-bit register by unpacking it with mov.b64
// into a {first, second} pair: GET_LO_INT64 keeps the first element and
// GET_HI_INT64 the second; the other element goes to a scratch %dummy
// register declared inside the emitted block. Both are marked
// hasSideEffects = false.
// NOTE(review): the closing lines of each !strconcat (embedded numbers
// 2610-2612 and 2617-2619) are not in this listing.
2605 let hasSideEffects = false in {
2606 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2607 !strconcat("{{\n\t",
2608 ".reg .b32 %dummy;\n\t",
2609 "mov.b64 \t{$dst,%dummy}, $src;\n\t",
2613 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2614 !strconcat("{{\n\t",
2615 ".reg .b32 %dummy;\n\t",
2616 "mov.b64 \t{%dummy,$dst}, $src;\n\t",
// A side-effect-free instruction that packs two 32-bit registers ($lo, $hi)
// into one 64-bit register with mov.b64, followed by the pattern that selects
// int_nvvm_swap_lo_hi_b64 as PACK_TWO_INT32 of (high half, low half).
// NOTE(review): the "def <name>" line (embedded number 2622) is not in this
// listing; presumably the instruction is PACK_TWO_INT32, which the pattern
// uses with matching operands - confirm.
2621 let hasSideEffects = false in {
2623 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2624 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
2627 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2628 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2629 (GET_LO_INT64 Int64Regs:$src))> ;
2631 // Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
// 32-bit funnel-shift instructions, all marked hasSideEffects = false, with
// empty pattern lists and requiring hasHWROT32:
//   SHF_L_WRAP_B32_{IMM,REG} - "shf.l.wrap.b32 $dst, $lo, $hi, $amt"
//   SHF_R_WRAP_B32_{IMM,REG} - "shf.r.wrap.b32 $dst, $lo, $hi, $amt"
// The _IMM forms take $amt as i32imm, the _REG forms as Int32Regs.
2633 let hasSideEffects = false in {
2634 def SHF_L_WRAP_B32_IMM
2635 : NVPTXInst<(outs Int32Regs:$dst),
2636 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2637 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2638 Requires<[hasHWROT32]>;
2640 def SHF_L_WRAP_B32_REG
2641 : NVPTXInst<(outs Int32Regs:$dst),
2642 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2643 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2644 Requires<[hasHWROT32]>;
2646 def SHF_R_WRAP_B32_IMM
2647 : NVPTXInst<(outs Int32Regs:$dst),
2648 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2649 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2650 Requires<[hasHWROT32]>;
2652 def SHF_R_WRAP_B32_REG
2653 : NVPTXInst<(outs Int32Regs:$dst),
2654 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2655 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2656 Requires<[hasHWROT32]>;
2659 // HW version of rotate 64
// 64-bit rotates under hasHWROT32, built from two 32-bit funnel shifts. Each
// pattern splits $src with GET_LO_INT64 / GET_HI_INT64 and feeds the halves,
// in both orders, to the matching SHF_{L,R}_WRAP_B32_{IMM,REG}:
//   rotate_b64       -> SHF_L with (hi, lo) then (lo, hi)
//   rotate_right_b64 -> SHF_R with (lo, hi) then (hi, lo)
// NOTE(review): the line between each pattern header and its first SHF_*
// operand (embedded numbers 2661, 2669, 2678, 2686) is not in this listing;
// the trailing "))>" shows an enclosing node that presumably packs the two
// 32-bit results into an i64 - confirm.
2660 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2662 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2663 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2664 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2665 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2666 Requires<[hasHWROT32]>;
2668 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2670 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2671 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2672 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2673 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2674 Requires<[hasHWROT32]>;
2677 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2679 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2680 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2681 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2682 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2683 Requires<[hasHWROT32]>;
2685 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2687 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2688 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2689 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2690 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2691 Requires<[hasHWROT32]>;
2693 // SW version of rotate 64
// 64-bit rotates under noHWROT32:
//   rotate_b64, imm amount       -> ROT64imm_sw $src, $amt, (SUB_FRM_64 $amt)
//   rotate_b64, reg amount       -> ROTL64reg_sw
//   rotate_right_b64, imm amount -> ROT64imm_sw $src, (SUB_FRM_64 $amt), $amt
//                                   (same instruction, last two operands swapped)
//   rotate_right_b64, reg amount -> ROTR64reg_sw
2694 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2695 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
2696 Requires<[noHWROT32]>;
2697 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2698 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2699 Requires<[noHWROT32]>;
2700 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2701 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2702 Requires<[noHWROT32]>;
2703 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2704 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2705 Requires<[noHWROT32]>;
2708 //-----------------------------------
2709 // Texture Intrinsics
2710 //-----------------------------------
2712 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2713 // also defined in NVPTXReplaceImageHandles.cpp
2715 // texmode_independent
2716 let IsTex = true, IsTexModeUnified = false in {
2717 // Texture fetch instructions using handles
// Base class for 1D texture fetches. Produces four results ($r, $g, $b, $a)
// of class outtype. The input list is texsamp (the texture/sampler operands,
// referenced as $t and $s in the asm string) concatenated with one coordinate
// $x of class intype.
// NOTE(review): the final argument line(s) closing NVPTXInst (embedded number
// 2725) are not in this listing.
2719 class TEX_1D_base<string inst, NVPTXRegClass outtype,
2720 NVPTXRegClass intype, dag texsamp>
2721 : NVPTXInst<(outs outtype:$r, outtype:$g,
2722 outtype:$b, outtype:$a),
2723 !con(texsamp, (ins intype:$x)),
2724 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
// Four operand-kind variants of TEX_1D_base, named by the kinds of $t and $s
// in that order (R = Int64Regs, I = i64imm): _RR, _RI, _IR, _II.
2727 multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2728 def _RR : TEX_1D_base<inst, outtype, intype,
2729 (ins Int64Regs:$t, Int64Regs:$s)>;
2730 def _RI : TEX_1D_base<inst, outtype, intype,
2731 (ins Int64Regs:$t, i64imm:$s)>;
2732 def _IR : TEX_1D_base<inst, outtype, intype,
2733 (ins i64imm:$t, Int64Regs:$s)>;
2734 def _II : TEX_1D_base<inst, outtype, intype,
2735 (ins i64imm:$t, i64imm:$s)>;
// tex.1d.v4 instantiations: result types f32 / s32 / u32, each with s32 and
// f32 coordinates. Both s32 and u32 results use Int32Regs.
2738 defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
2739 defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2740 defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
2741 defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2742 defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
2743 defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 1D texture fetches with an explicit level-of-detail operand.
// Four results ($r, $g, $b, $a) of class outtype; inputs are texsamp followed
// by the coordinate $x and $lod, both of class intype.
// NOTE(review): the final argument line(s) closing NVPTXInst (embedded number
// 2751) are not in this listing.
2745 class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
2746 NVPTXRegClass intype, dag texsamp>
2747 : NVPTXInst<(outs outtype:$r, outtype:$g,
2748 outtype:$b, outtype:$a),
2749 !con(texsamp, (ins intype:$x, intype:$lod)),
2750 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
// Four operand-kind variants of TEX_1D_LEVEL_base, named by the kinds of $t
// and $s in that order (R = Int64Regs, I = i64imm): _RR, _RI, _IR, _II.
2753 multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
2754 NVPTXRegClass intype> {
2755 def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
2756 (ins Int64Regs:$t, Int64Regs:$s)>;
2757 def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
2758 (ins Int64Regs:$t, i64imm:$s)>;
2759 def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
2760 (ins i64imm:$t, Int64Regs:$s)>;
2761 def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
2762 (ins i64imm:$t, i64imm:$s)>;
// tex.level.1d.v4 instantiations: f32 / s32 / u32 results, f32 coordinates
// only.
2765 defm TEX_1D_F32_F32_LEVEL :
2766 TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2767 defm TEX_1D_S32_F32_LEVEL :
2768 TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2769 defm TEX_1D_U32_F32_LEVEL :
2770 TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 1D texture fetches with explicit gradients. Four results
// ($r, $g, $b, $a) of class outtype; inputs are texsamp followed by the
// coordinate $x and the gradients $gradx and $grady, all of class intype.
// NOTE(review): the final argument line(s) closing NVPTXInst (embedded number
// 2779) are not in this listing.
2772 class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
2773 NVPTXRegClass intype, dag texsamp>
2774 : NVPTXInst<(outs outtype:$r, outtype:$g,
2775 outtype:$b, outtype:$a),
2776 !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
2777 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
2778 " \\{$gradx\\}, \\{$grady\\};",
// Four operand-kind variants of TEX_1D_GRAD_base, named by the kinds of $t
// and $s in that order (R = Int64Regs, I = i64imm): _RR, _RI, _IR, _II.
2781 multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
2782 NVPTXRegClass intype> {
2783 def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
2784 (ins Int64Regs:$t, Int64Regs:$s)>;
2785 def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
2786 (ins Int64Regs:$t, i64imm:$s)>;
2787 def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
2788 (ins i64imm:$t, Int64Regs:$s)>;
2789 def _II : TEX_1D_GRAD_base<inst, outtype, intype,
2790 (ins i64imm:$t, i64imm:$s)>;
// tex.grad.1d.v4 instantiations: f32 / s32 / u32 results, f32 coordinates
// only.
2793 defm TEX_1D_F32_F32_GRAD
2794 : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2795 defm TEX_1D_S32_F32_GRAD
2796 : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2797 defm TEX_1D_U32_F32_GRAD
2798 : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 1D array texture fetches. Four results ($r, $g, $b, $a) of
// class outtype; inputs are texsamp followed by $l (always Int32Regs) and
// the coordinate $x of class intype. $l is printed before $x inside the
// coordinate braces.
// NOTE(review): the final argument line(s) closing NVPTXInst (embedded number
// 2806) are not in this listing.
2800 class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
2801 NVPTXRegClass intype, dag texsamp>
2802 : NVPTXInst<(outs outtype:$r, outtype:$g,
2803 outtype:$b, outtype:$a),
2804 !con(texsamp, (ins Int32Regs:$l, intype:$x)),
2805 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
// Four operand-kind variants of TEX_1D_ARRAY_base, named by the kinds of $t
// and $s in that order (R = Int64Regs, I = i64imm): _RR, _RI, _IR, _II.
2808 multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
2809 NVPTXRegClass intype> {
2810 def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
2811 (ins Int64Regs:$t, Int64Regs:$s)>;
2812 def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
2813 (ins Int64Regs:$t, i64imm:$s)>;
2814 def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
2815 (ins i64imm:$t, Int64Regs:$s)>;
2816 def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
2817 (ins i64imm:$t, i64imm:$s)>;
// tex.a1d.v4 instantiations: result types f32 / s32 / u32, each with s32 and
// f32 coordinates. Both s32 and u32 results use Int32Regs.
2820 defm TEX_1D_ARRAY_F32_F32
2821 : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2822 defm TEX_1D_ARRAY_F32_S32
2823 : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
2824 defm TEX_1D_ARRAY_S32_S32
2825 : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
2826 defm TEX_1D_ARRAY_S32_F32
2827 : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2828 defm TEX_1D_ARRAY_U32_S32
2829 : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
2830 defm TEX_1D_ARRAY_U32_F32
2831 : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "level" form of the a1d fetch. Same shape as the plain
// a1d record (Int32Regs $l plus coordinate $x), with one extra `intype`
// operand $lod that is printed after the bracketed address.
2833 class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
2834 NVPTXRegClass intype, dag texsamp>
2835 : NVPTXInst<(outs outtype:$r, outtype:$g,
2836 outtype:$b, outtype:$a),
2837 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
2838 inst # " \t\\{$r, $g, $b, $a\\},"
2839 " [$t, $s, \\{$l, $x\\}], $lod;",
// Emits the four $t/$s operand-kind variants (R = Int64Regs, I = i64imm).
2842 multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
2843 NVPTXRegClass intype> {
2844 def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2845 (ins Int64Regs:$t, Int64Regs:$s)>;
2846 def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2847 (ins Int64Regs:$t, i64imm:$s)>;
2848 def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2849 (ins i64imm:$t, Int64Regs:$s)>;
2850 def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2851 (ins i64imm:$t, i64imm:$s)>;
// One instantiation per result type; all take Float32Regs inputs.
2854 defm TEX_1D_ARRAY_F32_F32_LEVEL
2855 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2856 defm TEX_1D_ARRAY_S32_F32_LEVEL
2857 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2858 defm TEX_1D_ARRAY_U32_F32_LEVEL
2859 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "grad" form of the a1d fetch. Inputs are `texsamp`
// followed by Int32Regs $l, coordinate $x, and the operands $gradx and
// $grady; the latter two are each printed in their own brace pair.
2861 class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
2862 NVPTXRegClass intype, dag texsamp>
2863 : NVPTXInst<(outs outtype:$r, outtype:$g,
2864 outtype:$b, outtype:$a),
2865 !con(texsamp, (ins Int32Regs:$l, intype:$x,
2866 intype:$gradx, intype:$grady)),
2867 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
2868 " \\{$gradx\\}, \\{$grady\\};",
// Emits the four $t/$s operand-kind variants (R = Int64Regs, I = i64imm).
2871 multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
2872 NVPTXRegClass intype> {
2873 def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2874 (ins Int64Regs:$t, Int64Regs:$s)>;
2875 def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2876 (ins Int64Regs:$t, i64imm:$s)>;
2877 def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2878 (ins i64imm:$t, Int64Regs:$s)>;
2879 def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2880 (ins i64imm:$t, i64imm:$s)>;
// One instantiation per result type; all take Float32Regs inputs.
2883 defm TEX_1D_ARRAY_F32_F32_GRAD
2884 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2885 defm TEX_1D_ARRAY_S32_F32_GRAD
2886 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2887 defm TEX_1D_ARRAY_U32_F32_GRAD
2888 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the plain 2D texture fetches. Four `outtype` results
// ($r, $g, $b, $a); inputs are `texsamp` followed by the two coordinates
// $x and $y, printed as one brace-enclosed pair.
2890 class TEX_2D_base<string inst, NVPTXRegClass outtype,
2891 NVPTXRegClass intype, dag texsamp>
2892 : NVPTXInst<(outs outtype:$r, outtype:$g,
2893 outtype:$b, outtype:$a),
2894 !con(texsamp, (ins intype:$x, intype:$y)),
2895 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
// Emits the _RR/_RI/_IR/_II variants: every combination of Int64Regs (R) and
// i64imm (I) for the $t and $s operands, in that order.
2898 multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2899 def _RR : TEX_2D_base<inst, outtype, intype,
2900 (ins Int64Regs:$t, Int64Regs:$s)>;
2901 def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
2902 def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
2903 def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
// Instantiations for each result/coordinate type pairing named in the
// mnemonic. Both s32 and u32 map to Int32Regs.
2906 defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2907 defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
2908 defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
2909 defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2910 defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
2911 defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "level" form of the 2D fetch: coordinates $x and $y
// plus one extra `intype` operand $lod, printed after the bracketed address.
2913 class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
2914 NVPTXRegClass intype, dag texsamp>
2915 : NVPTXInst<(outs outtype:$r, outtype:$g,
2916 outtype:$b, outtype:$a),
2917 !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
2918 inst # " \t\\{$r, $g, $b, $a\\},"
2919 " [$t, $s, \\{$x, $y\\}], $lod;",
// Emits the four $t/$s operand-kind variants (R = Int64Regs, I = i64imm).
2922 multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
2923 NVPTXRegClass intype> {
2924 def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
2925 (ins Int64Regs:$t, Int64Regs:$s)>;
2926 def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
2927 (ins Int64Regs:$t, i64imm:$s)>;
2928 def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
2929 (ins i64imm:$t, Int64Regs:$s)>;
2930 def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
2931 (ins i64imm:$t, i64imm:$s)>;
// One instantiation per result type; all take Float32Regs inputs.
2934 defm TEX_2D_F32_F32_LEVEL :
2935 TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2936 defm TEX_2D_S32_F32_LEVEL :
2937 TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2938 defm TEX_2D_U32_F32_LEVEL :
2939 TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "grad" form of the 2D fetch. Besides the coordinates
// $x and $y it takes four more `intype` operands, printed as two
// brace-enclosed pairs: {$gradx0, $gradx1} and {$grady0, $grady1}.
2941 class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
2942 NVPTXRegClass intype, dag texsamp>
2943 : NVPTXInst<(outs outtype:$r, outtype:$g,
2944 outtype:$b, outtype:$a),
2945 !con(texsamp, (ins intype:$x, intype:$y,
2946 intype:$gradx0, intype:$gradx1,
2947 intype:$grady0, intype:$grady1)),
2948 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
2949 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
// Emits the four $t/$s operand-kind variants (R = Int64Regs, I = i64imm).
2952 multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
2953 NVPTXRegClass intype> {
2954 def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
2955 (ins Int64Regs:$t, Int64Regs:$s)>;
2956 def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
2957 (ins Int64Regs:$t, i64imm:$s)>;
2958 def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
2959 (ins i64imm:$t, Int64Regs:$s)>;
2960 def _II : TEX_2D_GRAD_base<inst, outtype, intype,
2961 (ins i64imm:$t, i64imm:$s)>;
// One instantiation per result type; all take Float32Regs inputs.
2964 defm TEX_2D_F32_F32_GRAD :
2965 TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2966 defm TEX_2D_S32_F32_GRAD :
2967 TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2968 defm TEX_2D_U32_F32_GRAD :
2969 TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "a2d" texture fetches. Inputs are `texsamp` followed by
// Int32Regs $l and the coordinates $x and $y. The address vector is printed
// with four entries, repeating $y in the last slot.
// NOTE(review): the repeated $y looks like deliberate padding to a 4-element
// vector - confirm against the PTX ISA before "fixing" it.
2971 class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
2972 NVPTXRegClass intype, dag texsamp>
2973 : NVPTXInst<(outs outtype:$r, outtype:$g,
2974 outtype:$b, outtype:$a),
2975 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
2976 inst # " \t\\{$r, $g, $b, $a\\},"
2977 " [$t, $s, \\{$l, $x, $y, $y\\}];",
// Emits the _RR/_RI/_IR/_II variants: every combination of Int64Regs (R) and
// i64imm (I) for the $t and $s operands, in that order.
2980 multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
2981 NVPTXRegClass intype> {
2982 def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
2983 (ins Int64Regs:$t, Int64Regs:$s)>;
2984 def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
2985 (ins Int64Regs:$t, i64imm:$s)>;
2986 def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
2987 (ins i64imm:$t, Int64Regs:$s)>;
2988 def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
2989 (ins i64imm:$t, i64imm:$s)>;
// Instantiations for each result/coordinate type pairing named in the
// mnemonic. Both s32 and u32 map to Int32Regs.
2992 defm TEX_2D_ARRAY_F32_F32
2993 : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
2994 defm TEX_2D_ARRAY_F32_S32
2995 : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
2996 defm TEX_2D_ARRAY_S32_S32
2997 : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
2998 defm TEX_2D_ARRAY_S32_F32
2999 : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3000 defm TEX_2D_ARRAY_U32_S32
3001 : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
3002 defm TEX_2D_ARRAY_U32_F32
3003 : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "level" form of the a2d fetch. Inputs start with
// `texsamp`, Int32Regs $l and the coordinates $x and $y; the asm string also
// references $lod after the bracketed address. The address vector is printed
// with four entries, repeating $y in the last slot.
3005 class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3006 NVPTXRegClass intype, dag texsamp>
3007 : NVPTXInst<(outs outtype:$r, outtype:$g,
3008 outtype:$b, outtype:$a),
3009 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3011 inst # " \t\\{$r, $g, $b, $a\\},"
3012 " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
// Emits the four $t/$s operand-kind variants (R = Int64Regs, I = i64imm).
3015 multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3016 NVPTXRegClass intype> {
3017 def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3018 (ins Int64Regs:$t, Int64Regs:$s)>;
3019 def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3020 (ins Int64Regs:$t, i64imm:$s)>;
3021 def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3022 (ins i64imm:$t, Int64Regs:$s)>;
3023 def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3024 (ins i64imm:$t, i64imm:$s)>;
// One instantiation per result type; all take Float32Regs inputs.
3027 defm TEX_2D_ARRAY_F32_F32_LEVEL
3028 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3029 defm TEX_2D_ARRAY_S32_F32_LEVEL
3030 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3031 defm TEX_2D_ARRAY_U32_F32_LEVEL
3032 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "grad" form of the a2d fetch. Inputs are `texsamp`,
// Int32Regs $l, coordinates $x and $y, then $gradx0/$gradx1 and
// $grady0/$grady1, printed as two brace-enclosed pairs. The address vector is
// printed with four entries, repeating $y in the last slot.
3034 class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3035 NVPTXRegClass intype, dag texsamp>
3036 : NVPTXInst<(outs outtype:$r, outtype:$g,
3037 outtype:$b, outtype:$a),
3038 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3039 intype:$gradx0, intype:$gradx1,
3040 intype:$grady0, intype:$grady1)),
3041 inst # " \t\\{$r, $g, $b, $a\\},"
3042 " [$t, $s, \\{$l, $x, $y, $y\\}],"
3043 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
// Emits the four $t/$s operand-kind variants (R = Int64Regs, I = i64imm).
3046 multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3047 NVPTXRegClass intype> {
3048 def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3049 (ins Int64Regs:$t, Int64Regs:$s)>;
3050 def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3051 (ins Int64Regs:$t, i64imm:$s)>;
3052 def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3053 (ins i64imm:$t, Int64Regs:$s)>;
3054 def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3055 (ins i64imm:$t, i64imm:$s)>;
// One instantiation per result type; all take Float32Regs inputs.
3058 defm TEX_2D_ARRAY_F32_F32_GRAD
3059 : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3060 defm TEX_2D_ARRAY_S32_F32_GRAD
3061 : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3062 defm TEX_2D_ARRAY_U32_F32_GRAD
3063 : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the plain 3D texture fetches. Inputs are `texsamp` followed
// by the coordinates $x, $y and $z. The coordinate vector is printed with
// four entries, repeating $z in the last slot.
// NOTE(review): the repeated $z looks like deliberate padding to a 4-element
// vector - confirm against the PTX ISA before "fixing" it.
3065 class TEX_3D_base<string inst, NVPTXRegClass outtype,
3066 NVPTXRegClass intype, dag texsamp>
3067 : NVPTXInst<(outs outtype:$r, outtype:$g,
3068 outtype:$b, outtype:$a),
3069 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
3070 inst # " \t\\{$r, $g, $b, $a\\},"
3071 " [$t, $s, \\{$x, $y, $z, $z\\}];",
// Emits the _RR/_RI/_IR/_II variants: every combination of Int64Regs (R) and
// i64imm (I) for the $t and $s operands, in that order.
3074 multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3075 def _RR : TEX_3D_base<inst, outtype, intype,
3076 (ins Int64Regs:$t, Int64Regs:$s)>;
3077 def _RI : TEX_3D_base<inst, outtype, intype,
3078 (ins Int64Regs:$t, i64imm:$s)>;
3079 def _IR : TEX_3D_base<inst, outtype, intype,
3080 (ins i64imm:$t, Int64Regs:$s)>;
3081 def _II : TEX_3D_base<inst, outtype, intype,
3082 (ins i64imm:$t, i64imm:$s)>;
// Instantiations for each result/coordinate type pairing named in the
// mnemonic. Both s32 and u32 map to Int32Regs.
3085 defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3086 defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
3087 defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
3088 defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3089 defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
3090 defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "level" form of the 3D fetch. Inputs start with
// `texsamp` and the coordinates $x, $y and $z; the asm string also references
// $lod after the bracketed address. The coordinate vector is printed with
// four entries, repeating $z in the last slot.
3092 class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
3093 NVPTXRegClass intype, dag texsamp>
3094 : NVPTXInst<(outs outtype:$r, outtype:$g,
3095 outtype:$b, outtype:$a),
3096 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3098 inst # " \t\\{$r, $g, $b, $a\\},"
3099 " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Emits the four $t/$s operand-kind variants (R = Int64Regs, I = i64imm).
3102 multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
3103 NVPTXRegClass intype> {
3104 def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,
3105 (ins Int64Regs:$t, Int64Regs:$s)>;
3106 def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
3107 (ins Int64Regs:$t, i64imm:$s)>;
3108 def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
3109 (ins i64imm:$t, Int64Regs:$s)>;
3110 def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
3111 (ins i64imm:$t, i64imm:$s)>;
// One instantiation per result type; all take Float32Regs inputs.
3114 defm TEX_3D_F32_F32_LEVEL
3115 : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3116 defm TEX_3D_S32_F32_LEVEL
3117 : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3118 defm TEX_3D_U32_F32_LEVEL
3119 : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "grad" form of the 3D fetch. Inputs are `texsamp`, the
// coordinates $x, $y, $z, and six more `intype` operands $gradx0..2 and
// $grady0..2. All three vectors in the asm string are printed with four
// entries, repeating the last operand ($z, $gradx2, $grady2) in the final
// slot.
3121 class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
3122 NVPTXRegClass intype, dag texsamp>
3123 : NVPTXInst<(outs outtype:$r, outtype:$g,
3124 outtype:$b, outtype:$a),
3125 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3126 intype :$gradx0, intype:$gradx1,
3127 intype:$gradx2, intype:$grady0,
3128 intype:$grady1, intype:$grady2)),
3129 inst # " \t\\{$r, $g, $b, $a\\},"
3130 " [$t, $s, \\{$x, $y, $z, $z\\}],"
3131 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3132 " \\{$grady0, $grady1, $grady2, $grady2\\};",
// Emits the four $t/$s operand-kind variants (R = Int64Regs, I = i64imm).
3135 multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
3136 NVPTXRegClass intype> {
3137 def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
3138 (ins Int64Regs:$t, Int64Regs:$s)>;
3139 def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
3140 (ins Int64Regs:$t, i64imm:$s)>;
3141 def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
3142 (ins i64imm:$t, Int64Regs:$s)>;
3143 def _II : TEX_3D_GRAD_base<inst, outtype, intype,
3144 (ins i64imm:$t, i64imm:$s)>;
// One instantiation per result type; all take Float32Regs inputs.
3147 defm TEX_3D_F32_F32_GRAD
3148 : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3149 defm TEX_3D_S32_F32_GRAD
3150 : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3151 defm TEX_3D_U32_F32_GRAD
3152 : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "cube" texture fetches. Inputs are `texsamp` followed
// by the coordinates $x, $y and $z. The coordinate vector is printed with
// four entries, repeating $z in the last slot.
3154 class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
3155 NVPTXRegClass intype, dag texsamp>
3156 : NVPTXInst<(outs outtype:$r, outtype:$g,
3157 outtype:$b, outtype:$a),
3158 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
3159 inst # " \t\\{$r, $g, $b, $a\\},"
3160 " [$t, $s, \\{$x, $y, $z, $z\\}];",
// Emits the _RR/_RI/_IR/_II variants: every combination of Int64Regs (R) and
// i64imm (I) for the $t and $s operands, in that order.
3163 multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3164 def _RR : TEX_CUBE_base<inst, outtype, intype,
3165 (ins Int64Regs:$t, Int64Regs:$s)>;
3166 def _RI : TEX_CUBE_base<inst, outtype, intype,
3167 (ins Int64Regs:$t, i64imm:$s)>;
3168 def _IR : TEX_CUBE_base<inst, outtype, intype,
3169 (ins i64imm:$t, Int64Regs:$s)>;
3170 def _II : TEX_CUBE_base<inst, outtype, intype,
3171 (ins i64imm:$t, i64imm:$s)>;
// One instantiation per result type; all take Float32Regs inputs.
3174 defm TEX_CUBE_F32_F32
3175 : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3176 defm TEX_CUBE_S32_F32
3177 : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3178 defm TEX_CUBE_U32_F32
3179 : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "level" form of the cube fetch. Inputs start with
// `texsamp` and the coordinates $x, $y and $z; the asm string also references
// $lod after the bracketed address. The coordinate vector is printed with
// four entries, repeating $z in the last slot.
3181 class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
3182 NVPTXRegClass intype, dag texsamp>
3183 : NVPTXInst<(outs outtype:$r, outtype:$g,
3184 outtype:$b, outtype:$a),
3185 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3187 inst # " \t\\{$r, $g, $b, $a\\},"
3188 " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Emits the four $t/$s operand-kind variants (R = Int64Regs, I = i64imm).
3191 multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
3192 NVPTXRegClass intype> {
3193 def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3194 (ins Int64Regs:$t, Int64Regs:$s)>;
3195 def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3196 (ins Int64Regs:$t, i64imm:$s)>;
3197 def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3198 (ins i64imm:$t, Int64Regs:$s)>;
3199 def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3200 (ins i64imm:$t, i64imm:$s)>;
// One instantiation per result type; all take Float32Regs inputs.
3203 defm TEX_CUBE_F32_F32_LEVEL
3204 : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3205 defm TEX_CUBE_S32_F32_LEVEL
3206 : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3207 defm TEX_CUBE_U32_F32_LEVEL
3208 : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "acube" texture fetches. Inputs start with `texsamp`,
// Int32Regs $l and the coordinates $x and $y; the asm string also references
// $z. Here the four-entry address vector is {$l, $x, $y, $z}, with no
// operand repeated.
3210 class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
3211 NVPTXRegClass intype, dag texsamp>
3212 : NVPTXInst<(outs outtype:$r, outtype:$g,
3213 outtype:$b, outtype:$a),
3214 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3216 inst # " \t\\{$r, $g, $b, $a\\},"
3217 " [$t, $s, \\{$l, $x, $y, $z\\}];",
// Emits the _RR/_RI/_IR/_II variants: every combination of Int64Regs (R) and
// i64imm (I) for the $t and $s operands, in that order.
3220 multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3221 NVPTXRegClass intype> {
3222 def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3223 (ins Int64Regs:$t, Int64Regs:$s)>;
3224 def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3225 (ins Int64Regs:$t, i64imm:$s)>;
3226 def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3227 (ins i64imm:$t, Int64Regs:$s)>;
3228 def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3229 (ins i64imm:$t, i64imm:$s)>;
// One instantiation per result type; all take Float32Regs inputs.
3232 defm TEX_CUBE_ARRAY_F32_F32
3233 : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3234 defm TEX_CUBE_ARRAY_S32_F32
3235 : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3236 defm TEX_CUBE_ARRAY_U32_F32
3237 : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "level" form of the acube fetch. Inputs are `texsamp`,
// Int32Regs $l, the coordinates $x, $y, $z, and the extra operand $lod,
// which is printed after the bracketed address.
3239 class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3240 NVPTXRegClass intype, dag texsamp>
3241 : NVPTXInst<(outs outtype:$r, outtype:$g,
3242 outtype:$b, outtype:$a),
3243 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3244 intype:$z, intype:$lod)),
3245 inst # " \t\\{$r, $g, $b, $a\\},"
3246 " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
// Emits the four $t/$s operand-kind variants (R = Int64Regs, I = i64imm).
3249 multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3250 NVPTXRegClass intype> {
3251 def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3252 (ins Int64Regs:$t, Int64Regs:$s)>;
3253 def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3254 (ins Int64Regs:$t, i64imm:$s)>;
3255 def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3256 (ins i64imm:$t, Int64Regs:$s)>;
3257 def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3258 (ins i64imm:$t, i64imm:$s)>;
// One instantiation per result type; all take Float32Regs inputs.
3261 defm TEX_CUBE_ARRAY_F32_F32_LEVEL
3262 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3263 Float32Regs, Float32Regs>;
3264 defm TEX_CUBE_ARRAY_S32_F32_LEVEL
3265 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3266 Int32Regs, Float32Regs>;
3267 defm TEX_CUBE_ARRAY_U32_F32_LEVEL
3268 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3269 Int32Regs, Float32Regs>;
// Base record for the 2D "tld4" instructions. Unlike the tex records, the
// four `outtype` results are named $v0..$v3 rather than $r/$g/$b/$a. Inputs
// are `texsamp` followed by the coordinates $x and $y.
3271 class TLD4_2D_base<string inst, NVPTXRegClass outtype,
3272 NVPTXRegClass intype, dag texsamp>
3273 : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3274 outtype:$v2, outtype:$v3),
3275 !con(texsamp, (ins intype:$x, intype:$y)),
3276 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
// Emits the _RR/_RI/_IR/_II variants: every combination of Int64Regs (R) and
// i64imm (I) for the $t and $s operands, in that order.
3279 multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3280 def _RR : TLD4_2D_base<inst, outtype, intype,
3281 (ins Int64Regs:$t, Int64Regs:$s)>;
3282 def _RI : TLD4_2D_base<inst, outtype, intype,
3283 (ins Int64Regs:$t, i64imm:$s)>;
3284 def _IR : TLD4_2D_base<inst, outtype, intype,
3285 (ins i64imm:$t, Int64Regs:$s)>;
3286 def _II : TLD4_2D_base<inst, outtype, intype,
3287 (ins i64imm:$t, i64imm:$s)>;
// One instantiation per component selector in the mnemonic (.r, .g, .b, .a)
// for each result type. f32 results use Float32Regs.
3290 defm TLD4_R_2D_F32_F32
3291 : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3292 defm TLD4_G_2D_F32_F32
3293 : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3294 defm TLD4_B_2D_F32_F32
3295 : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3296 defm TLD4_A_2D_F32_F32
3297 : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
// s32 results (Int32Regs).
3299 defm TLD4_R_2D_S32_F32
3300 : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3301 defm TLD4_G_2D_S32_F32
3302 : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3303 defm TLD4_B_2D_S32_F32
3304 : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3305 defm TLD4_A_2D_S32_F32
3306 : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
// u32 results (also Int32Regs; only the mnemonic differs from s32).
3308 defm TLD4_R_2D_U32_F32
3309 : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3310 defm TLD4_G_2D_U32_F32
3311 : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3312 defm TLD4_B_2D_U32_F32
3313 : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3314 defm TLD4_A_2D_U32_F32
3315 : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3321 let IsTex = true, IsTexModeUnified = true in {
3322 // Texture fetch instructions using handles
// Base record for the unified-mode 1D texture fetches. Four `outtype` results
// ($r, $g, $b, $a); inputs are the `tex` dag followed by the coordinate $x.
// The asm string references a single handle operand $t and no $s.
3324 class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
3325 NVPTXRegClass intype, dag tex>
3326 : NVPTXInst<(outs outtype:$r, outtype:$g,
3327 outtype:$b, outtype:$a),
3328 !con(tex, (ins intype:$x)),
3329 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Emits two variants: _R takes $t as Int64Regs, _I takes it as i64imm.
3332 multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
3333 NVPTXRegClass intype> {
3334 def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3335 def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations for each result/coordinate type pairing named in the
// mnemonic. Both s32 and u32 map to Int32Regs.
3338 defm TEX_UNIFIED_1D_F32_S32
3339 : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
3340 defm TEX_UNIFIED_1D_F32_F32
3341 : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3342 defm TEX_UNIFIED_1D_S32_S32
3343 : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
3344 defm TEX_UNIFIED_1D_S32_F32
3345 : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3346 defm TEX_UNIFIED_1D_U32_S32
3347 : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
3348 defm TEX_UNIFIED_1D_U32_F32
3349 : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "level" form of the unified-mode 1D fetch: coordinate
// $x plus one extra `intype` operand $lod, printed after the bracketed
// address.
3351 class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
3352 NVPTXRegClass intype, dag tex>
3353 : NVPTXInst<(outs outtype:$r, outtype:$g,
3354 outtype:$b, outtype:$a),
3355 !con(tex, (ins intype:$x, intype:$lod)),
3356 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
// Emits two variants: _R takes $t as Int64Regs, _I takes it as i64imm.
3359 multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
3360 NVPTXRegClass intype> {
3361 def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3362 def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
// One instantiation per result type; all take Float32Regs inputs.
3365 defm TEX_UNIFIED_1D_F32_F32_LEVEL
3366 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3367 defm TEX_UNIFIED_1D_S32_F32_LEVEL
3368 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3369 defm TEX_UNIFIED_1D_U32_F32_LEVEL
3370 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "grad" form of the unified-mode 1D fetch: coordinate $x
// plus the operands $gradx and $grady, each printed in its own brace pair.
3372 class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
3373 NVPTXRegClass intype, dag tex>
3374 : NVPTXInst<(outs outtype:$r, outtype:$g,
3375 outtype:$b, outtype:$a),
3376 !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
3377 inst # " \t\\{$r, $g, $b, $a\\},"
3378 " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Emits two variants: _R takes $t as Int64Regs, _I takes it as i64imm.
3381 multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
3382 NVPTXRegClass intype> {
3383 def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3384 def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// One instantiation per result type; all take Float32Regs inputs.
3387 defm TEX_UNIFIED_1D_F32_F32_GRAD
3388 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3389 defm TEX_UNIFIED_1D_S32_F32_GRAD
3390 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3391 defm TEX_UNIFIED_1D_U32_F32_GRAD
3392 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the unified-mode "a1d" fetches. Inputs are the `tex` dag
// followed by $l, which is always Int32Regs regardless of `intype`, and the
// coordinate $x. $l and $x are printed together as one brace-enclosed pair.
3394 class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
3395 NVPTXRegClass intype, dag tex>
3396 : NVPTXInst<(outs outtype:$r, outtype:$g,
3397 outtype:$b, outtype:$a),
3398 !con(tex, (ins Int32Regs:$l, intype:$x)),
3399 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
// Emits two variants: _R takes $t as Int64Regs, _I takes it as i64imm.
3402 multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
3403 NVPTXRegClass intype> {
3404 def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3405 def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations for each result/coordinate type pairing named in the
// mnemonic. Both s32 and u32 map to Int32Regs.
3408 defm TEX_UNIFIED_1D_ARRAY_F32_S32
3409 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
3410 defm TEX_UNIFIED_1D_ARRAY_F32_F32
3411 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
3412 defm TEX_UNIFIED_1D_ARRAY_S32_S32
3413 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
3414 defm TEX_UNIFIED_1D_ARRAY_S32_F32
3415 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
3416 defm TEX_UNIFIED_1D_ARRAY_U32_S32
3417 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
3418 defm TEX_UNIFIED_1D_ARRAY_U32_F32
3419 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "level" form of the unified-mode a1d fetch: Int32Regs
// $l, coordinate $x, and an extra `intype` operand $lod printed after the
// bracketed address.
3421 class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3422 NVPTXRegClass intype, dag tex>
3423 : NVPTXInst<(outs outtype:$r, outtype:$g,
3424 outtype:$b, outtype:$a),
3425 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
3426 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
// Emits the _R (Int64Regs:$t) and _I variants of the base record.
3429 multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3430 NVPTXRegClass intype> {
3431 def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
3432 (ins Int64Regs:$t)>;
3433 def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
// One instantiation per result type; all take Float32Regs inputs.
3437 defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3438 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
3439 Float32Regs, Float32Regs>;
3440 defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3441 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
3442 Int32Regs, Float32Regs>;
3443 defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3444 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
3445 Int32Regs, Float32Regs>;
// Base record for the "grad" form of the unified-mode a1d fetch: Int32Regs
// $l, coordinate $x, and the operands $gradx and $grady, each printed in its
// own brace pair.
3447 class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3448 NVPTXRegClass intype, dag tex>
3449 : NVPTXInst<(outs outtype:$r, outtype:$g,
3450 outtype:$b, outtype:$a),
3451 !con(tex, (ins Int32Regs:$l, intype:$x,
3452 intype:$gradx, intype:$grady)),
3453 inst # " \t\\{$r, $g, $b, $a\\},"
3454 " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Emits the _R (Int64Regs:$t) and _I variants of the base record.
3457 multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3458 NVPTXRegClass intype> {
3459 def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
3460 (ins Int64Regs:$t)>;
3461 def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
// One instantiation per result type; all take Float32Regs inputs.
3465 defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3466 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
3467 Float32Regs, Float32Regs>;
3468 defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3469 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
3470 Int32Regs, Float32Regs>;
3471 defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3472 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
3473 Int32Regs, Float32Regs>;
// Base record for the unified-mode 2D texture fetches. Four `outtype` results
// ($r, $g, $b, $a); inputs are the `tex` dag followed by the coordinates $x
// and $y, printed as one brace-enclosed pair after the handle $t.
3475 class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3476 NVPTXRegClass intype, dag tex>
3477 : NVPTXInst<(outs outtype:$r, outtype:$g,
3478 outtype:$b, outtype:$a),
3479 !con(tex, (ins intype:$x, intype:$y)),
3480 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
// Emits two variants: _R takes $t as Int64Regs, _I takes it as i64imm.
3483 multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3484 NVPTXRegClass intype> {
3485 def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3486 def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations for each result/coordinate type pairing named in the
// mnemonic. Both s32 and u32 map to Int32Regs.
3489 defm TEX_UNIFIED_2D_F32_S32
3490 : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
3491 defm TEX_UNIFIED_2D_F32_F32
3492 : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3493 defm TEX_UNIFIED_2D_S32_S32
3494 : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
3495 defm TEX_UNIFIED_2D_S32_F32
3496 : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3497 defm TEX_UNIFIED_2D_U32_S32
3498 : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
3499 defm TEX_UNIFIED_2D_U32_F32
3500 : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "level" form of the unified-mode 2D fetch: coordinates
// $x and $y plus one extra `intype` operand $lod, printed after the bracketed
// address.
3502 class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
3503 NVPTXRegClass intype, dag tex>
3504 : NVPTXInst<(outs outtype:$r, outtype:$g,
3505 outtype:$b, outtype:$a),
3506 !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
3507 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
// Emits two variants: _R takes $t as Int64Regs, _I takes it as i64imm.
3510 multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
3511 NVPTXRegClass intype> {
3512 def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3513 def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
// One instantiation per result type; all take Float32Regs inputs.
3516 defm TEX_UNIFIED_2D_F32_F32_LEVEL
3517 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3518 defm TEX_UNIFIED_2D_S32_F32_LEVEL
3519 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3520 defm TEX_UNIFIED_2D_U32_F32_LEVEL
3521 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "grad" form of the unified-mode 2D fetch. Besides the
// coordinates $x and $y it takes four more `intype` operands, printed as two
// brace-enclosed pairs: {$gradx0, $gradx1} and {$grady0, $grady1}.
3523 class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
3524 NVPTXRegClass intype, dag tex>
3525 : NVPTXInst<(outs outtype:$r, outtype:$g,
3526 outtype:$b, outtype:$a),
3527 !con(tex, (ins intype:$x, intype:$y,
3528 intype:$gradx0, intype:$gradx1,
3529 intype:$grady0, intype:$grady1)),
3530 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
3531 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
// Emits two variants: _R takes $t as Int64Regs, _I takes it as i64imm.
3533 multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
3534 NVPTXRegClass intype> {
3535 def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3536 def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// One instantiation per result type; all take Float32Regs inputs.
3539 defm TEX_UNIFIED_2D_F32_F32_GRAD
3540 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3541 defm TEX_UNIFIED_2D_S32_F32_GRAD
3542 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3543 defm TEX_UNIFIED_2D_U32_F32_GRAD
3544 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the unified-mode "a2d" fetches. Inputs are the `tex` dag
// followed by Int32Regs $l and the coordinates $x and $y. The address vector
// is printed with four entries, repeating $y in the last slot.
// NOTE(review): the repeated $y looks like deliberate padding to a 4-element
// vector - confirm against the PTX ISA before "fixing" it.
3546 class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
3547 NVPTXRegClass intype, dag tex>
3548 : NVPTXInst<(outs outtype:$r, outtype:$g,
3549 outtype:$b, outtype:$a),
3550 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
3551 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
// Emits two variants: _R takes $t as Int64Regs, _I takes it as i64imm.
3553 multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
3554 NVPTXRegClass intype> {
3555 def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3556 def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations for each result/coordinate type pairing named in the
// mnemonic. Both s32 and u32 map to Int32Regs.
3559 defm TEX_UNIFIED_2D_ARRAY_F32_S32
3560 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
3561 defm TEX_UNIFIED_2D_ARRAY_F32_F32
3562 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3563 defm TEX_UNIFIED_2D_ARRAY_S32_S32
3564 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
3565 defm TEX_UNIFIED_2D_ARRAY_S32_F32
3566 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3567 defm TEX_UNIFIED_2D_ARRAY_U32_S32
3568 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
3569 defm TEX_UNIFIED_2D_ARRAY_U32_F32
3570 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the "level" form of the unified-mode a2d fetch. Inputs
// start with the `tex` dag, Int32Regs $l and the coordinates $x and $y; the
// asm string also references $lod after the bracketed address. The address
// vector is printed with four entries, repeating $y in the last slot.
3572 class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3573 NVPTXRegClass intype, dag tex>
3574 : NVPTXInst<(outs outtype:$r, outtype:$g,
3575 outtype:$b, outtype:$a),
3576 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3578 inst # " \t\\{$r, $g, $b, $a\\},"
3579 " [$t, \\{$l, $x, $y, $y\\}], $lod;",
// Emits the _R (Int64Regs:$t) and _I variants of the base record.
3581 multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3582 NVPTXRegClass intype> {
3583 def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3584 (ins Int64Regs:$t)>;
3585 def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
// One instantiation per result type; all take Float32Regs inputs.
3589 defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3590 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
3591 Float32Regs, Float32Regs>;
3592 defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3593 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
3594 Int32Regs, Float32Regs>;
3595 defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3596 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
3597 Int32Regs, Float32Regs>;
// Base record for the gradient form of the 2D-array texture fetch.
// Inputs: the `tex` dag, layer index $l, coordinates $x/$y, and two
// two-component gradient vectors ($gradx0/1 and $grady0/1), all of class
// intype except $l which is Int32Regs.
// The coordinate vector is printed with $y repeated to reach four entries;
// the gradient vectors are printed with two entries each.
3599 class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3600 NVPTXRegClass intype, dag tex>
3601 : NVPTXInst<(outs outtype:$r, outtype:$g,
3602 outtype:$b, outtype:$a),
3603 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3604 intype:$gradx0, intype:$gradx1,
3605 intype:$grady0, intype:$grady1)),
3606 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
3607 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
// _R takes the texture handle $t in Int64Regs; the _I counterpart is declared
// right after it.
3609 multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3610 NVPTXRegClass intype> {
3611 def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
3612 (ins Int64Regs:$t)>;
3613 def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
// Only f32 coordinates are instantiated for the tex.grad.a2d form.
3617 defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3618 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
3619 Float32Regs, Float32Regs>;
3620 defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3621 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
3622 Int32Regs, Float32Regs>;
3623 defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3624 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
3625 Int32Regs, Float32Regs>;
// Base record for the 3D texture fetch: four results of class outtype, inputs
// are the `tex` dag followed by the $x/$y/$z coordinates of class intype.
// The coordinate vector is printed as {$x, $y, $z, $z}; $z is repeated so the
// vector has four entries (presumably a PTX requirement - confirm).
3627 class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
3628 NVPTXRegClass intype, dag tex>
3629 : NVPTXInst<(outs outtype:$r, outtype:$g,
3630 outtype:$b, outtype:$a),
3631 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3632 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
// _R: texture handle $t in Int64Regs; _I: texture handle $t as i64imm.
3634 multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
3635 NVPTXRegClass intype> {
3636 def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3637 def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
// All six (f32|s32|u32 result) x (s32|f32 coordinate) combinations.
3640 defm TEX_UNIFIED_3D_F32_S32
3641 : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
3642 defm TEX_UNIFIED_3D_F32_F32
3643 : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3644 defm TEX_UNIFIED_3D_S32_S32
3645 : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
3646 defm TEX_UNIFIED_3D_S32_F32
3647 : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3648 defm TEX_UNIFIED_3D_U32_S32
3649 : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
3650 defm TEX_UNIFIED_3D_U32_F32
3651 : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the 3D texture fetch with an extra $lod input (class
// intype) that is printed after the address operand. Coordinates are printed
// as {$x, $y, $z, $z}, i.e. $z is repeated to fill a four-entry vector.
3653 class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
3654 NVPTXRegClass intype, dag tex>
3655 : NVPTXInst<(outs outtype:$r, outtype:$g,
3656 outtype:$b, outtype:$a),
3657 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3658 inst # " \t\\{$r, $g, $b, $a\\},"
3659 " [$t, \\{$x, $y, $z, $z\\}], $lod;",
// _R: texture handle $t in Int64Regs; _I: texture handle $t as i64imm.
3661 multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
3662 NVPTXRegClass intype> {
3663 def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3664 def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
// Only f32 coordinates are instantiated for the tex.level.3d form.
3667 defm TEX_UNIFIED_3D_F32_F32_LEVEL
3668 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3669 defm TEX_UNIFIED_3D_S32_F32_LEVEL
3670 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3671 defm TEX_UNIFIED_3D_U32_F32_LEVEL
3672 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the gradient form of the 3D texture fetch.
// Inputs after the `tex` dag: coordinates $x/$y/$z and two three-component
// gradients ($gradx0..2, $grady0..2), all of class intype.
// In the asm string the coordinate vector and both gradient vectors are
// printed with their last component repeated, giving four entries each.
3674 class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
3675 NVPTXRegClass intype, dag tex>
3676 : NVPTXInst<(outs outtype:$r, outtype:$g,
3677 outtype:$b, outtype:$a),
3678 !con(tex, (ins intype:$x, intype:$y, intype:$z,
3679 intype:$gradx0, intype:$gradx1,
3680 intype:$gradx2, intype:$grady0,
3681 intype:$grady1, intype:$grady2)),
3682 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
3683 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3684 " \\{$grady0, $grady1, $grady2, $grady2\\};",
// _R: texture handle $t in Int64Regs; _I: texture handle $t as i64imm.
3686 multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
3687 NVPTXRegClass intype> {
3688 def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3689 def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Only f32 coordinates are instantiated for the tex.grad.3d form.
3692 defm TEX_UNIFIED_3D_F32_F32_GRAD
3693 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3694 defm TEX_UNIFIED_3D_S32_F32_GRAD
3695 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3696 defm TEX_UNIFIED_3D_U32_F32_GRAD
3697 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the cube texture fetch. Same operand shape as the 3D form:
// `tex` dag plus $x/$y/$z of class intype, four results of class outtype,
// and a coordinate vector printed as {$x, $y, $z, $z}.
3699 class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
3700 NVPTXRegClass intype, dag tex>
3701 : NVPTXInst<(outs outtype:$r, outtype:$g,
3702 outtype:$b, outtype:$a),
3703 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3704 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
// _R: texture handle $t in Int64Regs; _I: texture handle $t as i64imm.
3706 multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
3707 NVPTXRegClass intype> {
3708 def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3709 def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
// Only f32 coordinates are instantiated for tex.cube.
3712 defm TEX_UNIFIED_CUBE_F32_F32
3713 : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3714 defm TEX_UNIFIED_CUBE_S32_F32
3715 : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3716 defm TEX_UNIFIED_CUBE_U32_F32
3717 : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the cube texture fetch with an extra $lod input (class
// intype) printed after the address operand. Coordinates are printed as
// {$x, $y, $z, $z}.
3719 class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
3720 NVPTXRegClass intype, dag tex>
3721 : NVPTXInst<(outs outtype:$r, outtype:$g,
3722 outtype:$b, outtype:$a),
3723 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3724 inst # " \t\\{$r, $g, $b, $a\\},"
3725 " [$t, \\{$x, $y, $z, $z\\}], $lod;",
// _R takes the texture handle $t in Int64Regs; the _I counterpart is declared
// right after it.
3727 multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
3728 NVPTXRegClass intype> {
3729 def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
3730 (ins Int64Regs:$t)>;
3731 def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
// Only f32 coordinates are instantiated for tex.level.cube.
3735 defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
3736 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
3737 Float32Regs, Float32Regs>;
3738 defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
3739 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
3740 Int32Regs, Float32Regs>;
3741 defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
3742 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
3743 Int32Regs, Float32Regs>;
// Base record for the cube-array texture fetch. Inputs after the `tex` dag:
// layer index $l (Int32Regs) and $x/$y/$z of class intype. Here the four
// printed coordinate entries {$l, $x, $y, $z} are all distinct operands, so
// no operand is repeated.
3745 class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
3746 NVPTXRegClass intype, dag tex>
3747 : NVPTXInst<(outs outtype:$r, outtype:$g,
3748 outtype:$b, outtype:$a),
3749 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
3750 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
// _R takes the texture handle $t in Int64Regs; the _I counterpart is declared
// right after it.
3752 multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3753 NVPTXRegClass intype> {
3754 def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
3755 (ins Int64Regs:$t)>;
3756 def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
// Only f32 coordinates are instantiated for tex.acube.
3760 defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
3761 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3762 defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
3763 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3764 defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
3765 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the cube-array texture fetch whose asm string carries a
// trailing $lod operand. Coordinates are printed as {$l, $x, $y, $z}.
3767 class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3768 NVPTXRegClass intype, dag tex>
3769 : NVPTXInst<(outs outtype:$r, outtype:$g,
3770 outtype:$b, outtype:$a),
3771 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
3773 inst # " \t\\{$r, $g, $b, $a\\},"
3774 " [$t, \\{$l, $x, $y, $z\\}], $lod;",
// _R takes the texture handle $t in Int64Regs; the _I counterpart is declared
// right after it.
3776 multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3777 NVPTXRegClass intype> {
3778 def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3779 (ins Int64Regs:$t)>;
3780 def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
// Only f32 coordinates are instantiated for tex.level.acube.
3784 defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3785 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3786 Float32Regs, Float32Regs>;
3787 defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3788 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3789 Int32Regs, Float32Regs>;
3790 defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3791 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3792 Int32Regs, Float32Regs>;
// Base record for the gradient form of the cube texture fetch.
// Inputs after the `tex` dag: $x/$y/$z and two three-component gradients
// ($gradx0..2, $grady0..2), all of class intype.
// The coordinate vector and both gradient vectors are printed with their last
// component repeated, giving four entries each.
3794 class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype,
3795 NVPTXRegClass intype, dag tex>
3796 : NVPTXInst<(outs outtype:$r, outtype:$g,
3797 outtype:$b, outtype:$a),
3798 !con(tex, (ins intype:$x, intype:$y, intype:$z,
3799 intype:$gradx0, intype:$gradx1,
3800 intype:$gradx2, intype:$grady0,
3801 intype:$grady1, intype:$grady2)),
3802 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
3803 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3804 " \\{$grady0, $grady1, $grady2, $grady2\\};",
// _R: texture handle $t in Int64Regs; _I: texture handle $t as i64imm.
3807 multiclass TEX_UNIFIED_CUBE_GRAD<string inst, NVPTXRegClass outtype,
3808 NVPTXRegClass intype> {
3809 def _R : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3810 def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Only f32 coordinates are instantiated for tex.grad.cube.
3813 defm TEX_UNIFIED_CUBE_F32_F32_GRAD
3814 : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3815 defm TEX_UNIFIED_CUBE_S32_F32_GRAD
3816 : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3817 defm TEX_UNIFIED_CUBE_U32_F32_GRAD
3818 : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for the gradient form of the cube-array texture fetch.
// Inputs after the `tex` dag: layer index $l (Int32Regs), $x/$y/$z, and two
// three-component gradients ($gradx0..2, $grady0..2) of class intype.
// Coordinates are printed as {$l, $x, $y, $z}; each gradient vector is
// printed with its last component repeated to reach four entries.
3820 class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3821 NVPTXRegClass intype, dag tex>
3822 : NVPTXInst<(outs outtype:$r, outtype:$g,
3823 outtype:$b, outtype:$a),
3824 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
3825 intype:$gradx0, intype:$gradx1,
3826 intype:$gradx2, intype:$grady0,
3827 intype:$grady1, intype:$grady2)),
3828 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}],"
3829 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3830 " \\{$grady0, $grady1, $grady2, $grady2\\};",
// _R takes the texture handle $t in Int64Regs; the _I counterpart is declared
// right after it.
3832 multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3833 NVPTXRegClass intype> {
3834 def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
3835 (ins Int64Regs:$t)>;
3836 def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
// Only f32 coordinates are instantiated for tex.grad.acube.
3840 defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD
3841 : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.f32.f32",
3842 Float32Regs, Float32Regs>;
3843 defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD
3844 : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.s32.f32",
3845 Int32Regs, Float32Regs>;
3846 defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD
3847 : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.u32.f32",
3848 Int32Regs, Float32Regs>;
// Base record for the 2D tld4 instruction: four results ($v0..$v3) of class
// outtype; inputs are the `tex` dag followed by $x/$y of class intype. The
// coordinate vector is printed with exactly two entries.
3850 class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3851 NVPTXRegClass intype, dag tex>
3852 : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3853 outtype:$v2, outtype:$v3),
3854 !con(tex, (ins intype:$x, intype:$y)),
3855 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
// _R: texture handle $t in Int64Regs; _I: texture handle $t as i64imm.
3857 multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3858 NVPTXRegClass intype> {
3859 def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3860 def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations: one per component selector (r, g, b, a in the mnemonic,
// R/G/B/A in the record name) for each of the f32, s32 and u32 result types.
// Coordinates are always f32.
3863 defm TLD4_UNIFIED_R_2D_F32_F32
3864 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3865 defm TLD4_UNIFIED_G_2D_F32_F32
3866 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3867 defm TLD4_UNIFIED_B_2D_F32_F32
3868 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3869 defm TLD4_UNIFIED_A_2D_F32_F32
3870 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3872 defm TLD4_UNIFIED_R_2D_S32_F32
3873 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3874 defm TLD4_UNIFIED_G_2D_S32_F32
3875 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3876 defm TLD4_UNIFIED_B_2D_S32_F32
3877 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3878 defm TLD4_UNIFIED_A_2D_S32_F32
3879 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3881 defm TLD4_UNIFIED_R_2D_U32_F32
3882 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3883 defm TLD4_UNIFIED_G_2D_U32_F32
3884 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3885 defm TLD4_UNIFIED_B_2D_U32_F32
3886 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3887 defm TLD4_UNIFIED_A_2D_U32_F32
3888 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3894 //=== Surface load instructions
3896 let IsSuld = true in {
// Base record for a scalar 1D surface load: one result $r of class outtype;
// inputs are the `surf` dag followed by the Int32Regs coordinate $x.
3898 class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
3899 : NVPTXInst<(outs outtype:$r),
3900 !con(surf, (ins Int32Regs:$x)),
3901 inst # " \\{$r\\}, [$s, \\{$x\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
3903 multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
3904 def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
3905 def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
// One instantiation per element width (b8/b16/b32/b64) and per mnemonic
// modifier (.clamp/.trap/.zero, mirrored in the record-name suffix).
// b8 results use Int16Regs, the same class as b16.
3908 defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
3909 defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
3910 defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
3911 defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;
3913 defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
3914 defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
3915 defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
3916 defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;
3918 defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
3919 defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
3920 defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
3921 defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
// Base record for a scalar 1D-array surface load: one result $r; inputs are
// the `surf` dag, layer index $l and coordinate $x (both Int32Regs). The
// coordinate vector is printed as {$l, $x}.
3923 class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
3924 : NVPTXInst<(outs outtype:$r),
3925 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
3926 inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
3928 multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
3929 def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
3930 def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
// Element widths b8/b16/b32/b64 for each of the .clamp/.trap/.zero
// modifiers; b8 results use Int16Regs.
3933 defm SULD_1D_ARRAY_I8_CLAMP
3934 : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
3935 defm SULD_1D_ARRAY_I16_CLAMP
3936 : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
3937 defm SULD_1D_ARRAY_I32_CLAMP
3938 : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
3939 defm SULD_1D_ARRAY_I64_CLAMP
3940 : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;
3942 defm SULD_1D_ARRAY_I8_TRAP
3943 : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
3944 defm SULD_1D_ARRAY_I16_TRAP
3945 : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
3946 defm SULD_1D_ARRAY_I32_TRAP
3947 : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
3948 defm SULD_1D_ARRAY_I64_TRAP
3949 : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;
3951 defm SULD_1D_ARRAY_I8_ZERO
3952 : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
3953 defm SULD_1D_ARRAY_I16_ZERO
3954 : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
3955 defm SULD_1D_ARRAY_I32_ZERO
3956 : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
3957 defm SULD_1D_ARRAY_I64_ZERO
3958 : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
// Base record for a scalar 2D surface load: one result $r; inputs are the
// `surf` dag followed by the Int32Regs coordinates $x and $y.
3960 class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
3961 : NVPTXInst<(outs outtype:$r),
3962 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
3963 inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
3965 multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
3966 def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
3967 def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
// Element widths b8/b16/b32/b64 for each of the .clamp/.trap/.zero
// modifiers; b8 results use Int16Regs.
3970 defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
3971 defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
3972 defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
3973 defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;
3975 defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
3976 defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
3977 defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
3978 defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;
3980 defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
3981 defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
3982 defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
3983 defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
// Base record for a scalar 2D-array surface load: one result $r; inputs are
// the `surf` dag, layer index $l, and coordinates $x/$y (all Int32Regs).
// The coordinate vector is printed as {$l, $x, $y, $y}: $y is repeated so the
// vector has four entries (presumably a PTX requirement - confirm).
3985 class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
3986 : NVPTXInst<(outs outtype:$r),
3987 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
3988 inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
3990 multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
3991 def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
3992 def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
// Element widths b8/b16/b32/b64 for each of the .clamp/.trap/.zero
// modifiers; b8 results use Int16Regs.
3995 defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
3996 defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
3997 defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
3998 defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;
4000 defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
4001 defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
4002 defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
4003 defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;
4005 defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
4006 defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
4007 defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
4008 defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
// Base record for a scalar 3D surface load: one result $r; inputs are the
// `surf` dag followed by the Int32Regs coordinates $x/$y/$z.
// The coordinate vector is printed as {$x, $y, $z, $z}, i.e. $z is repeated
// to fill a four-entry vector.
4010 class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
4011 : NVPTXInst<(outs outtype:$r),
4012 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4013 inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
4015 multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
4016 def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
4017 def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
// Element widths b8/b16/b32/b64 for each of the .clamp/.trap/.zero
// modifiers; b8 results use Int16Regs.
4020 defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
4021 defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
4022 defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
4023 defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;
4025 defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
4026 defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
4027 defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
4028 defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;
4030 defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
4031 defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
4032 defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
4033 defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
// Base record for a two-element (v2) 1D surface load: results $r and $g of
// class outtype; inputs are the `surf` dag followed by the Int32Regs
// coordinate $x.
4038 class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4039 : NVPTXInst<(outs outtype:$r, outtype:$g),
4040 !con(surf, (ins Int32Regs:$x)),
4041 inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
4043 multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
4044 def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4045 def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Element widths b8/b16/b32/b64 for each of the .clamp/.trap/.zero
// modifiers; b8 results use Int16Regs.
4048 defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
4049 defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
4050 defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
4051 defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;
4053 defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
4054 defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
4055 defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
4056 defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;
4058 defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
4059 defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
4060 defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
4061 defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
// Base record for a two-element (v2) 1D-array surface load: results $r and
// $g; inputs are the `surf` dag, layer index $l and coordinate $x (both
// Int32Regs). The coordinate vector is printed as {$l, $x}.
4063 class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4064 : NVPTXInst<(outs outtype:$r, outtype:$g),
4065 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
4066 inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
4068 multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
4069 def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4070 def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
// Element widths b8/b16/b32/b64 for each of the .clamp/.trap/.zero
// modifiers; b8 results use Int16Regs.
4073 defm SULD_1D_ARRAY_V2I8_CLAMP
4074 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
4075 defm SULD_1D_ARRAY_V2I16_CLAMP
4076 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
4077 defm SULD_1D_ARRAY_V2I32_CLAMP
4078 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
4079 defm SULD_1D_ARRAY_V2I64_CLAMP
4080 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;
4082 defm SULD_1D_ARRAY_V2I8_TRAP
4083 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
4084 defm SULD_1D_ARRAY_V2I16_TRAP
4085 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
4086 defm SULD_1D_ARRAY_V2I32_TRAP
4087 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
4088 defm SULD_1D_ARRAY_V2I64_TRAP
4089 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;
4091 defm SULD_1D_ARRAY_V2I8_ZERO
4092 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
4093 defm SULD_1D_ARRAY_V2I16_ZERO
4094 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
4095 defm SULD_1D_ARRAY_V2I32_ZERO
4096 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
4097 defm SULD_1D_ARRAY_V2I64_ZERO
4098 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
// Base record for a two-element (v2) 2D surface load: results $r and $g;
// inputs are the `surf` dag followed by the Int32Regs coordinates $x and $y.
4100 class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4101 : NVPTXInst<(outs outtype:$r, outtype:$g),
4102 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4103 inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
4105 multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
4106 def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4107 def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Element widths b8/b16/b32/b64 for each of the .clamp/.trap/.zero
// modifiers; b8 results use Int16Regs.
4110 defm SULD_2D_V2I8_CLAMP
4111 : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
4112 defm SULD_2D_V2I16_CLAMP
4113 : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
4114 defm SULD_2D_V2I32_CLAMP
4115 : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
4116 defm SULD_2D_V2I64_CLAMP
4117 : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;
4119 defm SULD_2D_V2I8_TRAP
4120 : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
4121 defm SULD_2D_V2I16_TRAP
4122 : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
4123 defm SULD_2D_V2I32_TRAP
4124 : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
4125 defm SULD_2D_V2I64_TRAP
4126 : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;
4128 defm SULD_2D_V2I8_ZERO
4129 : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
4130 defm SULD_2D_V2I16_ZERO
4131 : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
4132 defm SULD_2D_V2I32_ZERO
4133 : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
4134 defm SULD_2D_V2I64_ZERO
4135 : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
// Base record for a two-element (v2) 2D-array surface load: results $r and
// $g; inputs are the `surf` dag, layer index $l, and coordinates $x/$y (all
// Int32Regs). The coordinate vector is printed as {$l, $x, $y, $y}, with $y
// repeated to fill four entries.
4137 class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4138 : NVPTXInst<(outs outtype:$r, outtype:$g),
4139 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4140 inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
4142 multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
4143 def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4144 def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
// Element widths b8/b16/b32/b64 for each of the .clamp/.trap/.zero
// modifiers; b8 results use Int16Regs.
4147 defm SULD_2D_ARRAY_V2I8_CLAMP
4148 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
4149 defm SULD_2D_ARRAY_V2I16_CLAMP
4150 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
4151 defm SULD_2D_ARRAY_V2I32_CLAMP
4152 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
4153 defm SULD_2D_ARRAY_V2I64_CLAMP
4154 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;
4156 defm SULD_2D_ARRAY_V2I8_TRAP
4157 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
4158 defm SULD_2D_ARRAY_V2I16_TRAP
4159 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
4160 defm SULD_2D_ARRAY_V2I32_TRAP
4161 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
4162 defm SULD_2D_ARRAY_V2I64_TRAP
4163 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;
4165 defm SULD_2D_ARRAY_V2I8_ZERO
4166 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
4167 defm SULD_2D_ARRAY_V2I16_ZERO
4168 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
4169 defm SULD_2D_ARRAY_V2I32_ZERO
4170 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
4171 defm SULD_2D_ARRAY_V2I64_ZERO
4172 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
// Base record for a two-element (v2) 3D surface load: results $r and $g;
// inputs are the `surf` dag followed by the Int32Regs coordinates $x/$y/$z.
// The coordinate vector is printed as {$x, $y, $z, $z}.
4174 class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4175 : NVPTXInst<(outs outtype:$r, outtype:$g),
4176 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4177 inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
4179 multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
4180 def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4181 def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Element widths b8/b16/b32/b64 for each of the .clamp/.trap/.zero
// modifiers; b8 results use Int16Regs.
4184 defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
4185 defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
4186 defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
4187 defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;
4189 defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
4190 defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
4191 defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
4192 defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;
4194 defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
4195 defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
4196 defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
4197 defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
// Base record for a four-element (v4) 1D surface load: results $r, $g, $b,
// $a of class outtype; inputs are the `surf` dag followed by the Int32Regs
// coordinate $x.
4203 class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4204 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4205 !con(surf, (ins Int32Regs:$x)),
4206 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
4208 multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
4209 def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4210 def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Unlike the scalar and v2 families, only b8/b16/b32 element widths are
// instantiated here (no b64), for each of the .clamp/.trap/.zero modifiers.
4213 defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
4214 defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
4215 defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
4217 defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
4218 defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
4219 defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
4221 defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
4222 defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
4223 defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
// Base record for a four-element (v4) 1D-array surface load: results $r, $g,
// $b, $a; inputs are the `surf` dag, layer index $l and coordinate $x (both
// Int32Regs). The coordinate vector is printed as {$l, $x}.
4225 class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4226 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4227 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
4228 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
4230 multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4231 def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4232 def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
// Only b8/b16/b32 element widths are instantiated (no b64), for each of the
// .clamp/.trap/.zero modifiers.
4235 defm SULD_1D_ARRAY_V4I8_CLAMP
4236 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
4237 defm SULD_1D_ARRAY_V4I16_CLAMP
4238 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
4239 defm SULD_1D_ARRAY_V4I32_CLAMP
4240 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
4242 defm SULD_1D_ARRAY_V4I8_TRAP
4243 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
4244 defm SULD_1D_ARRAY_V4I16_TRAP
4245 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
4246 defm SULD_1D_ARRAY_V4I32_TRAP
4247 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
4249 defm SULD_1D_ARRAY_V4I8_ZERO
4250 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
4251 defm SULD_1D_ARRAY_V4I16_ZERO
4252 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
4253 defm SULD_1D_ARRAY_V4I32_ZERO
4254 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
// Base record for a four-element (v4) 2D surface load: results $r, $g, $b,
// $a; inputs are the `surf` dag followed by the Int32Regs coordinates $x/$y.
4256 class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4257 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4258 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4259 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
4261 multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
4262 def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4263 def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Only b8/b16/b32 element widths are instantiated (no b64), for each of the
// .clamp/.trap/.zero modifiers.
4266 defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
4267 defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
4268 defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
4270 defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
4271 defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
4272 defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
4274 defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
4275 defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
4276 defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
// Base record for a four-element (v4) 2D-array surface load: results $r, $g,
// $b, $a; inputs are the `surf` dag, layer index $l, and coordinates $x/$y
// (all Int32Regs). The coordinate vector is printed as {$l, $x, $y, $y},
// with $y repeated to fill four entries.
4278 class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4279 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4280 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4281 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
4283 multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4284 def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4285 def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
// Only b8/b16/b32 element widths are instantiated (no b64), for each of the
// .clamp/.trap/.zero modifiers.
4288 defm SULD_2D_ARRAY_V4I8_CLAMP
4289 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
4290 defm SULD_2D_ARRAY_V4I16_CLAMP
4291 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
4292 defm SULD_2D_ARRAY_V4I32_CLAMP
4293 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
4295 defm SULD_2D_ARRAY_V4I8_TRAP
4296 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
4297 defm SULD_2D_ARRAY_V4I16_TRAP
4298 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
4299 defm SULD_2D_ARRAY_V4I32_TRAP
4300 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
4302 defm SULD_2D_ARRAY_V4I8_ZERO
4303 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
4304 defm SULD_2D_ARRAY_V4I16_ZERO
4305 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
4306 defm SULD_2D_ARRAY_V4I32_ZERO
4307 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
// Base record for a four-element (v4) 3D surface load: results $r, $g, $b,
// $a; inputs are the `surf` dag followed by the Int32Regs coordinates
// $x/$y/$z. The coordinate vector is printed as {$x, $y, $z, $z}.
4309 class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4310 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4311 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4312 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
// _R: surface handle $s in Int64Regs; _I: surface handle $s as i64imm.
4314 multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
4315 def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4316 def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Only b8/b16/b32 element widths are instantiated (no b64), for each of the
// .clamp/.trap/.zero modifiers.
4319 defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
4320 defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
4321 defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
4323 defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
4324 defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
4325 defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
4327 defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
4328 defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
4329 defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
4333 //-----------------------------------
4334 // Texture Query Intrinsics
4335 //-----------------------------------
// txq.<attribute>.b32 instructions, all tagged IsSurfTexQuery = true. Each
// writes a 32-bit result $d (Int32Regs) for the operand $a and is emitted as
// "txq.<attribute>.b32 $d, [$a];". Every attribute has two forms: one taking
// $a in Int64Regs and one taking $a as an i64imm.
// Attributes: channel_order, channel_data_type, width, height, depth,
// array_size, num_samples, num_mipmap_levels.
// NOTE(review): $a is presumably the texture handle - confirm.
4337 let IsSurfTexQuery = true in {
4338 def TXQ_CHANNEL_ORDER_R
4339 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4340 "txq.channel_order.b32 \t$d, [$a];",
4342 def TXQ_CHANNEL_ORDER_I
4343 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4344 "txq.channel_order.b32 \t$d, [$a];",
4346 def TXQ_CHANNEL_DATA_TYPE_R
4347 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4348 "txq.channel_data_type.b32 \t$d, [$a];",
4350 def TXQ_CHANNEL_DATA_TYPE_I
4351 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4352 "txq.channel_data_type.b32 \t$d, [$a];",
4355 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4356 "txq.width.b32 \t$d, [$a];",
4359 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4360 "txq.width.b32 \t$d, [$a];",
4363 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4364 "txq.height.b32 \t$d, [$a];",
4367 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4368 "txq.height.b32 \t$d, [$a];",
4371 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4372 "txq.depth.b32 \t$d, [$a];",
4375 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4376 "txq.depth.b32 \t$d, [$a];",
4378 def TXQ_ARRAY_SIZE_R
4379 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4380 "txq.array_size.b32 \t$d, [$a];",
4382 def TXQ_ARRAY_SIZE_I
4383 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4384 "txq.array_size.b32 \t$d, [$a];",
4386 def TXQ_NUM_SAMPLES_R
4387 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4388 "txq.num_samples.b32 \t$d, [$a];",
4390 def TXQ_NUM_SAMPLES_I
4391 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4392 "txq.num_samples.b32 \t$d, [$a];",
4394 def TXQ_NUM_MIPMAP_LEVELS_R
4395 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4396 "txq.num_mipmap_levels.b32 \t$d, [$a];",
4398 def TXQ_NUM_MIPMAP_LEVELS_I
4399 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4400 "txq.num_mipmap_levels.b32 \t$d, [$a];",
// Selection patterns mapping each int_nvvm_txq_* intrinsic, applied to an
// Int64Regs operand, onto the matching TXQ_*_R instruction. The patterns in
// this group only target the _R (register operand) forms.
4404 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4405 (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
4406 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4407 (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
4408 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4409 (TXQ_WIDTH_R Int64Regs:$a)>;
4410 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4411 (TXQ_HEIGHT_R Int64Regs:$a)>;
4412 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4413 (TXQ_DEPTH_R Int64Regs:$a)>;
4414 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4415 (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
4416 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4417 (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
4418 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4419 (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
4422 //-----------------------------------
4423 // Surface Query Intrinsics
4424 //-----------------------------------
// suq.<attribute>.b32 instructions, all tagged IsSurfTexQuery = true. Each
// writes a 32-bit result $d (Int32Regs) for the operand $a and is emitted as
// "suq.<attribute>.b32 $d, [$a];". Every attribute has two forms: one taking
// $a in Int64Regs and one taking $a as an i64imm.
// Attributes: channel_order, channel_data_type, width, height, depth,
// array_size.
// NOTE(review): $a is presumably the surface handle - confirm.
4426 let IsSurfTexQuery = true in {
4427 def SUQ_CHANNEL_ORDER_R
4428 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4429 "suq.channel_order.b32 \t$d, [$a];",
4431 def SUQ_CHANNEL_ORDER_I
4432 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4433 "suq.channel_order.b32 \t$d, [$a];",
4435 def SUQ_CHANNEL_DATA_TYPE_R
4436 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4437 "suq.channel_data_type.b32 \t$d, [$a];",
4439 def SUQ_CHANNEL_DATA_TYPE_I
4440 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4441 "suq.channel_data_type.b32 \t$d, [$a];",
4444 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4445 "suq.width.b32 \t$d, [$a];",
4448 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4449 "suq.width.b32 \t$d, [$a];",
4452 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4453 "suq.height.b32 \t$d, [$a];",
4456 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4457 "suq.height.b32 \t$d, [$a];",
4460 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4461 "suq.depth.b32 \t$d, [$a];",
4464 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4465 "suq.depth.b32 \t$d, [$a];",
4467 def SUQ_ARRAY_SIZE_R
4468 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4469 "suq.array_size.b32 \t$d, [$a];",
4471 def SUQ_ARRAY_SIZE_I
4472 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4473 "suq.array_size.b32 \t$d, [$a];",
// Selection patterns mapping each int_nvvm_suq_* intrinsic, applied to an
// Int64Regs operand, onto the matching SUQ_*_R instruction. The patterns in
// this group only target the _R (register operand) forms.
4477 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4478 (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
4479 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4480 (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
4481 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4482 (SUQ_WIDTH_R Int64Regs:$a)>;
4483 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4484 (SUQ_HEIGHT_R Int64Regs:$a)>;
4485 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4486 (SUQ_DEPTH_R Int64Regs:$a)>;
4487 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4488 (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
4491 //===- Handle Query -------------------------------------------------------===//
4493 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// istypep.samplerref / istypep.surfref / istypep.texref: each takes a 64-bit
// operand $a (Int64Regs) and sets the Int1Regs result $d. Unlike the query
// instructions above, the selection pattern is attached directly to the
// instruction, matching int_nvvm_istypep_sampler, int_nvvm_istypep_surface
// and int_nvvm_istypep_texture respectively.
4495 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4496 "istypep.samplerref \t$d, $a;",
4497 [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4499 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4500 "istypep.surfref \t$d, $a;",
4501 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4503 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4504 "istypep.texref \t$d, $a;",
4505 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4507 //===- Surface Stores -----------------------------------------------------===//
// Definitions inside this `let` are tagged IsSust = true.
4509 let IsSust = true in {
// sust.*.1d.*: store of a single value $r (register class `intype`) through
// the handle operand $s (supplied via `surf`) at the Int32Regs coordinate $x.
4511 class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
4513 !con(surf, (ins Int32Regs:$x, intype:$r)),
4514 inst # " \t[$s, \\{$x\\}], \\{$r\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
4516 multiclass SUST_1D<string inst, NVPTXRegClass intype> {
4517 def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
4518 def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
// sust.b comes in b8/b16/b32/b64 with .clamp, .trap and .zero; sust.p only in
// b8/b16/b32 with .trap. b8 and b16 both use Int16Regs.
4521 defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
4522 defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
4523 defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
4524 defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
4526 defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
4527 defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
4528 defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
4529 defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
4531 defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
4532 defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
4533 defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
4534 defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
4536 defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
4537 defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
4538 defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
// sust.*.1d.v2.*: store of two values ($r, $g) of register class `intype`
// through the handle operand $s (supplied via `surf`) at coordinate $x.
4540 class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4542 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
4543 inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
4545 multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
4546 def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4547 def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
// sust.b comes in b8/b16/b32/b64 with .clamp, .trap and .zero; sust.p only in
// b8/b16/b32 with .trap. b8 and b16 both use Int16Regs.
4550 defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
4551 defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
4552 defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
4553 defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
4555 defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
4556 defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
4557 defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
4558 defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
4560 defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
4561 defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
4562 defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
4563 defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
4565 defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
4566 defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
4567 defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
// sust.*.1d.v4.*: store of four values ($r, $g, $b, $a) of register class
// `intype` through the handle operand $s (supplied via `surf`) at
// coordinate $x.
4569 class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4571 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g,
4572 intype:$b, intype:$a)),
4573 inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
4575 multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
4576 def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4577 def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
// Only b8/b16/b32 are instantiated for v4 (no b64): sust.b with .clamp, .trap
// and .zero; sust.p with .trap. b8 and b16 both use Int16Regs.
4580 defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
4581 defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
4582 defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
4584 defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
4585 defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
4586 defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
4588 defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
4589 defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
4590 defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
4592 defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
4593 defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
4594 defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
// sust.*.a1d.*: store of a single value $r (register class `intype`) through
// the handle operand $s (supplied via `surf`), addressed by the Int32Regs
// pair ($idx, $x).
// NOTE(review): $idx is presumably the array layer index - confirm.
4596 class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4598 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
4599 inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
4601 multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
4602 def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4603 def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
// sust.b comes in b8/b16/b32/b64 with .clamp, .trap and .zero; sust.p only in
// b8/b16/b32 with .trap. b8 and b16 both use Int16Regs.
4606 defm SUST_B_1D_ARRAY_B8_CLAMP
4607 : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
4608 defm SUST_B_1D_ARRAY_B16_CLAMP
4609 : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
4610 defm SUST_B_1D_ARRAY_B32_CLAMP
4611 : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
4612 defm SUST_B_1D_ARRAY_B64_CLAMP
4613 : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
4615 defm SUST_B_1D_ARRAY_B8_TRAP
4616 : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
4617 defm SUST_B_1D_ARRAY_B16_TRAP
4618 : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
4619 defm SUST_B_1D_ARRAY_B32_TRAP
4620 : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
4621 defm SUST_B_1D_ARRAY_B64_TRAP
4622 : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
4624 defm SUST_B_1D_ARRAY_B8_ZERO
4625 : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
4626 defm SUST_B_1D_ARRAY_B16_ZERO
4627 : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
4628 defm SUST_B_1D_ARRAY_B32_ZERO
4629 : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
4630 defm SUST_B_1D_ARRAY_B64_ZERO
4631 : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
4633 defm SUST_P_1D_ARRAY_B8_TRAP
4634 : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
4635 defm SUST_P_1D_ARRAY_B16_TRAP
4636 : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
4637 defm SUST_P_1D_ARRAY_B32_TRAP
4638 : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
// sust.*.a1d.v2.*: store of two values ($r, $g) of register class `intype`
// through the handle operand $s (supplied via `surf`), addressed by the
// Int32Regs pair ($idx, $x).
// NOTE(review): $idx is presumably the array layer index - confirm.
4640 class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4642 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4643 intype:$r, intype:$g)),
4644 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
4646 multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4647 def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4648 def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
// sust.b comes in b8/b16/b32/b64 with .clamp, .trap and .zero; sust.p only in
// b8/b16/b32 with .trap. b8 and b16 both use Int16Regs.
4651 defm SUST_B_1D_ARRAY_V2B8_CLAMP
4652 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
4653 defm SUST_B_1D_ARRAY_V2B16_CLAMP
4654 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
4655 defm SUST_B_1D_ARRAY_V2B32_CLAMP
4656 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
4657 defm SUST_B_1D_ARRAY_V2B64_CLAMP
4658 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
4660 defm SUST_B_1D_ARRAY_V2B8_TRAP
4661 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
4662 defm SUST_B_1D_ARRAY_V2B16_TRAP
4663 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
4664 defm SUST_B_1D_ARRAY_V2B32_TRAP
4665 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
4666 defm SUST_B_1D_ARRAY_V2B64_TRAP
4667 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
4669 defm SUST_B_1D_ARRAY_V2B8_ZERO
4670 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
4671 defm SUST_B_1D_ARRAY_V2B16_ZERO
4672 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
4673 defm SUST_B_1D_ARRAY_V2B32_ZERO
4674 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
4675 defm SUST_B_1D_ARRAY_V2B64_ZERO
4676 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
4678 defm SUST_P_1D_ARRAY_V2B8_TRAP
4679 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
4680 defm SUST_P_1D_ARRAY_V2B16_TRAP
4681 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
4682 defm SUST_P_1D_ARRAY_V2B32_TRAP
4683 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
// sust.*.a1d.v4.*: store of four values ($r, $g, $b, $a) of register class
// `intype` through the handle operand $s (supplied via `surf`), addressed by
// the Int32Regs pair ($idx, $x).
// NOTE(review): $idx is presumably the array layer index - confirm.
4685 class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4687 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4688 intype:$r, intype:$g, intype:$b, intype:$a)),
4689 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
4691 multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4692 def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4693 def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
// Only b8/b16/b32 are instantiated for v4 (no b64): sust.b with .clamp, .trap
// and .zero; sust.p with .trap. b8 and b16 both use Int16Regs.
4696 defm SUST_B_1D_ARRAY_V4B8_CLAMP
4697 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
4698 defm SUST_B_1D_ARRAY_V4B16_CLAMP
4699 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
4700 defm SUST_B_1D_ARRAY_V4B32_CLAMP
4701 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
4703 defm SUST_B_1D_ARRAY_V4B8_TRAP
4704 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
4705 defm SUST_B_1D_ARRAY_V4B16_TRAP
4706 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
4707 defm SUST_B_1D_ARRAY_V4B32_TRAP
4708 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
4710 defm SUST_B_1D_ARRAY_V4B8_ZERO
4711 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
4712 defm SUST_B_1D_ARRAY_V4B16_ZERO
4713 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
4714 defm SUST_B_1D_ARRAY_V4B32_ZERO
4715 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
4717 defm SUST_P_1D_ARRAY_V4B8_TRAP
4718 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
4719 defm SUST_P_1D_ARRAY_V4B16_TRAP
4720 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
4721 defm SUST_P_1D_ARRAY_V4B32_TRAP
4722 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
// sust.*.2d.*: store of a single value $r (register class `intype`) through
// the handle operand $s (supplied via `surf`) at the Int32Regs coordinates
// ($x, $y).
4724 class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
4726 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
4727 inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
4729 multiclass SUST_2D<string inst, NVPTXRegClass intype> {
4730 def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
4731 def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
// sust.b comes in b8/b16/b32/b64 with .clamp, .trap and .zero; sust.p only in
// b8/b16/b32 with .trap. b8 and b16 both use Int16Regs.
4734 defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
4735 defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
4736 defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
4737 defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
4739 defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
4740 defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
4741 defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
4742 defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
4744 defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
4745 defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
4746 defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
4747 defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
4749 defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
4750 defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
4751 defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
// sust.*.2d.v2.*: store of two values ($r, $g) of register class `intype`
// through the handle operand $s (supplied via `surf`) at the Int32Regs
// coordinates ($x, $y).
4753 class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4755 !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4756 intype:$r, intype:$g)),
4757 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
4759 multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
4760 def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4761 def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
// sust.b comes in b8/b16/b32/b64 with .clamp, .trap and .zero; sust.p only in
// b8/b16/b32 with .trap. b8 and b16 both use Int16Regs.
4764 defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
4765 defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
4766 defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
4767 defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
4769 defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
4770 defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
4771 defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
4772 defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
4774 defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
4775 defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
4776 defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
4777 defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
4779 defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
4780 defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
4781 defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
// sust.*.2d.v4.*: store of four values ($r, $g, $b, $a) of register class
// `intype` through the handle operand $s (supplied via `surf`) at the
// Int32Regs coordinates ($x, $y).
4783 class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4785 !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4786 intype:$r, intype:$g, intype:$b, intype:$a)),
4787 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
4789 multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
4790 def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4791 def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
// Only b8/b16/b32 are instantiated for v4 (no b64): sust.b with .clamp, .trap
// and .zero; sust.p with .trap. b8 and b16 both use Int16Regs.
4794 defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
4795 defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
4796 defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;
4798 defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
4799 defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
4800 defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;
4802 defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
4803 defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
4804 defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;
4806 defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
4807 defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
4808 defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
// sust.*.a2d.*: store through the handle operand $s (supplied via `surf`),
// addressed by the Int32Regs operands ($idx, $x, $y). The assembly string
// prints $r as the single stored value and a four-element coordinate vector,
// so $y is emitted twice.
// NOTE(review): $idx is presumably the array layer and the repeated $y padding
// required by the PTX a2d coordinate format - confirm against the PTX ISA.
4810 class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4812 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4814 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
4816 multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
4817 def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4818 def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
// sust.b comes in b8/b16/b32/b64 with .clamp, .trap and .zero; sust.p only in
// b8/b16/b32 with .trap. b8 and b16 both use Int16Regs.
4821 defm SUST_B_2D_ARRAY_B8_CLAMP
4822 : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
4823 defm SUST_B_2D_ARRAY_B16_CLAMP
4824 : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
4825 defm SUST_B_2D_ARRAY_B32_CLAMP
4826 : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
4827 defm SUST_B_2D_ARRAY_B64_CLAMP
4828 : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;
4830 defm SUST_B_2D_ARRAY_B8_TRAP
4831 : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
4832 defm SUST_B_2D_ARRAY_B16_TRAP
4833 : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
4834 defm SUST_B_2D_ARRAY_B32_TRAP
4835 : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
4836 defm SUST_B_2D_ARRAY_B64_TRAP
4837 : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;
4839 defm SUST_B_2D_ARRAY_B8_ZERO
4840 : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
4841 defm SUST_B_2D_ARRAY_B16_ZERO
4842 : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
4843 defm SUST_B_2D_ARRAY_B32_ZERO
4844 : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
4845 defm SUST_B_2D_ARRAY_B64_ZERO
4846 : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;
4848 defm SUST_P_2D_ARRAY_B8_TRAP
4849 : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
4850 defm SUST_P_2D_ARRAY_B16_TRAP
4851 : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
4852 defm SUST_P_2D_ARRAY_B32_TRAP
4853 : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
// sust.*.a2d.v2.*: store of two values ($r, $g) of register class `intype`
// through the handle operand $s (supplied via `surf`), addressed by the
// Int32Regs operands ($idx, $x, $y). The assembly string prints a
// four-element coordinate vector, so $y is emitted twice.
// NOTE(review): $idx is presumably the array layer and the repeated $y padding
// required by the PTX a2d coordinate format - confirm against the PTX ISA.
4855 class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4857 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4858 intype:$r, intype:$g)),
4859 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
4861 multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4862 def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4863 def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
// sust.b comes in b8/b16/b32/b64 with .clamp, .trap and .zero; sust.p only in
// b8/b16/b32 with .trap. b8 and b16 both use Int16Regs.
4866 defm SUST_B_2D_ARRAY_V2B8_CLAMP
4867 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
4868 defm SUST_B_2D_ARRAY_V2B16_CLAMP
4869 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
4870 defm SUST_B_2D_ARRAY_V2B32_CLAMP
4871 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
4872 defm SUST_B_2D_ARRAY_V2B64_CLAMP
4873 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;
4875 defm SUST_B_2D_ARRAY_V2B8_TRAP
4876 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
4877 defm SUST_B_2D_ARRAY_V2B16_TRAP
4878 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
4879 defm SUST_B_2D_ARRAY_V2B32_TRAP
4880 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
4881 defm SUST_B_2D_ARRAY_V2B64_TRAP
4882 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;
4884 defm SUST_B_2D_ARRAY_V2B8_ZERO
4885 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
4886 defm SUST_B_2D_ARRAY_V2B16_ZERO
4887 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
4888 defm SUST_B_2D_ARRAY_V2B32_ZERO
4889 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
4890 defm SUST_B_2D_ARRAY_V2B64_ZERO
4891 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;
4893 defm SUST_P_2D_ARRAY_V2B8_TRAP
4894 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
4895 defm SUST_P_2D_ARRAY_V2B16_TRAP
4896 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
4897 defm SUST_P_2D_ARRAY_V2B32_TRAP
4898 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
// sust.*.a2d.v4.*: store of four values ($r, $g, $b, $a) of register class
// `intype` through the handle operand $s (supplied via `surf`), addressed by
// the Int32Regs operands ($idx, $x, $y). The assembly string prints a
// four-element coordinate vector, so $y is emitted twice.
// NOTE(review): $idx is presumably the array layer and the repeated $y padding
// required by the PTX a2d coordinate format - confirm against the PTX ISA.
4900 class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4902 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4903 intype:$r, intype:$g, intype:$b, intype:$a)),
4904 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
4906 multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4907 def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4908 def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
// Only b8/b16/b32 are instantiated for v4 (no b64): sust.b with .clamp, .trap
// and .zero; sust.p with .trap. b8 and b16 both use Int16Regs.
4911 defm SUST_B_2D_ARRAY_V4B8_CLAMP
4912 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
4913 defm SUST_B_2D_ARRAY_V4B16_CLAMP
4914 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
4915 defm SUST_B_2D_ARRAY_V4B32_CLAMP
4916 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
4918 defm SUST_B_2D_ARRAY_V4B8_TRAP
4919 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
4920 defm SUST_B_2D_ARRAY_V4B16_TRAP
4921 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
4922 defm SUST_B_2D_ARRAY_V4B32_TRAP
4923 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
4925 defm SUST_B_2D_ARRAY_V4B8_ZERO
4926 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
4927 defm SUST_B_2D_ARRAY_V4B16_ZERO
4928 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
4929 defm SUST_B_2D_ARRAY_V4B32_ZERO
4930 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
4932 defm SUST_P_2D_ARRAY_V4B8_TRAP
4933 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
4934 defm SUST_P_2D_ARRAY_V4B16_TRAP
4935 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
4936 defm SUST_P_2D_ARRAY_V4B32_TRAP
4937 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
// sust.*.3d.*: store through the handle operand $s (supplied via `surf`) at
// the Int32Regs coordinates ($x, $y, $z). The assembly string prints $r as
// the single stored value and a four-element coordinate vector, so $z is
// emitted twice.
// NOTE(review): the repeated $z is presumably padding required by the PTX 3d
// coordinate format - confirm against the PTX ISA.
4939 class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
4941 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4943 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
4945 multiclass SUST_3D<string inst, NVPTXRegClass intype> {
4946 def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
4947 def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
// sust.b comes in b8/b16/b32/b64 with .clamp, .trap and .zero; sust.p only in
// b8/b16/b32 with .trap. b8 and b16 both use Int16Regs.
4950 defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
4951 defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
4952 defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
4953 defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;
4955 defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
4956 defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
4957 defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
4958 defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;
4960 defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
4961 defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
4962 defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
4963 defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;
4965 defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
4966 defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
4967 defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
// sust.*.3d.v2.*: store of two values ($r, $g) of register class `intype`
// through the handle operand $s (supplied via `surf`) at the Int32Regs
// coordinates ($x, $y, $z). The assembly string prints a four-element
// coordinate vector, so $z is emitted twice.
// NOTE(review): the repeated $z is presumably padding required by the PTX 3d
// coordinate format - confirm against the PTX ISA.
4969 class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4971 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4972 intype:$r, intype:$g)),
4973 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
4975 multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
4976 def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4977 def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
// sust.b comes in b8/b16/b32/b64 with .clamp, .trap and .zero; sust.p only in
// b8/b16/b32 with .trap. b8 and b16 both use Int16Regs.
4980 defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
4981 defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
4982 defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
4983 defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;
4985 defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
4986 defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
4987 defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
4988 defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;
4990 defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
4991 defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
4992 defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
4993 defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;
4995 defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
4996 defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
4997 defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
// sust.*.3d.v4.*: store of four values ($r, $g, $b, $a) of register class
// `intype` through the handle operand $s (supplied via `surf`) at the
// Int32Regs coordinates ($x, $y, $z). The assembly string prints a
// four-element coordinate vector, so $z is emitted twice.
// NOTE(review): the repeated $z is presumably padding required by the PTX 3d
// coordinate format - confirm against the PTX ISA.
4999 class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
5001 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5002 intype:$r, intype:$g, intype:$b, intype:$a)),
5003 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
// Two forms per instruction: _R takes $s in Int64Regs, _I takes $s as i64imm.
5005 multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
5006 def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
5007 def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
// Only b8/b16/b32 are instantiated for v4 (no b64): sust.b with .clamp, .trap
// and .zero; sust.p with .trap. b8 and b16 both use Int16Regs.
5010 defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
5011 defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
5012 defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
5014 defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
5015 defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
5016 defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
5018 defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
5019 defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
5020 defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
5022 defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
5023 defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
5024 defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
5028 // Surface store instruction patterns
5029 // These are written as standalone patterns rather than inside the instruction
5030 // definitions because TableGen reports type errors there (cause not understood).
// Scalar sust.b.1d .clamp patterns: each int_nvvm_sust_b_1d_i{8,16,32,64}_clamp
// intrinsic with operands ($s, $x, $r) selects the matching
// SUST_B_1D_B{8,16,32,64}_CLAMP_R instruction with the same operands. The i8
// and i16 intrinsics both carry $r in Int16Regs.
5033 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
5034 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5035 (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5037 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
5038 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5039 (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5041 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
5042 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5043 (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5045 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
5046 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5047 (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// Two-element sust.b.1d .clamp patterns: each
// int_nvvm_sust_b_1d_v2i{8,16,32,64}_clamp intrinsic with operands
// ($s, $x, $r, $g) selects the matching SUST_B_1D_V2B{8,16,32,64}_CLAMP_R
// instruction. The v2i8 and v2i16 intrinsics both carry values in Int16Regs.
5049 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
5050 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5051 (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5052 Int16Regs:$r, Int16Regs:$g)>;
5054 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
5055 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5056 (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5057 Int16Regs:$r, Int16Regs:$g)>;
5059 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
5060 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5061 (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5062 Int32Regs:$r, Int32Regs:$g)>;
5064 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
5065 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5066 (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5067 Int64Regs:$r, Int64Regs:$g)>;
// Four-element sust.b.1d .clamp patterns: each
// int_nvvm_sust_b_1d_v4i{8,16,32}_clamp intrinsic with operands
// ($s, $x, $r, $g, $b, $a) selects the matching SUST_B_1D_V4B{8,16,32}_CLAMP_R
// instruction. The v4i8 and v4i16 intrinsics both carry values in Int16Regs.
5069 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
5070 Int64Regs:$s, Int32Regs:$x,
5071 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5072 (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5073 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5075 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
5076 Int64Regs:$s, Int32Regs:$x,
5077 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5078 (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5079 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5081 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
5082 Int64Regs:$s, Int32Regs:$x,
5083 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5084 (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5085 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the scalar 1D-array `sust.b` intrinsics with the
// `_clamp` suffix. Each Pat matches int_nvvm_sust_b_1d_array_<type>_clamp and
// emits SUST_B_1D_ARRAY_<TYPE>_CLAMP_R with the same operands in the same
// order: $s (Int64Regs), $l and $x (Int32Regs), then the value $r.
// Both the i8 and the i16 variants carry their value in Int16Regs.
// The result dag must bind every operand named in the matched dag, so each
// pattern ends with the $r operand followed by the closing `)>;`.
// NOTE(review): $l is presumably the array layer index -- confirm.
5089 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
5090 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5091 (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5092 Int16Regs:$r)>;
5094 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
5095 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5096 (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5097 Int16Regs:$r)>;
5099 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
5100 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5101 (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5102 Int32Regs:$r)>;
5104 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
5105 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5106 (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5107 Int64Regs:$r)>;
// Selection patterns for the vector (v2/v4) 1D-array `sust.b` intrinsics with
// the `_clamp` suffix. Each Pat matches
// int_nvvm_sust_b_1d_array_<type>_clamp and emits
// SUST_B_1D_ARRAY_<TYPE>_CLAMP_R with identical operands in identical order:
// $s (Int64Regs), $l and $x (Int32Regs), then the values $r, $g[, $b, $a].
// Both the i8 and the i16 variants carry their values in Int16Regs.
// NOTE(review): $l is presumably the array layer index -- confirm.
5109 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
5110 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5111 (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5112 Int16Regs:$r, Int16Regs:$g)>;
5114 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
5115 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5116 (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5117 Int16Regs:$r, Int16Regs:$g)>;
5119 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
5120 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5121 (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5122 Int32Regs:$r, Int32Regs:$g)>;
5124 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
5125 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5126 (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5127 Int64Regs:$r, Int64Regs:$g)>;
// Four-value (v4) variants.
5129 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
5130 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5131 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5132 (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5133 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5135 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
5136 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5137 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5138 (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5139 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5141 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
5142 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5143 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5144 (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5145 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the scalar 2D `sust.b` intrinsics with the `_clamp`
// suffix. Each Pat matches int_nvvm_sust_b_2d_<type>_clamp and emits
// SUST_B_2D_<TYPE>_CLAMP_R with the same operands in the same order:
// $s (Int64Regs), $x and $y (Int32Regs), then the value $r.
// Both the i8 and the i16 variants carry their value in Int16Regs.
// The result dag must bind every operand named in the matched dag, so each
// pattern ends with the $r operand followed by the closing `)>;`.
5149 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
5150 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5151 (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5152 Int16Regs:$r)>;
5154 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
5155 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5156 (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5157 Int16Regs:$r)>;
5159 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
5160 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5161 (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5162 Int32Regs:$r)>;
5164 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
5165 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5166 (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5167 Int64Regs:$r)>;
// Selection patterns for the vector (v2/v4) 2D `sust.b` intrinsics with the
// `_clamp` suffix. Each Pat matches int_nvvm_sust_b_2d_<type>_clamp and emits
// SUST_B_2D_<TYPE>_CLAMP_R with identical operands in identical order:
// $s (Int64Regs), $x and $y (Int32Regs), then the values $r, $g[, $b, $a].
// Both the i8 and the i16 variants carry their values in Int16Regs.
5169 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
5170 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5171 (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5172 Int16Regs:$r, Int16Regs:$g)>;
5174 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
5175 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5176 (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5177 Int16Regs:$r, Int16Regs:$g)>;
5179 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
5180 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5181 (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5182 Int32Regs:$r, Int32Regs:$g)>;
5184 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
5185 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5186 (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5187 Int64Regs:$r, Int64Regs:$g)>;
// Four-value (v4) variants.
5189 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
5190 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5191 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5192 (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5193 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5195 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
5196 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5197 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5198 (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5199 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5201 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
5202 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5203 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5204 (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5205 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the scalar 2D-array `sust.b` intrinsics with the
// `_clamp` suffix. Each Pat matches int_nvvm_sust_b_2d_array_<type>_clamp and
// emits SUST_B_2D_ARRAY_<TYPE>_CLAMP_R with the same operands in the same
// order: $s (Int64Regs), $l, $x and $y (Int32Regs), then the value $r.
// Both the i8 and the i16 variants carry their value in Int16Regs.
// The result dag must bind every operand named in the matched dag, so each
// pattern ends with the $r operand followed by the closing `)>;`.
// NOTE(review): $l is presumably the array layer index -- confirm.
5209 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
5210 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5211 (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
5212 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5213 Int16Regs:$r)>;
5215 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
5216 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5217 (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
5218 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5219 Int16Regs:$r)>;
5221 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
5222 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5223 (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
5224 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5225 Int32Regs:$r)>;
5227 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
5228 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5229 (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
5230 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5231 Int64Regs:$r)>;
// Selection patterns for the vector (v2/v4) 2D-array `sust.b` intrinsics with
// the `_clamp` suffix. Each Pat matches
// int_nvvm_sust_b_2d_array_<type>_clamp and emits
// SUST_B_2D_ARRAY_<TYPE>_CLAMP_R with identical operands in identical order:
// $s (Int64Regs), $l, $x and $y (Int32Regs), then the values
// $r, $g[, $b, $a].
// Both the i8 and the i16 variants carry their values in Int16Regs.
// NOTE(review): $l is presumably the array layer index -- confirm.
5233 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
5234 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5235 Int16Regs:$r, Int16Regs:$g),
5236 (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5237 Int32Regs:$x, Int32Regs:$y,
5238 Int16Regs:$r, Int16Regs:$g)>;
5240 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
5241 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5242 Int16Regs:$r, Int16Regs:$g),
5243 (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5244 Int32Regs:$x, Int32Regs:$y,
5245 Int16Regs:$r, Int16Regs:$g)>;
// The matched dag of the v2i32/v2i64 variants must name $g (the result dag
// uses it) and be closed before the result dag begins.
5247 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
5248 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5249 Int32Regs:$g),
5250 (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5251 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5253 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
5254 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5255 Int64Regs:$g),
5256 (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5257 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Four-value (v4) variants.
5259 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
5260 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5261 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5262 (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
5263 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5264 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5266 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
5267 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5268 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5269 (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
5270 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5271 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5273 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
5274 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5275 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5276 (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5277 Int32Regs:$x, Int32Regs:$y,
5278 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the scalar 3D `sust.b` intrinsics with the `_clamp`
// suffix. Each Pat matches int_nvvm_sust_b_3d_<type>_clamp and emits
// SUST_B_3D_<TYPE>_CLAMP_R with the same operands in the same order:
// $s (Int64Regs), $x, $y and $z (Int32Regs), then the value $r.
// The value register class follows the convention of the other scalar
// `sust.b` patterns in this file: Int16Regs for i8 and i16, Int32Regs for i32,
// Int64Regs for i64. Both dags need the $r operand and their closing
// parenthesis for the pattern to be well formed.
5282 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
5283 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5284 Int16Regs:$r),
5285 (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
5286 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5287 Int16Regs:$r)>;
5289 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
5290 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5291 Int16Regs:$r),
5292 (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
5293 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5294 Int16Regs:$r)>;
5296 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
5297 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5298 Int32Regs:$r),
5299 (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
5300 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5301 Int32Regs:$r)>;
5303 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
5304 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5305 Int64Regs:$r),
5306 (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
5307 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5308 Int64Regs:$r)>;
// Selection patterns for the vector (v2/v4) 3D `sust.b` intrinsics with the
// `_clamp` suffix. Each Pat matches int_nvvm_sust_b_3d_<type>_clamp and emits
// SUST_B_3D_<TYPE>_CLAMP_R with identical operands in identical order:
// $s (Int64Regs), $x, $y and $z (Int32Regs), then the values
// $r, $g[, $b, $a].
// Both the i8 and the i16 variants carry their values in Int16Regs.
5310 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
5311 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5312 Int16Regs:$r, Int16Regs:$g),
5313 (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
5314 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5315 Int16Regs:$r, Int16Regs:$g)>;
5317 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
5318 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5319 Int16Regs:$r, Int16Regs:$g),
5320 (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
5321 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5322 Int16Regs:$r, Int16Regs:$g)>;
5324 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
5325 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5326 Int32Regs:$r, Int32Regs:$g),
5327 (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
5328 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5329 Int32Regs:$r, Int32Regs:$g)>;
5331 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
5332 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5333 Int64Regs:$r, Int64Regs:$g),
5334 (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
5335 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5336 Int64Regs:$r, Int64Regs:$g)>;
// Four-value (v4) variants.
5338 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
5339 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5340 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5341 (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
5342 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5343 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5345 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
5346 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5347 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5348 (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
5349 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5350 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5352 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
5353 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5354 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5355 (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
5356 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5357 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D `sust.b` intrinsics with the `_trap` suffix.
// Each anonymous Pat matches one int_nvvm_sust_b_1d_<type>_trap intrinsic and
// emits the SUST_B_1D_<TYPE>_TRAP_R instruction, forwarding the operands
// unchanged and in the same order: $s (Int64Regs), $x (Int32Regs), then the
// value operand(s) $r[, $g[, $b, $a]].
// Both the i8 and the i16 variants carry their values in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the intrinsic definitions.
5361 def : Pat<(int_nvvm_sust_b_1d_i8_trap
5362 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5363 (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5365 def : Pat<(int_nvvm_sust_b_1d_i16_trap
5366 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5367 (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5369 def : Pat<(int_nvvm_sust_b_1d_i32_trap
5370 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5371 (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5373 def : Pat<(int_nvvm_sust_b_1d_i64_trap
5374 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5375 (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// Two-value (v2) variants: values are $r and $g.
5377 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
5378 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5379 (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5380 Int16Regs:$r, Int16Regs:$g)>;
5382 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
5383 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5384 (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5385 Int16Regs:$r, Int16Regs:$g)>;
5387 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
5388 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5389 (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5390 Int32Regs:$r, Int32Regs:$g)>;
5392 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
5393 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5394 (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
5395 Int64Regs:$r, Int64Regs:$g)>;
// Four-value (v4) variants: values are $r, $g, $b and $a.
5397 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
5398 Int64Regs:$s, Int32Regs:$x,
5399 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5400 (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5401 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5403 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
5404 Int64Regs:$s, Int32Regs:$x,
5405 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5406 (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5407 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5409 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
5410 Int64Regs:$s, Int32Regs:$x,
5411 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5412 (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5413 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the scalar 1D-array `sust.b` intrinsics with the
// `_trap` suffix. Each Pat matches int_nvvm_sust_b_1d_array_<type>_trap and
// emits SUST_B_1D_ARRAY_<TYPE>_TRAP_R with the same operands in the same
// order: $s (Int64Regs), $l and $x (Int32Regs), then the value $r.
// Both the i8 and the i16 variants carry their value in Int16Regs.
// The result dag must bind every operand named in the matched dag, so each
// pattern ends with the $r operand followed by the closing `)>;`.
// NOTE(review): $l is presumably the array layer index -- confirm.
5417 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
5418 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5419 (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5420 Int16Regs:$r)>;
5422 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
5423 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5424 (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5425 Int16Regs:$r)>;
5427 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
5428 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5429 (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5430 Int32Regs:$r)>;
5432 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
5433 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5434 (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5435 Int64Regs:$r)>;
// Selection patterns for the vector (v2/v4) 1D-array `sust.b` intrinsics with
// the `_trap` suffix. Each Pat matches int_nvvm_sust_b_1d_array_<type>_trap
// and emits SUST_B_1D_ARRAY_<TYPE>_TRAP_R with identical operands in identical
// order: $s (Int64Regs), $l and $x (Int32Regs), then the values
// $r, $g[, $b, $a].
// Both the i8 and the i16 variants carry their values in Int16Regs.
// NOTE(review): $l is presumably the array layer index -- confirm.
5437 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
5438 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5439 (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5440 Int16Regs:$r, Int16Regs:$g)>;
5442 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
5443 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5444 (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5445 Int16Regs:$r, Int16Regs:$g)>;
5447 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
5448 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5449 (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5450 Int32Regs:$r, Int32Regs:$g)>;
5452 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
5453 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5454 (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5455 Int64Regs:$r, Int64Regs:$g)>;
// Four-value (v4) variants.
5457 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
5458 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5459 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5460 (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5461 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5463 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
5464 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5465 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5466 (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5467 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5469 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
5470 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5471 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5472 (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5473 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the scalar 2D `sust.b` intrinsics with the `_trap`
// suffix. Each Pat matches int_nvvm_sust_b_2d_<type>_trap and emits
// SUST_B_2D_<TYPE>_TRAP_R with the same operands in the same order:
// $s (Int64Regs), $x and $y (Int32Regs), then the value $r.
// Both the i8 and the i16 variants carry their value in Int16Regs.
// The result dag must bind every operand named in the matched dag, so each
// pattern ends with the $r operand followed by the closing `)>;`.
5477 def : Pat<(int_nvvm_sust_b_2d_i8_trap
5478 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5479 (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5480 Int16Regs:$r)>;
5482 def : Pat<(int_nvvm_sust_b_2d_i16_trap
5483 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5484 (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5485 Int16Regs:$r)>;
5487 def : Pat<(int_nvvm_sust_b_2d_i32_trap
5488 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5489 (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5490 Int32Regs:$r)>;
5492 def : Pat<(int_nvvm_sust_b_2d_i64_trap
5493 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5494 (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5495 Int64Regs:$r)>;
// Selection patterns for the vector (v2/v4) 2D `sust.b` intrinsics with the
// `_trap` suffix. Each Pat matches int_nvvm_sust_b_2d_<type>_trap and emits
// SUST_B_2D_<TYPE>_TRAP_R with identical operands in identical order:
// $s (Int64Regs), $x and $y (Int32Regs), then the values $r, $g[, $b, $a].
// Both the i8 and the i16 variants carry their values in Int16Regs.
5497 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
5498 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5499 (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5500 Int16Regs:$r, Int16Regs:$g)>;
5502 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
5503 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5504 (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5505 Int16Regs:$r, Int16Regs:$g)>;
5507 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
5508 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5509 (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5510 Int32Regs:$r, Int32Regs:$g)>;
5512 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
5513 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5514 (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5515 Int64Regs:$r, Int64Regs:$g)>;
// Four-value (v4) variants.
5517 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
5518 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5519 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5520 (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5521 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5523 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
5524 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5525 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5526 (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5527 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5529 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
5530 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5531 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5532 (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5533 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the scalar 2D-array `sust.b` intrinsics with the
// `_trap` suffix. Each Pat matches int_nvvm_sust_b_2d_array_<type>_trap and
// emits SUST_B_2D_ARRAY_<TYPE>_TRAP_R with the same operands in the same
// order: $s (Int64Regs), $l, $x and $y (Int32Regs), then the value $r.
// Both the i8 and the i16 variants carry their value in Int16Regs.
// The result dag must bind every operand named in the matched dag, so each
// pattern ends with the $r operand followed by the closing `)>;`.
// NOTE(review): $l is presumably the array layer index -- confirm.
5537 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
5538 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5539 (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
5540 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5541 Int16Regs:$r)>;
5543 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
5544 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5545 (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
5546 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5547 Int16Regs:$r)>;
5549 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
5550 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5551 (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
5552 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5553 Int32Regs:$r)>;
5555 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
5556 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5557 (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
5558 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5559 Int64Regs:$r)>;
// Selection patterns for the vector (v2/v4) 2D-array `sust.b` intrinsics with
// the `_trap` suffix. Each Pat matches int_nvvm_sust_b_2d_array_<type>_trap
// and emits SUST_B_2D_ARRAY_<TYPE>_TRAP_R with identical operands in identical
// order: $s (Int64Regs), $l, $x and $y (Int32Regs), then the values
// $r, $g[, $b, $a].
// Both the i8 and the i16 variants carry their values in Int16Regs.
// NOTE(review): $l is presumably the array layer index -- confirm.
5561 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
5562 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5563 Int16Regs:$r, Int16Regs:$g),
5564 (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
5565 Int32Regs:$x, Int32Regs:$y,
5566 Int16Regs:$r, Int16Regs:$g)>;
5568 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
5569 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5570 Int16Regs:$r, Int16Regs:$g),
5571 (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
5572 Int32Regs:$x, Int32Regs:$y,
5573 Int16Regs:$r, Int16Regs:$g)>;
// The matched dag of the v2i32/v2i64 variants must name $g (the result dag
// uses it) and be closed before the result dag begins.
5575 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
5576 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5577 Int32Regs:$g),
5578 (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5579 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5581 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
5582 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5583 Int64Regs:$g),
5584 (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l,
5585 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Four-value (v4) variants.
5587 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
5588 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5589 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5590 (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
5591 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5592 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5594 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
5595 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5596 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5597 (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
5598 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5599 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5601 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
5602 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5603 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5604 (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5605 Int32Regs:$x, Int32Regs:$y,
5606 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the scalar 3D `sust.b` intrinsics with the `_trap`
// suffix. Each Pat matches int_nvvm_sust_b_3d_<type>_trap and emits
// SUST_B_3D_<TYPE>_TRAP_R with the same operands in the same order:
// $s (Int64Regs), $x, $y and $z (Int32Regs), then the value $r.
// The value register class follows the convention of the other scalar
// `sust.b` patterns in this file: Int16Regs for i8 and i16, Int32Regs for i32,
// Int64Regs for i64. Both dags need the $r operand and their closing
// parenthesis for the pattern to be well formed.
5610 def : Pat<(int_nvvm_sust_b_3d_i8_trap
5611 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5612 Int16Regs:$r),
5613 (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
5614 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5615 Int16Regs:$r)>;
5617 def : Pat<(int_nvvm_sust_b_3d_i16_trap
5618 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5619 Int16Regs:$r),
5620 (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
5621 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5622 Int16Regs:$r)>;
5624 def : Pat<(int_nvvm_sust_b_3d_i32_trap
5625 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5626 Int32Regs:$r),
5627 (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
5628 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5629 Int32Regs:$r)>;
5631 def : Pat<(int_nvvm_sust_b_3d_i64_trap
5632 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5633 Int64Regs:$r),
5634 (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
5635 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5636 Int64Regs:$r)>;
// Selection patterns for the vector (v2/v4) 3D `sust.b` intrinsics with the
// `_trap` suffix. Each Pat matches int_nvvm_sust_b_3d_<type>_trap and emits
// SUST_B_3D_<TYPE>_TRAP_R with identical operands in identical order:
// $s (Int64Regs), $x, $y and $z (Int32Regs), then the values
// $r, $g[, $b, $a].
// Both the i8 and the i16 variants carry their values in Int16Regs.
5638 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
5639 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5640 Int16Regs:$r, Int16Regs:$g),
5641 (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
5642 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5643 Int16Regs:$r, Int16Regs:$g)>;
5645 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
5646 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5647 Int16Regs:$r, Int16Regs:$g),
5648 (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
5649 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5650 Int16Regs:$r, Int16Regs:$g)>;
5652 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
5653 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5654 Int32Regs:$r, Int32Regs:$g),
5655 (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
5656 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5657 Int32Regs:$r, Int32Regs:$g)>;
5659 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
5660 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5661 Int64Regs:$r, Int64Regs:$g),
5662 (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
5663 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5664 Int64Regs:$r, Int64Regs:$g)>;
// Four-value (v4) variants.
5666 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
5667 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5668 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5669 (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
5670 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5671 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5673 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
5674 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5675 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5676 (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
5677 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5678 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5680 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
5681 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5682 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5683 (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
5684 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5685 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D `sust.b` intrinsics with the `_zero` suffix.
// Each anonymous Pat matches one int_nvvm_sust_b_1d_<type>_zero intrinsic and
// emits the SUST_B_1D_<TYPE>_ZERO_R instruction, forwarding the operands
// unchanged and in the same order: $s (Int64Regs), $x (Int32Regs), then the
// value operand(s) $r[, $g[, $b, $a]].
// Both the i8 and the i16 variants carry their values in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the intrinsic definitions.
5689 def : Pat<(int_nvvm_sust_b_1d_i8_zero
5690 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5691 (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5693 def : Pat<(int_nvvm_sust_b_1d_i16_zero
5694 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5695 (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5697 def : Pat<(int_nvvm_sust_b_1d_i32_zero
5698 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5699 (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5701 def : Pat<(int_nvvm_sust_b_1d_i64_zero
5702 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5703 (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// Two-value (v2) variants: values are $r and $g.
5705 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
5706 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5707 (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
5708 Int16Regs:$r, Int16Regs:$g)>;
5710 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
5711 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5712 (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
5713 Int16Regs:$r, Int16Regs:$g)>;
5715 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
5716 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5717 (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
5718 Int32Regs:$r, Int32Regs:$g)>;
5720 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
5721 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5722 (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x,
5723 Int64Regs:$r, Int64Regs:$g)>;
// Four-value (v4) variants: values are $r, $g, $b and $a.
5725 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
5726 Int64Regs:$s, Int32Regs:$x,
5727 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5728 (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
5729 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5731 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
5732 Int64Regs:$s, Int32Regs:$x,
5733 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5734 (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
5735 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5737 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
5738 Int64Regs:$s, Int32Regs:$x,
5739 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5740 (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
5741 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the scalar 1D-array `sust.b` intrinsics with the
// `_zero` suffix. Each Pat matches int_nvvm_sust_b_1d_array_<type>_zero and
// emits SUST_B_1D_ARRAY_<TYPE>_ZERO_R with the same operands in the same
// order: $s (Int64Regs), $l and $x (Int32Regs), then the value $r.
// Both the i8 and the i16 variants carry their value in Int16Regs.
// The result dag must bind every operand named in the matched dag, so each
// pattern ends with the $r operand followed by the closing `)>;`.
// NOTE(review): $l is presumably the array layer index -- confirm.
5745 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
5746 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5747 (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5748 Int16Regs:$r)>;
5750 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
5751 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5752 (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5753 Int16Regs:$r)>;
5755 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
5756 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5757 (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5758 Int32Regs:$r)>;
5760 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
5761 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5762 (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5763 Int64Regs:$r)>;
// Selection patterns for the vector (v2/v4) 1D-array `sust.b` intrinsics with
// the `_zero` suffix. Each Pat matches int_nvvm_sust_b_1d_array_<type>_zero
// and emits SUST_B_1D_ARRAY_<TYPE>_ZERO_R with identical operands in identical
// order: $s (Int64Regs), $l and $x (Int32Regs), then the values
// $r, $g[, $b, $a].
// Both the i8 and the i16 variants carry their values in Int16Regs.
// NOTE(review): $l is presumably the array layer index -- confirm.
5765 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
5766 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5767 (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5768 Int16Regs:$r, Int16Regs:$g)>;
5770 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
5771 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5772 (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5773 Int16Regs:$r, Int16Regs:$g)>;
5775 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
5776 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5777 (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5778 Int32Regs:$r, Int32Regs:$g)>;
5780 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
5781 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5782 (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5783 Int64Regs:$r, Int64Regs:$g)>;
// Four-value (v4) variants.
5785 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
5786 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5787 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5788 (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5789 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5791 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
5792 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5793 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5794 (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5795 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5797 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
5798 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5799 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5800 (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5801 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the scalar 2D `sust.b` intrinsics with the `_zero`
// suffix. Each Pat matches int_nvvm_sust_b_2d_<type>_zero and emits
// SUST_B_2D_<TYPE>_ZERO_R with the same operands in the same order:
// $s (Int64Regs), $x and $y (Int32Regs), then the value $r.
// Both the i8 and the i16 variants carry their value in Int16Regs.
// The result dag must bind every operand named in the matched dag, so each
// pattern ends with the $r operand followed by the closing `)>;`.
5805 def : Pat<(int_nvvm_sust_b_2d_i8_zero
5806 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5807 (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5808 Int16Regs:$r)>;
5810 def : Pat<(int_nvvm_sust_b_2d_i16_zero
5811 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5812 (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5813 Int16Regs:$r)>;
5815 def : Pat<(int_nvvm_sust_b_2d_i32_zero
5816 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5817 (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5818 Int32Regs:$r)>;
5820 def : Pat<(int_nvvm_sust_b_2d_i64_zero
5821 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5822 (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5823 Int64Regs:$r)>;
// Selection patterns for the vector (v2/v4) 2D `sust.b` intrinsics with the
// `_zero` suffix. Each Pat matches int_nvvm_sust_b_2d_<type>_zero and emits
// SUST_B_2D_<TYPE>_ZERO_R with identical operands in identical order:
// $s (Int64Regs), $x and $y (Int32Regs), then the values $r, $g[, $b, $a].
// Both the i8 and the i16 variants carry their values in Int16Regs.
5825 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
5826 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5827 (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5828 Int16Regs:$r, Int16Regs:$g)>;
5830 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
5831 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5832 (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5833 Int16Regs:$r, Int16Regs:$g)>;
5835 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
5836 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5837 (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5838 Int32Regs:$r, Int32Regs:$g)>;
5840 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
5841 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5842 (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5843 Int64Regs:$r, Int64Regs:$g)>;
// Four-value (v4) variants.
5845 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
5846 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5847 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5848 (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5849 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5851 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
5852 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5853 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5854 (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5855 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5857 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
5858 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5859 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5860 (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5861 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the scalar 2D-array `sust.b` intrinsics with the
// `_zero` suffix. Each Pat matches int_nvvm_sust_b_2d_array_<type>_zero and
// emits SUST_B_2D_ARRAY_<TYPE>_ZERO_R with the same operands in the same
// order: $s (Int64Regs), $l, $x and $y (Int32Regs), then the value $r.
// Both the i8 and the i16 variants carry their value in Int16Regs.
// The result dag must bind every operand named in the matched dag, so each
// pattern ends with the $r operand followed by the closing `)>;`.
// NOTE(review): $l is presumably the array layer index -- confirm.
5865 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
5866 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5867 (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s,
5868 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5869 Int16Regs:$r)>;
5871 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
5872 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5873 (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s,
5874 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5875 Int16Regs:$r)>;
5877 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
5878 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5879 (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s,
5880 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5881 Int32Regs:$r)>;
5883 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
5884 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5885 (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s,
5886 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5887 Int64Regs:$r)>;
// Selection patterns for the two-element i8/i16 forms of
// int_nvvm_sust_b_2d_array_*_zero. Each intrinsic is rewritten to the
// SUST_B_2D_ARRAY_V2B{8,16}_ZERO_R instruction with the operands forwarded
// unchanged: $s, $l, $x, $y, then the data registers $r and $g.
// NOTE(review): $l is presumably the array layer index -- confirm against
// the instruction definition. Both widths carry their data in Int16Regs.
5889 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
5890 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5891 Int16Regs:$r, Int16Regs:$g),
5892 (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l,
5893 Int32Regs:$x, Int32Regs:$y,
5894 Int16Regs:$r, Int16Regs:$g)>;
5896 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
5897 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5898 Int16Regs:$r, Int16Regs:$g),
5899 (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l,
5900 Int32Regs:$x, Int32Regs:$y,
5901 Int16Regs:$r, Int16Regs:$g)>;
5903 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
5904 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5906 (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
5907 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5909 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
5910 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5912 (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l,
5913 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Selection patterns for the four-element forms of
// int_nvvm_sust_b_2d_array_*_zero (i8, i16, i32). Each intrinsic is rewritten
// to the SUST_B_2D_ARRAY_V4B{8,16,32}_ZERO_R instruction with the operands
// forwarded unchanged: $s, $l, $x, $y, then the data registers $r, $g, $b, $a.
// The i8 and i16 variants both carry their data in Int16Regs.
5915 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
5916 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5917 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5918 (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s,
5919 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5920 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5922 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
5923 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5924 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5925 (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s,
5926 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5927 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5929 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
5930 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5931 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5932 (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
5933 Int32Regs:$x, Int32Regs:$y,
5934 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5938 def : Pat<(int_nvvm_sust_b_3d_i8_zero
5939 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5941 (SUST_B_3D_B8_ZERO_R Int64Regs:$s,
5942 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5945 def : Pat<(int_nvvm_sust_b_3d_i16_zero
5946 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5948 (SUST_B_3D_B16_ZERO_R Int64Regs:$s,
5949 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5952 def : Pat<(int_nvvm_sust_b_3d_i32_zero
5953 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5955 (SUST_B_3D_B32_ZERO_R Int64Regs:$s,
5956 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5959 def : Pat<(int_nvvm_sust_b_3d_i64_zero
5960 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5962 (SUST_B_3D_B64_ZERO_R Int64Regs:$s,
5963 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Selection patterns for the vector forms of int_nvvm_sust_b_3d_*_zero.
// Each pattern matches one intrinsic and emits the SUST_B_3D_*_ZERO_R
// instruction with the matching element count and width. The operands
// ($s, $x, $y, $z, followed by the data registers) are passed through
// unchanged and in the same order.
// The 8-bit and 16-bit variants both carry their data in Int16Regs.
// This run contains v2 x {i8,i16,i32,i64} and v4 x {i8,i16,i32}.
5966 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
5967 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5968 Int16Regs:$r, Int16Regs:$g),
5969 (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s,
5970 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5971 Int16Regs:$r, Int16Regs:$g)>;
5973 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
5974 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5975 Int16Regs:$r, Int16Regs:$g),
5976 (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s,
5977 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5978 Int16Regs:$r, Int16Regs:$g)>;
5980 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
5981 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5982 Int32Regs:$r, Int32Regs:$g),
5983 (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s,
5984 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5985 Int32Regs:$r, Int32Regs:$g)>;
5987 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
5988 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5989 Int64Regs:$r, Int64Regs:$g),
5990 (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s,
5991 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5992 Int64Regs:$r, Int64Regs:$g)>;
// Four-element variants: data operands are $r, $g, $b, $a.
5994 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
5995 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5996 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5997 (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s,
5998 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5999 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6001 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
6002 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6003 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6004 (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s,
6005 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6006 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6008 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
6009 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6010 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6011 (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s,
6012 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6013 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for int_nvvm_sust_p_1d_*_trap. Each pattern matches one
// intrinsic and emits the SUST_P_1D_*_TRAP_R instruction with the matching
// element count and width. The operands ($s, $x, followed by the data
// registers) are passed through unchanged and in the same order.
// The 8-bit and 16-bit variants both carry their data in Int16Regs.
// Scalar variants: a single data operand $r.
6018 def : Pat<(int_nvvm_sust_p_1d_i8_trap
6019 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6020 (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6022 def : Pat<(int_nvvm_sust_p_1d_i16_trap
6023 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6024 (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6026 def : Pat<(int_nvvm_sust_p_1d_i32_trap
6027 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6028 (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
// Two-element variants: data operands are $r, $g.
6030 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
6031 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6032 (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
6033 Int16Regs:$r, Int16Regs:$g)>;
6035 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
6036 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6037 (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
6038 Int16Regs:$r, Int16Regs:$g)>;
6040 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
6041 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6042 (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
6043 Int32Regs:$r, Int32Regs:$g)>;
// Four-element variants: data operands are $r, $g, $b, $a.
6045 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
6046 Int64Regs:$s, Int32Regs:$x,
6047 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6048 (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
6049 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6051 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
6052 Int64Regs:$s, Int32Regs:$x,
6053 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6054 (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
6055 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6057 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
6058 Int64Regs:$s, Int32Regs:$x,
6059 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6060 (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
6061 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6065 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
6066 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6067 (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6070 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
6071 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6072 (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6075 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
6076 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6077 (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Selection patterns for the vector forms of int_nvvm_sust_p_1d_array_*_trap.
// Each pattern matches one intrinsic and emits the SUST_P_1D_ARRAY_*_TRAP_R
// instruction with the matching element count and width. The operands
// ($s, $l, $x, followed by the data registers) are passed through unchanged
// and in the same order.
// NOTE(review): $l is presumably the array layer index -- confirm against
// the instruction definition.
// The 8-bit and 16-bit variants both carry their data in Int16Regs.
6080 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
6081 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6082 (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6083 Int16Regs:$r, Int16Regs:$g)>;
6085 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
6086 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6087 (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6088 Int16Regs:$r, Int16Regs:$g)>;
6090 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
6091 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6092 (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6093 Int32Regs:$r, Int32Regs:$g)>;
// Four-element variants: data operands are $r, $g, $b, $a.
6095 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
6096 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6097 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6098 (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6099 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6101 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
6102 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6103 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6104 (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6105 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6107 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
6108 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6109 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6110 (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6111 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6115 def : Pat<(int_nvvm_sust_p_2d_i8_trap
6116 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6117 (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6120 def : Pat<(int_nvvm_sust_p_2d_i16_trap
6121 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6122 (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6125 def : Pat<(int_nvvm_sust_p_2d_i32_trap
6126 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6127 (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Selection patterns for the vector forms of int_nvvm_sust_p_2d_*_trap.
// Each pattern matches one intrinsic and emits the SUST_P_2D_*_TRAP_R
// instruction with the matching element count and width. The operands
// ($s, $x, $y, followed by the data registers) are passed through unchanged
// and in the same order.
// The 8-bit and 16-bit variants both carry their data in Int16Regs.
6130 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
6131 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6132 (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6133 Int16Regs:$r, Int16Regs:$g)>;
6135 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
6136 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6137 (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6138 Int16Regs:$r, Int16Regs:$g)>;
6140 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
6141 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6142 (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6143 Int32Regs:$r, Int32Regs:$g)>;
// Four-element variants: data operands are $r, $g, $b, $a.
6145 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
6146 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6147 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6148 (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6149 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6151 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
6152 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6153 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6154 (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6155 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6157 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
6158 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6159 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6160 (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6161 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6165 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
6166 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6167 (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
6168 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6171 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
6172 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6173 (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
6174 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6177 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
6178 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6179 (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
6180 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Selection patterns for the two-element i8/i16 forms of
// int_nvvm_sust_p_2d_array_*_trap. Each intrinsic is rewritten to the
// SUST_P_2D_ARRAY_V2B{8,16}_TRAP_R instruction with the operands forwarded
// unchanged: $s, $l, $x, $y, then the data registers $r and $g.
// NOTE(review): $l is presumably the array layer index -- confirm against
// the instruction definition. Both widths carry their data in Int16Regs.
6183 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
6184 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6185 Int16Regs:$r, Int16Regs:$g),
6186 (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
6187 Int32Regs:$x, Int32Regs:$y,
6188 Int16Regs:$r, Int16Regs:$g)>;
6190 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
6191 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6192 Int16Regs:$r, Int16Regs:$g),
6193 (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
6194 Int32Regs:$x, Int32Regs:$y,
6195 Int16Regs:$r, Int16Regs:$g)>;
6197 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
6198 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6200 (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
6201 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
// Selection patterns for the four-element forms of
// int_nvvm_sust_p_2d_array_*_trap (i8, i16, i32). Each intrinsic is rewritten
// to the SUST_P_2D_ARRAY_V4B{8,16,32}_TRAP_R instruction with the operands
// forwarded unchanged: $s, $l, $x, $y, then the data registers $r, $g, $b, $a.
// The i8 and i16 variants both carry their data in Int16Regs.
6203 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
6204 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6205 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6206 (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
6207 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6208 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6210 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
6211 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6212 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6213 (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
6214 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6215 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6217 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
6218 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6219 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6220 (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
6221 Int32Regs:$x, Int32Regs:$y,
6222 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6226 def : Pat<(int_nvvm_sust_p_3d_i8_trap
6227 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6229 (SUST_P_3D_B8_TRAP_R Int64Regs:$s,
6230 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6233 def : Pat<(int_nvvm_sust_p_3d_i16_trap
6234 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6236 (SUST_P_3D_B16_TRAP_R Int64Regs:$s,
6237 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6240 def : Pat<(int_nvvm_sust_p_3d_i32_trap
6241 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6243 (SUST_P_3D_B32_TRAP_R Int64Regs:$s,
6244 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Selection patterns for the vector forms of int_nvvm_sust_p_3d_*_trap.
// Each pattern matches one intrinsic and emits the SUST_P_3D_*_TRAP_R
// instruction with the matching element count and width. The operands
// ($s, $x, $y, $z, followed by the data registers) are passed through
// unchanged and in the same order.
// The 8-bit and 16-bit variants both carry their data in Int16Regs.
6247 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
6248 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6249 Int16Regs:$r, Int16Regs:$g),
6250 (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s,
6251 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6252 Int16Regs:$r, Int16Regs:$g)>;
6254 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
6255 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6256 Int16Regs:$r, Int16Regs:$g),
6257 (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s,
6258 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6259 Int16Regs:$r, Int16Regs:$g)>;
6261 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
6262 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6263 Int32Regs:$r, Int32Regs:$g),
6264 (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s,
6265 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6266 Int32Regs:$r, Int32Regs:$g)>;
// Four-element variants: data operands are $r, $g, $b, $a.
6268 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
6269 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6270 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6271 (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s,
6272 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6273 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6275 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
6276 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6277 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6278 (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s,
6279 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6280 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6282 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
6283 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6284 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6285 (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s,
6286 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6287 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6289 //-----------------------------------
6290 // Read Special Registers
6291 //-----------------------------------
6293 class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]>
6294 : NVPTXInst<(outs Int64Regs:$d), (ins),
6295 !strconcat("mov.u64 \t$d, %", regname, ";"),
6296 [(set Int64Regs:$d, (intop))]>,
6299 class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
6300 : NVPTXInst<(outs Int32Regs:$d), (ins),
6301 !strconcat("mov.u32 \t$d, %", regname, ";"),
6302 [(set Int32Regs:$d, (intop))]>,
6305 multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
6306 foreach suffix = ["x", "y", "z", "w"] in {
6307 defvar reg = regname # "." # suffix;
6308 defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix);
6309 def "_"#suffix : PTX_READ_SREG_R32<reg, intr, Preds>;
6313 // TODO: Add vector versions of the special-register reads.
// Four-component special registers. Each defm instantiates
// PTX_READ_SREG_R32V4, which iterates over the suffixes x, y, z and w and
// defines one PTX_READ_SREG_R32 record per suffix ("_x" .. "_w"). Each record
// reads "%<regname>.<suffix>" with mov.u32 and is matched to the intrinsic
// int_nvvm_read_ptx_sreg_<regname>_<suffix>.
6315 defm INT_PTX_SREG_TID : PTX_READ_SREG_R32V4<"tid">;
6316 defm INT_PTX_SREG_NTID : PTX_READ_SREG_R32V4<"ntid">;
6317 defm INT_PTX_SREG_CTAID : PTX_READ_SREG_R32V4<"ctaid">;
6318 defm INT_PTX_SREG_NCTAID: PTX_READ_SREG_R32V4<"nctaid">;
// Cluster-related registers are additionally gated on the predicates
// hasSM<90> and hasPTX<78>, passed through as the Preds argument.
6320 defm INT_PTX_SREG_CLUSTERID :
6321 PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
6322 defm INT_PTX_SREG_NCLUSTERID :
6323 PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
6324 defm INT_PTX_SREG_CLUSTER_CTAID :
6325 PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
6326 defm INT_PTX_SREG_CLUSTER_NCTAID:
6327 PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;
// Scalar cluster registers: a single 32-bit read each, same predicates.
6329 def INT_PTX_SREG_CLUSTER_CTARANK :
6330 PTX_READ_SREG_R32<"cluster_ctarank",
6331 int_nvvm_read_ptx_sreg_cluster_ctarank,
6332 [hasSM<90>, hasPTX<78>]>;
6333 def INT_PTX_SREG_CLUSTER_NCTARANK:
6334 PTX_READ_SREG_R32<"cluster_nctarank",
6335 int_nvvm_read_ptx_sreg_cluster_nctarank,
6336 [hasSM<90>, hasPTX<78>]>;
// Scalar special-register reads. Each def instantiates PTX_READ_SREG_R32
// (PTX_READ_SREG_R64 for clock64) with the register name that is spliced into
// the "mov.u32/mov.u64 $d, %<regname>;" asm string and the intrinsic the
// instruction is selected for. No predicate list is passed, so Preds keeps
// its default value of [].
6339 def INT_PTX_SREG_LANEID :
6340 PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
6341 def INT_PTX_SREG_WARPID :
6342 PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
6343 def INT_PTX_SREG_NWARPID :
6344 PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
6345 def INT_PTX_SREG_SMID :
6346 PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
6347 def INT_PTX_SREG_NSMID :
6348 PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
6349 def INT_PTX_SREG_GRIDID :
6350 PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
// Lane-mask registers (eq / le / lt / ge / gt).
6352 def INT_PTX_SREG_LANEMASK_EQ :
6353 PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
6354 def INT_PTX_SREG_LANEMASK_LE :
6355 PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
6356 def INT_PTX_SREG_LANEMASK_LT :
6357 PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
6358 def INT_PTX_SREG_LANEMASK_GE :
6359 PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
6360 def INT_PTX_SREG_LANEMASK_GT :
6361 PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
// Clock registers: %clock is read as 32 bits, %clock64 as 64 bits.
6363 def INT_PTX_SREG_CLOCK :
6364 PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
6365 def INT_PTX_SREG_CLOCK64 :
6366 PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
// %pm0 .. %pm3 registers, each read as 32 bits.
6368 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
6369 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
6370 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
6371 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
// Reads the warp size into a 32-bit register; selected for
// int_nvvm_read_ptx_sreg_warpsize. Written out by hand because the source
// operand is the bare WARP_SZ constant rather than a '%'-prefixed register.
6373 // TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
6374 // prepends '%' to the register name, which the WARP_SZ constant does not take.
6375 def INT_PTX_SREG_WARPSIZE :
6376 NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
6377 [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
6379 // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
6380 // In addition to the target-independent fields provided by WMMA_REGS, it adds
6381 // the fields commonly used to implement a specific PTX instruction -- register
6382 // types and names, constraints, parts of the assembly string, etc.
6383 class WMMA_REGINFO<WMMA_REGS r, string op>
6384 : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
6385 // NVPTX register types used to carry fragment data.
6386 NVPTXRegClass regclass = !cond(
6387 !eq(ptx_elt_type, "f16") : Int32Regs,
6388 !eq(ptx_elt_type, "f32") : Float32Regs,
6389 !eq(ptx_elt_type, "f64") : Float64Regs,
6390 !eq(ptx_elt_type, "bf16") : Int32Regs,
6391 !eq(ptx_elt_type, "tf32") : Int32Regs,
6392 !eq(ptx_elt_type, "s32") : Int32Regs,
6393 !eq(ptx_elt_type, "b16") : Int32Regs,
6394 !eq(ptx_elt_type, "s8") : Int32Regs,
6395 !eq(ptx_elt_type, "u8") : Int32Regs,
6396 !eq(ptx_elt_type, "s4") : Int32Regs,
6397 !eq(ptx_elt_type, "u4") : Int32Regs,
6398 !eq(ptx_elt_type, "b1") : Int32Regs);
6400 // Instruction input/output arguments for the fragment.
6401 list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));
6403 // List of register names for the fragment -- ["ra0", "ra1",...]
6404 list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
6406 // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
6407 string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";
6409 // Predicates for a particular fragment variant. Technically those are
6410 // per-instruction predicates, but currently all fragments that can be used in
6411 // a given instruction are subject to the same constraints, so an instruction
6412 // can use predicates from any of its fragments. If/when this is no
6413 // longer the case, we can concat all per-fragment predicates to enforce that
6414 // all fragments of the instruction are viable.
6415 list<Predicate> Predicates = !cond(
6416 // fp16 -> fp16/fp32 @ m16n16k16
6417 !and(!eq(geom, "m16n16k16"),
6418 !or(!eq(ptx_elt_type, "f16"),
6419 !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>],
6421 !and(!eq(geom,"m8n8k4"),
6422 !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>],
6424 // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
6425 !and(!or(!eq(geom, "m8n32k16"),
6426 !eq(geom, "m32n8k16")),
6427 !or(!eq(ptx_elt_type, "f16"),
6428 !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>],
6430 // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
6431 !and(!or(!eq(geom,"m16n16k16"),
6432 !eq(geom,"m8n32k16"),
6433 !eq(geom,"m32n8k16")),
6434 !or(!eq(ptx_elt_type, "u8"),
6435 !eq(ptx_elt_type, "s8"),
6436 !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>],
6438 !and(!or(!eq(geom,"m16n16k16"),
6439 !eq(geom,"m8n32k16"),
6440 !eq(geom,"m32n8k16")),
6441 !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>],
6443 !and(!eq(geom,"m16n16k8"),
6444 !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>],
6446 !and(!eq(geom,"m16n16k8"),
6447 !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>],
6449 // b1 -> s32 @ m8n8k128(b1)
6451 !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>],
6453 // u4/s4 -> s32 @ m8n8k32 (u4/s4)
6455 !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>],
6457 !or(!eq(geom,"m16n8k8"),
6458 !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>],
6460 !and(!ne(ptx_elt_type,"f64"),
6461 !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>],
6463 // mma m8n8k32 requires higher PTX version
6465 !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>],
6467 !and(!eq(ptx_elt_type,"f64"),
6468 !eq(geom, "m8n8k4")) : [hasSM<80>, hasPTX<70>],
6471 !or(!eq(geom, "m16n8k16"),
6472 !eq(geom, "m16n8k4"),
6473 !eq(geom, "m16n8k32"),
6474 !eq(geom, "m16n8k64"),
6475 !eq(geom, "m8n8k128"),
6476 !eq(geom, "m16n8k128"),
6477 !eq(geom, "m16n8k256"))) : [hasSM<80>, hasPTX<70>],
6479 !and(!eq(op,"ldmatrix"),
6480 !eq(ptx_elt_type,"b16"),
6481 !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]);
6483 // template DAGs for instruction inputs/output.
6484 dag Outs = !dag(outs, ptx_regs, reg_names);
6485 dag Ins = !dag(ins, ptx_regs, reg_names);
6488 // Convert dag of arguments into a dag to match given intrinsic.
6489 class BuildPatternI<Intrinsic Intr, dag Ins> {
6490 // Build a dag pattern that matches the intrinsic call.
6491 dag ret = !foreach(tmp, Ins,
6492 !subst(imem, ADDRvar,
6493 !subst(MEMri64, ADDRri64,
6494 !subst(MEMri, ADDRri,
6495 !subst(ins, Intr, tmp)))));
6498 // Same as above, but uses PatFrag instead of an Intrinsic.
6499 class BuildPatternPF<PatFrag Intr, dag Ins> {
6500 // Build a dag pattern that matches the given PatFrag.
6501 dag ret = !foreach(tmp, Ins,
6502 !subst(imem, ADDRvar,
6503 !subst(MEMri64, ADDRri64,
6504 !subst(MEMri, ADDRri,
6505 !subst(ins, Intr, tmp)))));
6508 // Common WMMA-related fields used for building patterns for all MMA instructions.
6509 class WMMA_INSTR<string _Intr, list<dag> _Args>
6510 : NVPTXInst<(outs), (ins), "?", []> {
6511 Intrinsic Intr = !cast<Intrinsic>(_Intr);
6512 // Concatenate all arguments into a single dag.
6513 dag Args = !foldl((ins), _Args, a, b, !con(a,b));
6514 // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
6515 dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
6519 // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
6522 class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
6524 : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
6525 [!con((ins SrcOp:$src),
6526 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
6527 Requires<Frag.Predicates> {
6528 // Load/store intrinsics are overloaded on pointer's address space.
6529 // To match the right intrinsic, we need to build AS-constrained PatFrag.
6530 // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, ...).
6531 dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
6532 dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
6533 // Build PatFrag that only matches particular address space.
6534 PatFrag IntrFrag = PatFrag<PFOperands,
6536 !cond(!eq(Space, ".shared"): AS_match.shared,
6537 !eq(Space, ".global"): AS_match.global,
6538 true: AS_match.generic)>;
6539 // Build AS-constrained pattern.
6540 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6542 let OutOperandList = Frag.Outs;
6543 let InOperandList = !con(Args, (ins MmaCode:$ptx));
6544 let AsmString = "wmma.load."
6551 # "." # Frag.ptx_elt_type # " \t"
6554 # !if(WithStride, ", $ldm", "")
6559 // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
6561 class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
6562 bit WithStride, DAGOperand DstOp>
6563 : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
6564 [!con((ins DstOp:$dst),
6566 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
6567 Requires<Frag.Predicates> {
6569 // Load/store intrinsics are overloaded on pointer's address space.
6570 // To match the right intrinsic, we need to build AS-constrained PatFrag.
6571 // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, ...).
6572 dag PFOperands = !con((ops node:$dst),
6573 !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
6574 !if(WithStride, (ops node:$ldm), (ops)));
6575 // Build PatFrag that only matches particular address space.
6576 PatFrag IntrFrag = PatFrag<PFOperands,
6577 !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
6578 !cond(!eq(Space, ".shared"): AS_match.shared,
6579 !eq(Space, ".global"): AS_match.global,
6580 true: AS_match.generic)>;
6581 // Build AS-constrained pattern.
6582 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6584 let InOperandList = !con(Args, (ins MmaCode:$ptx));
6585 let OutOperandList = (outs);
6586 let AsmString = "wmma.store.d.sync"
6591 # "." # Frag.ptx_elt_type
6594 # !if(WithStride, ", $ldm", "")
6598 // Create all load/store variants
6599 defset list<WMMA_INSTR> MMA_LDSTs = {
6600 foreach layout = ["row", "col"] in {
6601 foreach stride = [false, true] in {
6602 foreach space = [".global", ".shared", ""] in {
6603 foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
6604 foreach frag = NVVM_MMA_OPS.all_ld_ops in
6605 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
6606 def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
6607 foreach frag = NVVM_MMA_OPS.all_st_ops in
6608 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
6609 def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
6616 // B1 instruction variants need extra constraints.
6617 class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
6619 WMMA_REGINFO Frag = FragA;
6620 list<Predicate> ret = !listconcat(
6622 !if(!eq(b1op, ".and.popc"), [hasSM<80>,hasPTX<71>],[])
6626 class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
6627 WMMA_REGINFO FragC, WMMA_REGINFO FragD,
6628 string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
6629 : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
6630 [FragA.Ins, FragB.Ins, FragC.Ins]>,
6631 // Requires does not seem to have effect on Instruction w/o Patterns.
6632 // We set it here anyways and propagate to the Pat<> we construct below.
6633 Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
6634 let OutOperandList = FragD.Outs;
6635 let InOperandList = !con(Args, (ins MmaCode:$ptx));
6636 string TypeList = !cond(
6637 !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
6638 # "." # FragC.ptx_elt_type,
6639 1: "." # FragD.ptx_elt_type
6640 # "." # FragA.ptx_elt_type
6641 # "." # FragB.ptx_elt_type
6642 # "." # FragC.ptx_elt_type,
6644 let AsmString = "wmma.mma"
6651 # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
6653 # !if(Satfinite, ".satfinite", "") # "\n\t\t"
6654 # FragD.regstring # ",\n\t\t"
6655 # FragA.regstring # ",\n\t\t"
6656 # FragB.regstring # ",\n\t\t"
6657 # FragC.regstring # ";";
6660 defset list<WMMA_INSTR> WMMAs = {
6661 foreach layout_a = ["row", "col"] in {
6662 foreach layout_b = ["row", "col"] in {
6663 foreach satf = [0, 1] in {
6664 foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
6665 foreach op = NVVM_MMA_OPS.all_wmma_ops in {
6666 foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
6667 if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
6668 def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
6669 WMMA_REGINFO<op[1], "wmma.mma">,
6670 WMMA_REGINFO<op[2], "wmma.mma">,
6671 WMMA_REGINFO<op[3], "wmma.mma">,
6672 layout_a, layout_b, satf, rnd, b1op>;
6683 class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
6684 WMMA_REGINFO FragC, WMMA_REGINFO FragD,
6685 string ALayout, string BLayout, int Satfinite, string b1op>
6686 : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
6687 [FragA.Ins, FragB.Ins, FragC.Ins]>,
6688 // Requires does not seem to have effect on Instruction w/o Patterns.
6689 // We set it here anyways and propagate to the Pat<> we construct below.
6690 Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
6691 let OutOperandList = FragD.Outs;
6692 let InOperandList = !con(Args, (ins MmaCode:$ptx));
6693 string TypeList = "." # FragD.ptx_elt_type
6694 # "." # FragA.ptx_elt_type
6695 # "." # FragB.ptx_elt_type
6696 # "." # FragC.ptx_elt_type;
6697 let AsmString = "mma.sync.aligned."
6701 # !if(Satfinite, ".satfinite", "")
6704 # FragD.regstring # ",\n\t\t"
6705 # FragA.regstring # ",\n\t\t"
6706 # FragB.regstring # ",\n\t\t"
6707 # FragC.regstring # ";";
// Collect every MMA record defined below into the list `MMAs`.
// One record is emitted per (layout_a, layout_b, satf, op, b1op) combination
// that NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret accepts, where
// op ranges over NVVM_MMA_OPS.all_mma_ops and b1op over NVVM_MMA_B1OPS<op>.ret.
6710 defset list<WMMA_INSTR> MMAs = {
6711 foreach layout_a = ["row", "col"] in {
6712 foreach layout_b = ["row", "col"] in {
6713 foreach satf = [0, 1] in {
6714 foreach op = NVVM_MMA_OPS.all_mma_ops in {
6715 foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
6716 if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
// op[0]..op[3] become FragA, FragB, FragC and FragD of the MMA class, each
// wrapped in a WMMA_REGINFO tagged with the "mma" operation name.
6717 def : MMA<WMMA_REGINFO<op[0], "mma">,
6718 WMMA_REGINFO<op[1], "mma">,
6719 WMMA_REGINFO<op[2], "mma">,
6720 WMMA_REGINFO<op[3], "mma">,
6721 layout_a, layout_b, satf, b1op>;
6731 // ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
// Instruction class for one ldmatrix variant.
//   Frag       - fragment description; supplies the outputs, the element type,
//                the register string and the predicates.
//   Transposed - when set, ".trans" is added to the asm text.
//   Space      - address-space suffix; ".shared" selects the shared-space
//                matcher, any other value selects the generic one.
// The single input operand is named $src.
6733 class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
6735 : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
6736 Requires<Frag.Predicates> {
6737 // Build PatFrag that only matches particular address space.
// `Intr` is not declared in this class; presumably inherited from WMMA_INSTR.
6738 PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
6739 !cond(!eq(Space, ".shared"): AS_match.shared,
6740 true: AS_match.generic)>;
6741 // Build AS-constrained pattern.
6742 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6744 let OutOperandList = Frag.Outs;
// Inputs are `Args` followed by one extra MmaCode operand named $ptx.
6745 let InOperandList = !con(Args, (ins MmaCode:$ptx));
// Assembly text: mnemonic and modifiers, then ".<element type>", the result
// register string and the bracketed source address.
6746 let AsmString = "ldmatrix.sync.aligned."
6749 # !if(Transposed, ".trans", "")
6751 # "." # Frag.ptx_elt_type
6752 # " " # Frag.regstring # ", [$src];";
6755 // Create all ldmatrix variants
// Every record defined below is collected into the list `LDMATRIXs`.
// The loops enumerate transposed (false/true), the address-space suffix
// (".shared" or none), five operand kinds for the address, and every fragment
// in NVVM_MMA_OPS.all_ldmatrix_ops; fragments rejected by
// NVVM_LDMATRIX_SUPPORTED<frag>.ret are skipped.
6756 defset list<WMMA_INSTR> LDMATRIXs = {
6757 foreach transposed = [false, true] in {
6758 foreach space = [".shared", ""] in {
// NOTE(review): `addr` is presumably the source-operand argument of LDMATRIX
// - confirm against the complete `def` statement.
6759 foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
6760 foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
6761 if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
6762 def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
6769 // Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
6770 // dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
6771 // the instruction record.
// Selection pattern for one WMMA_INSTR record `wi`:
//   source  - wi.IntrinsicPattern
//   result  - wi.Args with its `ins` operator replaced by `wi` itself,
//             concatenated (via !con) with a further dag
//   guarded - by the same predicates as the instruction (wi.Predicates)
6772 class MMA_PAT<WMMA_INSTR wi>
6773 : Pat<wi.IntrinsicPattern,
6774 !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
6776 Requires<wi.Predicates>;
6778 // Build intrinsic->instruction patterns for all MMA instructions.
// Iterates over the concatenation of the four instruction lists MMAs, WMMAs,
// MMA_LDSTs and LDMATRIXs; `mma` is bound to each WMMA_INSTR record in turn.
6779 foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
// Instructions selecting intrinsic `Intr` to a "mapa" instruction.
//   suffix - text inserted between "mapa" and the ".u32"/".u64" type.
//   Intr   - intrinsic matched by every variant.
// Four variants are defined: 32-bit or 64-bit $a/$d, each with $b taken
// either from an Int32Regs register or from an i32 immediate.
// All variants require hasSM<90> and hasPTX<78>.
6782 multiclass MAPA<string suffix, Intrinsic Intr> {
// 32-bit $a and $d, $b in a register.
6783 def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
6784 "mapa" # suffix # ".u32\t$d, $a, $b;",
6785 [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
6786 Requires<[hasSM<90>, hasPTX<78>]>;
// 32-bit $a and $d, $b as an immediate.
6787 def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
6788 "mapa" # suffix # ".u32\t$d, $a, $b;",
6789 [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
6790 Requires<[hasSM<90>, hasPTX<78>]>;
// 64-bit $a and $d; $b stays 32-bit and lives in a register.
6791 def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
6792 "mapa" # suffix # ".u64\t$d, $a, $b;",
6793 [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
6794 Requires<[hasSM<90>, hasPTX<78>]>;
// 64-bit $a and $d; $b is a 32-bit immediate.
6795 def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
6796 "mapa" # suffix # ".u64\t$d, $a, $b;",
6797 [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
6798 Requires<[hasSM<90>, hasPTX<78>]>;
// Instantiate the MAPA multiclass twice.
// Empty suffix: the emitted mnemonics are "mapa.u32" / "mapa.u64".
defm mapa
    : MAPA<"", int_nvvm_mapa>;
// ".shared::cluster" suffix: "mapa.shared::cluster.u32" / "...u64".
defm mapa_shared_cluster
    : MAPA<".shared::cluster",
           int_nvvm_mapa_shared_cluster>;
// Instructions selecting intrinsic `Intr` to a "getctarank" instruction.
//   suffix - text inserted between "getctarank" and the ".u32"/".u64" type.
//   Intr   - intrinsic matched by both variants.
// The result $d is an Int32Regs register in both variants; only the width of
// the operand $a differs. Both variants require hasSM<90> and hasPTX<78>.
6805 multiclass GETCTARANK<string suffix, Intrinsic Intr> {
// 32-bit operand.
6806 def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
6807 "getctarank" # suffix # ".u32\t$d, $a;",
6808 [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
6809 Requires<[hasSM<90>, hasPTX<78>]>;
// 64-bit operand, still a 32-bit result.
6810 def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
6811 "getctarank" # suffix # ".u64\t$d, $a;",
6812 [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
6813 Requires<[hasSM<90>, hasPTX<78>]>;
// Instantiate the GETCTARANK multiclass twice.
// Empty suffix: the emitted mnemonics are "getctarank.u32" / "getctarank.u64".
defm getctarank
    : GETCTARANK<"", int_nvvm_getctarank>;
// ".shared::cluster" suffix: "getctarank.shared::cluster.u32" / "...u64".
defm getctarank_shared_cluster
    : GETCTARANK<".shared::cluster",
                 int_nvvm_getctarank_shared_cluster>;
// Selects int_nvvm_is_explicit_cluster to a single "mov.pred" that copies
// %is_explicit_cluster into the Int1Regs result $d. The instruction takes no
// inputs.
// The predicate list is attached with a scoped `let`; this assigns the same
// Predicates field that a Requires<> mix-in would set.
let Predicates = [hasSM<90>, hasPTX<78>] in
def is_explicit_cluster
    : NVPTXInst<(outs Int1Regs:$d),
                (ins),
                "mov.pred\t$d, %is_explicit_cluster;",
                [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>;
6824 // setmaxnreg inc/dec intrinsics
// Everything defined inside this `let` scope has isConvergent set to true.
6825 let isConvergent = true in {
// One anonymous instruction per instantiation.
//   Action - spliced into the mnemonic: "setmaxnreg.<Action>.sync.aligned.u32".
//   Intr   - intrinsic to match; its operand is matched with `timm` and is
//            printed as the immediate $reg_count.
// The instruction has no outputs and requires hasSM90a and hasPTX<80>.
6826 multiclass SET_MAXNREG<string Action, Intrinsic Intr> {
6827 def : NVPTXInst<(outs), (ins i32imm:$reg_count),
6828 "setmaxnreg." # Action # ".sync.aligned.u32 $reg_count;",
6829 [(Intr timm:$reg_count)]>,
6830 Requires<[hasSM90a, hasPTX<80>]>;
// "inc" and "dec" forms, one per intrinsic.
6833 defm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
6834 defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>;