1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 def immFloat0 : PatLeaf<(fpimm), [{
10 float f = (float)N->getValueAPF().convertToFloat();
14 def immFloat1 : PatLeaf<(fpimm), [{
15 float f = (float)N->getValueAPF().convertToFloat();
19 def immDouble0 : PatLeaf<(fpimm), [{
20 double d = (double)N->getValueAPF().convertToDouble();
24 def immDouble1 : PatLeaf<(fpimm), [{
25 double d = (double)N->getValueAPF().convertToDouble();
31 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
34 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
37 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
41 // A node that will be replaced with the current PTX version.
43 SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
44 return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
46 // (i32 0) will be XForm'ed to the currently used PTX version.
47 dag version = (PTXVerXform (i32 0));
51 // Generates list of n sequential register names.
// E.g. RegSeq<3, "r">.ret -> ["r0", "r1", "r2"]
53 class RegSeq<int n, string prefix> {
54 list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
55 [prefix # !sub(n, 1)]),
59 class THREADMASK_INFO<bit sync> {
60 list<bit> ret = !if(sync, [0, 1], [0]);
63 //-----------------------------------
64 // Synchronization and shuffle functions
65 //-----------------------------------
66 let isConvergent = true in {
67 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
69 [(int_nvvm_barrier0)]>;
70 def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
72 [(int_nvvm_barrier_n Int32Regs:$src1)]>;
// Two-register form of the barrier: selects int_nvvm_barrier and prints both
// register operands after "bar.sync".
def INT_BARRIER :
  NVPTXInst<(outs),
            (ins Int32Regs:$src1, Int32Regs:$src2),
            "bar.sync \t$src1, $src2;",
            [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
76 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
78 ".reg .pred \t%p1; \n\t",
79 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
80 "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
82 [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
83 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
85 ".reg .pred \t%p1; \n\t",
86 ".reg .pred \t%p2; \n\t",
87 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
88 "bar.red.and.pred \t%p2, 0, %p1; \n\t",
89 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
91 [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
92 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
94 ".reg .pred \t%p1; \n\t",
95 ".reg .pred \t%p2; \n\t",
96 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
97 "bar.red.or.pred \t%p2, 0, %p1; \n\t",
98 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
100 [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// Immediate-operand lowering of int_nvvm_bar_sync; the immediate is printed
// directly as the operand of "bar.sync".
def INT_BAR_SYNC :
  NVPTXInst<(outs),
            (ins i32imm:$i),
            "bar.sync \t$i;",
            [(int_nvvm_bar_sync imm:$i)]>;
// int_nvvm_bar_warp_sync with an immediate operand.
// Gated on hasPTX<60> and hasSM<30>.
def INT_BAR_WARP_SYNC_I :
  NVPTXInst<(outs),
            (ins i32imm:$i),
            "bar.warp.sync \t$i;",
            [(int_nvvm_bar_warp_sync imm:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
// int_nvvm_bar_warp_sync with a 32-bit register operand.
// Gated on hasPTX<60> and hasSM<30>.
def INT_BAR_WARP_SYNC_R :
  NVPTXInst<(outs),
            (ins Int32Regs:$i),
            "bar.warp.sync \t$i;",
            [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
// int_nvvm_barrier_sync with an immediate operand.
// Gated on hasPTX<60> and hasSM<30>.
def INT_BARRIER_SYNC_I :
  NVPTXInst<(outs),
            (ins i32imm:$i),
            "barrier.sync \t$i;",
            [(int_nvvm_barrier_sync imm:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
// int_nvvm_barrier_sync with a 32-bit register operand.
// Gated on hasPTX<60> and hasSM<30>.
def INT_BARRIER_SYNC_R :
  NVPTXInst<(outs),
            (ins Int32Regs:$i),
            "barrier.sync \t$i;",
            [(int_nvvm_barrier_sync Int32Regs:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
// int_nvvm_barrier_sync_cnt: register $id, register $cnt.
def INT_BARRIER_SYNC_CNT_RR :
  NVPTXInst<(outs),
            (ins Int32Regs:$id, Int32Regs:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
// int_nvvm_barrier_sync_cnt: register $id, immediate $cnt.
def INT_BARRIER_SYNC_CNT_RI :
  NVPTXInst<(outs),
            (ins Int32Regs:$id, i32imm:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
// int_nvvm_barrier_sync_cnt: immediate $id, register $cnt.
def INT_BARRIER_SYNC_CNT_IR :
  NVPTXInst<(outs),
            (ins i32imm:$id, Int32Regs:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
// int_nvvm_barrier_sync_cnt: immediate $id, immediate $cnt.
def INT_BARRIER_SYNC_CNT_II :
  NVPTXInst<(outs),
            (ins i32imm:$id, i32imm:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
136 class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
137 list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
138 NVPTXInst<(outs), (ins), "barrier.cluster."# variant #";", [(Intr)]>,
// Cluster barrier instructions built from INT_BARRIER_CLUSTER. Each record
// passes the "barrier.cluster." suffix and the intrinsic it selects; only the
// relaxed arrive supplies its own predicate list instead of the class default.
def barrier_cluster_arrive :
  INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;

def barrier_cluster_arrive_relaxed :
  INT_BARRIER_CLUSTER<"arrive.relaxed",
                      int_nvvm_barrier_cluster_arrive_relaxed,
                      [hasPTX<80>, hasSM<90>]>;

def barrier_cluster_wait :
  INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;
149 // 'aligned' versions of the cluster barrier intrinsics
// ".aligned" counterparts of the cluster barrier records. As with the
// non-aligned set, only the relaxed arrive passes an explicit predicate list.
def barrier_cluster_arrive_aligned :
  INT_BARRIER_CLUSTER<"arrive.aligned",
                      int_nvvm_barrier_cluster_arrive_aligned>;

def barrier_cluster_arrive_relaxed_aligned :
  INT_BARRIER_CLUSTER<"arrive.relaxed.aligned",
                      int_nvvm_barrier_cluster_arrive_relaxed_aligned,
                      [hasPTX<80>, hasSM<90>]>;

def barrier_cluster_wait_aligned :
  INT_BARRIER_CLUSTER<"wait.aligned",
                      int_nvvm_barrier_cluster_wait_aligned>;
158 class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
159 bit offset_imm, bit mask_imm, bit threadmask_imm>
160 : NVPTXInst<(outs), (ins), "?", []> {
161 NVPTXRegClass rc = !cond(
162 !eq(reg, "i32"): Int32Regs,
163 !eq(reg, "f32"): Float32Regs);
164 string IntrName = "int_nvvm_shfl_"
165 # !if(sync, "sync_", "")
168 # !if(return_pred, "p", "");
169 Intrinsic Intr = !cast<Intrinsic>(IntrName);
170 let InOperandList = !con(
172 !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
175 !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
176 !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
178 let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
179 let AsmString = "shfl."
180 # !if(sync, "sync.", "")
183 # !if(return_pred, "|$pred", "") # ", "
184 # "$src, $offset, $mask"
185 # !if(sync, ", $threadmask", "")
189 !foreach(tmp, OutOperandList,
191 !subst(i32imm, imm, tmp))),
192 (set !foreach(tmp, InOperandList,
194 !subst(i32imm, imm, tmp))))
198 foreach sync = [false, true] in {
199 foreach mode = ["up", "down", "bfly", "idx"] in {
200 foreach regclass = ["i32", "f32"] in {
201 foreach return_pred = [false, true] in {
202 foreach offset_imm = [false, true] in {
203 foreach mask_imm = [false, true] in {
204 foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
205 def : SHFL_INSTR<sync, mode, regclass, return_pred,
206 offset_imm, mask_imm, threadmask_imm>,
207 Requires<!if(sync, [hasSM<30>, hasPTX<60>], [hasSM<30>, hasSHFL])>;
216 // vote.{all,any,uni,ballot}
217 multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
// Single anonymous instruction per instantiation: takes a predicate register,
// writes `regclass`, and prints "vote.<mode>".
def : NVPTXInst<(outs regclass:$dest),
                (ins Int1Regs:$pred),
                !strconcat("vote.", mode, " \t$dest, $pred;"),
                [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
// Instantiate VOTE for each mode. The three predicate-valued modes write
// Int1Regs; ballot writes Int32Regs.
defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
229 // vote.sync.{all,any,uni,ballot}
230 multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
// Immediate-mask variant: $mask is an i32 immediate, printed last.
def i : NVPTXInst<(outs regclass:$dest),
                  (ins i32imm:$mask, Int1Regs:$pred),
                  !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;"),
                  [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
// Register-mask variant: $mask lives in a 32-bit register, printed last.
def r : NVPTXInst<(outs regclass:$dest),
                  (ins Int32Regs:$mask, Int1Regs:$pred),
                  !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;"),
                  [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
// Instantiate VOTE_SYNC for each mode; ballot is the only one that produces
// a 32-bit result rather than a predicate.
defm VOTE_SYNC_ALL :
  VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY :
  VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI :
  VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT :
  VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
246 multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
// Four operand-kind combinations of match.any.sync. In the two-letter suffix
// the first letter describes $value and the second describes $mask
// (i = immediate, r = register). All four share the same asm template and
// predicates.

// Immediate value, immediate mask.
def ii : NVPTXInst<(outs Int32Regs:$dest),
                   (ins i32imm:$mask, ImmOp:$value),
                   !strconcat("match.any.sync.", ptxtype,
                              " \t$dest, $value, $mask;"),
                   [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
         Requires<[hasPTX<60>, hasSM<70>]>;

// Immediate value, register mask.
def ir : NVPTXInst<(outs Int32Regs:$dest),
                   (ins Int32Regs:$mask, ImmOp:$value),
                   !strconcat("match.any.sync.", ptxtype,
                              " \t$dest, $value, $mask;"),
                   [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
         Requires<[hasPTX<60>, hasSM<70>]>;

// Register value, immediate mask.
def ri : NVPTXInst<(outs Int32Regs:$dest),
                   (ins i32imm:$mask, regclass:$value),
                   !strconcat("match.any.sync.", ptxtype,
                              " \t$dest, $value, $mask;"),
                   [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>,
         Requires<[hasPTX<60>, hasSM<70>]>;

// Register value, register mask.
def rr : NVPTXInst<(outs Int32Regs:$dest),
                   (ins Int32Regs:$mask, regclass:$value),
                   !strconcat("match.any.sync.", ptxtype,
                              " \t$dest, $value, $mask;"),
                   [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
         Requires<[hasPTX<60>, hasSM<70>]>;
// int_nvvm_activemask: no inputs, one 32-bit result.
// Gated on hasPTX<62> and hasSM<30>.
def ACTIVEMASK :
  NVPTXInst<(outs Int32Regs:$dest),
            (ins),
            "activemask.b32 \t$dest;",
            [(set Int32Regs:$dest, (int_nvvm_activemask))]>,
  Requires<[hasPTX<62>, hasSM<30>]>;
272 defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
274 defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
277 multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
// Four operand-kind combinations of match.all.sync. Every variant produces
// two results, a 32-bit value and a predicate, printed as "$dest|$pred".
// In the suffix the first letter describes $value and the second $mask
// (i = immediate, r = register).

// Immediate value, immediate mask.
def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                   (ins i32imm:$mask, ImmOp:$value),
                   !strconcat("match.all.sync.", ptxtype,
                              " \t$dest|$pred, $value, $mask;"),
                   [(set Int32Regs:$dest, Int1Regs:$pred,
                         (IntOp imm:$mask, imm:$value))]>,
         Requires<[hasPTX<60>, hasSM<70>]>;

// Immediate value, register mask.
def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                   (ins Int32Regs:$mask, ImmOp:$value),
                   !strconcat("match.all.sync.", ptxtype,
                              " \t$dest|$pred, $value, $mask;"),
                   [(set Int32Regs:$dest, Int1Regs:$pred,
                         (IntOp Int32Regs:$mask, imm:$value))]>,
         Requires<[hasPTX<60>, hasSM<70>]>;

// Register value, immediate mask.
def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                   (ins i32imm:$mask, regclass:$value),
                   !strconcat("match.all.sync.", ptxtype,
                              " \t$dest|$pred, $value, $mask;"),
                   [(set Int32Regs:$dest, Int1Regs:$pred,
                         (IntOp imm:$mask, regclass:$value))]>,
         Requires<[hasPTX<60>, hasSM<70>]>;

// Register value, register mask.
def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                   (ins Int32Regs:$mask, regclass:$value),
                   !strconcat("match.all.sync.", ptxtype,
                              " \t$dest|$pred, $value, $mask;"),
                   [(set Int32Regs:$dest, Int1Regs:$pred,
                         (IntOp Int32Regs:$mask, regclass:$value))]>,
         Requires<[hasPTX<60>, hasSM<70>]>;
300 defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
302 defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
305 multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
// One anonymous instruction per instantiation. The mnemonic is assembled as
// "redux.sync.<BinOp>.<PTXType>"; both inputs and the result are 32-bit
// registers.
def : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$src, Int32Regs:$mask),
                !strconcat("redux.sync.", BinOp, ".", PTXType,
                           " $dst, $src, $mask;"),
                [(set Int32Regs:$dst,
                      (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
      Requires<[hasPTX<70>, hasSM<80>]>;
// Instantiate REDUX_SYNC once per intrinsic, pairing each with the operation
// and PTX type suffix used in its mnemonic.

// Unsigned min / max.
defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;

// Signed add / min / max.
defm REDUX_SYNC_ADD  : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN  : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX  : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;

// Bitwise and / xor / or.
defm REDUX_SYNC_AND  : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR  : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR   : REDUX_SYNC<"or",  "b32", int_nvvm_redux_sync_or>;
321 } // isConvergent = true
323 //-----------------------------------
324 // Explicit Memory Fence Functions
325 //-----------------------------------
326 class MEMBAR<string StrOp, Intrinsic IntOP> :
327 NVPTXInst<(outs), (ins),
// The three membar records; each hands MEMBAR the complete instruction text
// and the intrinsic it is selected from.
def INT_MEMBAR_CTA :
  MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL :
  MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
def INT_MEMBAR_SYS :
  MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
// fence.sc.cluster reuses the MEMBAR class and adds its own predicates
// (hasPTX<78>, hasSM<90>).
def INT_FENCE_SC_CLUSTER :
  MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
  Requires<[hasPTX<78>, hasSM<90>]>;
338 //-----------------------------------
339 // Async Copy Functions
340 //-----------------------------------
342 multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
// Two address-width variants with identical asm text; they differ only in
// the register class holding $addr.

// 32-bit address register.
def _32 : NVPTXInst<(outs),
                    (ins Int32Regs:$addr),
                    "cp.async.mbarrier.arrive" # NoInc # AddrSpace
                      # ".b64 [$addr];",
                    [(Intrin Int32Regs:$addr)]>,
          Requires<[hasPTX<70>, hasSM<80>]>;

// 64-bit address register.
def _64 : NVPTXInst<(outs),
                    (ins Int64Regs:$addr),
                    "cp.async.mbarrier.arrive" # NoInc # AddrSpace
                      # ".b64 [$addr];",
                    [(Intrin Int64Regs:$addr)]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
// All four {"" | ".noinc"} x {"" | ".shared"} combinations of
// cp.async.mbarrier.arrive, each bound to its matching intrinsic.
defm CP_ASYNC_MBARRIER_ARRIVE :
  CP_ASYNC_MBARRIER_ARRIVE<"", "",
                           int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<"", ".shared",
                           int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", "",
                           int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared",
                           int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
362 multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin, Intrinsic IntrinS> {
// Fixed-size copies: selected from `Intrin`, with 32- or 64-bit $dst/$src.
def _32 : NVPTXInst<(outs),
                    (ins Int32Regs:$dst, Int32Regs:$src),
                    "cp.async." # cc # ".shared.global [$dst], [$src], "
                      # cpsize # ";",
                    [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs),
                    (ins Int64Regs:$dst, Int64Regs:$src),
                    "cp.async." # cc # ".shared.global [$dst], [$src], "
                      # cpsize # ";",
                    [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
          Requires<[hasPTX<70>, hasSM<80>]>;

// Copies with an extra $src_size operand: selected from `IntrinS`.
// The "s" suffix takes $src_size in a register, "si" as an immediate.
def _32s : NVPTXInst<(outs),
                     (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size),
                     "cp.async." # cc # ".shared.global [$dst], [$src], "
                       # cpsize # ", $src_size;",
                     [(IntrinS Int32Regs:$dst, Int32Regs:$src,
                               Int32Regs:$src_size)]>,
           Requires<[hasPTX<70>, hasSM<80>]>;
def _32si : NVPTXInst<(outs),
                      (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size),
                      "cp.async." # cc # ".shared.global [$dst], [$src], "
                        # cpsize # ", $src_size;",
                      [(IntrinS Int32Regs:$dst, Int32Regs:$src,
                                imm:$src_size)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
def _64s : NVPTXInst<(outs),
                     (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size),
                     "cp.async." # cc # ".shared.global [$dst], [$src], "
                       # cpsize # ", $src_size;",
                     [(IntrinS Int64Regs:$dst, Int64Regs:$src,
                               Int32Regs:$src_size)]>,
           Requires<[hasPTX<70>, hasSM<80>]>;
def _64si : NVPTXInst<(outs),
                      (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size),
                      "cp.async." # cc # ".shared.global [$dst], [$src], "
                        # cpsize # ", $src_size;",
                      [(IntrinS Int64Regs:$dst, Int64Regs:$src,
                                imm:$src_size)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
// cp.async shared<-global instantiations. Each passes the cache qualifier,
// the copy size as printed in the asm, the plain intrinsic, and the "_s"
// intrinsic used by the $src_size variants.
defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "4",
                           int_nvvm_cp_async_ca_shared_global_4,
                           int_nvvm_cp_async_ca_shared_global_4_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "8",
                           int_nvvm_cp_async_ca_shared_global_8,
                           int_nvvm_cp_async_ca_shared_global_8_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "16",
                           int_nvvm_cp_async_ca_shared_global_16,
                           int_nvvm_cp_async_ca_shared_global_16_s>;

defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
  CP_ASYNC_SHARED_GLOBAL_I<"cg", "16",
                           int_nvvm_cp_async_cg_shared_global_16,
                           int_nvvm_cp_async_cg_shared_global_16_s>;
// Operand-less instruction selected from int_nvvm_cp_async_commit_group.
def CP_ASYNC_COMMIT_GROUP :
  NVPTXInst<(outs),
            (ins),
            "cp.async.commit_group;",
            [(int_nvvm_cp_async_commit_group)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;
// Selected from int_nvvm_cp_async_wait_group; the pattern matches $n as an
// i32 `timm` and the instruction carries it as an i32imm operand.
def CP_ASYNC_WAIT_GROUP :
  NVPTXInst<(outs),
            (ins i32imm:$n),
            "cp.async.wait_group $n;",
            [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
  Requires<[hasPTX<70>, hasSM<80>]>;
// Operand-less instruction selected from int_nvvm_cp_async_wait_all.
def CP_ASYNC_WAIT_ALL :
  NVPTXInst<(outs),
            (ins),
            "cp.async.wait_all;",
            [(int_nvvm_cp_async_wait_all)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;
420 // cp.async.bulk variants of the commit/wait group
// Operand-less instruction selected from int_nvvm_cp_async_bulk_commit_group.
// Gated on hasPTX<80> and hasSM<90>.
def CP_ASYNC_BULK_COMMIT_GROUP :
  NVPTXInst<(outs),
            (ins),
            "cp.async.bulk.commit_group;",
            [(int_nvvm_cp_async_bulk_commit_group)]>,
  Requires<[hasPTX<80>, hasSM<90>]>;
// Selected from int_nvvm_cp_async_bulk_wait_group; $n is matched as an i32
// `timm` and printed as the sole operand.
def CP_ASYNC_BULK_WAIT_GROUP :
  NVPTXInst<(outs),
            (ins i32imm:$n),
            "cp.async.bulk.wait_group $n;",
            [(int_nvvm_cp_async_bulk_wait_group (i32 timm:$n))]>,
  Requires<[hasPTX<80>, hasSM<90>]>;
// ".read" form of the bulk wait: selected from
// int_nvvm_cp_async_bulk_wait_group_read with $n matched as an i32 `timm`.
def CP_ASYNC_BULK_WAIT_GROUP_READ :
  NVPTXInst<(outs),
            (ins i32imm:$n),
            "cp.async.bulk.wait_group.read $n;",
            [(int_nvvm_cp_async_bulk_wait_group_read (i32 timm:$n))]>,
  Requires<[hasPTX<80>, hasSM<90>]>;
436 //-----------------------------------
437 // MBarrier Functions
438 //-----------------------------------
440 multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
// mbarrier.init with a 32-bit or 64-bit address register; $count is always a
// 32-bit register. No results.
def _32 : NVPTXInst<(outs),
                    (ins Int32Regs:$addr, Int32Regs:$count),
                    "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
                    [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs),
                    (ins Int64Regs:$addr, Int32Regs:$count),
                    "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
                    [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
// Unqualified and ".shared" instantiations of MBARRIER_INIT.
defm MBARRIER_INIT :
  MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED :
  MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;
455 multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
// mbarrier.inval with a 32-bit or 64-bit address register. No results.
def _32 : NVPTXInst<(outs),
                    (ins Int32Regs:$addr),
                    "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
                    [(Intrin Int32Regs:$addr)]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs),
                    (ins Int64Regs:$addr),
                    "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
                    [(Intrin Int64Regs:$addr)]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
// Unqualified and ".shared" instantiations of MBARRIER_INVAL.
defm MBARRIER_INVAL :
  MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED :
  MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>;
470 multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
// mbarrier.arrive: produces a 64-bit $state from a 32-bit or 64-bit address
// register.
def _32 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int32Regs:$addr),
                    "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
                    [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int64Regs:$addr),
                    "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
                    [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
// Unqualified and ".shared" instantiations of MBARRIER_ARRIVE.
defm MBARRIER_ARRIVE :
  MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED :
  MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
485 multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
// mbarrier.arrive.noComplete: takes an address (32- or 64-bit register) and a
// 32-bit $count, and produces a 64-bit $state.
def _32 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int32Regs:$addr, Int32Regs:$count),
                    "mbarrier.arrive.noComplete" # AddrSpace
                      # ".b64 $state, [$addr], $count;",
                    [(set Int64Regs:$state,
                          (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int64Regs:$addr, Int32Regs:$count),
                    "mbarrier.arrive.noComplete" # AddrSpace
                      # ".b64 $state, [$addr], $count;",
                    [(set Int64Regs:$state,
                          (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
// Unqualified and ".shared" instantiations of MBARRIER_ARRIVE_NOCOMPLETE.
defm MBARRIER_ARRIVE_NOCOMPLETE :
  MBARRIER_ARRIVE_NOCOMPLETE<"",
                             int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_NOCOMPLETE<".shared",
                             int_nvvm_mbarrier_arrive_noComplete_shared>;
505 multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
// mbarrier.arrive_drop: produces a 64-bit $state from a 32-bit or 64-bit
// address register.
def _32 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int32Regs:$addr),
                    "mbarrier.arrive_drop" # AddrSpace
                      # ".b64 $state, [$addr];",
                    [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int64Regs:$addr),
                    "mbarrier.arrive_drop" # AddrSpace
                      # ".b64 $state, [$addr];",
                    [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
// Unqualified and ".shared" instantiations of MBARRIER_ARRIVE_DROP.
defm MBARRIER_ARRIVE_DROP :
  MBARRIER_ARRIVE_DROP<"",
                       int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED :
  MBARRIER_ARRIVE_DROP<".shared",
                       int_nvvm_mbarrier_arrive_drop_shared>;
523 multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
// mbarrier.arrive_drop.noComplete: takes an address (32- or 64-bit register)
// and a 32-bit $count, and produces a 64-bit $state.
def _32 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int32Regs:$addr, Int32Regs:$count),
                    "mbarrier.arrive_drop.noComplete" # AddrSpace
                      # ".b64 $state, [$addr], $count;",
                    [(set Int64Regs:$state,
                          (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int64Regs:$addr, Int32Regs:$count),
                    "mbarrier.arrive_drop.noComplete" # AddrSpace
                      # ".b64 $state, [$addr], $count;",
                    [(set Int64Regs:$state,
                          (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
// Unqualified and ".shared" instantiations of
// MBARRIER_ARRIVE_DROP_NOCOMPLETE.
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<
      "", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<
      ".shared", int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
544 multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
// mbarrier.test_wait: takes an address (32- or 64-bit register) plus a 64-bit
// $state and produces a predicate result.
def _32 : NVPTXInst<(outs Int1Regs:$res),
                    (ins Int32Regs:$addr, Int64Regs:$state),
                    "mbarrier.test_wait" # AddrSpace
                      # ".b64 $res, [$addr], $state;",
                    [(set Int1Regs:$res,
                          (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int1Regs:$res),
                    (ins Int64Regs:$addr, Int64Regs:$state),
                    "mbarrier.test_wait" # AddrSpace
                      # ".b64 $res, [$addr], $state;",
                    [(set Int1Regs:$res,
                          (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
          Requires<[hasPTX<70>, hasSM<80>]>;
// Unqualified and ".shared" instantiations of MBARRIER_TEST_WAIT.
defm MBARRIER_TEST_WAIT :
  MBARRIER_TEST_WAIT<"",
                     int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED :
  MBARRIER_TEST_WAIT<".shared",
                     int_nvvm_mbarrier_test_wait_shared>;
// Instruction class for mbarrier.pending_count: maps a 64-bit $state register
// to a 32-bit result via the supplied intrinsic. Takes no address operand.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
  NVPTXInst<(outs Int32Regs:$res),
            (ins Int64Regs:$state),
            "mbarrier.pending_count.b64 $res, $state;",
            [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
  Requires<[hasPTX<70>, hasSM<80>]>;
// The single record of the MBARRIER_PENDING_COUNT class, bound to
// int_nvvm_mbarrier_pending_count.
def MBARRIER_PENDING_COUNT
  : MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
569 //-----------------------------------
571 //-----------------------------------
573 // Map min(1.0, max(0.0, x)) to sat(x)
574 // Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
576 // max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
577 // Same story for fmax, fmin.
// f32: every operand ordering of fmin(1.0, fmax(0.0, a)) is rewritten to
// CVT_f32_f32 with the CvtSAT mode.

// fmin(1.0, fmax(0.0, a))
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(1.0, fmax(a, 0.0))
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(fmax(0.0, a), 1.0)
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(fmax(a, 0.0), 1.0)
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// f64: every operand ordering of fmin(1.0, fmax(0.0, a)) is rewritten to
// CVT_f64_f64 with the CvtSAT mode.

// fmin(1.0, fmax(0.0, a))
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(1.0, fmax(a, 0.0))
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(fmax(0.0, a), 1.0)
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(fmax(a, 0.0), 1.0)
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
606 // We need a full string for OpcStr here because we need to deal with case like
608 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
609 NVPTXRegClass src_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
610 : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
612 [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
615 // We need a full string for OpcStr here because we need to deal with the case
616 // like INT_PTX_NATIVE_POWR_F.
617 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
618 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP,
619 list<Predicate> Preds = []>
620 : NVPTXInst<(outs t_regclass:$dst),
621 (ins s0_regclass:$src0, s1_regclass:$src1),
623 [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
626 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
627 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
628 NVPTXRegClass s2_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
629 : NVPTXInst<(outs t_regclass:$dst),
630 (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
632 [(set t_regclass:$dst,
633 (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>,
// int_nvvm_prmt as a three-source F_MATH_3 instruction; the result and all
// three sources are 32-bit integer registers.
def INT_NVVM_PRMT :
  F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
           Int32Regs, Int32Regs, Int32Regs, Int32Regs,
           int_nvvm_prmt>;
// int_nvvm_nanosleep in immediate (_I) and register (_R) operand forms.
// Both are gated on hasPTX<63> and hasSM<70>.
def INT_NVVM_NANOSLEEP_I :
  NVPTXInst<(outs),
            (ins i32imm:$i),
            "nanosleep.u32 \t$i;",
            [(int_nvvm_nanosleep imm:$i)]>,
  Requires<[hasPTX<63>, hasSM<70>]>;

def INT_NVVM_NANOSLEEP_R :
  NVPTXInst<(outs),
            (ins Int32Regs:$i),
            "nanosleep.u32 \t$i;",
            [(int_nvvm_nanosleep Int32Regs:$i)]>,
  Requires<[hasPTX<63>, hasSM<70>]>;
// f32 min: plain and ftz forms pass no predicate list; the NaN forms add
// hasPTX<70> and hasSM<80>.
def INT_NVVM_FMIN_F :
  F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmin_f>;

def INT_NVVM_FMIN_FTZ_F :
  F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMIN_NAN_F :
  F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmin_nan_f,
           [hasPTX<70>, hasSM<80>]>;

def INT_NVVM_FMIN_FTZ_NAN_F :
  F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmin_ftz_nan_f,
           [hasPTX<70>, hasSM<80>]>;
// f32 min, xorsign.abs family. All four variants are gated on hasPTX<72> and
// hasSM<86>.
def INT_NVVM_FMIN_XORSIGN_ABS_F :
  F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmin_xorsign_abs_f,
           [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F :
  F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmin_ftz_xorsign_abs_f,
           [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F :
  F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmin_nan_xorsign_abs_f,
           [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F :
  F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmin_ftz_nan_xorsign_abs_f,
           [hasPTX<72>, hasSM<86>]>;
// f32 max: plain and ftz forms pass no predicate list; the NaN forms add
// hasPTX<70> and hasSM<80>.
def INT_NVVM_FMAX_F :
  F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmax_f>;

def INT_NVVM_FMAX_FTZ_F :
  F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmax_ftz_f>;

def INT_NVVM_FMAX_NAN_F :
  F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmax_nan_f,
           [hasPTX<70>, hasSM<80>]>;

def INT_NVVM_FMAX_FTZ_NAN_F :
  F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmax_ftz_nan_f,
           [hasPTX<70>, hasSM<80>]>;
// f32 max, xorsign.abs family. All four variants are gated on hasPTX<72> and
// hasSM<86>.
def INT_NVVM_FMAX_XORSIGN_ABS_F :
  F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmax_xorsign_abs_f,
           [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F :
  F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmax_ftz_xorsign_abs_f,
           [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F :
  F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmax_nan_xorsign_abs_f,
           [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
  F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmax_ftz_nan_xorsign_abs_f,
           [hasPTX<72>, hasSM<86>]>;
// f64 min and max. Only the plain forms are defined here and neither passes
// a predicate list.
def INT_NVVM_FMIN_D :
  F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
           Float64Regs, Float64Regs, Float64Regs,
           int_nvvm_fmin_d>;

def INT_NVVM_FMAX_D :
  F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
           Float64Regs, Float64Regs, Float64Regs,
           int_nvvm_fmax_d>;
713 // Min Max f16, f16x2, bf16, bf16x2
716 class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
717 list<Predicate> Preds = [hasPTX<70>, hasSM<80>]> {
720 NVPTXRegClass RegClass = RC;
721 list<Predicate> Predicates = Preds;
// Generates one F_MATH_2 record per MIN_MAX_TUPLE entry P in the list below.
//   IntName - "min" or "max". It is the mnemonic prefix of the asm string and
//             selects the intrinsic: each !if picks the int_nvvm_fmin_* name
//             when IntName equals "min", otherwise the int_nvvm_fmax_* name.
// For each entry the asm string is IntName followed by P.Variant with every
// "_" replaced by ".", e.g. "min" + "_ftz_NaN_f16" gives
// "min.ftz.NaN.f16 \t$dst, $src0, $src1;".
// Entries without an explicit predicate list use the MIN_MAX_TUPLE default;
// every xorsign_abs entry overrides it with [hasPTX<72>, hasSM<86>].
// NOTE(review): the line introducing the loop variable P (presumably
// `foreach P = [`) and the closing braces are not visible in this chunk.
724 multiclass MIN_MAX<string IntName> {
// f16 scalar variants (Int16Regs).
726 MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16,
727 int_nvvm_fmax_f16), Int16Regs>,
728 MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16,
729 int_nvvm_fmax_ftz_f16), Int16Regs>,
730 MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16,
731 int_nvvm_fmax_nan_f16), Int16Regs>,
732 MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"),
733 int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>,
734 MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"),
735 int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
736 Int16Regs, [hasPTX<72>, hasSM<86>]>,
737 MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"),
738 int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
739 Int16Regs, [hasPTX<72>, hasSM<86>]>,
740 MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
741 int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
742 Int16Regs, [hasPTX<72>, hasSM<86>]>,
743 MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
744 int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
745 int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>,
// f16x2 variants (Int32Regs).
746 MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2,
747 int_nvvm_fmax_f16x2), Int32Regs>,
748 MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"),
749 int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>,
750 MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"),
751 int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>,
752 MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"),
753 int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>,
754 MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
755 int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
756 Int32Regs, [hasPTX<72>, hasSM<86>]>,
757 MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
758 int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2),
759 Int32Regs, [hasPTX<72>, hasSM<86>]>,
760 MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
761 int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2),
762 Int32Regs, [hasPTX<72>, hasSM<86>]>,
763 MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
764 int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
765 int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
766 Int32Regs, [hasPTX<72>, hasSM<86>]>,
// bf16 scalar variants (Int16Regs); no ftz forms are listed for bf16.
767 MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"),
768 int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>,
769 MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16,
770 int_nvvm_fmax_nan_bf16), Int16Regs>,
771 MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"),
772 int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
773 Int16Regs, [hasPTX<72>, hasSM<86>]>,
774 MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"),
775 int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16),
776 Int16Regs, [hasPTX<72>, hasSM<86>]>,
// bf16x2 variants (Int32Regs).
777 MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2,
778 int_nvvm_fmax_bf16x2), Int32Regs>,
779 MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"),
780 int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>,
781 MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
782 int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
783 Int32Regs, [hasPTX<72>, hasSM<86>]>,
784 MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
785 int_nvvm_fmin_nan_xorsign_abs_bf16x2,
786 int_nvvm_fmax_nan_xorsign_abs_bf16x2),
787 Int32Regs, [hasPTX<72>, hasSM<86>]>] in {
// One record per entry, named by P.Variant; result and both operands share
// P.RegClass, and P.Predicates is forwarded as the last F_MATH_2 argument.
788 def P.Variant : F_MATH_2<!strconcat(
789 IntName, !subst("_", ".", P.Variant), " \t$dst, $src0, $src1;"),
790 P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
// Instantiate MIN_MAX once for "min" and once for "max".
// NOTE(review): the prefix `INT_NVVM_FMAN` on the "max" instantiation looks
// like a typo for `INT_NVVM_FMAX`. The defm prefix becomes part of every
// generated record name, so renaming it changes those names - check for
// references elsewhere in the backend before correcting it.
794 defm INT_NVVM_FMIN : MIN_MAX<"min">;
795 defm INT_NVVM_FMAN : MIN_MAX<"max">;
// mul.hi for the int_nvvm_mulhi_* intrinsics: signed (s16/s32/s64) and
// unsigned (u16/u32/u64) forms. Result and both operands use the integer
// register class matching the type width (Int16Regs/Int32Regs/Int64Regs).
801 def INT_NVVM_MULHI_S : F_MATH_2<"mul.hi.s16 \t$dst, $src0, $src1;", Int16Regs,
802 Int16Regs, Int16Regs, int_nvvm_mulhi_s>;
803 def INT_NVVM_MULHI_US : F_MATH_2<"mul.hi.u16 \t$dst, $src0, $src1;", Int16Regs,
804 Int16Regs, Int16Regs, int_nvvm_mulhi_us>;
805 def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
806 Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
807 def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
808 Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
809 def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
810 Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
811 def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
812 Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
// Floating-point mul with an explicit rounding modifier (.rn/.rz/.rm/.rp),
// one record per int_nvvm_mul_* intrinsic. The f32 forms (Float32Regs) come
// in .ftz and non-.ftz flavours; the f64 forms (Float64Regs) have no .ftz
// flavour.
814 def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
815 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
816 def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
817 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
818 def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
819 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
820 def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
821 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
822 def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
823 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
824 def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
825 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
826 def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
827 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
828 def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
829 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
831 def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
832 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
833 def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
834 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
835 def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
836 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
837 def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
838 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
// mul24.lo for int_nvvm_mul24_i (s32) and int_nvvm_mul24_ui (u32); result and
// operands are all Int32Regs.
840 def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
841 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
842 def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
843 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
// div for the int_nvvm_div_* intrinsics. The f32 forms (Float32Regs) cover
// .approx plus the four rounding modifiers, each with and without .ftz; the
// f64 forms (Float64Regs) cover the four rounding modifiers only.
849 def INT_NVVM_DIV_APPROX_FTZ_F
850 : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
851 Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
852 def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
853 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
855 def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
856 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
857 def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
858 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
859 def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
860 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
861 def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
862 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
863 def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
864 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
865 def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
866 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
867 def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
868 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
869 def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
870 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
872 def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
873 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
874 def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
875 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
876 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
877 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
878 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
879 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
// sad for the int_nvvm_sad_* intrinsics: three-operand F_MATH_3 records
// ($src0, $src1, $src2) in signed and unsigned 16/32/64-bit forms. The result
// and all three operands share the integer register class of that width.
885 def INT_NVVM_SAD_S : F_MATH_3<"sad.s16 \t$dst, $src0, $src1, $src2;",
886 Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_s>;
887 def INT_NVVM_SAD_US : F_MATH_3<"sad.u16 \t$dst, $src0, $src1, $src2;",
888 Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_us>;
889 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
890 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
891 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
892 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
893 def INT_NVVM_SAD_LL : F_MATH_3<"sad.s64 \t$dst, $src0, $src1, $src2;",
894 Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ll>;
895 def INT_NVVM_SAD_ULL : F_MATH_3<"sad.u64 \t$dst, $src0, $src1, $src2;",
896 Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ull>;
// floor/ceil intrinsics are selected to same-type CVT instructions
// (CVT_f32_f32 / CVT_f64_f64, defined outside this chunk) rather than to
// dedicated records. floor passes the CvtRMI mode operand and ceil passes
// CvtRPI; the *_ftz_f intrinsics use the corresponding _FTZ mode.
902 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
903 (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
904 def : Pat<(int_nvvm_floor_f Float32Regs:$a),
905 (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
906 def : Pat<(int_nvvm_floor_d Float64Regs:$a),
907 (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
909 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
910 (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
911 def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
912 (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
913 def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
914 (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
// abs for int_nvvm_fabs_ftz_f / int_nvvm_fabs_f (Float32Regs) and
// int_nvvm_fabs_d (Float64Regs); single-operand F_MATH_1 records.
920 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
921 Float32Regs, int_nvvm_fabs_ftz_f>;
922 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
923 Float32Regs, int_nvvm_fabs_f>;
925 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
926 Float64Regs, int_nvvm_fabs_d>;
929 // Abs, Neg bf16, bf16x2
// abs/neg on bf16 (Int16Regs) and bf16x2 (Int32Regs). All four records are
// guarded by [hasPTX<70>, hasSM<80>].
932 def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs,
933 Int16Regs, int_nvvm_abs_bf16, [hasPTX<70>, hasSM<80>]>;
934 def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs,
935 Int32Regs, int_nvvm_abs_bf16x2, [hasPTX<70>, hasSM<80>]>;
936 def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs,
937 Int16Regs, int_nvvm_neg_bf16, [hasPTX<70>, hasSM<80>]>;
938 def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs,
939 Int32Regs, int_nvvm_neg_bf16x2, [hasPTX<70>, hasSM<80>]>;
// round/trunc/saturate intrinsics are selected to same-type CVT instructions
// (CVT_f32_f32 / CVT_f64_f64, defined outside this chunk). The mode operand is
// CvtRNI for round, CvtRZI for trunc and CvtSAT for saturate; the *_ftz_f
// intrinsics use the corresponding _FTZ mode.
945 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
946 (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
947 def : Pat<(int_nvvm_round_f Float32Regs:$a),
948 (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
949 def : Pat<(int_nvvm_round_d Float64Regs:$a),
950 (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
956 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
957 (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
958 def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
959 (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
960 def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
961 (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
967 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
968 (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
969 def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
970 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
971 def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
972 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// ex2.approx and lg2.approx for the int_nvvm_ex2_approx_* and
// int_nvvm_lg2_approx_* intrinsics. Only the f16 (Int16Regs) and f16x2
// (Int32Regs) ex2 forms carry predicates: [hasPTX<70>, hasSM<75>].
978 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
979 Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
980 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
981 Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
982 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
983 Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
984 def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
985 Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
986 def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
987 Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;
989 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
990 Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
991 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
992 Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
993 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
994 Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
// sin.approx / cos.approx on f32 (Float32Regs), with and without .ftz, for
// the int_nvvm_sin_approx_* and int_nvvm_cos_approx_* intrinsics.
1000 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
1001 Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
1002 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
1003 Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
1005 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
1006 Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
1007 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
1008 Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
// Descriptor for one fma variant consumed by the FMA_INST multiclass.
//   V     - variant suffix string (e.g. "_rn_ftz_f32").
//   I     - intrinsic the variant is selected from.
//   RC    - register class used for the result and all three operands.
//   Preds - predicates guarding the variant; defaults to the empty list.
// NOTE(review): FMA_INST reads P.Variant and P.Intr, but the fields storing V
// and I (and the closing brace) are not visible in this chunk - confirm they
// are present in the full file.
1014 class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
1015 list<Predicate> Preds = []> {
1018 NVPTXRegClass RegClass = RC;
1019 list<Predicate> Predicates = Preds;
// Generates one F_MATH_3 record per FMA_TUPLE entry P in the list below. The
// asm string is "fma" followed by P.Variant with every "_" replaced by ".",
// e.g. "_rn_ftz_f32" gives "fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;".
// The result and all three operands share P.RegClass, and P.Predicates is
// forwarded as the last F_MATH_3 argument.
// NOTE(review): the line introducing P (presumably `foreach P = [`), the
// list terminator, the `def` header preceding `F_MATH_3<` and the closing
// braces are not visible in this chunk.
1022 multiclass FMA_INST {
// f64 variants (Float64Regs), no predicates.
1024 FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
1025 FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
1026 FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
1027 FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,
// f32 variants (Float32Regs), no predicates.
1029 FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
1030 FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
1031 FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
1032 FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
1033 FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
1034 FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
1035 FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
1036 FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,
// f16 variants (Int16Regs): plain/ftz/sat need [hasPTX<42>, hasSM<53>],
// relu forms need [hasPTX<70>, hasSM<80>].
1038 FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
1039 FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs,
1040 [hasPTX<42>, hasSM<53>]>,
1041 FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs,
1042 [hasPTX<42>, hasSM<53>]>,
1043 FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs,
1044 [hasPTX<42>, hasSM<53>]>,
1045 FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs,
1046 [hasPTX<70>, hasSM<80>]>,
1047 FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs,
1048 [hasPTX<70>, hasSM<80>]>,
// bf16 variants (Int16Regs), all guarded by [hasPTX<70>, hasSM<80>].
1050 FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
1051 FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, Int16Regs,
1052 [hasPTX<70>, hasSM<80>]>,
1053 FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, Int16Regs,
1054 [hasPTX<70>, hasSM<80>]>,
1055 FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs,
1056 [hasPTX<70>, hasSM<80>]>,
1057 FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
1058 [hasPTX<70>, hasSM<80>]>,
1059 FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs,
1060 [hasPTX<70>, hasSM<80>]>,
// f16x2 variants (Int32Regs): same predicate split as the scalar f16 forms.
1062 FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs,
1063 [hasPTX<42>, hasSM<53>]>,
1064 FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs,
1065 [hasPTX<42>, hasSM<53>]>,
1066 FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs,
1067 [hasPTX<42>, hasSM<53>]>,
1068 FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
1069 Int32Regs, [hasPTX<42>, hasSM<53>]>,
1070 FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs,
1071 [hasPTX<70>, hasSM<80>]>,
1072 FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
1073 Int32Regs, [hasPTX<70>, hasSM<80>]>,
// bf16x2 variants (Int32Regs): only the plain and relu forms are listed.
1074 FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
1075 [hasPTX<70>, hasSM<80>]>,
1076 FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
1077 [hasPTX<70>, hasSM<80>]>
1080 F_MATH_3<!strconcat("fma",
1081 !subst("_", ".", P.Variant), " \t$dst, $src0, $src1, $src2;"),
1082 P.RegClass, P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
// Instantiate every fma variant listed in FMA_INST under the INT_NVVM_FMA
// prefix.
1086 defm INT_NVVM_FMA : FMA_INST;
// rcp for the int_nvvm_rcp_* intrinsics. f32 (Float32Regs): four rounding
// modifiers, each with and without .ftz. f64 (Float64Regs): four rounding
// modifiers. The final two records are the .approx.ftz forms for f32 and f64.
1092 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
1093 Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
1094 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
1095 Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
1096 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
1097 Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
1098 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
1099 Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
1100 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
1101 Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
1102 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
1103 Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
1104 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
1105 Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
1106 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
1107 Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
1109 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
1110 Float64Regs, int_nvvm_rcp_rn_d>;
1111 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
1112 Float64Regs, int_nvvm_rcp_rz_d>;
1113 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
1114 Float64Regs, int_nvvm_rcp_rm_d>;
1115 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
1116 Float64Regs, int_nvvm_rcp_rp_d>;
1118 def INT_NVVM_RCP_APPROX_FTZ_F : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
1119 Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
1120 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
1121 Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
// sqrt for the int_nvvm_sqrt_* intrinsics that name an explicit mode.
// f32 (Float32Regs): .rn/.rz/.rm/.rp and .approx, each with and without .ftz.
// f64 (Float64Regs): .rn/.rz/.rm/.rp only.
1127 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
1128 Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
1129 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
1130 Float32Regs, int_nvvm_sqrt_rn_f>;
1131 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
1132 Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
1133 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
1134 Float32Regs, int_nvvm_sqrt_rz_f>;
1135 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
1136 Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
1137 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
1138 Float32Regs, int_nvvm_sqrt_rm_f>;
1139 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
1140 Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
1141 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
1142 Float32Regs, int_nvvm_sqrt_rp_f>;
1143 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
1144 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
1145 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
1146 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
1148 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
1149 Float64Regs, int_nvvm_sqrt_rn_d>;
1150 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
1151 Float64Regs, int_nvvm_sqrt_rz_d>;
1152 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
1153 Float64Regs, int_nvvm_sqrt_rm_d>;
1154 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
1155 Float64Regs, int_nvvm_sqrt_rp_d>;
1157 // nvvm_sqrt intrinsic
// int_nvvm_sqrt_f names no mode itself, so four patterns map it onto the
// explicit-mode sqrt records according to their Requires predicates:
//   [doF32FTZ, do_SQRTF32_RN] -> INT_NVVM_SQRT_RN_FTZ_F     (sqrt.rn.ftz.f32)
//   [do_SQRTF32_RN]           -> INT_NVVM_SQRT_RN_F         (sqrt.rn.f32)
//   [doF32FTZ]                -> INT_NVVM_SQRT_APPROX_FTZ_F (sqrt.approx.ftz.f32)
//   (no predicate)            -> INT_NVVM_SQRT_APPROX_F     (sqrt.approx.f32)
// NOTE(review): these predicate sets overlap (the last pattern is
// unconditional); nothing visible in this chunk states which pattern is
// preferred when several apply - confirm that the intended priority holds.
1158 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1159 (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
1160 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1161 (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
1162 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1163 (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
1164 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1165 (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
// rsqrt.approx for the int_nvvm_rsqrt_approx_* intrinsics, on f32
// (Float32Regs) and f64 (Float64Regs), each with and without .ftz.
1171 def INT_NVVM_RSQRT_APPROX_FTZ_F
1172 : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
1173 int_nvvm_rsqrt_approx_ftz_f>;
1174 def INT_NVVM_RSQRT_APPROX_FTZ_D
1175 : F_MATH_1<"rsqrt.approx.ftz.f64 \t$dst, $src0;", Float64Regs, Float64Regs,
1176 int_nvvm_rsqrt_approx_ftz_d>;
1178 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
1179 Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
1180 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
1181 Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
1183 // 1.0f / sqrt_approx -> rsqrt_approx
// Each pattern matches an fdiv whose numerator is FloatConst1 (defined outside
// this chunk) and whose denominator is an f32 square root, and replaces the
// pair with a single rsqrt.approx record. All of them require doRsqrtOpt.
// The explicit sqrt_approx intrinsics map to the rsqrt record with the same
// ftz-ness.
1184 def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_f Float32Regs:$a)),
1185 (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>,
1186 Requires<[doRsqrtOpt]>;
1187 def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_ftz_f Float32Regs:$a)),
1188 (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>,
1189 Requires<[doRsqrtOpt]>;
1190 // same for int_nvvm_sqrt_f when non-precision sqrt is requested
// These additionally require do_SQRTF32_APPROX, and pick the non-FTZ or FTZ
// record via doNoF32FTZ / doF32FTZ.
1191 def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$a)),
1192 (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>,
1193 Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
1194 def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$a)),
1195 (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>,
1196 Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
// Same fold for the generic fsqrt node, under the same predicates.
1198 def: Pat<(fdiv FloatConst1, (fsqrt Float32Regs:$a)),
1199 (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>,
1200 Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
1201 def: Pat<(fdiv FloatConst1, (fsqrt Float32Regs:$a)),
1202 (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>,
1203 Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
// Floating-point add with an explicit rounding modifier (.rn/.rz/.rm/.rp),
// one record per int_nvvm_add_* intrinsic. The f32 forms (Float32Regs) come
// in .ftz and non-.ftz flavours; the f64 forms (Float64Regs) have no .ftz
// flavour.
1208 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
1209 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
1210 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
1211 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
1212 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
1213 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
1214 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
1215 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
1216 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
1217 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
1218 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
1219 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
1220 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
1221 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
1222 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
1223 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
1225 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
1226 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
1227 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
1228 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
1229 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
1230 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
1231 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
1232 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
// f64 -> f32 conversions: each int_nvvm_d2f_<mode>[_ftz] intrinsic selects
// CVT_f32_f64 (defined outside this chunk) with the CvtRN/CvtRZ/CvtRM/CvtRP
// mode operand named by the intrinsic, or its _FTZ counterpart.
1238 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
1239 (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
1240 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
1241 (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
1242 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
1243 (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
1244 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
1245 (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
1246 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
1247 (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
1248 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
1249 (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
1250 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
1251 (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
1252 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
1253 (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
// Conversions between f64 and 32-bit integers, selected to CVT instructions
// defined outside this chunk.
//   d2i / d2ui: f64 -> s32 / u32 (CVT_s32_f64 / CVT_u32_f64), using the
//               CvtRNI/CvtRZI/CvtRMI/CvtRPI mode operands.
//   i2d / ui2d: s32 / u32 -> f64 (CVT_f64_s32 / CVT_f64_u32), using the
//               CvtRN/CvtRZ/CvtRM/CvtRP mode operands.
1255 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
1256 (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
1257 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
1258 (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
1259 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
1260 (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
1261 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
1262 (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
1264 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
1265 (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
1266 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
1267 (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
1268 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
1269 (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
1270 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
1271 (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
1273 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
1274 (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
1275 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
1276 (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
1277 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
1278 (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
1279 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
1280 (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
1282 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
1283 (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
1284 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
1285 (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
1286 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
1287 (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
1288 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
1289 (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
// Conversions between f32 and 32-bit integers, selected to CVT instructions
// defined outside this chunk.
//   f2i / f2ui: f32 -> s32 / u32 (CVT_s32_f32 / CVT_u32_f32), using the
//               CvtRNI/CvtRZI/CvtRMI/CvtRPI mode operands, or the _FTZ
//               counterpart for the *_ftz intrinsics.
//   i2f / ui2f: s32 / u32 -> f32 (CVT_f32_s32 / CVT_f32_u32), using the
//               CvtRN/CvtRZ/CvtRM/CvtRP mode operands.
1291 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
1292 (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1293 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
1294 (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
1295 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
1296 (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1297 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
1298 (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
1299 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
1300 (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1301 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
1302 (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
1303 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
1304 (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1305 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
1306 (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
1308 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
1309 (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1310 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
1311 (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
1312 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
1313 (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1314 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
1315 (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
1316 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
1317 (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1318 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
1319 (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
1320 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
1321 (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1322 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
1323 (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
1325 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
1326 (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
1327 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
1328 (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
1329 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
1330 (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
1331 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
1332 (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
1334 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
1335 (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
1336 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
1337 (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
1338 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
1339 (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
1340 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
1341 (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
// f32 -> bf16x2 / f16x2 / bf16 conversions. The two-operand ff2* intrinsics
// select CVT_bf16x2_f32 / CVT_f16x2_f32 with both f32 sources ($a, $b); the
// one-operand f2bf16 intrinsics select CVT_bf16_f32. Each comes in rn and rz
// modes, with a *_relu variant that passes the CvtRN_RELU / CvtRZ_RELU mode.
// NOTE(review): unlike the bf16 F_MATH records in this file, these patterns
// carry no Requires list here - presumably the CVT instructions (defined
// outside this chunk) supply their own predicates; confirm.
1343 def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
1344 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
1345 def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
1346 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
1347 def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
1348 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
1349 def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
1350 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
1352 def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
1353 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
1354 def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
1355 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
1356 def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
1357 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
1358 def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
1359 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
1361 def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
1362 (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
1363 def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
1364 (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
1365 def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
1366 (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
1367 def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
1368 (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;
// Instruction body for int_nvvm_f2tf32_rna: takes a Float32Regs source $a,
// emits "cvt.rna.tf32.f32", and produces the result in Int32Regs $dest.
// NOTE(review): the `def <name> :` header for this record is not visible in
// this chunk, and no Requires list is attached here - confirm both against
// the full file.
1371 NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
1372 "cvt.rna.tf32.f32 \t$dest, $a;",
1373 [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;
// Packing/unpacking between one f64 and a pair of 32-bit integers via mov.b64.
//   LOHI_I2D: packs the two Int32Regs operands {$src0, $src1} into a
//             Float64Regs result.
//   D2I_LO / D2I_HI: unpack a Float64Regs source into a two-element vector
//             inside a scoped block that declares a scratch register %temp.
//             D2I_LO keeps the first element in $dst, D2I_HI keeps the second.
//             Presumably these are the low and high halves, per the intrinsic
//             names; confirm against the PTX mov semantics.
// NOTE(review): the final !strconcat argument of D2I_LO and D2I_HI
// (presumably the text closing the block) is not visible in this chunk.
1375 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
1376 Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
1378 def INT_NVVM_D2I_LO : F_MATH_1<
1379 !strconcat("{{\n\t",
1380 ".reg .b32 %temp; \n\t",
1381 "mov.b64 \t{$dst, %temp}, $src0;\n\t",
1383 Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
1384 def INT_NVVM_D2I_HI : F_MATH_1<
1385 !strconcat("{{\n\t",
1386 ".reg .b32 %temp; \n\t",
1387 "mov.b64 \t{%temp, $dst}, $src0;\n\t",
1389 Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
// f32 -> 64-bit integer conversions: f2ll selects CVT_s64_f32 and f2ull
// selects CVT_u64_f32 (both defined outside this chunk), using the
// CvtRNI/CvtRZI/CvtRMI/CvtRPI mode operands, or the _FTZ counterpart for the
// *_ftz intrinsics.
1391 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
1392 (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1393 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
1394 (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
1395 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
1396 (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1397 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
1398 (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
1399 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
1400 (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1401 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
1402 (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
1403 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
1404 (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1405 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
1406 (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
1408 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
1409 (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1410 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
1411 (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
1412 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
1413 (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1414 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
1415 (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
1416 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
1417 (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1418 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
1419 (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
1420 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
1421 (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1422 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
1423 (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
// f64 -> 64-bit integer conversions: d2ll selects CVT_s64_f64 and d2ull
// selects CVT_u64_f64 (both defined outside this chunk), using the
// CvtRNI/CvtRZI/CvtRMI/CvtRPI mode operands.
1425 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
1426 (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
1427 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
1428 (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
1429 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
1430 (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
1431 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
1432 (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
1434 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
1435 (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
1436 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
1437 (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
1438 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
1439 (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
1440 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
1441 (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
1443 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
1444 (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
1445 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
1446 (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
1447 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
1448 (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
1449 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
1450 (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
// Selection patterns for int_nvvm_ull2f_{rn,rz,rm,rp} on an Int64Regs
// operand: each lowers to CVT_f32_u64 with CvtRN/CvtRZ/CvtRM/CvtRP.
1452 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
1453 (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
1454 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
1455 (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
1456 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
1457 (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
1458 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
1459 (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
// Selection patterns for int_nvvm_ll2d_{rn,rz,rm,rp} on an Int64Regs
// operand: each lowers to CVT_f64_s64 with CvtRN/CvtRZ/CvtRM/CvtRP.
1461 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
1462 (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
1463 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
1464 (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
1465 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
1466 (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
1467 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
1468 (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
// Selection patterns for int_nvvm_ull2d_{rn,rz,rm,rp} on an Int64Regs
// operand: each lowers to CVT_f64_u64 with CvtRN/CvtRZ/CvtRM/CvtRP.
1470 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
1471 (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
1472 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
1473 (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
1474 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
1475 (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
1476 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
1477 (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
// Selection patterns for int_nvvm_f2h_rn and its _ftz form on a Float32Regs
// operand: both lower to CVT_f16_f32, with CvtRN_FTZ for the _ftz intrinsic
// and CvtRN otherwise. Only the rn mode is defined here.
1480 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
1481 (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
1482 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
1483 (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
// Bitcast intrinsics between integer and floating-point registers of the
// same width. Each is an F_MATH_1 instantiation (F_MATH_1 is defined outside
// this chunk) whose asm string is a plain mov.b32 / mov.b64 from $src0 to
// $dst.
// NOTE(review): the two register-class arguments appear to be passed in
// (result, source) order, judging by F2I using Int32Regs then Float32Regs --
// confirm against the F_MATH_1 definition.
1489 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
1490 Float32Regs, int_nvvm_bitcast_f2i>;
1491 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
1492 Int32Regs, int_nvvm_bitcast_i2f>;
1494 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
1495 Int64Regs, int_nvvm_bitcast_ll2d>;
1496 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
1497 Float64Regs, int_nvvm_bitcast_d2ll>;
// Base class for the fns.b32 instruction variants.
//   ins      - input operand dag; must name its operands $mask, $base and
//              $offset because the asm string refers to them by name.
//   Operands - the source pattern dag whose result is assigned to $dst.
// The result is always an Int32Regs register, and the instruction is only
// available when both hasPTX<60> and hasSM<30> hold.
1503 class INT_FNS_MBO<dag ins, dag Operands>
1504 : NVPTXInst<(outs Int32Regs:$dst), ins,
1505 "fns.b32 \t$dst, $mask, $base, $offset;",
1506 [(set Int32Regs:$dst, Operands )]>,
1507 Requires<[hasPTX<60>, hasSM<30>]>;
// All eight register/immediate combinations of int_nvvm_fns. The three-letter
// suffix gives the operand kind for (mask, base, offset) in that order:
// 'r' = Int32Regs register operand, 'i' = i32imm operand matched with `imm`.
1509 def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
1510 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1511 def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
1512 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
1513 def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
1514 (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
1515 def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
1516 (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
1517 def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
1518 (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1519 def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
1520 (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
1521 def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
1522 (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
1523 def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
1524 (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
1526 //-----------------------------------
1528 //-----------------------------------
// PatFrag wrappers used by the atomic definitions below. Each forwards its
// operand list and fragment unchanged and supplies AS_match.global,
// AS_match.shared or AS_match.generic (defined earlier in the file) as the
// PatFrag predicate, so the fragment is restricted by that address-space
// check.
1530 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
1531 : PatFrag<ops, frag, AS_match.global>;
1532 class ATOMIC_SHARED_CHK <dag ops, dag frag>
1533 : PatFrag<ops, frag, AS_match.shared>;
1534 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
1535 : PatFrag<ops, frag, AS_match.generic>;
// Defines the two-operand atomic instruction for one pointer width.
//   ptrT/ptrclass - value type and register class of the address operand.
//   regT/regclass - value type and register class of the data operand/result.
//   SpaceStr, OpcStr, TypeStr - concatenated after "atom" (in that order) to
//                   form the mnemonic.
//   IntOp         - PatFrag matched as (IntOp addr, b).
//   IMMType/IMM   - operand type and leaf node for the immediate form.
//   Pred          - predicates required by the instruction.
// Two records are produced: `reg` ($b in a register) and `imm` ($b is an
// immediate).
// NOTE(review): in this view the `reg` def ends with a trailing comma and no
// Requires<...> clause, and the multiclass's closing brace is not visible --
// confirm against the complete file.
1537 multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1538 ValueType regT, NVPTXRegClass regclass,
1539 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1540 Operand IMMType, SDNode IMM, list<Predicate> Pred> {
1541 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1542 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
1543 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
1545 def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
1546 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
1547 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
// When TypeStr is ".f16" or ".bf16" the immediate form is given the single
// predicate Predicate<"false"> instead of Pred, i.e. it is never selectable.
1548 Requires<!if(!or(!eq(TypeStr, ".f16"), !eq(TypeStr, ".bf16")), [Predicate<"false">], Pred)>;
// Instantiates F_ATOMIC_2_imp twice: `p32` with an i32 address in Int32Regs
// and `p64` with an i64 address in Int64Regs. All other arguments are
// forwarded unchanged. Pred defaults to the empty list.
// NOTE(review): the closing brace of this multiclass is not visible in this
// view.
1550 multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1551 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
1552 list<Predicate> Pred = []> {
1553 defm p32 : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1554 IntOp, IMMType, IMM, Pred>;
1555 defm p64 : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1556 IntOp, IMMType, IMM, Pred>;
// Two-operand atomic whose second operand is negated before use. Only a
// register form (`reg`) is defined. The visible asm fragments emit, in order:
//   .reg .s<TypeStr> temp;                          (declare a scratch register)
//   neg.s<TypeStr> temp, $b;                        (temp = -$b)
//   atom<SpaceStr><OpcStr>.u<TypeStr> $dst, [$addr], temp;
// so TypeStr is expected to be a bare bit width (callers pass "32"/"64").
// NOTE(review): the lines that open and close the asm-string expression, the
// Requires<...> clause and the closing brace are not visible in this view --
// confirm against the complete file.
1559 // has 2 operands, neg the second one
1560 multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1561 ValueType regT, NVPTXRegClass regclass,
1562 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1563 list<Predicate> Pred> {
1564 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1567 ".reg \t.s", TypeStr, " temp; \n\t",
1568 "neg.s", TypeStr, " \ttemp, $b; \n\t",
1569 "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
1571 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
// Instantiates F_ATOMIC_2_NEG_imp for 32-bit (`p32`: i32 / Int32Regs) and
// 64-bit (`p64`: i64 / Int64Regs) addresses. Pred defaults to the empty list.
// NOTE(review): the tail of each defm's argument list and the closing brace
// are not visible in this view -- confirm against the complete file.
1574 multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass, string SpaceStr,
1575 string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
1576 defm p32: F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1578 defm p64: F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
// Defines the three-operand atomic instruction (address, $b, $c) for one
// pointer width. The mnemonic is "atom" # SpaceStr # OpcStr # TypeStr and the
// matched pattern is (IntOp addr, b, c). Four records cover every
// register/immediate combination of the two data operands:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediate operands use IMMType in the ins list and the generic `imm` leaf
// in the pattern.
// NOTE(review): each def ends with a trailing comma and no visible
// Requires<...> clause, and the closing brace is not visible in this view --
// confirm against the complete file.
1583 multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1584 ValueType regT, NVPTXRegClass regclass,
1585 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1586 Operand IMMType, list<Predicate> Pred> {
1587 def reg : NVPTXInst<(outs regclass:$dst),
1588 (ins ptrclass:$addr, regclass:$b, regclass:$c),
1589 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1590 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>,
1593 def imm1 : NVPTXInst<(outs regclass:$dst),
1594 (ins ptrclass:$addr, IMMType:$b, regclass:$c),
1595 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1596 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>,
1599 def imm2 : NVPTXInst<(outs regclass:$dst),
1600 (ins ptrclass:$addr, regclass:$b, IMMType:$c),
1601 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
1602 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>,
1605 def imm3 : NVPTXInst<(outs regclass:$dst),
1606 (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
1607 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1608 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
// Instantiates F_ATOMIC_3_imp twice: `p32` with an i32 address in Int32Regs
// and `p64` with an i64 address in Int64Regs. All other arguments are
// forwarded unchanged. Pred defaults to the empty list.
// NOTE(review): the closing brace of this multiclass is not visible in this
// view.
1611 multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1612 string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
1613 defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1614 IntOp, IMMType, Pred>;
1615 defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1616 IntOp, IMMType, Pred>;
// Address-space-checked PatFrags for atomic add. The suffix selects the
// check: _g = ATOMIC_GLOBAL_CHK, _s = ATOMIC_SHARED_CHK,
// _gen = ATOMIC_GENERIC_CHK.
// The _i32 / _i64 fragments wrap atomic_load_add_i32 / atomic_load_add_i64.
// The fragments without a width (atomic_load_add_g/_s/_gen) wrap
// atomic_load_fadd, so despite the name they match the floating-point add.
1621 def atomic_load_add_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1622 (atomic_load_add_i32 node:$a, node:$b)>;
1623 def atomic_load_add_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1624 (atomic_load_add_i32 node:$a, node:$b)>;
1625 def atomic_load_add_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1626 (atomic_load_add_i32 node:$a, node:$b)>;
1627 def atomic_load_add_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1628 (atomic_load_add_i64 node:$a, node:$b)>;
1629 def atomic_load_add_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1630 (atomic_load_add_i64 node:$a, node:$b)>;
1631 def atomic_load_add_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1632 (atomic_load_add_i64 node:$a, node:$b)>;
1633 def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1634 (atomic_load_fadd node:$a, node:$b)>;
1635 def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1636 (atomic_load_fadd node:$a, node:$b)>;
1637 def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1638 (atomic_load_fadd node:$a, node:$b)>;
// Atomic add instructions, one defm per address space and operand type.
// F_ATOMIC_2 arguments are: value type, register class, state-space suffix
// ("" is used with the generic PatFrag), type suffix, opcode suffix, the
// address-space-checked PatFrag, then the immediate operand type and leaf.
// The *_GEN_*_USE_G variants pair the generic (_gen) PatFrag with the
// ".global" space string.

// Integer add: .u32 / .u64, no extra predicates.
1640 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
1641 atomic_load_add_i32_g, i32imm, imm>;
1642 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".add",
1643 atomic_load_add_i32_s, i32imm, imm>;
1644 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".add",
1645 atomic_load_add_i32_gen, i32imm, imm>;
1646 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1647 ".add", atomic_load_add_i32_gen, i32imm, imm>;
1649 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
1650 atomic_load_add_i64_g, i64imm, imm>;
1651 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".add",
1652 atomic_load_add_i64_s, i64imm, imm>;
1653 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
1654 atomic_load_add_i64_gen, i64imm, imm>;
1655 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1656 ".add", atomic_load_add_i64_gen, i64imm, imm>;
// f16 add: value lives in Int16Regs, opcode ".add.noftz", requires
// hasSM<70> and hasPTX<63>.
1658 defm INT_PTX_ATOM_ADD_G_F16 : F_ATOMIC_2<f16, Int16Regs, ".global", ".f16", ".add.noftz",
1659 atomic_load_add_g, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
1660 defm INT_PTX_ATOM_ADD_S_F16 : F_ATOMIC_2<f16, Int16Regs, ".shared", ".f16", ".add.noftz",
1661 atomic_load_add_s, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
1662 defm INT_PTX_ATOM_ADD_GEN_F16 : F_ATOMIC_2<f16, Int16Regs, "", ".f16", ".add.noftz",
1663 atomic_load_add_gen, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
// bf16 add: value lives in Int16Regs, opcode ".add.noftz", requires
// hasSM<90> and hasPTX<78>.
1665 defm INT_PTX_ATOM_ADD_G_BF16 : F_ATOMIC_2<bf16, Int16Regs, ".global", ".bf16", ".add.noftz",
1666 atomic_load_add_g, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
1667 defm INT_PTX_ATOM_ADD_S_BF16 : F_ATOMIC_2<bf16, Int16Regs, ".shared", ".bf16", ".add.noftz",
1668 atomic_load_add_s, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
1669 defm INT_PTX_ATOM_ADD_GEN_BF16 : F_ATOMIC_2<bf16, Int16Regs, "", ".bf16", ".add.noftz",
1670 atomic_load_add_gen, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
// f32 add: no extra predicates.
1672 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
1673 atomic_load_add_g, f32imm, fpimm>;
1674 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
1675 atomic_load_add_s, f32imm, fpimm>;
1676 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<f32, Float32Regs, "", ".f32", ".add",
1677 atomic_load_add_gen, f32imm, fpimm>;
// f64 add: gated on hasAtomAddF64.
1679 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<f64, Float64Regs, ".global", ".f64", ".add",
1680 atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
1681 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<f64, Float64Regs, ".shared", ".f64", ".add",
1682 atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
1683 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<f64, Float64Regs, "", ".f64", ".add",
1684 atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
// Address-space-checked PatFrags for atomic subtract, wrapping
// atomic_load_sub_i32 / atomic_load_sub_i64 with the global (_g),
// shared (_s) and generic (_gen) checks.
1688 def atomic_load_sub_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1689 (atomic_load_sub_i32 node:$a, node:$b)>;
1690 def atomic_load_sub_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1691 (atomic_load_sub_i32 node:$a, node:$b)>;
1692 def atomic_load_sub_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1693 (atomic_load_sub_i32 node:$a, node:$b)>;
1694 def atomic_load_sub_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1695 (atomic_load_sub_i64 node:$a, node:$b)>;
1696 def atomic_load_sub_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1697 (atomic_load_sub_i64 node:$a, node:$b)>;
1698 def atomic_load_sub_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1699 (atomic_load_sub_i64 node:$a, node:$b)>;
// Atomic subtract instructions. They are built with F_ATOMIC_2_NEG and the
// ".add" opcode, i.e. the subtract is emitted as an atomic add of the negated
// operand. TypeStr is the bare width ("32"/"64") because F_ATOMIC_2_NEG_imp
// prepends ".s"/".u" itself. Only register forms exist (F_ATOMIC_2_NEG takes
// no immediate arguments). The *_GEN_*_USE_G variants pair the generic
// PatFrag with the ".global" space string.
1701 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
1702 atomic_load_sub_i32_g>;
1703 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
1704 atomic_load_sub_i64_g>;
1705 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<i32, Int32Regs, "", "32", ".add",
1706 atomic_load_sub_i32_gen>;
1707 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32",
1708 ".add", atomic_load_sub_i32_gen>;
1709 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".shared", "32", ".add",
1710 atomic_load_sub_i32_s>;
1711 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".shared", "64", ".add",
1712 atomic_load_sub_i64_s>;
1713 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<i64, Int64Regs, "", "64", ".add",
1714 atomic_load_sub_i64_gen>;
1715 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64",
1716 ".add", atomic_load_sub_i64_gen>;
// Address-space-checked PatFrags for atomic swap, wrapping atomic_swap_i32 /
// atomic_swap_i64 with the global (_g), shared (_s) and generic (_gen)
// checks.
1720 def atomic_swap_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1721 (atomic_swap_i32 node:$a, node:$b)>;
1722 def atomic_swap_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1723 (atomic_swap_i32 node:$a, node:$b)>;
1724 def atomic_swap_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1725 (atomic_swap_i32 node:$a, node:$b)>;
1726 def atomic_swap_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1727 (atomic_swap_i64 node:$a, node:$b)>;
1728 def atomic_swap_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1729 (atomic_swap_i64 node:$a, node:$b)>;
1730 def atomic_swap_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1731 (atomic_swap_i64 node:$a, node:$b)>;
// Atomic swap instructions: opcode ".exch" with the untyped ".b32"/".b64"
// type suffixes, for global, shared and generic addresses. No extra
// predicates are attached. The *_GEN_*_USE_G variants pair the generic
// PatFrag with the ".global" space string.
1733 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
1734 atomic_swap_i32_g, i32imm, imm>;
1735 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".exch",
1736 atomic_swap_i32_s, i32imm, imm>;
1737 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".exch",
1738 atomic_swap_i32_gen, i32imm, imm>;
1739 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1740 ".exch", atomic_swap_i32_gen, i32imm, imm>;
1741 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
1742 atomic_swap_i64_g, i64imm, imm>;
1743 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".exch",
1744 atomic_swap_i64_s, i64imm, imm>;
1745 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".exch",
1746 atomic_swap_i64_gen, i64imm, imm>;
1747 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1748 ".exch", atomic_swap_i64_gen, i64imm, imm>;
// Address-space-checked PatFrags for atomic max. The atomic_load_max_*
// fragments wrap atomic_load_max_i32/_i64 and the atomic_load_umax_*
// fragments wrap atomic_load_umax_i32/_i64, each with the global (_g),
// shared (_s) and generic (_gen) checks.
// The two _g max fragments place the separating comma at the start of the
// continuation line; this is only a formatting difference from the others.
1752 def atomic_load_max_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1753 , (atomic_load_max_i32 node:$a, node:$b)>;
1754 def atomic_load_max_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1755 (atomic_load_max_i32 node:$a, node:$b)>;
1756 def atomic_load_max_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1757 (atomic_load_max_i32 node:$a, node:$b)>;
1758 def atomic_load_max_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1759 , (atomic_load_max_i64 node:$a, node:$b)>;
1760 def atomic_load_max_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1761 (atomic_load_max_i64 node:$a, node:$b)>;
1762 def atomic_load_max_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1763 (atomic_load_max_i64 node:$a, node:$b)>;
1764 def atomic_load_umax_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1765 (atomic_load_umax_i32 node:$a, node:$b)>;
1766 def atomic_load_umax_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1767 (atomic_load_umax_i32 node:$a, node:$b)>;
1768 def atomic_load_umax_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1769 (atomic_load_umax_i32 node:$a, node:$b)>;
1770 def atomic_load_umax_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1771 (atomic_load_umax_i64 node:$a, node:$b)>;
1772 def atomic_load_umax_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1773 (atomic_load_umax_i64 node:$a, node:$b)>;
1774 def atomic_load_umax_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1775 (atomic_load_umax_i64 node:$a, node:$b)>;
// Atomic max instructions (opcode ".max"). The MAX defms use the max
// PatFrags with ".s32"/".s64"; the UMAX defms use the umax PatFrags with
// ".u32"/".u64". All 64-bit variants carry the predicate list [hasSM<32>];
// the 32-bit variants have none. The *_GEN_*_USE_G variants pair the generic
// PatFrag with the ".global" space string.
1777 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
1778 ".max", atomic_load_max_i32_g, i32imm, imm>;
1779 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
1780 ".max", atomic_load_max_i32_s, i32imm, imm>;
1781 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".max",
1782 atomic_load_max_i32_gen, i32imm, imm>;
1783 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1784 ".s32", ".max", atomic_load_max_i32_gen, i32imm, imm>;
1785 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
1786 ".max", atomic_load_max_i64_g, i64imm, imm, [hasSM<32>]>;
1787 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
1788 ".max", atomic_load_max_i64_s, i64imm, imm, [hasSM<32>]>;
1789 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".max",
1790 atomic_load_max_i64_gen, i64imm, imm, [hasSM<32>]>;
1791 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1792 ".s64", ".max", atomic_load_max_i64_gen, i64imm, imm, [hasSM<32>]>;
1793 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1794 ".max", atomic_load_umax_i32_g, i32imm, imm>;
1795 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
1796 ".max", atomic_load_umax_i32_s, i32imm, imm>;
1797 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".max",
1798 atomic_load_umax_i32_gen, i32imm, imm>;
1799 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1800 ".u32", ".max", atomic_load_umax_i32_gen, i32imm, imm>;
1801 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1802 ".max", atomic_load_umax_i64_g, i64imm, imm, [hasSM<32>]>;
1803 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
1804 ".max", atomic_load_umax_i64_s, i64imm, imm, [hasSM<32>]>;
1805 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".max",
1806 atomic_load_umax_i64_gen, i64imm, imm, [hasSM<32>]>;
1807 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1808 ".u64", ".max", atomic_load_umax_i64_gen, i64imm, imm, [hasSM<32>]>;
// Address-space-checked PatFrags for atomic min. The atomic_load_min_*
// fragments wrap atomic_load_min_i32/_i64 and the atomic_load_umin_*
// fragments wrap atomic_load_umin_i32/_i64, each with the global (_g),
// shared (_s) and generic (_gen) checks.
1812 def atomic_load_min_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1813 (atomic_load_min_i32 node:$a, node:$b)>;
1814 def atomic_load_min_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1815 (atomic_load_min_i32 node:$a, node:$b)>;
1816 def atomic_load_min_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1817 (atomic_load_min_i32 node:$a, node:$b)>;
1818 def atomic_load_min_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1819 (atomic_load_min_i64 node:$a, node:$b)>;
1820 def atomic_load_min_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1821 (atomic_load_min_i64 node:$a, node:$b)>;
1822 def atomic_load_min_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1823 (atomic_load_min_i64 node:$a, node:$b)>;
1824 def atomic_load_umin_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1825 (atomic_load_umin_i32 node:$a, node:$b)>;
1826 def atomic_load_umin_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1827 (atomic_load_umin_i32 node:$a, node:$b)>;
1828 def atomic_load_umin_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1829 (atomic_load_umin_i32 node:$a, node:$b)>;
1830 def atomic_load_umin_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1831 (atomic_load_umin_i64 node:$a, node:$b)>;
1832 def atomic_load_umin_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1833 (atomic_load_umin_i64 node:$a, node:$b)>;
1834 def atomic_load_umin_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1835 (atomic_load_umin_i64 node:$a, node:$b)>;
// Atomic min instructions (opcode ".min"). The MIN defms use the min
// PatFrags with ".s32"/".s64"; the UMIN defms use the umin PatFrags with
// ".u32"/".u64". All 64-bit variants carry the predicate list [hasSM<32>];
// the 32-bit variants have none. The *_GEN_*_USE_G variants pair the generic
// PatFrag with the ".global" space string.
1837 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
1838 ".min", atomic_load_min_i32_g, i32imm, imm>;
1839 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
1840 ".min", atomic_load_min_i32_s, i32imm, imm>;
1841 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".min",
1842 atomic_load_min_i32_gen, i32imm, imm>;
1843 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1844 ".s32", ".min", atomic_load_min_i32_gen, i32imm, imm>;
1845 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
1846 ".min", atomic_load_min_i64_g, i64imm, imm, [hasSM<32>]>;
1847 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
1848 ".min", atomic_load_min_i64_s, i64imm, imm, [hasSM<32>]>;
1849 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".min",
1850 atomic_load_min_i64_gen, i64imm, imm, [hasSM<32>]>;
1851 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1852 ".s64", ".min", atomic_load_min_i64_gen, i64imm, imm, [hasSM<32>]>;
1853 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1854 ".min", atomic_load_umin_i32_g, i32imm, imm>;
1855 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
1856 ".min", atomic_load_umin_i32_s, i32imm, imm>;
1857 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".min",
1858 atomic_load_umin_i32_gen, i32imm, imm>;
1859 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1860 ".u32", ".min", atomic_load_umin_i32_gen, i32imm, imm>;
1861 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1862 ".min", atomic_load_umin_i64_g, i64imm, imm, [hasSM<32>]>;
1863 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
1864 ".min", atomic_load_umin_i64_s, i64imm, imm, [hasSM<32>]>;
1865 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".min",
1866 atomic_load_umin_i64_gen, i64imm, imm, [hasSM<32>]>;
1867 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1868 ".u64", ".min", atomic_load_umin_i64_gen, i64imm, imm, [hasSM<32>]>;
// Address-space-checked PatFrags for atomic increment and decrement. Unlike
// the other atomic groups, these wrap NVVM intrinsics
// (int_nvvm_atomic_load_inc_32 / int_nvvm_atomic_load_dec_32) rather than
// atomic_load_* nodes. Only 32-bit fragments are defined.
1870 // atom_inc atom_dec
1872 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1873 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1874 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1875 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1876 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1877 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1878 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1879 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1880 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1881 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1882 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1883 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
// Atomic increment (".inc") and decrement (".dec") instructions, 32-bit only,
// with the ".u32" type suffix. No extra predicates are attached. The
// *_GEN_32_USE_G variants pair the generic PatFrag with the ".global" space
// string.
1885 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
1886 atomic_load_inc_32_g, i32imm, imm>;
1887 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".inc",
1888 atomic_load_inc_32_s, i32imm, imm>;
1889 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".inc",
1890 atomic_load_inc_32_gen, i32imm, imm>;
1891 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1892 ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1893 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
1894 atomic_load_dec_32_g, i32imm, imm>;
1895 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".dec",
1896 atomic_load_dec_32_s, i32imm, imm>;
1897 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".dec",
1898 atomic_load_dec_32_gen, i32imm, imm>;
1899 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1900 ".dec", atomic_load_dec_32_gen, i32imm, imm>;
// Address-space-checked PatFrags for atomic AND, wrapping
// atomic_load_and_i32 / atomic_load_and_i64 with the global (_g),
// shared (_s) and generic (_gen) checks.
1904 def atomic_load_and_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1905 (atomic_load_and_i32 node:$a, node:$b)>;
1906 def atomic_load_and_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1907 (atomic_load_and_i32 node:$a, node:$b)>;
1908 def atomic_load_and_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1909 (atomic_load_and_i32 node:$a, node:$b)>;
1910 def atomic_load_and_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1911 (atomic_load_and_i64 node:$a, node:$b)>;
1912 def atomic_load_and_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1913 (atomic_load_and_i64 node:$a, node:$b)>;
1914 def atomic_load_and_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1915 (atomic_load_and_i64 node:$a, node:$b)>;
// Atomic AND instructions: opcode ".and" with ".b32"/".b64" type suffixes.
// The 64-bit variants carry the predicate list [hasSM<32>]; the 32-bit
// variants have none. The *_GEN_*_USE_G variants pair the generic PatFrag
// with the ".global" space string.
1917 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
1918 atomic_load_and_i32_g, i32imm, imm>;
1919 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".and",
1920 atomic_load_and_i32_s, i32imm, imm>;
1921 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".and",
1922 atomic_load_and_i32_gen, i32imm, imm>;
1923 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1924 ".and", atomic_load_and_i32_gen, i32imm, imm>;
1925 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
1926 atomic_load_and_i64_g, i64imm, imm, [hasSM<32>]>;
1927 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".and",
1928 atomic_load_and_i64_s, i64imm, imm, [hasSM<32>]>;
1929 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".and",
1930 atomic_load_and_i64_gen, i64imm, imm, [hasSM<32>]>;
1931 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1932 ".and", atomic_load_and_i64_gen, i64imm, imm, [hasSM<32>]>;
// Address-space-checked PatFrags for atomic OR, wrapping
// atomic_load_or_i32 / atomic_load_or_i64 with the global (_g),
// shared (_s) and generic (_gen) checks.
1936 def atomic_load_or_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1937 (atomic_load_or_i32 node:$a, node:$b)>;
1938 def atomic_load_or_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1939 (atomic_load_or_i32 node:$a, node:$b)>;
1940 def atomic_load_or_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1941 (atomic_load_or_i32 node:$a, node:$b)>;
1942 def atomic_load_or_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1943 (atomic_load_or_i64 node:$a, node:$b)>;
1944 def atomic_load_or_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1945 (atomic_load_or_i64 node:$a, node:$b)>;
1946 def atomic_load_or_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1947 (atomic_load_or_i64 node:$a, node:$b)>;
// Atomic OR instructions: opcode ".or" with ".b32"/".b64" type suffixes.
// The 64-bit variants carry the predicate list [hasSM<32>]; the 32-bit
// variants have none. The *_GEN_*_USE_G variants pair the generic PatFrag
// with the ".global" space string. Within each width the defms are listed in
// G, GEN, GEN_USE_G, S order, which differs from the AND group's ordering.
1949 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
1950 atomic_load_or_i32_g, i32imm, imm>;
1951 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".or",
1952 atomic_load_or_i32_gen, i32imm, imm>;
1953 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1954 ".or", atomic_load_or_i32_gen, i32imm, imm>;
1955 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".or",
1956 atomic_load_or_i32_s, i32imm, imm>;
1957 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
1958 atomic_load_or_i64_g, i64imm, imm, [hasSM<32>]>;
1959 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".or",
1960 atomic_load_or_i64_gen, i64imm, imm, [hasSM<32>]>;
1961 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1962 ".or", atomic_load_or_i64_gen, i64imm, imm, [hasSM<32>]>;
1963 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".or",
1964 atomic_load_or_i64_s, i64imm, imm, [hasSM<32>]>;
// Address-space-checked PatFrags for atomic XOR, wrapping
// atomic_load_xor_i32 / atomic_load_xor_i64 with the global (_g),
// shared (_s) and generic (_gen) checks.
1968 def atomic_load_xor_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1969 (atomic_load_xor_i32 node:$a, node:$b)>;
1970 def atomic_load_xor_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1971 (atomic_load_xor_i32 node:$a, node:$b)>;
1972 def atomic_load_xor_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1973 (atomic_load_xor_i32 node:$a, node:$b)>;
1974 def atomic_load_xor_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1975 (atomic_load_xor_i64 node:$a, node:$b)>;
1976 def atomic_load_xor_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1977 (atomic_load_xor_i64 node:$a, node:$b)>;
1978 def atomic_load_xor_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1979 (atomic_load_xor_i64 node:$a, node:$b)>;
// Atomic XOR instructions: opcode ".xor" with ".b32"/".b64" type suffixes.
// The 64-bit variants carry the predicate list [hasSM<32>]; the 32-bit
// variants have none. The *_GEN_*_USE_G variants pair the generic PatFrag
// with the ".global" space string.
1981 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
1982 atomic_load_xor_i32_g, i32imm, imm>;
1983 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor",
1984 atomic_load_xor_i32_s, i32imm, imm>;
1985 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor",
1986 atomic_load_xor_i32_gen, i32imm, imm>;
1987 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1988 ".xor", atomic_load_xor_i32_gen, i32imm, imm>;
1989 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
1990 atomic_load_xor_i64_g, i64imm, imm, [hasSM<32>]>;
1991 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor",
1992 atomic_load_xor_i64_s, i64imm, imm, [hasSM<32>]>;
1993 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
1994 atomic_load_xor_i64_gen, i64imm, imm, [hasSM<32>]>;
1995 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1996 ".xor", atomic_load_xor_i64_gen, i64imm, imm, [hasSM<32>]>;
// Address-space-checked PatFrags for atomic compare-and-swap. These take
// three operands ($a, $b, $c) and wrap atomic_cmp_swap_i32 /
// atomic_cmp_swap_i64 with the global (_g), shared (_s) and generic (_gen)
// checks.
2000 def atomic_cmp_swap_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
2001 (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
2002 def atomic_cmp_swap_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
2003 (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
2004 def atomic_cmp_swap_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
2005 (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
2006 def atomic_cmp_swap_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
2007 (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
2008 def atomic_cmp_swap_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
2009 (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
2010 def atomic_cmp_swap_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
2011 (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
// Atomic compare-and-swap instructions, built with the three-operand
// F_ATOMIC_3 multiclass: opcode ".cas" with ".b32"/".b64" type suffixes.
// No predicate list is passed, so Pred takes its default ([]) for both
// widths. The *_GEN_*_USE_G variants pair the generic PatFrag with the
// ".global" space string.
2013 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
2014 atomic_cmp_swap_i32_g, i32imm>;
2015 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
2016 atomic_cmp_swap_i32_s, i32imm>;
2017 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
2018 atomic_cmp_swap_i32_gen, i32imm>;
2019 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32",
2020 ".cas", atomic_cmp_swap_i32_gen, i32imm>;
2021 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
2022 atomic_cmp_swap_i64_g, i64imm>;
2023 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
2024 atomic_cmp_swap_i64_s, i64imm>;
2025 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
2026 atomic_cmp_swap_i64_gen, i64imm>;
2027 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64",
2028 ".cas", atomic_cmp_swap_i64_gen, i64imm>;
// Common base class for the scoped atomic instructions defined by the
// ATOM2P_impl / ATOM3P_impl multiclasses.
//   AsmStr        - assembly string for the instruction.
//   regT/regclass - value type and register class of the result ($result).
//   Preds         - predicates for the instruction.
//   ins           - input operand dag.
//   Operands      - source pattern dag whose value is assigned to $result.
// NOTE(review): the NVPTXInst asm-string argument and whatever follows the
// pattern list (the def ends with a trailing comma) are not visible in this
// view, so how AsmStr and Preds are consumed cannot be confirmed here.
2030 // Support for scoped atomic operations. Matches
2031 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
2032 // and converts it into the appropriate instruction.
2033 // NOTE: not all possible combinations are implemented
2034 // 'space' is limited to generic as it's the only one needed to support CUDA.
2035 // 'scope' = 'gpu' is default and is handled by regular atomic instructions.
2036 class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds,
2037 dag ins, dag Operands>
2038 : NVPTXInst<(outs regclass:$result), ins,
2040 [(set (regT regclass:$result), Operands)]>,
// Emits anonymous ATOM23_impl records for a two-operand intrinsic `Intr`
// (address $src, data $b). Six records are produced: for each address
// register class (Int16Regs as i16, Int32Regs as i32, Int64Regs as i64) there
// is one record with $b in `regclass` and one with $b as an immediate
// (ImmType operand, matched as `(ImmTy Imm:$b)`).
// NOTE(review): `let AddedComplexity = 1 in {` opens before the register
// forms, but its closing brace (and the multiclass's) is not visible in this
// view, so which records it covers cannot be confirmed here.
2043 // Define instruction variants for all addressing modes.
2044 multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
2045 ValueType regT, NVPTXRegClass regclass, Operand ImmType,
2046 SDNode Imm, ValueType ImmTy,
2047 list<Predicate> Preds> {
2048 let AddedComplexity = 1 in {
2049 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2050 (ins Int16Regs:$src, regclass:$b),
2051 (Intr (i16 Int16Regs:$src), (regT regclass:$b))>;
2052 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2053 (ins Int32Regs:$src, regclass:$b),
2054 (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
2055 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2056 (ins Int64Regs:$src, regclass:$b),
2057 (Intr (i64 Int64Regs:$src), (regT regclass:$b))>;
2059 // tablegen can't infer argument types from Intrinsic (though it can
2060 // from Instruction) so we have to enforce specific type on
2061 // immediates via explicit cast to ImmTy.
2062 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2063 (ins Int16Regs:$src, ImmType:$b),
2064 (Intr (i16 Int16Regs:$src), (ImmTy Imm:$b))>;
2065 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2066 (ins Int32Regs:$src, ImmType:$b),
2067 (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
2068 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2069 (ins Int64Regs:$src, ImmType:$b),
2070 (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>;
// Emits anonymous ATOM23_impl records for the three-operand atomic `Intr`
// (address $src plus value operands $b and $c). Only i32 and i64 address
// registers are covered; for each, all four register/immediate combinations
// of $b and $c are defined.
//   AsmStr  - full assembly string, passed through unchanged.
//   Preds   - predicate list, passed through unchanged.
//   Imm/ImmTy - immediate leaf and the type it is cast to in the pattern.
2073 multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
2074 ValueType regT, NVPTXRegClass regclass,
2075 Operand ImmType, SDNode Imm, ValueType ImmTy,
2076 list<Predicate> Preds> {
2077 // Variants for register/immediate permutations of $b and $c
2078 let AddedComplexity = 2 in {
// $b register, $c register.
2079 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2080 (ins Int32Regs:$src, regclass:$b, regclass:$c),
2081 (Intr (i32 Int32Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
2082 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2083 (ins Int64Regs:$src, regclass:$b, regclass:$c),
2084 (Intr (i64 Int64Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
2086 let AddedComplexity = 1 in {
// $b immediate, $c register.
2087 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2088 (ins Int32Regs:$src, ImmType:$b, regclass:$c),
2089 (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
2090 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2091 (ins Int64Regs:$src, ImmType:$b, regclass:$c),
2092 (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
// $b register, $c immediate.
2093 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2094 (ins Int32Regs:$src, regclass:$b, ImmType:$c),
2095 (Intr (i32 Int32Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
2096 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2097 (ins Int64Regs:$src, regclass:$b, ImmType:$c),
2098 (Intr (i64 Int64Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
// $b immediate, $c immediate.
2100 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2101 (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
2102 (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
2103 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2104 (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
2105 (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
2108 // Constructs intrinsic name and instruction asm strings.
// Asm string:      "atom[.<SpaceStr>][.<ScopeStr>].<OpStr>.<TypeStr> \t$result, [$src], $b;"
//   - the space suffix is omitted when SpaceStr is "gen";
//   - the scope suffix is omitted when ScopeStr is "gpu".
// Intrinsic name:  "int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]"
//   - the scope suffix is omitted only when ScopeStr is empty.
// NOTE(review): the two scope tests differ ("gpu" vs. empty). This makes no
// difference for the "cta"/"sys" scopes passed in by ATOM2S_impl, but check
// both before adding another scope value.
2109 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
2110 string ScopeStr, string SpaceStr,
2111 ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2112 ValueType ImmTy, list<Predicate> Preds> {
2113 defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2114 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
2115 # "." # OpStr # "." # TypeStr
2116 # " \t$result, [$src], $b;",
2118 "int_nvvm_atomic_" # OpStr
2119 # "_" # SpaceStr # "_" # IntTypeStr
2120 # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
2121 regT, regclass, ImmType, Imm, ImmTy, Preds>;
// Builds the asm string and intrinsic name for a three-operand atomic and
// forwards them to ATOM3P_impl.
// Asm string:      "atom[.<SpaceStr>][.<ScopeStr>].<OpStr>.<TypeStr> \t$result, [$src], $b, $c;"
//   - the space suffix is omitted when SpaceStr is "gen";
//   - the scope suffix is omitted when ScopeStr is "gpu".
// Intrinsic name:  "int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]"
//   - the scope suffix is omitted only when ScopeStr is empty.
// NOTE(review): the two scope tests differ ("gpu" vs. empty). This makes no
// difference for the "cta"/"sys" scopes passed in by ATOM3S_impl, but check
// both before adding another scope value.
2123 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
2124 string ScopeStr, string SpaceStr,
2125 ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2126 ValueType ImmTy, list<Predicate> Preds> {
2127 defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2128 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
2129 # "." # OpStr # "." # TypeStr
2130 # " \t$result, [$src], $b, $c;",
2132 "int_nvvm_atomic_" # OpStr
2133 # "_" # SpaceStr # "_" # IntTypeStr
2134 # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
2135 regT, regclass, ImmType, Imm, ImmTy, Preds>;
2138 // Constructs variants for different address spaces.
2139 // For now we only need variants for generic space pointers.
// Forwards every argument to ATOM2N_impl with SpaceStr fixed to "gen"; the
// resulting records are grouped under the "_gen_" name component.
2140 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
2141 string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
2142 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
2143 defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
2144 regT, regclass, ImmType, Imm, ImmTy, Preds>;
// Address-space layer for three-operand atomics. Only the generic space is
// instantiated: every argument is forwarded to ATOM3N_impl with SpaceStr
// fixed to "gen", under the "_gen_" name component.
2146 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
2147 string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
2148 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
2149 defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
2150 regT, regclass, ImmType, Imm, ImmTy, Preds>;
2153 // Constructs variants for different scopes of atomic op.
// Instantiates ATOM2A_impl twice, with ScopeStr "cta" and "sys". Each
// instantiation appends hasAtomScope to the caller's predicate list.
2154 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
2155 ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2156 ValueType ImmTy, list<Predicate> Preds> {
2157 // .gpu scope is default and is currently covered by existing
2158 // atomics w/o explicitly specified scope.
2159 defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
2160 regT, regclass, ImmType, Imm, ImmTy,
2161 !listconcat(Preds,[hasAtomScope])>;
2162 defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
2163 regT, regclass, ImmType, Imm, ImmTy,
2164 !listconcat(Preds,[hasAtomScope])>;
// Scope layer for three-operand atomics. Instantiates ATOM3A_impl twice,
// with ScopeStr "cta" and "sys". Each instantiation appends hasAtomScope to
// the caller's predicate list.
2166 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
2167 ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
2168 list<Predicate> Preds> {
2169 // No need to define ".gpu"-scoped atomics. They do the same thing
2170 // as the regular, non-scoped atomics defined elsewhere.
2171 defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
2172 regT, regclass, ImmType, Imm, ImmTy,
2173 !listconcat(Preds,[hasAtomScope])>;
2174 defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
2175 regT, regclass, ImmType, Imm, ImmTy,
2176 !listconcat(Preds,[hasAtomScope])>;
2180 multiclass ATOM2_add_impl<string OpStr> {
2181 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
2182 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
2183 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
2184 defm _bf16 : ATOM2S_impl<OpStr, "f", "bf16", bf16, Int16Regs, bf16imm, fpimm, bf16,
2185 [hasSM<90>, hasPTX<78>]>;
2186 defm _f16 : ATOM2S_impl<OpStr, "f", "f16", f16, Int16Regs, f16imm, fpimm, f16,
2187 [hasSM<70>, hasPTX<63>]>;
2188 defm _f32 : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
2190 defm _f64 : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
2194 // atom.{and,or,xor}
2195 multiclass ATOM2_bitwise_impl<string OpStr> {
2196 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2197 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64,
2198 [hasAtomBitwise64]>;
2202 multiclass ATOM2_exch_impl<string OpStr> {
2203 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2204 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
2208 multiclass ATOM2_minmax_impl<string OpStr> {
2209 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
2210 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
2211 defm _s64 : ATOM2S_impl<OpStr, "i", "s64", i64, Int64Regs, i64imm, imm, i64,
2213 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64,
2218 multiclass ATOM2_incdec_impl<string OpStr> {
2219 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
2223 multiclass ATOM3_cas_impl<string OpStr> {
2224 defm _b32 : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2225 defm _b64 : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
// Top-level instantiations of the scoped atomics, one per PTX operation
// string. The multiclass chosen for each operation determines which operand
// types are generated (e.g. "inc"/"dec" share ATOM2_incdec_impl, and
// "and"/"or"/"xor" share ATOM2_bitwise_impl).
2228 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
2229 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
2230 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
2231 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
2232 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
2233 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
2234 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
2235 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
2236 defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
2237 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
2239 //-----------------------------------
2240 // Support for ldu on sm_20 or later
2241 //-----------------------------------
2243 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
2244 // read-only in a kernel.
// Scalar "ldu.global" instructions. The asm string is "ldu.global." followed
// by TyStr, so TyStr supplies both the type suffix and the operand list.
// One record is defined per source-address operand kind:
//   areg   - Int32Regs        areg64 - Int64Regs
//   avar   - imemAny
//   ari    - MEMri            ari64  - MEMri64
// All records have an empty pattern list and require hasLDU.
// NOTE(review): with no patterns these are presumably selected from C++
// instruction-selection code -- confirm.
2248 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
2249 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2250 !strconcat("ldu.global.", TyStr),
2251 []>, Requires<[hasLDU]>;
2252 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2253 !strconcat("ldu.global.", TyStr),
2254 []>, Requires<[hasLDU]>;
2255 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2256 !strconcat("ldu.global.", TyStr),
2257 []>, Requires<[hasLDU]>;
2258 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2259 !strconcat("ldu.global.", TyStr),
2260 []>, Requires<[hasLDU]>;
2261 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2262 !strconcat("ldu.global.", TyStr),
2263 []>, Requires<[hasLDU]>;
2266 defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
2267 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
2268 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
2269 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
2270 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
2271 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
2275 // Elementized vector ldu
2276 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2277 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2278 (ins Int32Regs:$src),
2279 !strconcat("ldu.global.", TyStr), []>;
2280 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2281 (ins Int64Regs:$src),
2282 !strconcat("ldu.global.", TyStr), []>;
2283 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2285 !strconcat("ldu.global.", TyStr), []>;
2286 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2288 !strconcat("ldu.global.", TyStr), []>;
2289 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2291 !strconcat("ldu.global.", TyStr), []>;
2294 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2295 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2296 regclass:$dst4), (ins Int32Regs:$src),
2297 !strconcat("ldu.global.", TyStr), []>;
2298 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2299 regclass:$dst4), (ins Int64Regs:$src),
2300 !strconcat("ldu.global.", TyStr), []>;
2301 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2302 regclass:$dst4), (ins MEMri:$src),
2303 !strconcat("ldu.global.", TyStr), []>;
2304 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2305 regclass:$dst4), (ins MEMri64:$src),
2306 !strconcat("ldu.global.", TyStr), []>;
2307 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2308 regclass:$dst4), (ins imemAny:$src),
2309 !strconcat("ldu.global.", TyStr), []>;
2312 defm INT_PTX_LDU_G_v2i8_ELE
2313 : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2314 defm INT_PTX_LDU_G_v2i16_ELE
2315 : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2316 defm INT_PTX_LDU_G_v2i32_ELE
2317 : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2318 defm INT_PTX_LDU_G_v2f32_ELE
2319 : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2320 defm INT_PTX_LDU_G_v2i64_ELE
2321 : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2322 defm INT_PTX_LDU_G_v2f64_ELE
2323 : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2324 defm INT_PTX_LDU_G_v4i8_ELE
2325 : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2326 defm INT_PTX_LDU_G_v4i16_ELE
2327 : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2329 defm INT_PTX_LDU_G_v4i32_ELE
2330 : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2332 defm INT_PTX_LDU_G_v4f16_ELE
2333 : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2335 defm INT_PTX_LDU_G_v4f16x2_ELE
2336 : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2338 defm INT_PTX_LDU_G_v4f32_ELE
2339 : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2343 //-----------------------------------
2344 // Support for ldg on sm_35 or later
2345 //-----------------------------------
2347 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
2348 // non-coherent texture cache, and therefore the values read must be read-only
2349 // during the lifetime of the kernel.
// Scalar "ld.global.nc" instructions. The asm string is "ld.global.nc."
// followed by TyStr, so TyStr supplies both the type suffix and the operand
// list. One record is defined per source-address operand kind:
//   areg   - Int32Regs        areg64 - Int64Regs
//   avar   - imemAny
//   ari    - MEMri            ari64  - MEMri64
// All records have an empty pattern list and require hasLDG.
// NOTE(review): with no patterns these are presumably selected from C++
// instruction-selection code -- confirm.
2351 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
2352 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2353 !strconcat("ld.global.nc.", TyStr),
2354 []>, Requires<[hasLDG]>;
2355 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2356 !strconcat("ld.global.nc.", TyStr),
2357 []>, Requires<[hasLDG]>;
2358 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2359 !strconcat("ld.global.nc.", TyStr),
2360 []>, Requires<[hasLDG]>;
2361 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2362 !strconcat("ld.global.nc.", TyStr),
2363 []>, Requires<[hasLDG]>;
2364 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2365 !strconcat("ld.global.nc.", TyStr),
2366 []>, Requires<[hasLDG]>;
2369 defm INT_PTX_LDG_GLOBAL_i8
2370 : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
2371 defm INT_PTX_LDG_GLOBAL_i16
2372 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
2373 defm INT_PTX_LDG_GLOBAL_i32
2374 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
2375 defm INT_PTX_LDG_GLOBAL_i64
2376 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2377 defm INT_PTX_LDG_GLOBAL_f32
2378 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
2379 defm INT_PTX_LDG_GLOBAL_f64
2380 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2384 // Elementized vector ldg
2385 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2386 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2387 (ins Int32Regs:$src),
2388 !strconcat("ld.global.nc.", TyStr), []>;
2389 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2390 (ins Int64Regs:$src),
2391 !strconcat("ld.global.nc.", TyStr), []>;
2392 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2394 !strconcat("ld.global.nc.", TyStr), []>;
2395 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2397 !strconcat("ld.global.nc.", TyStr), []>;
2398 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2400 !strconcat("ld.global.nc.", TyStr), []>;
2403 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2404 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2405 regclass:$dst4), (ins Int32Regs:$src),
2406 !strconcat("ld.global.nc.", TyStr), []>;
2407 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2408 regclass:$dst4), (ins Int64Regs:$src),
2409 !strconcat("ld.global.nc.", TyStr), []>;
2410 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2411 regclass:$dst4), (ins MEMri:$src),
2412 !strconcat("ld.global.nc.", TyStr), []>;
2413 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2414 regclass:$dst4), (ins MEMri64:$src),
2415 !strconcat("ld.global.nc.", TyStr), []>;
2416 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2417 regclass:$dst4), (ins imemAny:$src),
2418 !strconcat("ld.global.nc.", TyStr), []>;
2421 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
2422 defm INT_PTX_LDG_G_v2i8_ELE
2423 : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2424 defm INT_PTX_LDG_G_v2i16_ELE
2425 : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2426 defm INT_PTX_LDG_G_v2i32_ELE
2427 : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2428 defm INT_PTX_LDG_G_v2f32_ELE
2429 : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2430 defm INT_PTX_LDG_G_v2i64_ELE
2431 : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2432 defm INT_PTX_LDG_G_v2f64_ELE
2433 : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2434 defm INT_PTX_LDG_G_v4i8_ELE
2435 : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2436 defm INT_PTX_LDG_G_v4i16_ELE
2437 : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2438 defm INT_PTX_LDG_G_v4i32_ELE
2439 : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
2440 defm INT_PTX_LDG_G_v4f32_ELE
2441 : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
// "cvta.<Str>" conversions selected for the intrinsic `Intrin` (instantiated
// for the int_nvvm_ptr_*_to_gen intrinsics). Three records are defined:
//   (no suffix) - 32-bit source, 32-bit result: cvta.<Str>.u32
//   _64         - 64-bit source, 64-bit result: cvta.<Str>.u64
//   _6432       - 32-bit source, 64-bit result: the source is first widened
//                 with cvt.u64.u32 into a scratch .b64 register (%tmp), then
//                 converted with cvta.<Str>.u64. Requires the `ShortPtr`
//                 predicate.
2444 multiclass NG_TO_G<string Str, Intrinsic Intrin, Predicate ShortPtr> {
2445 def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2446 !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
2447 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2448 def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2449 !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
2450 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2451 def _6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
2452 "{{ .reg .b64 %tmp;\n\t"
2453 #" cvt.u64.u32 \t%tmp, $src;\n\t"
2454 #" cvta." # Str # ".u64 \t$result, %tmp; }}",
2455 [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
2456 Requires<[ShortPtr]>;
// "cvta.to.<Str>" conversions selected for the intrinsic `Intrin`
// (instantiated for the int_nvvm_ptr_gen_to_* intrinsics). Three records are
// defined:
//   (no suffix) - 32-bit source, 32-bit result: cvta.to.<Str>.u32
//   _64         - 64-bit source, 64-bit result: cvta.to.<Str>.u64
//   _3264       - 64-bit source, 32-bit result: the source is converted with
//                 cvta.to.<Str>.u64 into a scratch .b64 register (%tmp), then
//                 narrowed with cvt.u32.u64. Requires the `ShortPtr`
//                 predicate.
2459 multiclass G_TO_NG<string Str, Intrinsic Intrin, Predicate ShortPtr> {
2460 def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2461 !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
2462 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2463 def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2464 !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
2465 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2466 def _3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
2467 "{{ .reg .b64 %tmp;\n\t"
2468 #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
2469 #" cvt.u32.u64 \t$result, %tmp; }}",
2470 [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
2471 Requires<[ShortPtr]>;
// Address-space conversion instantiations. The third argument is the
// predicate guarding the mixed-width (_6432 / _3264) form; "global" and
// "param" pass `False` there.
// NOTE(review): `False` presumably disables the mixed-width form for those
// spaces -- confirm against the definition of `False`.
// Specific space -> generic:
2474 defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen, useShortPtrLocal>;
2475 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen, useShortPtrShared>;
2476 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen, False>;
2477 defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen, useShortPtrConst>;
2478 defm cvta_param : NG_TO_G<"param", int_nvvm_ptr_param_to_gen, False>;
// Generic -> specific space. There is no "param" entry here;
// int_nvvm_ptr_gen_to_param is matched by the nvvm_ptr_gen_to_param defs.
2480 defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local, useShortPtrLocal>;
2481 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared, useShortPtrShared>;
2482 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global, False>;
2483 defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant, useShortPtrConst>;
2485 // nvvm.ptr.gen.to.param
2486 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
2487 (ins Int32Regs:$src),
2488 "mov.u32 \t$result, $src;",
2489 [(set Int32Regs:$result,
2490 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
2491 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
2492 (ins Int64Regs:$src),
2493 "mov.u64 \t$result, $src;",
2494 [(set Int64Regs:$result,
2495 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2498 // nvvm.move intrinsics
2499 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
2500 "mov.b16 \t$r, $s;",
2502 (int_nvvm_move_i16 Int16Regs:$s))]>;
2503 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2504 "mov.b32 \t$r, $s;",
2506 (int_nvvm_move_i32 Int32Regs:$s))]>;
2507 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2508 "mov.b64 \t$r, $s;",
2510 (int_nvvm_move_i64 Int64Regs:$s))]>;
2511 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
2512 "mov.f32 \t$r, $s;",
2513 [(set Float32Regs:$r,
2514 (int_nvvm_move_float Float32Regs:$s))]>;
2515 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
2516 "mov.f64 \t$r, $s;",
2517 [(set Float64Regs:$r,
2518 (int_nvvm_move_double Float64Regs:$s))]>;
2519 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2520 "mov.u32 \t$r, $s;",
2522 (int_nvvm_move_ptr Int32Regs:$s))]>;
2523 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2524 "mov.u64 \t$r, $s;",
2526 (int_nvvm_move_ptr Int64Regs:$s))]>;
2528 // @TODO: Are these actually needed, or will we always just see symbols
2529 // copied to registers first?
2530 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2531 "mov.u32 \t$r, $s;",
2533 (int_nvvm_move_ptr texternalsym:$s))]>;
2534 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2535 "mov.u64 \t$r, $s;",
2537 (int_nvvm_move_ptr texternalsym:$s))]>;*/
2540 // MoveParam %r1, param
2541 // ptr_local_to_gen %r2, %r1
2542 // ptr_gen_to_local %r3, %r2
2546 // @TODO: Revisit this. There is a type
2547 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2548 // instructions are not currently defined. However, we can use the ptr
2549 // variants and the asm printer will do the right thing.
2550 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2551 (MoveParam texternalsym:$src)))),
2552 (nvvm_move_ptr64 texternalsym:$src)>;
2553 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2554 (MoveParam texternalsym:$src)))),
2555 (nvvm_move_ptr32 texternalsym:$src)>;
2558 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
2559 "mov.u64 \t$result, $src;", []>;
2561 //-----------------------------------
2562 // Compiler Error Warn
2563 // - Just ignore them in codegen
2564 //-----------------------------------
2566 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2567 "// llvm.nvvm.compiler.warn()",
2568 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
2569 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2570 "// llvm.nvvm.compiler.warn()",
2571 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
2572 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2573 "// llvm.nvvm.compiler.error()",
2574 [(int_nvvm_compiler_error Int32Regs:$a)]>;
2575 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2576 "// llvm.nvvm.compiler.error()",
2577 [(int_nvvm_compiler_error Int64Regs:$a)]>;
2582 multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
2583 def _32: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2584 "isspacep." # suffix # "\t$d, $a;",
2585 [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
2587 def _64: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2588 "isspacep." # suffix # "\t$d, $a;",
2589 [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
2593 defm isspace_const : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
2594 defm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
2595 defm isspace_local : ISSPACEP<"local", int_nvvm_isspacep_local>;
2596 defm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
2597 defm isspace_shared_cluster : ISSPACEP<"shared::cluster",
2598 int_nvvm_isspacep_shared_cluster,
2599 [hasPTX<78>, hasSM<90>]>;
2601 // Special register reads
2602 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
2603 (ins SpecialRegs:$r),
2604 "mov.b32 \t$d, $r;", []>;
2606 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
2607 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
2608 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
2609 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
2610 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
2611 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
2612 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
2613 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
2614 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
2615 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
2616 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
2617 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2618 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2619 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2620 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2621 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2622 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2623 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2624 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2625 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2626 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2627 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2628 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2629 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2630 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2631 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2632 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2633 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2634 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2635 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2636 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2637 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2640 // rotate builtin support
2642 def ROTATE_B32_HW_IMM
2643 : NVPTXInst<(outs Int32Regs:$dst),
2644 (ins Int32Regs:$src, i32imm:$amt),
2645 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2646 [(set Int32Regs:$dst,
2647 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2648 Requires<[hasHWROT32]> ;
2650 def ROTATE_B32_HW_REG
2651 : NVPTXInst<(outs Int32Regs:$dst),
2652 (ins Int32Regs:$src, Int32Regs:$amt),
2653 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2654 [(set Int32Regs:$dst,
2655 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2656 Requires<[hasHWROT32]> ;
2658 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2659 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2660 Requires<[noHWROT32]> ;
2662 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2663 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2664 Requires<[noHWROT32]> ;
2666 let hasSideEffects = false in {
2667 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2668 !strconcat("{{\n\t",
2669 ".reg .b32 %dummy;\n\t",
2670 "mov.b64 \t{$dst,%dummy}, $src;\n\t",
2674 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2675 !strconcat("{{\n\t",
2676 ".reg .b32 %dummy;\n\t",
2677 "mov.b64 \t{%dummy,$dst}, $src;\n\t",
2682 let hasSideEffects = false in {
2684 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2685 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
2688 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2689 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2690 (GET_LO_INT64 Int64Regs:$src))> ;
2692 // Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
2694 let hasSideEffects = false in {
2695 def SHF_L_WRAP_B32_IMM
2696 : NVPTXInst<(outs Int32Regs:$dst),
2697 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2698 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2699 Requires<[hasHWROT32]>;
2701 def SHF_L_WRAP_B32_REG
2702 : NVPTXInst<(outs Int32Regs:$dst),
2703 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2704 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2705 Requires<[hasHWROT32]>;
2707 def SHF_R_WRAP_B32_IMM
2708 : NVPTXInst<(outs Int32Regs:$dst),
2709 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2710 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2711 Requires<[hasHWROT32]>;
2713 def SHF_R_WRAP_B32_REG
2714 : NVPTXInst<(outs Int32Regs:$dst),
2715 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2716 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2717 Requires<[hasHWROT32]>;
2720 // HW version of rotate 64
2721 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2723 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2724 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2725 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2726 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2727 Requires<[hasHWROT32]>;
2729 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2731 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2732 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2733 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2734 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2735 Requires<[hasHWROT32]>;
2738 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2740 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2741 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2742 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2743 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2744 Requires<[hasHWROT32]>;
2746 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2748 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2749 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2750 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2751 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2752 Requires<[hasHWROT32]>;
2754 // SW version of rotate 64
// These patterns apply when noHWROT32 holds.
// Immediate amounts use ROT64imm_sw, which takes the amount and
// (SUB_FRM_64 amount); rotate-right passes the same two operands in the
// opposite order from rotate-left.
// NOTE(review): SUB_FRM_64 presumably yields 64 - amount -- confirm against
// its definition.
// Register amounts use ROTL64reg_sw (left) and ROTR64reg_sw (right).
2755 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2756 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
2757 Requires<[noHWROT32]>;
2758 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2759 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2760 Requires<[noHWROT32]>;
2761 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2762 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2763 Requires<[noHWROT32]>;
2764 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2765 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2766 Requires<[noHWROT32]>;
2769 //-----------------------------------
2770 // Texture Intrinsics
2771 //-----------------------------------
2773 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2774 // also defined in NVPTXReplaceImageHandles.cpp
2776 // texmode_independent
2777 let IsTex = true, IsTexModeUnified = false in {
2778 // Texture fetch instructions using handles
2780 class TEX_1D_base<string inst, NVPTXRegClass outtype,
2781 NVPTXRegClass intype, dag texsamp>
2782 : NVPTXInst<(outs outtype:$r, outtype:$g,
2783 outtype:$b, outtype:$a),
2784 !con(texsamp, (ins intype:$x)),
2785 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
// Emits four TEX_1D_base variants, one per combination of register (R,
// Int64Regs) or immediate (I, i64imm) for the $t and $s operands; the suffix
// letters are in ($t, $s) order.
2788 multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2789 def _RR : TEX_1D_base<inst, outtype, intype,
2790 (ins Int64Regs:$t, Int64Regs:$s)>;
2791 def _RI : TEX_1D_base<inst, outtype, intype,
2792 (ins Int64Regs:$t, i64imm:$s)>;
2793 def _IR : TEX_1D_base<inst, outtype, intype,
2794 (ins i64imm:$t, Int64Regs:$s)>;
2795 def _II : TEX_1D_base<inst, outtype, intype,
2796 (ins i64imm:$t, i64imm:$s)>;
// 1D fetches: f32, s32 and u32 results, each with s32 or f32 coordinates.
// The s32 and u32 result forms both use Int32Regs.
2799 defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
2800 defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2801 defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
2802 defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2803 defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
2804 defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 1D texture fetches with an extra $lod operand: same results
// and $t/$s handles as the plain 1D form, with `intype` inputs $x and $lod.
// $lod is printed after the bracketed address in the assembly string.
2806 class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
2807 NVPTXRegClass intype, dag texsamp>
2808 : NVPTXInst<(outs outtype:$r, outtype:$g,
2809 outtype:$b, outtype:$a),
2810 !con(texsamp, (ins intype:$x, intype:$lod)),
2811 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
// Emits the _RR/_RI/_IR/_II variants of TEX_1D_LEVEL_base: each of $t and $s
// is either an Int64Regs register (R) or an i64imm immediate (I).
2814 multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
2815 NVPTXRegClass intype> {
2816 def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
2817 (ins Int64Regs:$t, Int64Regs:$s)>;
2818 def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
2819 (ins Int64Regs:$t, i64imm:$s)>;
2820 def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
2821 (ins i64imm:$t, Int64Regs:$s)>;
2822 def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
2823 (ins i64imm:$t, i64imm:$s)>;
// 1D "tex.level" fetches: f32, s32 and u32 results, all with f32 inputs.
2826 defm TEX_1D_F32_F32_LEVEL :
2827 TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2828 defm TEX_1D_S32_F32_LEVEL :
2829 TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2830 defm TEX_1D_U32_F32_LEVEL :
2831 TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 1D texture fetches with $gradx and $grady operands: the
// `intype` inputs are $x, $gradx and $grady, and each gradient is printed as
// its own one-element brace-wrapped vector after the bracketed address.
2833 class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
2834 NVPTXRegClass intype, dag texsamp>
2835 : NVPTXInst<(outs outtype:$r, outtype:$g,
2836 outtype:$b, outtype:$a),
2837 !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
2838 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
2839 " \\{$gradx\\}, \\{$grady\\};",
// Emits the _RR/_RI/_IR/_II variants of TEX_1D_GRAD_base: each of $t and $s
// is either an Int64Regs register (R) or an i64imm immediate (I).
2842 multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
2843 NVPTXRegClass intype> {
2844 def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
2845 (ins Int64Regs:$t, Int64Regs:$s)>;
2846 def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
2847 (ins Int64Regs:$t, i64imm:$s)>;
2848 def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
2849 (ins i64imm:$t, Int64Regs:$s)>;
2850 def _II : TEX_1D_GRAD_base<inst, outtype, intype,
2851 (ins i64imm:$t, i64imm:$s)>;
// 1D "tex.grad" fetches: f32, s32 and u32 results, all with f32 inputs.
2854 defm TEX_1D_F32_F32_GRAD
2855 : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2856 defm TEX_1D_S32_F32_GRAD
2857 : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2858 defm TEX_1D_U32_F32_GRAD
2859 : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 1D-array texture fetches: adds an Int32Regs operand $l
// (presumably the array layer index; confirm against the PTX ISA) ahead of
// the `intype` coordinate $x. $l is always Int32Regs regardless of `intype`
// and is printed first in the coordinate vector.
2861 class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
2862 NVPTXRegClass intype, dag texsamp>
2863 : NVPTXInst<(outs outtype:$r, outtype:$g,
2864 outtype:$b, outtype:$a),
2865 !con(texsamp, (ins Int32Regs:$l, intype:$x)),
2866 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
// Emits the _RR/_RI/_IR/_II variants of TEX_1D_ARRAY_base: each of $t and $s
// is either an Int64Regs register (R) or an i64imm immediate (I).
2869 multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
2870 NVPTXRegClass intype> {
2871 def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
2872 (ins Int64Regs:$t, Int64Regs:$s)>;
2873 def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
2874 (ins Int64Regs:$t, i64imm:$s)>;
2875 def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
2876 (ins i64imm:$t, Int64Regs:$s)>;
2877 def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
2878 (ins i64imm:$t, i64imm:$s)>;
// 1D-array ("a1d") fetches: f32, s32 and u32 results, each with s32 or f32
// coordinates.
2881 defm TEX_1D_ARRAY_F32_F32
2882 : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2883 defm TEX_1D_ARRAY_F32_S32
2884 : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
2885 defm TEX_1D_ARRAY_S32_S32
2886 : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
2887 defm TEX_1D_ARRAY_S32_F32
2888 : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2889 defm TEX_1D_ARRAY_U32_S32
2890 : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
2891 defm TEX_1D_ARRAY_U32_F32
2892 : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 1D-array texture fetches with an extra $lod operand: inputs
// are the $t/$s handles, Int32Regs $l, and `intype` $x and $lod. $lod is
// printed after the bracketed address.
2894 class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
2895 NVPTXRegClass intype, dag texsamp>
2896 : NVPTXInst<(outs outtype:$r, outtype:$g,
2897 outtype:$b, outtype:$a),
2898 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
2899 inst # " \t\\{$r, $g, $b, $a\\},"
2900 " [$t, $s, \\{$l, $x\\}], $lod;",
// Emits the _RR/_RI/_IR/_II variants of TEX_1D_ARRAY_LEVEL_base: each of $t
// and $s is either an Int64Regs register (R) or an i64imm immediate (I).
2903 multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
2904 NVPTXRegClass intype> {
2905 def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2906 (ins Int64Regs:$t, Int64Regs:$s)>;
2907 def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2908 (ins Int64Regs:$t, i64imm:$s)>;
2909 def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2910 (ins i64imm:$t, Int64Regs:$s)>;
2911 def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2912 (ins i64imm:$t, i64imm:$s)>;
// 1D-array "tex.level" fetches: f32, s32 and u32 results, all with f32 inputs.
2915 defm TEX_1D_ARRAY_F32_F32_LEVEL
2916 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2917 defm TEX_1D_ARRAY_S32_F32_LEVEL
2918 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2919 defm TEX_1D_ARRAY_U32_F32_LEVEL
2920 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 1D-array texture fetches with $gradx and $grady operands:
// inputs are the $t/$s handles, Int32Regs $l, and `intype` $x, $gradx and
// $grady. Each gradient is printed as its own one-element brace-wrapped
// vector.
2922 class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
2923 NVPTXRegClass intype, dag texsamp>
2924 : NVPTXInst<(outs outtype:$r, outtype:$g,
2925 outtype:$b, outtype:$a),
2926 !con(texsamp, (ins Int32Regs:$l, intype:$x,
2927 intype:$gradx, intype:$grady)),
2928 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
2929 " \\{$gradx\\}, \\{$grady\\};",
// Emits the _RR/_RI/_IR/_II variants of TEX_1D_ARRAY_GRAD_base: each of $t
// and $s is either an Int64Regs register (R) or an i64imm immediate (I).
2932 multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
2933 NVPTXRegClass intype> {
2934 def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2935 (ins Int64Regs:$t, Int64Regs:$s)>;
2936 def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2937 (ins Int64Regs:$t, i64imm:$s)>;
2938 def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2939 (ins i64imm:$t, Int64Regs:$s)>;
2940 def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2941 (ins i64imm:$t, i64imm:$s)>;
// 1D-array "tex.grad" fetches: f32, s32 and u32 results, all with f32 inputs.
2944 defm TEX_1D_ARRAY_F32_F32_GRAD
2945 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2946 defm TEX_1D_ARRAY_S32_F32_GRAD
2947 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2948 defm TEX_1D_ARRAY_U32_F32_GRAD
2949 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 2D texture fetches with separate $t and $s handle operands:
// four `outtype` results ($r, $g, $b, $a) and two `intype` coordinates
// ($x, $y) printed as a two-element vector.
2951 class TEX_2D_base<string inst, NVPTXRegClass outtype,
2952 NVPTXRegClass intype, dag texsamp>
2953 : NVPTXInst<(outs outtype:$r, outtype:$g,
2954 outtype:$b, outtype:$a),
2955 !con(texsamp, (ins intype:$x, intype:$y)),
2956 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
// Emits the _RR/_RI/_IR/_II variants of TEX_2D_base: each of $t and $s is
// either an Int64Regs register (R) or an i64imm immediate (I).
2959 multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2960 def _RR : TEX_2D_base<inst, outtype, intype,
2961 (ins Int64Regs:$t, Int64Regs:$s)>;
2962 def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
2963 def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
2964 def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
// 2D fetches: f32, s32 and u32 results, each with s32 or f32 coordinates.
2967 defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2968 defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
2969 defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
2970 defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2971 defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
2972 defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 2D texture fetches with an extra $lod operand: `intype`
// inputs $x, $y and $lod follow the $t/$s handles, and $lod is printed after
// the bracketed address.
2974 class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
2975 NVPTXRegClass intype, dag texsamp>
2976 : NVPTXInst<(outs outtype:$r, outtype:$g,
2977 outtype:$b, outtype:$a),
2978 !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
2979 inst # " \t\\{$r, $g, $b, $a\\},"
2980 " [$t, $s, \\{$x, $y\\}], $lod;",
// Emits the _RR/_RI/_IR/_II variants of TEX_2D_LEVEL_base: each of $t and $s
// is either an Int64Regs register (R) or an i64imm immediate (I).
2983 multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
2984 NVPTXRegClass intype> {
2985 def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
2986 (ins Int64Regs:$t, Int64Regs:$s)>;
2987 def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
2988 (ins Int64Regs:$t, i64imm:$s)>;
2989 def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
2990 (ins i64imm:$t, Int64Regs:$s)>;
2991 def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
2992 (ins i64imm:$t, i64imm:$s)>;
// 2D "tex.level" fetches: f32, s32 and u32 results, all with f32 inputs.
2995 defm TEX_2D_F32_F32_LEVEL :
2996 TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2997 defm TEX_2D_S32_F32_LEVEL :
2998 TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2999 defm TEX_2D_U32_F32_LEVEL :
3000 TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 2D texture fetches with gradient operands: `intype` inputs
// $x, $y, $gradx0, $gradx1, $grady0 and $grady1 follow the $t/$s handles.
// The gradients are printed as two two-element vectors after the address.
3002 class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
3003 NVPTXRegClass intype, dag texsamp>
3004 : NVPTXInst<(outs outtype:$r, outtype:$g,
3005 outtype:$b, outtype:$a),
3006 !con(texsamp, (ins intype:$x, intype:$y,
3007 intype:$gradx0, intype:$gradx1,
3008 intype:$grady0, intype:$grady1)),
3009 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
3010 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
// Emits the _RR/_RI/_IR/_II variants of TEX_2D_GRAD_base: each of $t and $s
// is either an Int64Regs register (R) or an i64imm immediate (I).
3013 multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
3014 NVPTXRegClass intype> {
3015 def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
3016 (ins Int64Regs:$t, Int64Regs:$s)>;
3017 def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
3018 (ins Int64Regs:$t, i64imm:$s)>;
3019 def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
3020 (ins i64imm:$t, Int64Regs:$s)>;
3021 def _II : TEX_2D_GRAD_base<inst, outtype, intype,
3022 (ins i64imm:$t, i64imm:$s)>;
// 2D "tex.grad" fetches: f32, s32 and u32 results, all with f32 inputs.
3025 defm TEX_2D_F32_F32_GRAD :
3026 TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3027 defm TEX_2D_S32_F32_GRAD :
3028 TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3029 defm TEX_2D_U32_F32_GRAD :
3030 TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 2D-array texture fetches: inputs are the $t/$s handles,
// Int32Regs $l, and `intype` $x and $y. The assembly string prints $y twice
// so the coordinate vector has four elements.
// NOTE(review): the duplicated $y is presumably padding required by the PTX
// operand format; confirm against the PTX ISA.
3032 class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
3033 NVPTXRegClass intype, dag texsamp>
3034 : NVPTXInst<(outs outtype:$r, outtype:$g,
3035 outtype:$b, outtype:$a),
3036 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
3037 inst # " \t\\{$r, $g, $b, $a\\},"
3038 " [$t, $s, \\{$l, $x, $y, $y\\}];",
// Emits the _RR/_RI/_IR/_II variants of TEX_2D_ARRAY_base: each of $t and $s
// is either an Int64Regs register (R) or an i64imm immediate (I).
3041 multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
3042 NVPTXRegClass intype> {
3043 def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
3044 (ins Int64Regs:$t, Int64Regs:$s)>;
3045 def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
3046 (ins Int64Regs:$t, i64imm:$s)>;
3047 def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
3048 (ins i64imm:$t, Int64Regs:$s)>;
3049 def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
3050 (ins i64imm:$t, i64imm:$s)>;
// 2D-array ("a2d") fetches: f32, s32 and u32 results, each with s32 or f32
// coordinates.
3053 defm TEX_2D_ARRAY_F32_F32
3054 : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3055 defm TEX_2D_ARRAY_F32_S32
3056 : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
3057 defm TEX_2D_ARRAY_S32_S32
3058 : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
3059 defm TEX_2D_ARRAY_S32_F32
3060 : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3061 defm TEX_2D_ARRAY_U32_S32
3062 : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
3063 defm TEX_2D_ARRAY_U32_F32
3064 : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 2D-array texture fetches with an extra $lod operand. Inputs
// start with the $t/$s handles, Int32Regs $l, and `intype` $x and $y; the
// assembly string prints $y twice in the coordinate vector and $lod after the
// bracketed address.
3066 class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3067 NVPTXRegClass intype, dag texsamp>
3068 : NVPTXInst<(outs outtype:$r, outtype:$g,
3069 outtype:$b, outtype:$a),
3070 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3072 inst # " \t\\{$r, $g, $b, $a\\},"
3073 " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
// Emits the _RR/_RI/_IR/_II variants of TEX_2D_ARRAY_LEVEL_base: each of $t
// and $s is either an Int64Regs register (R) or an i64imm immediate (I).
3076 multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3077 NVPTXRegClass intype> {
3078 def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3079 (ins Int64Regs:$t, Int64Regs:$s)>;
3080 def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3081 (ins Int64Regs:$t, i64imm:$s)>;
3082 def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3083 (ins i64imm:$t, Int64Regs:$s)>;
3084 def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3085 (ins i64imm:$t, i64imm:$s)>;
// 2D-array "tex.level" fetches: f32, s32 and u32 results, all with f32 inputs.
3088 defm TEX_2D_ARRAY_F32_F32_LEVEL
3089 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3090 defm TEX_2D_ARRAY_S32_F32_LEVEL
3091 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3092 defm TEX_2D_ARRAY_U32_F32_LEVEL
3093 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 2D-array texture fetches with gradient operands: inputs are
// the $t/$s handles, Int32Regs $l, and `intype` $x, $y, $gradx0, $gradx1,
// $grady0 and $grady1. $y is printed twice in the coordinate vector; the
// gradients are printed as two two-element vectors.
3095 class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3096 NVPTXRegClass intype, dag texsamp>
3097 : NVPTXInst<(outs outtype:$r, outtype:$g,
3098 outtype:$b, outtype:$a),
3099 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3100 intype:$gradx0, intype:$gradx1,
3101 intype:$grady0, intype:$grady1)),
3102 inst # " \t\\{$r, $g, $b, $a\\},"
3103 " [$t, $s, \\{$l, $x, $y, $y\\}],"
3104 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
// Emits the _RR/_RI/_IR/_II variants of TEX_2D_ARRAY_GRAD_base: each of $t
// and $s is either an Int64Regs register (R) or an i64imm immediate (I).
3107 multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3108 NVPTXRegClass intype> {
3109 def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3110 (ins Int64Regs:$t, Int64Regs:$s)>;
3111 def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3112 (ins Int64Regs:$t, i64imm:$s)>;
3113 def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3114 (ins i64imm:$t, Int64Regs:$s)>;
3115 def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3116 (ins i64imm:$t, i64imm:$s)>;
// 2D-array "tex.grad" fetches: f32, s32 and u32 results, all with f32 inputs.
3119 defm TEX_2D_ARRAY_F32_F32_GRAD
3120 : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3121 defm TEX_2D_ARRAY_S32_F32_GRAD
3122 : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3123 defm TEX_2D_ARRAY_U32_F32_GRAD
3124 : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 3D texture fetches with separate $t and $s handle operands:
// three `intype` coordinates ($x, $y, $z). The assembly string prints $z
// twice so the coordinate vector has four elements.
// NOTE(review): the duplicated $z is presumably padding required by the PTX
// operand format; confirm against the PTX ISA.
3126 class TEX_3D_base<string inst, NVPTXRegClass outtype,
3127 NVPTXRegClass intype, dag texsamp>
3128 : NVPTXInst<(outs outtype:$r, outtype:$g,
3129 outtype:$b, outtype:$a),
3130 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
3131 inst # " \t\\{$r, $g, $b, $a\\},"
3132 " [$t, $s, \\{$x, $y, $z, $z\\}];",
// Emits the _RR/_RI/_IR/_II variants of TEX_3D_base: each of $t and $s is
// either an Int64Regs register (R) or an i64imm immediate (I).
3135 multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3136 def _RR : TEX_3D_base<inst, outtype, intype,
3137 (ins Int64Regs:$t, Int64Regs:$s)>;
3138 def _RI : TEX_3D_base<inst, outtype, intype,
3139 (ins Int64Regs:$t, i64imm:$s)>;
3140 def _IR : TEX_3D_base<inst, outtype, intype,
3141 (ins i64imm:$t, Int64Regs:$s)>;
3142 def _II : TEX_3D_base<inst, outtype, intype,
3143 (ins i64imm:$t, i64imm:$s)>;
// 3D fetches: f32, s32 and u32 results, each with s32 or f32 coordinates.
3146 defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3147 defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
3148 defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
3149 defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3150 defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
3151 defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 3D texture fetches with an extra $lod operand. Inputs start
// with the $t/$s handles and `intype` $x, $y and $z; the assembly string
// prints $z twice in the coordinate vector and $lod after the bracketed
// address.
3153 class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
3154 NVPTXRegClass intype, dag texsamp>
3155 : NVPTXInst<(outs outtype:$r, outtype:$g,
3156 outtype:$b, outtype:$a),
3157 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3159 inst # " \t\\{$r, $g, $b, $a\\},"
3160 " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Emits the _RR/_RI/_IR/_II variants of TEX_3D_LEVEL_base: each of $t and $s
// is either an Int64Regs register (R) or an i64imm immediate (I).
3163 multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
3164 NVPTXRegClass intype> {
3165 def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,
3166 (ins Int64Regs:$t, Int64Regs:$s)>;
3167 def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
3168 (ins Int64Regs:$t, i64imm:$s)>;
3169 def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
3170 (ins i64imm:$t, Int64Regs:$s)>;
3171 def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
3172 (ins i64imm:$t, i64imm:$s)>;
// 3D "tex.level" fetches: f32, s32 and u32 results, all with f32 inputs.
3175 defm TEX_3D_F32_F32_LEVEL
3176 : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3177 defm TEX_3D_S32_F32_LEVEL
3178 : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3179 defm TEX_3D_U32_F32_LEVEL
3180 : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for 3D texture fetches with gradient operands: `intype` inputs
// $x, $y, $z, $gradx0..$gradx2 and $grady0..$grady2 follow the $t/$s handles.
// The assembly string repeats the last element of each three-component group
// ($z, $gradx2, $grady2) so that every printed vector has four elements.
3182 class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
3183 NVPTXRegClass intype, dag texsamp>
3184 : NVPTXInst<(outs outtype:$r, outtype:$g,
3185 outtype:$b, outtype:$a),
3186 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3187 intype :$gradx0, intype:$gradx1,
3188 intype:$gradx2, intype:$grady0,
3189 intype:$grady1, intype:$grady2)),
3190 inst # " \t\\{$r, $g, $b, $a\\},"
3191 " [$t, $s, \\{$x, $y, $z, $z\\}],"
3192 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3193 " \\{$grady0, $grady1, $grady2, $grady2\\};",
// Emits the _RR/_RI/_IR/_II variants of TEX_3D_GRAD_base: each of $t and $s
// is either an Int64Regs register (R) or an i64imm immediate (I).
3196 multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
3197 NVPTXRegClass intype> {
3198 def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
3199 (ins Int64Regs:$t, Int64Regs:$s)>;
3200 def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
3201 (ins Int64Regs:$t, i64imm:$s)>;
3202 def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
3203 (ins i64imm:$t, Int64Regs:$s)>;
3204 def _II : TEX_3D_GRAD_base<inst, outtype, intype,
3205 (ins i64imm:$t, i64imm:$s)>;
// 3D "tex.grad" fetches: f32, s32 and u32 results, all with f32 inputs.
3208 defm TEX_3D_F32_F32_GRAD
3209 : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3210 defm TEX_3D_S32_F32_GRAD
3211 : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3212 defm TEX_3D_U32_F32_GRAD
3213 : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for cube texture fetches with separate $t and $s handle
// operands: three `intype` coordinates ($x, $y, $z), with $z printed twice so
// the coordinate vector has four elements.
3215 class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
3216 NVPTXRegClass intype, dag texsamp>
3217 : NVPTXInst<(outs outtype:$r, outtype:$g,
3218 outtype:$b, outtype:$a),
3219 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
3220 inst # " \t\\{$r, $g, $b, $a\\},"
3221 " [$t, $s, \\{$x, $y, $z, $z\\}];",
// Emits the _RR/_RI/_IR/_II variants of TEX_CUBE_base: each of $t and $s is
// either an Int64Regs register (R) or an i64imm immediate (I).
3224 multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3225 def _RR : TEX_CUBE_base<inst, outtype, intype,
3226 (ins Int64Regs:$t, Int64Regs:$s)>;
3227 def _RI : TEX_CUBE_base<inst, outtype, intype,
3228 (ins Int64Regs:$t, i64imm:$s)>;
3229 def _IR : TEX_CUBE_base<inst, outtype, intype,
3230 (ins i64imm:$t, Int64Regs:$s)>;
3231 def _II : TEX_CUBE_base<inst, outtype, intype,
3232 (ins i64imm:$t, i64imm:$s)>;
// Cube fetches: f32, s32 and u32 results, all with f32 coordinates.
3235 defm TEX_CUBE_F32_F32
3236 : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3237 defm TEX_CUBE_S32_F32
3238 : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3239 defm TEX_CUBE_U32_F32
3240 : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for cube texture fetches with an extra $lod operand. Inputs
// start with the $t/$s handles and `intype` $x, $y and $z; the assembly
// string prints $z twice in the coordinate vector and $lod after the
// bracketed address.
3242 class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
3243 NVPTXRegClass intype, dag texsamp>
3244 : NVPTXInst<(outs outtype:$r, outtype:$g,
3245 outtype:$b, outtype:$a),
3246 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3248 inst # " \t\\{$r, $g, $b, $a\\},"
3249 " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Emits the _RR/_RI/_IR/_II variants of TEX_CUBE_LEVEL_base: each of $t and
// $s is either an Int64Regs register (R) or an i64imm immediate (I).
3252 multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
3253 NVPTXRegClass intype> {
3254 def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3255 (ins Int64Regs:$t, Int64Regs:$s)>;
3256 def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3257 (ins Int64Regs:$t, i64imm:$s)>;
3258 def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3259 (ins i64imm:$t, Int64Regs:$s)>;
3260 def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3261 (ins i64imm:$t, i64imm:$s)>;
// Cube "tex.level" fetches: f32, s32 and u32 results, all with f32 inputs.
3264 defm TEX_CUBE_F32_F32_LEVEL
3265 : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3266 defm TEX_CUBE_S32_F32_LEVEL
3267 : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3268 defm TEX_CUBE_U32_F32_LEVEL
3269 : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for cube-array texture fetches. Inputs start with the $t/$s
// handles, Int32Regs $l, and `intype` $x and $y; the assembly string prints
// the four-element coordinate vector {$l, $x, $y, $z} with no repeated
// element.
3271 class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
3272 NVPTXRegClass intype, dag texsamp>
3273 : NVPTXInst<(outs outtype:$r, outtype:$g,
3274 outtype:$b, outtype:$a),
3275 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3277 inst # " \t\\{$r, $g, $b, $a\\},"
3278 " [$t, $s, \\{$l, $x, $y, $z\\}];",
// Emits the _RR/_RI/_IR/_II variants of TEX_CUBE_ARRAY_base: each of $t and
// $s is either an Int64Regs register (R) or an i64imm immediate (I).
3281 multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3282 NVPTXRegClass intype> {
3283 def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3284 (ins Int64Regs:$t, Int64Regs:$s)>;
3285 def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3286 (ins Int64Regs:$t, i64imm:$s)>;
3287 def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3288 (ins i64imm:$t, Int64Regs:$s)>;
3289 def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3290 (ins i64imm:$t, i64imm:$s)>;
// Cube-array ("acube") fetches: f32, s32 and u32 results, all with f32
// coordinates.
3293 defm TEX_CUBE_ARRAY_F32_F32
3294 : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3295 defm TEX_CUBE_ARRAY_S32_F32
3296 : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3297 defm TEX_CUBE_ARRAY_U32_F32
3298 : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for cube-array texture fetches with an extra $lod operand:
// inputs are the $t/$s handles, Int32Regs $l, and `intype` $x, $y, $z and
// $lod. The coordinate vector is {$l, $x, $y, $z}; $lod is printed after the
// bracketed address.
3300 class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3301 NVPTXRegClass intype, dag texsamp>
3302 : NVPTXInst<(outs outtype:$r, outtype:$g,
3303 outtype:$b, outtype:$a),
3304 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3305 intype:$z, intype:$lod)),
3306 inst # " \t\\{$r, $g, $b, $a\\},"
3307 " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
// Emits the _RR/_RI/_IR/_II variants of TEX_CUBE_ARRAY_LEVEL_base: each of $t
// and $s is either an Int64Regs register (R) or an i64imm immediate (I).
3310 multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3311 NVPTXRegClass intype> {
3312 def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3313 (ins Int64Regs:$t, Int64Regs:$s)>;
3314 def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3315 (ins Int64Regs:$t, i64imm:$s)>;
3316 def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3317 (ins i64imm:$t, Int64Regs:$s)>;
3318 def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3319 (ins i64imm:$t, i64imm:$s)>;
// Cube-array "tex.level" fetches: f32, s32 and u32 results, all with f32
// inputs.
3322 defm TEX_CUBE_ARRAY_F32_F32_LEVEL
3323 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3324 Float32Regs, Float32Regs>;
3325 defm TEX_CUBE_ARRAY_S32_F32_LEVEL
3326 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3327 Int32Regs, Float32Regs>;
3328 defm TEX_CUBE_ARRAY_U32_F32_LEVEL
3329 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3330 Int32Regs, Float32Regs>;
// Base class for 2D tld4 instructions with separate $t and $s handle
// operands: four `outtype` results named $v0..$v3 (rather than $r/$g/$b/$a)
// and two `intype` coordinates ($x, $y).
3332 class TLD4_2D_base<string inst, NVPTXRegClass outtype,
3333 NVPTXRegClass intype, dag texsamp>
3334 : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3335 outtype:$v2, outtype:$v3),
3336 !con(texsamp, (ins intype:$x, intype:$y)),
3337 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
// Emits the _RR/_RI/_IR/_II variants of TLD4_2D_base: each of $t and $s is
// either an Int64Regs register (R) or an i64imm immediate (I).
3340 multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3341 def _RR : TLD4_2D_base<inst, outtype, intype,
3342 (ins Int64Regs:$t, Int64Regs:$s)>;
3343 def _RI : TLD4_2D_base<inst, outtype, intype,
3344 (ins Int64Regs:$t, i64imm:$s)>;
3345 def _IR : TLD4_2D_base<inst, outtype, intype,
3346 (ins i64imm:$t, Int64Regs:$s)>;
3347 def _II : TLD4_2D_base<inst, outtype, intype,
3348 (ins i64imm:$t, i64imm:$s)>;
// 2D tld4 instructions: one instantiation per component selector in the
// mnemonic (r, g, b, a) for each of the f32, s32 and u32 result types. All
// take f32 coordinates; the s32 and u32 forms both use Int32Regs results.
3351 defm TLD4_R_2D_F32_F32
3352 : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3353 defm TLD4_G_2D_F32_F32
3354 : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3355 defm TLD4_B_2D_F32_F32
3356 : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3357 defm TLD4_A_2D_F32_F32
3358 : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3360 defm TLD4_R_2D_S32_F32
3361 : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3362 defm TLD4_G_2D_S32_F32
3363 : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3364 defm TLD4_B_2D_S32_F32
3365 : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3366 defm TLD4_A_2D_S32_F32
3367 : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3369 defm TLD4_R_2D_U32_F32
3370 : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3371 defm TLD4_G_2D_U32_F32
3372 : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3373 defm TLD4_B_2D_U32_F32
3374 : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3375 defm TLD4_A_2D_U32_F32
3376 : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3382 let IsTex = true, IsTexModeUnified = true in {
3383 // Texture fetch instructions using handles
// Base class for unified-mode 1D texture fetches: the caller supplies only a
// $t handle operand through `tex` (the assembly string has no $s). Defines
// four `outtype` results ($r, $g, $b, $a) and one `intype` coordinate $x.
3385 class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
3386 NVPTXRegClass intype, dag tex>
3387 : NVPTXInst<(outs outtype:$r, outtype:$g,
3388 outtype:$b, outtype:$a),
3389 !con(tex, (ins intype:$x)),
3390 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Emits two TEX_UNIFIED_1D_base variants: _R takes $t as an Int64Regs
// register, _I takes it as an i64imm immediate.
3393 multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
3394 NVPTXRegClass intype> {
3395 def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3396 def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified-mode 1D fetches: f32, s32 and u32 results, each with s32 or f32
// coordinates.
3399 defm TEX_UNIFIED_1D_F32_S32
3400 : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
3401 defm TEX_UNIFIED_1D_F32_F32
3402 : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3403 defm TEX_UNIFIED_1D_S32_S32
3404 : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
3405 defm TEX_UNIFIED_1D_S32_F32
3406 : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3407 defm TEX_UNIFIED_1D_U32_S32
3408 : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
3409 defm TEX_UNIFIED_1D_U32_F32
3410 : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for unified-mode 1D texture fetches with an extra $lod operand:
// `intype` inputs $x and $lod follow the $t handle, and $lod is printed after
// the bracketed address.
3412 class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
3413 NVPTXRegClass intype, dag tex>
3414 : NVPTXInst<(outs outtype:$r, outtype:$g,
3415 outtype:$b, outtype:$a),
3416 !con(tex, (ins intype:$x, intype:$lod)),
3417 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
// Emits two TEX_UNIFIED_1D_LEVEL_base variants: _R takes $t as an Int64Regs
// register, _I takes it as an i64imm immediate.
3420 multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
3421 NVPTXRegClass intype> {
3422 def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3423 def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified-mode 1D "tex.level" fetches: f32, s32 and u32 results, all with f32
// inputs.
3426 defm TEX_UNIFIED_1D_F32_F32_LEVEL
3427 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3428 defm TEX_UNIFIED_1D_S32_F32_LEVEL
3429 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3430 defm TEX_UNIFIED_1D_U32_F32_LEVEL
3431 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for unified-mode 1D texture fetches with $gradx and $grady
// operands: `intype` inputs $x, $gradx and $grady follow the $t handle, and
// each gradient is printed as its own one-element brace-wrapped vector.
3433 class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
3434 NVPTXRegClass intype, dag tex>
3435 : NVPTXInst<(outs outtype:$r, outtype:$g,
3436 outtype:$b, outtype:$a),
3437 !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
3438 inst # " \t\\{$r, $g, $b, $a\\},"
3439 " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Emits two TEX_UNIFIED_1D_GRAD_base variants: _R takes $t as an Int64Regs
// register, _I takes it as an i64imm immediate.
3442 multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
3443 NVPTXRegClass intype> {
3444 def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3445 def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified-mode 1D "tex.grad" fetches: f32, s32 and u32 results, all with f32
// inputs.
3448 defm TEX_UNIFIED_1D_F32_F32_GRAD
3449 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3450 defm TEX_UNIFIED_1D_S32_F32_GRAD
3451 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3452 defm TEX_UNIFIED_1D_U32_F32_GRAD
3453 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for unified-mode 1D-array texture fetches: inputs are the $t
// handle, an Int32Regs operand $l, and one `intype` coordinate $x. $l is
// printed first in the coordinate vector.
3455 class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
3456 NVPTXRegClass intype, dag tex>
3457 : NVPTXInst<(outs outtype:$r, outtype:$g,
3458 outtype:$b, outtype:$a),
3459 !con(tex, (ins Int32Regs:$l, intype:$x)),
3460 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
// Emits two TEX_UNIFIED_1D_ARRAY_base variants: _R takes $t as an Int64Regs
// register, _I takes it as an i64imm immediate.
3463 multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
3464 NVPTXRegClass intype> {
3465 def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3466 def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified-mode 1D-array ("a1d") fetches: f32, s32 and u32 results, each with
// s32 or f32 coordinates.
3469 defm TEX_UNIFIED_1D_ARRAY_F32_S32
3470 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
3471 defm TEX_UNIFIED_1D_ARRAY_F32_F32
3472 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
3473 defm TEX_UNIFIED_1D_ARRAY_S32_S32
3474 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
3475 defm TEX_UNIFIED_1D_ARRAY_S32_F32
3476 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
3477 defm TEX_UNIFIED_1D_ARRAY_U32_S32
3478 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
3479 defm TEX_UNIFIED_1D_ARRAY_U32_F32
3480 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for unified-mode 1D-array texture fetches with an extra $lod
// operand: inputs are the $t handle, Int32Regs $l, and `intype` $x and $lod.
// $lod is printed after the bracketed address.
3482 class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3483 NVPTXRegClass intype, dag tex>
3484 : NVPTXInst<(outs outtype:$r, outtype:$g,
3485 outtype:$b, outtype:$a),
3486 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
3487 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
// Emits the _R and _I variants of TEX_UNIFIED_1D_ARRAY_LEVEL_base; _R takes
// $t as an Int64Regs register.
// NOTE(review): _I presumably takes $t as i64imm, matching the other unified
// multiclasses; confirm.
3490 multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3491 NVPTXRegClass intype> {
3492 def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
3493 (ins Int64Regs:$t)>;
3494 def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
// Unified-mode 1D-array "tex.level" fetches: f32, s32 and u32 results, all
// with f32 inputs.
3498 defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3499 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
3500 Float32Regs, Float32Regs>;
3501 defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3502 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
3503 Int32Regs, Float32Regs>;
3504 defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3505 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
3506 Int32Regs, Float32Regs>;
// Base class for unified-mode 1D-array texture fetches with $gradx and $grady
// operands: inputs are the $t handle, Int32Regs $l, and `intype` $x, $gradx
// and $grady. Each gradient is printed as its own one-element brace-wrapped
// vector.
3508 class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3509 NVPTXRegClass intype, dag tex>
3510 : NVPTXInst<(outs outtype:$r, outtype:$g,
3511 outtype:$b, outtype:$a),
3512 !con(tex, (ins Int32Regs:$l, intype:$x,
3513 intype:$gradx, intype:$grady)),
3514 inst # " \t\\{$r, $g, $b, $a\\},"
3515 " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Emits the _R and _I variants of TEX_UNIFIED_1D_ARRAY_GRAD_base; _R takes $t
// as an Int64Regs register.
// NOTE(review): _I presumably takes $t as i64imm, matching the other unified
// multiclasses; confirm.
3518 multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3519 NVPTXRegClass intype> {
3520 def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
3521 (ins Int64Regs:$t)>;
3522 def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
// Unified-mode 1D-array "tex.grad" fetches: f32, s32 and u32 results, all
// with f32 inputs.
3526 defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3527 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
3528 Float32Regs, Float32Regs>;
3529 defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3530 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
3531 Int32Regs, Float32Regs>;
3532 defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3533 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
3534 Int32Regs, Float32Regs>;
// Base class for unified-mode 2D texture fetches: inputs are the $t handle
// and two `intype` coordinates ($x, $y) printed as a two-element vector.
3536 class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3537 NVPTXRegClass intype, dag tex>
3538 : NVPTXInst<(outs outtype:$r, outtype:$g,
3539 outtype:$b, outtype:$a),
3540 !con(tex, (ins intype:$x, intype:$y)),
3541 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
// Emits two TEX_UNIFIED_2D_base variants: _R takes $t as an Int64Regs
// register, _I takes it as an i64imm immediate.
3544 multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3545 NVPTXRegClass intype> {
3546 def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3547 def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified-mode 2D fetches: f32, s32 and u32 results, each with s32 or f32
// coordinates.
3550 defm TEX_UNIFIED_2D_F32_S32
3551 : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
3552 defm TEX_UNIFIED_2D_F32_F32
3553 : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3554 defm TEX_UNIFIED_2D_S32_S32
3555 : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
3556 defm TEX_UNIFIED_2D_S32_F32
3557 : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3558 defm TEX_UNIFIED_2D_U32_S32
3559 : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
3560 defm TEX_UNIFIED_2D_U32_F32
3561 : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for unified-mode 2D texture fetches with an extra $lod operand:
// `intype` inputs $x, $y and $lod follow the $t handle, and $lod is printed
// after the bracketed address.
3563 class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
3564 NVPTXRegClass intype, dag tex>
3565 : NVPTXInst<(outs outtype:$r, outtype:$g,
3566 outtype:$b, outtype:$a),
3567 !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
3568 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
// Emits two TEX_UNIFIED_2D_LEVEL_base variants: _R takes $t as an Int64Regs
// register, _I takes it as an i64imm immediate.
3571 multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
3572 NVPTXRegClass intype> {
3573 def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3574 def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
3577 defm TEX_UNIFIED_2D_F32_F32_LEVEL
3578 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3579 defm TEX_UNIFIED_2D_S32_F32_LEVEL
3580 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3581 defm TEX_UNIFIED_2D_U32_F32_LEVEL
3582 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction for the TEX_UNIFIED 2D fetch with explicit gradients.
// Inputs after the `tex` dag: coordinates ($x, $y) and two 2-component
// gradient vectors ($gradx0, $gradx1) and ($grady0, $grady1), all of intype.
// The asm string is built from two adjacent string literals; keep them
// adjacent so they form a single string.
3584 class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
3585 NVPTXRegClass intype, dag tex>
3586 : NVPTXInst<(outs outtype:$r, outtype:$g,
3587 outtype:$b, outtype:$a),
3588 !con(tex, (ins intype:$x, intype:$y,
3589 intype:$gradx0, intype:$gradx1,
3590 intype:$grady0, intype:$grady1)),
3591 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
3592 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
// _R: $t is an Int64Regs register; _I: $t is an i64imm immediate.
3594 multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
3595 NVPTXRegClass intype> {
3596 def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3597 def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations: f32/s32/u32 results, Float32Regs inputs only.
3600 defm TEX_UNIFIED_2D_F32_F32_GRAD
3601 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3602 defm TEX_UNIFIED_2D_S32_F32_GRAD
3603 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3604 defm TEX_UNIFIED_2D_U32_F32_GRAD
3605 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction for the TEX_UNIFIED 2D-array fetch.
// Inputs after the `tex` dag: $l (always Int32Regs, independent of intype)
// followed by the coordinates $x, $y of intype.
// The address vector printed is {$l, $x, $y, $y}: $y appears twice.
// NOTE(review): the repeated $y looks like deliberate padding to a 4-element
// vector rather than a typo - confirm against the PTX ISA before changing.
3607 class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
3608 NVPTXRegClass intype, dag tex>
3609 : NVPTXInst<(outs outtype:$r, outtype:$g,
3610 outtype:$b, outtype:$a),
3611 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
3612 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
// _R: $t is an Int64Regs register; _I: $t is an i64imm immediate.
3614 multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
3615 NVPTXRegClass intype> {
3616 def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3617 def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations: result type {f32, s32, u32} x coordinate type {s32, f32}.
3620 defm TEX_UNIFIED_2D_ARRAY_F32_S32
3621 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
3622 defm TEX_UNIFIED_2D_ARRAY_F32_F32
3623 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3624 defm TEX_UNIFIED_2D_ARRAY_S32_S32
3625 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
3626 defm TEX_UNIFIED_2D_ARRAY_S32_F32
3627 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3628 defm TEX_UNIFIED_2D_ARRAY_U32_S32
3629 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
3630 defm TEX_UNIFIED_2D_ARRAY_U32_F32
3631 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction for the TEX_UNIFIED 2D-array fetch with a $lod operand.
// Visible inputs after the `tex` dag: $l (Int32Regs), then $x, $y of intype.
// The asm string also prints $lod after the bracketed address.
// NOTE(review): the operand line that declares $lod is not visible in this
// listing - confirm it is present in the full file.
// NOTE(review): $y is printed twice in the address vector; this looks like
// padding to four elements - confirm against the PTX ISA.
3633 class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3634 NVPTXRegClass intype, dag tex>
3635 : NVPTXInst<(outs outtype:$r, outtype:$g,
3636 outtype:$b, outtype:$a),
3637 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3639 inst # " \t\\{$r, $g, $b, $a\\},"
3640 " [$t, \\{$l, $x, $y, $y\\}], $lod;",
// _R passes $t as an Int64Regs register. The operand dag handed to _I is not
// visible in this listing.
3642 multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3643 NVPTXRegClass intype> {
3644 def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3645 (ins Int64Regs:$t)>;
3646 def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
// Instantiations: f32/s32/u32 results, Float32Regs inputs only.
3650 defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3651 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
3652 Float32Regs, Float32Regs>;
3653 defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3654 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
3655 Int32Regs, Float32Regs>;
3656 defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3657 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
3658 Int32Regs, Float32Regs>;
// Base instruction for the TEX_UNIFIED 2D-array fetch with explicit gradients.
// Inputs after the `tex` dag: $l (Int32Regs), coordinates $x, $y, and the
// gradient pairs ($gradx0, $gradx1) and ($grady0, $grady1), all of intype
// except $l.
// NOTE(review): the address vector prints $y twice ({$l, $x, $y, $y});
// presumably padding to four elements - confirm against the PTX ISA.
3660 class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3661 NVPTXRegClass intype, dag tex>
3662 : NVPTXInst<(outs outtype:$r, outtype:$g,
3663 outtype:$b, outtype:$a),
3664 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3665 intype:$gradx0, intype:$gradx1,
3666 intype:$grady0, intype:$grady1)),
3667 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
3668 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
// _R passes $t as an Int64Regs register. The operand dag handed to _I is not
// visible in this listing.
3670 multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3671 NVPTXRegClass intype> {
3672 def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
3673 (ins Int64Regs:$t)>;
3674 def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
// Instantiations: f32/s32/u32 results, Float32Regs inputs only.
3678 defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3679 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
3680 Float32Regs, Float32Regs>;
3681 defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3682 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
3683 Int32Regs, Float32Regs>;
3684 defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3685 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
3686 Int32Regs, Float32Regs>;
// Base instruction for the TEX_UNIFIED 3D fetch.
// Inputs after the `tex` dag: coordinates $x, $y, $z of intype.
// The address vector printed is {$x, $y, $z, $z}: $z appears twice.
// NOTE(review): the repeated $z looks like padding to a 4-element vector -
// confirm against the PTX ISA before changing.
3688 class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
3689 NVPTXRegClass intype, dag tex>
3690 : NVPTXInst<(outs outtype:$r, outtype:$g,
3691 outtype:$b, outtype:$a),
3692 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3693 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
// _R: $t is an Int64Regs register; _I: $t is an i64imm immediate.
3695 multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
3696 NVPTXRegClass intype> {
3697 def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3698 def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations: result type {f32, s32, u32} x coordinate type {s32, f32}.
3701 defm TEX_UNIFIED_3D_F32_S32
3702 : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
3703 defm TEX_UNIFIED_3D_F32_F32
3704 : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3705 defm TEX_UNIFIED_3D_S32_S32
3706 : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
3707 defm TEX_UNIFIED_3D_S32_F32
3708 : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3709 defm TEX_UNIFIED_3D_U32_S32
3710 : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
3711 defm TEX_UNIFIED_3D_U32_F32
3712 : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction for the TEX_UNIFIED 3D fetch with an explicit $lod operand.
// Inputs after the `tex` dag: $x, $y, $z and $lod, all of intype.
// The address vector prints $z twice ({$x, $y, $z, $z}); $lod follows it.
3714 class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
3715 NVPTXRegClass intype, dag tex>
3716 : NVPTXInst<(outs outtype:$r, outtype:$g,
3717 outtype:$b, outtype:$a),
3718 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3719 inst # " \t\\{$r, $g, $b, $a\\},"
3720 " [$t, \\{$x, $y, $z, $z\\}], $lod;",
// _R: $t is an Int64Regs register; _I: $t is an i64imm immediate.
3722 multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
3723 NVPTXRegClass intype> {
3724 def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3725 def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations: f32/s32/u32 results, Float32Regs inputs only.
3728 defm TEX_UNIFIED_3D_F32_F32_LEVEL
3729 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3730 defm TEX_UNIFIED_3D_S32_F32_LEVEL
3731 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3732 defm TEX_UNIFIED_3D_U32_F32_LEVEL
3733 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction for the TEX_UNIFIED 3D fetch with explicit gradients.
// Inputs after the `tex` dag: $x, $y, $z and two 3-component gradients
// ($gradx0..2, $grady0..2), all of intype.
// Each printed vector repeats its third element as a fourth one
// ($z, $gradx2, $grady2).
// NOTE(review): presumably padding to 4-element vectors - confirm against the
// PTX ISA before changing.
3735 class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
3736 NVPTXRegClass intype, dag tex>
3737 : NVPTXInst<(outs outtype:$r, outtype:$g,
3738 outtype:$b, outtype:$a),
3739 !con(tex, (ins intype:$x, intype:$y, intype:$z,
3740 intype:$gradx0, intype:$gradx1,
3741 intype:$gradx2, intype:$grady0,
3742 intype:$grady1, intype:$grady2)),
3743 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
3744 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3745 " \\{$grady0, $grady1, $grady2, $grady2\\};",
// _R: $t is an Int64Regs register; _I: $t is an i64imm immediate.
3747 multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
3748 NVPTXRegClass intype> {
3749 def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3750 def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations: f32/s32/u32 results, Float32Regs inputs only.
3753 defm TEX_UNIFIED_3D_F32_F32_GRAD
3754 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3755 defm TEX_UNIFIED_3D_S32_F32_GRAD
3756 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3757 defm TEX_UNIFIED_3D_U32_F32_GRAD
3758 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction for the TEX_UNIFIED cube fetch.
// Operand list and asm layout match the 3D form: inputs $x, $y, $z of intype
// after the `tex` dag, and the address vector printed as {$x, $y, $z, $z}.
3760 class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
3761 NVPTXRegClass intype, dag tex>
3762 : NVPTXInst<(outs outtype:$r, outtype:$g,
3763 outtype:$b, outtype:$a),
3764 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3765 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
// _R: $t is an Int64Regs register; _I: $t is an i64imm immediate.
3767 multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
3768 NVPTXRegClass intype> {
3769 def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3770 def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations: f32/s32/u32 results, Float32Regs inputs only.
3773 defm TEX_UNIFIED_CUBE_F32_F32
3774 : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3775 defm TEX_UNIFIED_CUBE_S32_F32
3776 : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3777 defm TEX_UNIFIED_CUBE_U32_F32
3778 : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction for the TEX_UNIFIED cube fetch with an explicit $lod.
// Inputs after the `tex` dag: $x, $y, $z and $lod, all of intype.
// The address vector prints $z twice; $lod follows the bracketed address.
3780 class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
3781 NVPTXRegClass intype, dag tex>
3782 : NVPTXInst<(outs outtype:$r, outtype:$g,
3783 outtype:$b, outtype:$a),
3784 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3785 inst # " \t\\{$r, $g, $b, $a\\},"
3786 " [$t, \\{$x, $y, $z, $z\\}], $lod;",
// _R passes $t as an Int64Regs register. The operand dag handed to _I is not
// visible in this listing.
3788 multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
3789 NVPTXRegClass intype> {
3790 def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
3791 (ins Int64Regs:$t)>;
3792 def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
// Instantiations: f32/s32/u32 results, Float32Regs inputs only.
3796 defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
3797 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
3798 Float32Regs, Float32Regs>;
3799 defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
3800 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
3801 Int32Regs, Float32Regs>;
3802 defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
3803 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
3804 Int32Regs, Float32Regs>;
// Base instruction for the TEX_UNIFIED cube-array fetch.
// Inputs after the `tex` dag: $l (always Int32Regs) followed by $x, $y, $z of
// intype. The address vector {$l, $x, $y, $z} already has four distinct
// elements, so no operand is repeated here.
3806 class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
3807 NVPTXRegClass intype, dag tex>
3808 : NVPTXInst<(outs outtype:$r, outtype:$g,
3809 outtype:$b, outtype:$a),
3810 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
3811 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
// _R passes $t as an Int64Regs register. The operand dag handed to _I is not
// visible in this listing.
3813 multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3814 NVPTXRegClass intype> {
3815 def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
3816 (ins Int64Regs:$t)>;
3817 def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
// Instantiations: f32/s32/u32 results, Float32Regs inputs only.
3821 defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
3822 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3823 defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
3824 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3825 defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
3826 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction for the TEX_UNIFIED cube-array fetch with a $lod operand.
// Visible inputs after the `tex` dag: $l (Int32Regs), then $x, $y, $z of
// intype. The asm string also prints $lod after the bracketed address.
// NOTE(review): the operand line that declares $lod is not visible in this
// listing - confirm it is present in the full file.
3828 class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3829 NVPTXRegClass intype, dag tex>
3830 : NVPTXInst<(outs outtype:$r, outtype:$g,
3831 outtype:$b, outtype:$a),
3832 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
3834 inst # " \t\\{$r, $g, $b, $a\\},"
3835 " [$t, \\{$l, $x, $y, $z\\}], $lod;",
// _R passes $t as an Int64Regs register. The operand dag handed to _I is not
// visible in this listing.
3837 multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3838 NVPTXRegClass intype> {
3839 def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3840 (ins Int64Regs:$t)>;
3841 def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
// Instantiations: f32/s32/u32 results, Float32Regs inputs only.
3845 defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3846 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3847 Float32Regs, Float32Regs>;
3848 defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3849 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3850 Int32Regs, Float32Regs>;
3851 defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3852 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3853 Int32Regs, Float32Regs>;
// Base instruction for the TEX_UNIFIED cube fetch with explicit gradients.
// Operand list and asm layout match the 3D gradient form: $x, $y, $z plus
// $gradx0..2 and $grady0..2 of intype, with the third element of each printed
// vector repeated as a fourth.
3855 class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype,
3856 NVPTXRegClass intype, dag tex>
3857 : NVPTXInst<(outs outtype:$r, outtype:$g,
3858 outtype:$b, outtype:$a),
3859 !con(tex, (ins intype:$x, intype:$y, intype:$z,
3860 intype:$gradx0, intype:$gradx1,
3861 intype:$gradx2, intype:$grady0,
3862 intype:$grady1, intype:$grady2)),
3863 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
3864 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3865 " \\{$grady0, $grady1, $grady2, $grady2\\};",
// _R: $t is an Int64Regs register; _I: $t is an i64imm immediate.
3868 multiclass TEX_UNIFIED_CUBE_GRAD<string inst, NVPTXRegClass outtype,
3869 NVPTXRegClass intype> {
3870 def _R : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3871 def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations: f32/s32/u32 results, Float32Regs inputs only.
3874 defm TEX_UNIFIED_CUBE_F32_F32_GRAD
3875 : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3876 defm TEX_UNIFIED_CUBE_S32_F32_GRAD
3877 : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3878 defm TEX_UNIFIED_CUBE_U32_F32_GRAD
3879 : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction for the TEX_UNIFIED cube-array fetch with explicit
// gradients. Inputs after the `tex` dag: $l (Int32Regs), $x, $y, $z, and the
// gradients $gradx0..2 / $grady0..2 of intype.
// The address vector {$l, $x, $y, $z} has four distinct elements; only the
// two gradient vectors repeat their third element as a fourth.
3881 class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3882 NVPTXRegClass intype, dag tex>
3883 : NVPTXInst<(outs outtype:$r, outtype:$g,
3884 outtype:$b, outtype:$a),
3885 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
3886 intype:$gradx0, intype:$gradx1,
3887 intype:$gradx2, intype:$grady0,
3888 intype:$grady1, intype:$grady2)),
3889 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}],"
3890 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3891 " \\{$grady0, $grady1, $grady2, $grady2\\};",
// _R passes $t as an Int64Regs register. The operand dag handed to _I is not
// visible in this listing.
3893 multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3894 NVPTXRegClass intype> {
3895 def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
3896 (ins Int64Regs:$t)>;
3897 def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
// Instantiations: f32/s32/u32 results, Float32Regs inputs only.
3901 defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD
3902 : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.f32.f32",
3903 Float32Regs, Float32Regs>;
3904 defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD
3905 : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.s32.f32",
3906 Int32Regs, Float32Regs>;
3907 defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD
3908 : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.u32.f32",
3909 Int32Regs, Float32Regs>;
// Base instruction for the TLD4_UNIFIED 2D form.
// Produces four outtype results named $v0..$v3 (not $r/$g/$b/$a as in the TEX
// classes) and reads the `tex` dag followed by coordinates $x, $y of intype.
// `tex` must provide the $t operand referenced by the asm string.
3911 class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3912 NVPTXRegClass intype, dag tex>
3913 : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3914 outtype:$v2, outtype:$v3),
3915 !con(tex, (ins intype:$x, intype:$y)),
3916 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
// _R: $t is an Int64Regs register; _I: $t is an i64imm immediate.
3918 multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3919 NVPTXRegClass intype> {
3920 def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3921 def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations: one per component selector in the mnemonic (.r/.g/.b/.a)
// for each result type. f32 results:
3924 defm TLD4_UNIFIED_R_2D_F32_F32
3925 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3926 defm TLD4_UNIFIED_G_2D_F32_F32
3927 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3928 defm TLD4_UNIFIED_B_2D_F32_F32
3929 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3930 defm TLD4_UNIFIED_A_2D_F32_F32
3931 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
// s32 results (Int32Regs):
3933 defm TLD4_UNIFIED_R_2D_S32_F32
3934 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3935 defm TLD4_UNIFIED_G_2D_S32_F32
3936 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3937 defm TLD4_UNIFIED_B_2D_S32_F32
3938 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3939 defm TLD4_UNIFIED_A_2D_S32_F32
3940 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
// u32 results (also Int32Regs; only the mnemonic differs from s32):
3942 defm TLD4_UNIFIED_R_2D_U32_F32
3943 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3944 defm TLD4_UNIFIED_G_2D_U32_F32
3945 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3946 defm TLD4_UNIFIED_B_2D_U32_F32
3947 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3948 defm TLD4_UNIFIED_A_2D_U32_F32
3949 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3955 //=== Surface load instructions
3957 let IsSuld = true in {
// Base instruction for the scalar 1D surface load.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the single result $r.
//   surf    - operand dag prepended (via !con) to the Int32Regs coordinate $x;
//             it has to provide the $s operand used in the asm string.
3959 class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
3960 : NVPTXInst<(outs outtype:$r),
3961 !con(surf, (ins Int32Regs:$x)),
3962 inst # " \\{$r\\}, [$s, \\{$x\\}];",
// _R: $s is an Int64Regs register; _I: $s is an i64imm immediate.
3964 multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
3965 def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
3966 def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: element width {b8, b16, b32, b64} x mnemonic suffix
// {.clamp, .trap, .zero}. Both b8 and b16 use Int16Regs for the result.
3969 defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
3970 defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
3971 defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
3972 defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;
3974 defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
3975 defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
3976 defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
3977 defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;
3979 defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
3980 defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
3981 defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
3982 defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
// Base instruction for the scalar 1D-array surface load.
// Inputs after the `surf` dag: $l and $x, both Int32Regs; the address is
// printed as [$s, {$l, $x}]. `surf` must provide the $s operand.
3984 class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
3985 : NVPTXInst<(outs outtype:$r),
3986 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
3987 inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
// _R: $s is an Int64Regs register; _I: $s is an i64imm immediate.
3989 multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
3990 def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
3991 def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: element width {b8, b16, b32, b64} x mnemonic suffix
// {.clamp, .trap, .zero}. Both b8 and b16 use Int16Regs for the result.
3994 defm SULD_1D_ARRAY_I8_CLAMP
3995 : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
3996 defm SULD_1D_ARRAY_I16_CLAMP
3997 : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
3998 defm SULD_1D_ARRAY_I32_CLAMP
3999 : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
4000 defm SULD_1D_ARRAY_I64_CLAMP
4001 : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;
4003 defm SULD_1D_ARRAY_I8_TRAP
4004 : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
4005 defm SULD_1D_ARRAY_I16_TRAP
4006 : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
4007 defm SULD_1D_ARRAY_I32_TRAP
4008 : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
4009 defm SULD_1D_ARRAY_I64_TRAP
4010 : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;
4012 defm SULD_1D_ARRAY_I8_ZERO
4013 : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
4014 defm SULD_1D_ARRAY_I16_ZERO
4015 : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
4016 defm SULD_1D_ARRAY_I32_ZERO
4017 : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
4018 defm SULD_1D_ARRAY_I64_ZERO
4019 : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
// Base instruction for the scalar 2D surface load.
// Inputs after the `surf` dag: $x and $y, both Int32Regs; the address is
// printed as [$s, {$x, $y}]. `surf` must provide the $s operand.
4021 class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
4022 : NVPTXInst<(outs outtype:$r),
4023 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4024 inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
// _R: $s is an Int64Regs register; _I: $s is an i64imm immediate.
4026 multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
4027 def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
4028 def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: element width {b8, b16, b32, b64} x mnemonic suffix
// {.clamp, .trap, .zero}. Both b8 and b16 use Int16Regs for the result.
4031 defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
4032 defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
4033 defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
4034 defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;
4036 defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
4037 defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
4038 defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
4039 defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;
4041 defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
4042 defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
4043 defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
4044 defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
// Base instruction for the scalar 2D-array surface load.
// Inputs after the `surf` dag: $l, $x, $y, all Int32Regs.
// The address vector printed is {$l, $x, $y, $y}: $y appears twice.
// NOTE(review): the repeated $y looks like padding to a 4-element vector -
// confirm against the PTX ISA before changing.
4046 class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
4047 : NVPTXInst<(outs outtype:$r),
4048 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4049 inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// _R: $s is an Int64Regs register; _I: $s is an i64imm immediate.
4051 multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
4052 def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
4053 def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: element width {b8, b16, b32, b64} x mnemonic suffix
// {.clamp, .trap, .zero}. Both b8 and b16 use Int16Regs for the result.
4056 defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
4057 defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
4058 defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
4059 defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;
4061 defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
4062 defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
4063 defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
4064 defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;
4066 defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
4067 defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
4068 defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
4069 defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
// Base instruction for the scalar 3D surface load.
// Inputs after the `surf` dag: $x, $y, $z, all Int32Regs.
// The address vector printed is {$x, $y, $z, $z}: $z appears twice.
// NOTE(review): the repeated $z looks like padding to a 4-element vector -
// confirm against the PTX ISA before changing.
4071 class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
4072 : NVPTXInst<(outs outtype:$r),
4073 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4074 inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// _R: $s is an Int64Regs register; _I: $s is an i64imm immediate.
4076 multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
4077 def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
4078 def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: element width {b8, b16, b32, b64} x mnemonic suffix
// {.clamp, .trap, .zero}. Both b8 and b16 use Int16Regs for the result.
4081 defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
4082 defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
4083 defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
4084 defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;
4086 defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
4087 defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
4088 defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
4089 defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;
4091 defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
4092 defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
4093 defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
4094 defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
// Base instruction for the two-result (.v2) 1D surface load.
// Produces $r and $g of outtype; reads the `surf` dag followed by the
// Int32Regs coordinate $x. `surf` must provide the $s operand.
4099 class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4100 : NVPTXInst<(outs outtype:$r, outtype:$g),
4101 !con(surf, (ins Int32Regs:$x)),
4102 inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
// _R: $s is an Int64Regs register; _I: $s is an i64imm immediate.
4104 multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
4105 def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4106 def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: element width {b8, b16, b32, b64} x mnemonic suffix
// {.clamp, .trap, .zero}. Both b8 and b16 use Int16Regs for the results.
4109 defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
4110 defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
4111 defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
4112 defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;
4114 defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
4115 defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
4116 defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
4117 defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;
4119 defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
4120 defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
4121 defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
4122 defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
// Base instruction for the two-result (.v2) 1D-array surface load.
// Produces $r and $g of outtype; inputs after the `surf` dag are $l and $x,
// both Int32Regs, printed as [$s, {$l, $x}].
4124 class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4125 : NVPTXInst<(outs outtype:$r, outtype:$g),
4126 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
4127 inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// _R: $s is an Int64Regs register; _I: $s is an i64imm immediate.
4129 multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
4130 def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4131 def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: element width {b8, b16, b32, b64} x mnemonic suffix
// {.clamp, .trap, .zero}. Both b8 and b16 use Int16Regs for the results.
4134 defm SULD_1D_ARRAY_V2I8_CLAMP
4135 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
4136 defm SULD_1D_ARRAY_V2I16_CLAMP
4137 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
4138 defm SULD_1D_ARRAY_V2I32_CLAMP
4139 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
4140 defm SULD_1D_ARRAY_V2I64_CLAMP
4141 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;
4143 defm SULD_1D_ARRAY_V2I8_TRAP
4144 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
4145 defm SULD_1D_ARRAY_V2I16_TRAP
4146 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
4147 defm SULD_1D_ARRAY_V2I32_TRAP
4148 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
4149 defm SULD_1D_ARRAY_V2I64_TRAP
4150 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;
4152 defm SULD_1D_ARRAY_V2I8_ZERO
4153 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
4154 defm SULD_1D_ARRAY_V2I16_ZERO
4155 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
4156 defm SULD_1D_ARRAY_V2I32_ZERO
4157 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
4158 defm SULD_1D_ARRAY_V2I64_ZERO
4159 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
// Base instruction for the two-result (.v2) 2D surface load.
// Produces $r and $g of outtype; inputs after the `surf` dag are $x and $y,
// both Int32Regs, printed as [$s, {$x, $y}].
4161 class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4162 : NVPTXInst<(outs outtype:$r, outtype:$g),
4163 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4164 inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// _R: $s is an Int64Regs register; _I: $s is an i64imm immediate.
4166 multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
4167 def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4168 def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: element width {b8, b16, b32, b64} x mnemonic suffix
// {.clamp, .trap, .zero}. Both b8 and b16 use Int16Regs for the results.
4171 defm SULD_2D_V2I8_CLAMP
4172 : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
4173 defm SULD_2D_V2I16_CLAMP
4174 : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
4175 defm SULD_2D_V2I32_CLAMP
4176 : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
4177 defm SULD_2D_V2I64_CLAMP
4178 : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;
4180 defm SULD_2D_V2I8_TRAP
4181 : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
4182 defm SULD_2D_V2I16_TRAP
4183 : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
4184 defm SULD_2D_V2I32_TRAP
4185 : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
4186 defm SULD_2D_V2I64_TRAP
4187 : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;
4189 defm SULD_2D_V2I8_ZERO
4190 : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
4191 defm SULD_2D_V2I16_ZERO
4192 : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
4193 defm SULD_2D_V2I32_ZERO
4194 : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
4195 defm SULD_2D_V2I64_ZERO
4196 : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
// Base instruction for the two-result (.v2) 2D-array surface load.
// Produces $r and $g of outtype; inputs after the `surf` dag are $l, $x, $y,
// all Int32Regs. The address vector printed is {$l, $x, $y, $y}.
// NOTE(review): the repeated $y looks like padding to a 4-element vector -
// confirm against the PTX ISA before changing.
4198 class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4199 : NVPTXInst<(outs outtype:$r, outtype:$g),
4200 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4201 inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
// _R: $s is an Int64Regs register; _I: $s is an i64imm immediate.
4203 multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
4204 def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4205 def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: element width {b8, b16, b32, b64} x mnemonic suffix
// {.clamp, .trap, .zero}. Both b8 and b16 use Int16Regs for the results.
4208 defm SULD_2D_ARRAY_V2I8_CLAMP
4209 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
4210 defm SULD_2D_ARRAY_V2I16_CLAMP
4211 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
4212 defm SULD_2D_ARRAY_V2I32_CLAMP
4213 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
4214 defm SULD_2D_ARRAY_V2I64_CLAMP
4215 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;
4217 defm SULD_2D_ARRAY_V2I8_TRAP
4218 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
4219 defm SULD_2D_ARRAY_V2I16_TRAP
4220 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
4221 defm SULD_2D_ARRAY_V2I32_TRAP
4222 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
4223 defm SULD_2D_ARRAY_V2I64_TRAP
4224 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;
4226 defm SULD_2D_ARRAY_V2I8_ZERO
4227 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
4228 defm SULD_2D_ARRAY_V2I16_ZERO
4229 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
4230 defm SULD_2D_ARRAY_V2I32_ZERO
4231 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
4232 defm SULD_2D_ARRAY_V2I64_ZERO
4233 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
// Base instruction for the two-result (.v2) 3D surface load.
// Produces $r and $g of outtype; inputs after the `surf` dag are $x, $y, $z,
// all Int32Regs. The address vector printed is {$x, $y, $z, $z}.
// NOTE(review): the repeated $z looks like padding to a 4-element vector -
// confirm against the PTX ISA before changing.
4235 class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4236 : NVPTXInst<(outs outtype:$r, outtype:$g),
4237 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4238 inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// _R: $s is an Int64Regs register; _I: $s is an i64imm immediate.
4240 multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
4241 def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4242 def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: element width {b8, b16, b32, b64} x mnemonic suffix
// {.clamp, .trap, .zero}. Both b8 and b16 use Int16Regs for the results.
4245 defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
4246 defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
4247 defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
4248 defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;
4250 defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
4251 defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
4252 defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
4253 defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;
4255 defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
4256 defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
4257 defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
4258 defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
// Base instruction for the four-result (.v4) 1D surface load.
// Produces $r, $g, $b, $a of outtype; reads the `surf` dag followed by the
// Int32Regs coordinate $x. `surf` must provide the $s operand.
4264 class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4265 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4266 !con(surf, (ins Int32Regs:$x)),
4267 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// _R: $s is an Int64Regs register; _I: $s is an i64imm immediate.
4269 multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
4270 def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4271 def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: element width {b8, b16, b32} x {.clamp, .trap, .zero}.
// Unlike the scalar and .v2 groups, no b64 variant is instantiated here.
4274 defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
4275 defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
4276 defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
4278 defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
4279 defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
4280 defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
4282 defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
4283 defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
4284 defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
// Base instruction for the four-result (.v4) 1D-array surface load.
// Produces $r, $g, $b, $a of outtype; inputs after the `surf` dag are $l and
// $x, both Int32Regs, printed as [$s, {$l, $x}].
4286 class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4287 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4288 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
4289 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
// _R: $s is an Int64Regs register; _I: $s is an i64imm immediate.
4291 multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4292 def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4293 def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: element width {b8, b16, b32} x {.clamp, .trap, .zero}.
// No b64 variant is instantiated in this group.
4296 defm SULD_1D_ARRAY_V4I8_CLAMP
4297 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
4298 defm SULD_1D_ARRAY_V4I16_CLAMP
4299 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
4300 defm SULD_1D_ARRAY_V4I32_CLAMP
4301 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
4303 defm SULD_1D_ARRAY_V4I8_TRAP
4304 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
4305 defm SULD_1D_ARRAY_V4I16_TRAP
4306 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
4307 defm SULD_1D_ARRAY_V4I32_TRAP
4308 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
4310 defm SULD_1D_ARRAY_V4I8_ZERO
4311 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
4312 defm SULD_1D_ARRAY_V4I16_ZERO
4313 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
4314 defm SULD_1D_ARRAY_V4I32_ZERO
4315 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
// Base record for the "suld.b.2d.v4.*" instructions instantiated below.
// Results are $r, $g, $b, $a in `outtype`; inputs are the `surf` dag ($s)
// followed by two Int32Regs operands, $x and $y, printed as {$x, $y}.
// NOTE(review): the line that terminates this NVPTXInst<...> record is not
// visible in this excerpt.
4317 class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4318 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4319 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4320 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4322 multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
4323 def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4324 def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for b8/b16/b32 with the .clamp, .trap and .zero suffixes.
// The b8 and b16 variants both use Int16Regs.
4327 defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
4328 defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
4329 defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
4331 defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
4332 defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
4333 defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
4335 defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
4336 defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
4337 defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
// Base record for the "suld.b.a2d.v4.*" instructions instantiated below.
// Results are $r, $g, $b, $a in `outtype`; inputs are the `surf` dag ($s)
// followed by three Int32Regs operands: $l, $x, $y.
// The coordinate list in the asm string has four entries and prints $y twice.
// NOTE(review): presumably the repeated $y pads the list to a four-element
// coordinate vector - confirm against the PTX ISA description of suld.
// NOTE(review): the line that terminates this NVPTXInst<...> record is not
// visible in this excerpt.
4339 class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4340 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4341 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4342 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4344 multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4345 def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4346 def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for b8/b16/b32 with the .clamp, .trap and .zero suffixes.
// The b8 and b16 variants both use Int16Regs.
4349 defm SULD_2D_ARRAY_V4I8_CLAMP
4350 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
4351 defm SULD_2D_ARRAY_V4I16_CLAMP
4352 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
4353 defm SULD_2D_ARRAY_V4I32_CLAMP
4354 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
4356 defm SULD_2D_ARRAY_V4I8_TRAP
4357 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
4358 defm SULD_2D_ARRAY_V4I16_TRAP
4359 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
4360 defm SULD_2D_ARRAY_V4I32_TRAP
4361 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
4363 defm SULD_2D_ARRAY_V4I8_ZERO
4364 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
4365 defm SULD_2D_ARRAY_V4I16_ZERO
4366 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
4367 defm SULD_2D_ARRAY_V4I32_ZERO
4368 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
// Base record for the "suld.b.3d.v4.*" instructions instantiated below.
// Results are $r, $g, $b, $a in `outtype`; inputs are the `surf` dag ($s)
// followed by three Int32Regs operands: $x, $y, $z.
// The coordinate list in the asm string has four entries and prints $z twice.
// NOTE(review): presumably the repeated $z pads the list to a four-element
// coordinate vector - confirm against the PTX ISA description of suld.
// NOTE(review): the line that terminates this NVPTXInst<...> record is not
// visible in this excerpt.
4370 class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4371 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4372 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4373 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4375 multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
4376 def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4377 def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for b8/b16/b32 with the .clamp, .trap and .zero suffixes.
// The b8 and b16 variants both use Int16Regs.
4380 defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
4381 defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
4382 defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
4384 defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
4385 defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
4386 defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
4388 defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
4389 defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
4390 defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
4394 //-----------------------------------
4395 // Texture Query Intrinsics
4396 //-----------------------------------
// Every record in this scope is created with IsSurfTexQuery = true.
// Each txq.* query comes as a pair: a form taking $a as an Int64Regs register
// and a form taking $a as an i64imm immediate. All of them write an Int32Regs
// result $d.
// NOTE(review): several lines of this scope are not visible in this excerpt
// (the tail of each NVPTXInst<...> record, the `def` lines for the width,
// height and depth records, and the closing brace of the `let`).
4398 let IsSurfTexQuery = true in {
// txq.channel_order: register form, then immediate form.
4399 def TXQ_CHANNEL_ORDER_R
4400 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4401 "txq.channel_order.b32 \t$d, [$a];",
4403 def TXQ_CHANNEL_ORDER_I
4404 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4405 "txq.channel_order.b32 \t$d, [$a];",
// txq.channel_data_type: register form, then immediate form.
4407 def TXQ_CHANNEL_DATA_TYPE_R
4408 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4409 "txq.channel_data_type.b32 \t$d, [$a];",
4411 def TXQ_CHANNEL_DATA_TYPE_I
4412 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4413 "txq.channel_data_type.b32 \t$d, [$a];",
// txq.width: register form, then immediate form (record names not visible).
4416 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4417 "txq.width.b32 \t$d, [$a];",
4420 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4421 "txq.width.b32 \t$d, [$a];",
// txq.height: register form, then immediate form (record names not visible).
4424 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4425 "txq.height.b32 \t$d, [$a];",
4428 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4429 "txq.height.b32 \t$d, [$a];",
// txq.depth: register form, then immediate form (record names not visible).
4432 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4433 "txq.depth.b32 \t$d, [$a];",
4436 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4437 "txq.depth.b32 \t$d, [$a];",
// txq.array_size: register form, then immediate form.
4439 def TXQ_ARRAY_SIZE_R
4440 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4441 "txq.array_size.b32 \t$d, [$a];",
4443 def TXQ_ARRAY_SIZE_I
4444 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4445 "txq.array_size.b32 \t$d, [$a];",
// txq.num_samples: register form, then immediate form.
4447 def TXQ_NUM_SAMPLES_R
4448 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4449 "txq.num_samples.b32 \t$d, [$a];",
4451 def TXQ_NUM_SAMPLES_I
4452 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4453 "txq.num_samples.b32 \t$d, [$a];",
// txq.num_mipmap_levels: register form, then immediate form.
4455 def TXQ_NUM_MIPMAP_LEVELS_R
4456 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4457 "txq.num_mipmap_levels.b32 \t$d, [$a];",
4459 def TXQ_NUM_MIPMAP_LEVELS_I
4460 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4461 "txq.num_mipmap_levels.b32 \t$d, [$a];",
// Selection patterns: each int_nvvm_txq_* intrinsic applied to an Int64Regs
// operand is rewritten to the register-form (_R) TXQ record of the same name.
// No pattern in this group targets an immediate-form (_I) record.
4465 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4466 (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
4467 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4468 (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
4469 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4470 (TXQ_WIDTH_R Int64Regs:$a)>;
4471 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4472 (TXQ_HEIGHT_R Int64Regs:$a)>;
4473 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4474 (TXQ_DEPTH_R Int64Regs:$a)>;
4475 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4476 (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
4477 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4478 (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
4479 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4480 (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
4483 //-----------------------------------
4484 // Surface Query Intrinsics
4485 //-----------------------------------
// Every record in this scope is created with IsSurfTexQuery = true.
// Each suq.* query comes as a pair: a form taking $a as an Int64Regs register
// and a form taking $a as an i64imm immediate. All of them write an Int32Regs
// result $d.
// NOTE(review): several lines of this scope are not visible in this excerpt
// (the tail of each NVPTXInst<...> record, the `def` lines for the width,
// height and depth records, and the closing brace of the `let`).
4487 let IsSurfTexQuery = true in {
// suq.channel_order: register form, then immediate form.
4488 def SUQ_CHANNEL_ORDER_R
4489 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4490 "suq.channel_order.b32 \t$d, [$a];",
4492 def SUQ_CHANNEL_ORDER_I
4493 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4494 "suq.channel_order.b32 \t$d, [$a];",
// suq.channel_data_type: register form, then immediate form.
4496 def SUQ_CHANNEL_DATA_TYPE_R
4497 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4498 "suq.channel_data_type.b32 \t$d, [$a];",
4500 def SUQ_CHANNEL_DATA_TYPE_I
4501 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4502 "suq.channel_data_type.b32 \t$d, [$a];",
// suq.width: register form, then immediate form (record names not visible).
4505 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4506 "suq.width.b32 \t$d, [$a];",
4509 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4510 "suq.width.b32 \t$d, [$a];",
// suq.height: register form, then immediate form (record names not visible).
4513 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4514 "suq.height.b32 \t$d, [$a];",
4517 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4518 "suq.height.b32 \t$d, [$a];",
// suq.depth: register form, then immediate form (record names not visible).
4521 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4522 "suq.depth.b32 \t$d, [$a];",
4525 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4526 "suq.depth.b32 \t$d, [$a];",
// suq.array_size: register form, then immediate form.
4528 def SUQ_ARRAY_SIZE_R
4529 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4530 "suq.array_size.b32 \t$d, [$a];",
4532 def SUQ_ARRAY_SIZE_I
4533 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4534 "suq.array_size.b32 \t$d, [$a];",
// Selection patterns: each int_nvvm_suq_* intrinsic applied to an Int64Regs
// operand is rewritten to the register-form (_R) SUQ record of the same name.
// No pattern in this group targets an immediate-form (_I) record.
4538 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4539 (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
4540 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4541 (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
4542 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4543 (SUQ_WIDTH_R Int64Regs:$a)>;
4544 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4545 (SUQ_HEIGHT_R Int64Regs:$a)>;
4546 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4547 (SUQ_DEPTH_R Int64Regs:$a)>;
4548 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4549 (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
4552 //===- Handle Query -------------------------------------------------------===//
4554 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Three istypep records. Each takes an Int64Regs operand $a, writes an
// Int1Regs result $d, and carries its own selection pattern inline, binding $d
// to the corresponding int_nvvm_istypep_* intrinsic.
// NOTE(review): the `def` line naming each record is not visible in this
// excerpt.
// istypep.samplerref <- int_nvvm_istypep_sampler
4556 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4557 "istypep.samplerref \t$d, $a;",
4558 [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
// istypep.surfref <- int_nvvm_istypep_surface
4560 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4561 "istypep.surfref \t$d, $a;",
4562 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
// istypep.texref <- int_nvvm_istypep_texture
4564 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4565 "istypep.texref \t$d, $a;",
4566 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4568 //===- Surface Stores -----------------------------------------------------===//
// Records created inside this scope get IsSust = true. The closing brace of
// the scope is not visible in this excerpt.
4570 let IsSust = true in {
// Base record for the "sust.{b,p}.1d.*" instructions instantiated below.
//   inst   - mnemonic text that is prepended to the operand list.
//   intype - register class of the stored value $r.
//   surf   - dag supplying the $s operand; it is concatenated (!con) in front
//            of the Int32Regs operand $x and the value $r.
// NOTE(review): the lines holding the start of the NVPTXInst<...> parent and
// the record terminator are not visible in this excerpt.
4572 class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
4574 !con(surf, (ins Int32Regs:$x, intype:$r)),
4575 inst # " \t[$s, \\{$x\\}], \\{$r\\};",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4577 multiclass SUST_1D<string inst, NVPTXRegClass intype> {
4578 def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
4579 def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
// "sust.b" instantiations: b8/b16/b32/b64 with .clamp, .trap and .zero.
// The b8 and b16 variants both use Int16Regs.
4582 defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
4583 defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
4584 defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
4585 defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
4587 defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
4588 defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
4589 defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
4590 defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
4592 defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
4593 defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
4594 defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
4595 defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
// "sust.p" instantiations: only b8/b16/b32 with .trap are defined here.
4597 defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
4598 defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
4599 defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
// Base record for the "sust.{b,p}.1d.v2.*" instructions instantiated below.
// Inputs are the `surf` dag ($s), the Int32Regs operand $x, and two values
// $r, $g of register class `intype`.
// NOTE(review): the lines holding the start of the NVPTXInst<...> parent and
// the record terminator are not visible in this excerpt.
4601 class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4603 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
4604 inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4606 multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
4607 def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4608 def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
// "sust.b" instantiations: b8/b16/b32/b64 with .clamp, .trap and .zero.
// The b8 and b16 variants both use Int16Regs.
4611 defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
4612 defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
4613 defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
4614 defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
4616 defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
4617 defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
4618 defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
4619 defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
4621 defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
4622 defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
4623 defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
4624 defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
// "sust.p" instantiations: only b8/b16/b32 with .trap are defined here.
4626 defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
4627 defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
4628 defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
// Base record for the "sust.{b,p}.1d.v4.*" instructions instantiated below.
// Inputs are the `surf` dag ($s), the Int32Regs operand $x, and four values
// $r, $g, $b, $a of register class `intype`.
// NOTE(review): the lines holding the start of the NVPTXInst<...> parent and
// the record terminator are not visible in this excerpt.
4630 class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4632 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g,
4633 intype:$b, intype:$a)),
4634 inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4636 multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
4637 def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4638 def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
// "sust.b" instantiations: b8/b16/b32 with .clamp, .trap and .zero.
// The b8 and b16 variants both use Int16Regs.
4641 defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
4642 defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
4643 defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
4645 defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
4646 defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
4647 defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
4649 defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
4650 defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
4651 defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
// "sust.p" instantiations: only b8/b16/b32 with .trap are defined here.
4653 defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
4654 defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
4655 defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
// Base record for the "sust.{b,p}.a1d.*" instructions instantiated below.
// Inputs are the `surf` dag ($s), two Int32Regs operands $idx and $x (printed
// as {$idx, $x}), and one value $r of register class `intype`.
// NOTE(review): the lines holding the start of the NVPTXInst<...> parent and
// the record terminator are not visible in this excerpt.
4657 class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4659 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
4660 inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4662 multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
4663 def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4664 def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
// "sust.b" instantiations: b8/b16/b32/b64 with .clamp, .trap and .zero.
// The b8 and b16 variants both use Int16Regs.
4667 defm SUST_B_1D_ARRAY_B8_CLAMP
4668 : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
4669 defm SUST_B_1D_ARRAY_B16_CLAMP
4670 : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
4671 defm SUST_B_1D_ARRAY_B32_CLAMP
4672 : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
4673 defm SUST_B_1D_ARRAY_B64_CLAMP
4674 : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
4676 defm SUST_B_1D_ARRAY_B8_TRAP
4677 : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
4678 defm SUST_B_1D_ARRAY_B16_TRAP
4679 : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
4680 defm SUST_B_1D_ARRAY_B32_TRAP
4681 : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
4682 defm SUST_B_1D_ARRAY_B64_TRAP
4683 : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
4685 defm SUST_B_1D_ARRAY_B8_ZERO
4686 : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
4687 defm SUST_B_1D_ARRAY_B16_ZERO
4688 : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
4689 defm SUST_B_1D_ARRAY_B32_ZERO
4690 : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
4691 defm SUST_B_1D_ARRAY_B64_ZERO
4692 : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
// "sust.p" instantiations: only b8/b16/b32 with .trap are defined here.
4694 defm SUST_P_1D_ARRAY_B8_TRAP
4695 : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
4696 defm SUST_P_1D_ARRAY_B16_TRAP
4697 : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
4698 defm SUST_P_1D_ARRAY_B32_TRAP
4699 : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
// Base record for the "sust.{b,p}.a1d.v2.*" instructions instantiated below.
// Inputs are the `surf` dag ($s), two Int32Regs operands $idx and $x (printed
// as {$idx, $x}), and two values $r, $g of register class `intype`.
// NOTE(review): the lines holding the start of the NVPTXInst<...> parent and
// the record terminator are not visible in this excerpt.
4701 class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4703 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4704 intype:$r, intype:$g)),
4705 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4707 multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4708 def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4709 def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
// "sust.b" instantiations: b8/b16/b32/b64 with .clamp, .trap and .zero.
// The b8 and b16 variants both use Int16Regs.
4712 defm SUST_B_1D_ARRAY_V2B8_CLAMP
4713 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
4714 defm SUST_B_1D_ARRAY_V2B16_CLAMP
4715 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
4716 defm SUST_B_1D_ARRAY_V2B32_CLAMP
4717 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
4718 defm SUST_B_1D_ARRAY_V2B64_CLAMP
4719 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
4721 defm SUST_B_1D_ARRAY_V2B8_TRAP
4722 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
4723 defm SUST_B_1D_ARRAY_V2B16_TRAP
4724 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
4725 defm SUST_B_1D_ARRAY_V2B32_TRAP
4726 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
4727 defm SUST_B_1D_ARRAY_V2B64_TRAP
4728 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
4730 defm SUST_B_1D_ARRAY_V2B8_ZERO
4731 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
4732 defm SUST_B_1D_ARRAY_V2B16_ZERO
4733 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
4734 defm SUST_B_1D_ARRAY_V2B32_ZERO
4735 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
4736 defm SUST_B_1D_ARRAY_V2B64_ZERO
4737 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
// "sust.p" instantiations: only b8/b16/b32 with .trap are defined here.
4739 defm SUST_P_1D_ARRAY_V2B8_TRAP
4740 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
4741 defm SUST_P_1D_ARRAY_V2B16_TRAP
4742 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
4743 defm SUST_P_1D_ARRAY_V2B32_TRAP
4744 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
// Base record for the "sust.{b,p}.a1d.v4.*" instructions instantiated below.
// Inputs are the `surf` dag ($s), two Int32Regs operands $idx and $x (printed
// as {$idx, $x}), and four values $r, $g, $b, $a of register class `intype`.
// NOTE(review): the lines holding the start of the NVPTXInst<...> parent and
// the record terminator are not visible in this excerpt.
4746 class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4748 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4749 intype:$r, intype:$g, intype:$b, intype:$a)),
4750 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4752 multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4753 def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4754 def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
// "sust.b" instantiations: b8/b16/b32 with .clamp, .trap and .zero.
// The b8 and b16 variants both use Int16Regs.
4757 defm SUST_B_1D_ARRAY_V4B8_CLAMP
4758 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
4759 defm SUST_B_1D_ARRAY_V4B16_CLAMP
4760 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
4761 defm SUST_B_1D_ARRAY_V4B32_CLAMP
4762 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
4764 defm SUST_B_1D_ARRAY_V4B8_TRAP
4765 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
4766 defm SUST_B_1D_ARRAY_V4B16_TRAP
4767 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
4768 defm SUST_B_1D_ARRAY_V4B32_TRAP
4769 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
4771 defm SUST_B_1D_ARRAY_V4B8_ZERO
4772 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
4773 defm SUST_B_1D_ARRAY_V4B16_ZERO
4774 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
4775 defm SUST_B_1D_ARRAY_V4B32_ZERO
4776 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
// "sust.p" instantiations: only b8/b16/b32 with .trap are defined here.
4778 defm SUST_P_1D_ARRAY_V4B8_TRAP
4779 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
4780 defm SUST_P_1D_ARRAY_V4B16_TRAP
4781 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
4782 defm SUST_P_1D_ARRAY_V4B32_TRAP
4783 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
// Base record for the "sust.{b,p}.2d.*" instructions instantiated below.
// Inputs are the `surf` dag ($s), two Int32Regs operands $x and $y (printed as
// {$x, $y}), and one value $r of register class `intype`.
// NOTE(review): the lines holding the start of the NVPTXInst<...> parent and
// the record terminator are not visible in this excerpt.
4785 class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
4787 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
4788 inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4790 multiclass SUST_2D<string inst, NVPTXRegClass intype> {
4791 def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
4792 def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
// "sust.b" instantiations: b8/b16/b32/b64 with .clamp, .trap and .zero.
// The b8 and b16 variants both use Int16Regs.
4795 defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
4796 defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
4797 defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
4798 defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
4800 defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
4801 defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
4802 defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
4803 defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
4805 defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
4806 defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
4807 defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
4808 defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
// "sust.p" instantiations: only b8/b16/b32 with .trap are defined here.
4810 defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
4811 defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
4812 defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
// Base record for the "sust.{b,p}.2d.v2.*" instructions instantiated below.
// Inputs are the `surf` dag ($s), two Int32Regs operands $x and $y (printed as
// {$x, $y}), and two values $r, $g of register class `intype`.
// NOTE(review): the lines holding the start of the NVPTXInst<...> parent and
// the record terminator are not visible in this excerpt.
4814 class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4816 !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4817 intype:$r, intype:$g)),
4818 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4820 multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
4821 def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4822 def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
// "sust.b" instantiations: b8/b16/b32/b64 with .clamp, .trap and .zero.
// The b8 and b16 variants both use Int16Regs.
4825 defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
4826 defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
4827 defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
4828 defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
4830 defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
4831 defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
4832 defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
4833 defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
4835 defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
4836 defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
4837 defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
4838 defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
// "sust.p" instantiations: only b8/b16/b32 with .trap are defined here.
4840 defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
4841 defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
4842 defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
// Base record for the "sust.{b,p}.2d.v4.*" instructions instantiated below.
// Inputs are the `surf` dag ($s), two Int32Regs operands $x and $y (printed as
// {$x, $y}), and four values $r, $g, $b, $a of register class `intype`.
// NOTE(review): the lines holding the start of the NVPTXInst<...> parent and
// the record terminator are not visible in this excerpt.
4844 class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4846 !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4847 intype:$r, intype:$g, intype:$b, intype:$a)),
4848 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4850 multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
4851 def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4852 def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
// "sust.b" instantiations: b8/b16/b32 with .clamp, .trap and .zero.
// The b8 and b16 variants both use Int16Regs.
4855 defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
4856 defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
4857 defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;
4859 defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
4860 defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
4861 defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;
4863 defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
4864 defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
4865 defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;
// "sust.p" instantiations: only b8/b16/b32 with .trap are defined here.
4867 defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
4868 defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
4869 defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
// Base record for the "sust.{b,p}.a2d.*" instructions instantiated below.
// Inputs start with the `surf` dag ($s) followed by three Int32Regs operands
// $idx, $x, $y; the asm string additionally references a stored value $r.
// The coordinate list in the asm string has four entries and prints $y twice.
// NOTE(review): presumably the repeated $y pads the list to a four-element
// coordinate vector - confirm against the PTX ISA description of sust.
// NOTE(review): the start of the NVPTXInst<...> parent, the continuation of
// the !con(...) operand list and the record terminator are not visible in
// this excerpt.
4871 class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4873 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4875 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4877 multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
4878 def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4879 def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
// "sust.b" instantiations: b8/b16/b32/b64 with .clamp, .trap and .zero.
// The b8 and b16 variants both use Int16Regs.
4882 defm SUST_B_2D_ARRAY_B8_CLAMP
4883 : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
4884 defm SUST_B_2D_ARRAY_B16_CLAMP
4885 : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
4886 defm SUST_B_2D_ARRAY_B32_CLAMP
4887 : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
4888 defm SUST_B_2D_ARRAY_B64_CLAMP
4889 : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;
4891 defm SUST_B_2D_ARRAY_B8_TRAP
4892 : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
4893 defm SUST_B_2D_ARRAY_B16_TRAP
4894 : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
4895 defm SUST_B_2D_ARRAY_B32_TRAP
4896 : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
4897 defm SUST_B_2D_ARRAY_B64_TRAP
4898 : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;
4900 defm SUST_B_2D_ARRAY_B8_ZERO
4901 : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
4902 defm SUST_B_2D_ARRAY_B16_ZERO
4903 : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
4904 defm SUST_B_2D_ARRAY_B32_ZERO
4905 : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
4906 defm SUST_B_2D_ARRAY_B64_ZERO
4907 : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;
// "sust.p" instantiations: only b8/b16/b32 with .trap are defined here.
4909 defm SUST_P_2D_ARRAY_B8_TRAP
4910 : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
4911 defm SUST_P_2D_ARRAY_B16_TRAP
4912 : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
4913 defm SUST_P_2D_ARRAY_B32_TRAP
4914 : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
// Base record for the "sust.{b,p}.a2d.v2.*" instructions instantiated below.
// Inputs are the `surf` dag ($s), three Int32Regs operands $idx, $x, $y, and
// two values $r, $g of register class `intype`.
// The coordinate list in the asm string has four entries and prints $y twice.
// NOTE(review): presumably the repeated $y pads the list to a four-element
// coordinate vector - confirm against the PTX ISA description of sust.
// NOTE(review): the lines holding the start of the NVPTXInst<...> parent and
// the record terminator are not visible in this excerpt.
4916 class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4918 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4919 intype:$r, intype:$g)),
4920 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4922 multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4923 def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4924 def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
// "sust.b" instantiations: b8/b16/b32/b64 with .clamp, .trap and .zero.
// The b8 and b16 variants both use Int16Regs.
4927 defm SUST_B_2D_ARRAY_V2B8_CLAMP
4928 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
4929 defm SUST_B_2D_ARRAY_V2B16_CLAMP
4930 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
4931 defm SUST_B_2D_ARRAY_V2B32_CLAMP
4932 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
4933 defm SUST_B_2D_ARRAY_V2B64_CLAMP
4934 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;
4936 defm SUST_B_2D_ARRAY_V2B8_TRAP
4937 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
4938 defm SUST_B_2D_ARRAY_V2B16_TRAP
4939 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
4940 defm SUST_B_2D_ARRAY_V2B32_TRAP
4941 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
4942 defm SUST_B_2D_ARRAY_V2B64_TRAP
4943 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;
4945 defm SUST_B_2D_ARRAY_V2B8_ZERO
4946 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
4947 defm SUST_B_2D_ARRAY_V2B16_ZERO
4948 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
4949 defm SUST_B_2D_ARRAY_V2B32_ZERO
4950 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
4951 defm SUST_B_2D_ARRAY_V2B64_ZERO
4952 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;
// "sust.p" instantiations: only b8/b16/b32 with .trap are defined here.
4954 defm SUST_P_2D_ARRAY_V2B8_TRAP
4955 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
4956 defm SUST_P_2D_ARRAY_V2B16_TRAP
4957 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
4958 defm SUST_P_2D_ARRAY_V2B32_TRAP
4959 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
// Base record for the "sust.{b,p}.a2d.v4.*" instructions instantiated below.
// Inputs are the `surf` dag ($s), three Int32Regs operands $idx, $x, $y, and
// four values $r, $g, $b, $a of register class `intype`.
// The coordinate list in the asm string has four entries and prints $y twice.
// NOTE(review): presumably the repeated $y pads the list to a four-element
// coordinate vector - confirm against the PTX ISA description of sust.
// NOTE(review): the lines holding the start of the NVPTXInst<...> parent and
// the record terminator are not visible in this excerpt.
4961 class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4963 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4964 intype:$r, intype:$g, intype:$b, intype:$a)),
4965 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
4967 multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4968 def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4969 def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
// "sust.b" instantiations: b8/b16/b32 with .clamp, .trap and .zero.
// The b8 and b16 variants both use Int16Regs.
4972 defm SUST_B_2D_ARRAY_V4B8_CLAMP
4973 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
4974 defm SUST_B_2D_ARRAY_V4B16_CLAMP
4975 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
4976 defm SUST_B_2D_ARRAY_V4B32_CLAMP
4977 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
4979 defm SUST_B_2D_ARRAY_V4B8_TRAP
4980 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
4981 defm SUST_B_2D_ARRAY_V4B16_TRAP
4982 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
4983 defm SUST_B_2D_ARRAY_V4B32_TRAP
4984 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
4986 defm SUST_B_2D_ARRAY_V4B8_ZERO
4987 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
4988 defm SUST_B_2D_ARRAY_V4B16_ZERO
4989 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
4990 defm SUST_B_2D_ARRAY_V4B32_ZERO
4991 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
// "sust.p" instantiations: only b8/b16/b32 with .trap are defined here.
4993 defm SUST_P_2D_ARRAY_V4B8_TRAP
4994 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
4995 defm SUST_P_2D_ARRAY_V4B16_TRAP
4996 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
4997 defm SUST_P_2D_ARRAY_V4B32_TRAP
4998 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
// Base record for the "sust.{b,p}.3d.*" instructions instantiated below.
// Inputs start with the `surf` dag ($s) followed by three Int32Regs operands
// $x, $y, $z; the asm string additionally references a stored value $r.
// The coordinate list in the asm string has four entries and prints $z twice.
// NOTE(review): presumably the repeated $z pads the list to a four-element
// coordinate vector - confirm against the PTX ISA description of sust.
// NOTE(review): the start of the NVPTXInst<...> parent, the continuation of
// the !con(...) operand list and the record terminator are not visible in
// this excerpt.
5000 class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
5002 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5004 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
5006 multiclass SUST_3D<string inst, NVPTXRegClass intype> {
5007 def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
5008 def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
// "sust.b" instantiations: b8/b16/b32/b64 with .clamp, .trap and .zero.
// The b8 and b16 variants both use Int16Regs.
5011 defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
5012 defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
5013 defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
5014 defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;
5016 defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
5017 defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
5018 defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
5019 defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;
5021 defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
5022 defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
5023 defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
5024 defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;
// "sust.p" instantiations: only b8/b16/b32 with .trap are defined here.
5026 defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
5027 defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
5028 defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
// Base record for the "sust.{b,p}.3d.v2.*" instructions instantiated below.
// Inputs are the `surf` dag ($s), three Int32Regs operands $x, $y, $z, and two
// values $r, $g of register class `intype`.
// The coordinate list in the asm string has four entries and prints $z twice.
// NOTE(review): presumably the repeated $z pads the list to a four-element
// coordinate vector - confirm against the PTX ISA description of sust.
// NOTE(review): the lines holding the start of the NVPTXInst<...> parent and
// the record terminator are not visible in this excerpt.
5030 class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
5032 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5033 intype:$r, intype:$g)),
5034 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
// _R: $s is an Int64Regs register. _I: $s is an i64imm immediate.
5036 multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
5037 def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
5038 def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
// "sust.b" instantiations: b8/b16/b32/b64 with .clamp, .trap and .zero.
// The b8 and b16 variants both use Int16Regs.
5041 defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
5042 defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
5043 defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
5044 defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;
5046 defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
5047 defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
5048 defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
5049 defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;
5051 defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
5052 defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
5053 defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
5054 defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;
// "sust.p" instantiations: only b8/b16/b32 with .trap are defined here.
5056 defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
5057 defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
5058 defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
// Base class for the four-component (v4) 3D surface store instructions.
//   inst   - PTX mnemonic; it forms the start of the asm string.
//   intype - register class of the four data operands $r, $g, $b and $a.
//   surf   - dag supplying the surface handle operand ($s); !con places it in
//            front of the coordinate and data operands.
// The asm string prints the coordinates as a four-element vector in which $z
// is repeated ({$x, $y, $z, $z}); there is no separate fourth coordinate
// operand.
// NOTE(review): this listing appears to omit some lines of the definition
// (between the header and the operand list, and after the asm string) --
// confirm against the complete file.
5060 class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
5062 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5063 intype:$r, intype:$g, intype:$b, intype:$a)),
5064 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
// Defines the two forms of a v4 3D surface store for a given mnemonic and
// data register class:
//   _R - the surface handle $s is an Int64Regs register.
//   _I - the surface handle $s is an i64imm immediate.
5066 multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
5067 def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
5068 def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
// Four-component (v4) 3D surface stores, instantiated from SUST_3D_V4.
// Only b8, b16 and b32 element sizes are instantiated here (no b64 form);
// b8 and b16 pass Int16Regs, b32 passes Int32Regs.

// sust.b with the .clamp suffix.
5071 defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
5072 defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
5073 defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
// sust.b with the .trap suffix.
5075 defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
5076 defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
5077 defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
// sust.b with the .zero suffix.
5079 defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
5080 defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
5081 defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
// sust.p: only the .trap suffix is instantiated.
5083 defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
5084 defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
5085 defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
5089 // Surface store instruction selection patterns.
5090 // These are kept separate from the instruction definitions because attaching
5091 // them there directly makes TableGen report type errors (cause not yet known).
// sust.b, 1D surface, .clamp: each int_nvvm_sust_b_1d_*_clamp intrinsic is
// selected to the _R form of the matching SUST_B_1D_*_CLAMP instruction.
// The surface handle $s (Int64Regs), the coordinate $x (Int32Regs) and the
// data operands are forwarded in the same order. i8 and i16 data both use
// Int16Regs; v2 adds $g, and v4 adds $g, $b and $a.
5094 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
5095 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5096 (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5098 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
5099 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5100 (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5102 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
5103 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5104 (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5106 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
5107 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5108 (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5110 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
5111 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5112 (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5113 Int16Regs:$r, Int16Regs:$g)>;
5115 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
5116 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5117 (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5118 Int16Regs:$r, Int16Regs:$g)>;
5120 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
5121 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5122 (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5123 Int32Regs:$r, Int32Regs:$g)>;
5125 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
5126 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5127 (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5128 Int64Regs:$r, Int64Regs:$g)>;
5130 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
5131 Int64Regs:$s, Int32Regs:$x,
5132 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5133 (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5134 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5136 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
5137 Int64Regs:$s, Int32Regs:$x,
5138 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5139 (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5140 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5142 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
5143 Int64Regs:$s, Int32Regs:$x,
5144 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5145 (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5146 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust.b, 1D array surface, .clamp: each int_nvvm_sust_b_1d_array_*_clamp
// intrinsic is selected to the _R form of the matching
// SUST_B_1D_ARRAY_*_CLAMP instruction. Operands are the surface handle $s
// (Int64Regs), $l and $x (both Int32Regs; $l is presumably the array layer
// index -- confirm), then the data operands.
// NOTE(review): the scalar patterns below lack their closing operand line in
// this listing -- confirm against the complete file.
5150 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
5151 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5152 (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5155 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
5156 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5157 (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5160 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
5161 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5162 (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5165 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
5166 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5167 (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5170 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
5171 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5172 (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5173 Int16Regs:$r, Int16Regs:$g)>;
5175 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
5176 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5177 (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5178 Int16Regs:$r, Int16Regs:$g)>;
5180 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
5181 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5182 (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5183 Int32Regs:$r, Int32Regs:$g)>;
5185 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
5186 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5187 (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5188 Int64Regs:$r, Int64Regs:$g)>;
5190 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
5191 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5192 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5193 (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5194 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5196 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
5197 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5198 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5199 (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5200 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5202 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
5203 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5204 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5205 (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5206 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust.b, 2D surface, .clamp: each int_nvvm_sust_b_2d_*_clamp intrinsic is
// selected to the _R form of the matching SUST_B_2D_*_CLAMP instruction.
// Operands are the surface handle $s (Int64Regs), coordinates $x and $y
// (Int32Regs), then the data operands.
// NOTE(review): the scalar patterns below lack their closing operand line in
// this listing -- confirm against the complete file.
5210 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
5211 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5212 (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5215 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
5216 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5217 (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5220 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
5221 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5222 (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5225 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
5226 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5227 (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5230 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
5231 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5232 (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5233 Int16Regs:$r, Int16Regs:$g)>;
5235 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
5236 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5237 (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5238 Int16Regs:$r, Int16Regs:$g)>;
5240 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
5241 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5242 (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5243 Int32Regs:$r, Int32Regs:$g)>;
5245 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
5246 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5247 (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5248 Int64Regs:$r, Int64Regs:$g)>;
5250 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
5251 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5252 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5253 (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5254 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5256 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
5257 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5258 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5259 (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5260 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5262 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
5263 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5264 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5265 (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5266 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust.b, 2D array surface, .clamp: each int_nvvm_sust_b_2d_array_*_clamp
// intrinsic is selected to the _R form of the matching
// SUST_B_2D_ARRAY_*_CLAMP instruction. Operands are the surface handle $s
// (Int64Regs), $l, $x and $y (all Int32Regs; $l is presumably the array
// layer index -- confirm), then the data operands.
// NOTE(review): some patterns below lack an operand line in this listing --
// confirm against the complete file.
5270 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
5271 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5272 (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
5273 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5276 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
5277 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5278 (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
5279 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5282 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
5283 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5284 (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
5285 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5288 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
5289 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5290 (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
5291 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5294 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
5295 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5296 Int16Regs:$r, Int16Regs:$g),
5297 (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5298 Int32Regs:$x, Int32Regs:$y,
5299 Int16Regs:$r, Int16Regs:$g)>;
5301 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
5302 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5303 Int16Regs:$r, Int16Regs:$g),
5304 (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5305 Int32Regs:$x, Int32Regs:$y,
5306 Int16Regs:$r, Int16Regs:$g)>;
5308 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
5309 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5311 (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5312 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5314 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
5315 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5317 (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5318 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
5320 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
5321 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5322 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5323 (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
5324 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5325 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5327 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
5328 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5329 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5330 (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
5331 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5332 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5334 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
5335 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5336 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5337 (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5338 Int32Regs:$x, Int32Regs:$y,
5339 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust.b, 3D surface, .clamp: each int_nvvm_sust_b_3d_*_clamp intrinsic is
// selected to the _R form of the matching SUST_B_3D_*_CLAMP instruction.
// Operands are the surface handle $s (Int64Regs), coordinates $x, $y and $z
// (Int32Regs), then the data operands. There is no v4i64 pattern, matching
// the absence of a V4B64 instruction.
// NOTE(review): the scalar patterns below lack their data operand lines in
// this listing -- confirm against the complete file.
5343 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
5344 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5346 (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
5347 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5350 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
5351 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5353 (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
5354 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5357 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
5358 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5360 (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
5361 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5364 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
5365 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5367 (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
5368 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5371 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
5372 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5373 Int16Regs:$r, Int16Regs:$g),
5374 (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
5375 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5376 Int16Regs:$r, Int16Regs:$g)>;
5378 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
5379 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5380 Int16Regs:$r, Int16Regs:$g),
5381 (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
5382 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5383 Int16Regs:$r, Int16Regs:$g)>;
5385 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
5386 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5387 Int32Regs:$r, Int32Regs:$g),
5388 (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
5389 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5390 Int32Regs:$r, Int32Regs:$g)>;
5392 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
5393 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5394 Int64Regs:$r, Int64Regs:$g),
5395 (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
5396 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5397 Int64Regs:$r, Int64Regs:$g)>;
5399 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
5400 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5401 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5402 (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
5403 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5404 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5406 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
5407 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5408 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5409 (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
5410 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5411 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5413 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
5414 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5415 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5416 (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
5417 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5418 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust.b, 1D surface, .trap: each int_nvvm_sust_b_1d_*_trap intrinsic is
// selected to the _R form of the matching SUST_B_1D_*_TRAP instruction.
// The surface handle $s (Int64Regs), the coordinate $x (Int32Regs) and the
// data operands are forwarded in the same order. i8 and i16 data both use
// Int16Regs.
5422 def : Pat<(int_nvvm_sust_b_1d_i8_trap
5423 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5424 (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5426 def : Pat<(int_nvvm_sust_b_1d_i16_trap
5427 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5428 (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5430 def : Pat<(int_nvvm_sust_b_1d_i32_trap
5431 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5432 (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5434 def : Pat<(int_nvvm_sust_b_1d_i64_trap
5435 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5436 (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5438 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
5439 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5440 (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5441 Int16Regs:$r, Int16Regs:$g)>;
5443 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
5444 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5445 (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5446 Int16Regs:$r, Int16Regs:$g)>;
5448 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
5449 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5450 (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5451 Int32Regs:$r, Int32Regs:$g)>;
5453 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
5454 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5455 (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
5456 Int64Regs:$r, Int64Regs:$g)>;
5458 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
5459 Int64Regs:$s, Int32Regs:$x,
5460 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5461 (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5462 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5464 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
5465 Int64Regs:$s, Int32Regs:$x,
5466 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5467 (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5468 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5470 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
5471 Int64Regs:$s, Int32Regs:$x,
5472 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5473 (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5474 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust.b, 1D array surface, .trap: each int_nvvm_sust_b_1d_array_*_trap
// intrinsic is selected to the _R form of the matching
// SUST_B_1D_ARRAY_*_TRAP instruction. Operands are the surface handle $s
// (Int64Regs), $l and $x (both Int32Regs; $l is presumably the array layer
// index -- confirm), then the data operands.
// NOTE(review): the scalar patterns below lack their closing operand line in
// this listing -- confirm against the complete file.
5478 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
5479 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5480 (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5483 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
5484 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5485 (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5488 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
5489 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5490 (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5493 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
5494 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5495 (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5498 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
5499 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5500 (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5501 Int16Regs:$r, Int16Regs:$g)>;
5503 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
5504 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5505 (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5506 Int16Regs:$r, Int16Regs:$g)>;
5508 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
5509 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5510 (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5511 Int32Regs:$r, Int32Regs:$g)>;
5513 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
5514 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5515 (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5516 Int64Regs:$r, Int64Regs:$g)>;
5518 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
5519 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5520 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5521 (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5522 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5524 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
5525 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5526 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5527 (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5528 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5530 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
5531 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5532 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5533 (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5534 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust.b, 2D surface, .trap: each int_nvvm_sust_b_2d_*_trap intrinsic is
// selected to the _R form of the matching SUST_B_2D_*_TRAP instruction.
// Operands are the surface handle $s (Int64Regs), coordinates $x and $y
// (Int32Regs), then the data operands.
// NOTE(review): the scalar patterns below lack their closing operand line in
// this listing -- confirm against the complete file.
5538 def : Pat<(int_nvvm_sust_b_2d_i8_trap
5539 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5540 (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5543 def : Pat<(int_nvvm_sust_b_2d_i16_trap
5544 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5545 (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5548 def : Pat<(int_nvvm_sust_b_2d_i32_trap
5549 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5550 (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5553 def : Pat<(int_nvvm_sust_b_2d_i64_trap
5554 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5555 (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5558 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
5559 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5560 (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5561 Int16Regs:$r, Int16Regs:$g)>;
5563 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
5564 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5565 (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5566 Int16Regs:$r, Int16Regs:$g)>;
5568 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
5569 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5570 (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5571 Int32Regs:$r, Int32Regs:$g)>;
5573 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
5574 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5575 (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5576 Int64Regs:$r, Int64Regs:$g)>;
5578 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
5579 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5580 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5581 (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5582 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5584 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
5585 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5586 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5587 (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5588 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5590 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
5591 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5592 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5593 (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5594 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust.b, 2D array surface, .trap: each int_nvvm_sust_b_2d_array_*_trap
// intrinsic is selected to the _R form of the matching
// SUST_B_2D_ARRAY_*_TRAP instruction. Operands are the surface handle $s
// (Int64Regs), $l, $x and $y (all Int32Regs; $l is presumably the array
// layer index -- confirm), then the data operands.
// NOTE(review): some patterns below lack an operand line in this listing --
// confirm against the complete file.
5598 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
5599 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5600 (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
5601 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5604 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
5605 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5606 (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
5607 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5610 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
5611 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5612 (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
5613 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5616 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
5617 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5618 (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
5619 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5622 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
5623 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5624 Int16Regs:$r, Int16Regs:$g),
5625 (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
5626 Int32Regs:$x, Int32Regs:$y,
5627 Int16Regs:$r, Int16Regs:$g)>;
5629 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
5630 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5631 Int16Regs:$r, Int16Regs:$g),
5632 (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
5633 Int32Regs:$x, Int32Regs:$y,
5634 Int16Regs:$r, Int16Regs:$g)>;
5636 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
5637 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5639 (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5640 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5642 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
5643 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5645 (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l,
5646 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
5648 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
5649 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5650 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5651 (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
5652 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5653 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5655 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
5656 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5657 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5658 (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
5659 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5660 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5662 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
5663 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5664 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5665 (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5666 Int32Regs:$x, Int32Regs:$y,
5667 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust.b, 3D surface, .trap: each int_nvvm_sust_b_3d_*_trap intrinsic is
// selected to the _R form of the matching SUST_B_3D_*_TRAP instruction.
// Operands are the surface handle $s (Int64Regs), coordinates $x, $y and $z
// (Int32Regs), then the data operands. There is no v4i64 pattern, matching
// the absence of a V4B64 instruction.
// NOTE(review): the scalar patterns below lack their data operand lines in
// this listing -- confirm against the complete file.
5671 def : Pat<(int_nvvm_sust_b_3d_i8_trap
5672 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5674 (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
5675 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5678 def : Pat<(int_nvvm_sust_b_3d_i16_trap
5679 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5681 (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
5682 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5685 def : Pat<(int_nvvm_sust_b_3d_i32_trap
5686 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5688 (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
5689 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5692 def : Pat<(int_nvvm_sust_b_3d_i64_trap
5693 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5695 (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
5696 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5699 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
5700 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5701 Int16Regs:$r, Int16Regs:$g),
5702 (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
5703 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5704 Int16Regs:$r, Int16Regs:$g)>;
5706 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
5707 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5708 Int16Regs:$r, Int16Regs:$g),
5709 (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
5710 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5711 Int16Regs:$r, Int16Regs:$g)>;
5713 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
5714 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5715 Int32Regs:$r, Int32Regs:$g),
5716 (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
5717 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5718 Int32Regs:$r, Int32Regs:$g)>;
5720 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
5721 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5722 Int64Regs:$r, Int64Regs:$g),
5723 (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
5724 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5725 Int64Regs:$r, Int64Regs:$g)>;
5727 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
5728 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5729 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5730 (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
5731 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5732 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5734 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
5735 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5736 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5737 (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
5738 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5739 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5741 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
5742 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5743 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5744 (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
5745 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5746 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust.b, 1D surface, .zero: each int_nvvm_sust_b_1d_*_zero intrinsic is
// selected to the _R form of the matching SUST_B_1D_*_ZERO instruction.
// The surface handle $s (Int64Regs), the coordinate $x (Int32Regs) and the
// data operands are forwarded in the same order. i8 and i16 data both use
// Int16Regs.
5750 def : Pat<(int_nvvm_sust_b_1d_i8_zero
5751 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5752 (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5754 def : Pat<(int_nvvm_sust_b_1d_i16_zero
5755 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5756 (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5758 def : Pat<(int_nvvm_sust_b_1d_i32_zero
5759 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5760 (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5762 def : Pat<(int_nvvm_sust_b_1d_i64_zero
5763 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5764 (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5766 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
5767 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5768 (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
5769 Int16Regs:$r, Int16Regs:$g)>;
5771 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
5772 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5773 (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
5774 Int16Regs:$r, Int16Regs:$g)>;
5776 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
5777 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5778 (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
5779 Int32Regs:$r, Int32Regs:$g)>;
5781 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
5782 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5783 (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x,
5784 Int64Regs:$r, Int64Regs:$g)>;
5786 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
5787 Int64Regs:$s, Int32Regs:$x,
5788 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5789 (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
5790 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5792 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
5793 Int64Regs:$s, Int32Regs:$x,
5794 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5795 (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
5796 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5798 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
5799 Int64Regs:$s, Int32Regs:$x,
5800 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5801 (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
5802 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust.b, 1D array surface, .zero: each int_nvvm_sust_b_1d_array_*_zero
// intrinsic is selected to the _R form of the matching
// SUST_B_1D_ARRAY_*_ZERO instruction. Operands are the surface handle $s
// (Int64Regs), $l and $x (both Int32Regs; $l is presumably the array layer
// index -- confirm), then the data operands.
// NOTE(review): the scalar patterns below lack their closing operand line in
// this listing -- confirm against the complete file.
5806 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
5807 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5808 (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5811 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
5812 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5813 (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5816 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
5817 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5818 (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5821 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
5822 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5823 (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5826 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
5827 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5828 (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5829 Int16Regs:$r, Int16Regs:$g)>;
5831 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
5832 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5833 (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5834 Int16Regs:$r, Int16Regs:$g)>;
5836 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
5837 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5838 (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5839 Int32Regs:$r, Int32Regs:$g)>;
5841 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
5842 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5843 (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5844 Int64Regs:$r, Int64Regs:$g)>;
5846 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
5847 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5848 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5849 (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5850 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5852 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
5853 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5854 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5855 (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5856 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5858 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
5859 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5860 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5861 (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5862 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5866 def : Pat<(int_nvvm_sust_b_2d_i8_zero
5867 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5868 (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5871 def : Pat<(int_nvvm_sust_b_2d_i16_zero
5872 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5873 (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5876 def : Pat<(int_nvvm_sust_b_2d_i32_zero
5877 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5878 (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5881 def : Pat<(int_nvvm_sust_b_2d_i64_zero
5882 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5883 (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Selection patterns for the two- and four-element int_nvvm_sust_b_2d_*_zero
// intrinsics. Each one maps 1:1 onto the SUST_B_2D_V{2,4}B*_ZERO_R instruction
// of the same element width, forwarding $s, $x, $y and the value operands in
// the original order.
// The i8 and i16 variants both carry their values in Int16Regs.
5886 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
5887 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5888 (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5889 Int16Regs:$r, Int16Regs:$g)>;
5891 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
5892 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5893 (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5894 Int16Regs:$r, Int16Regs:$g)>;
5896 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
5897 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5898 (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5899 Int32Regs:$r, Int32Regs:$g)>;
5901 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
5902 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5903 (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5904 Int64Regs:$r, Int64Regs:$g)>;
// Four-element variants (i8, i16, i32).
5906 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
5907 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5908 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5909 (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5910 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5912 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
5913 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5914 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5915 (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5916 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5918 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
5919 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5920 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5921 (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5922 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5926 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
5927 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5928 (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s,
5929 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5932 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
5933 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5934 (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s,
5935 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5938 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
5939 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5940 (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s,
5941 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5944 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
5945 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5946 (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s,
5947 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Selection patterns for int_nvvm_sust_b_2d_array_v2i8_zero and
// int_nvvm_sust_b_2d_array_v2i16_zero. Operands ($s, $l, $x, $y, $r, $g) are
// forwarded unchanged to SUST_B_2D_ARRAY_V2B{8,16}_ZERO_R; both element widths
// carry their values in Int16Regs.
5950 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
5951 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5952 Int16Regs:$r, Int16Regs:$g),
5953 (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l,
5954 Int32Regs:$x, Int32Regs:$y,
5955 Int16Regs:$r, Int16Regs:$g)>;
5957 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
5958 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5959 Int16Regs:$r, Int16Regs:$g),
5960 (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l,
5961 Int32Regs:$x, Int32Regs:$y,
5962 Int16Regs:$r, Int16Regs:$g)>;
5964 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
5965 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5967 (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
5968 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5970 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
5971 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5973 (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l,
5974 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Selection patterns for the four-element int_nvvm_sust_b_2d_array_v4*_zero
// intrinsics (i8, i16, i32). Operands ($s, $l, $x, $y, $r, $g, $b, $a) are
// forwarded unchanged to the matching SUST_B_2D_ARRAY_V4B*_ZERO_R instruction.
// i8 and i16 values both use Int16Regs; i32 values use Int32Regs.
5976 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
5977 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5978 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5979 (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s,
5980 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5981 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5983 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
5984 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5985 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5986 (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s,
5987 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5988 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5990 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
5991 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5992 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5993 (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
5994 Int32Regs:$x, Int32Regs:$y,
5995 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5999 def : Pat<(int_nvvm_sust_b_3d_i8_zero
6000 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6002 (SUST_B_3D_B8_ZERO_R Int64Regs:$s,
6003 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6006 def : Pat<(int_nvvm_sust_b_3d_i16_zero
6007 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6009 (SUST_B_3D_B16_ZERO_R Int64Regs:$s,
6010 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6013 def : Pat<(int_nvvm_sust_b_3d_i32_zero
6014 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6016 (SUST_B_3D_B32_ZERO_R Int64Regs:$s,
6017 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6020 def : Pat<(int_nvvm_sust_b_3d_i64_zero
6021 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6023 (SUST_B_3D_B64_ZERO_R Int64Regs:$s,
6024 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Selection patterns for the two- and four-element int_nvvm_sust_b_3d_*_zero
// intrinsics. Each forwards $s, the three Int32Regs operands $x/$y/$z and the
// value operands, unchanged and in order, to the SUST_B_3D_V{2,4}B*_ZERO_R
// instruction of the same element width.
// i8 and i16 values both use Int16Regs; i32 uses Int32Regs; i64 uses Int64Regs.
6027 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6028 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6029 Int16Regs:$r, Int16Regs:$g),
6030 (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s,
6031 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6032 Int16Regs:$r, Int16Regs:$g)>;
6034 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6035 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6036 Int16Regs:$r, Int16Regs:$g),
6037 (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s,
6038 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6039 Int16Regs:$r, Int16Regs:$g)>;
6041 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6042 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6043 Int32Regs:$r, Int32Regs:$g),
6044 (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s,
6045 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6046 Int32Regs:$r, Int32Regs:$g)>;
6048 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
6049 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6050 Int64Regs:$r, Int64Regs:$g),
6051 (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s,
6052 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6053 Int64Regs:$r, Int64Regs:$g)>;
// Four-element variants (i8, i16, i32).
6055 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
6056 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6057 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6058 (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s,
6059 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6060 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6062 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
6063 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6064 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6065 (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s,
6066 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6067 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6069 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
6070 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6071 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6072 (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s,
6073 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6074 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_1d_*_trap intrinsics. Each
// pattern maps the intrinsic 1:1 onto the SUST_P_1D_*_TRAP_R instruction of the
// same element width, forwarding $s, $x and the value operands unchanged.
// Only 8-, 16- and 32-bit element widths appear for the sust_p forms here;
// i8 and i16 values both use Int16Regs, i32 values use Int32Regs.
// Scalar variants:
6079 def : Pat<(int_nvvm_sust_p_1d_i8_trap
6080 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6081 (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6083 def : Pat<(int_nvvm_sust_p_1d_i16_trap
6084 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6085 (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6087 def : Pat<(int_nvvm_sust_p_1d_i32_trap
6088 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6089 (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
// Two-element variants:
6091 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
6092 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6093 (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
6094 Int16Regs:$r, Int16Regs:$g)>;
6096 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
6097 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6098 (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
6099 Int16Regs:$r, Int16Regs:$g)>;
6101 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
6102 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6103 (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
6104 Int32Regs:$r, Int32Regs:$g)>;
// Four-element variants:
6106 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
6107 Int64Regs:$s, Int32Regs:$x,
6108 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6109 (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
6110 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6112 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
6113 Int64Regs:$s, Int32Regs:$x,
6114 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6115 (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
6116 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6118 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
6119 Int64Regs:$s, Int32Regs:$x,
6120 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6121 (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
6122 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6126 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
6127 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6128 (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6131 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
6132 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6133 (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6136 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
6137 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6138 (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Selection patterns for the two- and four-element
// int_nvvm_sust_p_1d_array_*_trap intrinsics. Operands ($s, $l, $x, then the
// value operands) are forwarded unchanged to the matching
// SUST_P_1D_ARRAY_V{2,4}B*_TRAP_R instruction.
// i8 and i16 values both use Int16Regs; i32 values use Int32Regs.
6141 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
6142 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6143 (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6144 Int16Regs:$r, Int16Regs:$g)>;
6146 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
6147 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6148 (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6149 Int16Regs:$r, Int16Regs:$g)>;
6151 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
6152 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6153 (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6154 Int32Regs:$r, Int32Regs:$g)>;
// Four-element variants:
6156 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
6157 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6158 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6159 (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6160 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6162 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
6163 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6164 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6165 (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6166 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6168 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
6169 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6170 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6171 (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6172 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6176 def : Pat<(int_nvvm_sust_p_2d_i8_trap
6177 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6178 (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6181 def : Pat<(int_nvvm_sust_p_2d_i16_trap
6182 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6183 (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6186 def : Pat<(int_nvvm_sust_p_2d_i32_trap
6187 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6188 (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Selection patterns for the two- and four-element int_nvvm_sust_p_2d_*_trap
// intrinsics. Operands ($s, $x, $y, then the value operands) are forwarded
// unchanged to the matching SUST_P_2D_V{2,4}B*_TRAP_R instruction.
// i8 and i16 values both use Int16Regs; i32 values use Int32Regs.
6191 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
6192 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6193 (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6194 Int16Regs:$r, Int16Regs:$g)>;
6196 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
6197 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6198 (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6199 Int16Regs:$r, Int16Regs:$g)>;
6201 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
6202 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6203 (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6204 Int32Regs:$r, Int32Regs:$g)>;
// Four-element variants:
6206 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
6207 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6208 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6209 (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6210 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6212 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
6213 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6214 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6215 (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6216 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6218 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
6219 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6220 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6221 (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6222 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6226 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
6227 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6228 (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
6229 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6232 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
6233 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6234 (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
6235 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6238 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
6239 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6240 (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
6241 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Selection patterns for int_nvvm_sust_p_2d_array_v2i8_trap and
// int_nvvm_sust_p_2d_array_v2i16_trap. Operands ($s, $l, $x, $y, $r, $g) are
// forwarded unchanged to SUST_P_2D_ARRAY_V2B{8,16}_TRAP_R; both element widths
// carry their values in Int16Regs.
6244 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
6245 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6246 Int16Regs:$r, Int16Regs:$g),
6247 (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
6248 Int32Regs:$x, Int32Regs:$y,
6249 Int16Regs:$r, Int16Regs:$g)>;
6251 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
6252 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6253 Int16Regs:$r, Int16Regs:$g),
6254 (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
6255 Int32Regs:$x, Int32Regs:$y,
6256 Int16Regs:$r, Int16Regs:$g)>;
6258 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
6259 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6261 (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
6262 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
// Selection patterns for the four-element int_nvvm_sust_p_2d_array_v4*_trap
// intrinsics (i8, i16, i32). Operands ($s, $l, $x, $y, $r, $g, $b, $a) are
// forwarded unchanged to the matching SUST_P_2D_ARRAY_V4B*_TRAP_R instruction.
// i8 and i16 values both use Int16Regs; i32 values use Int32Regs.
6264 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
6265 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6266 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6267 (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
6268 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6269 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6271 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
6272 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6273 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6274 (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
6275 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6276 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6278 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
6279 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6280 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6281 (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
6282 Int32Regs:$x, Int32Regs:$y,
6283 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6287 def : Pat<(int_nvvm_sust_p_3d_i8_trap
6288 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6290 (SUST_P_3D_B8_TRAP_R Int64Regs:$s,
6291 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6294 def : Pat<(int_nvvm_sust_p_3d_i16_trap
6295 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6297 (SUST_P_3D_B16_TRAP_R Int64Regs:$s,
6298 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6301 def : Pat<(int_nvvm_sust_p_3d_i32_trap
6302 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6304 (SUST_P_3D_B32_TRAP_R Int64Regs:$s,
6305 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Selection patterns for the two- and four-element int_nvvm_sust_p_3d_*_trap
// intrinsics. Each forwards $s, the three Int32Regs operands $x/$y/$z and the
// value operands, unchanged and in order, to the matching
// SUST_P_3D_V{2,4}B*_TRAP_R instruction.
// i8 and i16 values both use Int16Regs; i32 values use Int32Regs.
6308 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
6309 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6310 Int16Regs:$r, Int16Regs:$g),
6311 (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s,
6312 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6313 Int16Regs:$r, Int16Regs:$g)>;
6315 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
6316 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6317 Int16Regs:$r, Int16Regs:$g),
6318 (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s,
6319 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6320 Int16Regs:$r, Int16Regs:$g)>;
6322 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
6323 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6324 Int32Regs:$r, Int32Regs:$g),
6325 (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s,
6326 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6327 Int32Regs:$r, Int32Regs:$g)>;
// Four-element variants:
6329 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
6330 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6331 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6332 (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s,
6333 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6334 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6336 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
6337 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6338 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6339 (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s,
6340 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6341 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6343 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
6344 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6345 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6346 (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s,
6347 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6348 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6350 //-----------------------------------
6351 // Read Special Registers
6352 //-----------------------------------
6354 class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]>
6355 : NVPTXInst<(outs Int64Regs:$d), (ins),
6356 !strconcat("mov.u64 \t$d, %", regname, ";"),
6357 [(set Int64Regs:$d, (intop))]>,
6360 class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
6361 : NVPTXInst<(outs Int32Regs:$d), (ins),
6362 !strconcat("mov.u32 \t$d, %", regname, ";"),
6363 [(set Int32Regs:$d, (intop))]>,
6366 multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
6367 foreach suffix = ["x", "y", "z", "w"] in {
6368 defvar reg = regname # "." # suffix;
6369 defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix);
6370 def "_"#suffix : PTX_READ_SREG_R32<reg, intr, Preds>;
6374 // TODO Add read vector-version of special registers
// Four-component special-register reads. Each defm expands
// PTX_READ_SREG_R32V4, which loops over the suffixes x, y, z, w and defines one
// "_<suffix>" record per component. Each record reads "<regname>.<suffix>" and
// is matched from the intrinsic int_nvvm_read_ptx_sreg_<regname>_<suffix>.
6376 defm INT_PTX_SREG_TID : PTX_READ_SREG_R32V4<"tid">;
6377 defm INT_PTX_SREG_NTID : PTX_READ_SREG_R32V4<"ntid">;
6378 defm INT_PTX_SREG_CTAID : PTX_READ_SREG_R32V4<"ctaid">;
6379 defm INT_PTX_SREG_NCTAID: PTX_READ_SREG_R32V4<"nctaid">;
// Cluster-related registers additionally pass [hasSM<90>, hasPTX<78>] as the
// Preds argument.
// NOTE(review): presumably this restricts them to sm_90 with PTX 7.8 or newer;
// confirm how Preds is consumed in the PTX_READ_SREG_* class definitions.
6381 defm INT_PTX_SREG_CLUSTERID :
6382 PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
6383 defm INT_PTX_SREG_NCLUSTERID :
6384 PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
6385 defm INT_PTX_SREG_CLUSTER_CTAID :
6386 PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
6387 defm INT_PTX_SREG_CLUSTER_NCTAID:
6388 PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;
// Scalar (single-component) cluster registers, with the same predicate list.
6390 def INT_PTX_SREG_CLUSTER_CTARANK :
6391 PTX_READ_SREG_R32<"cluster_ctarank",
6392 int_nvvm_read_ptx_sreg_cluster_ctarank,
6393 [hasSM<90>, hasPTX<78>]>;
6394 def INT_PTX_SREG_CLUSTER_NCTARANK:
6395 PTX_READ_SREG_R32<"cluster_nctarank",
6396 int_nvvm_read_ptx_sreg_cluster_nctarank,
6397 [hasSM<90>, hasPTX<78>]>;
// Scalar 32-bit special-register reads. Each def instantiates
// PTX_READ_SREG_R32 with the register name (emitted as
// "mov.u32 $d, %<name>;") and the intrinsic the instruction is selected from.
// No predicate list is passed, so Preds keeps its default of [].
6400 def INT_PTX_SREG_LANEID :
6401 PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
6402 def INT_PTX_SREG_WARPID :
6403 PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
6404 def INT_PTX_SREG_NWARPID :
6405 PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
6406 def INT_PTX_SREG_SMID :
6407 PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
6408 def INT_PTX_SREG_NSMID :
6409 PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
6410 def INT_PTX_SREG_GRIDID :
6411 PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
// Lane-mask registers, one per comparison (eq, le, lt, ge, gt).
6413 def INT_PTX_SREG_LANEMASK_EQ :
6414 PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
6415 def INT_PTX_SREG_LANEMASK_LE :
6416 PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
6417 def INT_PTX_SREG_LANEMASK_LT :
6418 PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
6419 def INT_PTX_SREG_LANEMASK_GE :
6420 PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
6421 def INT_PTX_SREG_LANEMASK_GT :
6422 PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
6424 let hasSideEffects = 1 in {
6425 def INT_PTX_SREG_CLOCK :
6426 PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
6427 def INT_PTX_SREG_CLOCK64 :
6428 PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
6429 def INT_PTX_SREG_GLOBALTIMER :
6430 PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
// Select the i64 readcyclecounter and readsteadycounter nodes to the
// special-register reads defined above: readcyclecounter becomes a clock64
// read, readsteadycounter becomes a globaltimer read.
6433 def: Pat <(i64 (readcyclecounter)), (INT_PTX_SREG_CLOCK64)>;
6434 def: Pat <(i64 (readsteadycounter)), (INT_PTX_SREG_GLOBALTIMER)>;
// 32-bit reads of the pm0..pm3 special registers, each selected from the
// intrinsic of the same name.
// NOTE(review): presumably the PTX performance-monitor counters -- confirm
// against the PTX ISA special-register list.
6436 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
6437 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
6438 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
6439 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
6441 // TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
6442 // emits "%" before the register name, and WARP_SZ is a constant written without it.
// Reads the warp size by moving the WARP_SZ constant into a 32-bit register;
// selected from int_nvvm_read_ptx_sreg_warpsize.
6443 def INT_PTX_SREG_WARPSIZE :
6444 NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
6445 [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
6447 // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
6448 // In addition to target-independent fields provided by WMMA_REGS, it adds
6449 // the fields commonly used to implement a specific PTX instruction -- register
6450 // types and names, constraints, parts of assembly, etc.
6451 class WMMA_REGINFO<WMMA_REGS r, string op>
6452 : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
6453 // NVPTX register types used to carry fragment data.
6454 NVPTXRegClass regclass = !cond(
6455 !eq(ptx_elt_type, "f16") : Int32Regs,
6456 !eq(ptx_elt_type, "f32") : Float32Regs,
6457 !eq(ptx_elt_type, "f64") : Float64Regs,
6458 !eq(ptx_elt_type, "bf16") : Int32Regs,
6459 !eq(ptx_elt_type, "tf32") : Int32Regs,
6460 !eq(ptx_elt_type, "s32") : Int32Regs,
6461 !eq(ptx_elt_type, "b16") : Int32Regs,
6462 !eq(ptx_elt_type, "s8") : Int32Regs,
6463 !eq(ptx_elt_type, "u8") : Int32Regs,
6464 !eq(ptx_elt_type, "s4") : Int32Regs,
6465 !eq(ptx_elt_type, "u4") : Int32Regs,
6466 !eq(ptx_elt_type, "b1") : Int32Regs);
6468 // Instruction input/output arguments for the fragment.
6469 list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));
6471 // List of register names for the fragment -- ["ra0", "ra1",...]
6472 list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
6474 // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
6475 string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";
6477 // Predicates for particular fragment variant. Technically those are
6478 // per-instruction predicates, but currently all fragments that can be used in
6479 // a given instruction are subject to the same constraints, so an instruction
6480 // can use predicates from any of its fragments. If/when this is no
6481 // longer the case, we can concat all per-fragment predicates to enforce that
6482 // all fragments of the instruction are viable.
6483 list<Predicate> Predicates = !cond(
6484 // fp16 -> fp16/fp32 @ m16n16k16
6485 !and(!eq(geom, "m16n16k16"),
6486 !or(!eq(ptx_elt_type, "f16"),
6487 !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>],
6489 !and(!eq(geom,"m8n8k4"),
6490 !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>],
6492 // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
6493 !and(!or(!eq(geom, "m8n32k16"),
6494 !eq(geom, "m32n8k16")),
6495 !or(!eq(ptx_elt_type, "f16"),
6496 !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>],
6498 // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
6499 !and(!or(!eq(geom,"m16n16k16"),
6500 !eq(geom,"m8n32k16"),
6501 !eq(geom,"m32n8k16")),
6502 !or(!eq(ptx_elt_type, "u8"),
6503 !eq(ptx_elt_type, "s8"),
6504 !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>],
6506 !and(!or(!eq(geom,"m16n16k16"),
6507 !eq(geom,"m8n32k16"),
6508 !eq(geom,"m32n8k16")),
6509 !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>],
6511 !and(!eq(geom,"m16n16k8"),
6512 !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>],
6514 !and(!eq(geom,"m16n16k8"),
6515 !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>],
6517 // b1 -> s32 @ m8n8k128(b1)
6519 !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>],
6521 // u4/s4 -> s32 @ m8n8k32 (u4/s4)
6523 !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>],
6525 !or(!eq(geom,"m16n8k8"),
6526 !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>],
6528 !and(!ne(ptx_elt_type,"f64"),
6529 !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>],
6531 // mma m8n8k32 requires higher PTX version
6533 !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>],
6535 !and(!eq(ptx_elt_type,"f64"),
6536 !eq(geom, "m8n8k4")) : [hasSM<80>, hasPTX<70>],
6539 !or(!eq(geom, "m16n8k16"),
6540 !eq(geom, "m16n8k4"),
6541 !eq(geom, "m16n8k32"),
6542 !eq(geom, "m16n8k64"),
6543 !eq(geom, "m8n8k128"),
6544 !eq(geom, "m16n8k128"),
6545 !eq(geom, "m16n8k256"))) : [hasSM<80>, hasPTX<70>],
6547 !and(!eq(op,"ldmatrix"),
6548 !eq(ptx_elt_type,"b16"),
6549 !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]);
6551 // template DAGs for instruction inputs/output.
6552 dag Outs = !dag(outs, ptx_regs, reg_names);
6553 dag Ins = !dag(ins, ptx_regs, reg_names);
6556 // Convert dag of arguments into a dag to match given intrinsic.
6557 class BuildPatternI<Intrinsic Intr, dag Ins> {
6558 // Build a dag pattern that matches the intrinsic call.
6559 dag ret = !foreach(tmp, Ins,
6560 !subst(imem, ADDRvar,
6561 !subst(MEMri64, ADDRri64,
6562 !subst(MEMri, ADDRri,
6563 !subst(ins, Intr, tmp)))));
6566 // Same as above, but uses PatFrag instead of an Intrinsic.
6567 class BuildPatternPF<PatFrag Intr, dag Ins> {
6568 // Build a dag pattern that matches the PatFrag.
6569 dag ret = !foreach(tmp, Ins,
6570 !subst(imem, ADDRvar,
6571 !subst(MEMri64, ADDRri64,
6572 !subst(MEMri, ADDRri,
6573 !subst(ins, Intr, tmp)))));
6576 // Common WMMA-related fields used for building patterns for all MMA instructions.
6577 class WMMA_INSTR<string _Intr, list<dag> _Args>
6578 : NVPTXInst<(outs), (ins), "?", []> {
6579 Intrinsic Intr = !cast<Intrinsic>(_Intr);
6580 // Concatenate all arguments into a single dag.
6581 dag Args = !foldl((ins), _Args, a, b, !con(a,b));
6582 // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
6583 dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
6587 // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
6590 class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
6592 : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
6593 [!con((ins SrcOp:$src),
6594 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
6595 Requires<Frag.Predicates> {
6596 // Load/store intrinsics are overloaded on pointer's address space.
6597 // To match the right intrinsic, we need to build AS-constrained PatFrag.
6598 // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, ...).
6599 dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
6600 dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
6601 // Build PatFrag that only matches particular address space.
6602 PatFrag IntrFrag = PatFrag<PFOperands,
6604 !cond(!eq(Space, ".shared"): AS_match.shared,
6605 !eq(Space, ".global"): AS_match.global,
6606 true: AS_match.generic)>;
6607 // Build AS-constrained pattern.
6608 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6610 let OutOperandList = Frag.Outs;
6611 let InOperandList = !con(Args, (ins MmaCode:$ptx));
6612 let AsmString = "wmma.load."
6619 # "." # Frag.ptx_elt_type # " \t"
6622 # !if(WithStride, ", $ldm", "")
6627 // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
6629 class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
6630 bit WithStride, DAGOperand DstOp>
6631 : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
6632 [!con((ins DstOp:$dst),
6634 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
6635 Requires<Frag.Predicates> {
6637 // Load/store intrinsics are overloaded on pointer's address space.
6638 // To match the right intrinsic, we need to build AS-constrained PatFrag.
6639 // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, ...).
6640 dag PFOperands = !con((ops node:$dst),
6641 !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
6642 !if(WithStride, (ops node:$ldm), (ops)));
6643 // Build PatFrag that only matches particular address space.
6644 PatFrag IntrFrag = PatFrag<PFOperands,
6645 !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
6646 !cond(!eq(Space, ".shared"): AS_match.shared,
6647 !eq(Space, ".global"): AS_match.global,
6648 true: AS_match.generic)>;
6649 // Build AS-constrained pattern.
6650 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6652 let InOperandList = !con(Args, (ins MmaCode:$ptx));
6653 let OutOperandList = (outs);
6654 let AsmString = "wmma.store.d.sync"
6659 # "." # Frag.ptx_elt_type
6662 # !if(WithStride, ", $ldm", "")
6666 // Create all load/store variants
// Every anonymous def created inside the defset is collected into the
// MMA_LDSTs list. The loops enumerate layout x stride x address space x
// address operand class; a load or store is emitted only for (frag, layout)
// pairs for which NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret is true.
6667 defset list<WMMA_INSTR> MMA_LDSTs = {
6668 foreach layout = ["row", "col"] in {
6669 foreach stride = [false, true] in {
// The empty string is the variant without an address-space suffix.
6670 foreach space = [".global", ".shared", ""] in {
6671 foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
// Loads: one WMMA_LOAD per supported fragment in all_ld_ops.
6672 foreach frag = NVVM_MMA_OPS.all_ld_ops in
6673 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
6674 def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
// Stores: one WMMA_STORE_D per supported fragment in all_st_ops.
6675 foreach frag = NVVM_MMA_OPS.all_st_ops in
6676 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
6677 def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
6684 // B1 instruction variants need extra constraints.
// Helper that computes the predicate list for an MMA instruction from its
// A fragment and its b1op string; the result is exposed in `ret`.
6685 class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
6687 WMMA_REGINFO Frag = FragA;
6688 list<Predicate> ret = !listconcat(
// The ".and.popc" b1op additionally requires hasSM<80> and hasPTX<71>;
// every other b1op value contributes an empty list here.
6690 !if(!eq(b1op, ".and.popc"), [hasSM<80>,hasPTX<71>],[])
// wmma.mma instruction record.
//   FragA/FragB/FragC - input fragments; their Ins dags form the input list.
//   FragD             - result fragment; its Outs dag is the output list.
//   ALayout/BLayout, Satfinite, rnd, b1op - forwarded to WMMA_NAME to select
//                       the record; Satfinite, rnd are also used in AsmString.
6694 class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
6695 WMMA_REGINFO FragC, WMMA_REGINFO FragD,
6696 string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
6697 : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
6698 [FragA.Ins, FragB.Ins, FragC.Ins]>,
6699 // Requires does not seem to have an effect on an Instruction w/o Patterns.
6700 // We set it here anyway and propagate it to the Pat<> we construct below.
6701 Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
6702 let OutOperandList = FragD.Outs;
// Inputs are Args followed by a trailing MmaCode:$ptx operand.
6703 let InOperandList = !con(Args, (ins MmaCode:$ptx));
// Type suffix: when the A fragment's element type is f16 only the D and C
// element types are listed; in every other case (the `1:` default arm) the
// D, A, B and C element types are listed, in that order.
6704 string TypeList = !cond(
6705 !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
6706 # "." # FragC.ptx_elt_type,
6707 1: "." # FragD.ptx_elt_type
6708 # "." # FragA.ptx_elt_type
6709 # "." # FragB.ptx_elt_type
6710 # "." # FragC.ptx_elt_type,
6712 let AsmString = "wmma.mma"
// The rounding-mode suffix is emitted only when rnd is non-empty.
6719 # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
// ".satfinite" is emitted only when Satfinite is non-zero. The register
// strings follow, one per line, in D, A, B, C order.
6721 # !if(Satfinite, ".satfinite", "") # "\n\t\t"
6722 # FragD.regstring # ",\n\t\t"
6723 # FragA.regstring # ",\n\t\t"
6724 # FragB.regstring # ",\n\t\t"
6725 # FragC.regstring # ";";
// Instantiate the wmma.mma variants. All defs created here are marked
// convergent and are collected by the defset into the WMMAs list.
6728 let isConvergent = true in {
6729 defset list<WMMA_INSTR> WMMAs = {
6730 foreach layout_a = ["row", "col"] in {
6731 foreach layout_b = ["row", "col"] in {
6732 foreach satf = [0, 1] in {
// The empty string is the variant without a rounding-mode suffix.
6733 foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
6734 foreach op = NVVM_MMA_OPS.all_wmma_ops in {
// The b1op choices are derived from the op itself.
6735 foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
// Only combinations accepted by NVVM_WMMA_SUPPORTED produce a def.
6736 if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
// op[0]..op[3] are passed as the A, B, C and D fragments respectively.
6737 def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
6738 WMMA_REGINFO<op[1], "wmma.mma">,
6739 WMMA_REGINFO<op[2], "wmma.mma">,
6740 WMMA_REGINFO<op[3], "wmma.mma">,
6741 layout_a, layout_b, satf, rnd, b1op>;
// mma.sync.aligned instruction record.
//   FragA/FragB/FragC - input fragments; their Ins dags form the input list.
//   FragD             - result fragment; its Outs dag is the output list.
//   ALayout/BLayout, Satfinite, b1op - forwarded to MMA_NAME to select the
//                       record; Satfinite is also used in AsmString.
6753 class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
6754 WMMA_REGINFO FragC, WMMA_REGINFO FragD,
6755 string ALayout, string BLayout, int Satfinite, string b1op>
6756 : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
6757 [FragA.Ins, FragB.Ins, FragC.Ins]>,
6758 // Requires does not seem to have an effect on an Instruction w/o Patterns.
6759 // We set it here anyway and propagate it to the Pat<> we construct below.
6760 Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
6761 let OutOperandList = FragD.Outs;
// Inputs are Args followed by a trailing MmaCode:$ptx operand.
6762 let InOperandList = !con(Args, (ins MmaCode:$ptx));
// Type suffix always lists the D, A, B and C element types, in that order.
6763 string TypeList = "." # FragD.ptx_elt_type
6764 # "." # FragA.ptx_elt_type
6765 # "." # FragB.ptx_elt_type
6766 # "." # FragC.ptx_elt_type;
6767 let AsmString = "mma.sync.aligned."
// ".satfinite" is emitted only when Satfinite is non-zero.
6771 # !if(Satfinite, ".satfinite", "")
// Register strings follow, one per line, in D, A, B, C order.
6774 # FragD.regstring # ",\n\t\t"
6775 # FragA.regstring # ",\n\t\t"
6776 # FragB.regstring # ",\n\t\t"
6777 # FragC.regstring # ";";
// Instantiate the mma.sync variants. All defs created here are marked
// convergent and are collected by the defset into the MMAs list.
6780 let isConvergent = true in {
6781 defset list<WMMA_INSTR> MMAs = {
6782 foreach layout_a = ["row", "col"] in {
6783 foreach layout_b = ["row", "col"] in {
6784 foreach satf = [0, 1] in {
6785 foreach op = NVVM_MMA_OPS.all_mma_ops in {
// The b1op choices are derived from the op itself.
6786 foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
// Only combinations accepted by NVVM_MMA_SUPPORTED produce a def.
6787 if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
// op[0]..op[3] are passed as the A, B, C and D fragments respectively.
6788 def : MMA<WMMA_REGINFO<op[0], "mma">,
6789 WMMA_REGINFO<op[1], "mma">,
6790 WMMA_REGINFO<op[2], "mma">,
6791 WMMA_REGINFO<op[3], "mma">,
6792 layout_a, layout_b, satf, b1op>;
6803 // ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
// ldmatrix instruction record.
//   Frag       - WMMA_REGINFO for the loaded fragment; supplies the outputs,
//                the element type and the register string used below.
//   Transposed - when set, ".trans" is added to the mnemonic.
//   Space      - address-space suffix; selects the AS_match predicate below.
6805 class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
6807 : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
6808 Requires<Frag.Predicates> {
6809 // Build a PatFrag that only matches a particular address space.
// ".shared" picks AS_match.shared; any other Space value falls through to
// AS_match.generic.
6810 PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
6811 !cond(!eq(Space, ".shared"): AS_match.shared,
6812 true: AS_match.generic)>;
6813 // Build the AS-constrained pattern.
6814 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6816 let OutOperandList = Frag.Outs;
// Inputs are Args followed by a trailing MmaCode:$ptx operand.
6817 let InOperandList = !con(Args, (ins MmaCode:$ptx));
6818 let AsmString = "ldmatrix.sync.aligned."
6821 # !if(Transposed, ".trans", "")
6823 # "." # Frag.ptx_elt_type
// Destination registers come first, then the bracketed source address.
6824 # " " # Frag.regstring # ", [$src];";
6827 // Create all ldmatrix variants
// Every anonymous def created inside the defset is collected into the
// LDMATRIXs list. The loops enumerate transposed x address space x address
// operand class; a def is emitted only for fragments for which
// NVVM_LDMATRIX_SUPPORTED<frag>.ret is true.
6828 defset list<WMMA_INSTR> LDMATRIXs = {
6829 foreach transposed = [false, true] in {
// The empty string is the variant without an address-space suffix.
6830 foreach space = [".shared", ""] in {
6831 foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
6832 foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
6833 if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
6834 def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
6841 // Constructing non-flat DAGs is still a pain. We can't !subst a dag node with a
6842 // dag, so the ptx.version must be appended *after* the foreach replaces 'ins' with
6843 // the instruction record.
// Selection pattern for one WMMA_INSTR record `wi`: the source pattern is
// wi.IntrinsicPattern, and the result dag starts from wi.Args with its `ins`
// operator rewritten to `wi` itself. The pattern carries wi's predicates.
6844 class MMA_PAT<WMMA_INSTR wi>
6845 : Pat<wi.IntrinsicPattern,
6846 !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
6848 Requires<wi.Predicates>;
6850 // Build intrinsic->instruction patterns for all MMA instructions.
// Iterates over the concatenation of the four defset lists
// (MMAs, WMMAs, MMA_LDSTs, LDMATRIXs).
6851 foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
// Defines the four "mapa" instruction forms for one intrinsic.
//   suffix - text spliced into the mnemonic between "mapa" and the type.
//   Intr   - intrinsic matched by each form's selection pattern.
// Forms: _32/_64 take $b in an Int32Regs register; _32i/_64i take $b as an
// i32 immediate. The 32/64 suffix is the width of $a and of the result $d.
// Every form requires hasSM<90> and hasPTX<78>.
6854 multiclass MAPA<string suffix, Intrinsic Intr> {
// 32-bit $a, register $b.
6855 def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
6856 "mapa" # suffix # ".u32\t$d, $a, $b;",
6857 [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
6858 Requires<[hasSM<90>, hasPTX<78>]>;
// 32-bit $a, immediate $b.
6859 def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
6860 "mapa" # suffix # ".u32\t$d, $a, $b;",
6861 [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
6862 Requires<[hasSM<90>, hasPTX<78>]>;
// 64-bit $a, register $b ($b stays 32-bit).
6863 def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
6864 "mapa" # suffix # ".u64\t$d, $a, $b;",
6865 [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
6866 Requires<[hasSM<90>, hasPTX<78>]>;
// 64-bit $a, immediate $b.
6867 def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
6868 "mapa" # suffix # ".u64\t$d, $a, $b;",
6869 [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
6870 Requires<[hasSM<90>, hasPTX<78>]>;
// Instantiate MAPA twice: once with no mnemonic suffix for int_nvvm_mapa, and
// once with the ".shared::cluster" suffix for int_nvvm_mapa_shared_cluster.
6873 defm mapa : MAPA<"", int_nvvm_mapa>;
6874 defm mapa_shared_cluster : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;
// Defines the two "getctarank" instruction forms for one intrinsic.
//   suffix - text spliced into the mnemonic between "getctarank" and the type.
//   Intr   - intrinsic matched by each form's selection pattern.
// Both forms require hasSM<90> and hasPTX<78>.
6877 multiclass GETCTARANK<string suffix, Intrinsic Intr> {
// 32-bit operand $a.
6878 def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
6879 "getctarank" # suffix # ".u32\t$d, $a;",
6880 [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
6881 Requires<[hasSM<90>, hasPTX<78>]>;
// 64-bit operand $a; the result $d is still an Int32Regs register.
6882 def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
6883 "getctarank" # suffix # ".u64\t$d, $a;",
6884 [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
6885 Requires<[hasSM<90>, hasPTX<78>]>;
// Instantiate GETCTARANK twice: once with no mnemonic suffix for
// int_nvvm_getctarank, and once with the ".shared::cluster" suffix for
// int_nvvm_getctarank_shared_cluster.
6888 defm getctarank : GETCTARANK<"", int_nvvm_getctarank>;
6889 defm getctarank_shared_cluster : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
// Selects int_nvvm_is_explicit_cluster: copies %is_explicit_cluster into an
// Int1Regs result with mov.pred. Takes no inputs; requires hasSM<90> and
// hasPTX<78>.
6891 def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
6892 "mov.pred\t$d, %is_explicit_cluster;",
6893 [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
6894 Requires<[hasSM<90>, hasPTX<78>]>;
6896 // setmaxnreg inc/dec intrinsics
// The defs created in this scope are marked convergent.
6897 let isConvergent = true in {
// One anonymous instruction per instantiation.
//   Action - "inc" or "dec" as used below; spliced into the mnemonic.
//   Intr   - intrinsic matched by the selection pattern.
// $reg_count is an i32 immediate operand and is matched with timm, i.e. as an
// immediate rather than a register. Requires hasSM90a and hasPTX<80>.
6898 multiclass SET_MAXNREG<string Action, Intrinsic Intr> {
6899 def : NVPTXInst<(outs), (ins i32imm:$reg_count),
6900 "setmaxnreg." # Action # ".sync.aligned.u32 $reg_count;",
6901 [(Intr timm:$reg_count)]>,
6902 Requires<[hasSM90a, hasPTX<80>]>;
// Instantiate the "inc" and "dec" forms for their respective intrinsics.
6905 defm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
6906 defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>;
// Selects int_nvvm_exit: emits "exit;" with no operands and no results.
6910 def INT_EXIT : NVPTXInst<(outs), (ins), "exit;", [(int_nvvm_exit)]>;