1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 def immFloat0 : PatLeaf<(fpimm), [{
10 float f = (float)N->getValueAPF().convertToFloat();
14 def immFloat1 : PatLeaf<(fpimm), [{
15 float f = (float)N->getValueAPF().convertToFloat();
19 def immDouble0 : PatLeaf<(fpimm), [{
20 double d = (double)N->getValueAPF().convertToDouble();
24 def immDouble1 : PatLeaf<(fpimm), [{
25 double d = (double)N->getValueAPF().convertToDouble();
31 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
34 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
37 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
41 // A node that will be replaced with the current PTX version.
43 SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
44 return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
46 // (i32 0) will be XForm'ed to the currently used PTX version.
47 dag version = (PTXVerXform (i32 0));
51 // Generates list of n sequential register names.
// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
53 class RegSeq<int n, string prefix> {
54 list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
55 [prefix # !sub(n, 1)]),
59 class THREADMASK_INFO<bit sync> {
60 list<bit> ret = !if(sync, [0, 1], [0]);
63 //-----------------------------------
64 // Synchronization and shuffle functions
65 //-----------------------------------
66 let isConvergent = true in {
67 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
69 [(int_nvvm_barrier0)]>;
70 def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
72 [(int_nvvm_barrier_n Int32Regs:$src1)]>;
// Two-operand bar.sync: both operands are read from 32-bit registers, and the
// instruction is selected for the int_nvvm_barrier intrinsic.
def INT_BARRIER
    : NVPTXInst<(outs),
                (ins Int32Regs:$src1, Int32Regs:$src2),
                "bar.sync \t$src1, $src2;",
                [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
76 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
78 ".reg .pred \t%p1; \n\t",
79 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
80 "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
82 [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
83 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
85 ".reg .pred \t%p1; \n\t",
86 ".reg .pred \t%p2; \n\t",
87 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
88 "bar.red.and.pred \t%p2, 0, %p1; \n\t",
89 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
91 [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
92 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
94 ".reg .pred \t%p1; \n\t",
95 ".reg .pred \t%p2; \n\t",
96 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
97 "bar.red.or.pred \t%p2, 0, %p1; \n\t",
98 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
100 [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// Single-operand bar.sync with an immediate operand, selected for
// int_nvvm_bar_sync.
def INT_BAR_SYNC
    : NVPTXInst<(outs),
                (ins i32imm:$i),
                "bar.sync \t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;
// bar.warp.sync, with the operand supplied as an immediate (_I) or in a 32-bit
// register (_R). Both forms are gated on hasPTX<60> and hasSM<30>.
def INT_BAR_WARP_SYNC_I
    : NVPTXInst<(outs),
                (ins i32imm:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync imm:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BAR_WARP_SYNC_R
    : NVPTXInst<(outs),
                (ins Int32Regs:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
// Single-operand barrier.sync, with the operand supplied as an immediate (_I)
// or in a 32-bit register (_R). Both forms are gated on hasPTX<60> and
// hasSM<30>.
def INT_BARRIER_SYNC_I
    : NVPTXInst<(outs),
                (ins i32imm:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync imm:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_R
    : NVPTXInst<(outs),
                (ins Int32Regs:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync Int32Regs:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
// Two-operand barrier.sync ($id, $cnt) for int_nvvm_barrier_sync_cnt.
// The suffix encodes the operand kinds in order: R = 32-bit register,
// I = immediate. All four forms are gated on hasPTX<60> and hasSM<30>.
def INT_BARRIER_SYNC_CNT_RR
    : NVPTXInst<(outs),
                (ins Int32Regs:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_RI
    : NVPTXInst<(outs),
                (ins Int32Regs:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_IR
    : NVPTXInst<(outs),
                (ins i32imm:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_II
    : NVPTXInst<(outs),
                (ins i32imm:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
135 class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
136 list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
137 NVPTXInst<(outs), (ins), "barrier.cluster."# variant #";", [(Intr)]>,
// barrier.cluster.{arrive, arrive.relaxed, wait}.
// arrive and wait use INT_BARRIER_CLUSTER's default predicate list; the
// relaxed form passes [hasPTX<80>, hasSM<90>] explicitly instead.
def barrier_cluster_arrive
    : INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;
def barrier_cluster_arrive_relaxed
    : INT_BARRIER_CLUSTER<"arrive.relaxed",
                          int_nvvm_barrier_cluster_arrive_relaxed,
                          [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait
    : INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;
148 class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
149 bit offset_imm, bit mask_imm, bit threadmask_imm>
150 : NVPTXInst<(outs), (ins), "?", []> {
151 NVPTXRegClass rc = !cond(
152 !eq(reg, "i32"): Int32Regs,
153 !eq(reg, "f32"): Float32Regs);
154 string IntrName = "int_nvvm_shfl_"
155 # !if(sync, "sync_", "")
158 # !if(return_pred, "p", "");
159 Intrinsic Intr = !cast<Intrinsic>(IntrName);
160 let InOperandList = !con(
162 !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
165 !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
166 !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
168 let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
169 let AsmString = "shfl."
170 # !if(sync, "sync.", "")
173 # !if(return_pred, "|$pred", "") # ", "
174 # "$src, $offset, $mask"
175 # !if(sync, ", $threadmask", "")
179 !foreach(tmp, OutOperandList,
181 !subst(i32imm, imm, tmp))),
182 (set !foreach(tmp, InOperandList,
184 !subst(i32imm, imm, tmp))))
188 foreach sync = [false, true] in {
189 foreach mode = ["up", "down", "bfly", "idx"] in {
190 foreach regclass = ["i32", "f32"] in {
191 foreach return_pred = [false, true] in {
192 foreach offset_imm = [false, true] in {
193 foreach mask_imm = [false, true] in {
194 foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
195 def : SHFL_INSTR<sync, mode, regclass, return_pred,
196 offset_imm, mask_imm, threadmask_imm>,
197 Requires<!if(sync, [hasSM<30>, hasPTX<60>], [hasSM<30>, hasSHFL])>;
// vote.{all,any,uni,ballot}
//
// Emits one anonymous instruction that reads a predicate register and writes
// a result of class `regclass`.
//   regclass - register class of the result ($dest).
//   mode     - text pasted after "vote." in the mnemonic (e.g. "all.pred").
//   IntOp    - intrinsic this instruction is selected for.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  "vote." # mode # " \t$dest, $pred;",
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
} // multiclass VOTE
// Instantiate VOTE once per mode. all/any/uni produce a predicate result;
// ballot produces a 32-bit register result.
defm VOTE_ALL
    : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
defm VOTE_ANY
    : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
defm VOTE_UNI
    : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
defm VOTE_BALLOT
    : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
// vote.sync.{all,any,uni,ballot}
//
// Emits two instructions that differ only in how $mask is supplied:
//   i - $mask is an immediate.
//   r - $mask is a 32-bit register.
// Template arguments:
//   regclass - register class of the result ($dest).
//   mode     - text pasted after "vote.sync." in the mnemonic.
//   IntOp    - intrinsic matched; it takes (mask, pred) in that order.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  // Both variants print identically, so build the asm string once.
  defvar AsmStr = "vote.sync." # mode # " \t$dest, $pred, $mask;";

  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
                    AsmStr,
                    [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
                    AsmStr,
                    [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
} // multiclass VOTE_SYNC
// Instantiate VOTE_SYNC once per mode. all/any/uni produce a predicate
// result; ballot produces a 32-bit register result.
defm VOTE_SYNC_ALL
    : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY
    : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI
    : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT
    : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
236 multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
238 def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value),
239 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
240 [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
241 Requires<[hasPTX<60>, hasSM<70>]>;
242 def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value),
243 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
244 [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
245 Requires<[hasPTX<60>, hasSM<70>]>;
246 def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value),
247 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
248 [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>,
249 Requires<[hasPTX<60>, hasSM<70>]>;
250 def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value),
251 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
252 [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
253 Requires<[hasPTX<60>, hasSM<70>]>;
256 defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
258 defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
261 multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
263 def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
264 (ins i32imm:$mask, ImmOp:$value),
265 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
266 [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
267 Requires<[hasPTX<60>, hasSM<70>]>;
268 def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
269 (ins Int32Regs:$mask, ImmOp:$value),
270 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
271 [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
272 Requires<[hasPTX<60>, hasSM<70>]>;
273 def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
274 (ins i32imm:$mask, regclass:$value),
275 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
276 [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
277 Requires<[hasPTX<60>, hasSM<70>]>;
278 def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
279 (ins Int32Regs:$mask, regclass:$value),
280 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
281 [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
282 Requires<[hasPTX<60>, hasSM<70>]>;
284 defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
286 defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
// redux.sync.<BinOp>.<PTXType>
//
// Emits one anonymous instruction taking a 32-bit source and a 32-bit mask,
// both in registers, and producing a 32-bit register result.
//   BinOp   - operation name pasted into the mnemonic (e.g. "min").
//   PTXType - type suffix pasted into the mnemonic (e.g. "u32").
//   Intrin  - intrinsic matched; it takes (src, mask) in that order.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
                  "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
                  [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX<70>, hasSM<80>]>;
} // multiclass REDUX_SYNC
// Unsigned reductions (u32).
defm REDUX_SYNC_UMIN
    : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX
    : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
// Signed reductions (s32).
defm REDUX_SYNC_ADD
    : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN
    : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX
    : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
// Bitwise reductions (b32).
defm REDUX_SYNC_AND
    : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR
    : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR
    : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;
305 } // isConvergent = true
307 //-----------------------------------
308 // Explicit Memory Fence Functions
309 //-----------------------------------
310 class MEMBAR<string StrOp, Intrinsic IntOP> :
311 NVPTXInst<(outs), (ins),
// membar.{cta,gl,sys}: one MEMBAR instance per suffix, each bound to the
// matching int_nvvm_membar_* intrinsic.
def INT_MEMBAR_CTA
    : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL
    : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
def INT_MEMBAR_SYS
    : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;

// fence.sc.cluster reuses the MEMBAR class and is additionally gated on
// hasPTX<78> and hasSM<90>.
def INT_FENCE_SC_CLUSTER
    : MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
      Requires<[hasPTX<78>, hasSM<90>]>;
322 //-----------------------------------
323 // Async Copy Functions
324 //-----------------------------------
// cp.async.mbarrier.arrive[NoInc][AddrSpace].b64 [$addr]
//
// Emits _32 and _64 variants that differ only in the register class of $addr.
//   NoInc     - text pasted directly after "cp.async.mbarrier.arrive"
//               ("" or ".noinc" at the visible instantiations).
//   AddrSpace - text pasted after NoInc ("" or ".shared").
//   Intrin    - intrinsic matched; it takes the address as its only operand.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  // Both variants print identically, so build the asm string once.
  defvar AsmStr = "cp.async.mbarrier.arrive" # NoInc # AddrSpace # ".b64 [$addr];";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      AsmStr,
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      AsmStr,
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
} // multiclass CP_ASYNC_MBARRIER_ARRIVE
// All four combinations of {"", ".noinc"} x {"", ".shared"}.
defm CP_ASYNC_MBARRIER_ARRIVE
    : CP_ASYNC_MBARRIER_ARRIVE<"", "",
                               int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<"", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", "",
                               int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
// cp.async.<cc>.shared.global [$dst], [$src], <cpsize>[, $src_size]
//
// Emits six instructions:
//   _32  / _64   - no src_size operand; addresses in 32-/64-bit registers.
//   _32s / _64s  - src_size in a 32-bit register.
//   _32si/ _64si - src_size as an immediate.
// Template arguments:
//   cc      - text pasted after "cp.async." ("ca" or "cg" at the visible
//             instantiations).
//   cpsize  - copy-size text printed as the third asm operand.
//   Intrin  - intrinsic matched by the variants without src_size.
//   IntrinS - intrinsic matched by the variants with src_size.
multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin, Intrinsic IntrinS> {
  // Common prefix of every variant's asm string; only the tail differs.
  defvar AsmHead = "cp.async." # cc # ".shared.global [$dst], [$src], " # cpsize;
  defvar AsmPlain = AsmHead # ";";
  defvar AsmSized = AsmHead # ", $src_size;";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      AsmPlain,
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      AsmPlain,
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  // Variant with src_size parameter
  def _32s : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size),
                       AsmSized,
                       [(IntrinS Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;
  def _32si: NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size),
                       AsmSized,
                       [(IntrinS Int32Regs:$dst, Int32Regs:$src, imm:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;
  def _64s : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size),
                       AsmSized,
                       [(IntrinS Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;
  def _64si: NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size),
                       AsmSized,
                       [(IntrinS Int64Regs:$dst, Int64Regs:$src, imm:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;
} // multiclass CP_ASYNC_SHARED_GLOBAL_I
// "ca" copies with sizes 4, 8 and 16.
defm CP_ASYNC_CA_SHARED_GLOBAL_4
    : CP_ASYNC_SHARED_GLOBAL_I<"ca", "4",
                               int_nvvm_cp_async_ca_shared_global_4,
                               int_nvvm_cp_async_ca_shared_global_4_s>;
defm CP_ASYNC_CA_SHARED_GLOBAL_8
    : CP_ASYNC_SHARED_GLOBAL_I<"ca", "8",
                               int_nvvm_cp_async_ca_shared_global_8,
                               int_nvvm_cp_async_ca_shared_global_8_s>;
defm CP_ASYNC_CA_SHARED_GLOBAL_16
    : CP_ASYNC_SHARED_GLOBAL_I<"ca", "16",
                               int_nvvm_cp_async_ca_shared_global_16,
                               int_nvvm_cp_async_ca_shared_global_16_s>;

// "cg" copy; only size 16 is instantiated here.
defm CP_ASYNC_CG_SHARED_GLOBAL_16
    : CP_ASYNC_SHARED_GLOBAL_I<"cg", "16",
                               int_nvvm_cp_async_cg_shared_global_16,
                               int_nvvm_cp_async_cg_shared_global_16_s>;
// cp.async group management. All three are gated on hasPTX<70> and hasSM<80>.

// No operands.
def CP_ASYNC_COMMIT_GROUP
    : NVPTXInst<(outs), (ins),
                "cp.async.commit_group;",
                [(int_nvvm_cp_async_commit_group)]>,
      Requires<[hasPTX<70>, hasSM<80>]>;

// $n is matched as a target immediate (timm), not a plain imm.
def CP_ASYNC_WAIT_GROUP
    : NVPTXInst<(outs), (ins i32imm:$n),
                "cp.async.wait_group $n;",
                [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
      Requires<[hasPTX<70>, hasSM<80>]>;

// No operands.
def CP_ASYNC_WAIT_ALL
    : NVPTXInst<(outs), (ins),
                "cp.async.wait_all;",
                [(int_nvvm_cp_async_wait_all)]>,
      Requires<[hasPTX<70>, hasSM<80>]>;
404 //-----------------------------------
405 // MBarrier Functions
406 //-----------------------------------
// mbarrier.init[AddrSpace].b64 [$addr], $count
//
// Emits _32 and _64 variants that differ only in the register class of $addr;
// $count is always a 32-bit register.
//   AddrSpace - text pasted after "mbarrier.init" ("" or ".shared").
//   Intrin    - intrinsic matched; it takes (addr, count).
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  // Both variants print identically, so build the asm string once.
  defvar AsmStr = "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
                      AsmStr,
                      [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
                      AsmStr,
                      [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
} // multiclass MBARRIER_INIT
// Unqualified and ".shared" forms of mbarrier.init.
defm MBARRIER_INIT
    : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED
    : MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;
// mbarrier.inval[AddrSpace].b64 [$addr]
//
// Emits _32 and _64 variants that differ only in the register class of $addr.
//   AddrSpace - text pasted after "mbarrier.inval" ("" or ".shared").
//   Intrin    - intrinsic matched; it takes the address as its only operand.
multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  // Both variants print identically, so build the asm string once.
  defvar AsmStr = "mbarrier.inval" # AddrSpace # ".b64 [$addr];";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      AsmStr,
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      AsmStr,
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
} // multiclass MBARRIER_INVAL
// Unqualified and ".shared" forms of mbarrier.inval.
defm MBARRIER_INVAL
    : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED
    : MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>;
// mbarrier.arrive[AddrSpace].b64 $state, [$addr]
//
// Emits _32 and _64 variants that differ only in the register class of $addr;
// the $state result is always a 64-bit register.
//   AddrSpace - text pasted after "mbarrier.arrive" ("" or ".shared").
//   Intrin    - intrinsic matched; it takes the address and yields the state.
multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  // Both variants print identically, so build the asm string once.
  defvar AsmStr = "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];";

  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      AsmStr,
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      AsmStr,
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
} // multiclass MBARRIER_ARRIVE
// Unqualified and ".shared" forms of mbarrier.arrive.
defm MBARRIER_ARRIVE
    : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED
    : MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
// mbarrier.arrive.noComplete[AddrSpace].b64 $state, [$addr], $count
//
// Emits _32 and _64 variants that differ only in the register class of $addr;
// $count is a 32-bit register and the $state result a 64-bit register.
//   AddrSpace - text pasted after "mbarrier.arrive.noComplete".
//   Intrin    - intrinsic matched; it takes (addr, count) and yields the state.
multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  // Both variants print identically, so build the asm string once.
  defvar AsmStr = "mbarrier.arrive.noComplete" # AddrSpace #
                  ".b64 $state, [$addr], $count;";

  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      AsmStr,
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      AsmStr,
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
} // multiclass MBARRIER_ARRIVE_NOCOMPLETE
// Unqualified and ".shared" forms of mbarrier.arrive.noComplete.
defm MBARRIER_ARRIVE_NOCOMPLETE
    : MBARRIER_ARRIVE_NOCOMPLETE<"",
                                 int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_NOCOMPLETE<".shared",
                                 int_nvvm_mbarrier_arrive_noComplete_shared>;
// mbarrier.arrive_drop[AddrSpace].b64 $state, [$addr]
//
// Emits _32 and _64 variants that differ only in the register class of $addr;
// the $state result is always a 64-bit register.
//   AddrSpace - text pasted after "mbarrier.arrive_drop" ("" or ".shared").
//   Intrin    - intrinsic matched; it takes the address and yields the state.
multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  // Both variants print identically, so build the asm string once.
  defvar AsmStr = "mbarrier.arrive_drop" # AddrSpace # ".b64 $state, [$addr];";

  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      AsmStr,
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      AsmStr,
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
} // multiclass MBARRIER_ARRIVE_DROP
// Unqualified and ".shared" forms of mbarrier.arrive_drop.
defm MBARRIER_ARRIVE_DROP
    : MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED
    : MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
// mbarrier.arrive_drop.noComplete[AddrSpace].b64 $state, [$addr], $count
//
// Emits _32 and _64 variants that differ only in the register class of $addr;
// $count is a 32-bit register and the $state result a 64-bit register.
//   AddrSpace - text pasted after "mbarrier.arrive_drop.noComplete".
//   Intrin    - intrinsic matched; it takes (addr, count) and yields the state.
multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  // Both variants print identically, so build the asm string once.
  defvar AsmStr = "mbarrier.arrive_drop.noComplete" # AddrSpace #
                  ".b64 $state, [$addr], $count;";

  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      AsmStr,
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      AsmStr,
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
} // multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE
// Unqualified and ".shared" forms of mbarrier.arrive_drop.noComplete.
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
          "", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
          ".shared", int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
// mbarrier.test_wait[AddrSpace].b64 $res, [$addr], $state
//
// Emits _32 and _64 variants that differ only in the register class of $addr;
// $state is a 64-bit register and the $res result a predicate register.
//   AddrSpace - text pasted after "mbarrier.test_wait" ("" or ".shared").
//   Intrin    - intrinsic matched; it takes (addr, state) and yields $res.
multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  // Both variants print identically, so build the asm string once.
  defvar AsmStr = "mbarrier.test_wait" # AddrSpace # ".b64 $res, [$addr], $state;";

  def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
                      AsmStr,
                      [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
                      AsmStr,
                      [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
} // multiclass MBARRIER_TEST_WAIT
// Unqualified and ".shared" forms of mbarrier.test_wait.
defm MBARRIER_TEST_WAIT
    : MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED
    : MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
// mbarrier.pending_count.b64 $res, $state
//
// Reads a 64-bit $state register and produces a 32-bit $res register.
//   Intrin - intrinsic matched; it takes the state as its only operand.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin>
    : NVPTXInst<(outs Int32Regs:$res),
                (ins Int64Regs:$state),
                "mbarrier.pending_count.b64 $res, $state;",
                [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
      Requires<[hasPTX<70>, hasSM<80>]>;
// The only instantiation of the MBARRIER_PENDING_COUNT class; the record
// shares the class's name.
def MBARRIER_PENDING_COUNT
    : MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
537 //-----------------------------------
539 //-----------------------------------
541 // Map min(1.0, max(0.0, x)) to sat(x)
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
// NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
545 // Same story for fmax, fmin.
// f32: select fmin(1.0, fmax(0.0, a)) as a saturating CVT_f32_f32.
// The fmax operands may appear in either order, and the fmax result may be
// either operand of the fmin, giving four patterns in total. They are emitted
// in the order: constant-first fmin (both fmax orders), then constant-last
// fmin (both fmax orders).
foreach MaxDag = [(int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                  (int_nvvm_fmax_f Float32Regs:$a, immFloat0)] in
  def : Pat<(int_nvvm_fmin_f immFloat1, MaxDag),
            (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
foreach MaxDag = [(int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                  (int_nvvm_fmax_f Float32Regs:$a, immFloat0)] in
  def : Pat<(int_nvvm_fmin_f MaxDag, immFloat1),
            (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// f64: select fmin(1.0, fmax(0.0, a)) as a saturating CVT_f64_f64.
// The fmax operands may appear in either order, and the fmax result may be
// either operand of the fmin, giving four patterns in total. They are emitted
// in the order: constant-first fmin (both fmax orders), then constant-last
// fmin (both fmax orders).
foreach MaxDag = [(int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                  (int_nvvm_fmax_d Float64Regs:$a, immDouble0)] in
  def : Pat<(int_nvvm_fmin_d immDouble1, MaxDag),
            (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
foreach MaxDag = [(int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                  (int_nvvm_fmax_d Float64Regs:$a, immDouble0)] in
  def : Pat<(int_nvvm_fmin_d MaxDag, immDouble1),
            (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
574 // We need a full string for OpcStr here because we need to deal with case like
576 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
577 NVPTXRegClass src_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
578 : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
580 [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
583 // We need a full string for OpcStr here because we need to deal with the case
584 // like INT_PTX_NATIVE_POWR_F.
585 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
586 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP,
587 list<Predicate> Preds = []>
588 : NVPTXInst<(outs t_regclass:$dst),
589 (ins s0_regclass:$src0, s1_regclass:$src1),
591 [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
594 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
595 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
596 NVPTXRegClass s2_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
597 : NVPTXInst<(outs t_regclass:$dst),
598 (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
600 [(set t_regclass:$dst,
601 (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>,
// prmt.b32 with three 32-bit register sources and a 32-bit register result,
// selected for int_nvvm_prmt.
def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_prmt>;
// f32 min variants. All operands and results are Float32Regs.

// Plain and .ftz forms: no extra predicates.
def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_f>;

// .NaN forms: gated on hasPTX<70> and hasSM<80>.
def INT_NVVM_FMIN_NAN_F
    : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_nan_f,
               [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMIN_FTZ_NAN_F
    : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_nan_f,
               [hasPTX<70>, hasSM<80>]>;

// .xorsign.abs forms: gated on hasPTX<72> and hasSM<86>.
def INT_NVVM_FMIN_XORSIGN_ABS_F
    : F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F
    : F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F
    : F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F
    : F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
// f32 max variants. All operands and results are Float32Regs.

// Plain and .ftz forms: no extra predicates.
def INT_NVVM_FMAX_F
    : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
    : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_f>;

// .NaN forms: gated on hasPTX<70> and hasSM<80>.
def INT_NVVM_FMAX_NAN_F
    : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_nan_f,
               [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMAX_FTZ_NAN_F
    : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_nan_f,
               [hasPTX<70>, hasSM<80>]>;

// .xorsign.abs forms: gated on hasPTX<72> and hasSM<86>.
def INT_NVVM_FMAX_XORSIGN_ABS_F
    : F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F
    : F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F
    : F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F
    : F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
// f64 min/max. All operands and results are Float64Regs; no extra predicates.
def INT_NVVM_FMIN_D
    : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
    : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fmax_d>;
675 // Min Max f16, f16x2, bf16, bf16x2
678 class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
679 list<Predicate> Preds = [hasPTX<70>, hasSM<80>]> {
682 NVPTXRegClass RegClass = RC;
683 list<Predicate> Predicates = Preds;
686 multiclass MIN_MAX<string IntName> {
688 MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16,
689 int_nvvm_fmax_f16), Int16Regs>,
690 MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16,
691 int_nvvm_fmax_ftz_f16), Int16Regs>,
692 MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16,
693 int_nvvm_fmax_nan_f16), Int16Regs>,
694 MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"),
695 int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>,
696 MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"),
697 int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
698 Int16Regs, [hasPTX<72>, hasSM<86>]>,
699 MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"),
700 int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
701 Int16Regs, [hasPTX<72>, hasSM<86>]>,
702 MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
703 int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
704 Int16Regs, [hasPTX<72>, hasSM<86>]>,
705 MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
706 int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
707 int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>,
708 MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2,
709 int_nvvm_fmax_f16x2), Int32Regs>,
710 MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"),
711 int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>,
712 MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"),
713 int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>,
714 MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"),
715 int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>,
716 MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
717 int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
718 Int32Regs, [hasPTX<72>, hasSM<86>]>,
719 MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
720 int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2),
721 Int32Regs, [hasPTX<72>, hasSM<86>]>,
722 MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
723 int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2),
724 Int32Regs, [hasPTX<72>, hasSM<86>]>,
725 MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
726 int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
727 int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
728 Int32Regs, [hasPTX<72>, hasSM<86>]>,
729 MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"),
730 int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>,
731 MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16,
732 int_nvvm_fmax_nan_bf16), Int16Regs>,
733 MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"),
734 int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
735 Int16Regs, [hasPTX<72>, hasSM<86>]>,
736 MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"),
737 int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16),
738 Int16Regs, [hasPTX<72>, hasSM<86>]>,
739 MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2,
740 int_nvvm_fmax_bf16x2), Int32Regs>,
741 MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"),
742 int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>,
743 MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
744 int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
745 Int32Regs, [hasPTX<72>, hasSM<86>]>,
746 MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
747 int_nvvm_fmin_nan_xorsign_abs_bf16x2,
748 int_nvvm_fmax_nan_xorsign_abs_bf16x2),
749 Int32Regs, [hasPTX<72>, hasSM<86>]>] in {
750 def P.Variant : F_MATH_2<!strconcat(
751 IntName, !subst("_", ".", P.Variant), " \t$dst, $src0, $src1;"),
752 P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
// Instantiate the MIN_MAX multiclass once per operation. The string argument
// selects between the fmin and fmax intrinsic in every tuple and is also used
// as the leading opcode text of each generated asm string.
756 defm INT_NVVM_FMIN : MIN_MAX<"min">;
// NOTE(review): the prefix "INT_NVVM_FMAN" looks like a typo for
// "INT_NVVM_FMAX". It is left untouched here because the defm prefix becomes
// part of every generated record name, and those names may be referenced
// outside this chunk - confirm before renaming.
757 defm INT_NVVM_FMAN : MIN_MAX<"max">;
// Map the int_nvvm_mulhi_* intrinsics onto PTX mul.hi for the s32, u32, s64
// and u64 operand types.
// F_MATH_2 is declared outside this chunk; judging from its uses here the
// arguments are: asm string, result register class, the two source register
// classes, and the intrinsic to match - TODO confirm against its definition.
763 def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
764 Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
765 def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
766 Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
// 64-bit variants use Int64Regs for the result and both sources.
768 def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
769 Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
770 def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
771 Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
// Floating-point multiply with an explicit rounding modifier. One record per
// int_nvvm_mul_<mode>[_ftz]_{f,d} intrinsic; the mode (.rn/.rz/.rm/.rp) and the
// optional .ftz flag are spelled out in the asm string of each record.
// f32 variants: every rounding mode exists with and without .ftz.
773 def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
774 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
775 def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
776 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
777 def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
778 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
779 def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
780 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
781 def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
782 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
783 def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
784 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
785 def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
786 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
787 def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
788 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
// f64 variants: the four rounding modes only, no .ftz forms.
790 def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
791 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
792 def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
793 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
794 def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
795 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
796 def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
797 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
// int_nvvm_mul24_i / int_nvvm_mul24_ui are emitted as PTX mul24.lo with the
// s32 and u32 type suffix respectively; all operands are 32-bit integer
// registers.
799 def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
800 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
801 def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
802 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
// Floating-point division. One record per int_nvvm_div_* intrinsic; the asm
// string of each record carries the modifier named in the intrinsic.
// Approximate f32 division (div.approx), with and without .ftz.
808 def INT_NVVM_DIV_APPROX_FTZ_F
809 : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
810 Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
811 def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
812 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
// f32 division with an explicit rounding modifier (.rn/.rz/.rm/.rp), each with
// and without .ftz.
814 def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
815 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
816 def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
817 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
818 def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
819 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
820 def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
821 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
822 def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
823 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
824 def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
825 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
826 def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
827 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
828 def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
829 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
// f64 division: the four rounding modes only.
831 def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
832 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
833 def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
834 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
835 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
836 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
837 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
838 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
// int_nvvm_sad_i / int_nvvm_sad_ui are emitted as PTX sad.s32 / sad.u32.
// These use F_MATH_3 (declared outside this chunk), which takes one more
// source register class than F_MATH_2; the asm string reads $src0..$src2.
844 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
845 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
846 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
847 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
// floor: no dedicated instruction record is defined here. The intrinsics are
// rewritten to the same-type conversion instruction (CVT_f32_f32 /
// CVT_f64_f64) with the CvtRMI mode operand (CvtRMI_FTZ for the ftz form).
853 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
854 (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
855 def : Pat<(int_nvvm_floor_f Float32Regs:$a),
856 (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
857 def : Pat<(int_nvvm_floor_d Float64Regs:$a),
858 (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
// ceil: same scheme, using the CvtRPI / CvtRPI_FTZ mode operand.
860 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
861 (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
862 def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
863 (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
864 def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
865 (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
// int_nvvm_fabs_* intrinsics, emitted as PTX abs for f32 (with and without
// .ftz) and f64.
// F_MATH_1 is declared outside this chunk; judging from its uses here the
// arguments are: asm string, result register class, source register class,
// intrinsic - TODO confirm against its definition.
871 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
872 Float32Regs, int_nvvm_fabs_ftz_f>;
873 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
874 Float32Regs, int_nvvm_fabs_f>;
876 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
877 Float64Regs, int_nvvm_fabs_d>;
880 // Abs, Neg bf16, bf16x2
// The scalar bf16 forms use 16-bit integer registers (Int16Regs) and the
// packed bf16x2 forms use 32-bit integer registers (Int32Regs) for both the
// result and the source. Every record passes the predicate list
// [hasPTX<70>, hasSM<80>] as the trailing argument of F_MATH_1.
883 def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs,
884 Int16Regs, int_nvvm_abs_bf16, [hasPTX<70>, hasSM<80>]>;
885 def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs,
886 Int32Regs, int_nvvm_abs_bf16x2, [hasPTX<70>, hasSM<80>]>;
887 def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs,
888 Int16Regs, int_nvvm_neg_bf16, [hasPTX<70>, hasSM<80>]>;
889 def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs,
890 Int32Regs, int_nvvm_neg_bf16x2, [hasPTX<70>, hasSM<80>]>;
// round: the intrinsics are rewritten to the same-type conversion instruction
// with the CvtRNI mode operand (CvtRNI_FTZ for the ftz form).
896 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
897 (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
898 def : Pat<(int_nvvm_round_f Float32Regs:$a),
899 (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
900 def : Pat<(int_nvvm_round_d Float64Regs:$a),
901 (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
// trunc: the intrinsics are rewritten to the same-type conversion instruction
// with the CvtRZI mode operand (CvtRZI_FTZ for the ftz form).
907 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
908 (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
909 def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
910 (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
911 def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
912 (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
// saturate: the intrinsics are rewritten to the same-type conversion
// instruction with the CvtSAT mode operand (CvtSAT_FTZ for the ftz form).
918 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
919 (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
920 def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
921 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
922 def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
923 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// int_nvvm_ex2_approx_* intrinsics, emitted as PTX ex2.approx for f32 (with
// and without .ftz), f64, f16 and f16x2.
929 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
930 Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
931 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
932 Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
933 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
934 Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
// The half-precision forms use integer register classes (Int16Regs for f16,
// Int32Regs for f16x2) and carry the predicate list [hasPTX<70>, hasSM<75>].
935 def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
936 Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
937 def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
938 Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;
// int_nvvm_lg2_approx_* intrinsics, emitted as PTX lg2.approx for f32 (with
// and without .ftz) and f64.
940 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
941 Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
942 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
943 Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
944 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
945 Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
// int_nvvm_sin_approx_* / int_nvvm_cos_approx_* intrinsics, emitted as PTX
// sin.approx / cos.approx. Only f32 records are defined here, each with and
// without .ftz.
951 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
952 Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
953 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
954 Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
956 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
957 Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
958 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
959 Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
// Descriptor for a single fma variant, consumed by the FMA_INST multiclass.
//   V     - variant suffix string (e.g. "_rn_f64")
//   I     - intrinsic matched by the variant
//   RC    - register class, exposed as the RegClass field
//   Preds - predicates required by the variant, exposed as the Predicates
//           field; defaults to the empty list
// NOTE(review): FMA_INST also reads P.Variant and P.Intr; the field lines that
// presumably store V and I are not visible in this listing - confirm.
965 class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
966 list<Predicate> Preds = []> {
969 NVPTXRegClass RegClass = RC;
970 list<Predicate> Predicates = Preds;
// Generates one F_MATH_3 record per FMA_TUPLE listed below. For each tuple P
// the asm string is "fma" followed by P.Variant with every "_" replaced by
// ".", and P.RegClass is used for the result and all three sources.
973 multiclass FMA_INST {
// f64 variants, one per rounding mode.
975 FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
976 FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
977 FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
978 FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,
// f32 variants, each rounding mode with and without ftz.
980 FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
981 FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
982 FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
983 FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
984 FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
985 FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
986 FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
987 FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,
// f16 variants (Int16Regs). The plain/ftz/sat forms require
// [hasPTX<42>, hasSM<53>]; the relu forms require [hasPTX<70>, hasSM<80>].
989 FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
990 FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs,
991 [hasPTX<42>, hasSM<53>]>,
992 FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs,
993 [hasPTX<42>, hasSM<53>]>,
994 FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs,
995 [hasPTX<42>, hasSM<53>]>,
996 FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs,
997 [hasPTX<70>, hasSM<80>]>,
998 FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs,
999 [hasPTX<70>, hasSM<80>]>,
// bf16 variants (Int16Regs), all requiring [hasPTX<70>, hasSM<80>].
1001 FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
1002 FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, Int16Regs,
1003 [hasPTX<70>, hasSM<80>]>,
1004 FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, Int16Regs,
1005 [hasPTX<70>, hasSM<80>]>,
1006 FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs,
1007 [hasPTX<70>, hasSM<80>]>,
1008 FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
1009 [hasPTX<70>, hasSM<80>]>,
1010 FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs,
1011 [hasPTX<70>, hasSM<80>]>,
// Packed f16x2 variants (Int32Regs), with the same predicate split as the
// scalar f16 variants.
1013 FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs,
1014 [hasPTX<42>, hasSM<53>]>,
1015 FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs,
1016 [hasPTX<42>, hasSM<53>]>,
1017 FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs,
1018 [hasPTX<42>, hasSM<53>]>,
1019 FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
1020 Int32Regs, [hasPTX<42>, hasSM<53>]>,
1021 FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs,
1022 [hasPTX<70>, hasSM<80>]>,
1023 FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
1024 Int32Regs, [hasPTX<70>, hasSM<80>]>,
// Packed bf16x2 variants (Int32Regs); only the plain and relu forms are listed.
1025 FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
1026 [hasPTX<70>, hasSM<80>]>,
1027 FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
1028 [hasPTX<70>, hasSM<80>]>
// Per-tuple record body: asm string, four register classes (result plus three
// sources), the intrinsic, and the tuple's predicate list.
1031 F_MATH_3<!strconcat("fma",
1032 !subst("_", ".", P.Variant), " \t$dst, $src0, $src1, $src2;"),
1033 P.RegClass, P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
// Instantiate every fma variant described in FMA_INST under the
// INT_NVVM_FMA name prefix.
1037 defm INT_NVVM_FMA : FMA_INST;
// int_nvvm_rcp_* intrinsics, emitted as PTX rcp. One record per intrinsic; the
// modifier in the asm string mirrors the intrinsic name.
// f32 with explicit rounding modifier, each with and without .ftz.
1043 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
1044 Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
1045 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
1046 Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
1047 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
1048 Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
1049 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
1050 Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
1051 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
1052 Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
1053 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
1054 Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
1055 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
1056 Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
1057 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
1058 Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
// f64 with explicit rounding modifier.
1060 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
1061 Float64Regs, int_nvvm_rcp_rn_d>;
1062 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
1063 Float64Regs, int_nvvm_rcp_rz_d>;
1064 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
1065 Float64Regs, int_nvvm_rcp_rm_d>;
1066 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
1067 Float64Regs, int_nvvm_rcp_rp_d>;
// Approximate reciprocal; only the .ftz forms are defined here (f32 and f64).
1069 def INT_NVVM_RCP_APPROX_FTZ_F : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
1070 Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
1071 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
1072 Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
// int_nvvm_sqrt_<mode>* intrinsics, emitted as PTX sqrt. One record per
// intrinsic; the modifier in the asm string mirrors the intrinsic name.
// f32 with explicit rounding modifier, each with and without .ftz.
1078 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
1079 Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
1080 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
1081 Float32Regs, int_nvvm_sqrt_rn_f>;
1082 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
1083 Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
1084 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
1085 Float32Regs, int_nvvm_sqrt_rz_f>;
1086 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
1087 Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
1088 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
1089 Float32Regs, int_nvvm_sqrt_rm_f>;
1090 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
1091 Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
1092 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
1093 Float32Regs, int_nvvm_sqrt_rp_f>;
// Approximate f32 square root, with and without .ftz.
1094 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
1095 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
1096 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
1097 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
// f64 with explicit rounding modifier.
1099 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
1100 Float64Regs, int_nvvm_sqrt_rn_d>;
1101 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
1102 Float64Regs, int_nvvm_sqrt_rz_d>;
1103 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
1104 Float64Regs, int_nvvm_sqrt_rm_d>;
1105 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
1106 Float64Regs, int_nvvm_sqrt_rp_d>;
1108 // nvvm_sqrt intrinsic
// The generic int_nvvm_sqrt_f intrinsic has no instruction of its own; it is
// mapped onto one of the four f32 sqrt records above depending on which
// predicates hold:
//   doF32FTZ and do_SQRTF32_RN -> sqrt.rn.ftz.f32
//   do_SQRTF32_RN only         -> sqrt.rn.f32
//   doF32FTZ only              -> sqrt.approx.ftz.f32
//   no predicate               -> sqrt.approx.f32
// NOTE(review): several of these patterns can be eligible at once; the list is
// written from most to least constrained, presumably so the more specific
// pattern is preferred - confirm how the selector orders them.
1109 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1110 (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
1111 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1112 (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
1113 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1114 (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
1115 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1116 (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
// int_nvvm_rsqrt_approx_* intrinsics, emitted as PTX rsqrt.approx for f32
// (with and without .ftz) and f64.
1122 def INT_NVVM_RSQRT_APPROX_FTZ_F
1123 : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
1124 int_nvvm_rsqrt_approx_ftz_f>;
1125 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
1126 Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
1127 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
1128 Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
// Floating-point add with an explicit rounding modifier. One record per
// int_nvvm_add_<mode>[_ftz]_{f,d} intrinsic; the mode and the optional .ftz
// flag are spelled out in the asm string of each record.
// f32 variants: every rounding mode exists with and without .ftz.
1134 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
1135 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
1136 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
1137 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
1138 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
1139 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
1140 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
1141 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
1142 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
1143 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
1144 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
1145 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
1146 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
1147 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
1148 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
1149 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
// f64 variants: the four rounding modes only, no .ftz forms.
1151 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
1152 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
1153 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
1154 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
1155 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
1156 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
1157 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
1158 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
// f64 -> f32 conversion intrinsics. Each int_nvvm_d2f_<mode>[_ftz] intrinsic
// is rewritten to CVT_f32_f64 with the conversion-mode operand of the same
// name (CvtRN, CvtRZ, CvtRM, CvtRP and their _FTZ counterparts).
1164 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
1165 (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
1166 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
1167 (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
1168 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
1169 (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
1170 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
1171 (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
1172 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
1173 (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
1174 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
1175 (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
1176 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
1177 (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
1178 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
1179 (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
// f64 -> 32-bit integer conversion intrinsics. The signed forms (d2i) select
// CVT_s32_f64; the mode operand is the "I"-suffixed variant of the rounding
// mode named in the intrinsic (CvtRNI, CvtRZI, CvtRMI, CvtRPI).
1181 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
1182 (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
1183 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
1184 (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
1185 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
1186 (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
1187 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
1188 (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
// Unsigned forms (d2ui) select CVT_u32_f64 with the same mode operands.
1190 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
1191 (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
1192 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
1193 (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
1194 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
1195 (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
1196 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
1197 (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
// 32-bit integer -> f64 conversion intrinsics. The signed forms (i2d) select
// CVT_f64_s32 with the mode operand named in the intrinsic (CvtRN, CvtRZ,
// CvtRM, CvtRP).
1199 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
1200 (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
1201 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
1202 (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
1203 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
1204 (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
1205 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
1206 (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
// Unsigned forms (ui2d) select CVT_f64_u32 with the same mode operands.
1208 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
1209 (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
1210 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
1211 (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
1212 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
1213 (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
1214 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
1215 (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
// f32 -> 32-bit integer conversion intrinsics. The signed forms (f2i) select
// CVT_s32_f32; the mode operand is the "I"-suffixed variant of the rounding
// mode named in the intrinsic, with an _FTZ suffix for the ftz intrinsics.
1217 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
1218 (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1219 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
1220 (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
1221 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
1222 (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1223 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
1224 (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
1225 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
1226 (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1227 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
1228 (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
1229 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
1230 (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1231 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
1232 (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
// Unsigned forms (f2ui) select CVT_u32_f32 with the same mode operands.
1234 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
1235 (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1236 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
1237 (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
1238 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
1239 (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1240 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
1241 (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
1242 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
1243 (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1244 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
1245 (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
1246 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
1247 (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1248 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
1249 (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
// 32-bit integer -> f32 conversion intrinsics. The signed forms (i2f) select
// CVT_f32_s32 with the mode operand named in the intrinsic (CvtRN, CvtRZ,
// CvtRM, CvtRP).
1251 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
1252 (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
1253 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
1254 (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
1255 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
1256 (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
1257 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
1258 (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
// Unsigned forms (ui2f) select CVT_f32_u32 with the same mode operands.
1260 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
1261 (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
1262 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
1263 (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
1264 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
1265 (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
1266 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
1267 (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
// Two f32 values -> packed bf16x2. Each intrinsic is rewritten to
// CVT_bf16x2_f32 with both sources and the mode operand named in the
// intrinsic (CvtRN, CvtRN_RELU, CvtRZ, CvtRZ_RELU).
1269 def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
1270 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
1271 def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
1272 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
1273 def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
1274 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
1275 def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
1276 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
// Two f32 values -> packed f16x2, via CVT_f16x2_f32 with the same four modes.
1278 def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
1279 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
1280 def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
1281 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
1282 def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
1283 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
1284 def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
1285 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
// Single f32 value -> bf16, via CVT_bf16_f32 with the same four modes.
1287 def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
1288 (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
1289 def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
1290 (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
1291 def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
1292 (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
1293 def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
1294 (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;
// f32 -> tf32 conversion for int_nvvm_f2tf32_rna: emits cvt.rna.tf32.f32 and
// produces the result in a 32-bit integer register.
// NOTE(review): the "def <name> :" header line of this record is not visible
// in this listing.
1297 NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
1298 "cvt.rna.tf32.f32 \t$dest, $a;",
1299 [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;
// int_nvvm_lohi_i2d: builds an f64 register from two 32-bit integer registers
// with a single mov.b64 whose source is the brace-enclosed pair
// ($src0, $src1).
// NOTE(review): the doubled braces "{{" / "}}" are presumably the escape for a
// literal brace in the asm string - confirm.
1301 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
1302 Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
// int_nvvm_d2i_lo: extracts one 32-bit half of an f64 register. The asm is a
// small multi-line sequence that declares a scratch register %temp and
// unpacks $src0 with mov.b64 into the pair ($dst, %temp), so $dst receives
// the first element of the pair.
// NOTE(review): the final line(s) of the !strconcat argument list are not
// visible in this listing.
1304 def INT_NVVM_D2I_LO : F_MATH_1<
1305 !strconcat("{{\n\t",
1306 ".reg .b32 %temp; \n\t",
1307 "mov.b64 \t{$dst, %temp}, $src0;\n\t",
1309 Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
// int_nvvm_d2i_hi: same sequence, but the pair is (%temp, $dst), so $dst
// receives the second element of the pair.
1310 def INT_NVVM_D2I_HI : F_MATH_1<
1311 !strconcat("{{\n\t",
1312 ".reg .b32 %temp; \n\t",
1313 "mov.b64 \t{%temp, $dst}, $src0;\n\t",
1315 Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
// f32 -> 64-bit integer conversion intrinsics. The signed forms (f2ll) select
// CVT_s64_f32; the mode operand is the "I"-suffixed variant of the rounding
// mode named in the intrinsic, with an _FTZ suffix for the ftz intrinsics.
1317 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
1318 (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1319 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
1320 (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
1321 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
1322 (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1323 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
1324 (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
1325 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
1326 (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1327 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
1328 (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
1329 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
1330 (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1331 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
1332 (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
// Unsigned forms (f2ull) select CVT_u64_f32 with the same mode operands.
1334 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
1335 (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1336 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
1337 (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
1338 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
1339 (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1340 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
1341 (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
1342 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
1343 (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1344 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
1345 (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
1346 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
1347 (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1348 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
1349 (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
// f64 -> 64-bit integer conversion intrinsics. The signed forms (d2ll) select
// CVT_s64_f64; the mode operand is the "I"-suffixed variant of the rounding
// mode named in the intrinsic (CvtRNI, CvtRZI, CvtRMI, CvtRPI).
1351 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
1352 (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
1353 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
1354 (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
1355 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
1356 (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
1357 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
1358 (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
// Unsigned forms (d2ull) select CVT_u64_f64 with the same mode operands.
1360 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
1361 (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
1362 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
1363 (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
1364 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
1365 (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
1366 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
1367 (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
// 64-bit integer -> floating-point conversion intrinsics. Every pattern takes
// an Int64Regs source and passes the mode operand named in the intrinsic
// (CvtRN, CvtRZ, CvtRM, CvtRP).
// Signed 64-bit -> f32 (CVT_f32_s64).
1369 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
1370 (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
1371 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
1372 (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
1373 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
1374 (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
1375 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
1376 (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
// Unsigned 64-bit -> f32 (CVT_f32_u64).
1378 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
1379 (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
1380 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
1381 (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
1382 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
1383 (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
1384 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
1385 (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
// Signed 64-bit -> f64 (CVT_f64_s64).
1387 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
1388 (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
1389 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
1390 (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
1391 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
1392 (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
1393 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
1394 (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
// Unsigned 64-bit -> f64 (CVT_f64_u64).
1396 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
1397 (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
1398 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
1399 (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
1400 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
1401 (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
1402 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
1403 (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
// f32 -> f16 conversion intrinsics: int_nvvm_f2h_rn[_ftz] is rewritten to
// CVT_f16_f32 with the CvtRN / CvtRN_FTZ mode operand.
1406 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
1407 (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
1408 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
1409 (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
// int_nvvm_bitcast_* intrinsics. Each is emitted as a plain mov between a
// floating-point and an integer register class of the same width: mov.b32
// for f32 <-> i32 and mov.b64 for f64 <-> i64.
1415 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
1416 Float32Regs, int_nvvm_bitcast_f2i>;
1417 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
1418 Int32Regs, int_nvvm_bitcast_i2f>;
1420 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
1421 Int64Regs, int_nvvm_bitcast_ll2d>;
1422 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
1423 Float64Regs, int_nvvm_bitcast_d2ll>;
// Common base for the fns.b32 instruction records.
//   ins      - input operand dag; it must name its operands $mask, $base and
//              $offset because the asm string refers to them by those names
//   Operands - the dag to match; its value is assigned to $dst
// The result is always a 32-bit integer register, and every record derived
// from this class requires hasPTX<60> and hasSM<30>.
1429 class INT_FNS_MBO<dag ins, dag Operands>
1430 : NVPTXInst<(outs Int32Regs:$dst), ins,
1431 "fns.b32 \t$dst, $mask, $base, $offset;",
1432 [(set Int32Regs:$dst, Operands )]>,
1433 Requires<[hasPTX<60>, hasSM<30>]>;
// One record for every register/immediate combination of the three
// int_nvvm_fns operands. The three-letter suffix gives the operand kind in
// the order mask, base, offset: "r" = Int32Regs register, "i" = i32imm
// immediate (matched with imm in the pattern).
1435 def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
1436 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1437 def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
1438 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
1439 def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
1440 (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
1441 def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
1442 (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
1443 def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
1444 (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1445 def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
1446 (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
1447 def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
1448 (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
1449 def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
1450 (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
1452 //-----------------------------------
1454 //-----------------------------------
// PatFrag wrappers that attach an address-space predicate to an atomic
// pattern fragment. Each class forwards ops and frag unchanged and supplies
// AS_match.global, AS_match.shared or AS_match.generic as the predicate code.
// NOTE(review): AS_match is defined outside this chunk; presumably each field
// checks the memory node's address space - confirm against its definition.
1456 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
1457 : PatFrag<ops, frag, AS_match.global>;
1458 class ATOMIC_SHARED_CHK <dag ops, dag frag>
1459 : PatFrag<ops, frag, AS_match.shared>;
1460 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
1461 : PatFrag<ops, frag, AS_match.generic>;
// Two-operand atomic (address plus one value) for a single pointer type.
//   ptrT / ptrclass - value type and register class of the address operand
//   regT / regclass - value type and register class of the value and result
//   SpaceStr, OpcStr, TypeStr - concatenated after "atom", in that order, to
//                     form the PTX opcode
//   IntOp           - pattern fragment to match
//   IMMType / IMM   - operand type and SDNode used by the immediate form
//   Pred            - predicate list
// Defines two records: "reg", whose value operand $b is a register, and
// "imm", whose value operand $b is an immediate.
// NOTE(review): the trailing line of each def (presumably the Requires<Pred>
// clause) and the closing brace are not visible in this listing.
1463 multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1464 ValueType regT, NVPTXRegClass regclass,
1465 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1466 Operand IMMType, SDNode IMM, list<Predicate> Pred> {
1467 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1468 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
1469 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
// The extra "" argument to !strconcat below appends nothing to the string.
1471 def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
1472 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
1473 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
// Pointer-width wrapper around F_ATOMIC_2_imp.
// `p32` instantiates it with an i32 address in Int32Regs and `p64` with an
// i64 address in Int64Regs; every other argument is forwarded unchanged.
// Pred is optional and defaults to the empty list.
1476 multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1477 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
1478 list<Predicate> Pred = []> {
1479 defm p32 : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1480 IntOp, IMMType, IMM, Pred>;
1481 defm p64 : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1482 IntOp, IMMType, IMM, Pred>;
1485 // Two-operand atomic whose second operand is negated before it is used.
// The asm text declares a scratch register `temp` of type .s<TypeStr>,
// writes neg.s<TypeStr> of $b into it, and then issues
// "atom" # SpaceStr # OpcStr # ".u" # TypeStr with `temp` as the value operand.
// Because TypeStr is spliced directly after ".s" and ".u", it has to be a
// bare width (the instantiations in this file pass "32" or "64").
// Only a register form is defined; there is no immediate variant.
1486 multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1487 ValueType regT, NVPTXRegClass regclass,
1488 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1489 list<Predicate> Pred> {
1490 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1493 ".reg \t.s", TypeStr, " temp; \n\t",
1494 "neg.s", TypeStr, " \ttemp, $b; \n\t",
1495 "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
1497 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
// Pointer-width wrapper around F_ATOMIC_2_NEG_imp.
// `p32` uses an i32 address in Int32Regs and `p64` an i64 address in
// Int64Regs. Pred is optional and defaults to the empty list.
1500 multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass, string SpaceStr,
1501 string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
1502 defm p32: F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1504 defm p64: F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
// Three-operand atomic instructions (an address plus $b and $c) for a single
// pointer width. The mnemonic is "atom" # SpaceStr # OpcStr # TypeStr and the
// operands print as "$dst, [$addr], $b, $c".
// Four records cover the register/immediate combinations of $b and $c:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediates are always matched with the `imm` node here; unlike
// F_ATOMIC_2_imp there is no parameter for the immediate DAG node.
1509 multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1510 ValueType regT, NVPTXRegClass regclass,
1511 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1512 Operand IMMType, list<Predicate> Pred> {
1513 def reg : NVPTXInst<(outs regclass:$dst),
1514 (ins ptrclass:$addr, regclass:$b, regclass:$c),
1515 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1516 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>,
1519 def imm1 : NVPTXInst<(outs regclass:$dst),
1520 (ins ptrclass:$addr, IMMType:$b, regclass:$c),
1521 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1522 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>,
1525 def imm2 : NVPTXInst<(outs regclass:$dst),
1526 (ins ptrclass:$addr, regclass:$b, IMMType:$c),
// The trailing "" argument adds nothing to the concatenated string.
1527 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
1528 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>,
1531 def imm3 : NVPTXInst<(outs regclass:$dst),
1532 (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
1533 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1534 [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
// Pointer-width wrapper around F_ATOMIC_3_imp.
// `p32` instantiates it with an i32 address in Int32Regs and `p64` with an
// i64 address in Int64Regs; every other argument is forwarded unchanged.
// Pred is optional and defaults to the empty list.
1537 multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1538 string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
1539 defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1540 IntOp, IMMType, Pred>;
1541 defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1542 IntOp, IMMType, Pred>;
// Address-space-qualified fragments for atomic add.
// Suffix _g wraps the node in ATOMIC_GLOBAL_CHK, _s in ATOMIC_SHARED_CHK and
// _gen in ATOMIC_GENERIC_CHK.
// The _32 and _64 fragments wrap atomic_load_add_32 and atomic_load_add_64.
// The unsized atomic_load_add_{g,s,gen} fragments wrap atomic_load_fadd and
// are the ones used by the floating-point instructions.
1547 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1548 (atomic_load_add_32 node:$a, node:$b)>;
1549 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1550 (atomic_load_add_32 node:$a, node:$b)>;
1551 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1552 (atomic_load_add_32 node:$a, node:$b)>;
1553 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1554 (atomic_load_add_64 node:$a, node:$b)>;
1555 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1556 (atomic_load_add_64 node:$a, node:$b)>;
1557 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1558 (atomic_load_add_64 node:$a, node:$b)>;
1559 def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1560 (atomic_load_fadd node:$a, node:$b)>;
1561 def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1562 (atomic_load_fadd node:$a, node:$b)>;
1563 def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1564 (atomic_load_fadd node:$a, node:$b)>;
// atom.add instructions, built with F_ATOMIC_2 and OpcStr ".add".
// Name suffixes: _G_ emits ".global", _S_ emits ".shared", _GEN_ emits no
// space qualifier. The _GEN_*_USE_G entries match the same generic fragment
// as _GEN_ but emit ".global".

// 32-bit integer add (.u32).
1566 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
1567 atomic_load_add_32_g, i32imm, imm>;
1568 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".add",
1569 atomic_load_add_32_s, i32imm, imm>;
1570 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".add",
1571 atomic_load_add_32_gen, i32imm, imm>;
1572 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1573 ".add", atomic_load_add_32_gen, i32imm, imm>;
// 64-bit integer add (.u64).
1575 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
1576 atomic_load_add_64_g, i64imm, imm>;
1577 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".add",
1578 atomic_load_add_64_s, i64imm, imm>;
1579 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
1580 atomic_load_add_64_gen, i64imm, imm>;
1581 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1582 ".add", atomic_load_add_64_gen, i64imm, imm>;
// f32 add (.f32); immediates are matched with fpimm.
1584 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
1585 atomic_load_add_g, f32imm, fpimm>;
1586 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
1587 atomic_load_add_s, f32imm, fpimm>;
1588 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<f32, Float32Regs, "", ".f32", ".add",
1589 atomic_load_add_gen, f32imm, fpimm>;
// f64 add (.f64); these additionally pass the hasAtomAddF64 predicate.
1591 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<f64, Float64Regs, ".global", ".f64", ".add",
1592 atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
1593 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<f64, Float64Regs, ".shared", ".f64", ".add",
1594 atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
1595 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<f64, Float64Regs, "", ".f64", ".add",
1596 atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
// Address-space-qualified fragments for atomic subtract.
// Suffix _g wraps the node in ATOMIC_GLOBAL_CHK, _s in ATOMIC_SHARED_CHK and
// _gen in ATOMIC_GENERIC_CHK; _32/_64 select atomic_load_sub_32 or
// atomic_load_sub_64.
1600 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1601 (atomic_load_sub_32 node:$a, node:$b)>;
1602 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1603 (atomic_load_sub_32 node:$a, node:$b)>;
1604 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1605 (atomic_load_sub_32 node:$a, node:$b)>;
1606 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1607 (atomic_load_sub_64 node:$a, node:$b)>;
1608 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1609 (atomic_load_sub_64 node:$a, node:$b)>;
1610 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1611 (atomic_load_sub_64 node:$a, node:$b)>;
// Atomic subtract instructions. They are built with F_ATOMIC_2_NEG and
// OpcStr ".add", i.e. the subtract fragments are emitted as an add of the
// negated operand. TypeStr is the bare width ("32"/"64") because
// F_ATOMIC_2_NEG_imp adds the ".s"/".u" prefixes itself.
// The _GEN_*_USE_G entries match the generic fragment but emit ".global".
1613 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
1614 atomic_load_sub_32_g>;
1615 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
1616 atomic_load_sub_64_g>;
1617 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<i32, Int32Regs, "", "32", ".add",
1618 atomic_load_sub_32_gen>;
1619 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32",
1620 ".add", atomic_load_sub_32_gen>;
1621 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".shared", "32", ".add",
1622 atomic_load_sub_32_s>;
1623 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".shared", "64", ".add",
1624 atomic_load_sub_64_s>;
1625 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<i64, Int64Regs, "", "64", ".add",
1626 atomic_load_sub_64_gen>;
1627 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64",
1628 ".add", atomic_load_sub_64_gen>;
// Address-space-qualified fragments for atomic swap.
// Suffix _g wraps the node in ATOMIC_GLOBAL_CHK, _s in ATOMIC_SHARED_CHK and
// _gen in ATOMIC_GENERIC_CHK; _32/_64 select atomic_swap_32 or atomic_swap_64.
1632 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1633 (atomic_swap_32 node:$a, node:$b)>;
1634 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1635 (atomic_swap_32 node:$a, node:$b)>;
1636 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1637 (atomic_swap_32 node:$a, node:$b)>;
1638 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1639 (atomic_swap_64 node:$a, node:$b)>;
1640 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1641 (atomic_swap_64 node:$a, node:$b)>;
1642 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1643 (atomic_swap_64 node:$a, node:$b)>;
// atom.exch instructions, built with F_ATOMIC_2, OpcStr ".exch" and the
// untyped .b32/.b64 type strings.
// _G_ emits ".global", _S_ emits ".shared", _GEN_ emits no space qualifier;
// the _GEN_*_USE_G entries match the generic fragment but emit ".global".
1645 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
1646 atomic_swap_32_g, i32imm, imm>;
1647 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".exch",
1648 atomic_swap_32_s, i32imm, imm>;
1649 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".exch",
1650 atomic_swap_32_gen, i32imm, imm>;
1651 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1652 ".exch", atomic_swap_32_gen, i32imm, imm>;
1653 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
1654 atomic_swap_64_g, i64imm, imm>;
1655 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".exch",
1656 atomic_swap_64_s, i64imm, imm>;
1657 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".exch",
1658 atomic_swap_64_gen, i64imm, imm>;
1659 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1660 ".exch", atomic_swap_64_gen, i64imm, imm>;
// Address-space-qualified fragments for atomic max.
// atomic_load_max_* wrap atomic_load_max_32/_64 and atomic_load_umax_* wrap
// atomic_load_umax_32/_64. Suffix _g uses ATOMIC_GLOBAL_CHK, _s
// ATOMIC_SHARED_CHK and _gen ATOMIC_GENERIC_CHK.
1664 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1665 , (atomic_load_max_32 node:$a, node:$b)>;
1666 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1667 (atomic_load_max_32 node:$a, node:$b)>;
1668 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1669 (atomic_load_max_32 node:$a, node:$b)>;
1670 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1671 , (atomic_load_max_64 node:$a, node:$b)>;
1672 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1673 (atomic_load_max_64 node:$a, node:$b)>;
1674 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1675 (atomic_load_max_64 node:$a, node:$b)>;
1676 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1677 (atomic_load_umax_32 node:$a, node:$b)>;
1678 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1679 (atomic_load_umax_32 node:$a, node:$b)>;
1680 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1681 (atomic_load_umax_32 node:$a, node:$b)>;
1682 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1683 (atomic_load_umax_64 node:$a, node:$b)>;
1684 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1685 (atomic_load_umax_64 node:$a, node:$b)>;
1686 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1687 (atomic_load_umax_64 node:$a, node:$b)>;
// atom.max instructions, built with F_ATOMIC_2 and OpcStr ".max".
// The MAX entries use the atomic_load_max_* fragments with .s32/.s64; the
// UMAX entries use the atomic_load_umax_* fragments with .u32/.u64.
// All 64-bit entries pass the [hasSM<32>] predicate list.
// _G_ emits ".global", _S_ emits ".shared", _GEN_ emits no space qualifier;
// the _GEN_*_USE_G entries match the generic fragment but emit ".global".
1689 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
1690 ".max", atomic_load_max_32_g, i32imm, imm>;
1691 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
1692 ".max", atomic_load_max_32_s, i32imm, imm>;
1693 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".max",
1694 atomic_load_max_32_gen, i32imm, imm>;
1695 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1696 ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
1697 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
1698 ".max", atomic_load_max_64_g, i64imm, imm, [hasSM<32>]>;
1699 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
1700 ".max", atomic_load_max_64_s, i64imm, imm, [hasSM<32>]>;
1701 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".max",
1702 atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
1703 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1704 ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
1705 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1706 ".max", atomic_load_umax_32_g, i32imm, imm>;
1707 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
1708 ".max", atomic_load_umax_32_s, i32imm, imm>;
1709 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".max",
1710 atomic_load_umax_32_gen, i32imm, imm>;
1711 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1712 ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
1713 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1714 ".max", atomic_load_umax_64_g, i64imm, imm, [hasSM<32>]>;
1715 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
1716 ".max", atomic_load_umax_64_s, i64imm, imm, [hasSM<32>]>;
1717 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".max",
1718 atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
1719 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1720 ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
// Address-space-qualified fragments for atomic min.
// atomic_load_min_* wrap atomic_load_min_32/_64 and atomic_load_umin_* wrap
// atomic_load_umin_32/_64. Suffix _g uses ATOMIC_GLOBAL_CHK, _s
// ATOMIC_SHARED_CHK and _gen ATOMIC_GENERIC_CHK.
1724 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1725 (atomic_load_min_32 node:$a, node:$b)>;
1726 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1727 (atomic_load_min_32 node:$a, node:$b)>;
1728 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1729 (atomic_load_min_32 node:$a, node:$b)>;
1730 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1731 (atomic_load_min_64 node:$a, node:$b)>;
1732 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1733 (atomic_load_min_64 node:$a, node:$b)>;
1734 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1735 (atomic_load_min_64 node:$a, node:$b)>;
1736 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1737 (atomic_load_umin_32 node:$a, node:$b)>;
1738 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1739 (atomic_load_umin_32 node:$a, node:$b)>;
1740 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1741 (atomic_load_umin_32 node:$a, node:$b)>;
1742 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1743 (atomic_load_umin_64 node:$a, node:$b)>;
1744 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1745 (atomic_load_umin_64 node:$a, node:$b)>;
1746 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1747 (atomic_load_umin_64 node:$a, node:$b)>;
// atom.min instructions, built with F_ATOMIC_2 and OpcStr ".min".
// The MIN entries use the atomic_load_min_* fragments with .s32/.s64; the
// UMIN entries use the atomic_load_umin_* fragments with .u32/.u64.
// All 64-bit entries pass the [hasSM<32>] predicate list.
// _G_ emits ".global", _S_ emits ".shared", _GEN_ emits no space qualifier;
// the _GEN_*_USE_G entries match the generic fragment but emit ".global".
1749 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
1750 ".min", atomic_load_min_32_g, i32imm, imm>;
1751 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
1752 ".min", atomic_load_min_32_s, i32imm, imm>;
1753 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".min",
1754 atomic_load_min_32_gen, i32imm, imm>;
1755 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1756 ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
1757 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
1758 ".min", atomic_load_min_64_g, i64imm, imm, [hasSM<32>]>;
1759 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
1760 ".min", atomic_load_min_64_s, i64imm, imm, [hasSM<32>]>;
1761 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".min",
1762 atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
1763 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1764 ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
1765 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1766 ".min", atomic_load_umin_32_g, i32imm, imm>;
1767 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
1768 ".min", atomic_load_umin_32_s, i32imm, imm>;
1769 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".min",
1770 atomic_load_umin_32_gen, i32imm, imm>;
1771 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1772 ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
1773 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1774 ".min", atomic_load_umin_64_g, i64imm, imm, [hasSM<32>]>;
1775 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
1776 ".min", atomic_load_umin_64_s, i64imm, imm, [hasSM<32>]>;
1777 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".min",
1778 atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
1779 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1780 ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
1782 // atom_inc atom_dec
// Address-space-qualified fragments for atomic increment and decrement.
// Unlike the other fragments in this section, these wrap intrinsics
// (int_nvvm_atomic_load_inc_32 / int_nvvm_atomic_load_dec_32) rather than
// atomic_* nodes. Only 32-bit fragments are defined.
// Suffix _g uses ATOMIC_GLOBAL_CHK, _s ATOMIC_SHARED_CHK and _gen
// ATOMIC_GENERIC_CHK.
1784 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1785 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1786 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1787 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1788 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1789 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1790 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1791 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1792 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1793 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1794 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1795 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
// atom.inc and atom.dec instructions, built with F_ATOMIC_2, type string
// ".u32" and OpcStr ".inc" / ".dec". Only 32-bit forms are defined.
// _G_ emits ".global", _S_ emits ".shared", _GEN_ emits no space qualifier;
// the _GEN_32_USE_G entries match the generic fragment but emit ".global".
1797 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
1798 atomic_load_inc_32_g, i32imm, imm>;
1799 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".inc",
1800 atomic_load_inc_32_s, i32imm, imm>;
1801 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".inc",
1802 atomic_load_inc_32_gen, i32imm, imm>;
1803 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1804 ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1805 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
1806 atomic_load_dec_32_g, i32imm, imm>;
1807 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".dec",
1808 atomic_load_dec_32_s, i32imm, imm>;
1809 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".dec",
1810 atomic_load_dec_32_gen, i32imm, imm>;
1811 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1812 ".dec", atomic_load_dec_32_gen, i32imm, imm>;
// Address-space-qualified fragments for atomic bitwise AND.
// Suffix _g wraps the node in ATOMIC_GLOBAL_CHK, _s in ATOMIC_SHARED_CHK and
// _gen in ATOMIC_GENERIC_CHK; _32/_64 select atomic_load_and_32 or
// atomic_load_and_64.
1816 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1817 (atomic_load_and_32 node:$a, node:$b)>;
1818 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1819 (atomic_load_and_32 node:$a, node:$b)>;
1820 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1821 (atomic_load_and_32 node:$a, node:$b)>;
1822 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1823 (atomic_load_and_64 node:$a, node:$b)>;
1824 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1825 (atomic_load_and_64 node:$a, node:$b)>;
1826 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1827 (atomic_load_and_64 node:$a, node:$b)>;
// atom.and instructions, built with F_ATOMIC_2, OpcStr ".and" and the
// untyped .b32/.b64 type strings. The 64-bit entries pass [hasSM<32>].
// _G_ emits ".global", _S_ emits ".shared", _GEN_ emits no space qualifier;
// the _GEN_*_USE_G entries match the generic fragment but emit ".global".
1829 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
1830 atomic_load_and_32_g, i32imm, imm>;
1831 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".and",
1832 atomic_load_and_32_s, i32imm, imm>;
1833 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".and",
1834 atomic_load_and_32_gen, i32imm, imm>;
1835 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1836 ".and", atomic_load_and_32_gen, i32imm, imm>;
1837 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
1838 atomic_load_and_64_g, i64imm, imm, [hasSM<32>]>;
1839 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".and",
1840 atomic_load_and_64_s, i64imm, imm, [hasSM<32>]>;
1841 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".and",
1842 atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
1843 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1844 ".and", atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
// Address-space-qualified fragments for atomic bitwise OR.
// Suffix _g wraps the node in ATOMIC_GLOBAL_CHK, _s in ATOMIC_SHARED_CHK and
// _gen in ATOMIC_GENERIC_CHK; _32/_64 select atomic_load_or_32 or
// atomic_load_or_64.
1848 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1849 (atomic_load_or_32 node:$a, node:$b)>;
1850 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1851 (atomic_load_or_32 node:$a, node:$b)>;
1852 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1853 (atomic_load_or_32 node:$a, node:$b)>;
1854 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1855 (atomic_load_or_64 node:$a, node:$b)>;
1856 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1857 (atomic_load_or_64 node:$a, node:$b)>;
1858 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1859 (atomic_load_or_64 node:$a, node:$b)>;
// atom.or instructions, built with F_ATOMIC_2, OpcStr ".or" and the untyped
// .b32/.b64 type strings. The 64-bit entries pass [hasSM<32>].
// _G_ emits ".global", _S_ emits ".shared", _GEN_ emits no space qualifier;
// the _GEN_*_USE_G entries match the generic fragment but emit ".global".
1861 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
1862 atomic_load_or_32_g, i32imm, imm>;
1863 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".or",
1864 atomic_load_or_32_gen, i32imm, imm>;
1865 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1866 ".or", atomic_load_or_32_gen, i32imm, imm>;
1867 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".or",
1868 atomic_load_or_32_s, i32imm, imm>;
1869 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
1870 atomic_load_or_64_g, i64imm, imm, [hasSM<32>]>;
1871 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".or",
1872 atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
1873 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1874 ".or", atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
1875 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".or",
1876 atomic_load_or_64_s, i64imm, imm, [hasSM<32>]>;
// Address-space-qualified fragments for atomic bitwise XOR.
// Suffix _g wraps the node in ATOMIC_GLOBAL_CHK, _s in ATOMIC_SHARED_CHK and
// _gen in ATOMIC_GENERIC_CHK; _32/_64 select atomic_load_xor_32 or
// atomic_load_xor_64.
1880 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1881 (atomic_load_xor_32 node:$a, node:$b)>;
1882 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1883 (atomic_load_xor_32 node:$a, node:$b)>;
1884 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1885 (atomic_load_xor_32 node:$a, node:$b)>;
1886 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1887 (atomic_load_xor_64 node:$a, node:$b)>;
1888 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1889 (atomic_load_xor_64 node:$a, node:$b)>;
1890 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1891 (atomic_load_xor_64 node:$a, node:$b)>;
// atom.xor instructions, built with F_ATOMIC_2, OpcStr ".xor" and the
// untyped .b32/.b64 type strings. The 64-bit entries pass [hasSM<32>].
// _G_ emits ".global", _S_ emits ".shared", _GEN_ emits no space qualifier;
// the _GEN_*_USE_G entries match the generic fragment but emit ".global".
1893 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
1894 atomic_load_xor_32_g, i32imm, imm>;
1895 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor",
1896 atomic_load_xor_32_s, i32imm, imm>;
1897 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor",
1898 atomic_load_xor_32_gen, i32imm, imm>;
1899 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1900 ".xor", atomic_load_xor_32_gen, i32imm, imm>;
1901 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
1902 atomic_load_xor_64_g, i64imm, imm, [hasSM<32>]>;
1903 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor",
1904 atomic_load_xor_64_s, i64imm, imm, [hasSM<32>]>;
1905 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
1906 atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
1907 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1908 ".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
// Address-space-qualified fragments for atomic compare-and-swap.
// These take three operands ($a, $b, $c) and wrap atomic_cmp_swap_32 or
// atomic_cmp_swap_64. Suffix _g uses ATOMIC_GLOBAL_CHK, _s ATOMIC_SHARED_CHK
// and _gen ATOMIC_GENERIC_CHK.
1912 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1913 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1914 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1915 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1916 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1917 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1918 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1919 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1920 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1921 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1922 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1923 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
// atom.cas instructions, built with the three-operand F_ATOMIC_3 multiclass,
// OpcStr ".cas" and the untyped .b32/.b64 type strings. No predicate list is
// passed, so F_ATOMIC_3's default (empty) Pred applies.
// _G_ emits ".global", _S_ emits ".shared", _GEN_ emits no space qualifier;
// the _GEN_*_USE_G entries match the generic fragment but emit ".global".
1925 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
1926 atomic_cmp_swap_32_g, i32imm>;
1927 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
1928 atomic_cmp_swap_32_s, i32imm>;
1929 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
1930 atomic_cmp_swap_32_gen, i32imm>;
1931 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32",
1932 ".cas", atomic_cmp_swap_32_gen, i32imm>;
1933 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
1934 atomic_cmp_swap_64_g, i64imm>;
1935 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
1936 atomic_cmp_swap_64_s, i64imm>;
1937 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
1938 atomic_cmp_swap_64_gen, i64imm>;
1939 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64",
1940 ".cas", atomic_cmp_swap_64_gen, i64imm>;
1942 // Support for scoped atomic operations. Matches
1943 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1944 // and converts it into the appropriate instruction.
1945 // NOTE: not all possible combinations are implemented
1946 // 'space' is limited to generic as it's the only one needed to support CUDA.
1947 // 'scope' = 'gpu' is default and is handled by regular atomic instructions.
//
// ATOM23_impl is the common base of the 2- and 3-operand scoped atomics:
// an NVPTXInst with a single output $result in `regclass`, the caller's
// input operand list `ins`, and a pattern that sets $result (typed regT)
// from the `Operands` dag. AsmStr and Preds are supplied by the caller.
1948 class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds,
1949 dag ins, dag Operands>
1950 : NVPTXInst<(outs regclass:$result), ins,
1952 [(set (regT regclass:$result), Operands)]>,
1955 // Define instruction variants for all addressing modes.
// Emits four anonymous ATOM23_impl records for a two-operand intrinsic
// `Intr`: the address $src is either an i32 in Int32Regs or an i64 in
// Int64Regs, and $b is either a register (regT/regclass) or an immediate
// (ImmType operand, matched as `ImmTy Imm`).
// The register forms are listed under `let AddedComplexity = 1`.
1956 multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
1957 ValueType regT, NVPTXRegClass regclass, Operand ImmType,
1958 SDNode Imm, ValueType ImmTy,
1959 list<Predicate> Preds> {
1960 let AddedComplexity = 1 in {
1961 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1962 (ins Int32Regs:$src, regclass:$b),
1963 (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
1964 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1965 (ins Int64Regs:$src, regclass:$b),
1966 (Intr (i64 Int64Regs:$src), (regT regclass:$b))>;
1968 // tablegen can't infer argument types from Intrinsic (though it can
1969 // from Instruction) so we have to enforce specific type on
1970 // immediates via explicit cast to ImmTy.
1971 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1972 (ins Int32Regs:$src, ImmType:$b),
1973 (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
1974 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1975 (ins Int64Regs:$src, ImmType:$b),
1976 (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>;
// Three-operand counterpart of ATOM2P_impl. Emits eight anonymous
// ATOM23_impl records for intrinsic `Intr`: every combination of a 32- or
// 64-bit address $src with $b and $c each being a register or an immediate.
// Immediates are cast to ImmTy in the pattern.
// Ordering by AddedComplexity: the register/register forms are listed under
// AddedComplexity = 2 and the mixed register/immediate forms under
// AddedComplexity = 1; the immediate/immediate forms come last.
1979 multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
1980 ValueType regT, NVPTXRegClass regclass,
1981 Operand ImmType, SDNode Imm, ValueType ImmTy,
1982 list<Predicate> Preds> {
1983 // Variants for register/immediate permutations of $b and $c
1984 let AddedComplexity = 2 in {
1985 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1986 (ins Int32Regs:$src, regclass:$b, regclass:$c),
1987 (Intr (i32 Int32Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
1988 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1989 (ins Int64Regs:$src, regclass:$b, regclass:$c),
1990 (Intr (i64 Int64Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
1992 let AddedComplexity = 1 in {
// $b immediate, $c register.
1993 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1994 (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1995 (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
1996 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1997 (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1998 (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
// $b register, $c immediate.
1999 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2000 (ins Int32Regs:$src, regclass:$b, ImmType:$c),
2001 (Intr (i32 Int32Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
2002 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2003 (ins Int64Regs:$src, regclass:$b, ImmType:$c),
2004 (Intr (i64 Int64Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
// $b immediate, $c immediate.
2006 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2007 (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
2008 (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
2009 def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2010 (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
2011 (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
2014 // Constructs intrinsic name and instruction asm strings.
// Asm string: "atom", then "." # SpaceStr unless SpaceStr is "gen", then
// "." # ScopeStr unless ScopeStr is "gpu", then "." # OpStr # "." # TypeStr
// and the operands "$result, [$src], $b".
// Intrinsic name: "int_nvvm_atomic_" # OpStr # "_" # SpaceStr # "_" #
// IntTypeStr, followed by "_" # ScopeStr unless ScopeStr is empty.
// NOTE(review): the asm string omits the scope when it equals "gpu", while
// the intrinsic name omits it only when it is empty - confirm this asymmetry
// is intended.
2015 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
2016 string ScopeStr, string SpaceStr,
2017 ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2018 ValueType ImmTy, list<Predicate> Preds> {
2019 defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2020 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
2021 # "." # OpStr # "." # TypeStr
2022 # " \t$result, [$src], $b;",
2024 "int_nvvm_atomic_" # OpStr
2025 # "_" # SpaceStr # "_" # IntTypeStr
2026 # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
2027 regT, regclass, ImmType, Imm, ImmTy, Preds>;
// Three-operand counterpart of ATOM2N_impl: builds the asm string and the
// intrinsic name and forwards them to ATOM3P_impl.
// Asm string: "atom", then "." # SpaceStr unless SpaceStr is "gen", then
// "." # ScopeStr unless ScopeStr is "gpu", then "." # OpStr # "." # TypeStr
// and the operands "$result, [$src], $b, $c".
// Intrinsic name: "int_nvvm_atomic_" # OpStr # "_" # SpaceStr # "_" #
// IntTypeStr, followed by "_" # ScopeStr unless ScopeStr is empty.
2029 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
2030 string ScopeStr, string SpaceStr,
2031 ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2032 ValueType ImmTy, list<Predicate> Preds> {
2033 defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2034 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
2035 # "." # OpStr # "." # TypeStr
2036 # " \t$result, [$src], $b, $c;",
2038 "int_nvvm_atomic_" # OpStr
2039 # "_" # SpaceStr # "_" # IntTypeStr
2040 # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
2041 regT, regclass, ImmType, Imm, ImmTy, Preds>;
2044 // Constructs variants for different address spaces.
2045 // For now we only need variants for generic space pointers.
// The single `_gen_` instantiation forwards every argument to ATOM2N_impl
// and fixes SpaceStr to "gen".
2046 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
2047 string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
2048 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
2049 defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
2050 regT, regclass, ImmType, Imm, ImmTy, Preds>;
// Address-space layer for three-operand scoped atomics. Only a `_gen_`
// variant is defined; it forwards every argument to ATOM3N_impl and fixes
// SpaceStr to "gen".
2052 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
2053 string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
2054 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
2055 defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
2056 regT, regclass, ImmType, Imm, ImmTy, Preds>;
2059 // Constructs variants for different scopes of atomic op.
// Instantiates ATOM2A_impl once per scope, `_cta` with ScopeStr "cta" and
// `_sys` with ScopeStr "sys". In both cases hasAtomScope is appended to the
// caller's Preds list.
2060 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
2061 ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2062 ValueType ImmTy, list<Predicate> Preds> {
2063 // .gpu scope is default and is currently covered by existing
2064 // atomics w/o explicitly specified scope.
2065 defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
2066 regT, regclass, ImmType, Imm, ImmTy,
2067 !listconcat(Preds,[hasAtomScope])>;
2068 defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
2069 regT, regclass, ImmType, Imm, ImmTy,
2070 !listconcat(Preds,[hasAtomScope])>;
// ATOM3S_impl: instantiates ATOM3A_impl for the "cta" and "sys" scopes (name
// fragments _cta and _sys). Both instantiations append hasAtomScope to the
// caller-supplied Preds.
2072 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
2073 ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
2074 list<Predicate> Preds> {
2075 // No need to define ".gpu"-scoped atomics. They do the same thing
2076 // as the regular, non-scoped atomics defined elsewhere.
2077 defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
2078 regT, regclass, ImmType, Imm, ImmTy,
2079 !listconcat(Preds,[hasAtomScope])>;
2080 defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
2081 regT, regclass, ImmType, Imm, ImmTy,
2082 !listconcat(Preds,[hasAtomScope])>;
// ATOM2_add_impl: scoped two-operand atomics for the types s32, u32, u64, f32
// and f64. Integer variants use IntTypeStr "i" with imm operands and an empty
// predicate list; floating-point variants use "f" with fpimm operands.
// NOTE(review): the embedded numbering skips 2091 and 2093, so the final
// argument (the predicate list) of the _f32 and _f64 instantiations is not
// visible in this listing -- confirm against the full file.
2086 multiclass ATOM2_add_impl<string OpStr> {
2087 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
2088 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
2089 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
2090 defm _f32 : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
2092 defm _f64 : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
2096 // atom.{and,or,xor}
// ATOM2_bitwise_impl: scoped two-operand atomics for b32 and b64. The b64
// variant is additionally gated on hasAtomBitwise64; b32 has no extra
// predicate.
2097 multiclass ATOM2_bitwise_impl<string OpStr> {
2098 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2099 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64,
2100 [hasAtomBitwise64]>;
// ATOM2_exch_impl: scoped two-operand atomics for b32 and b64, both with an
// empty extra-predicate list.
2104 multiclass ATOM2_exch_impl<string OpStr> {
2105 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2106 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
// ATOM2_minmax_impl: scoped two-operand atomics for s32, u32, s64 and u64.
// The 32-bit variants carry an empty extra-predicate list.
// NOTE(review): the embedded numbering skips 2114 and 2116, so the final
// argument (the predicate list) of the _s64 and _u64 instantiations is not
// visible in this listing -- confirm against the full file.
2110 multiclass ATOM2_minmax_impl<string OpStr> {
2111 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
2112 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
2113 defm _s64 : ATOM2S_impl<OpStr, "i", "s64", i64, Int64Regs, i64imm, imm, i64,
2115 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64,
// ATOM2_incdec_impl: scoped two-operand atomic for u32 only, with an empty
// extra-predicate list.
2120 multiclass ATOM2_incdec_impl<string OpStr> {
2121 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
// ATOM3_cas_impl: scoped three-operand atomics (via ATOM3S_impl) for b32 and
// b64, both with an empty extra-predicate list.
2125 multiclass ATOM3_cas_impl<string OpStr> {
2126 defm _b32 : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2127 defm _b64 : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
// Scoped-atomic instantiations, one per operation. The string argument is the
// OpStr that the *_impl multiclasses pass down; it is used both in the
// assembly mnemonic and in the intrinsic name. Only "cas" uses the
// three-operand (ATOM3) family.
2130 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
2131 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
2132 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
2133 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
2134 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
2135 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
2136 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
2137 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
2138 defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
2139 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
2141 //-----------------------------------
2142 // Support for ldu on sm_20 or later
2143 //-----------------------------------
2145 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
2146 // read-only in a kernel.
// LDU_G: defines five single-result "ldu.global." instruction records that
// differ only in the class of the address operand $src:
//   areg   - Int32Regs      areg64 - Int64Regs
//   avar   - imemAny
//   ari    - MEMri          ari64  - MEMri64
// TyStr is appended verbatim after "ldu.global.", so it must supply the rest
// of the assembly text. Every record has an empty selection pattern list and
// requires hasLDU.
2150 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
2151 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2152 !strconcat("ldu.global.", TyStr),
2153 []>, Requires<[hasLDU]>;
2154 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2155 !strconcat("ldu.global.", TyStr),
2156 []>, Requires<[hasLDU]>;
2157 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2158 !strconcat("ldu.global.", TyStr),
2159 []>, Requires<[hasLDU]>;
2160 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2161 !strconcat("ldu.global.", TyStr),
2162 []>, Requires<[hasLDU]>;
2163 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2164 !strconcat("ldu.global.", TyStr),
2165 []>, Requires<[hasLDU]>;
// Scalar ldu instantiations. The string supplies the PTX type suffix and the
// operand list. The i8 variant loads .u8 into the 16-bit register class
// (Int16Regs), the same class used for i16.
2168 defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
2169 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
2170 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
2171 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
2172 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
2173 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
2177 // Elementized vector ldu
// VLDU_G_ELE_V2: two-result "ldu.global." records ($dst1, $dst2), one per
// addressing form (_areg32, _areg64, _ari32, _ari64, _avar). TyStr is appended
// verbatim after "ldu.global."; all pattern lists are empty.
// NOTE(review): the embedded numbering skips 2186, 2189 and 2192, so the
// (ins ...) operand lists of _ari32, _ari64 and _avar are not visible in this
// listing -- confirm against the full file.
2178 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2179 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2180 (ins Int32Regs:$src),
2181 !strconcat("ldu.global.", TyStr), []>;
2182 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2183 (ins Int64Regs:$src),
2184 !strconcat("ldu.global.", TyStr), []>;
2185 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2187 !strconcat("ldu.global.", TyStr), []>;
2188 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2190 !strconcat("ldu.global.", TyStr), []>;
2191 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2193 !strconcat("ldu.global.", TyStr), []>;
// VLDU_G_ELE_V4: four-result "ldu.global." records ($dst1..$dst4), one per
// addressing form of $src:
//   _areg32 - Int32Regs   _areg64 - Int64Regs
//   _ari32  - MEMri       _ari64  - MEMri64
//   _avar   - imemAny
// TyStr is appended verbatim after "ldu.global."; all pattern lists are empty.
2196 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2197 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2198 regclass:$dst4), (ins Int32Regs:$src),
2199 !strconcat("ldu.global.", TyStr), []>;
2200 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2201 regclass:$dst4), (ins Int64Regs:$src),
2202 !strconcat("ldu.global.", TyStr), []>;
2203 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2204 regclass:$dst4), (ins MEMri:$src),
2205 !strconcat("ldu.global.", TyStr), []>;
2206 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2207 regclass:$dst4), (ins MEMri64:$src),
2208 !strconcat("ldu.global.", TyStr), []>;
2209 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2210 regclass:$dst4), (ins imemAny:$src),
2211 !strconcat("ldu.global.", TyStr), []>;
// Vector ldu instantiations. The string supplies the vector width, the PTX
// element type and the operand list; "{{" and "}}" wrap the destination list.
// Both i8 and i16 element variants use Int16Regs.
// NOTE(review): the embedded numbering skips 2230, 2233, 2236, 2239 and 2242,
// so the register-class argument of the v4i16, v4i32, v4f16, v4f16x2 and
// v4f32 instantiations is not visible in this listing -- confirm against the
// full file.
2214 defm INT_PTX_LDU_G_v2i8_ELE
2215 : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2216 defm INT_PTX_LDU_G_v2i16_ELE
2217 : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2218 defm INT_PTX_LDU_G_v2i32_ELE
2219 : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2220 defm INT_PTX_LDU_G_v2f32_ELE
2221 : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2222 defm INT_PTX_LDU_G_v2i64_ELE
2223 : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2224 defm INT_PTX_LDU_G_v2f64_ELE
2225 : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2226 defm INT_PTX_LDU_G_v4i8_ELE
2227 : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2228 defm INT_PTX_LDU_G_v4i16_ELE
2229 : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2231 defm INT_PTX_LDU_G_v4i32_ELE
2232 : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2234 defm INT_PTX_LDU_G_v4f16_ELE
2235 : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2237 defm INT_PTX_LDU_G_v4f16x2_ELE
2238 : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2240 defm INT_PTX_LDU_G_v4f32_ELE
2241 : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2245 //-----------------------------------
2246 // Support for ldg on sm_35 or later
2247 //-----------------------------------
2249 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
2250 // non-coherent texture cache, and therefore the values read must be read-only
2251 // during the lifetime of the kernel.
// LDG_G: defines five single-result "ld.global.nc." instruction records that
// differ only in the class of the address operand $src:
//   areg   - Int32Regs      areg64 - Int64Regs
//   avar   - imemAny
//   ari    - MEMri          ari64  - MEMri64
// TyStr is appended verbatim after "ld.global.nc.", so it must supply the rest
// of the assembly text. Every record has an empty selection pattern list and
// requires hasLDG.
2253 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
2254 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2255 !strconcat("ld.global.nc.", TyStr),
2256 []>, Requires<[hasLDG]>;
2257 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2258 !strconcat("ld.global.nc.", TyStr),
2259 []>, Requires<[hasLDG]>;
2260 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2261 !strconcat("ld.global.nc.", TyStr),
2262 []>, Requires<[hasLDG]>;
2263 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2264 !strconcat("ld.global.nc.", TyStr),
2265 []>, Requires<[hasLDG]>;
2266 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2267 !strconcat("ld.global.nc.", TyStr),
2268 []>, Requires<[hasLDG]>;
// Scalar ldg instantiations. The string supplies the PTX type suffix and the
// operand list. The i8 variant loads .u8 into the 16-bit register class
// (Int16Regs), the same class used for i16.
2271 defm INT_PTX_LDG_GLOBAL_i8
2272 : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
2273 defm INT_PTX_LDG_GLOBAL_i16
2274 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
2275 defm INT_PTX_LDG_GLOBAL_i32
2276 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
2277 defm INT_PTX_LDG_GLOBAL_i64
2278 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2279 defm INT_PTX_LDG_GLOBAL_f32
2280 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
2281 defm INT_PTX_LDG_GLOBAL_f64
2282 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2286 // Elementized vector ldg
// VLDG_G_ELE_V2: two-result "ld.global.nc." records ($dst1, $dst2), one per
// addressing form (_areg32, _areg64, _ari32, _ari64, _avar). TyStr is appended
// verbatim after "ld.global.nc."; all pattern lists are empty.
// NOTE(review): the embedded numbering skips 2295, 2298 and 2301, so the
// (ins ...) operand lists of _ari32, _ari64 and _avar are not visible in this
// listing -- confirm against the full file.
2287 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2288 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2289 (ins Int32Regs:$src),
2290 !strconcat("ld.global.nc.", TyStr), []>;
2291 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2292 (ins Int64Regs:$src),
2293 !strconcat("ld.global.nc.", TyStr), []>;
2294 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2296 !strconcat("ld.global.nc.", TyStr), []>;
2297 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2299 !strconcat("ld.global.nc.", TyStr), []>;
2300 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2302 !strconcat("ld.global.nc.", TyStr), []>;
// VLDG_G_ELE_V4: four-result "ld.global.nc." records ($dst1..$dst4), one per
// addressing form of $src:
//   _areg32 - Int32Regs   _areg64 - Int64Regs
//   _ari32  - MEMri       _ari64  - MEMri64
//   _avar   - imemAny
// TyStr is appended verbatim after "ld.global.nc."; all pattern lists are
// empty.
2305 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2306 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2307 regclass:$dst4), (ins Int32Regs:$src),
2308 !strconcat("ld.global.nc.", TyStr), []>;
2309 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2310 regclass:$dst4), (ins Int64Regs:$src),
2311 !strconcat("ld.global.nc.", TyStr), []>;
2312 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2313 regclass:$dst4), (ins MEMri:$src),
2314 !strconcat("ld.global.nc.", TyStr), []>;
2315 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2316 regclass:$dst4), (ins MEMri64:$src),
2317 !strconcat("ld.global.nc.", TyStr), []>;
2318 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2319 regclass:$dst4), (ins imemAny:$src),
2320 !strconcat("ld.global.nc.", TyStr), []>;
2323 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ldg instantiations. The string supplies the vector width, the PTX
// element type and the operand list; "{{" and "}}" wrap the destination list.
// Both i8 and i16 element variants use Int16Regs.
2324 defm INT_PTX_LDG_G_v2i8_ELE
2325 : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2326 defm INT_PTX_LDG_G_v2i16_ELE
2327 : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2328 defm INT_PTX_LDG_G_v2i32_ELE
2329 : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2330 defm INT_PTX_LDG_G_v2f32_ELE
2331 : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2332 defm INT_PTX_LDG_G_v2i64_ELE
2333 : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2334 defm INT_PTX_LDG_G_v2f64_ELE
2335 : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2336 defm INT_PTX_LDG_G_v4i8_ELE
2337 : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2338 defm INT_PTX_LDG_G_v4i16_ELE
2339 : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2340 defm INT_PTX_LDG_G_v4i32_ELE
2341 : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
2342 defm INT_PTX_LDG_G_v4f32_ELE
2343 : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
// NG_TO_G: selects intrinsic Intrin to a "cvta.<Str>" instruction.
//   _yes      - 32-bit source and result, emitted as cvta.<Str>.u32.
//   _yes_64   - 64-bit source and result, emitted as cvta.<Str>.u64.
//   _yes_6432 - 32-bit source, 64-bit result. The source is first widened
//               with cvt.u64.u32 into a block-local %tmp register, then
//               converted with cvta.<Str>.u64. Requires useShortPtr.
2346 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
2347 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2348 !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
2349 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2350 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2351 !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
2352 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2353 def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
2354 "{{ .reg .b64 %tmp;\n\t"
2355 #" cvt.u64.u32 \t%tmp, $src;\n\t"
2356 #" cvta." # Str # ".u64 \t$result, %tmp; }}",
2357 [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
2358 Requires<[useShortPtr]>;
// G_TO_NG: selects intrinsic Intrin to a "cvta.to.<Str>" instruction.
//   _yes      - 32-bit source and result, emitted as cvta.to.<Str>.u32.
//   _yes_64   - 64-bit source and result, emitted as cvta.to.<Str>.u64.
//   _yes_3264 - 64-bit source, 32-bit result. The conversion is done at 64
//               bits into a block-local %tmp register, then narrowed with
//               cvt.u32.u64. Requires useShortPtr.
2361 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
2362 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2363 !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
2364 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2365 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2366 !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
2367 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2368 def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
2369 "{{ .reg .b64 %tmp;\n\t"
2370 #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
2371 #" cvt.u32.u64 \t$result, %tmp; }}",
2372 [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
2373 Requires<[useShortPtr]>;
// Address-space conversions for the local, shared, global and const spaces.
// cvta_<space> selects the <space>_to_gen intrinsics; cvta_to_<space> selects
// the gen_to_<space> intrinsics. The intrinsic names spell the const space as
// "constant", while the PTX mnemonic string uses "const".
2376 defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
2377 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
2378 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
2379 defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
2381 defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
2382 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
2383 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
2384 defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
2387 // nvvm.ptr.gen.to.param
// int_nvvm_ptr_gen_to_param is selected to a plain register move: mov.u32 for
// a 32-bit operand and mov.u64 (the _64 record) for a 64-bit operand.
2388 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
2389 (ins Int32Regs:$src),
2390 "mov.u32 \t$result, $src;",
2391 [(set Int32Regs:$result,
2392 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
2393 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
2394 (ins Int64Regs:$src),
2395 "mov.u64 \t$result, $src;",
2396 [(set Int64Regs:$result,
2397 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2400 // nvvm.move intrinsics
// Register-to-register move records, one per int_nvvm_move_* intrinsic.
// Integer moves use the untyped mov.b16/b32/b64 forms, float and double use
// mov.f32/mov.f64, and the two pointer moves (both matching
// int_nvvm_move_ptr) use mov.u32/mov.u64.
// NOTE(review): the embedded numbering skips 2403, 2407, 2411, 2423 and 2427,
// so the opening line of the selection pattern for the i16, i32, i64, ptr32
// and ptr64 records is not visible in this listing -- confirm against the
// full file.
2401 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
2402 "mov.b16 \t$r, $s;",
2404 (int_nvvm_move_i16 Int16Regs:$s))]>;
2405 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2406 "mov.b32 \t$r, $s;",
2408 (int_nvvm_move_i32 Int32Regs:$s))]>;
2409 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2410 "mov.b64 \t$r, $s;",
2412 (int_nvvm_move_i64 Int64Regs:$s))]>;
2413 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
2414 "mov.f32 \t$r, $s;",
2415 [(set Float32Regs:$r,
2416 (int_nvvm_move_float Float32Regs:$s))]>;
2417 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
2418 "mov.f64 \t$r, $s;",
2419 [(set Float64Regs:$r,
2420 (int_nvvm_move_double Float64Regs:$s))]>;
2421 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2422 "mov.u32 \t$r, $s;",
2424 (int_nvvm_move_ptr Int32Regs:$s))]>;
2425 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2426 "mov.u64 \t$r, $s;",
2428 (int_nvvm_move_ptr Int64Regs:$s))]>;
2430 // @TODO: Are these actually needed, or will we always just see symbols
2431 // copied to registers first?
2432 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2433 "mov.u32 \t$r, $s;",
2435 (int_nvvm_move_ptr texternalsym:$s))]>;
2436 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2437 "mov.u64 \t$r, $s;",
2439 (int_nvvm_move_ptr texternalsym:$s))]>;*/
2442 // MoveParam %r1, param
2443 // ptr_local_to_gen %r2, %r1
2444 // ptr_gen_to_local %r3, %r2
2448 // @TODO: Revisit this. There is a type
2449 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2450 // instructions are not currently defined. However, we can use the ptr
2451 // variants and the asm printer will do the right thing.
// Fold gen_to_local(local_to_gen(MoveParam sym)) into a single move of the
// external symbol. The i64 result form selects nvvm_move_ptr64 and the i32
// result form selects nvvm_move_ptr32.
2452 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2453 (MoveParam texternalsym:$src)))),
2454 (nvvm_move_ptr64 texternalsym:$src)>;
2455 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2456 (MoveParam texternalsym:$src)))),
2457 (nvvm_move_ptr32 texternalsym:$src)>;
2460 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
2461 "mov.u64 \t$result, $src;", []>;
2463 //-----------------------------------
2464 // Compiler Error Warn
2465 // - Just ignore them in codegen
2466 //-----------------------------------
// Records for int_nvvm_compiler_warn and int_nvvm_compiler_error, each with a
// 32-bit and a 64-bit operand form. They produce no results, and their
// assembly text is only a "//" line naming the intrinsic; the operand $a does
// not appear in the emitted text.
2468 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2469 "// llvm.nvvm.compiler.warn()",
2470 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
2471 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2472 "// llvm.nvvm.compiler.warn()",
2473 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
2474 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2475 "// llvm.nvvm.compiler.error()",
2476 [(int_nvvm_compiler_error Int32Regs:$a)]>;
2477 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2478 "// llvm.nvvm.compiler.error()",
2479 [(int_nvvm_compiler_error Int64Regs:$a)]>;
// ISSPACEP: selects intrinsic Intr to "isspacep.<suffix>", writing a predicate
// result (Int1Regs:$d). _32 takes a 32-bit address operand and _64 a 64-bit
// one. Preds defaults to an empty list.
// NOTE(review): the embedded numbering skips 2488 and 2492 after the trailing
// commas, so the tail of each record (presumably where Preds is applied) is
// not visible in this listing -- confirm against the full file.
2484 multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
2485 def _32: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2486 "isspacep." # suffix # "\t$d, $a;",
2487 [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
2489 def _64: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2490 "isspacep." # suffix # "\t$d, $a;",
2491 [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
// isspacep instantiations. The const variant passes hasPTX<31>, and the
// shared::cluster variant passes hasPTX<78> and hasSM<90>. The global, local
// and shared variants use the default (empty) predicate list.
2495 defm isspace_const : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
2496 defm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
2497 defm isspace_local : ISSPACEP<"local", int_nvvm_isspacep_local>;
2498 defm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
2499 defm isspace_shared_cluster : ISSPACEP<"shared::cluster",
2500 int_nvvm_isspacep_shared_cluster,
2501 [hasPTX<78>, hasSM<90>]>;
2503 // Special register reads
// MOV_SPECIAL: copies a SpecialRegs operand into a 32-bit register with
// mov.b32. It has no selection pattern of its own; it is only reachable
// through explicit Pat definitions that name it.
2504 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
2505 (ins SpecialRegs:$r),
2506 "mov.b32 \t$d, $r;", []>;
// One pattern per environment register, 0 through 31:
// int_nvvm_read_ptx_sreg_envreg<N> is selected to MOV_SPECIAL ENVREG<N>.
2508 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
2509 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
2510 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
2511 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
2512 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
2513 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
2514 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
2515 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
2516 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
2517 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
2518 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
2519 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2520 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2521 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2522 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2523 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2524 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2525 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2526 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2527 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2528 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2529 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2530 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2531 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2532 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2533 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2534 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2535 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2536 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2537 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2538 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2539 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2542 // rotate builtin support
// ROTATE_B32_HW_IMM: selects int_nvvm_rotate_b32 with an immediate amount to
// shf.l.wrap.b32, passing $src as both data inputs. Requires hasHWROT32.
2544 def ROTATE_B32_HW_IMM
2545 : NVPTXInst<(outs Int32Regs:$dst),
2546 (ins Int32Regs:$src, i32imm:$amt),
2547 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2548 [(set Int32Regs:$dst,
2549 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2550 Requires<[hasHWROT32]> ;
// ROTATE_B32_HW_REG: selects int_nvvm_rotate_b32 with a register amount to
// shf.l.wrap.b32, passing $src as both data inputs. Requires hasHWROT32.
2552 def ROTATE_B32_HW_REG
2553 : NVPTXInst<(outs Int32Regs:$dst),
2554 (ins Int32Regs:$src, Int32Regs:$amt),
2555 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2556 [(set Int32Regs:$dst,
2557 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2558 Requires<[hasHWROT32]> ;
// Fallback selection for int_nvvm_rotate_b32 when noHWROT32 holds. The
// immediate form maps to ROT32imm_sw with both amt and (SUB_FRM_32 amt) --
// presumably 32 - amt; confirm against the SUB_FRM_32 definition. The
// register form maps to ROTL32reg_sw.
2560 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2561 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2562 Requires<[noHWROT32]> ;
2564 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2565 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2566 Requires<[noHWROT32]> ;
// GET_LO_INT64 / GET_HI_INT64: extract one 32-bit half of a 64-bit register.
// Each unpacks $src with mov.b64 into a two-element vector and sends the
// unwanted half to a block-local %dummy register. $dst is the first element
// for LO and the second for HI. Both are marked as having no side effects.
// NOTE(review): the embedded numbering skips 2573-2575 and 2580-2583, so the
// end of each !strconcat and the pattern list are not visible in this listing
// -- confirm against the full file.
2568 let hasSideEffects = false in {
2569 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2570 !strconcat("{{\n\t",
2571 ".reg .b32 %dummy;\n\t",
2572 "mov.b64 \t{$dst,%dummy}, $src;\n\t",
2576 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2577 !strconcat("{{\n\t",
2578 ".reg .b32 %dummy;\n\t",
2579 "mov.b64 \t{%dummy,$dst}, $src;\n\t",
// Packs two 32-bit registers into one 64-bit register with mov.b64, $lo
// first and $hi second. No selection pattern; no side effects.
// NOTE(review): the embedded numbering skips 2585, so the "def <name>" line of
// this record is not visible in this listing (presumably PACK_TWO_INT32, which
// later patterns in this file reference) -- confirm against the full file.
2584 let hasSideEffects = false in {
2586 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2587 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
// int_nvvm_swap_lo_hi_b64: rebuild the 64-bit value with its halves exchanged,
// passing the old high half as PACK_TWO_INT32's first operand and the old low
// half as its second.
2590 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2591 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2592 (GET_LO_INT64 Int64Regs:$src))> ;
2594 // Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
// 32-bit funnel-shift records over the pair ($lo, $hi), in left (shf.l) and
// right (shf.r) directions, both in .wrap mode. Each direction has an
// immediate-amount (_IMM) and a register-amount (_REG) form. None has a
// selection pattern, all require hasHWROT32, and all are marked as having no
// side effects.
2596 let hasSideEffects = false in {
2597 def SHF_L_WRAP_B32_IMM
2598 : NVPTXInst<(outs Int32Regs:$dst),
2599 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2600 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2601 Requires<[hasHWROT32]>;
2603 def SHF_L_WRAP_B32_REG
2604 : NVPTXInst<(outs Int32Regs:$dst),
2605 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2606 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2607 Requires<[hasHWROT32]>;
2609 def SHF_R_WRAP_B32_IMM
2610 : NVPTXInst<(outs Int32Regs:$dst),
2611 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2612 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2613 Requires<[hasHWROT32]>;
2615 def SHF_R_WRAP_B32_REG
2616 : NVPTXInst<(outs Int32Regs:$dst),
2617 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2618 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2619 Requires<[hasHWROT32]>;
2622 // HW version of rotate 64
// int_nvvm_rotate_b64 when hasHWROT32 holds, for immediate and register
// amounts. Two left funnel shifts are issued over the 32-bit halves of $src:
// the first takes (HI, LO) as its ($lo, $hi) operands, the second (LO, HI).
// NOTE(review): the embedded numbering skips 2624 and 2632, so the outer node
// that combines the two shift results is not visible in this listing
// (presumably PACK_TWO_INT32) -- confirm against the full file.
2623 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2625 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2626 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2627 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2628 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2629 Requires<[hasHWROT32]>;
2631 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2633 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2634 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2635 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2636 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2637 Requires<[hasHWROT32]>;
// int_nvvm_rotate_right_b64 when hasHWROT32 holds, for immediate and register
// amounts. Two right funnel shifts are issued over the 32-bit halves of $src:
// the first takes (LO, HI) as its ($lo, $hi) operands, the second (HI, LO).
// NOTE(review): the embedded numbering skips 2641 and 2649, so the outer node
// that combines the two shift results is not visible in this listing
// (presumably PACK_TWO_INT32) -- confirm against the full file.
2640 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2642 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2643 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2644 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2645 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2646 Requires<[hasHWROT32]>;
2648 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2650 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2651 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2652 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2653 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2654 Requires<[hasHWROT32]>;
2656 // SW version of rotate 64
// Fallback selection for the 64-bit rotates when noHWROT32 holds.
//  - rotate_b64, immediate: ROT64imm_sw with (amt, SUB_FRM_64 amt).
//  - rotate_b64, register: ROTL64reg_sw.
//  - rotate_right_b64, immediate: the same ROT64imm_sw with the two amount
//    operands swapped, i.e. (SUB_FRM_64 amt, amt).
//  - rotate_right_b64, register: ROTR64reg_sw.
// SUB_FRM_64 presumably yields 64 - amt; confirm against its definition.
2657 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2658 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
2659 Requires<[noHWROT32]>;
2660 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2661 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2662 Requires<[noHWROT32]>;
2663 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2664 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2665 Requires<[noHWROT32]>;
2666 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2667 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2668 Requires<[noHWROT32]>;
2671 //-----------------------------------
2672 // Texture Intrinsics
2673 //-----------------------------------
2675 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2676 // also defined in NVPTXReplaceImageHandles.cpp
2678 // texmode_independent
2679 let IsTex = true, IsTexModeUnified = false in {
2680 // Texture fetch instructions using handles
// TEX_1D_base: four-result texture fetch ($r, $g, $b, $a of class outtype)
// with a single coordinate $x of class intype. The texsamp dag is prepended
// to the coordinate operand with !con. The assembly string refers to $t and
// $s, so texsamp must provide operands with those names (presumably the
// texture and sampler handles).
// NOTE(review): the embedded numbering skips 2688, so the line closing this
// NVPTXInst (presumably the pattern list) is not visible in this listing.
2682 class TEX_1D_base<string inst, NVPTXRegClass outtype,
2683 NVPTXRegClass intype, dag texsamp>
2684 : NVPTXInst<(outs outtype:$r, outtype:$g,
2685 outtype:$b, outtype:$a),
2686 !con(texsamp, (ins intype:$x)),
2687 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
// TEX_1D: the four operand-kind combinations of TEX_1D_base. The first suffix
// letter describes $t and the second $s: R is a 64-bit register (Int64Regs)
// and I is a 64-bit immediate (i64imm).
2690 multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2691 def _RR : TEX_1D_base<inst, outtype, intype,
2692 (ins Int64Regs:$t, Int64Regs:$s)>;
2693 def _RI : TEX_1D_base<inst, outtype, intype,
2694 (ins Int64Regs:$t, i64imm:$s)>;
2695 def _IR : TEX_1D_base<inst, outtype, intype,
2696 (ins i64imm:$t, Int64Regs:$s)>;
2697 def _II : TEX_1D_base<inst, outtype, intype,
2698 (ins i64imm:$t, i64imm:$s)>;
// 1D texture fetches. The name suffix is <result>_<coordinate>, mirroring the
// ".v4.<result>.<coordinate>" part of the mnemonic. F32 uses Float32Regs;
// S32 and U32 both use Int32Regs.
2701 defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
2702 defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2703 defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
2704 defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2705 defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
2706 defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_1D_LEVEL_base: four-result 1D texture fetch that takes an extra $lod
// operand of class intype, printed after the bracketed address. The texsamp
// dag is prepended with !con and must provide the $t and $s operands used in
// the assembly string.
// NOTE(review): the embedded numbering skips 2714, so the line closing this
// NVPTXInst (presumably the pattern list) is not visible in this listing.
2708 class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
2709 NVPTXRegClass intype, dag texsamp>
2710 : NVPTXInst<(outs outtype:$r, outtype:$g,
2711 outtype:$b, outtype:$a),
2712 !con(texsamp, (ins intype:$x, intype:$lod)),
2713 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
// TEX_1D_LEVEL: the four operand-kind combinations of TEX_1D_LEVEL_base. The
// first suffix letter describes $t and the second $s: R is a 64-bit register
// (Int64Regs) and I is a 64-bit immediate (i64imm).
2716 multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
2717 NVPTXRegClass intype> {
2718 def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
2719 (ins Int64Regs:$t, Int64Regs:$s)>;
2720 def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
2721 (ins Int64Regs:$t, i64imm:$s)>;
2722 def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
2723 (ins i64imm:$t, Int64Regs:$s)>;
2724 def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
2725 (ins i64imm:$t, i64imm:$s)>;
// 1D tex.level fetches. Only float coordinates (Float32Regs) are instantiated,
// with f32, s32 and u32 result types.
2728 defm TEX_1D_F32_F32_LEVEL :
2729 TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2730 defm TEX_1D_S32_F32_LEVEL :
2731 TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2732 defm TEX_1D_U32_F32_LEVEL :
2733 TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_1D_GRAD_base: four-result 1D texture fetch that takes two extra
// operands, $gradx and $grady (class intype), each printed in its own braces
// after the bracketed address. The texsamp dag is prepended with !con and
// must provide the $t and $s operands used in the assembly string.
// NOTE(review): the embedded numbering skips 2742, so the line closing this
// NVPTXInst (presumably the pattern list) is not visible in this listing.
2735 class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
2736 NVPTXRegClass intype, dag texsamp>
2737 : NVPTXInst<(outs outtype:$r, outtype:$g,
2738 outtype:$b, outtype:$a),
2739 !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
2740 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
2741 " \\{$gradx\\}, \\{$grady\\};",
// TEX_1D_GRAD: the four operand-kind combinations of TEX_1D_GRAD_base. The
// first suffix letter describes $t and the second $s: R is a 64-bit register
// (Int64Regs) and I is a 64-bit immediate (i64imm).
2744 multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
2745 NVPTXRegClass intype> {
2746 def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
2747 (ins Int64Regs:$t, Int64Regs:$s)>;
2748 def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
2749 (ins Int64Regs:$t, i64imm:$s)>;
2750 def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
2751 (ins i64imm:$t, Int64Regs:$s)>;
2752 def _II : TEX_1D_GRAD_base<inst, outtype, intype,
2753 (ins i64imm:$t, i64imm:$s)>;
// 1D tex.grad fetches. Only float coordinates (Float32Regs) are instantiated,
// with f32, s32 and u32 result types.
2756 defm TEX_1D_F32_F32_GRAD
2757 : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2758 defm TEX_1D_S32_F32_GRAD
2759 : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2760 defm TEX_1D_U32_F32_GRAD
2761 : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_1D_ARRAY_base: four-result texture fetch whose coordinate vector is
// {$l, $x}. $l is always Int32Regs, whatever intype is; $x has class intype.
// The texsamp dag is prepended with !con and must provide the $t and $s
// operands used in the assembly string.
// NOTE(review): the embedded numbering skips 2769, so the line closing this
// NVPTXInst (presumably the pattern list) is not visible in this listing.
2763 class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
2764 NVPTXRegClass intype, dag texsamp>
2765 : NVPTXInst<(outs outtype:$r, outtype:$g,
2766 outtype:$b, outtype:$a),
2767 !con(texsamp, (ins Int32Regs:$l, intype:$x)),
2768 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
// TEX_1D_ARRAY: the four operand-kind combinations of TEX_1D_ARRAY_base. The
// first suffix letter describes $t and the second $s: R is a 64-bit register
// (Int64Regs) and I is a 64-bit immediate (i64imm).
2771 multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
2772 NVPTXRegClass intype> {
2773 def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
2774 (ins Int64Regs:$t, Int64Regs:$s)>;
2775 def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
2776 (ins Int64Regs:$t, i64imm:$s)>;
2777 def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
2778 (ins i64imm:$t, Int64Regs:$s)>;
2779 def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
2780 (ins i64imm:$t, i64imm:$s)>;
// 1D-array (a1d) texture fetches. The name suffix is <result>_<coordinate>,
// mirroring the ".v4.<result>.<coordinate>" part of the mnemonic. F32 uses
// Float32Regs; S32 and U32 both use Int32Regs.
2783 defm TEX_1D_ARRAY_F32_F32
2784 : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2785 defm TEX_1D_ARRAY_F32_S32
2786 : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
2787 defm TEX_1D_ARRAY_S32_S32
2788 : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
2789 defm TEX_1D_ARRAY_S32_F32
2790 : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2791 defm TEX_1D_ARRAY_U32_S32
2792 : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
2793 defm TEX_1D_ARRAY_U32_F32
2794 : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_1D_ARRAY_LEVEL_base: four-result 1D-array texture fetch with coordinate
// vector {$l, $x} ($l is always Int32Regs) and an extra $lod operand of class
// intype printed after the bracketed address. The texsamp dag is prepended
// with !con and must provide the $t and $s operands used in the assembly
// string.
// NOTE(review): the embedded numbering skips 2803, so the line closing this
// NVPTXInst (presumably the pattern list) is not visible in this listing.
2796 class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
2797 NVPTXRegClass intype, dag texsamp>
2798 : NVPTXInst<(outs outtype:$r, outtype:$g,
2799 outtype:$b, outtype:$a),
2800 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
2801 inst # " \t\\{$r, $g, $b, $a\\},"
2802 " [$t, $s, \\{$l, $x\\}], $lod;",
// TEX_1D_ARRAY_LEVEL: the four operand-kind combinations of
// TEX_1D_ARRAY_LEVEL_base. The first suffix letter describes $t and the
// second $s: R is a 64-bit register (Int64Regs) and I is a 64-bit immediate
// (i64imm).
2805 multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
2806 NVPTXRegClass intype> {
2807 def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2808 (ins Int64Regs:$t, Int64Regs:$s)>;
2809 def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2810 (ins Int64Regs:$t, i64imm:$s)>;
2811 def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2812 (ins i64imm:$t, Int64Regs:$s)>;
2813 def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2814 (ins i64imm:$t, i64imm:$s)>;
// 1D-array tex.level fetches. Only float coordinates (Float32Regs) are
// instantiated, with f32, s32 and u32 result types.
2817 defm TEX_1D_ARRAY_F32_F32_LEVEL
2818 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2819 defm TEX_1D_ARRAY_S32_F32_LEVEL
2820 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2821 defm TEX_1D_ARRAY_U32_F32_LEVEL
2822 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_1D_ARRAY_GRAD_base: four-result 1D-array texture fetch with coordinate
// vector {$l, $x} ($l is always Int32Regs) and two extra operands, $gradx and
// $grady (class intype), each printed in its own braces. The texsamp dag is
// prepended with !con and must provide the $t and $s operands used in the
// assembly string.
// NOTE(review): the embedded numbering skips 2832, so the line closing this
// NVPTXInst (presumably the pattern list) is not visible in this listing.
2824 class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
2825 NVPTXRegClass intype, dag texsamp>
2826 : NVPTXInst<(outs outtype:$r, outtype:$g,
2827 outtype:$b, outtype:$a),
2828 !con(texsamp, (ins Int32Regs:$l, intype:$x,
2829 intype:$gradx, intype:$grady)),
2830 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
2831 " \\{$gradx\\}, \\{$grady\\};",
2834 multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
2835 NVPTXRegClass intype> {
2836 def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2837 (ins Int64Regs:$t, Int64Regs:$s)>;
2838 def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2839 (ins Int64Regs:$t, i64imm:$s)>;
2840 def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2841 (ins i64imm:$t, Int64Regs:$s)>;
2842 def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2843 (ins i64imm:$t, i64imm:$s)>;
2846 defm TEX_1D_ARRAY_F32_F32_GRAD
2847 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2848 defm TEX_1D_ARRAY_S32_F32_GRAD
2849 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2850 defm TEX_1D_ARRAY_U32_F32_GRAD
2851 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the "tex.2d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x and $y inputs.
//   texsamp - dag providing the leading $t and $s operands, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, $s, {$x, $y}];
// NOTE(review): the lines that close this definition are not visible in
// this listing.
2853 class TEX_2D_base<string inst, NVPTXRegClass outtype,
2854 NVPTXRegClass intype, dag texsamp>
2855 : NVPTXInst<(outs outtype:$r, outtype:$g,
2856 outtype:$b, outtype:$a),
2857 !con(texsamp, (ins intype:$x, intype:$y)),
2858 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
// Emits four variants of the base class. The suffix letters give the operand
// kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
2861 multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2862 def _RR : TEX_2D_base<inst, outtype, intype,
2863 (ins Int64Regs:$t, Int64Regs:$s)>;
2864 def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
2865 def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
2866 def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
// Concrete instructions for every visible result/input combination. The s32
// and u32 result forms both use Int32Regs for their results.
2869 defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2870 defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
2871 defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
2872 defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2873 defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
2874 defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the "tex.level.2d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x, $y and $lod inputs.
//   texsamp - dag providing the leading $t and $s operands, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, $s, {$x, $y}], $lod;
// NOTE(review): the lines that close this definition are not visible in
// this listing.
2876 class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
2877 NVPTXRegClass intype, dag texsamp>
2878 : NVPTXInst<(outs outtype:$r, outtype:$g,
2879 outtype:$b, outtype:$a),
2880 !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
2881 inst # " \t\\{$r, $g, $b, $a\\},"
2882 " [$t, $s, \\{$x, $y\\}], $lod;",
// Emits four variants of the base class. The suffix letters give the operand
// kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
2885 multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
2886 NVPTXRegClass intype> {
2887 def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
2888 (ins Int64Regs:$t, Int64Regs:$s)>;
2889 def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
2890 (ins Int64Regs:$t, i64imm:$s)>;
2891 def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
2892 (ins i64imm:$t, Int64Regs:$s)>;
2893 def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
2894 (ins i64imm:$t, i64imm:$s)>;
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
2897 defm TEX_2D_F32_F32_LEVEL :
2898 TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2899 defm TEX_2D_S32_F32_LEVEL :
2900 TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2901 defm TEX_2D_U32_F32_LEVEL :
2902 TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the "tex.grad.2d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and the four gradient inputs
//             $gradx0, $gradx1, $grady0, $grady1.
//   texsamp - dag providing the leading $t and $s operands, prepended to the
//             remaining inputs with !con.
// The asm text has the form:
//   inst {r,g,b,a}, [$t, $s, {$x, $y}], {$gradx0, $gradx1}, {$grady0, $grady1};
// NOTE(review): the lines that close this definition are not visible in
// this listing.
2904 class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
2905 NVPTXRegClass intype, dag texsamp>
2906 : NVPTXInst<(outs outtype:$r, outtype:$g,
2907 outtype:$b, outtype:$a),
2908 !con(texsamp, (ins intype:$x, intype:$y,
2909 intype:$gradx0, intype:$gradx1,
2910 intype:$grady0, intype:$grady1)),
2911 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
2912 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
// Emits four variants of the base class. The suffix letters give the operand
// kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
2915 multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
2916 NVPTXRegClass intype> {
2917 def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
2918 (ins Int64Regs:$t, Int64Regs:$s)>;
2919 def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
2920 (ins Int64Regs:$t, i64imm:$s)>;
2921 def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
2922 (ins i64imm:$t, Int64Regs:$s)>;
2923 def _II : TEX_2D_GRAD_base<inst, outtype, intype,
2924 (ins i64imm:$t, i64imm:$s)>;
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
2927 defm TEX_2D_F32_F32_GRAD :
2928 TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2929 defm TEX_2D_S32_F32_GRAD :
2930 TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2931 defm TEX_2D_U32_F32_GRAD :
2932 TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the "tex.a2d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x and $y inputs ($l is always
//             Int32Regs).
//   texsamp - dag providing the leading $t and $s operands, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, $s, {$l, $x, $y, $y}];
// $y is printed twice, so the bracketed vector has four elements built from
// three inputs. NOTE(review): presumably padding to the four-element vector
// the PTX a2d form expects - confirm against the PTX ISA.
// NOTE(review): the lines that close this definition are not visible in
// this listing.
2934 class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
2935 NVPTXRegClass intype, dag texsamp>
2936 : NVPTXInst<(outs outtype:$r, outtype:$g,
2937 outtype:$b, outtype:$a),
2938 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
2939 inst # " \t\\{$r, $g, $b, $a\\},"
2940 " [$t, $s, \\{$l, $x, $y, $y\\}];",
// Emits four variants of the base class. The suffix letters give the operand
// kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
2943 multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
2944 NVPTXRegClass intype> {
2945 def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
2946 (ins Int64Regs:$t, Int64Regs:$s)>;
2947 def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
2948 (ins Int64Regs:$t, i64imm:$s)>;
2949 def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
2950 (ins i64imm:$t, Int64Regs:$s)>;
2951 def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
2952 (ins i64imm:$t, i64imm:$s)>;
// Concrete instructions for every visible result/input combination. The s32
// and u32 result forms both use Int32Regs for their results.
2955 defm TEX_2D_ARRAY_F32_F32
2956 : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
2957 defm TEX_2D_ARRAY_F32_S32
2958 : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
2959 defm TEX_2D_ARRAY_S32_S32
2960 : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
2961 defm TEX_2D_ARRAY_S32_F32
2962 : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
2963 defm TEX_2D_ARRAY_U32_S32
2964 : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
2965 defm TEX_2D_ARRAY_U32_F32
2966 : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the "tex.level.a2d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x and $y inputs ($l is always
//             Int32Regs).
//   texsamp - dag providing the leading $t and $s operands, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, $s, {$l, $x, $y, $y}], $lod;
// $y is printed twice in the bracketed vector. NOTE(review): presumably
// padding to a four-element vector - confirm against the PTX ISA.
// NOTE(review): the visible input list stops after "intype:$y," and never
// closes, yet the asm string uses $lod; the continuation that declares $lod
// is not visible in this listing, nor are the lines closing the definition.
2968 class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
2969 NVPTXRegClass intype, dag texsamp>
2970 : NVPTXInst<(outs outtype:$r, outtype:$g,
2971 outtype:$b, outtype:$a),
2972 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
2974 inst # " \t\\{$r, $g, $b, $a\\},"
2975 " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
// Emits four variants of the base class. The suffix letters give the operand
// kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
2978 multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
2979 NVPTXRegClass intype> {
2980 def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
2981 (ins Int64Regs:$t, Int64Regs:$s)>;
2982 def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
2983 (ins Int64Regs:$t, i64imm:$s)>;
2984 def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
2985 (ins i64imm:$t, Int64Regs:$s)>;
2986 def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
2987 (ins i64imm:$t, i64imm:$s)>;
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
2990 defm TEX_2D_ARRAY_F32_F32_LEVEL
2991 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
2992 defm TEX_2D_ARRAY_S32_F32_LEVEL
2993 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
2994 defm TEX_2D_ARRAY_U32_F32_LEVEL
2995 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the "tex.grad.a2d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and the four gradient inputs
//             $gradx0, $gradx1, $grady0, $grady1 ($l is always Int32Regs).
//   texsamp - dag providing the leading $t and $s operands, prepended to the
//             remaining inputs with !con.
// The asm text has the form:
//   inst {r,g,b,a}, [$t, $s, {$l, $x, $y, $y}],
//        {$gradx0, $gradx1}, {$grady0, $grady1};
// $y is printed twice in the first bracketed vector. NOTE(review):
// presumably padding to a four-element vector - confirm against the PTX ISA.
// NOTE(review): the lines that close this definition are not visible in
// this listing.
2997 class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
2998 NVPTXRegClass intype, dag texsamp>
2999 : NVPTXInst<(outs outtype:$r, outtype:$g,
3000 outtype:$b, outtype:$a),
3001 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3002 intype:$gradx0, intype:$gradx1,
3003 intype:$grady0, intype:$grady1)),
3004 inst # " \t\\{$r, $g, $b, $a\\},"
3005 " [$t, $s, \\{$l, $x, $y, $y\\}],"
3006 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
// Emits four variants of the base class. The suffix letters give the operand
// kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
3009 multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3010 NVPTXRegClass intype> {
3011 def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3012 (ins Int64Regs:$t, Int64Regs:$s)>;
3013 def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3014 (ins Int64Regs:$t, i64imm:$s)>;
3015 def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3016 (ins i64imm:$t, Int64Regs:$s)>;
3017 def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3018 (ins i64imm:$t, i64imm:$s)>;
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3021 defm TEX_2D_ARRAY_F32_F32_GRAD
3022 : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3023 defm TEX_2D_ARRAY_S32_F32_GRAD
3024 : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3025 defm TEX_2D_ARRAY_U32_F32_GRAD
3026 : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the "tex.3d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x, $y and $z inputs.
//   texsamp - dag providing the leading $t and $s operands, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, $s, {$x, $y, $z, $z}];
// $z is printed twice, so the bracketed vector has four elements built from
// three inputs. NOTE(review): presumably padding to the four-element vector
// the PTX 3d form expects - confirm against the PTX ISA.
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3028 class TEX_3D_base<string inst, NVPTXRegClass outtype,
3029 NVPTXRegClass intype, dag texsamp>
3030 : NVPTXInst<(outs outtype:$r, outtype:$g,
3031 outtype:$b, outtype:$a),
3032 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
3033 inst # " \t\\{$r, $g, $b, $a\\},"
3034 " [$t, $s, \\{$x, $y, $z, $z\\}];",
// Emits four variants of the base class. The suffix letters give the operand
// kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
3037 multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3038 def _RR : TEX_3D_base<inst, outtype, intype,
3039 (ins Int64Regs:$t, Int64Regs:$s)>;
3040 def _RI : TEX_3D_base<inst, outtype, intype,
3041 (ins Int64Regs:$t, i64imm:$s)>;
3042 def _IR : TEX_3D_base<inst, outtype, intype,
3043 (ins i64imm:$t, Int64Regs:$s)>;
3044 def _II : TEX_3D_base<inst, outtype, intype,
3045 (ins i64imm:$t, i64imm:$s)>;
// Concrete instructions for every visible result/input combination. The s32
// and u32 result forms both use Int32Regs for their results.
3048 defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3049 defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
3050 defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
3051 defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3052 defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
3053 defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the "tex.level.3d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x, $y and $z inputs.
//   texsamp - dag providing the leading $t and $s operands, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, $s, {$x, $y, $z, $z}], $lod;
// $z is printed twice in the bracketed vector. NOTE(review): presumably
// padding to a four-element vector - confirm against the PTX ISA.
// NOTE(review): the visible input list stops after "intype:$z," and never
// closes, yet the asm string uses $lod; the continuation that declares $lod
// is not visible in this listing, nor are the lines closing the definition.
3055 class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
3056 NVPTXRegClass intype, dag texsamp>
3057 : NVPTXInst<(outs outtype:$r, outtype:$g,
3058 outtype:$b, outtype:$a),
3059 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3061 inst # " \t\\{$r, $g, $b, $a\\},"
3062 " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Emits four variants of the base class. The suffix letters give the operand
// kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
3065 multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
3066 NVPTXRegClass intype> {
3067 def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,
3068 (ins Int64Regs:$t, Int64Regs:$s)>;
3069 def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
3070 (ins Int64Regs:$t, i64imm:$s)>;
3071 def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
3072 (ins i64imm:$t, Int64Regs:$s)>;
3073 def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
3074 (ins i64imm:$t, i64imm:$s)>;
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3077 defm TEX_3D_F32_F32_LEVEL
3078 : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3079 defm TEX_3D_S32_F32_LEVEL
3080 : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3081 defm TEX_3D_U32_F32_LEVEL
3082 : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the "tex.grad.3d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and the six gradient inputs
//             $gradx0..$gradx2 and $grady0..$grady2.
//   texsamp - dag providing the leading $t and $s operands, prepended to the
//             remaining inputs with !con.
// The asm text has the form:
//   inst {r,g,b,a}, [$t, $s, {$x, $y, $z, $z}],
//        {$gradx0, $gradx1, $gradx2, $gradx2},
//        {$grady0, $grady1, $grady2, $grady2};
// In each of the three bracketed vectors the last input is printed twice.
// NOTE(review): presumably padding to four-element vectors - confirm against
// the PTX ISA.
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3084 class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
3085 NVPTXRegClass intype, dag texsamp>
3086 : NVPTXInst<(outs outtype:$r, outtype:$g,
3087 outtype:$b, outtype:$a),
3088 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3089 intype :$gradx0, intype:$gradx1,
3090 intype:$gradx2, intype:$grady0,
3091 intype:$grady1, intype:$grady2)),
3092 inst # " \t\\{$r, $g, $b, $a\\},"
3093 " [$t, $s, \\{$x, $y, $z, $z\\}],"
3094 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3095 " \\{$grady0, $grady1, $grady2, $grady2\\};",
// Emits four variants of the base class. The suffix letters give the operand
// kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
3098 multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
3099 NVPTXRegClass intype> {
3100 def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
3101 (ins Int64Regs:$t, Int64Regs:$s)>;
3102 def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
3103 (ins Int64Regs:$t, i64imm:$s)>;
3104 def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
3105 (ins i64imm:$t, Int64Regs:$s)>;
3106 def _II : TEX_3D_GRAD_base<inst, outtype, intype,
3107 (ins i64imm:$t, i64imm:$s)>;
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3110 defm TEX_3D_F32_F32_GRAD
3111 : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3112 defm TEX_3D_S32_F32_GRAD
3113 : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3114 defm TEX_3D_U32_F32_GRAD
3115 : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the "tex.cube" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x, $y and $z inputs.
//   texsamp - dag providing the leading $t and $s operands, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, $s, {$x, $y, $z, $z}];
// $z is printed twice in the bracketed vector. NOTE(review): presumably
// padding to a four-element vector - confirm against the PTX ISA.
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3117 class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
3118 NVPTXRegClass intype, dag texsamp>
3119 : NVPTXInst<(outs outtype:$r, outtype:$g,
3120 outtype:$b, outtype:$a),
3121 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
3122 inst # " \t\\{$r, $g, $b, $a\\},"
3123 " [$t, $s, \\{$x, $y, $z, $z\\}];",
// Emits four variants of the base class. The suffix letters give the operand
// kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
3126 multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3127 def _RR : TEX_CUBE_base<inst, outtype, intype,
3128 (ins Int64Regs:$t, Int64Regs:$s)>;
3129 def _RI : TEX_CUBE_base<inst, outtype, intype,
3130 (ins Int64Regs:$t, i64imm:$s)>;
3131 def _IR : TEX_CUBE_base<inst, outtype, intype,
3132 (ins i64imm:$t, Int64Regs:$s)>;
3133 def _II : TEX_CUBE_base<inst, outtype, intype,
3134 (ins i64imm:$t, i64imm:$s)>;
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3137 defm TEX_CUBE_F32_F32
3138 : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3139 defm TEX_CUBE_S32_F32
3140 : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3141 defm TEX_CUBE_U32_F32
3142 : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the "tex.level.cube" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x, $y and $z inputs.
//   texsamp - dag providing the leading $t and $s operands, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, $s, {$x, $y, $z, $z}], $lod;
// $z is printed twice in the bracketed vector. NOTE(review): presumably
// padding to a four-element vector - confirm against the PTX ISA.
// NOTE(review): the visible input list stops after "intype:$z," and never
// closes, yet the asm string uses $lod; the continuation that declares $lod
// is not visible in this listing, nor are the lines closing the definition.
3144 class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
3145 NVPTXRegClass intype, dag texsamp>
3146 : NVPTXInst<(outs outtype:$r, outtype:$g,
3147 outtype:$b, outtype:$a),
3148 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3150 inst # " \t\\{$r, $g, $b, $a\\},"
3151 " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Emits four variants of the base class. The suffix letters give the operand
// kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
3154 multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
3155 NVPTXRegClass intype> {
3156 def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3157 (ins Int64Regs:$t, Int64Regs:$s)>;
3158 def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3159 (ins Int64Regs:$t, i64imm:$s)>;
3160 def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3161 (ins i64imm:$t, Int64Regs:$s)>;
3162 def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3163 (ins i64imm:$t, i64imm:$s)>;
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3166 defm TEX_CUBE_F32_F32_LEVEL
3167 : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3168 defm TEX_CUBE_S32_F32_LEVEL
3169 : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3170 defm TEX_CUBE_U32_F32_LEVEL
3171 : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the "tex.acube" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x and $y inputs ($l is always
//             Int32Regs).
//   texsamp - dag providing the leading $t and $s operands, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, $s, {$l, $x, $y, $z}];
// NOTE(review): the visible input list stops after "intype:$y," and never
// closes, yet the asm string uses $z; the continuation that declares $z is
// not visible in this listing, nor are the lines closing the definition.
3173 class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
3174 NVPTXRegClass intype, dag texsamp>
3175 : NVPTXInst<(outs outtype:$r, outtype:$g,
3176 outtype:$b, outtype:$a),
3177 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3179 inst # " \t\\{$r, $g, $b, $a\\},"
3180 " [$t, $s, \\{$l, $x, $y, $z\\}];",
// Emits four variants of the base class. The suffix letters give the operand
// kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
3183 multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3184 NVPTXRegClass intype> {
3185 def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3186 (ins Int64Regs:$t, Int64Regs:$s)>;
3187 def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3188 (ins Int64Regs:$t, i64imm:$s)>;
3189 def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3190 (ins i64imm:$t, Int64Regs:$s)>;
3191 def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3192 (ins i64imm:$t, i64imm:$s)>;
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3195 defm TEX_CUBE_ARRAY_F32_F32
3196 : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3197 defm TEX_CUBE_ARRAY_S32_F32
3198 : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3199 defm TEX_CUBE_ARRAY_U32_F32
3200 : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the "tex.level.acube" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x, $y, $z and $lod inputs ($l is always
//             Int32Regs).
//   texsamp - dag providing the leading $t and $s operands, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, $s, {$l, $x, $y, $z}], $lod;
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3202 class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3203 NVPTXRegClass intype, dag texsamp>
3204 : NVPTXInst<(outs outtype:$r, outtype:$g,
3205 outtype:$b, outtype:$a),
3206 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3207 intype:$z, intype:$lod)),
3208 inst # " \t\\{$r, $g, $b, $a\\},"
3209 " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
// Emits four variants of the base class. The suffix letters give the operand
// kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
3212 multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3213 NVPTXRegClass intype> {
3214 def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3215 (ins Int64Regs:$t, Int64Regs:$s)>;
3216 def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3217 (ins Int64Regs:$t, i64imm:$s)>;
3218 def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3219 (ins i64imm:$t, Int64Regs:$s)>;
3220 def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3221 (ins i64imm:$t, i64imm:$s)>;
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3224 defm TEX_CUBE_ARRAY_F32_F32_LEVEL
3225 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3226 Float32Regs, Float32Regs>;
3227 defm TEX_CUBE_ARRAY_S32_F32_LEVEL
3228 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3229 Int32Regs, Float32Regs>;
3230 defm TEX_CUBE_ARRAY_U32_F32_LEVEL
3231 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3232 Int32Regs, Float32Regs>;
// Base class for the "tld4.<comp>.2d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $v0, $v1, $v2, $v3.
//   intype  - register class of the $x and $y inputs.
//   texsamp - dag providing the leading $t and $s operands, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {v0,v1,v2,v3}, [$t, $s, {$x, $y}];
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3234 class TLD4_2D_base<string inst, NVPTXRegClass outtype,
3235 NVPTXRegClass intype, dag texsamp>
3236 : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3237 outtype:$v2, outtype:$v3),
3238 !con(texsamp, (ins intype:$x, intype:$y)),
3239 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
// Emits four variants of the base class. The suffix letters give the operand
// kind of $t then $s: R = Int64Regs register, I = i64imm immediate.
3242 multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3243 def _RR : TLD4_2D_base<inst, outtype, intype,
3244 (ins Int64Regs:$t, Int64Regs:$s)>;
3245 def _RI : TLD4_2D_base<inst, outtype, intype,
3246 (ins Int64Regs:$t, i64imm:$s)>;
3247 def _IR : TLD4_2D_base<inst, outtype, intype,
3248 (ins i64imm:$t, Int64Regs:$s)>;
3249 def _II : TLD4_2D_base<inst, outtype, intype,
3250 (ins i64imm:$t, i64imm:$s)>;
// f32-result instructions, one per .r/.g/.b/.a mnemonic selector.
3253 defm TLD4_R_2D_F32_F32
3254 : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3255 defm TLD4_G_2D_F32_F32
3256 : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3257 defm TLD4_B_2D_F32_F32
3258 : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3259 defm TLD4_A_2D_F32_F32
3260 : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
// s32-result instructions (results in Int32Regs), one per selector.
3262 defm TLD4_R_2D_S32_F32
3263 : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3264 defm TLD4_G_2D_S32_F32
3265 : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3266 defm TLD4_B_2D_S32_F32
3267 : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3268 defm TLD4_A_2D_S32_F32
3269 : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
// u32-result instructions (results also in Int32Regs), one per selector.
3271 defm TLD4_R_2D_U32_F32
3272 : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3273 defm TLD4_G_2D_U32_F32
3274 : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3275 defm TLD4_B_2D_U32_F32
3276 : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3277 defm TLD4_A_2D_U32_F32
3278 : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3284 let IsTex = true, IsTexModeUnified = true in {
3285 // Texture fetch instructions using handles
// Base class for the unified-mode "tex.1d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x input.
//   tex     - dag providing the leading $t operand, prepended to the
//             remaining inputs with !con. No $s operand appears here.
// The asm text has the form: inst {r,g,b,a}, [$t, {$x}];
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3287 class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
3288 NVPTXRegClass intype, dag tex>
3289 : NVPTXInst<(outs outtype:$r, outtype:$g,
3290 outtype:$b, outtype:$a),
3291 !con(tex, (ins intype:$x)),
3292 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Emits two variants of the base class: _R takes $t as an Int64Regs
// register, _I takes $t as an i64imm immediate.
3295 multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
3296 NVPTXRegClass intype> {
3297 def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3298 def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Concrete instructions for every visible result/input combination. The s32
// and u32 result forms both use Int32Regs for their results.
3301 defm TEX_UNIFIED_1D_F32_S32
3302 : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
3303 defm TEX_UNIFIED_1D_F32_F32
3304 : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3305 defm TEX_UNIFIED_1D_S32_S32
3306 : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
3307 defm TEX_UNIFIED_1D_S32_F32
3308 : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3309 defm TEX_UNIFIED_1D_U32_S32
3310 : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
3311 defm TEX_UNIFIED_1D_U32_F32
3312 : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the unified-mode "tex.level.1d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x and $lod inputs.
//   tex     - dag providing the leading $t operand, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, {$x}], $lod;
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3314 class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
3315 NVPTXRegClass intype, dag tex>
3316 : NVPTXInst<(outs outtype:$r, outtype:$g,
3317 outtype:$b, outtype:$a),
3318 !con(tex, (ins intype:$x, intype:$lod)),
3319 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
// Emits two variants of the base class: _R takes $t as an Int64Regs
// register, _I takes $t as an i64imm immediate.
3322 multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
3323 NVPTXRegClass intype> {
3324 def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3325 def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3328 defm TEX_UNIFIED_1D_F32_F32_LEVEL
3329 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3330 defm TEX_UNIFIED_1D_S32_F32_LEVEL
3331 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3332 defm TEX_UNIFIED_1D_U32_F32_LEVEL
3333 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the unified-mode "tex.grad.1d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x, $gradx and $grady inputs.
//   tex     - dag providing the leading $t operand, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, {$x}], {$gradx}, {$grady};
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3335 class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
3336 NVPTXRegClass intype, dag tex>
3337 : NVPTXInst<(outs outtype:$r, outtype:$g,
3338 outtype:$b, outtype:$a),
3339 !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
3340 inst # " \t\\{$r, $g, $b, $a\\},"
3341 " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Emits two variants of the base class: _R takes $t as an Int64Regs
// register, _I takes $t as an i64imm immediate.
3344 multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
3345 NVPTXRegClass intype> {
3346 def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3347 def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3350 defm TEX_UNIFIED_1D_F32_F32_GRAD
3351 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3352 defm TEX_UNIFIED_1D_S32_F32_GRAD
3353 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3354 defm TEX_UNIFIED_1D_U32_F32_GRAD
3355 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the unified-mode "tex.a1d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x input ($l is always Int32Regs).
//   tex     - dag providing the leading $t operand, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, {$l, $x}];
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3357 class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
3358 NVPTXRegClass intype, dag tex>
3359 : NVPTXInst<(outs outtype:$r, outtype:$g,
3360 outtype:$b, outtype:$a),
3361 !con(tex, (ins Int32Regs:$l, intype:$x)),
3362 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
// Emits two variants of the base class: _R takes $t as an Int64Regs
// register, _I takes $t as an i64imm immediate.
3365 multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
3366 NVPTXRegClass intype> {
3367 def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3368 def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
// Concrete instructions for every visible result/input combination. The s32
// and u32 result forms both use Int32Regs for their results.
3371 defm TEX_UNIFIED_1D_ARRAY_F32_S32
3372 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
3373 defm TEX_UNIFIED_1D_ARRAY_F32_F32
3374 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
3375 defm TEX_UNIFIED_1D_ARRAY_S32_S32
3376 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
3377 defm TEX_UNIFIED_1D_ARRAY_S32_F32
3378 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
3379 defm TEX_UNIFIED_1D_ARRAY_U32_S32
3380 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
3381 defm TEX_UNIFIED_1D_ARRAY_U32_F32
3382 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the unified-mode "tex.level.a1d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x and $lod inputs ($l is always
//             Int32Regs).
//   tex     - dag providing the leading $t operand, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, {$l, $x}], $lod;
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3384 class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3385 NVPTXRegClass intype, dag tex>
3386 : NVPTXInst<(outs outtype:$r, outtype:$g,
3387 outtype:$b, outtype:$a),
3388 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
3389 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
// Emits the _R variant ($t as an Int64Regs register) and an _I variant.
// NOTE(review): the operand dag of _I is not visible in this listing;
// presumably (ins i64imm:$t) as in the sibling multiclasses - confirm.
3392 multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3393 NVPTXRegClass intype> {
3394 def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
3395 (ins Int64Regs:$t)>;
3396 def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3400 defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3401 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
3402 Float32Regs, Float32Regs>;
3403 defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3404 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
3405 Int32Regs, Float32Regs>;
3406 defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3407 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
3408 Int32Regs, Float32Regs>;
// Base class for the unified-mode "tex.grad.a1d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x, $gradx and $grady inputs ($l is
//             always Int32Regs).
//   tex     - dag providing the leading $t operand, prepended to the
//             remaining inputs with !con.
// The asm text has the form:
//   inst {r,g,b,a}, [$t, {$l, $x}], {$gradx}, {$grady};
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3410 class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3411 NVPTXRegClass intype, dag tex>
3412 : NVPTXInst<(outs outtype:$r, outtype:$g,
3413 outtype:$b, outtype:$a),
3414 !con(tex, (ins Int32Regs:$l, intype:$x,
3415 intype:$gradx, intype:$grady)),
3416 inst # " \t\\{$r, $g, $b, $a\\},"
3417 " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Emits the _R variant ($t as an Int64Regs register) and an _I variant.
// NOTE(review): the operand dag of _I is not visible in this listing;
// presumably (ins i64imm:$t) as in the sibling multiclasses - confirm.
3420 multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3421 NVPTXRegClass intype> {
3422 def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
3423 (ins Int64Regs:$t)>;
3424 def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3428 defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3429 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
3430 Float32Regs, Float32Regs>;
3431 defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3432 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
3433 Int32Regs, Float32Regs>;
3434 defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3435 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
3436 Int32Regs, Float32Regs>;
// Base class for the unified-mode "tex.2d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x and $y inputs.
//   tex     - dag providing the leading $t operand, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, {$x, $y}];
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3438 class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3439 NVPTXRegClass intype, dag tex>
3440 : NVPTXInst<(outs outtype:$r, outtype:$g,
3441 outtype:$b, outtype:$a),
3442 !con(tex, (ins intype:$x, intype:$y)),
3443 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
// Emits two variants of the base class: _R takes $t as an Int64Regs
// register, _I takes $t as an i64imm immediate.
3446 multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3447 NVPTXRegClass intype> {
3448 def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3449 def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Concrete instructions for every visible result/input combination. The s32
// and u32 result forms both use Int32Regs for their results.
3452 defm TEX_UNIFIED_2D_F32_S32
3453 : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
3454 defm TEX_UNIFIED_2D_F32_F32
3455 : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3456 defm TEX_UNIFIED_2D_S32_S32
3457 : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
3458 defm TEX_UNIFIED_2D_S32_F32
3459 : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3460 defm TEX_UNIFIED_2D_U32_S32
3461 : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
3462 defm TEX_UNIFIED_2D_U32_F32
3463 : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the unified-mode "tex.level.2d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x, $y and $lod inputs.
//   tex     - dag providing the leading $t operand, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, {$x, $y}], $lod;
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3465 class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
3466 NVPTXRegClass intype, dag tex>
3467 : NVPTXInst<(outs outtype:$r, outtype:$g,
3468 outtype:$b, outtype:$a),
3469 !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
3470 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
// Emits two variants of the base class: _R takes $t as an Int64Regs
// register, _I takes $t as an i64imm immediate.
3473 multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
3474 NVPTXRegClass intype> {
3475 def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3476 def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3479 defm TEX_UNIFIED_2D_F32_F32_LEVEL
3480 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3481 defm TEX_UNIFIED_2D_S32_F32_LEVEL
3482 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3483 defm TEX_UNIFIED_2D_U32_F32_LEVEL
3484 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the unified-mode "tex.grad.2d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and the four gradient inputs
//             $gradx0, $gradx1, $grady0, $grady1.
//   tex     - dag providing the leading $t operand, prepended to the
//             remaining inputs with !con.
// The asm text has the form:
//   inst {r,g,b,a}, [$t, {$x, $y}], {$gradx0, $gradx1}, {$grady0, $grady1};
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3486 class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
3487 NVPTXRegClass intype, dag tex>
3488 : NVPTXInst<(outs outtype:$r, outtype:$g,
3489 outtype:$b, outtype:$a),
3490 !con(tex, (ins intype:$x, intype:$y,
3491 intype:$gradx0, intype:$gradx1,
3492 intype:$grady0, intype:$grady1)),
3493 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
3494 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
// Emits two variants of the base class: _R takes $t as an Int64Regs
// register, _I takes $t as an i64imm immediate.
3496 multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
3497 NVPTXRegClass intype> {
3498 def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3499 def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3502 defm TEX_UNIFIED_2D_F32_F32_GRAD
3503 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3504 defm TEX_UNIFIED_2D_S32_F32_GRAD
3505 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3506 defm TEX_UNIFIED_2D_U32_F32_GRAD
3507 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the unified-mode "tex.a2d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x and $y inputs ($l is always
//             Int32Regs).
//   tex     - dag providing the leading $t operand, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, {$l, $x, $y, $y}];
// $y is printed twice, so the bracketed vector has four elements built from
// three inputs. NOTE(review): presumably padding to the four-element vector
// the PTX a2d form expects - confirm against the PTX ISA.
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3509 class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
3510 NVPTXRegClass intype, dag tex>
3511 : NVPTXInst<(outs outtype:$r, outtype:$g,
3512 outtype:$b, outtype:$a),
3513 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
3514 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
// Emits two variants of the base class: _R takes $t as an Int64Regs
// register, _I takes $t as an i64imm immediate.
3516 multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
3517 NVPTXRegClass intype> {
3518 def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3519 def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
// Concrete instructions for every visible result/input combination. The s32
// and u32 result forms both use Int32Regs for their results.
3522 defm TEX_UNIFIED_2D_ARRAY_F32_S32
3523 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
3524 defm TEX_UNIFIED_2D_ARRAY_F32_F32
3525 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3526 defm TEX_UNIFIED_2D_ARRAY_S32_S32
3527 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
3528 defm TEX_UNIFIED_2D_ARRAY_S32_F32
3529 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3530 defm TEX_UNIFIED_2D_ARRAY_U32_S32
3531 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
3532 defm TEX_UNIFIED_2D_ARRAY_U32_F32
3533 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base class for the unified-mode "tex.level.a2d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x and $y inputs ($l is always
//             Int32Regs).
//   tex     - dag providing the leading $t operand, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, {$l, $x, $y, $y}], $lod;
// $y is printed twice in the bracketed vector. NOTE(review): presumably
// padding to a four-element vector - confirm against the PTX ISA.
// NOTE(review): the visible input list stops after "intype:$y," and never
// closes, yet the asm string uses $lod; the continuation that declares $lod
// is not visible in this listing, nor are the lines closing the definition.
3535 class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3536 NVPTXRegClass intype, dag tex>
3537 : NVPTXInst<(outs outtype:$r, outtype:$g,
3538 outtype:$b, outtype:$a),
3539 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3541 inst # " \t\\{$r, $g, $b, $a\\},"
3542 " [$t, \\{$l, $x, $y, $y\\}], $lod;",
// Emits the _R variant ($t as an Int64Regs register) and an _I variant.
// NOTE(review): the operand dag of _I is not visible in this listing;
// presumably (ins i64imm:$t) as in the sibling multiclasses - confirm.
3544 multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3545 NVPTXRegClass intype> {
3546 def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3547 (ins Int64Regs:$t)>;
3548 def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3552 defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3553 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
3554 Float32Regs, Float32Regs>;
3555 defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3556 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
3557 Int32Regs, Float32Regs>;
3558 defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3559 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
3560 Int32Regs, Float32Regs>;
// Base class for the unified-mode "tex.grad.a2d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and the four gradient inputs
//             $gradx0, $gradx1, $grady0, $grady1 ($l is always Int32Regs).
//   tex     - dag providing the leading $t operand, prepended to the
//             remaining inputs with !con.
// The asm text has the form:
//   inst {r,g,b,a}, [$t, {$l, $x, $y, $y}],
//        {$gradx0, $gradx1}, {$grady0, $grady1};
// $y is printed twice in the first bracketed vector. NOTE(review):
// presumably padding to a four-element vector - confirm against the PTX ISA.
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3562 class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3563 NVPTXRegClass intype, dag tex>
3564 : NVPTXInst<(outs outtype:$r, outtype:$g,
3565 outtype:$b, outtype:$a),
3566 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3567 intype:$gradx0, intype:$gradx1,
3568 intype:$grady0, intype:$grady1)),
3569 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
3570 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
// Emits the _R variant ($t as an Int64Regs register) and an _I variant.
// NOTE(review): the operand dag of _I is not visible in this listing;
// presumably (ins i64imm:$t) as in the sibling multiclasses - confirm.
3572 multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3573 NVPTXRegClass intype> {
3574 def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
3575 (ins Int64Regs:$t)>;
3576 def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
// Concrete instructions: f32, s32 and u32 results, all with Float32Regs
// inputs. The s32 and u32 forms both use Int32Regs for their results.
3580 defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3581 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
3582 Float32Regs, Float32Regs>;
3583 defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3584 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
3585 Int32Regs, Float32Regs>;
3586 defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3587 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
3588 Int32Regs, Float32Regs>;
// Base class for the unified-mode "tex.3d" instructions.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the $x, $y and $z inputs.
//   tex     - dag providing the leading $t operand, prepended to the
//             remaining inputs with !con.
// The asm text has the form: inst {r,g,b,a}, [$t, {$x, $y, $z, $z}];
// $z is printed twice, so the bracketed vector has four elements built from
// three inputs. NOTE(review): presumably padding to the four-element vector
// the PTX 3d form expects - confirm against the PTX ISA.
// NOTE(review): the lines that close this definition are not visible in
// this listing.
3590 class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
3591 NVPTXRegClass intype, dag tex>
3592 : NVPTXInst<(outs outtype:$r, outtype:$g,
3593 outtype:$b, outtype:$a),
3594 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3595 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
// Emits two variants of the base class: _R takes $t as an Int64Regs
// register, _I takes $t as an i64imm immediate.
3597 multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
3598 NVPTXRegClass intype> {
3599 def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3600 def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Concrete instructions for every visible result/input combination. The s32
// and u32 result forms both use Int32Regs for their results.
3603 defm TEX_UNIFIED_3D_F32_S32
3604 : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
3605 defm TEX_UNIFIED_3D_F32_F32
3606 : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3607 defm TEX_UNIFIED_3D_S32_S32
3608 : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
3609 defm TEX_UNIFIED_3D_S32_F32
3610 : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3611 defm TEX_UNIFIED_3D_U32_S32
3612 : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
3613 defm TEX_UNIFIED_3D_U32_F32
3614 : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instructions printed with the `tex.level.3d.*` mnemonics.
// Same operand layout as the plain 3D form plus an extra `intype` operand $lod,
// which the asm string prints after the bracketed address operand.
3616 class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
3617 NVPTXRegClass intype, dag tex>
3618 : NVPTXInst<(outs outtype:$r, outtype:$g,
3619 outtype:$b, outtype:$a),
3620 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3621 inst # " \t\\{$r, $g, $b, $a\\},"
3622 " [$t, \\{$x, $y, $z, $z\\}], $lod;",
// Emits `_R` ($t as Int64Regs) and `_I` ($t as i64imm) for each instantiation.
3624 multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
3625 NVPTXRegClass intype> {
3626 def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3627 def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations for f32, s32 and u32 results; all take Float32Regs inputs.
3630 defm TEX_UNIFIED_3D_F32_F32_LEVEL
3631 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3632 defm TEX_UNIFIED_3D_S32_F32_LEVEL
3633 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3634 defm TEX_UNIFIED_3D_U32_F32_LEVEL
3635 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instructions printed with the `tex.grad.3d.*` mnemonics.
// Inputs: the `tex` dag (providing $t) joined via !con with coordinates
// $x, $y, $z and two three-component gradients ($gradx0..2, $grady0..2).
// The asm string prints $z, $gradx2 and $grady2 twice each, so every vector
// operand has four elements.
3637 class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
3638 NVPTXRegClass intype, dag tex>
3639 : NVPTXInst<(outs outtype:$r, outtype:$g,
3640 outtype:$b, outtype:$a),
3641 !con(tex, (ins intype:$x, intype:$y, intype:$z,
3642 intype:$gradx0, intype:$gradx1,
3643 intype:$gradx2, intype:$grady0,
3644 intype:$grady1, intype:$grady2)),
3645 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
3646 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3647 " \\{$grady0, $grady1, $grady2, $grady2\\};",
// Emits `_R` ($t as Int64Regs) and `_I` ($t as i64imm) for each instantiation.
3649 multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
3650 NVPTXRegClass intype> {
3651 def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3652 def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations for f32, s32 and u32 results; all take Float32Regs inputs.
3655 defm TEX_UNIFIED_3D_F32_F32_GRAD
3656 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3657 defm TEX_UNIFIED_3D_S32_F32_GRAD
3658 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3659 defm TEX_UNIFIED_3D_U32_F32_GRAD
3660 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instructions printed with the `tex.cube.*` mnemonics.
// Base class: four `outtype` results; inputs are the `tex` dag (providing $t)
// joined via !con with $x, $y, $z. The asm string prints $z twice so the
// coordinate vector has four elements.
3662 class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
3663 NVPTXRegClass intype, dag tex>
3664 : NVPTXInst<(outs outtype:$r, outtype:$g,
3665 outtype:$b, outtype:$a),
3666 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3667 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
// Emits `_R` ($t as Int64Regs) and `_I` ($t as i64imm) for each instantiation.
3669 multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
3670 NVPTXRegClass intype> {
3671 def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3672 def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations for f32, s32 and u32 results; all take Float32Regs inputs.
3675 defm TEX_UNIFIED_CUBE_F32_F32
3676 : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3677 defm TEX_UNIFIED_CUBE_S32_F32
3678 : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3679 defm TEX_UNIFIED_CUBE_U32_F32
3680 : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Instructions printed with the `tex.level.cube.*` mnemonics.
// Same operand layout as the plain cube form plus an extra `intype` operand
// $lod, which the asm string prints after the bracketed address operand.
3682 class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
3683 NVPTXRegClass intype, dag tex>
3684 : NVPTXInst<(outs outtype:$r, outtype:$g,
3685 outtype:$b, outtype:$a),
3686 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3687 inst # " \t\\{$r, $g, $b, $a\\},"
3688 " [$t, \\{$x, $y, $z, $z\\}], $lod;",
// Emits `_R` ($t as Int64Regs) and `_I`; the `tex` dag of `_I` is on a line not
// visible here (presumably i64imm:$t, as in the sibling multiclasses -- confirm).
3690 multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
3691 NVPTXRegClass intype> {
3692 def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
3693 (ins Int64Regs:$t)>;
3694 def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
// Instantiations for f32, s32 and u32 results; all take Float32Regs inputs.
3698 defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
3699 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
3700 Float32Regs, Float32Regs>;
3701 defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
3702 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
3703 Int32Regs, Float32Regs>;
3704 defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
3705 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
3706 Int32Regs, Float32Regs>;
// Instructions printed with the `tex.acube.*` mnemonics.
// Base class: four `outtype` results; inputs are the `tex` dag (providing $t)
// joined via !con with the i32 operand $l (presumably the array layer index --
// confirm) and $x, $y, $z. Here the four-element coordinate vector is
// {$l, $x, $y, $z}, so no operand is repeated.
3708 class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
3709 NVPTXRegClass intype, dag tex>
3710 : NVPTXInst<(outs outtype:$r, outtype:$g,
3711 outtype:$b, outtype:$a),
3712 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
3713 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
// Emits `_R` ($t as Int64Regs) and `_I`; the `tex` dag of `_I` is on a line not
// visible here (presumably i64imm:$t, as in the sibling multiclasses -- confirm).
3715 multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3716 NVPTXRegClass intype> {
3717 def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
3718 (ins Int64Regs:$t)>;
3719 def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
// Instantiations for f32, s32 and u32 results; all take Float32Regs inputs.
3723 defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
3724 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3725 defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
3726 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3727 defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
3728 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
// Instructions printed with the `tex.level.acube.*` mnemonics.
// Base class: four `outtype` results; inputs are the `tex` dag (providing $t)
// joined via !con with $l, $x, $y, $z. The asm string also prints $lod after
// the address operand; its declaration continues on a line not visible here
// (presumably intype:$lod, as in the other *_LEVEL classes -- confirm).
3730 class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3731 NVPTXRegClass intype, dag tex>
3732 : NVPTXInst<(outs outtype:$r, outtype:$g,
3733 outtype:$b, outtype:$a),
3734 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
3736 inst # " \t\\{$r, $g, $b, $a\\},"
3737 " [$t, \\{$l, $x, $y, $z\\}], $lod;",
// Emits `_R` ($t as Int64Regs) and `_I`; the `tex` dag of `_I` is on a line not
// visible here (presumably i64imm:$t, as in the sibling multiclasses -- confirm).
3739 multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3740 NVPTXRegClass intype> {
3741 def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3742 (ins Int64Regs:$t)>;
3743 def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
// Instantiations for f32, s32 and u32 results; all take Float32Regs inputs.
3747 defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3748 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3749 Float32Regs, Float32Regs>;
3750 defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3751 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3752 Int32Regs, Float32Regs>;
3753 defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3754 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3755 Int32Regs, Float32Regs>;
// Instructions printed with the `tld4.<c>.2d.*` mnemonics.
// Base class: four `outtype` results ($v0..$v3); inputs are the `tex` dag
// (providing $t) joined via !con with the coordinates $x and $y.
3757 class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3758 NVPTXRegClass intype, dag tex>
3759 : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3760 outtype:$v2, outtype:$v3),
3761 !con(tex, (ins intype:$x, intype:$y)),
3762 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
// Emits `_R` ($t as Int64Regs) and `_I` ($t as i64imm) for each instantiation.
3764 multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3765 NVPTXRegClass intype> {
3766 def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3767 def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations: one per component selector (.r/.g/.b/.a) for each of the
// f32, s32 and u32 result types; all take Float32Regs coordinates.
3770 defm TLD4_UNIFIED_R_2D_F32_F32
3771 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3772 defm TLD4_UNIFIED_G_2D_F32_F32
3773 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3774 defm TLD4_UNIFIED_B_2D_F32_F32
3775 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3776 defm TLD4_UNIFIED_A_2D_F32_F32
3777 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3779 defm TLD4_UNIFIED_R_2D_S32_F32
3780 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3781 defm TLD4_UNIFIED_G_2D_S32_F32
3782 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3783 defm TLD4_UNIFIED_B_2D_S32_F32
3784 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3785 defm TLD4_UNIFIED_A_2D_S32_F32
3786 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3788 defm TLD4_UNIFIED_R_2D_U32_F32
3789 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3790 defm TLD4_UNIFIED_G_2D_U32_F32
3791 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3792 defm TLD4_UNIFIED_B_2D_U32_F32
3793 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3794 defm TLD4_UNIFIED_A_2D_U32_F32
3795 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3801 //=== Surface load instructions
// Sets IsSuld = true on the defs that follow, up to the matching close brace
// (not visible here).
3803 let IsSuld = true in {
// Single-result `suld.b.1d.*` loads: one `outtype` result $r; inputs are the
// `surf` dag (providing $s) joined via !con with the i32 coordinate $x.
3805 class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
3806 : NVPTXInst<(outs outtype:$r),
3807 !con(surf, (ins Int32Regs:$x)),
3808 inst # " \\{$r\\}, [$s, \\{$x\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
3810 multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
3811 def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
3812 def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32/b64 element sizes for each of the .clamp, .trap
// and .zero suffixes. Both b8 and b16 results are held in Int16Regs.
3815 defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
3816 defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
3817 defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
3818 defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;
3820 defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
3821 defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
3822 defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
3823 defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;
3825 defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
3826 defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
3827 defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
3828 defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
// Single-result `suld.b.a1d.*` loads: one `outtype` result $r; inputs are the
// `surf` dag (providing $s) joined via !con with the i32 operands $l and $x
// ($l is presumably the array layer index -- confirm).
3830 class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
3831 : NVPTXInst<(outs outtype:$r),
3832 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
3833 inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
3835 multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
3836 def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
3837 def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32/b64 element sizes for each of the .clamp, .trap
// and .zero suffixes. Both b8 and b16 results are held in Int16Regs.
3840 defm SULD_1D_ARRAY_I8_CLAMP
3841 : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
3842 defm SULD_1D_ARRAY_I16_CLAMP
3843 : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
3844 defm SULD_1D_ARRAY_I32_CLAMP
3845 : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
3846 defm SULD_1D_ARRAY_I64_CLAMP
3847 : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;
3849 defm SULD_1D_ARRAY_I8_TRAP
3850 : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
3851 defm SULD_1D_ARRAY_I16_TRAP
3852 : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
3853 defm SULD_1D_ARRAY_I32_TRAP
3854 : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
3855 defm SULD_1D_ARRAY_I64_TRAP
3856 : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;
3858 defm SULD_1D_ARRAY_I8_ZERO
3859 : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
3860 defm SULD_1D_ARRAY_I16_ZERO
3861 : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
3862 defm SULD_1D_ARRAY_I32_ZERO
3863 : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
3864 defm SULD_1D_ARRAY_I64_ZERO
3865 : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
// Single-result `suld.b.2d.*` loads: one `outtype` result $r; inputs are the
// `surf` dag (providing $s) joined via !con with the i32 coordinates $x, $y.
3867 class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
3868 : NVPTXInst<(outs outtype:$r),
3869 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
3870 inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
3872 multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
3873 def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
3874 def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32/b64 element sizes for each of the .clamp, .trap
// and .zero suffixes. Both b8 and b16 results are held in Int16Regs.
3877 defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
3878 defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
3879 defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
3880 defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;
3882 defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
3883 defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
3884 defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
3885 defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;
3887 defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
3888 defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
3889 defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
3890 defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
// Single-result `suld.b.a2d.*` loads: one `outtype` result $r; inputs are the
// `surf` dag (providing $s) joined via !con with the i32 operands $l, $x, $y
// ($l is presumably the array layer index -- confirm). The asm string prints
// $y twice so the coordinate vector has four elements.
3892 class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
3893 : NVPTXInst<(outs outtype:$r),
3894 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
3895 inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
3897 multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
3898 def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
3899 def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32/b64 element sizes for each of the .clamp, .trap
// and .zero suffixes. Both b8 and b16 results are held in Int16Regs.
3902 defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
3903 defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
3904 defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
3905 defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;
3907 defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
3908 defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
3909 defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
3910 defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;
3912 defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
3913 defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
3914 defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
3915 defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
// Single-result `suld.b.3d.*` loads: one `outtype` result $r; inputs are the
// `surf` dag (providing $s) joined via !con with the i32 coordinates
// $x, $y, $z. The asm string prints $z twice so the coordinate vector has four
// elements.
3917 class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
3918 : NVPTXInst<(outs outtype:$r),
3919 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
3920 inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
3922 multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
3923 def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
3924 def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32/b64 element sizes for each of the .clamp, .trap
// and .zero suffixes. Both b8 and b16 results are held in Int16Regs.
3927 defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
3928 defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
3929 defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
3930 defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;
3932 defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
3933 defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
3934 defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
3935 defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;
3937 defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
3938 defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
3939 defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
3940 defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
// Two-result `suld.b.1d.v2.*` loads: results $r and $g of `outtype`; inputs are
// the `surf` dag (providing $s) joined via !con with the i32 coordinate $x.
3945 class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
3946 : NVPTXInst<(outs outtype:$r, outtype:$g),
3947 !con(surf, (ins Int32Regs:$x)),
3948 inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
3950 multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
3951 def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
3952 def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32/b64 element sizes for each of the .clamp, .trap
// and .zero suffixes. Both b8 and b16 results are held in Int16Regs.
3955 defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
3956 defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
3957 defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
3958 defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;
3960 defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
3961 defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
3962 defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
3963 defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;
3965 defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
3966 defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
3967 defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
3968 defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
// Two-result `suld.b.a1d.v2.*` loads: results $r and $g of `outtype`; inputs
// are the `surf` dag (providing $s) joined via !con with the i32 operands $l
// and $x ($l is presumably the array layer index -- confirm).
3970 class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
3971 : NVPTXInst<(outs outtype:$r, outtype:$g),
3972 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
3973 inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
3975 multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
3976 def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
3977 def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32/b64 element sizes for each of the .clamp, .trap
// and .zero suffixes. Both b8 and b16 results are held in Int16Regs.
3980 defm SULD_1D_ARRAY_V2I8_CLAMP
3981 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
3982 defm SULD_1D_ARRAY_V2I16_CLAMP
3983 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
3984 defm SULD_1D_ARRAY_V2I32_CLAMP
3985 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
3986 defm SULD_1D_ARRAY_V2I64_CLAMP
3987 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;
3989 defm SULD_1D_ARRAY_V2I8_TRAP
3990 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
3991 defm SULD_1D_ARRAY_V2I16_TRAP
3992 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
3993 defm SULD_1D_ARRAY_V2I32_TRAP
3994 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
3995 defm SULD_1D_ARRAY_V2I64_TRAP
3996 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;
3998 defm SULD_1D_ARRAY_V2I8_ZERO
3999 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
4000 defm SULD_1D_ARRAY_V2I16_ZERO
4001 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
4002 defm SULD_1D_ARRAY_V2I32_ZERO
4003 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
4004 defm SULD_1D_ARRAY_V2I64_ZERO
4005 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
// Two-result `suld.b.2d.v2.*` loads: results $r and $g of `outtype`; inputs are
// the `surf` dag (providing $s) joined via !con with the i32 coordinates
// $x and $y.
4007 class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4008 : NVPTXInst<(outs outtype:$r, outtype:$g),
4009 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4010 inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
4012 multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
4013 def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4014 def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32/b64 element sizes for each of the .clamp, .trap
// and .zero suffixes. Both b8 and b16 results are held in Int16Regs.
4017 defm SULD_2D_V2I8_CLAMP
4018 : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
4019 defm SULD_2D_V2I16_CLAMP
4020 : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
4021 defm SULD_2D_V2I32_CLAMP
4022 : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
4023 defm SULD_2D_V2I64_CLAMP
4024 : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;
4026 defm SULD_2D_V2I8_TRAP
4027 : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
4028 defm SULD_2D_V2I16_TRAP
4029 : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
4030 defm SULD_2D_V2I32_TRAP
4031 : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
4032 defm SULD_2D_V2I64_TRAP
4033 : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;
4035 defm SULD_2D_V2I8_ZERO
4036 : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
4037 defm SULD_2D_V2I16_ZERO
4038 : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
4039 defm SULD_2D_V2I32_ZERO
4040 : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
4041 defm SULD_2D_V2I64_ZERO
4042 : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
// Two-result `suld.b.a2d.v2.*` loads: results $r and $g of `outtype`; inputs
// are the `surf` dag (providing $s) joined via !con with the i32 operands
// $l, $x, $y ($l is presumably the array layer index -- confirm). The asm
// string prints $y twice so the coordinate vector has four elements.
4044 class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4045 : NVPTXInst<(outs outtype:$r, outtype:$g),
4046 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4047 inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
4049 multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
4050 def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4051 def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32/b64 element sizes for each of the .clamp, .trap
// and .zero suffixes. Both b8 and b16 results are held in Int16Regs.
4054 defm SULD_2D_ARRAY_V2I8_CLAMP
4055 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
4056 defm SULD_2D_ARRAY_V2I16_CLAMP
4057 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
4058 defm SULD_2D_ARRAY_V2I32_CLAMP
4059 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
4060 defm SULD_2D_ARRAY_V2I64_CLAMP
4061 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;
4063 defm SULD_2D_ARRAY_V2I8_TRAP
4064 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
4065 defm SULD_2D_ARRAY_V2I16_TRAP
4066 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
4067 defm SULD_2D_ARRAY_V2I32_TRAP
4068 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
4069 defm SULD_2D_ARRAY_V2I64_TRAP
4070 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;
4072 defm SULD_2D_ARRAY_V2I8_ZERO
4073 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
4074 defm SULD_2D_ARRAY_V2I16_ZERO
4075 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
4076 defm SULD_2D_ARRAY_V2I32_ZERO
4077 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
4078 defm SULD_2D_ARRAY_V2I64_ZERO
4079 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
// Two-result `suld.b.3d.v2.*` loads: results $r and $g of `outtype`; inputs are
// the `surf` dag (providing $s) joined via !con with the i32 coordinates
// $x, $y, $z. The asm string prints $z twice so the coordinate vector has four
// elements.
4081 class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4082 : NVPTXInst<(outs outtype:$r, outtype:$g),
4083 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4084 inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
4086 multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
4087 def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4088 def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32/b64 element sizes for each of the .clamp, .trap
// and .zero suffixes. Both b8 and b16 results are held in Int16Regs.
4091 defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
4092 defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
4093 defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
4094 defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;
4096 defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
4097 defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
4098 defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
4099 defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;
4101 defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
4102 defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
4103 defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
4104 defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
// Four-result `suld.b.1d.v4.*` loads: results $r, $g, $b, $a of `outtype`;
// inputs are the `surf` dag (providing $s) joined via !con with the i32
// coordinate $x.
4110 class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4111 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4112 !con(surf, (ins Int32Regs:$x)),
4113 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
4115 multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
4116 def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4117 def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32 element sizes (no b64 form is instantiated here)
// for each of the .clamp, .trap and .zero suffixes. Both b8 and b16 results are
// held in Int16Regs.
4120 defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
4121 defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
4122 defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
4124 defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
4125 defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
4126 defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
4128 defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
4129 defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
4130 defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
// Four-result `suld.b.a1d.v4.*` loads: results $r, $g, $b, $a of `outtype`;
// inputs are the `surf` dag (providing $s) joined via !con with the i32
// operands $l and $x ($l is presumably the array layer index -- confirm).
4132 class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4133 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4134 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
4135 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
4137 multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4138 def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4139 def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32 element sizes (no b64 form is instantiated here)
// for each of the .clamp, .trap and .zero suffixes. Both b8 and b16 results are
// held in Int16Regs.
4142 defm SULD_1D_ARRAY_V4I8_CLAMP
4143 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
4144 defm SULD_1D_ARRAY_V4I16_CLAMP
4145 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
4146 defm SULD_1D_ARRAY_V4I32_CLAMP
4147 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
4149 defm SULD_1D_ARRAY_V4I8_TRAP
4150 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
4151 defm SULD_1D_ARRAY_V4I16_TRAP
4152 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
4153 defm SULD_1D_ARRAY_V4I32_TRAP
4154 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
4156 defm SULD_1D_ARRAY_V4I8_ZERO
4157 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
4158 defm SULD_1D_ARRAY_V4I16_ZERO
4159 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
4160 defm SULD_1D_ARRAY_V4I32_ZERO
4161 : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
// Four-result `suld.b.2d.v4.*` loads: results $r, $g, $b, $a of `outtype`;
// inputs are the `surf` dag (providing $s) joined via !con with the i32
// coordinates $x and $y.
4163 class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4164 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4165 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4166 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
4168 multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
4169 def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4170 def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32 element sizes (no b64 form is instantiated here)
// for each of the .clamp, .trap and .zero suffixes. Both b8 and b16 results are
// held in Int16Regs.
4173 defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
4174 defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
4175 defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
4177 defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
4178 defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
4179 defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
4181 defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
4182 defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
4183 defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
// Four-result `suld.b.a2d.v4.*` loads: results $r, $g, $b, $a of `outtype`;
// inputs are the `surf` dag (providing $s) joined via !con with the i32
// operands $l, $x, $y ($l is presumably the array layer index -- confirm).
// The asm string prints $y twice so the coordinate vector has four elements.
4185 class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4186 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4187 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4188 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
4190 multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4191 def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4192 def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32 element sizes (no b64 form is instantiated here)
// for each of the .clamp, .trap and .zero suffixes. Both b8 and b16 results are
// held in Int16Regs.
4195 defm SULD_2D_ARRAY_V4I8_CLAMP
4196 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
4197 defm SULD_2D_ARRAY_V4I16_CLAMP
4198 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
4199 defm SULD_2D_ARRAY_V4I32_CLAMP
4200 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
4202 defm SULD_2D_ARRAY_V4I8_TRAP
4203 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
4204 defm SULD_2D_ARRAY_V4I16_TRAP
4205 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
4206 defm SULD_2D_ARRAY_V4I32_TRAP
4207 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
4209 defm SULD_2D_ARRAY_V4I8_ZERO
4210 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
4211 defm SULD_2D_ARRAY_V4I16_ZERO
4212 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
4213 defm SULD_2D_ARRAY_V4I32_ZERO
4214 : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
// Four-result `suld.b.3d.v4.*` loads: results $r, $g, $b, $a of `outtype`;
// inputs are the `surf` dag (providing $s) joined via !con with the i32
// coordinates $x, $y, $z. The asm string prints $z twice so the coordinate
// vector has four elements.
4216 class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4217 : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4218 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4219 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Emits `_R` ($s as Int64Regs) and `_I` ($s as i64imm) for each instantiation.
4221 multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
4222 def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4223 def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations: b8/b16/b32 element sizes (no b64 form is instantiated here)
// for each of the .clamp, .trap and .zero suffixes. Both b8 and b16 results are
// held in Int16Regs.
4226 defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
4227 defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
4228 defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
4230 defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
4231 defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
4232 defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
4234 defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
4235 defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
4236 defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
4240 //-----------------------------------
4241 // Texture Query Intrinsics
4242 //-----------------------------------
// `txq.*` query instructions. Each def writes an i32 result $d and reads one
// operand $a, printed inside brackets. Every query comes as a pair: one form
// takes $a as Int64Regs and the other as i64imm (the `_R` / `_I` suffixes on
// the visible def names). IsSurfTexQuery = true is applied to the defs in this
// `let` scope; its closing brace is not visible here.
// NOTE(review): the def-name lines of the width/height/depth entries and the
// trailing pattern argument of every def are not visible in this view.
4244 let IsSurfTexQuery = true in {
4245 def TXQ_CHANNEL_ORDER_R
4246 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4247 "txq.channel_order.b32 \t$d, [$a];",
4249 def TXQ_CHANNEL_ORDER_I
4250 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4251 "txq.channel_order.b32 \t$d, [$a];",
4253 def TXQ_CHANNEL_DATA_TYPE_R
4254 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4255 "txq.channel_data_type.b32 \t$d, [$a];",
4257 def TXQ_CHANNEL_DATA_TYPE_I
4258 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4259 "txq.channel_data_type.b32 \t$d, [$a];",
4262 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4263 "txq.width.b32 \t$d, [$a];",
4266 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4267 "txq.width.b32 \t$d, [$a];",
4270 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4271 "txq.height.b32 \t$d, [$a];",
4274 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4275 "txq.height.b32 \t$d, [$a];",
4278 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4279 "txq.depth.b32 \t$d, [$a];",
4282 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4283 "txq.depth.b32 \t$d, [$a];",
4285 def TXQ_ARRAY_SIZE_R
4286 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4287 "txq.array_size.b32 \t$d, [$a];",
4289 def TXQ_ARRAY_SIZE_I
4290 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4291 "txq.array_size.b32 \t$d, [$a];",
4293 def TXQ_NUM_SAMPLES_R
4294 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4295 "txq.num_samples.b32 \t$d, [$a];",
4297 def TXQ_NUM_SAMPLES_I
4298 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4299 "txq.num_samples.b32 \t$d, [$a];",
4301 def TXQ_NUM_MIPMAP_LEVELS_R
4302 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4303 "txq.num_mipmap_levels.b32 \t$d, [$a];",
4305 def TXQ_NUM_MIPMAP_LEVELS_I
4306 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4307 "txq.num_mipmap_levels.b32 \t$d, [$a];",
// Selection patterns: each int_nvvm_txq_* intrinsic applied to an Int64Regs
// operand is selected to the corresponding TXQ_*_R instruction. Only the
// register (`_R`) forms are targeted by these patterns.
4311 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4312 (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
4313 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4314 (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
4315 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4316 (TXQ_WIDTH_R Int64Regs:$a)>;
4317 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4318 (TXQ_HEIGHT_R Int64Regs:$a)>;
4319 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4320 (TXQ_DEPTH_R Int64Regs:$a)>;
4321 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4322 (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
4323 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4324 (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
4325 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4326 (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
4329 //-----------------------------------
4330 // Surface Query Intrinsics
4331 //-----------------------------------
// `suq.*` query instructions. Each def writes an i32 result $d and reads one
// operand $a, printed inside brackets. Every query comes as a pair: one form
// takes $a as Int64Regs and the other as i64imm (the `_R` / `_I` suffixes on
// the visible def names). IsSurfTexQuery = true is applied to the defs in this
// `let` scope; its closing brace is not visible here.
// NOTE(review): the def-name lines of the width/height/depth entries and the
// trailing pattern argument of every def are not visible in this view.
4333 let IsSurfTexQuery = true in {
4334 def SUQ_CHANNEL_ORDER_R
4335 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4336 "suq.channel_order.b32 \t$d, [$a];",
4338 def SUQ_CHANNEL_ORDER_I
4339 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4340 "suq.channel_order.b32 \t$d, [$a];",
4342 def SUQ_CHANNEL_DATA_TYPE_R
4343 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4344 "suq.channel_data_type.b32 \t$d, [$a];",
4346 def SUQ_CHANNEL_DATA_TYPE_I
4347 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4348 "suq.channel_data_type.b32 \t$d, [$a];",
4351 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4352 "suq.width.b32 \t$d, [$a];",
4355 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4356 "suq.width.b32 \t$d, [$a];",
4359 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4360 "suq.height.b32 \t$d, [$a];",
4363 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4364 "suq.height.b32 \t$d, [$a];",
4367 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4368 "suq.depth.b32 \t$d, [$a];",
4371 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4372 "suq.depth.b32 \t$d, [$a];",
4374 def SUQ_ARRAY_SIZE_R
4375 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4376 "suq.array_size.b32 \t$d, [$a];",
4378 def SUQ_ARRAY_SIZE_I
4379 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4380 "suq.array_size.b32 \t$d, [$a];",
// Selection patterns for the surface query intrinsics.
// Each int_nvvm_suq_* intrinsic is selected to the register-operand (_R) form
// of the SUQ instruction with the same query name; the single Int64Regs
// operand $a is forwarded unchanged. No pattern in this group targets an _I
// (i64imm operand) variant.
4384 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4385 (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
4386 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4387 (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
4388 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4389 (SUQ_WIDTH_R Int64Regs:$a)>;
4390 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4391 (SUQ_HEIGHT_R Int64Regs:$a)>;
4392 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4393 (SUQ_DEPTH_R Int64Regs:$a)>;
4394 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4395 (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
4398 //===- Handle Query -------------------------------------------------------===//
4400 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
4402 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4403 "istypep.samplerref \t$d, $a;",
4404 [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4406 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4407 "istypep.surfref \t$d, $a;",
4408 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4410 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4411 "istypep.texref \t$d, $a;",
4412 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4414 //===- Surface Stores -----------------------------------------------------===//
4416 let IsSust = true in {
4418 class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
4420 !con(surf, (ins Int32Regs:$x, intype:$r)),
4421 inst # " \t[$s, \\{$x\\}], \\{$r\\};",
4423 multiclass SUST_1D<string inst, NVPTXRegClass intype> {
4424 def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
4425 def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_1D multiclass (one-component 1d surface stores).
// Every defm produces two records, <name>_R and <name>_I, as defined by the
// multiclass. The first argument is the full mnemonic that is emitted; the
// second is the register class of the stored value. b8 and b16 stores both
// use Int16Regs.
// sust.b is instantiated for b8/b16/b32/b64 with the .clamp, .trap and .zero
// suffixes.
4428 defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
4429 defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
4430 defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
4431 defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
4433 defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
4434 defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
4435 defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
4436 defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
4438 defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
4439 defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
4440 defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
4441 defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
// sust.p is instantiated only for b8/b16/b32 and only with the .trap suffix.
4443 defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
4444 defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
4445 defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
4447 class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4449 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
4450 inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
4452 multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
4453 def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4454 def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_1D_V2 multiclass (two-component 1d surface
// stores). Every defm produces <name>_R and <name>_I records. The first
// argument is the emitted mnemonic; the second is the register class used for
// both stored components. b8 and b16 stores both use Int16Regs.
// sust.b is instantiated for b8/b16/b32/b64 with .clamp, .trap and .zero.
4457 defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
4458 defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
4459 defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
4460 defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
4462 defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
4463 defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
4464 defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
4465 defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
4467 defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
4468 defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
4469 defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
4470 defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
// sust.p is instantiated only for b8/b16/b32 and only with the .trap suffix.
4472 defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
4473 defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
4474 defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
4476 class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4478 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g,
4479 intype:$b, intype:$a)),
4480 inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4482 multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
4483 def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4484 def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_1D_V4 multiclass (four-component 1d surface
// stores). Every defm produces <name>_R and <name>_I records. The first
// argument is the emitted mnemonic; the second is the register class used for
// all four stored components. b8 and b16 stores both use Int16Regs.
// Unlike the scalar and v2 groups, no b64 variant is instantiated here.
4487 defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
4488 defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
4489 defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
4491 defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
4492 defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
4493 defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
4495 defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
4496 defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
4497 defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
// sust.p is instantiated only with the .trap suffix.
4499 defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
4500 defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
4501 defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
4503 class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4505 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
4506 inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4508 multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
4509 def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4510 def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_1D_ARRAY multiclass (one-component stores to 1d
// array surfaces). Every defm produces <name>_R and <name>_I records.
// Note the naming difference: record names spell the geometry "1D_ARRAY"
// while the emitted mnemonic spells it "a1d".
// The second argument is the register class of the stored value; b8 and b16
// stores both use Int16Regs.
// sust.b is instantiated for b8/b16/b32/b64 with .clamp, .trap and .zero.
4513 defm SUST_B_1D_ARRAY_B8_CLAMP
4514 : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
4515 defm SUST_B_1D_ARRAY_B16_CLAMP
4516 : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
4517 defm SUST_B_1D_ARRAY_B32_CLAMP
4518 : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
4519 defm SUST_B_1D_ARRAY_B64_CLAMP
4520 : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
4522 defm SUST_B_1D_ARRAY_B8_TRAP
4523 : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
4524 defm SUST_B_1D_ARRAY_B16_TRAP
4525 : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
4526 defm SUST_B_1D_ARRAY_B32_TRAP
4527 : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
4528 defm SUST_B_1D_ARRAY_B64_TRAP
4529 : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
4531 defm SUST_B_1D_ARRAY_B8_ZERO
4532 : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
4533 defm SUST_B_1D_ARRAY_B16_ZERO
4534 : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
4535 defm SUST_B_1D_ARRAY_B32_ZERO
4536 : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
4537 defm SUST_B_1D_ARRAY_B64_ZERO
4538 : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
// sust.p is instantiated only for b8/b16/b32 and only with the .trap suffix.
4540 defm SUST_P_1D_ARRAY_B8_TRAP
4541 : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
4542 defm SUST_P_1D_ARRAY_B16_TRAP
4543 : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
4544 defm SUST_P_1D_ARRAY_B32_TRAP
4545 : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
4547 class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4549 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4550 intype:$r, intype:$g)),
4551 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4553 multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4554 def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4555 def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_1D_ARRAY_V2 multiclass (two-component stores to
// 1d array surfaces). Every defm produces <name>_R and <name>_I records.
// Record names spell the geometry "1D_ARRAY"; the emitted mnemonic spells it
// "a1d". The second argument is the register class used for both stored
// components; b8 and b16 stores both use Int16Regs.
// sust.b is instantiated for b8/b16/b32/b64 with .clamp, .trap and .zero.
4558 defm SUST_B_1D_ARRAY_V2B8_CLAMP
4559 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
4560 defm SUST_B_1D_ARRAY_V2B16_CLAMP
4561 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
4562 defm SUST_B_1D_ARRAY_V2B32_CLAMP
4563 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
4564 defm SUST_B_1D_ARRAY_V2B64_CLAMP
4565 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
4567 defm SUST_B_1D_ARRAY_V2B8_TRAP
4568 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
4569 defm SUST_B_1D_ARRAY_V2B16_TRAP
4570 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
4571 defm SUST_B_1D_ARRAY_V2B32_TRAP
4572 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
4573 defm SUST_B_1D_ARRAY_V2B64_TRAP
4574 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
4576 defm SUST_B_1D_ARRAY_V2B8_ZERO
4577 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
4578 defm SUST_B_1D_ARRAY_V2B16_ZERO
4579 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
4580 defm SUST_B_1D_ARRAY_V2B32_ZERO
4581 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
4582 defm SUST_B_1D_ARRAY_V2B64_ZERO
4583 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
// sust.p is instantiated only for b8/b16/b32 and only with the .trap suffix.
4585 defm SUST_P_1D_ARRAY_V2B8_TRAP
4586 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
4587 defm SUST_P_1D_ARRAY_V2B16_TRAP
4588 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
4589 defm SUST_P_1D_ARRAY_V2B32_TRAP
4590 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
4592 class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4594 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4595 intype:$r, intype:$g, intype:$b, intype:$a)),
4596 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
4598 multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4599 def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4600 def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_1D_ARRAY_V4 multiclass (four-component stores to
// 1d array surfaces). Every defm produces <name>_R and <name>_I records.
// Record names spell the geometry "1D_ARRAY"; the emitted mnemonic spells it
// "a1d". The second argument is the register class used for all four stored
// components; b8 and b16 stores both use Int16Regs.
// No b64 variant is instantiated in this group.
4603 defm SUST_B_1D_ARRAY_V4B8_CLAMP
4604 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
4605 defm SUST_B_1D_ARRAY_V4B16_CLAMP
4606 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
4607 defm SUST_B_1D_ARRAY_V4B32_CLAMP
4608 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
4610 defm SUST_B_1D_ARRAY_V4B8_TRAP
4611 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
4612 defm SUST_B_1D_ARRAY_V4B16_TRAP
4613 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
4614 defm SUST_B_1D_ARRAY_V4B32_TRAP
4615 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
4617 defm SUST_B_1D_ARRAY_V4B8_ZERO
4618 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
4619 defm SUST_B_1D_ARRAY_V4B16_ZERO
4620 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
4621 defm SUST_B_1D_ARRAY_V4B32_ZERO
4622 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
// sust.p is instantiated only with the .trap suffix.
4624 defm SUST_P_1D_ARRAY_V4B8_TRAP
4625 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
4626 defm SUST_P_1D_ARRAY_V4B16_TRAP
4627 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
4628 defm SUST_P_1D_ARRAY_V4B32_TRAP
4629 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
4631 class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
4633 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
4634 inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
4636 multiclass SUST_2D<string inst, NVPTXRegClass intype> {
4637 def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
4638 def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_2D multiclass (one-component 2d surface stores).
// Every defm produces <name>_R and <name>_I records. The first argument is
// the emitted mnemonic; the second is the register class of the stored value.
// b8 and b16 stores both use Int16Regs.
// sust.b is instantiated for b8/b16/b32/b64 with .clamp, .trap and .zero.
4641 defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
4642 defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
4643 defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
4644 defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
4646 defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
4647 defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
4648 defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
4649 defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
4651 defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
4652 defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
4653 defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
4654 defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
// sust.p is instantiated only for b8/b16/b32 and only with the .trap suffix.
4656 defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
4657 defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
4658 defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
4660 class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4662 !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4663 intype:$r, intype:$g)),
4664 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4666 multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
4667 def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4668 def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_2D_V2 multiclass (two-component 2d surface
// stores). Every defm produces <name>_R and <name>_I records. The first
// argument is the emitted mnemonic; the second is the register class used for
// both stored components. b8 and b16 stores both use Int16Regs.
// sust.b is instantiated for b8/b16/b32/b64 with .clamp, .trap and .zero.
4671 defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
4672 defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
4673 defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
4674 defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
4676 defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
4677 defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
4678 defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
4679 defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
4681 defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
4682 defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
4683 defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
4684 defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
// sust.p is instantiated only for b8/b16/b32 and only with the .trap suffix.
4686 defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
4687 defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
4688 defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
4690 class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4692 !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4693 intype:$r, intype:$g, intype:$b, intype:$a)),
4694 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
4696 multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
4697 def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4698 def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_2D_V4 multiclass (four-component 2d surface
// stores). Every defm produces <name>_R and <name>_I records. The first
// argument is the emitted mnemonic; the second is the register class used for
// all four stored components. b8 and b16 stores both use Int16Regs.
// No b64 variant is instantiated in this group.
4701 defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
4702 defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
4703 defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;
4705 defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
4706 defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
4707 defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;
4709 defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
4710 defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
4711 defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;
// sust.p is instantiated only with the .trap suffix.
4713 defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
4714 defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
4715 defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
4717 class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4719 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4721 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4723 multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
4724 def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4725 def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_2D_ARRAY multiclass (one-component stores to 2d
// array surfaces). Every defm produces <name>_R and <name>_I records.
// Record names spell the geometry "2D_ARRAY"; the emitted mnemonic spells it
// "a2d". The second argument is the register class of the stored value; b8
// and b16 stores both use Int16Regs.
// NOTE(review): the SUST_2D_ARRAY_base asm string lists $y twice in the
// coordinate vector ({$idx, $x, $y, $y}); presumably this pads the vector to
// four elements -- confirm against the PTX ISA before changing it.
4728 defm SUST_B_2D_ARRAY_B8_CLAMP
4729 : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
4730 defm SUST_B_2D_ARRAY_B16_CLAMP
4731 : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
4732 defm SUST_B_2D_ARRAY_B32_CLAMP
4733 : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
4734 defm SUST_B_2D_ARRAY_B64_CLAMP
4735 : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;
4737 defm SUST_B_2D_ARRAY_B8_TRAP
4738 : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
4739 defm SUST_B_2D_ARRAY_B16_TRAP
4740 : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
4741 defm SUST_B_2D_ARRAY_B32_TRAP
4742 : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
4743 defm SUST_B_2D_ARRAY_B64_TRAP
4744 : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;
4746 defm SUST_B_2D_ARRAY_B8_ZERO
4747 : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
4748 defm SUST_B_2D_ARRAY_B16_ZERO
4749 : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
4750 defm SUST_B_2D_ARRAY_B32_ZERO
4751 : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
4752 defm SUST_B_2D_ARRAY_B64_ZERO
4753 : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;
// sust.p is instantiated only for b8/b16/b32 and only with the .trap suffix.
4755 defm SUST_P_2D_ARRAY_B8_TRAP
4756 : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
4757 defm SUST_P_2D_ARRAY_B16_TRAP
4758 : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
4759 defm SUST_P_2D_ARRAY_B32_TRAP
4760 : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
4762 class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4764 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4765 intype:$r, intype:$g)),
4766 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
4768 multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4769 def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4770 def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_2D_ARRAY_V2 multiclass (two-component stores to
// 2d array surfaces). Every defm produces <name>_R and <name>_I records.
// Record names spell the geometry "2D_ARRAY"; the emitted mnemonic spells it
// "a2d". The second argument is the register class used for both stored
// components; b8 and b16 stores both use Int16Regs.
// sust.b is instantiated for b8/b16/b32/b64 with .clamp, .trap and .zero.
4773 defm SUST_B_2D_ARRAY_V2B8_CLAMP
4774 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
4775 defm SUST_B_2D_ARRAY_V2B16_CLAMP
4776 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
4777 defm SUST_B_2D_ARRAY_V2B32_CLAMP
4778 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
4779 defm SUST_B_2D_ARRAY_V2B64_CLAMP
4780 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;
4782 defm SUST_B_2D_ARRAY_V2B8_TRAP
4783 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
4784 defm SUST_B_2D_ARRAY_V2B16_TRAP
4785 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
4786 defm SUST_B_2D_ARRAY_V2B32_TRAP
4787 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
4788 defm SUST_B_2D_ARRAY_V2B64_TRAP
4789 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;
4791 defm SUST_B_2D_ARRAY_V2B8_ZERO
4792 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
4793 defm SUST_B_2D_ARRAY_V2B16_ZERO
4794 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
4795 defm SUST_B_2D_ARRAY_V2B32_ZERO
4796 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
4797 defm SUST_B_2D_ARRAY_V2B64_ZERO
4798 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;
// sust.p is instantiated only for b8/b16/b32 and only with the .trap suffix.
4800 defm SUST_P_2D_ARRAY_V2B8_TRAP
4801 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
4802 defm SUST_P_2D_ARRAY_V2B16_TRAP
4803 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
4804 defm SUST_P_2D_ARRAY_V2B32_TRAP
4805 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
4807 class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4809 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4810 intype:$r, intype:$g, intype:$b, intype:$a)),
4811 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
4813 multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4814 def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4815 def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_2D_ARRAY_V4 multiclass (four-component stores to
// 2d array surfaces). Every defm produces <name>_R and <name>_I records.
// Record names spell the geometry "2D_ARRAY"; the emitted mnemonic spells it
// "a2d". The second argument is the register class used for all four stored
// components; b8 and b16 stores both use Int16Regs.
// No b64 variant is instantiated in this group.
4818 defm SUST_B_2D_ARRAY_V4B8_CLAMP
4819 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
4820 defm SUST_B_2D_ARRAY_V4B16_CLAMP
4821 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
4822 defm SUST_B_2D_ARRAY_V4B32_CLAMP
4823 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
4825 defm SUST_B_2D_ARRAY_V4B8_TRAP
4826 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
4827 defm SUST_B_2D_ARRAY_V4B16_TRAP
4828 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
4829 defm SUST_B_2D_ARRAY_V4B32_TRAP
4830 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
4832 defm SUST_B_2D_ARRAY_V4B8_ZERO
4833 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
4834 defm SUST_B_2D_ARRAY_V4B16_ZERO
4835 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
4836 defm SUST_B_2D_ARRAY_V4B32_ZERO
4837 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
// sust.p is instantiated only with the .trap suffix.
4839 defm SUST_P_2D_ARRAY_V4B8_TRAP
4840 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
4841 defm SUST_P_2D_ARRAY_V4B16_TRAP
4842 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
4843 defm SUST_P_2D_ARRAY_V4B32_TRAP
4844 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
4846 class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
4848 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4850 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4852 multiclass SUST_3D<string inst, NVPTXRegClass intype> {
4853 def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
4854 def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_3D multiclass (one-component 3d surface stores).
// Every defm produces <name>_R and <name>_I records. The first argument is
// the emitted mnemonic; the second is the register class of the stored value.
// b8 and b16 stores both use Int16Regs.
// NOTE(review): the SUST_3D_base asm string lists $z twice in the coordinate
// vector ({$x, $y, $z, $z}); presumably this pads the vector to four
// elements -- confirm against the PTX ISA before changing it.
4857 defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
4858 defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
4859 defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
4860 defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;
4862 defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
4863 defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
4864 defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
4865 defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;
4867 defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
4868 defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
4869 defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
4870 defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;
// sust.p is instantiated only for b8/b16/b32 and only with the .trap suffix.
4872 defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
4873 defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
4874 defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
4876 class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4878 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4879 intype:$r, intype:$g)),
4880 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
4882 multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
4883 def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4884 def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_3D_V2 multiclass (two-component 3d surface
// stores). Every defm produces <name>_R and <name>_I records. The first
// argument is the emitted mnemonic; the second is the register class used for
// both stored components. b8 and b16 stores both use Int16Regs.
// sust.b is instantiated for b8/b16/b32/b64 with .clamp, .trap and .zero.
4887 defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
4888 defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
4889 defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
4890 defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;
4892 defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
4893 defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
4894 defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
4895 defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;
4897 defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
4898 defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
4899 defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
4900 defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;
// sust.p is instantiated only for b8/b16/b32 and only with the .trap suffix.
4902 defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
4903 defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
4904 defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
4906 class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4908 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4909 intype:$r, intype:$g, intype:$b, intype:$a)),
4910 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
4912 multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
4913 def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4914 def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations of the SUST_3D_V4 multiclass (four-component 3d surface
// stores). Every defm produces <name>_R and <name>_I records. The first
// argument is the emitted mnemonic; the second is the register class used for
// all four stored components. b8 and b16 stores both use Int16Regs.
// No b64 variant is instantiated in this group.
4917 defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
4918 defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
4919 defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
4921 defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
4922 defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
4923 defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
4925 defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
4926 defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
4927 defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
// sust.p is instantiated only with the .trap suffix.
4929 defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
4930 defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
4931 defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
4935 // Surface store instruction patterns
4936 // These patterns are kept separate from the instruction definitions: putting
4937 // them inline makes TableGen report type errors (root cause not yet understood).
// Selection patterns for the sust.b.1d .clamp intrinsics.
// Each int_nvvm_sust_b_1d_<type>_clamp intrinsic is selected to the
// register-operand (_R) record of the matching SUST_B_1D_*_CLAMP instruction.
// Operands are forwarded in the same order: $s (Int64Regs), $x (Int32Regs),
// then the data values ($r, and $g / $b / $a for the vector forms).
// The i8 and i16 intrinsics both carry their data in Int16Regs.
// Scalar forms: i8, i16, i32, i64.
4940 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
4941 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4942 (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
4944 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
4945 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4946 (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
4948 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
4949 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4950 (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
4952 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
4953 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4954 (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// Two-component forms: v2i8, v2i16, v2i32, v2i64.
4956 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
4957 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4958 (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4959 Int16Regs:$r, Int16Regs:$g)>;
4961 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
4962 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4963 (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4964 Int16Regs:$r, Int16Regs:$g)>;
4966 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
4967 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4968 (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4969 Int32Regs:$r, Int32Regs:$g)>;
4971 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
4972 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4973 (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4974 Int64Regs:$r, Int64Regs:$g)>;
// Four-component forms: v4i8, v4i16, v4i32 (no v4i64 pattern in this group).
4976 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
4977 Int64Regs:$s, Int32Regs:$x,
4978 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4979 (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4980 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4982 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
4983 Int64Regs:$s, Int32Regs:$x,
4984 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4985 (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4986 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4988 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
4989 Int64Regs:$s, Int32Regs:$x,
4990 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4991 (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4992 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
4996 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
4997 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
4998 (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5001 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
5002 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5003 (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5006 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
5007 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5008 (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5011 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
5012 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5013 (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Selection patterns for the vector sust.b.1d.array .clamp intrinsics.
// Each int_nvvm_sust_b_1d_array_<type>_clamp intrinsic is selected to the
// register-operand (_R) record of the matching SUST_B_1D_ARRAY_*_CLAMP
// instruction. Operands are forwarded in the same order: $s (Int64Regs),
// $l and $x (both Int32Regs), then the data values.
// The pattern names the first 32-bit operand $l; it is bound positionally to
// the operand the instruction class declares as $idx.
// The i8 and i16 element types both carry their data in Int16Regs.
// Two-component forms: v2i8, v2i16, v2i32, v2i64.
5016 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
5017 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5018 (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5019 Int16Regs:$r, Int16Regs:$g)>;
5021 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
5022 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5023 (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5024 Int16Regs:$r, Int16Regs:$g)>;
5026 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
5027 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5028 (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5029 Int32Regs:$r, Int32Regs:$g)>;
5031 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
5032 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5033 (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5034 Int64Regs:$r, Int64Regs:$g)>;
// Four-component forms: v4i8, v4i16, v4i32 (no v4i64 pattern in this group).
5036 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
5037 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5038 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5039 (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5040 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5042 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
5043 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5044 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5045 (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5046 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5048 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
5049 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5050 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5051 (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5052 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5056 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
5057 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5058 (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5061 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
5062 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5063 (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5066 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
5067 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5068 (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5071 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
5072 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5073 (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Selection patterns for the vector sust.b.2d .clamp intrinsics.
// Each int_nvvm_sust_b_2d_<type>_clamp intrinsic is selected to the
// register-operand (_R) record of the matching SUST_B_2D_*_CLAMP instruction.
// Operands are forwarded in the same order: $s (Int64Regs), $x and $y (both
// Int32Regs), then the data values.
// The i8 and i16 element types both carry their data in Int16Regs.
// Two-component forms: v2i8, v2i16, v2i32, v2i64.
5076 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
5077 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5078 (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5079 Int16Regs:$r, Int16Regs:$g)>;
5081 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
5082 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5083 (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5084 Int16Regs:$r, Int16Regs:$g)>;
5086 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
5087 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5088 (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5089 Int32Regs:$r, Int32Regs:$g)>;
5091 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
5092 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5093 (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5094 Int64Regs:$r, Int64Regs:$g)>;
// Four-component forms: v4i8, v4i16, v4i32 (no v4i64 pattern in this group).
5096 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
5097 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5098 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5099 (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5100 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5102 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
5103 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5104 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5105 (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5106 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5108 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
5109 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5110 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5111 (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5112 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D-array "clamp" forms of the int_nvvm_sust_b_*
// intrinsics (sust: presumably PTX surface store).
// Each int_nvvm_sust_b_2d_array_<ty>_clamp intrinsic is matched to the
// corresponding SUST_B_2D_ARRAY_<TY>_CLAMP_R instruction; operands are
// forwarded unchanged and in the same order: $s, $l, the coordinates $x and
// $y, then the value register(s).  The i8 and i16 forms both carry their
// values in Int16Regs.
// NOTE(review): $l looks like the array layer index; confirm against the
// SUST_B_* instruction definitions.
// NOTE(review): the trailing value operand of the four scalar patterns and the
// `$g` source operand of the v2i32/v2i64 patterns were reconstructed from the
// intrinsic names and the result dags; verify.
def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
           Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
           Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 3D "clamp" forms of the int_nvvm_sust_b_*
// intrinsics (sust: presumably PTX surface store).
// Each int_nvvm_sust_b_3d_<ty>_clamp intrinsic is matched to the corresponding
// SUST_B_3D_<TY>_CLAMP_R instruction; operands are forwarded unchanged and in
// the same order: $s, the coordinates $x, $y and $z, then the value
// register(s).  The i8 and i16 forms both carry their values in Int16Regs.
// NOTE(review): the value operand line of the four scalar patterns (in both
// the source and the result dag) was reconstructed from the element type in
// the intrinsic name; verify against the intrinsic/instruction definitions.
def : Pat<(int_nvvm_sust_b_3d_i8_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r),
          (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r),
          (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D "trap" forms of the int_nvvm_sust_b_*
// intrinsics (sust: presumably PTX surface store).
// Each int_nvvm_sust_b_1d_<ty>_trap intrinsic is matched to the corresponding
// SUST_B_1D_<TY>_TRAP_R instruction; operands are forwarded unchanged and in
// the same order: $s, the coordinate $x, then the value register(s).
// The i8 and i16 forms both carry their values in Int16Regs.
// NOTE(review): $s looks like the surface handle; confirm against the
// SUST_B_* instruction definitions.
def : Pat<(int_nvvm_sust_b_1d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_trap
           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D-array "trap" forms of the int_nvvm_sust_b_*
// intrinsics (sust: presumably PTX surface store).
// Each int_nvvm_sust_b_1d_array_<ty>_trap intrinsic is matched to the
// corresponding SUST_B_1D_ARRAY_<TY>_TRAP_R instruction; operands are
// forwarded unchanged and in the same order: $s, $l, the coordinate $x, then
// the value register(s).  The i8 and i16 forms both carry their values in
// Int16Regs.
// NOTE(review): $l looks like the array layer index; confirm against the
// SUST_B_* instruction definitions.
// NOTE(review): the trailing value operand of the four scalar patterns was
// reconstructed from the element type in the intrinsic name; verify.
def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D "trap" forms of the int_nvvm_sust_b_*
// intrinsics (sust: presumably PTX surface store).
// Each int_nvvm_sust_b_2d_<ty>_trap intrinsic is matched to the corresponding
// SUST_B_2D_<TY>_TRAP_R instruction; operands are forwarded unchanged and in
// the same order: $s, the coordinates $x and $y, then the value register(s).
// The i8 and i16 forms both carry their values in Int16Regs.
// NOTE(review): the trailing value operand of the four scalar patterns was
// reconstructed from the element type in the intrinsic name; verify against
// the intrinsic and instruction definitions.
def : Pat<(int_nvvm_sust_b_2d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D-array "trap" forms of the int_nvvm_sust_b_*
// intrinsics (sust: presumably PTX surface store).
// Each int_nvvm_sust_b_2d_array_<ty>_trap intrinsic is matched to the
// corresponding SUST_B_2D_ARRAY_<TY>_TRAP_R instruction; operands are
// forwarded unchanged and in the same order: $s, $l, the coordinates $x and
// $y, then the value register(s).  The i8 and i16 forms both carry their
// values in Int16Regs.
// NOTE(review): $l looks like the array layer index; confirm against the
// SUST_B_* instruction definitions.
// NOTE(review): the trailing value operand of the four scalar patterns and the
// `$g` source operand of the v2i32/v2i64 patterns were reconstructed from the
// intrinsic names and the result dags; verify.
def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
           Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
           Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 3D "trap" forms of the int_nvvm_sust_b_*
// intrinsics (sust: presumably PTX surface store).
// Each int_nvvm_sust_b_3d_<ty>_trap intrinsic is matched to the corresponding
// SUST_B_3D_<TY>_TRAP_R instruction; operands are forwarded unchanged and in
// the same order: $s, the coordinates $x, $y and $z, then the value
// register(s).  The i8 and i16 forms both carry their values in Int16Regs.
// NOTE(review): the value operand line of the four scalar patterns (in both
// the source and the result dag) was reconstructed from the element type in
// the intrinsic name; verify against the intrinsic/instruction definitions.
def : Pat<(int_nvvm_sust_b_3d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r),
          (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r),
          (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D "zero" forms of the int_nvvm_sust_b_*
// intrinsics (sust: presumably PTX surface store).
// Each int_nvvm_sust_b_1d_<ty>_zero intrinsic is matched to the corresponding
// SUST_B_1D_<TY>_ZERO_R instruction; operands are forwarded unchanged and in
// the same order: $s, the coordinate $x, then the value register(s).
// The i8 and i16 forms both carry their values in Int16Regs.
// NOTE(review): $s looks like the surface handle; confirm against the
// SUST_B_* instruction definitions.
def : Pat<(int_nvvm_sust_b_1d_i8_zero
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_zero
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_zero
           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D-array "zero" forms of the int_nvvm_sust_b_*
// intrinsics (sust: presumably PTX surface store).
// Each int_nvvm_sust_b_1d_array_<ty>_zero intrinsic is matched to the
// corresponding SUST_B_1D_ARRAY_<TY>_ZERO_R instruction; operands are
// forwarded unchanged and in the same order: $s, $l, the coordinate $x, then
// the value register(s).  The i8 and i16 forms both carry their values in
// Int16Regs.
// NOTE(review): $l looks like the array layer index; confirm against the
// SUST_B_* instruction definitions.
// NOTE(review): the trailing value operand of the four scalar patterns was
// reconstructed from the element type in the intrinsic name; verify.
def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D "zero" forms of the int_nvvm_sust_b_*
// intrinsics (sust: presumably PTX surface store).
// Each int_nvvm_sust_b_2d_<ty>_zero intrinsic is matched to the corresponding
// SUST_B_2D_<TY>_ZERO_R instruction; operands are forwarded unchanged and in
// the same order: $s, the coordinates $x and $y, then the value register(s).
// The i8 and i16 forms both carry their values in Int16Regs.
// NOTE(review): the trailing value operand of the four scalar patterns was
// reconstructed from the element type in the intrinsic name; verify against
// the intrinsic and instruction definitions.
def : Pat<(int_nvvm_sust_b_2d_i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D-array "zero" forms of the int_nvvm_sust_b_*
// intrinsics (sust: presumably PTX surface store).
// Each int_nvvm_sust_b_2d_array_<ty>_zero intrinsic is matched to the
// corresponding SUST_B_2D_ARRAY_<TY>_ZERO_R instruction; operands are
// forwarded unchanged and in the same order: $s, $l, the coordinates $x and
// $y, then the value register(s).  The i8 and i16 forms both carry their
// values in Int16Regs.
// NOTE(review): $l looks like the array layer index; confirm against the
// SUST_B_* instruction definitions.
// NOTE(review): the trailing value operand of the four scalar patterns and the
// `$g` source operand of the v2i32/v2i64 patterns were reconstructed from the
// intrinsic names and the result dags; verify.
def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
           Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
           Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
           Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 3D "zero" forms of the int_nvvm_sust_b_*
// intrinsics (sust: presumably PTX surface store).
// Each int_nvvm_sust_b_3d_<ty>_zero intrinsic is matched to the corresponding
// SUST_B_3D_<TY>_ZERO_R instruction; operands are forwarded unchanged and in
// the same order: $s, the coordinates $x, $y and $z, then the value
// register(s).  The i8 and i16 forms both carry their values in Int16Regs.
// NOTE(review): the value operand line of the four scalar patterns (in both
// the source and the result dag) was reconstructed from the element type in
// the intrinsic name; verify against the intrinsic/instruction definitions.
def : Pat<(int_nvvm_sust_b_3d_i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B8_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B16_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r),
          (SUST_B_3D_B32_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r),
          (SUST_B_3D_B64_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_1d_*_trap intrinsics (scalar,
// v2 and v4). Each one forwards its operands unchanged, and in the same
// order, to the corresponding SUST_P_1D_*_TRAP_R instruction: $s
// (Int64Regs), $x (Int32Regs), then the data values. The i8 and i16 forms
// carry their data in Int16Regs, the i32 forms in Int32Regs.
def : Pat<
  (int_nvvm_sust_p_1d_i8_trap Int64Regs:$s,
    Int32Regs:$x,
    Int16Regs:$r),
  (SUST_P_1D_B8_TRAP_R Int64Regs:$s,
    Int32Regs:$x,
    Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_i16_trap Int64Regs:$s,
    Int32Regs:$x,
    Int16Regs:$r),
  (SUST_P_1D_B16_TRAP_R Int64Regs:$s,
    Int32Regs:$x,
    Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_i32_trap Int64Regs:$s,
    Int32Regs:$x,
    Int32Regs:$r),
  (SUST_P_1D_B32_TRAP_R Int64Regs:$s,
    Int32Regs:$x,
    Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_v2i8_trap Int64Regs:$s,
    Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s,
    Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_v2i16_trap Int64Regs:$s,
    Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s,
    Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_v2i32_trap Int64Regs:$s,
    Int32Regs:$x,
    Int32Regs:$r, Int32Regs:$g),
  (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s,
    Int32Regs:$x,
    Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_v4i8_trap Int64Regs:$s,
    Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s,
    Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_v4i16_trap Int64Regs:$s,
    Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s,
    Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_v4i32_trap Int64Regs:$s,
    Int32Regs:$x,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s,
    Int32Regs:$x,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5972 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
5973 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5974 (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5977 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
5978 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5979 (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5982 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
5983 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5984 (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Selection patterns for the int_nvvm_sust_p_1d_array_{v2,v4}*_trap
// intrinsics. Each one forwards its operands unchanged, and in the same
// order, to the corresponding SUST_P_1D_ARRAY_*_TRAP_R instruction: $s
// (Int64Regs), $l and $x (Int32Regs), then the data values. The i8 and i16
// forms carry their data in Int16Regs, the i32 forms in Int32Regs.
def : Pat<
  (int_nvvm_sust_p_1d_array_v2i8_trap Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v2i16_trap Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v2i32_trap Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x,
    Int32Regs:$r, Int32Regs:$g),
  (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x,
    Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v4i8_trap Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v4i16_trap Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v4i32_trap Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6022 def : Pat<(int_nvvm_sust_p_2d_i8_trap
6023 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6024 (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6027 def : Pat<(int_nvvm_sust_p_2d_i16_trap
6028 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6029 (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6032 def : Pat<(int_nvvm_sust_p_2d_i32_trap
6033 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6034 (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Selection patterns for the int_nvvm_sust_p_2d_{v2,v4}*_trap intrinsics.
// Each one forwards its operands unchanged, and in the same order, to the
// corresponding SUST_P_2D_*_TRAP_R instruction: $s (Int64Regs), $x and $y
// (Int32Regs), then the data values. The i8 and i16 forms carry their data
// in Int16Regs, the i32 forms in Int32Regs.
def : Pat<
  (int_nvvm_sust_p_2d_v2i8_trap Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_v2i16_trap Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_v2i32_trap Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y,
    Int32Regs:$r, Int32Regs:$g),
  (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y,
    Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_v4i8_trap Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_v4i16_trap Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_v4i32_trap Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6072 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
6073 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6074 (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
6075 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6078 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
6079 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6080 (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
6081 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6084 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
6085 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6086 (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
6087 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Selection patterns for int_nvvm_sust_p_2d_array_v2i8_trap and
// int_nvvm_sust_p_2d_array_v2i16_trap. Operands are forwarded unchanged and
// in order to the matching SUST_P_2D_ARRAY_V2B*_TRAP_R instruction: $s
// (Int64Regs), $l/$x/$y (Int32Regs), then $r and $g in Int16Regs.
def : Pat<
  (int_nvvm_sust_p_2d_array_v2i8_trap Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v2i16_trap Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g)>;
6104 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
6105 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6107 (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
6108 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
// Selection patterns for the int_nvvm_sust_p_2d_array_v4*_trap intrinsics.
// Operands are forwarded unchanged and in order to the matching
// SUST_P_2D_ARRAY_V4B*_TRAP_R instruction: $s (Int64Regs), $l/$x/$y
// (Int32Regs), then $r, $g, $b, $a. The i8 and i16 forms carry their data
// in Int16Regs, the i32 form in Int32Regs.
def : Pat<
  (int_nvvm_sust_p_2d_array_v4i8_trap Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v4i16_trap Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v4i32_trap Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s,
    Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6133 def : Pat<(int_nvvm_sust_p_3d_i8_trap
6134 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6136 (SUST_P_3D_B8_TRAP_R Int64Regs:$s,
6137 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6140 def : Pat<(int_nvvm_sust_p_3d_i16_trap
6141 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6143 (SUST_P_3D_B16_TRAP_R Int64Regs:$s,
6144 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6147 def : Pat<(int_nvvm_sust_p_3d_i32_trap
6148 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6150 (SUST_P_3D_B32_TRAP_R Int64Regs:$s,
6151 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Selection patterns for the int_nvvm_sust_p_3d_{v2,v4}*_trap intrinsics.
// Each one forwards its operands unchanged, and in the same order, to the
// corresponding SUST_P_3D_*_TRAP_R instruction: $s (Int64Regs), $x/$y/$z
// (Int32Regs), then the data values. The i8 and i16 forms carry their data
// in Int16Regs, the i32 forms in Int32Regs.
def : Pat<
  (int_nvvm_sust_p_3d_v2i8_trap Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_3d_v2i16_trap Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_3d_v2i32_trap Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int32Regs:$r, Int32Regs:$g),
  (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_3d_v4i8_trap Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_3d_v4i16_trap Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_3d_v4i32_trap Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s,
    Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6196 //-----------------------------------
6197 // Read Special Registers
6198 //-----------------------------------
6200 class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]>
6201 : NVPTXInst<(outs Int64Regs:$d), (ins),
6202 !strconcat("mov.u64 \t$d, %", regname, ";"),
6203 [(set Int64Regs:$d, (intop))]>,
6206 class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
6207 : NVPTXInst<(outs Int32Regs:$d), (ins),
6208 !strconcat("mov.u32 \t$d, %", regname, ";"),
6209 [(set Int32Regs:$d, (intop))]>,
6212 multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
6213 foreach suffix = ["x", "y", "z", "w"] in {
6214 defvar reg = regname # "." # suffix;
6215 defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix);
6216 def "_"#suffix : PTX_READ_SREG_R32<reg, intr, Preds>;
// TODO: Add vector-read versions of the special registers.
// Four-component special registers. PTX_READ_SREG_R32V4 expands each defm
// into _x/_y/_z/_w instructions that read "<regname>.<suffix>" and match
// int_nvvm_read_ptx_sreg_<regname>_<suffix>.
defm INT_PTX_SREG_TID
    : PTX_READ_SREG_R32V4<"tid">;
defm INT_PTX_SREG_NTID
    : PTX_READ_SREG_R32V4<"ntid">;
defm INT_PTX_SREG_CTAID
    : PTX_READ_SREG_R32V4<"ctaid">;
defm INT_PTX_SREG_NCTAID
    : PTX_READ_SREG_R32V4<"nctaid">;

// The cluster registers are additionally predicated on hasSM<90> and
// hasPTX<78>.
defm INT_PTX_SREG_CLUSTERID
    : PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_NCLUSTERID
    : PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_CLUSTER_CTAID
    : PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_CLUSTER_NCTAID
    : PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;
// Scalar 32-bit cluster special registers, predicated on hasSM<90> and
// hasPTX<78>.
def INT_PTX_SREG_CLUSTER_CTARANK
    : PTX_READ_SREG_R32<"cluster_ctarank",
                        int_nvvm_read_ptx_sreg_cluster_ctarank,
                        [hasSM<90>, hasPTX<78>]>;
def INT_PTX_SREG_CLUSTER_NCTARANK
    : PTX_READ_SREG_R32<"cluster_nctarank",
                        int_nvvm_read_ptx_sreg_cluster_nctarank,
                        [hasSM<90>, hasPTX<78>]>;
// Scalar 32-bit special registers. Each def pairs a register name with the
// intrinsic that reads it; no predicates are passed, so the helper class's
// default (empty) list applies.
def INT_PTX_SREG_LANEID
    : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID
    : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID
    : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
def INT_PTX_SREG_SMID
    : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID
    : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID
    : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
// 32-bit lanemask_{eq,le,lt,ge,gt} special registers, one def per
// comparison suffix.
def INT_PTX_SREG_LANEMASK_EQ
    : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE
    : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT
    : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE
    : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT
    : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
// %clock is read through the 32-bit helper, %clock64 through the 64-bit one.
def INT_PTX_SREG_CLOCK
    : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64
    : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
// 32-bit reads of %pm0 through %pm3.
def INT_PTX_SREG_PM0
    : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1
    : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2
    : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3
    : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
// TODO: It would be nice to use one of the PTX_READ_SREG_* classes here, but
// they always emit a '%' before the register name, and WARP_SZ is a constant
// that is written without one.
def INT_PTX_SREG_WARPSIZE
    : NVPTXInst<(outs Int32Regs:$dst), (ins),
                "mov.u32 \t$dst, WARP_SZ;",
                [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to the target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement a specific PTX instruction -- register
// types and names, constraints, parts of assembly, etc.
6290 class WMMA_REGINFO<WMMA_REGS r, string op>
6291 : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
6292 // NVPTX register types used to carry fragment data.
6293 NVPTXRegClass regclass = !cond(
6294 !eq(ptx_elt_type, "f16") : Int32Regs,
6295 !eq(ptx_elt_type, "f32") : Float32Regs,
6296 !eq(ptx_elt_type, "f64") : Float64Regs,
6297 !eq(ptx_elt_type, "bf16") : Int32Regs,
6298 !eq(ptx_elt_type, "tf32") : Int32Regs,
6299 !eq(ptx_elt_type, "s32") : Int32Regs,
6300 !eq(ptx_elt_type, "b16") : Int32Regs,
6301 !eq(ptx_elt_type, "s8") : Int32Regs,
6302 !eq(ptx_elt_type, "u8") : Int32Regs,
6303 !eq(ptx_elt_type, "s4") : Int32Regs,
6304 !eq(ptx_elt_type, "u4") : Int32Regs,
6305 !eq(ptx_elt_type, "b1") : Int32Regs);
6307 // Instruction input/output arguments for the fragment.
6308 list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));
6310 // List of register names for the fragment -- ["ra0", "ra1",...]
6311 list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
6313 // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
6314 string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";
  // Predicates for a particular fragment variant. Technically these are
6317 // per-instruction predicates, but currently all fragments that can be used in
6318 // a given instruction are subject to the same constraints, so an instruction
6319 // can use predicates from any of its fragments. If/when this is no
6320 // longer the case, we can concat all per-fragment predicates to enforce that
6321 // all fragments of the instruction are viable.
6322 list<Predicate> Predicates = !cond(
6323 // fp16 -> fp16/fp32 @ m16n16k16
6324 !and(!eq(geom, "m16n16k16"),
6325 !or(!eq(ptx_elt_type, "f16"),
6326 !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>],
6328 !and(!eq(geom,"m8n8k4"),
6329 !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>],
6331 // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
6332 !and(!or(!eq(geom, "m8n32k16"),
6333 !eq(geom, "m32n8k16")),
6334 !or(!eq(ptx_elt_type, "f16"),
6335 !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>],
6337 // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
6338 !and(!or(!eq(geom,"m16n16k16"),
6339 !eq(geom,"m8n32k16"),
6340 !eq(geom,"m32n8k16")),
6341 !or(!eq(ptx_elt_type, "u8"),
6342 !eq(ptx_elt_type, "s8"),
6343 !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>],
6345 !and(!or(!eq(geom,"m16n16k16"),
6346 !eq(geom,"m8n32k16"),
6347 !eq(geom,"m32n8k16")),
6348 !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>],
6350 !and(!eq(geom,"m16n16k8"),
6351 !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>],
6353 !and(!eq(geom,"m16n16k8"),
6354 !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>],
6356 // b1 -> s32 @ m8n8k128(b1)
6358 !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>],
6360 // u4/s4 -> s32 @ m8n8k32 (u4/s4)
6362 !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>],
6364 !or(!eq(geom,"m16n8k8"),
6365 !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>],
6367 !and(!ne(ptx_elt_type,"f64"),
6368 !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>],
6370 // mma m8n8k32 requires higher PTX version
6372 !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>],
6374 !and(!eq(ptx_elt_type,"f64"),
6375 !eq(geom, "m8n8k4")) : [hasSM<80>, hasPTX<70>],
6378 !or(!eq(geom, "m16n8k16"),
6379 !eq(geom, "m16n8k4"),
6380 !eq(geom, "m16n8k32"),
6381 !eq(geom, "m16n8k64"),
6382 !eq(geom, "m8n8k128"),
6383 !eq(geom, "m16n8k128"),
6384 !eq(geom, "m16n8k256"))) : [hasSM<80>, hasPTX<70>],
6386 !and(!eq(op,"ldmatrix"),
6387 !eq(ptx_elt_type,"b16"),
6388 !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]);
6390 // template DAGs for instruction inputs/output.
6391 dag Outs = !dag(outs, ptx_regs, reg_names);
6392 dag Ins = !dag(ins, ptx_regs, reg_names);
6395 // Convert dag of arguments into a dag to match given intrinsic.
6396 class BuildPatternI<Intrinsic Intr, dag Ins> {
6397 // Build a dag pattern that matches the intrinsic call.
6398 dag ret = !foreach(tmp, Ins,
6399 !subst(imem, ADDRvar,
6400 !subst(MEMri64, ADDRri64,
6401 !subst(MEMri, ADDRri,
6402 !subst(ins, Intr, tmp)))));
6405 // Same as above, but uses PatFrag instead of an Intrinsic.
6406 class BuildPatternPF<PatFrag Intr, dag Ins> {
  // Build a dag pattern that matches the given PatFrag.
6408 dag ret = !foreach(tmp, Ins,
6409 !subst(imem, ADDRvar,
6410 !subst(MEMri64, ADDRri64,
6411 !subst(MEMri, ADDRri,
6412 !subst(ins, Intr, tmp)))));
6415 // Common WMMA-related fields used for building patterns for all MMA instructions.
6416 class WMMA_INSTR<string _Intr, list<dag> _Args>
6417 : NVPTXInst<(outs), (ins), "?", []> {
6418 Intrinsic Intr = !cast<Intrinsic>(_Intr);
6419 // Concatenate all arguments into a single dag.
6420 dag Args = !foldl((ins), _Args, a, b, !con(a,b));
6421 // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
6422 dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
6426 // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
6429 class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
6431 : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
6432 [!con((ins SrcOp:$src),
6433 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
6434 Requires<Frag.Predicates> {
6435 // Load/store intrinsics are overloaded on pointer's address space.
6436 // To match the right intrinsic, we need to build AS-constrained PatFrag.
6437 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
6438 dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
6439 dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
6440 // Build PatFrag that only matches particular address space.
6441 PatFrag IntrFrag = PatFrag<PFOperands,
6443 !cond(!eq(Space, ".shared"): AS_match.shared,
6444 !eq(Space, ".global"): AS_match.global,
6445 true: AS_match.generic)>;
6446 // Build AS-constrained pattern.
6447 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6449 let OutOperandList = Frag.Outs;
6450 let InOperandList = !con(Args, (ins MmaCode:$ptx));
6451 let AsmString = "wmma.load."
6458 # "." # Frag.ptx_elt_type # " \t"
6461 # !if(WithStride, ", $ldm", "")
6466 // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
6468 class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
6469 bit WithStride, DAGOperand DstOp>
6470 : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
6471 [!con((ins DstOp:$dst),
6473 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
6474 Requires<Frag.Predicates> {
6476 // Load/store intrinsics are overloaded on pointer's address space.
6477 // To match the right intrinsic, we need to build AS-constrained PatFrag.
6478 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
6479 dag PFOperands = !con((ops node:$dst),
6480 !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
6481 !if(WithStride, (ops node:$ldm), (ops)));
6482 // Build PatFrag that only matches particular address space.
6483 PatFrag IntrFrag = PatFrag<PFOperands,
6484 !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
6485 !cond(!eq(Space, ".shared"): AS_match.shared,
6486 !eq(Space, ".global"): AS_match.global,
6487 true: AS_match.generic)>;
6488 // Build AS-constrained pattern.
6489 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6491 let InOperandList = !con(Args, (ins MmaCode:$ptx));
6492 let OutOperandList = (outs);
6493 let AsmString = "wmma.store.d.sync"
6498 # "." # Frag.ptx_elt_type
6501 # !if(WithStride, ", $ldm", "")
6505 // Create all load/store variants
6506 defset list<WMMA_INSTR> MMA_LDSTs = {
6507 foreach layout = ["row", "col"] in {
6508 foreach stride = [false, true] in {
6509 foreach space = [".global", ".shared", ""] in {
6510 foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
6511 foreach frag = NVVM_MMA_OPS.all_ld_ops in
6512 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
6513 def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
6514 foreach frag = NVVM_MMA_OPS.all_st_ops in
6515 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
6516 def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
6523 // B1 instruction variants need extra constraints.
6524 class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
6526 WMMA_REGINFO Frag = FragA;
6527 list<Predicate> ret = !listconcat(
6529 !if(!eq(b1op, ".and.popc"), [hasSM<80>,hasPTX<71>],[])
6533 class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
6534 WMMA_REGINFO FragC, WMMA_REGINFO FragD,
6535 string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
6536 : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
6537 [FragA.Ins, FragB.Ins, FragC.Ins]>,
      // Requires does not seem to have an effect on Instructions w/o Patterns.
      // We set it here anyway and propagate it to the Pat<> we construct below.
6540 Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
6541 let OutOperandList = FragD.Outs;
6542 let InOperandList = !con(Args, (ins MmaCode:$ptx));
6543 string TypeList = !cond(
6544 !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
6545 # "." # FragC.ptx_elt_type,
6546 1: "." # FragD.ptx_elt_type
6547 # "." # FragA.ptx_elt_type
6548 # "." # FragB.ptx_elt_type
6549 # "." # FragC.ptx_elt_type,
6551 let AsmString = "wmma.mma"
6558 # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
6560 # !if(Satfinite, ".satfinite", "") # "\n\t\t"
6561 # FragD.regstring # ",\n\t\t"
6562 # FragA.regstring # ",\n\t\t"
6563 # FragB.regstring # ",\n\t\t"
6564 # FragC.regstring # ";";
6567 defset list<WMMA_INSTR> WMMAs = {
6568 foreach layout_a = ["row", "col"] in {
6569 foreach layout_b = ["row", "col"] in {
6570 foreach satf = [0, 1] in {
6571 foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
6572 foreach op = NVVM_MMA_OPS.all_wmma_ops in {
6573 foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
6574 if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
6575 def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
6576 WMMA_REGINFO<op[1], "wmma.mma">,
6577 WMMA_REGINFO<op[2], "wmma.mma">,
6578 WMMA_REGINFO<op[3], "wmma.mma">,
6579 layout_a, layout_b, satf, rnd, b1op>;
6590 class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
6591 WMMA_REGINFO FragC, WMMA_REGINFO FragD,
6592 string ALayout, string BLayout, int Satfinite, string b1op>
6593 : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
6594 [FragA.Ins, FragB.Ins, FragC.Ins]>,
      // Requires does not seem to have an effect on Instructions w/o Patterns.
      // We set it here anyway and propagate it to the Pat<> we construct below.
6597 Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
6598 let OutOperandList = FragD.Outs;
6599 let InOperandList = !con(Args, (ins MmaCode:$ptx));
6600 string TypeList = "." # FragD.ptx_elt_type
6601 # "." # FragA.ptx_elt_type
6602 # "." # FragB.ptx_elt_type
6603 # "." # FragC.ptx_elt_type;
6604 let AsmString = "mma.sync.aligned."
6608 # !if(Satfinite, ".satfinite", "")
6611 # FragD.regstring # ",\n\t\t"
6612 # FragA.regstring # ",\n\t\t"
6613 # FragB.regstring # ",\n\t\t"
6614 # FragC.regstring # ";";
6617 defset list<WMMA_INSTR> MMAs = {
6618 foreach layout_a = ["row", "col"] in {
6619 foreach layout_b = ["row", "col"] in {
6620 foreach satf = [0, 1] in {
6621 foreach op = NVVM_MMA_OPS.all_mma_ops in {
6622 foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
6623 if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
6624 def : MMA<WMMA_REGINFO<op[0], "mma">,
6625 WMMA_REGINFO<op[1], "mma">,
6626 WMMA_REGINFO<op[2], "mma">,
6627 WMMA_REGINFO<op[3], "mma">,
6628 layout_a, layout_b, satf, b1op>;
6638 // ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
6640 class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
6642 : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
6643 Requires<Frag.Predicates> {
6644 // Build PatFrag that only matches particular address space.
6645 PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
6646 !cond(!eq(Space, ".shared"): AS_match.shared,
6647 true: AS_match.generic)>;
6648 // Build AS-constrained pattern.
6649 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6651 let OutOperandList = Frag.Outs;
6652 let InOperandList = !con(Args, (ins MmaCode:$ptx));
6653 let AsmString = "ldmatrix.sync.aligned."
6656 # !if(Transposed, ".trans", "")
6658 # "." # Frag.ptx_elt_type
6659 # " " # Frag.regstring # ", [$src];";
6662 // Create all ldmatrix variants
6663 defset list<WMMA_INSTR> LDMATRIXs = {
6664 foreach transposed = [false, true] in {
6665 foreach space = [".shared", ""] in {
6666 foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
6667 foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
6668 if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
6669 def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
6676 // Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
6677 // dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
6678 // the instruction record.
6679 class MMA_PAT<WMMA_INSTR wi>
6680 : Pat<wi.IntrinsicPattern,
6681 !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
6683 Requires<wi.Predicates>;
6685 // Build intrinsic->instruction patterns for all MMA instructions.
6686 foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
6689 multiclass MAPA<string suffix, Intrinsic Intr> {
// Four variants per suffix: 32- or 64-bit $a/$d, with $b either in an
// Int32Regs register or as an i32 immediate (the "i" forms). All four are
// predicated on hasSM<90> and hasPTX<78>.

// 32-bit $a and $d, $b in a register.
def _32
    : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
                !strconcat("mapa", suffix, ".u32\t$d, $a, $b;"),
                [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;
// 32-bit $a and $d, $b as an immediate.
def _32i
    : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
                !strconcat("mapa", suffix, ".u32\t$d, $a, $b;"),
                [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;
// 64-bit $a and $d, $b in a register.
def _64
    : NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
                !strconcat("mapa", suffix, ".u64\t$d, $a, $b;"),
                [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;
// 64-bit $a and $d, $b as an immediate.
def _64i
    : NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
                !strconcat("mapa", suffix, ".u64\t$d, $a, $b;"),
                [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;
// Instantiate MAPA once with an empty suffix and once with
// ".shared::cluster", each bound to its own intrinsic.
defm mapa
    : MAPA<"", int_nvvm_mapa>;
defm mapa_shared_cluster
    : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;
6712 multiclass GETCTARANK<string suffix, Intrinsic Intr> {
// Two variants per suffix, differing in the width of $a. The result $d is an
// Int32Regs register in both. Both are predicated on hasSM<90> and
// hasPTX<78>.

// 32-bit $a.
def _32
    : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
                !strconcat("getctarank", suffix, ".u32\t$d, $a;"),
                [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;
// 64-bit $a; $d stays 32-bit.
def _64
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                !strconcat("getctarank", suffix, ".u64\t$d, $a;"),
                [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;
// Instantiate GETCTARANK once with an empty suffix and once with
// ".shared::cluster", each bound to its own intrinsic.
defm getctarank
    : GETCTARANK<"", int_nvvm_getctarank>;
defm getctarank_shared_cluster
    : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
// Copies %is_explicit_cluster into an Int1Regs predicate register for
// int_nvvm_is_explicit_cluster. Predicated on hasSM<90> and hasPTX<78>.
def is_explicit_cluster
    : NVPTXInst<(outs Int1Regs:$d), (ins),
                "mov.pred\t$d, %is_explicit_cluster;",
                [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;