1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 def immFloat0 : PatLeaf<(fpimm), [{
10 float f = (float)N->getValueAPF().convertToFloat();
14 def immFloat1 : PatLeaf<(fpimm), [{
15 float f = (float)N->getValueAPF().convertToFloat();
19 def immDouble0 : PatLeaf<(fpimm), [{
20 double d = (double)N->getValueAPF().convertToDouble();
24 def immDouble1 : PatLeaf<(fpimm), [{
25 double d = (double)N->getValueAPF().convertToDouble();
31 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
34 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
37 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
41 // A node that will be replaced with the current PTX version.
43 SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
44 return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
46 // (i32 0) will be XForm'ed to the currently used PTX version.
47 dag version = (PTXVerXform (i32 0));
51 // Generates list of n sequential register names.
// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
53 class RegSeq<int n, string prefix> {
54 list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
55 [prefix # !sub(n, 1)]),
59 class THREADMASK_INFO<bit sync> {
60 list<bit> ret = !if(sync, [0, 1], [0]);
63 //-----------------------------------
64 // Synchronization and shuffle functions
65 //-----------------------------------
66 let isConvergent = true in {
67 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
69 [(int_nvvm_barrier0)]>;
70 def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
72 [(int_nvvm_barrier_n Int32Regs:$src1)]>;
// Selects int_nvvm_barrier on two Int32Regs operands and prints them as the
// two arguments of "bar.sync".
def INT_BARRIER
    : NVPTXInst<(outs),
                (ins Int32Regs:$src1, Int32Regs:$src2),
                "bar.sync \t$src1, $src2;",
                [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
76 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
78 ".reg .pred \t%p1; \n\t",
79 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
80 "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
82 [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
83 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
85 ".reg .pred \t%p1; \n\t",
86 ".reg .pred \t%p2; \n\t",
87 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
88 "bar.red.and.pred \t%p2, 0, %p1; \n\t",
89 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
91 [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
92 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
94 ".reg .pred \t%p1; \n\t",
95 ".reg .pred \t%p2; \n\t",
96 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
97 "bar.red.or.pred \t%p2, 0, %p1; \n\t",
98 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
100 [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// Selects int_nvvm_bar_sync when its operand is an immediate; the immediate is
// printed directly as the argument of "bar.sync".
def INT_BAR_SYNC
    : NVPTXInst<(outs),
                (ins i32imm:$i),
                "bar.sync \t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;
// int_nvvm_bar_warp_sync -> "bar.warp.sync $i".
// _I takes the operand as a 32-bit immediate, _R takes it in an Int32Regs
// register. Both are predicated on hasPTX60 and hasSM30.
def INT_BAR_WARP_SYNC_I
    : NVPTXInst<(outs),
                (ins i32imm:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BAR_WARP_SYNC_R
    : NVPTXInst<(outs),
                (ins Int32Regs:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
// int_nvvm_barrier_sync -> "barrier.sync $i".
// _I takes the operand as a 32-bit immediate, _R takes it in an Int32Regs
// register. Both are predicated on hasPTX60 and hasSM30.
def INT_BARRIER_SYNC_I
    : NVPTXInst<(outs),
                (ins i32imm:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_R
    : NVPTXInst<(outs),
                (ins Int32Regs:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
// int_nvvm_barrier_sync_cnt -> "barrier.sync $id, $cnt".
// The two-letter suffix names the operand kinds in ($id, $cnt) order:
// R = Int32Regs register, I = 32-bit immediate.
// All four forms are predicated on hasPTX60 and hasSM30.
def INT_BARRIER_SYNC_CNT_RR
    : NVPTXInst<(outs),
                (ins Int32Regs:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_CNT_RI
    : NVPTXInst<(outs),
                (ins Int32Regs:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_CNT_IR
    : NVPTXInst<(outs),
                (ins i32imm:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_CNT_II
    : NVPTXInst<(outs),
                (ins i32imm:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
136 class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
137 bit offset_imm, bit mask_imm, bit threadmask_imm>
138 : NVPTXInst<(outs), (ins), "?", []> {
139 NVPTXRegClass rc = !cond(
140 !eq(reg, "i32"): Int32Regs,
141 !eq(reg, "f32"): Float32Regs);
142 string IntrName = "int_nvvm_shfl_"
143 # !if(sync, "sync_", "")
146 # !if(return_pred, "p", "");
147 Intrinsic Intr = !cast<Intrinsic>(IntrName);
148 let InOperandList = !con(
150 !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
153 !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
154 !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
156 let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
157 let AsmString = "shfl."
158 # !if(sync, "sync.", "")
161 # !if(return_pred, "|$pred", "") # ", "
162 # "$src, $offset, $mask"
163 # !if(sync, ", $threadmask", "")
167 !foreach(tmp, OutOperandList,
169 !subst(i32imm, imm, tmp))),
170 (set !foreach(tmp, InOperandList,
172 !subst(i32imm, imm, tmp))))
176 foreach sync = [false, true] in {
177 foreach mode = ["up", "down", "bfly", "idx"] in {
178 foreach regclass = ["i32", "f32"] in {
179 foreach return_pred = [false, true] in {
180 foreach offset_imm = [false, true] in {
181 foreach mask_imm = [false, true] in {
182 foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
183 def : SHFL_INSTR<sync, mode, regclass, return_pred,
184 offset_imm, mask_imm, threadmask_imm>,
185 Requires<!if(sync, [hasSM30], [hasSM30, hasSHFL])>;
194 // vote.{all,any,uni,ballot}
195 multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
// Anonymous instruction for one vote flavour: takes a predicate in Int1Regs,
// writes the result into `regclass`, and prints "vote.<mode> $dest, $pred".
def : NVPTXInst<(outs regclass:$dest),
                (ins Int1Regs:$pred),
                !strconcat("vote.", mode, " \t$dest, $pred;"),
                [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
      Requires<[hasPTX60, hasSM30]>;
// Instantiate VOTE once per mode. all/any/uni produce a predicate (Int1Regs);
// ballot produces a 32-bit value (Int32Regs).
defm VOTE_ALL
    : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
defm VOTE_ANY
    : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
defm VOTE_UNI
    : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
defm VOTE_BALLOT
    : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
207 // vote.sync.{all,any,uni,ballot}
208 multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
// Two forms of "vote.sync.<mode> $dest, $pred, $mask": `i` takes the member
// mask as a 32-bit immediate, `r` takes it in an Int32Regs register.
// Note the intrinsic takes (mask, pred) while the assembly prints pred first.
def i : NVPTXInst<(outs regclass:$dest),
                  (ins i32imm:$mask, Int1Regs:$pred),
                  !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;"),
                  [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;

def r : NVPTXInst<(outs regclass:$dest),
                  (ins Int32Regs:$mask, Int1Regs:$pred),
                  !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;"),
                  [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
// Instantiate VOTE_SYNC once per mode, mirroring the non-sync VOTE defms:
// all/any/uni yield a predicate, ballot yields a 32-bit value.
defm VOTE_SYNC_ALL
    : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY
    : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI
    : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT
    : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
224 multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
// Four operand-kind combinations of
//   "match.any.sync.<ptxtype> $dest, $value, $mask".
// First suffix letter = kind of $value, second = kind of $mask
// (i = immediate, r = register). The intrinsic takes (mask, value).
def ii : NVPTXInst<(outs regclass:$dest),
                   (ins i32imm:$mask, ImmOp:$value),
                   !strconcat("match.any.sync.", ptxtype,
                              " \t$dest, $value, $mask;"),
                   [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
         Requires<[hasPTX60, hasSM70]>;

def ir : NVPTXInst<(outs regclass:$dest),
                   (ins Int32Regs:$mask, ImmOp:$value),
                   !strconcat("match.any.sync.", ptxtype,
                              " \t$dest, $value, $mask;"),
                   [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
         Requires<[hasPTX60, hasSM70]>;

def ri : NVPTXInst<(outs regclass:$dest),
                   (ins i32imm:$mask, regclass:$value),
                   !strconcat("match.any.sync.", ptxtype,
                              " \t$dest, $value, $mask;"),
                   [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
         Requires<[hasPTX60, hasSM70]>;

def rr : NVPTXInst<(outs regclass:$dest),
                   (ins Int32Regs:$mask, regclass:$value),
                   !strconcat("match.any.sync.", ptxtype,
                              " \t$dest, $value, $mask;"),
                   [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
         Requires<[hasPTX60, hasSM70]>;
244 defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
246 defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
249 multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
// Four operand-kind combinations of
//   "match.all.sync.<ptxtype> $dest|$pred, $value, $mask".
// Each form produces two results: $dest in `regclass` and $pred in Int1Regs.
// First suffix letter = kind of $value, second = kind of $mask
// (i = immediate, r = register). The intrinsic takes (mask, value).
def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                   (ins i32imm:$mask, ImmOp:$value),
                   !strconcat("match.all.sync.", ptxtype,
                              " \t$dest|$pred, $value, $mask;"),
                   [(set regclass:$dest, Int1Regs:$pred,
                         (IntOp imm:$mask, imm:$value))]>,
         Requires<[hasPTX60, hasSM70]>;

def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                   (ins Int32Regs:$mask, ImmOp:$value),
                   !strconcat("match.all.sync.", ptxtype,
                              " \t$dest|$pred, $value, $mask;"),
                   [(set regclass:$dest, Int1Regs:$pred,
                         (IntOp Int32Regs:$mask, imm:$value))]>,
         Requires<[hasPTX60, hasSM70]>;

def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                   (ins i32imm:$mask, regclass:$value),
                   !strconcat("match.all.sync.", ptxtype,
                              " \t$dest|$pred, $value, $mask;"),
                   [(set regclass:$dest, Int1Regs:$pred,
                         (IntOp imm:$mask, regclass:$value))]>,
         Requires<[hasPTX60, hasSM70]>;

def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                   (ins Int32Regs:$mask, regclass:$value),
                   !strconcat("match.all.sync.", ptxtype,
                              " \t$dest|$pred, $value, $mask;"),
                   [(set regclass:$dest, Int1Regs:$pred,
                         (IntOp Int32Regs:$mask, regclass:$value))]>,
         Requires<[hasPTX60, hasSM70]>;
272 defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
274 defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
277 multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
// Anonymous instruction printing "redux.sync.<BinOp>.<PTXType> $dst, $src, $mask".
// All three operands live in Int32Regs; predicated on hasPTX70 and hasSM80.
def : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$src, Int32Regs:$mask),
                !strconcat("redux.sync.", BinOp, ".", PTXType,
                           " $dst, $src, $mask;"),
                [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
      Requires<[hasPTX70, hasSM80]>;
// One REDUX_SYNC instantiation per reduction intrinsic.
// Unsigned min/max print as .u32, add and signed min/max as .s32, and the
// bitwise reductions as .b32.
defm REDUX_SYNC_UMIN
    : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX
    : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD
    : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN
    : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX
    : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND
    : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR
    : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR
    : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;
293 } // isConvergent = true
295 //-----------------------------------
296 // Explicit Memory Fence Functions
297 //-----------------------------------
298 class MEMBAR<string StrOp, Intrinsic IntOP> :
299 NVPTXInst<(outs), (ins),
// One MEMBAR instruction per fence intrinsic; the first argument is the
// complete assembly text for that fence.
def INT_MEMBAR_CTA
    : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL
    : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
def INT_MEMBAR_SYS
    : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
307 //-----------------------------------
308 // Async Copy Functions
309 //-----------------------------------
311 multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
// "cp.async.mbarrier.arrive<NoInc><AddrSpace>.b64 [$addr]" with the address
// held in a 32-bit (_32) or 64-bit (_64) register.
def _32 : NVPTXInst<(outs),
                    (ins Int32Regs:$addr),
                    "cp.async.mbarrier.arrive" # NoInc # AddrSpace
                        # ".b64 [$addr];",
                    [(Intrin Int32Regs:$addr)]>,
          Requires<[hasPTX70, hasSM80]>;

def _64 : NVPTXInst<(outs),
                    (ins Int64Regs:$addr),
                    "cp.async.mbarrier.arrive" # NoInc # AddrSpace
                        # ".b64 [$addr];",
                    [(Intrin Int64Regs:$addr)]>,
          Requires<[hasPTX70, hasSM80]>;
// All four {"" | ".noinc"} x {"" | ".shared"} combinations of
// cp.async.mbarrier.arrive, each bound to its own intrinsic.
defm CP_ASYNC_MBARRIER_ARRIVE
    : CP_ASYNC_MBARRIER_ARRIVE<"", "",
                               int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<"", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", "",
                               int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
331 multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
// "cp.async.ca.shared.global [$dst], [$src], <cpsize>" with both addresses in
// 32-bit (_32) or 64-bit (_64) registers.
def _32 : NVPTXInst<(outs),
                    (ins Int32Regs:$dst, Int32Regs:$src),
                    "cp.async.ca.shared.global [$dst], [$src], " # cpsize # ";",
                    [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
          Requires<[hasPTX70, hasSM80]>;

def _64 : NVPTXInst<(outs),
                    (ins Int64Regs:$dst, Int64Regs:$src),
                    "cp.async.ca.shared.global [$dst], [$src], " # cpsize # ";",
                    [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
          Requires<[hasPTX70, hasSM80]>;
// cp.async.ca instantiations for copy sizes 4, 8 and 16; the size is passed
// as the string that is printed as the last assembly operand.
defm CP_ASYNC_CA_SHARED_GLOBAL_4
    : CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;
defm CP_ASYNC_CA_SHARED_GLOBAL_8
    : CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;
defm CP_ASYNC_CA_SHARED_GLOBAL_16
    : CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;
351 multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
// "cp.async.cg.shared.global [$dst], [$src], <cpsize>" with both addresses in
// 32-bit (_32) or 64-bit (_64) registers.
def _32 : NVPTXInst<(outs),
                    (ins Int32Regs:$dst, Int32Regs:$src),
                    "cp.async.cg.shared.global [$dst], [$src], " # cpsize # ";",
                    [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
          Requires<[hasPTX70, hasSM80]>;

def _64 : NVPTXInst<(outs),
                    (ins Int64Regs:$dst, Int64Regs:$src),
                    "cp.async.cg.shared.global [$dst], [$src], " # cpsize # ";",
                    [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
          Requires<[hasPTX70, hasSM80]>;
// The only cp.async.cg instantiation in this section: copy size 16.
defm CP_ASYNC_CG_SHARED_GLOBAL_16
    : CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;
// Group-management instructions for cp.async; all are predicated on hasPTX70
// and hasSM80 and produce no results.

// No operands.
def CP_ASYNC_COMMIT_GROUP
    : NVPTXInst<(outs), (ins),
                "cp.async.commit_group;",
                [(int_nvvm_cp_async_commit_group)]>,
      Requires<[hasPTX70, hasSM80]>;

// $n is matched as a target immediate (timm) and printed verbatim.
def CP_ASYNC_WAIT_GROUP
    : NVPTXInst<(outs), (ins i32imm:$n),
                "cp.async.wait_group $n;",
                [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
      Requires<[hasPTX70, hasSM80]>;

// No operands.
def CP_ASYNC_WAIT_ALL
    : NVPTXInst<(outs), (ins),
                "cp.async.wait_all;",
                [(int_nvvm_cp_async_wait_all)]>,
      Requires<[hasPTX70, hasSM80]>;
379 //-----------------------------------
380 // MBarrier Functions
381 //-----------------------------------
383 multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
// "mbarrier.init<AddrSpace>.b64 [$addr], $count". $count is always a 32-bit
// register; only the address width differs between _32 and _64.
def _32 : NVPTXInst<(outs),
                    (ins Int32Regs:$addr, Int32Regs:$count),
                    "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
                    [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
          Requires<[hasPTX70, hasSM80]>;

def _64 : NVPTXInst<(outs),
                    (ins Int64Regs:$addr, Int32Regs:$count),
                    "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
                    [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
          Requires<[hasPTX70, hasSM80]>;
// mbarrier.init with no address-space qualifier and with ".shared".
defm MBARRIER_INIT
    : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED
    : MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;
398 multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
// "mbarrier.inval<AddrSpace>.b64 [$addr]" with a 32-bit (_32) or 64-bit (_64)
// address register.
def _32 : NVPTXInst<(outs),
                    (ins Int32Regs:$addr),
                    "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
                    [(Intrin Int32Regs:$addr)]>,
          Requires<[hasPTX70, hasSM80]>;

def _64 : NVPTXInst<(outs),
                    (ins Int64Regs:$addr),
                    "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
                    [(Intrin Int64Regs:$addr)]>,
          Requires<[hasPTX70, hasSM80]>;
// mbarrier.inval with no address-space qualifier and with ".shared".
defm MBARRIER_INVAL
    : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED
    : MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>;
413 multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
// "mbarrier.arrive<AddrSpace>.b64 $state, [$addr]". The result $state is a
// 64-bit register regardless of the address width.
def _32 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int32Regs:$addr),
                    "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
                    [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
          Requires<[hasPTX70, hasSM80]>;

def _64 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int64Regs:$addr),
                    "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
                    [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
          Requires<[hasPTX70, hasSM80]>;
// mbarrier.arrive with no address-space qualifier and with ".shared".
defm MBARRIER_ARRIVE
    : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED
    : MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
428 multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
// "mbarrier.arrive.noComplete<AddrSpace>.b64 $state, [$addr], $count".
// $state is a 64-bit result and $count a 32-bit register in both forms; only
// the address width differs.
def _32 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int32Regs:$addr, Int32Regs:$count),
                    "mbarrier.arrive.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                    [(set Int64Regs:$state,
                          (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
          Requires<[hasPTX70, hasSM80]>;

def _64 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int64Regs:$addr, Int32Regs:$count),
                    "mbarrier.arrive.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                    [(set Int64Regs:$state,
                          (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
          Requires<[hasPTX70, hasSM80]>;
// mbarrier.arrive.noComplete with no address-space qualifier and with
// ".shared".
defm MBARRIER_ARRIVE_NOCOMPLETE
    : MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_NOCOMPLETE<".shared",
                                 int_nvvm_mbarrier_arrive_noComplete_shared>;
448 multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
// "mbarrier.arrive_drop<AddrSpace>.b64 $state, [$addr]". The result $state is
// a 64-bit register regardless of the address width.
def _32 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int32Regs:$addr),
                    "mbarrier.arrive_drop" # AddrSpace
                        # ".b64 $state, [$addr];",
                    [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
          Requires<[hasPTX70, hasSM80]>;

def _64 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int64Regs:$addr),
                    "mbarrier.arrive_drop" # AddrSpace
                        # ".b64 $state, [$addr];",
                    [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
          Requires<[hasPTX70, hasSM80]>;
// mbarrier.arrive_drop with no address-space qualifier and with ".shared".
defm MBARRIER_ARRIVE_DROP
    : MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED
    : MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
466 multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
// "mbarrier.arrive_drop.noComplete<AddrSpace>.b64 $state, [$addr], $count".
// $state is a 64-bit result and $count a 32-bit register in both forms; only
// the address width differs.
def _32 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int32Regs:$addr, Int32Regs:$count),
                    "mbarrier.arrive_drop.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                    [(set Int64Regs:$state,
                          (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
          Requires<[hasPTX70, hasSM80]>;

def _64 : NVPTXInst<(outs Int64Regs:$state),
                    (ins Int64Regs:$addr, Int32Regs:$count),
                    "mbarrier.arrive_drop.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                    [(set Int64Regs:$state,
                          (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
          Requires<[hasPTX70, hasSM80]>;
// mbarrier.arrive_drop.noComplete with no address-space qualifier and with
// ".shared".
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
          "", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
          ".shared", int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
487 multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
// "mbarrier.test_wait<AddrSpace>.b64 $res, [$addr], $state". The result $res
// is a predicate (Int1Regs) and $state a 64-bit register; only the address
// width differs between _32 and _64.
def _32 : NVPTXInst<(outs Int1Regs:$res),
                    (ins Int32Regs:$addr, Int64Regs:$state),
                    "mbarrier.test_wait" # AddrSpace
                        # ".b64 $res, [$addr], $state;",
                    [(set Int1Regs:$res,
                          (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
          Requires<[hasPTX70, hasSM80]>;

def _64 : NVPTXInst<(outs Int1Regs:$res),
                    (ins Int64Regs:$addr, Int64Regs:$state),
                    "mbarrier.test_wait" # AddrSpace
                        # ".b64 $res, [$addr], $state;",
                    [(set Int1Regs:$res,
                          (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
          Requires<[hasPTX70, hasSM80]>;
// mbarrier.test_wait with no address-space qualifier and with ".shared".
defm MBARRIER_TEST_WAIT
    : MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED
    : MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
// Instruction class for "mbarrier.pending_count.b64 $res, $state": maps a
// 64-bit $state register to a 32-bit $res via the given intrinsic.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin>
    : NVPTXInst<(outs Int32Regs:$res),
                (ins Int64Regs:$state),
                "mbarrier.pending_count.b64 $res, $state;",
                [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
      Requires<[hasPTX70, hasSM80]>;
// The single instantiation of the class, bound to
// int_nvvm_mbarrier_pending_count.
def MBARRIER_PENDING_COUNT
    : MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
512 //-----------------------------------
514 //-----------------------------------
516 // Map min(1.0, max(0.0, x)) to sat(x)
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
// NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
520 // Same story for fmax, fmin.
// f32: fold fmin(1.0, fmax(0.0, a)) into a saturating CVT_f32_f32.
// The four patterns cover both operand orders of the outer fmin and of the
// inner fmax.

// fmin(1.0, fmax(0.0, a))
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(1.0, fmax(a, 0.0))
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(fmax(0.0, a), 1.0)
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(fmax(a, 0.0), 1.0)
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// f64: fold fmin(1.0, fmax(0.0, a)) into a saturating CVT_f64_f64.
// The four patterns cover both operand orders of the outer fmin and of the
// inner fmax.

// fmin(1.0, fmax(0.0, a))
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(1.0, fmax(a, 0.0))
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(fmax(0.0, a), 1.0)
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(fmax(a, 0.0), 1.0)
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
549 // We need a full string for OpcStr here because we need to deal with case like
551 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
552 NVPTXRegClass src_regclass, Intrinsic IntOP>
553 : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
555 [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
557 // We need a full string for OpcStr here because we need to deal with the case
558 // like INT_PTX_NATIVE_POWR_F.
559 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
560 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
561 : NVPTXInst<(outs t_regclass:$dst),
562 (ins s0_regclass:$src0, s1_regclass:$src1),
564 [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
566 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
567 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
568 NVPTXRegClass s2_regclass, Intrinsic IntOP>
569 : NVPTXInst<(outs t_regclass:$dst),
570 (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
572 [(set t_regclass:$dst,
573 (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
// int_nvvm_prmt: three Int32Regs inputs, one Int32Regs result, printed as
// "prmt.b32".
def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_prmt>;
// Two-operand floating-point min/max. The f32 forms come with and without
// .ftz; the f64 forms have no .ftz variant here.
def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMAX_F
    : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
    : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

def INT_NVVM_FMIN_D
    : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
    : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
// mul.hi in signed/unsigned 32-bit and 64-bit flavours; operand and result
// register classes match the width in the mnemonic.
def INT_NVVM_MULHI_I
    : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
    : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

def INT_NVVM_MULHI_LL
    : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
    : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
// Floating-point multiply with an explicit rounding-mode suffix
// (rn, rz, rm, rp). Each f32 mode has a .ftz and a plain form; the f64 modes
// have only the plain form.

// f32
def INT_NVVM_MUL_RN_FTZ_F
    : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
    : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
    : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
    : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
    : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
    : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
    : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
    : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64
def INT_NVVM_MUL_RN_D
    : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
    : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
    : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
    : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
// mul24.lo in signed and unsigned 32-bit flavours.
def INT_NVVM_MUL24_I
    : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
    : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
// Floating-point divide. f32 has an approximate form plus the four rounding
// modes (rn, rz, rm, rp), each with and without .ftz; f64 has only the four
// rounding modes.

// f32, approximate
def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
    : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32, explicit rounding mode
def INT_NVVM_DIV_RN_FTZ_F
    : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
    : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
    : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
    : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
    : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
    : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
    : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
    : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64, explicit rounding mode
def INT_NVVM_DIV_RN_D
    : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
    : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
    : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
    : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
// Three-operand "sad" in signed and unsigned 32-bit flavours; all operands
// and the result are in Int32Regs.
def INT_NVVM_SAD_I
    : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
    : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_sad_ui>;
// floor and ceil are selected as same-type CVT instructions:
// floor uses the CvtRMI mode, ceil uses CvtRPI; the _ftz f32 intrinsics use
// the corresponding *_FTZ mode.

// floor
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

// ceil
def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
// Absolute value: f32 with and without .ftz, and f64.
def INT_NVVM_FABS_FTZ_F
    : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
    : F_MATH_1<"abs.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
    : F_MATH_1<"abs.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_fabs_d>;
// The round intrinsics are selected as same-type CVT instructions using the
// CvtRNI mode (CvtRNI_FTZ for the _ftz f32 intrinsic).
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
// The trunc intrinsics are selected as same-type CVT instructions using the
// CvtRZI mode (CvtRZI_FTZ for the _ftz f32 intrinsic).
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
// The saturate intrinsics are selected as same-type CVT instructions using
// the CvtSAT mode (CvtSAT_FTZ for the _ftz f32 intrinsic).
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// Approximate base-2 exponential (ex2) and logarithm (lg2): f32 with and
// without .ftz, plus an f64 form of each.
// NOTE(review): the *_D defs print "ex2.approx.f64" / "lg2.approx.f64" --
// confirm against the PTX ISA that an .f64 form of these instructions exists.

def INT_NVVM_EX2_APPROX_FTZ_F
    : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
    : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
    : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

def INT_NVVM_LG2_APPROX_FTZ_F
    : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
    : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
    : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
// Approximate sine and cosine, f32 only, each with and without .ftz.
def INT_NVVM_SIN_APPROX_FTZ_F
    : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
    : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F
    : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
    : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
// fma instructions with an explicit rounding modifier (.rn, .rz, .rm, .rp).
// Each def passes F_MATH_3 the asm template, four register classes and the
// intrinsic to match (F_MATH_3 is defined outside this section; the register
// classes are presumably the result followed by the three sources).

// f32 forms: every rounding modifier appears with and without .ftz.
791 def INT_NVVM_FMA_RN_FTZ_F
792 : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
793 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
794 def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
795 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
796 def INT_NVVM_FMA_RZ_FTZ_F
797 : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
798 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
799 def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
800 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
801 def INT_NVVM_FMA_RM_FTZ_F
802 : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
803 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
804 def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
805 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
806 def INT_NVVM_FMA_RP_FTZ_F
807 : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
808 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
809 def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
810 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
// f64 forms: one def per rounding modifier, no .ftz variants.
812 def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
813 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
814 def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
815 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
816 def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
817 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
818 def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
819 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
// rcp instructions with an explicit rounding modifier (.rn, .rz, .rm, .rp).

// f32 forms: every rounding modifier appears with and without .ftz.
825 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
826 Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
827 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
828 Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
829 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
830 Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
831 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
832 Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
833 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
834 Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
835 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
836 Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
837 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
838 Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
839 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
840 Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
// f64 forms: one def per rounding modifier, no .ftz variants.
842 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
843 Float64Regs, int_nvvm_rcp_rn_d>;
844 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
845 Float64Regs, int_nvvm_rcp_rz_d>;
846 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
847 Float64Regs, int_nvvm_rcp_rm_d>;
848 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
849 Float64Regs, int_nvvm_rcp_rp_d>;
// The only approximate rcp defined in this section is the f64 .ftz form.
851 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
852 Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
// sqrt instructions.

// f32 forms: each rounding modifier (.rn, .rz, .rm, .rp) and .approx appears
// with and without .ftz.
858 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
859 Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
860 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
861 Float32Regs, int_nvvm_sqrt_rn_f>;
862 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
863 Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
864 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
865 Float32Regs, int_nvvm_sqrt_rz_f>;
866 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
867 Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
868 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
869 Float32Regs, int_nvvm_sqrt_rm_f>;
870 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
871 Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
872 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
873 Float32Regs, int_nvvm_sqrt_rp_f>;
874 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
875 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
876 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
877 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
// f64 forms: one def per rounding modifier, no .ftz or .approx variants.
879 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
880 Float64Regs, int_nvvm_sqrt_rn_d>;
881 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
882 Float64Regs, int_nvvm_sqrt_rz_d>;
883 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
884 Float64Regs, int_nvvm_sqrt_rm_d>;
885 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
886 Float64Regs, int_nvvm_sqrt_rp_d>;
888 // nvvm_sqrt intrinsic
// The generic int_nvvm_sqrt_f intrinsic has four patterns that differ only in
// their Requires predicate list:
//   [doF32FTZ, do_SQRTF32_RN] -> INT_NVVM_SQRT_RN_FTZ_F
//   [do_SQRTF32_RN]           -> INT_NVVM_SQRT_RN_F
//   [doF32FTZ]                -> INT_NVVM_SQRT_APPROX_FTZ_F
//   (no predicate)            -> INT_NVVM_SQRT_APPROX_F
889 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
890 (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
891 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
892 (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
893 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
894 (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
895 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
896 (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
// rsqrt.approx instructions: f32 with .ftz, f32 without .ftz, and f64.
902 def INT_NVVM_RSQRT_APPROX_FTZ_F
903 : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
904 int_nvvm_rsqrt_approx_ftz_f>;
905 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
906 Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
907 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
908 Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
// add instructions with an explicit rounding modifier (.rn, .rz, .rm, .rp).
// Each def passes F_MATH_2 the asm template, three register classes and the
// intrinsic to match (F_MATH_2 is defined outside this section).

// f32 forms: every rounding modifier appears with and without .ftz.
914 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
915 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
916 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
917 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
918 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
919 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
920 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
921 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
922 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
923 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
924 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
925 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
926 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
927 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
928 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
929 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
// f64 forms: one def per rounding modifier, no .ftz variants.
931 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
932 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
933 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
934 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
935 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
936 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
937 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
938 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
// f64 -> f32 conversions: each int_nvvm_d2f_<mode>[_ftz] intrinsic selects
// CVT_f32_f64 with the matching Cvt<MODE>[_FTZ] mode operand
// (modes: rn, rz, rm, rp).
944 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
945 (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
946 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
947 (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
948 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
949 (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
950 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
951 (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
952 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
953 (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
954 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
955 (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
956 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
957 (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
958 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
959 (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
// f64 -> 32-bit integer conversions. The d2i intrinsics select CVT_s32_f64
// and the d2ui intrinsics select CVT_u32_f64; the mode operand is the
// integer-rounding form (CvtRNI, CvtRZI, CvtRMI, CvtRPI) matching the
// intrinsic suffix.
961 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
962 (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
963 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
964 (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
965 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
966 (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
967 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
968 (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
970 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
971 (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
972 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
973 (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
974 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
975 (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
976 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
977 (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
// 32-bit integer -> f64 conversions. The i2d intrinsics select CVT_f64_s32
// and the ui2d intrinsics select CVT_f64_u32, with the CvtRN/CvtRZ/CvtRM/CvtRP
// mode operand matching the intrinsic suffix.
979 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
980 (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
981 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
982 (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
983 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
984 (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
985 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
986 (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
988 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
989 (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
990 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
991 (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
992 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
993 (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
994 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
995 (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
// f32 -> 32-bit integer conversions. The f2i intrinsics select CVT_s32_f32
// and the f2ui intrinsics select CVT_u32_f32. The mode operand is
// Cvt<MODE>I, or Cvt<MODE>I_FTZ for the _ftz intrinsics.
997 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
998 (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
999 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
1000 (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
1001 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
1002 (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1003 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
1004 (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
1005 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
1006 (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1007 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
1008 (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
1009 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
1010 (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1011 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
1012 (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
1014 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
1015 (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1016 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
1017 (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
1018 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
1019 (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1020 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
1021 (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
1022 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
1023 (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1024 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
1025 (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
1026 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
1027 (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1028 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
1029 (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
// 32-bit integer -> f32 conversions. The i2f intrinsics select CVT_f32_s32
// and the ui2f intrinsics select CVT_f32_u32, with the CvtRN/CvtRZ/CvtRM/CvtRP
// mode operand matching the intrinsic suffix.
1031 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
1032 (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
1033 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
1034 (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
1035 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
1036 (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
1037 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
1038 (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
1040 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
1041 (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
1042 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
1043 (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
1044 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
1045 (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
1046 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
1047 (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
// int_nvvm_lohi_i2d: a single mov.b64 packs the two Int32Regs sources, written
// as a brace-enclosed pair in the asm template, into one Float64Regs result.
1049 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
1050 Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
// int_nvvm_d2i_lo: the asm opens a scoped block, declares a scratch .b32
// register %temp, and uses mov.b64 to unpack $src0 into the pair
// {$dst, %temp}, so $dst receives the first element of the pair.
1052 def INT_NVVM_D2I_LO : F_MATH_1<
1053 !strconcat("{{\n\t",
1054 ".reg .b32 %temp; \n\t",
1055 "mov.b64 \t{$dst, %temp}, $src0;\n\t",
1057 Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
// int_nvvm_d2i_hi: same sequence, but the pair is {%temp, $dst}, so $dst
// receives the second element of the pair.
1058 def INT_NVVM_D2I_HI : F_MATH_1<
1059 !strconcat("{{\n\t",
1060 ".reg .b32 %temp; \n\t",
1061 "mov.b64 \t{%temp, $dst}, $src0;\n\t",
1063 Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
// f32 -> 64-bit integer conversions. The f2ll intrinsics select CVT_s64_f32
// and the f2ull intrinsics select CVT_u64_f32. The mode operand is
// Cvt<MODE>I, or Cvt<MODE>I_FTZ for the _ftz intrinsics.
1065 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
1066 (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1067 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
1068 (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
1069 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
1070 (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1071 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
1072 (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
1073 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
1074 (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1075 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
1076 (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
1077 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
1078 (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1079 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
1080 (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
1082 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
1083 (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1084 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
1085 (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
1086 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
1087 (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1088 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
1089 (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
1090 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
1091 (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1092 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
1093 (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
1094 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
1095 (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1096 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
1097 (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
// f64 -> 64-bit integer conversions. The d2ll intrinsics select CVT_s64_f64
// and the d2ull intrinsics select CVT_u64_f64, with the
// CvtRNI/CvtRZI/CvtRMI/CvtRPI mode operand matching the intrinsic suffix.
1099 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
1100 (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
1101 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
1102 (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
1103 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
1104 (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
1105 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
1106 (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
1108 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
1109 (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
1110 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
1111 (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
1112 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
1113 (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
1114 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
1115 (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
// 64-bit integer -> f32 conversions. The ll2f intrinsics select CVT_f32_s64
// and the ull2f intrinsics select CVT_f32_u64, with the CvtRN/CvtRZ/CvtRM/CvtRP
// mode operand matching the intrinsic suffix.
1117 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
1118 (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
1119 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
1120 (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
1121 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
1122 (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
1123 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
1124 (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
1126 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
1127 (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
1128 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
1129 (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
1130 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
1131 (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
1132 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
1133 (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
// 64-bit integer -> f64 conversions. The ll2d intrinsics select CVT_f64_s64
// and the ull2d intrinsics select CVT_f64_u64, with the CvtRN/CvtRZ/CvtRM/CvtRP
// mode operand matching the intrinsic suffix.
1135 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
1136 (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
1137 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
1138 (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
1139 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
1140 (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
1141 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
1142 (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
1144 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
1145 (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
1146 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
1147 (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
1148 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
1149 (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
1150 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
1151 (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
// f32 -> f16 conversions: CVT_f16_f32 performs the conversion with the
// CvtRN_FTZ or CvtRN mode operand, and its result is then passed through
// BITCONVERT_16_F2I (presumably because the intrinsic returns the half value
// as an integer - confirm against the intrinsic definition).
1154 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
1155 (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
1156 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
1157 (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
// Bitcast intrinsics between float and integer register classes of the same
// width. The 32-bit pair (f2i, i2f) emits mov.b32; the 64-bit pair
// (ll2d, d2ll) emits mov.b64.
1163 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
1164 Float32Regs, int_nvvm_bitcast_f2i>;
1165 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
1166 Int32Regs, int_nvvm_bitcast_i2f>;
1168 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
1169 Int64Regs, int_nvvm_bitcast_ll2d>;
1170 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
1171 Float64Regs, int_nvvm_bitcast_d2ll>;
// Common base for the fns.b32 instruction variants.
//   ins      - input operand dag; must name its operands $mask, $base and
//              $offset, because the asm template refers to them by name.
//   Operands - the dag to match; its result is assigned to Int32Regs:$dst.
// Every variant is guarded by the hasPTX60 and hasSM30 predicates.
1177 class INT_FNS_MBO<dag ins, dag Operands>
1178 : NVPTXInst<(outs Int32Regs:$dst), ins,
1179 "fns.b32 \t$dst, $mask, $base, $offset;",
1180 [(set Int32Regs:$dst, Operands )]>,
1181 Requires<[hasPTX60, hasSM30]>;
// All eight register/immediate combinations of int_nvvm_fns. The three-letter
// suffix gives the kind of $mask, $base and $offset in that order:
// 'r' = Int32Regs register operand, 'i' = i32imm immediate operand.
1183 def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
1184 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1185 def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
1186 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
1187 def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
1188 (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
1189 def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
1190 (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
1191 def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
1192 (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1193 def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
1194 (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
1195 def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
1196 (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
1197 def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
1198 (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
1200 //-----------------------------------
1202 //-----------------------------------
// PatFrag helpers that wrap a fragment `frag` over operands `ops` and pass
// AS_match.global / AS_match.shared / AS_match.generic as the third PatFrag
// argument. AS_match is defined outside this section; presumably each field
// is a predicate restricting the match to that address space.
1204 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
1205 : PatFrag<ops, frag, AS_match.global>;
1206 class ATOMIC_SHARED_CHK <dag ops, dag frag>
1207 : PatFrag<ops, frag, AS_match.shared>;
1208 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
1209 : PatFrag<ops, frag, AS_match.generic>;
// Two-operand atomic (address plus one value) for a single pointer width.
//   ptrclass - register class of the $addr operand.
//   regclass - register class of the result and of the register value operand.
//   SpaceStr, OpcStr, TypeStr - concatenated after "atom", in that order, to
//              form the PTX opcode.
//   IntOp    - pattern fragment matched as (IntOp addr, b).
//   IMMType  - operand type of $b in the immediate variant.
//   IMM      - SDNode used to match the immediate $b.
//   Pred     - list of predicates for the generated instructions.
1211 multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1212 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1213 Operand IMMType, SDNode IMM, list<Predicate> Pred> {
// Variant whose value operand $b is a register.
1214 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1215 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
1216 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
// Variant whose value operand $b is an immediate.
1218 def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
1219 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
1220 [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
// Instantiates F_ATOMIC_2_imp twice: `p32` with Int32Regs as the pointer
// register class and `p64` with Int64Regs. All other arguments are forwarded
// unchanged. Pred defaults to the empty list.
1223 multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1224 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
1225 list<Predicate> Pred = []> {
1226 defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1227 IntOp, IMMType, IMM, Pred>;
1228 defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1229 IntOp, IMMType, IMM, Pred>;
1232 // has 2 operands, neg the second one
// Register-only two-operand atomic for a single pointer width. The visible
// asm fragments declare a scratch register `temp` of type .s<TypeStr>, write
// the negation of $b into it with neg.s<TypeStr>, and then issue
// atom<SpaceStr><OpcStr>.u<TypeStr> on [$addr] with `temp` as the value.
// TypeStr is therefore a bare bit width here, because the ".s"/".u" prefix is
// supplied by the template itself.
1233 multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1234 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1235 Operand IMMType, list<Predicate> Pred> {
1236 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1239 ".reg \t.s", TypeStr, " temp; \n\t",
1240 "neg.s", TypeStr, " \ttemp, $b; \n\t",
1241 "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
1243 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
// Instantiates F_ATOMIC_2_NEG_imp twice: `p32` with Int32Regs as the pointer
// register class and `p64` with Int64Regs. All other arguments are forwarded
// unchanged. Pred defaults to the empty list.
1246 multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
1247 string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
1248 list<Predicate> Pred = []> {
1249 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1250 IntOp, IMMType, Pred> ;
1251 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1252 IntOp, IMMType, Pred> ;
// Three-operand atomic (address plus two values $b and $c) for a single
// pointer width. The PTX opcode is "atom" followed by SpaceStr, OpcStr and
// TypeStr; the pattern is (IntOp addr, b, c). Four variants cover every
// register/immediate combination of $b and $c; immediates are matched with
// the `imm` node and use IMMType as the operand type.
1256 multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1257 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1258 Operand IMMType, list<Predicate> Pred> {
// $b register, $c register.
1259 def reg : NVPTXInst<(outs regclass:$dst),
1260 (ins ptrclass:$addr, regclass:$b, regclass:$c),
1261 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1262 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
// $b immediate, $c register.
1265 def imm1 : NVPTXInst<(outs regclass:$dst),
1266 (ins ptrclass:$addr, IMMType:$b, regclass:$c),
1267 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1268 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
// $b register, $c immediate.
1271 def imm2 : NVPTXInst<(outs regclass:$dst),
1272 (ins ptrclass:$addr, regclass:$b, IMMType:$c),
1273 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
1274 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
// $b immediate, $c immediate.
1277 def imm3 : NVPTXInst<(outs regclass:$dst),
1278 (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
1279 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1280 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
// Instantiates F_ATOMIC_3_imp twice: `p32` with Int32Regs as the pointer
// register class and `p64` with Int64Regs. All other arguments are forwarded
// unchanged. Pred defaults to the empty list.
1283 multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1284 string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
1285 defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1286 IntOp, IMMType, Pred>;
1287 defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1288 IntOp, IMMType, Pred>;
// Address-space-checked fragments for atomic add. Suffix _g uses
// ATOMIC_GLOBAL_CHK, _s uses ATOMIC_SHARED_CHK and _gen uses
// ATOMIC_GENERIC_CHK.

// Integer add, 32-bit and 64-bit.
1293 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1294 (atomic_load_add_32 node:$a, node:$b)>;
1295 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1296 (atomic_load_add_32 node:$a, node:$b)>;
1297 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1298 (atomic_load_add_32 node:$a, node:$b)>;
1299 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1300 (atomic_load_add_64 node:$a, node:$b)>;
1301 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1302 (atomic_load_add_64 node:$a, node:$b)>;
1303 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1304 (atomic_load_add_64 node:$a, node:$b)>;
// The fragments without a width in their name wrap atomic_load_fadd, not an
// integer add.
1305 def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1306 (atomic_load_fadd node:$a, node:$b)>;
1307 def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1308 (atomic_load_fadd node:$a, node:$b)>;
1309 def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1310 (atomic_load_fadd node:$a, node:$b)>;
// atom.add instruction families built from F_ATOMIC_2.
// Naming: _G_ emits ".global", _S_ emits ".shared", _GEN_ emits no state-space
// qualifier. The _GEN_*_USE_G families match the generic-address-space
// fragment but emit ".global".

// 32-bit integer add (.u32).
1312 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
1313 atomic_load_add_32_g, i32imm, imm>;
1314 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
1315 atomic_load_add_32_s, i32imm, imm>;
1316 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
1317 atomic_load_add_32_gen, i32imm, imm>;
1318 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1319 ".add", atomic_load_add_32_gen, i32imm, imm>;
// 64-bit integer add (.u64).
1321 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
1322 atomic_load_add_64_g, i64imm, imm>;
1323 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
1324 atomic_load_add_64_s, i64imm, imm>;
1325 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
1326 atomic_load_add_64_gen, i64imm, imm>;
1327 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1328 ".add", atomic_load_add_64_gen, i64imm, imm>;
// f32 add (.f32); immediates are matched with fpimm.
1330 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
1331 atomic_load_add_g, f32imm, fpimm>;
1332 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
1333 atomic_load_add_s, f32imm, fpimm>;
1334 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
1335 atomic_load_add_gen, f32imm, fpimm>;
// f64 add (.f64); additionally guarded by the hasAtomAddF64 predicate.
1337 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
1338 atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
1339 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
1340 atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
1341 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
1342 atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
// Address-space-checked fragments for 32-bit and 64-bit atomic subtract.
// Suffix _g uses ATOMIC_GLOBAL_CHK, _s uses ATOMIC_SHARED_CHK and _gen uses
// ATOMIC_GENERIC_CHK.
1346 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1347 (atomic_load_sub_32 node:$a, node:$b)>;
1348 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1349 (atomic_load_sub_32 node:$a, node:$b)>;
1350 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1351 (atomic_load_sub_32 node:$a, node:$b)>;
1352 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1353 (atomic_load_sub_64 node:$a, node:$b)>;
1354 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1355 (atomic_load_sub_64 node:$a, node:$b)>;
1356 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1357 (atomic_load_sub_64 node:$a, node:$b)>;
// Atomic subtract families. They are built from F_ATOMIC_2_NEG with the
// ".add" opcode, i.e. the value operand is negated and then atomically added.
// TypeStr is passed as a bare width ("32"/"64") because F_ATOMIC_2_NEG_imp
// adds the ".s"/".u" prefix itself.
// Naming: _G_ emits ".global", _S_ emits ".shared", _GEN_ emits no state-space
// qualifier; _GEN_*_USE_G matches the generic fragment but emits ".global".
1359 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
1360 atomic_load_sub_32_g, i32imm>;
1361 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
1362 atomic_load_sub_64_g, i64imm>;
1363 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
1364 atomic_load_sub_32_gen, i32imm>;
1365 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
1366 ".add", atomic_load_sub_32_gen, i32imm>;
1367 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
1368 atomic_load_sub_32_s, i32imm>;
1369 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
1370 atomic_load_sub_64_s, i64imm>;
1371 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
1372 atomic_load_sub_64_gen, i64imm>;
1373 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
1374 ".add", atomic_load_sub_64_gen, i64imm>;
// Address-space-checked fragments for 32-bit and 64-bit atomic swap.
// Suffix _g uses ATOMIC_GLOBAL_CHK, _s uses ATOMIC_SHARED_CHK and _gen uses
// ATOMIC_GENERIC_CHK.
1378 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1379 (atomic_swap_32 node:$a, node:$b)>;
1380 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1381 (atomic_swap_32 node:$a, node:$b)>;
1382 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1383 (atomic_swap_32 node:$a, node:$b)>;
1384 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1385 (atomic_swap_64 node:$a, node:$b)>;
1386 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1387 (atomic_swap_64 node:$a, node:$b)>;
1388 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1389 (atomic_swap_64 node:$a, node:$b)>;
// Atomic swap families, emitted as atom<space>.exch with the untyped .b32 or
// .b64 type suffix.
// Naming: _G_ emits ".global", _S_ emits ".shared", _GEN_ emits no state-space
// qualifier; _GEN_*_USE_G matches the generic fragment but emits ".global".
1391 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
1392 atomic_swap_32_g, i32imm, imm>;
1393 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
1394 atomic_swap_32_s, i32imm, imm>;
1395 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
1396 atomic_swap_32_gen, i32imm, imm>;
1397 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1398 ".exch", atomic_swap_32_gen, i32imm, imm>;
1399 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
1400 atomic_swap_64_g, i64imm, imm>;
1401 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
1402 atomic_swap_64_s, i64imm, imm>;
1403 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
1404 atomic_swap_64_gen, i64imm, imm>;
1405 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1406 ".exch", atomic_swap_64_gen, i64imm, imm>;
// Address-space-checked fragments for atomic max. Suffix _g uses
// ATOMIC_GLOBAL_CHK, _s uses ATOMIC_SHARED_CHK and _gen uses
// ATOMIC_GENERIC_CHK.

// atomic_load_max, 32-bit and 64-bit.
1410 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1411 , (atomic_load_max_32 node:$a, node:$b)>;
1412 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1413 (atomic_load_max_32 node:$a, node:$b)>;
1414 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1415 (atomic_load_max_32 node:$a, node:$b)>;
1416 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1417 , (atomic_load_max_64 node:$a, node:$b)>;
1418 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1419 (atomic_load_max_64 node:$a, node:$b)>;
1420 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1421 (atomic_load_max_64 node:$a, node:$b)>;
// atomic_load_umax, 32-bit and 64-bit.
1422 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1423 (atomic_load_umax_32 node:$a, node:$b)>;
1424 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1425 (atomic_load_umax_32 node:$a, node:$b)>;
1426 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1427 (atomic_load_umax_32 node:$a, node:$b)>;
1428 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1429 (atomic_load_umax_64 node:$a, node:$b)>;
1430 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1431 (atomic_load_umax_64 node:$a, node:$b)>;
1432 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1433 (atomic_load_umax_64 node:$a, node:$b)>;
// atom.max instruction families built from F_ATOMIC_2.
// Naming: _G_ emits ".global", _S_ emits ".shared", _GEN_ emits no state-space
// qualifier; _GEN_*_USE_G matches the generic fragment but emits ".global".

// atomic_load_max fragments use the signed type suffixes .s32 / .s64.
1435 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1436 ".max", atomic_load_max_32_g, i32imm, imm>;
1437 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1438 ".max", atomic_load_max_32_s, i32imm, imm>;
1439 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
1440 atomic_load_max_32_gen, i32imm, imm>;
1441 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1442 ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
1443 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1444 ".max", atomic_load_max_64_g, i64imm, imm>;
1445 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1446 ".max", atomic_load_max_64_s, i64imm, imm>;
1447 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
1448 atomic_load_max_64_gen, i64imm, imm>;
1449 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1450 ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
// atomic_load_umax fragments use the unsigned type suffixes .u32 / .u64.
1451 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1452 ".max", atomic_load_umax_32_g, i32imm, imm>;
1453 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1454 ".max", atomic_load_umax_32_s, i32imm, imm>;
1455 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
1456 atomic_load_umax_32_gen, i32imm, imm>;
1457 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1458 ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
1459 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1460 ".max", atomic_load_umax_64_g, i64imm, imm>;
1461 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1462 ".max", atomic_load_umax_64_s, i64imm, imm>;
1463 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
1464 atomic_load_umax_64_gen, i64imm, imm>;
1465 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1466 ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
// Pattern fragments for atomic min: signed (atomic_load_min_*) and unsigned
// (atomic_load_umin_*), in 32- and 64-bit widths. Each underlying node gets
// three fragments (_g, _s, _gen) built from ATOMIC_GLOBAL_CHK,
// ATOMIC_SHARED_CHK and ATOMIC_GENERIC_CHK respectively.
// NOTE(review): the *_CHK classes are defined outside this section; presumably
// they restrict matching to the named address space -- confirm there.
1470 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1471 (atomic_load_min_32 node:$a, node:$b)>;
1472 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1473 (atomic_load_min_32 node:$a, node:$b)>;
1474 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1475 (atomic_load_min_32 node:$a, node:$b)>;
1476 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1477 (atomic_load_min_64 node:$a, node:$b)>;
1478 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1479 (atomic_load_min_64 node:$a, node:$b)>;
1480 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1481 (atomic_load_min_64 node:$a, node:$b)>;
1482 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1483 (atomic_load_umin_32 node:$a, node:$b)>;
1484 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1485 (atomic_load_umin_32 node:$a, node:$b)>;
1486 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1487 (atomic_load_umin_32 node:$a, node:$b)>;
1488 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1489 (atomic_load_umin_64 node:$a, node:$b)>;
1490 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1491 (atomic_load_umin_64 node:$a, node:$b)>;
1492 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1493 (atomic_load_umin_64 node:$a, node:$b)>;
// Atomic min instruction families: F_ATOMIC_2 instantiated once per
// (signedness, width, address space) combination.
//   * MIN passes the ".s32"/".s64" type strings, UMIN passes ".u32"/".u64";
//     both pass ".min" as the operation string.
//   * _G / _S / _GEN pass ".global" / ".shared" / "" as the space string,
//     together with the matching _g / _s / _gen pattern fragment.
//   * _GEN_*_USE_G reuses the _gen pattern fragment but passes ".global" as
//     the space string.
//   * 32-bit variants use Int32Regs with i32imm, 64-bit variants use Int64Regs
//     with i64imm.
1495 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1496 ".min", atomic_load_min_32_g, i32imm, imm>;
1497 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1498 ".min", atomic_load_min_32_s, i32imm, imm>;
1499 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
1500 atomic_load_min_32_gen, i32imm, imm>;
1501 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1502 ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
1503 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1504 ".min", atomic_load_min_64_g, i64imm, imm>;
1505 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1506 ".min", atomic_load_min_64_s, i64imm, imm>;
1507 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
1508 atomic_load_min_64_gen, i64imm, imm>;
1509 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1510 ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
1511 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1512 ".min", atomic_load_umin_32_g, i32imm, imm>;
1513 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1514 ".min", atomic_load_umin_32_s, i32imm, imm>;
1515 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
1516 atomic_load_umin_32_gen, i32imm, imm>;
1517 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1518 ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
1519 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1520 ".min", atomic_load_umin_64_g, i64imm, imm>;
1521 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1522 ".min", atomic_load_umin_64_s, i64imm, imm>;
1523 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
1524 atomic_load_umin_64_gen, i64imm, imm>;
1525 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1526 ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
1528 // atom_inc atom_dec
// Pattern fragments for atomic increment / decrement. Unlike the neighbouring
// groups these wrap the int_nvvm_atomic_load_inc_32 / int_nvvm_atomic_load_dec_32
// intrinsics rather than a generic atomic_load_* node, and only 32-bit forms
// are defined. One fragment per address-space check class (_g, _s, _gen).
1530 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1531 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1532 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1533 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1534 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1535 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1536 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1537 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1538 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1539 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1540 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1541 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
// Atomic inc / dec instruction families (32-bit only). Each instantiates
// F_ATOMIC_2 with Int32Regs, the ".u32" type string and ".inc" or ".dec" as
// the operation string. _G / _S / _GEN pass ".global" / ".shared" / "" as the
// space string; _GEN_32_USE_G reuses the _gen fragment with ".global".
1543 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
1544 atomic_load_inc_32_g, i32imm, imm>;
1545 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
1546 atomic_load_inc_32_s, i32imm, imm>;
1547 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
1548 atomic_load_inc_32_gen, i32imm, imm>;
1549 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1550 ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1551 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
1552 atomic_load_dec_32_g, i32imm, imm>;
1553 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
1554 atomic_load_dec_32_s, i32imm, imm>;
1555 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
1556 atomic_load_dec_32_gen, i32imm, imm>;
1557 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1558 ".dec", atomic_load_dec_32_gen, i32imm, imm>;
// Pattern fragments for atomic bitwise AND in 32- and 64-bit widths. Each
// atomic_load_and_* node gets a _g, _s and _gen fragment built from
// ATOMIC_GLOBAL_CHK, ATOMIC_SHARED_CHK and ATOMIC_GENERIC_CHK respectively.
1562 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1563 (atomic_load_and_32 node:$a, node:$b)>;
1564 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1565 (atomic_load_and_32 node:$a, node:$b)>;
1566 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1567 (atomic_load_and_32 node:$a, node:$b)>;
1568 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1569 (atomic_load_and_64 node:$a, node:$b)>;
1570 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1571 (atomic_load_and_64 node:$a, node:$b)>;
1572 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1573 (atomic_load_and_64 node:$a, node:$b)>;
// Atomic AND instruction families: F_ATOMIC_2 with the untyped ".b32"/".b64"
// type strings and ".and" as the operation string. _G / _S / _GEN pass
// ".global" / ".shared" / "" as the space string; _GEN_*_USE_G reuses the _gen
// fragment with ".global". 64-bit variants switch to Int64Regs and i64imm.
1575 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
1576 atomic_load_and_32_g, i32imm, imm>;
1577 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
1578 atomic_load_and_32_s, i32imm, imm>;
1579 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
1580 atomic_load_and_32_gen, i32imm, imm>;
1581 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1582 ".and", atomic_load_and_32_gen, i32imm, imm>;
1583 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
1584 atomic_load_and_64_g, i64imm, imm>;
1585 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
1586 atomic_load_and_64_s, i64imm, imm>;
1587 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
1588 atomic_load_and_64_gen, i64imm, imm>;
1589 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1590 ".and", atomic_load_and_64_gen, i64imm, imm>;
// Pattern fragments for atomic bitwise OR in 32- and 64-bit widths. Each
// atomic_load_or_* node gets a _g, _s and _gen fragment built from
// ATOMIC_GLOBAL_CHK, ATOMIC_SHARED_CHK and ATOMIC_GENERIC_CHK respectively.
1594 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1595 (atomic_load_or_32 node:$a, node:$b)>;
1596 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1597 (atomic_load_or_32 node:$a, node:$b)>;
1598 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1599 (atomic_load_or_32 node:$a, node:$b)>;
1600 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1601 (atomic_load_or_64 node:$a, node:$b)>;
1602 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1603 (atomic_load_or_64 node:$a, node:$b)>;
1604 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1605 (atomic_load_or_64 node:$a, node:$b)>;
// Atomic OR instruction families: F_ATOMIC_2 with the ".b32"/".b64" type
// strings and ".or" as the operation string. The same four variants exist as
// for the other operations (_G, _S, _GEN, _GEN_*_USE_G); note that here they
// are listed in the order G, GEN, GEN_USE_G, S rather than G, S, GEN, GEN_USE_G.
1607 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
1608 atomic_load_or_32_g, i32imm, imm>;
1609 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
1610 atomic_load_or_32_gen, i32imm, imm>;
1611 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1612 ".or", atomic_load_or_32_gen, i32imm, imm>;
1613 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
1614 atomic_load_or_32_s, i32imm, imm>;
1615 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
1616 atomic_load_or_64_g, i64imm, imm>;
1617 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
1618 atomic_load_or_64_gen, i64imm, imm>;
1619 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1620 ".or", atomic_load_or_64_gen, i64imm, imm>;
1621 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
1622 atomic_load_or_64_s, i64imm, imm>;
// Pattern fragments for atomic bitwise XOR in 32- and 64-bit widths. Each
// atomic_load_xor_* node gets a _g, _s and _gen fragment built from
// ATOMIC_GLOBAL_CHK, ATOMIC_SHARED_CHK and ATOMIC_GENERIC_CHK respectively.
1626 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1627 (atomic_load_xor_32 node:$a, node:$b)>;
1628 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1629 (atomic_load_xor_32 node:$a, node:$b)>;
1630 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1631 (atomic_load_xor_32 node:$a, node:$b)>;
1632 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1633 (atomic_load_xor_64 node:$a, node:$b)>;
1634 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1635 (atomic_load_xor_64 node:$a, node:$b)>;
1636 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1637 (atomic_load_xor_64 node:$a, node:$b)>;
// Atomic XOR instruction families: F_ATOMIC_2 with the ".b32"/".b64" type
// strings and ".xor" as the operation string. _G / _S / _GEN pass ".global" /
// ".shared" / "" as the space string; _GEN_*_USE_G reuses the _gen fragment
// with ".global". 64-bit variants switch to Int64Regs and i64imm.
1639 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
1640 atomic_load_xor_32_g, i32imm, imm>;
1641 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
1642 atomic_load_xor_32_s, i32imm, imm>;
1643 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
1644 atomic_load_xor_32_gen, i32imm, imm>;
1645 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1646 ".xor", atomic_load_xor_32_gen, i32imm, imm>;
1647 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
1648 atomic_load_xor_64_g, i64imm, imm>;
1649 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
1650 atomic_load_xor_64_s, i64imm, imm>;
1651 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
1652 atomic_load_xor_64_gen, i64imm, imm>;
1653 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1654 ".xor", atomic_load_xor_64_gen, i64imm, imm>;
// Pattern fragments for atomic compare-and-swap in 32- and 64-bit widths.
// These take three operands ($a, $b, $c) instead of two; otherwise they follow
// the same scheme as the two-operand atomics: one fragment each from
// ATOMIC_GLOBAL_CHK (_g), ATOMIC_SHARED_CHK (_s) and ATOMIC_GENERIC_CHK (_gen).
1658 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1659 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1660 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1661 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1662 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1663 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1664 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1665 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1666 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1667 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1668 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1669 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
// Atomic compare-and-swap instruction families. These instantiate F_ATOMIC_3
// (defined outside this section) rather than F_ATOMIC_2, with the ".b32"/".b64"
// type strings and ".cas" as the operation string. Unlike the F_ATOMIC_2
// instantiations, no immediate node argument is passed after the immediate
// operand type. Space-string conventions match the other atomics: ".global",
// ".shared", "" for generic, and ".global" with the _gen fragment for
// _GEN_*_USE_G.
1671 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
1672 atomic_cmp_swap_32_g, i32imm>;
1673 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
1674 atomic_cmp_swap_32_s, i32imm>;
1675 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
1676 atomic_cmp_swap_32_gen, i32imm>;
1677 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
1678 ".cas", atomic_cmp_swap_32_gen, i32imm>;
1679 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
1680 atomic_cmp_swap_64_g, i64imm>;
1681 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
1682 atomic_cmp_swap_64_s, i64imm>;
1683 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
1684 atomic_cmp_swap_64_gen, i64imm>;
1685 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
1686 ".cas", atomic_cmp_swap_64_gen, i64imm>;
1688 // Support for scoped atomic operations. Matches
1689 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1690 // and converts it into the appropriate instruction.
1691 // NOTE: not all possible combinations are implemented
1692 // 'space' is limited to generic as it's the only one needed to support CUDA.
1693 // 'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Common base for the scoped two- and three-operand atomic instructions.
//   AsmStr   - assembly string for the instruction.
//   regclass - register class of the single result ($result).
//   Preds    - list of predicates associated with the instruction.
//   ins      - input operand dag, forwarded unchanged to NVPTXInst.
//   Operands - dag whose value the selection pattern assigns to $result.
1694 class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
1695 dag ins, dag Operands>
1696 : NVPTXInst<(outs regclass:$result), ins,
1698 [(set regclass:$result, Operands)]>,
1701 // Define instruction variants for all addressing modes.
// Emits four anonymous ATOM23_impl records for a two-operand intrinsic `Intr`:
// the address ($src) is either an Int32Regs or an Int64Regs register, and the
// data operand ($b) is either a `regclass` register or an `ImmType` immediate.
//   AsmStr / Preds  - forwarded to every ATOM23_impl record.
//   Imm / ImmTy     - immediate node and the value type it is cast to in the
//                     immediate-operand patterns.
1702 multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
1703 NVPTXRegClass regclass, Operand ImmType,
1704 SDNode Imm, ValueType ImmTy,
1705 list<Predicate> Preds> {
// Register-operand variants are introduced under AddedComplexity = 1.
1706 let AddedComplexity = 1 in {
1707 def : ATOM23_impl<AsmStr, regclass, Preds,
1708 (ins Int32Regs:$src, regclass:$b),
1709 (Intr Int32Regs:$src, regclass:$b)>;
1710 def : ATOM23_impl<AsmStr, regclass, Preds,
1711 (ins Int64Regs:$src, regclass:$b),
1712 (Intr Int64Regs:$src, regclass:$b)>;
1714 // tablegen can't infer argument types from Intrinsic (though it can
1715 // from Instruction) so we have to enforce specific type on
1716 // immediates via explicit cast to ImmTy.
1717 def : ATOM23_impl<AsmStr, regclass, Preds,
1718 (ins Int32Regs:$src, ImmType:$b),
1719 (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
1720 def : ATOM23_impl<AsmStr, regclass, Preds,
1721 (ins Int64Regs:$src, ImmType:$b),
1722 (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
// Three-operand counterpart of ATOM2P_impl. Emits eight anonymous ATOM23_impl
// records for `Intr`: the address ($src) is Int32Regs or Int64Regs, and each of
// $b and $c is independently a `regclass` register or an `ImmType` immediate
// (immediates are cast to ImmTy in the pattern).
1725 multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
1726 NVPTXRegClass regclass, Operand ImmType,
1727 SDNode Imm, ValueType ImmTy,
1728 list<Predicate> Preds> {
1729 // Variants for register/immediate permutations of $b and $c
// Register/register forms are introduced under AddedComplexity = 2.
1730 let AddedComplexity = 2 in {
1731 def : ATOM23_impl<AsmStr, regclass, Preds,
1732 (ins Int32Regs:$src, regclass:$b, regclass:$c),
1733 (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
1734 def : ATOM23_impl<AsmStr, regclass, Preds,
1735 (ins Int64Regs:$src, regclass:$b, regclass:$c),
1736 (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
// Mixed immediate/register forms are introduced under AddedComplexity = 1.
1738 let AddedComplexity = 1 in {
1739 def : ATOM23_impl<AsmStr, regclass, Preds,
1740 (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1741 (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1742 def : ATOM23_impl<AsmStr, regclass, Preds,
1743 (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1744 (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1745 def : ATOM23_impl<AsmStr, regclass, Preds,
1746 (ins Int32Regs:$src, regclass:$b, ImmType:$c),
1747 (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1748 def : ATOM23_impl<AsmStr, regclass, Preds,
1749 (ins Int64Regs:$src, regclass:$b, ImmType:$c),
1750 (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
// Immediate/immediate forms.
1752 def : ATOM23_impl<AsmStr, regclass, Preds,
1753 (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
1754 (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1755 def : ATOM23_impl<AsmStr, regclass, Preds,
1756 (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
1757 (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1760 // Constructs intrinsic name and instruction asm strings.
// Builds the two strings needed by ATOM2P_impl from their components:
//   asm string : "atom" [ "." SpaceStr unless SpaceStr is "gen" ]
//                       [ "." ScopeStr unless ScopeStr is "gpu" ]
//                "." OpStr "." TypeStr, followed by the operand list
//                "$result, [$src], $b".
//   intrinsic  : "int_nvvm_atomic_" OpStr "_" SpaceStr "_" IntTypeStr
//                [ "_" ScopeStr unless ScopeStr is empty ].
// The remaining parameters are forwarded to ATOM2P_impl unchanged.
// NOTE(review): the asm string omits the scope when it equals "gpu" whereas the
// intrinsic name omits it only when it is empty; confirm this asymmetry is
// intended before adding a "gpu"-scoped instantiation.
1761 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
1762 string ScopeStr, string SpaceStr,
1763 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1764 ValueType ImmTy, list<Predicate> Preds> {
1765 defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1766 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1767 # "." # OpStr # "." # TypeStr
1768 # " \t$result, [$src], $b;",
1770 "int_nvvm_atomic_" # OpStr
1771 # "_" # SpaceStr # "_" # IntTypeStr
1772 # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
1773 regclass, ImmType, Imm, ImmTy, Preds>;
// Three-operand counterpart of ATOM2N_impl: builds the asm string and the
// intrinsic name for ATOM3P_impl.
//   asm string : "atom" [ "." SpaceStr unless SpaceStr is "gen" ]
//                       [ "." ScopeStr unless ScopeStr is "gpu" ]
//                "." OpStr "." TypeStr, followed by the operand list
//                "$result, [$src], $b, $c".
//   intrinsic  : "int_nvvm_atomic_" OpStr "_" SpaceStr "_" IntTypeStr
//                [ "_" ScopeStr unless ScopeStr is empty ].
// The remaining parameters are forwarded to ATOM3P_impl unchanged.
1775 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
1776 string ScopeStr, string SpaceStr,
1777 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1778 ValueType ImmTy, list<Predicate> Preds> {
1779 defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1780 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1781 # "." # OpStr # "." # TypeStr
1782 # " \t$result, [$src], $b, $c;",
1784 "int_nvvm_atomic_" # OpStr
1785 # "_" # SpaceStr # "_" # IntTypeStr
1786 # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
1787 regclass, ImmType, Imm, ImmTy, Preds>;
1790 // Constructs variants for different address spaces.
1791 // For now we only need variants for generic space pointers.
// Address-space layer for two-operand scoped atomics. Currently emits a single
// "_gen_" sub-family by instantiating ATOM2N_impl with SpaceStr = "gen"; all
// other parameters are passed through unchanged.
1792 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
1793 string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1794 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1795 defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1796 regclass, ImmType, Imm, ImmTy, Preds>;
// Address-space layer for three-operand scoped atomics. Currently emits a
// single "_gen_" sub-family by instantiating ATOM3N_impl with SpaceStr = "gen";
// all other parameters are passed through unchanged.
1798 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
1799 string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1800 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1801 defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1802 regclass, ImmType, Imm, ImmTy, Preds>;
1805 // Constructs variants for different scopes of atomic op.
// Scope layer for two-operand atomics. Emits a "_cta" and a "_sys" sub-family
// via ATOM2A_impl, passing "cta" / "sys" as the scope string. Both append
// hasAtomScope to the caller-supplied predicate list.
1806 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
1807 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1808 ValueType ImmTy, list<Predicate> Preds> {
1809 // .gpu scope is default and is currently covered by existing
1810 // atomics w/o explicitly specified scope.
1811 defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1812 regclass, ImmType, Imm, ImmTy,
1813 !listconcat(Preds,[hasAtomScope])>;
1814 defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1815 regclass, ImmType, Imm, ImmTy,
1816 !listconcat(Preds,[hasAtomScope])>;
// Scope layer for three-operand atomics. Emits a "_cta" and a "_sys" sub-family
// via ATOM3A_impl, passing "cta" / "sys" as the scope string. Both append
// hasAtomScope to the caller-supplied predicate list.
1818 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
1819 NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
1820 list<Predicate> Preds> {
1821 // No need to define ".gpu"-scoped atomics. They do the same thing
1822 // as the regular, non-scoped atomics defined elsewhere.
1823 defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1824 regclass, ImmType, Imm, ImmTy,
1825 !listconcat(Preds,[hasAtomScope])>;
1826 defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1827 regclass, ImmType, Imm, ImmTy,
1828 !listconcat(Preds,[hasAtomScope])>;
// Type variants for a scoped add-style operation `OpStr`: s32, u32, u64, f32
// and f64. Integer variants pass "i" as the intrinsic type string and use the
// `imm` node; floating-point variants pass "f" and use `fpimm` with the
// Float32Regs / Float64Regs register classes.
1832 multiclass ATOM2_add_impl<string OpStr> {
1833 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1834 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1835 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
1836 defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
1838 defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
1842 // atom.{and,or,xor}
// Type variants for a scoped bitwise operation `OpStr`: b32 with no extra
// predicates, and b64 which additionally requires hasAtomBitwise64.
1843 multiclass ATOM2_bitwise_impl<string OpStr> {
1844 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1845 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
1846 [hasAtomBitwise64]>;
// Type variants for a scoped exchange operation `OpStr`: b32 and b64, neither
// with additional predicates.
1850 multiclass ATOM2_exch_impl<string OpStr> {
1851 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1852 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
// Type variants for a scoped min/max operation `OpStr`: signed and unsigned,
// each in 32- and 64-bit widths (s32, u32, s64, u64). The 32-bit variants pass
// an empty predicate list.
1856 multiclass ATOM2_minmax_impl<string OpStr> {
1857 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1858 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1859 defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
1861 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
// Type variants for a scoped inc/dec operation `OpStr`: only u32 is defined.
1866 multiclass ATOM2_incdec_impl<string OpStr> {
1867 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
// Type variants for a scoped three-operand compare-and-swap `OpStr`: b32 and
// b64, built on ATOM3S_impl with no additional predicates.
1871 multiclass ATOM3_cas_impl<string OpStr> {
1872 defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1873 defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
// Top-level instantiations of the scoped atomics, one per operation. The
// string argument is the OpStr that ends up in both the asm mnemonic and the
// intrinsic name. and/or/xor share ATOM2_bitwise_impl, inc/dec share
// ATOM2_incdec_impl, and min/max share ATOM2_minmax_impl.
1876 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
1877 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
1878 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
1879 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
1880 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
1881 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
1882 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
1883 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
1884 defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
1885 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
1887 //-----------------------------------
1888 // Support for ldu on sm_20 or later
1889 //-----------------------------------
1891 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
1892 // read-only in a kernel.
// Scalar ldu.global instruction family. For a given type/operand string
// `TyStr` and result register class, defines one instruction per addressing
// form of $src:
//   areg   - Int32Regs register      areg64 - Int64Regs register
//   avar   - imemAny operand
//   ari    - MEMri operand           ari64  - MEMri64 operand
// The asm string is "ldu.global." followed by TyStr. Every record has an empty
// pattern list and requires hasLDU.
// NOTE(review): with no patterns these are presumably selected by hand-written
// C++ rather than by TableGen matching -- confirm in the instruction selector.
1896 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1897 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1898 !strconcat("ldu.global.", TyStr),
1899 []>, Requires<[hasLDU]>;
1900 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1901 !strconcat("ldu.global.", TyStr),
1902 []>, Requires<[hasLDU]>;
1903 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1904 !strconcat("ldu.global.", TyStr),
1905 []>, Requires<[hasLDU]>;
1906 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1907 !strconcat("ldu.global.", TyStr),
1908 []>, Requires<[hasLDU]>;
1909 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1910 !strconcat("ldu.global.", TyStr),
1911 []>, Requires<[hasLDU]>;
// Scalar ldu.global instantiations, one per element type. Points worth noting:
//   * i8 loads use the "u8" type string but an Int16Regs result.
//   * f16 and f16x2 use the untyped "b16" / "b32" type strings.
//   * p32 / p64 use the same type strings and register classes as i32 / i64.
1914 defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1915 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1916 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1917 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1918 defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
1919 defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
1920 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1921 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1922 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1923 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1927 // Elementized vector ldu
// Two-element vector ldu.global family. Each record produces two results
// ($dst1, $dst2) of `regclass`, uses "ldu.global." followed by TyStr as its asm
// string, and has an empty pattern list. Five addressing variants are defined:
// _areg32, _areg64, _ari32, _ari64 and _avar.
// Unlike the scalar LDU_G records, no Requires<> clause is attached here.
1928 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1929 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1930 (ins Int32Regs:$src),
1931 !strconcat("ldu.global.", TyStr), []>;
1932 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1933 (ins Int64Regs:$src),
1934 !strconcat("ldu.global.", TyStr), []>;
1935 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1937 !strconcat("ldu.global.", TyStr), []>;
1938 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1940 !strconcat("ldu.global.", TyStr), []>;
1941 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1943 !strconcat("ldu.global.", TyStr), []>;
// Four-element vector ldu.global family. Each record produces four results
// ($dst1..$dst4) of `regclass`, uses "ldu.global." followed by TyStr as its asm
// string, and has an empty pattern list. Addressing variants of $src:
//   _areg32 - Int32Regs    _areg64 - Int64Regs
//   _ari32  - MEMri        _ari64  - MEMri64
//   _avar   - imemAny
1946 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1947 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1948 regclass:$dst4), (ins Int32Regs:$src),
1949 !strconcat("ldu.global.", TyStr), []>;
1950 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1951 regclass:$dst4), (ins Int64Regs:$src),
1952 !strconcat("ldu.global.", TyStr), []>;
1953 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1954 regclass:$dst4), (ins MEMri:$src),
1955 !strconcat("ldu.global.", TyStr), []>;
1956 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1957 regclass:$dst4), (ins MEMri64:$src),
1958 !strconcat("ldu.global.", TyStr), []>;
1959 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1960 regclass:$dst4), (ins imemAny:$src),
1961 !strconcat("ldu.global.", TyStr), []>;
// Vector ldu.global instantiations.
//   v2: i8, i16, i32, f16, f16x2, f32, i64, f64
//   v4: i8, i16, i32, f16, f16x2, f32 (no 64-bit element types)
// As with the scalar forms, i8 elements use the "u8" type string with Int16Regs
// results, and f16 / f16x2 use the untyped "b16" / "b32" type strings.
1964 defm INT_PTX_LDU_G_v2i8_ELE
1965 : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1966 defm INT_PTX_LDU_G_v2i16_ELE
1967 : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1968 defm INT_PTX_LDU_G_v2i32_ELE
1969 : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1970 defm INT_PTX_LDU_G_v2f16_ELE
1971 : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1972 defm INT_PTX_LDU_G_v2f16x2_ELE
1973 : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1974 defm INT_PTX_LDU_G_v2f32_ELE
1975 : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1976 defm INT_PTX_LDU_G_v2i64_ELE
1977 : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1978 defm INT_PTX_LDU_G_v2f64_ELE
1979 : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1980 defm INT_PTX_LDU_G_v4i8_ELE
1981 : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1982 defm INT_PTX_LDU_G_v4i16_ELE
1983 : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1985 defm INT_PTX_LDU_G_v4i32_ELE
1986 : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1988 defm INT_PTX_LDU_G_v4f16_ELE
1989 : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1991 defm INT_PTX_LDU_G_v4f16x2_ELE
1992 : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1994 defm INT_PTX_LDU_G_v4f32_ELE
1995 : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1999 //-----------------------------------
2000 // Support for ldg on sm_35 or later
2001 //-----------------------------------
2003 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
2004 // non-coherent texture cache, and therefore the values read must be read-only
2005 // during the lifetime of the kernel.
// Scalar ld.global.nc instruction family. For a given type/operand string
// `TyStr` and result register class, defines one instruction per addressing
// form of $src:
//   areg   - Int32Regs register      areg64 - Int64Regs register
//   avar   - imemAny operand
//   ari    - MEMri operand           ari64  - MEMri64 operand
// The asm string is "ld.global.nc." followed by TyStr. Every record has an
// empty pattern list and requires hasLDG.
2007 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
2008 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2009 !strconcat("ld.global.nc.", TyStr),
2010 []>, Requires<[hasLDG]>;
2011 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2012 !strconcat("ld.global.nc.", TyStr),
2013 []>, Requires<[hasLDG]>;
2014 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2015 !strconcat("ld.global.nc.", TyStr),
2016 []>, Requires<[hasLDG]>;
2017 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2018 !strconcat("ld.global.nc.", TyStr),
2019 []>, Requires<[hasLDG]>;
2020 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2021 !strconcat("ld.global.nc.", TyStr),
2022 []>, Requires<[hasLDG]>;
// Scalar ld.global.nc instantiations, one per element type. They mirror the
// INT_PTX_LDU_GLOBAL_* set:
//   * i8 loads use the "u8" type string but an Int16Regs result.
//   * f16 and f16x2 use the untyped "b16" / "b32" type strings.
//   * p32 / p64 use the same type strings and register classes as i32 / i64.
2025 defm INT_PTX_LDG_GLOBAL_i8
2026 : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
2027 defm INT_PTX_LDG_GLOBAL_i16
2028 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
2029 defm INT_PTX_LDG_GLOBAL_i32
2030 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
2031 defm INT_PTX_LDG_GLOBAL_i64
2032 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2033 defm INT_PTX_LDG_GLOBAL_f16
2034 : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
2035 defm INT_PTX_LDG_GLOBAL_f16x2
2036 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
2037 defm INT_PTX_LDG_GLOBAL_f32
2038 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
2039 defm INT_PTX_LDG_GLOBAL_f64
2040 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2041 defm INT_PTX_LDG_GLOBAL_p32
2042 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
2043 defm INT_PTX_LDG_GLOBAL_p64
2044 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2048 // Elementized vector ldg
// Two-element vector ld.global.nc family. Each record produces two results
// ($dst1, $dst2) of `regclass`, uses "ld.global.nc." followed by TyStr as its
// asm string, and has an empty pattern list. Five addressing variants are
// defined: _areg32, _areg64, _ari32, _ari64 and _avar.
// Unlike the scalar LDG_G records, no Requires<> clause is attached here.
2049 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2050 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2051 (ins Int32Regs:$src),
2052 !strconcat("ld.global.nc.", TyStr), []>;
2053 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2054 (ins Int64Regs:$src),
2055 !strconcat("ld.global.nc.", TyStr), []>;
2056 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2058 !strconcat("ld.global.nc.", TyStr), []>;
2059 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2061 !strconcat("ld.global.nc.", TyStr), []>;
2062 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2064 !strconcat("ld.global.nc.", TyStr), []>;
// Four-element vector ld.global.nc family. Each record produces four results
// ($dst1..$dst4) of `regclass`, uses "ld.global.nc." followed by TyStr as its
// asm string, and has an empty pattern list. Addressing variants of $src:
//   _areg32 - Int32Regs    _areg64 - Int64Regs
//   _ari32  - MEMri        _ari64  - MEMri64
//   _avar   - imemAny
2067 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2068 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2069 regclass:$dst4), (ins Int32Regs:$src),
2070 !strconcat("ld.global.nc.", TyStr), []>;
2071 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2072 regclass:$dst4), (ins Int64Regs:$src),
2073 !strconcat("ld.global.nc.", TyStr), []>;
2074 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2075 regclass:$dst4), (ins MEMri:$src),
2076 !strconcat("ld.global.nc.", TyStr), []>;
2077 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2078 regclass:$dst4), (ins MEMri64:$src),
2079 !strconcat("ld.global.nc.", TyStr), []>;
2080 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2081 regclass:$dst4), (ins imemAny:$src),
2082 !strconcat("ld.global.nc.", TyStr), []>;
2085 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ld.global.nc instantiations.
//   v2: i8, i16, i32, f16, f16x2, f32, i64, f64
//   v4: i8, i16, i32, f16, f16x2, f32 (no 64-bit element types)
// i8 elements use the "u8" type string with Int16Regs results; f16 / f16x2 use
// the untyped "b16" / "b32" type strings.
2086 defm INT_PTX_LDG_G_v2i8_ELE
2087 : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2088 defm INT_PTX_LDG_G_v2i16_ELE
2089 : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2090 defm INT_PTX_LDG_G_v2i32_ELE
2091 : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2092 defm INT_PTX_LDG_G_v2f16_ELE
2093 : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
2094 defm INT_PTX_LDG_G_v2f16x2_ELE
2095 : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
2096 defm INT_PTX_LDG_G_v2f32_ELE
2097 : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2098 defm INT_PTX_LDG_G_v2i64_ELE
2099 : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2100 defm INT_PTX_LDG_G_v2f64_ELE
2101 : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2102 defm INT_PTX_LDG_G_v4i8_ELE
2103 : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2104 defm INT_PTX_LDG_G_v4i16_ELE
2105 : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2106 defm INT_PTX_LDG_G_v4i32_ELE
2107 : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
2108 defm INT_PTX_LDG_G_v4f16_ELE
2109 : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
2110 defm INT_PTX_LDG_G_v4f16x2_ELE
2111 : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
2112 defm INT_PTX_LDG_G_v4f32_ELE
2113 : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
// Address conversions that emit "cvta.<Str>" for the intrinsic `Intrin`, where
// Str is the state-space name supplied by the caller.
//   _yes      - 32-bit source and result: "cvta.<Str>.u32".
//   _yes_64   - 64-bit source and result: "cvta.<Str>.u64".
//   _yes_6432 - 32-bit source, 64-bit result. Emits a braced sequence that
//               declares a .b64 temporary, widens $src into it with
//               cvt.u64.u32, then applies "cvta.<Str>.u64". Requires
//               useShortPtr.
2116 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
2117 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2118 !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
2119 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2120 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2121 !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
2122 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2123 def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
2124 "{{ .reg .b64 %tmp;\n\t"
2125 #" cvt.u64.u32 \t%tmp, $src;\n\t"
2126 #" cvta." # Str # ".u64 \t$result, %tmp; }}",
2127 [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
2128 Requires<[useShortPtr]>;
// G_TO_NG: conversion of a generic pointer into the address space named by
// Str, emitted as "cvta.to.<Str>".
//   Str    - PTX state-space name spliced into the mnemonic.
//   Intrin - intrinsic matched by each variant's selection pattern.
// Variants produced:
//   _yes      32-bit source and result (cvta.to.<Str>.u32)
//   _yes_64   64-bit source and result (cvta.to.<Str>.u64)
//   _yes_3264 64-bit source, 32-bit result; only with useShortPtr.
2131 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
2132 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2133 !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
2134 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2135 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2136 !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
2137 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
// Mixed-width form: the 64-bit cvta.to result lands in a scratch .b64
// register and is then narrowed into the 32-bit result with cvt.u32.u64.
2138 def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
2139 "{{ .reg .b64 %tmp;\n\t"
2140 #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
2141 #" cvt.u32.u64 \t$result, %tmp; }}",
2142 [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
2143 Requires<[useShortPtr]>;
// Address-space conversion instructions for the local, shared, global and
// const state spaces. The cvta_* group instantiates NG_TO_G with the
// *_to_gen intrinsics; the cvta_to_* group instantiates G_TO_NG with the
// gen_to_* intrinsics.
2146 defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
2147 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
2148 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
2149 defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
2151 defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
2152 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
2153 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
2154 defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
2157 // nvvm.ptr.gen.to.param
// int_nvvm_ptr_gen_to_param is selected to a plain register move:
// mov.u32 for a 32-bit pointer, mov.u64 for a 64-bit pointer.
2158 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
2159 (ins Int32Regs:$src),
2160 "mov.u32 \t$result, $src;",
2161 [(set Int32Regs:$result,
2162 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
2163 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
2164 (ins Int64Regs:$src),
2165 "mov.u64 \t$result, $src;",
2166 [(set Int64Regs:$result,
2167 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2170 // nvvm.move intrinsics
2171 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
2172 "mov.b16 \t$r, $s;",
2174 (int_nvvm_move_i16 Int16Regs:$s))]>;
2175 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2176 "mov.b32 \t$r, $s;",
2178 (int_nvvm_move_i32 Int32Regs:$s))]>;
2179 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2180 "mov.b64 \t$r, $s;",
2182 (int_nvvm_move_i64 Int64Regs:$s))]>;
2183 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
2184 "mov.f32 \t$r, $s;",
2185 [(set Float32Regs:$r,
2186 (int_nvvm_move_float Float32Regs:$s))]>;
2187 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
2188 "mov.f64 \t$r, $s;",
2189 [(set Float64Regs:$r,
2190 (int_nvvm_move_double Float64Regs:$s))]>;
2191 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2192 "mov.u32 \t$r, $s;",
2194 (int_nvvm_move_ptr Int32Regs:$s))]>;
2195 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2196 "mov.u64 \t$r, $s;",
2198 (int_nvvm_move_ptr Int64Regs:$s))]>;
2200 // @TODO: Are these actually needed, or will we always just see symbols
2201 // copied to registers first?
2202 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2203 "mov.u32 \t$r, $s;",
2205 (int_nvvm_move_ptr texternalsym:$s))]>;
2206 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2207 "mov.u64 \t$r, $s;",
2209 (int_nvvm_move_ptr texternalsym:$s))]>;*/
2212 // MoveParam %r1, param
2213 // ptr_local_to_gen %r2, %r1
2214 // ptr_gen_to_local %r3, %r2
2218 // @TODO: Revisit this. There is a type
2219 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2220 // instructions are not currently defined. However, we can use the ptr
2221 // variants and the asm printer will do the right thing.
2222 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2223 (MoveParam texternalsym:$src)))),
2224 (nvvm_move_ptr64 texternalsym:$src)>;
2225 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2226 (MoveParam texternalsym:$src)))),
2227 (nvvm_move_ptr32 texternalsym:$src)>;
2230 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
2231 "mov.u64 \t$result, $src;", []>;
2233 //-----------------------------------
2234 // Compiler Error Warn
2235 // - Just ignore them in codegen
2236 //-----------------------------------
// int_nvvm_compiler_warn / int_nvvm_compiler_error, for 32- and 64-bit
// operands. The operand $a does not appear in the assembly string: each
// instruction prints only a PTX "//" comment line naming the intrinsic.
2238 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2239 "// llvm.nvvm.compiler.warn()",
2240 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
2241 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2242 "// llvm.nvvm.compiler.warn()",
2243 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
2244 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2245 "// llvm.nvvm.compiler.error()",
2246 [(int_nvvm_compiler_error Int32Regs:$a)]>;
2247 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2248 "// llvm.nvvm.compiler.error()",
2249 [(int_nvvm_compiler_error Int64Regs:$a)]>;
// isspacep.<space>: tests the address in $a against a state space and writes
// the answer to the Int1Regs register $d. Every space has one variant
// for a 32-bit address and one for a 64-bit address; the assembly string is
// the same for both widths.
// Only the const variants carry a predicate (hasPTX31); the global, local and
// shared variants are unconditional.
2254 def ISSPACEP_CONST_32
2255 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2256 "isspacep.const \t$d, $a;",
2257 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
2258 Requires<[hasPTX31]>;
2259 def ISSPACEP_CONST_64
2260 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2261 "isspacep.const \t$d, $a;",
2262 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
2263 Requires<[hasPTX31]>;
2264 def ISSPACEP_GLOBAL_32
2265 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2266 "isspacep.global \t$d, $a;",
2267 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
2268 def ISSPACEP_GLOBAL_64
2269 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2270 "isspacep.global \t$d, $a;",
2271 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
2272 def ISSPACEP_LOCAL_32
2273 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2274 "isspacep.local \t$d, $a;",
2275 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
2276 def ISSPACEP_LOCAL_64
2277 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2278 "isspacep.local \t$d, $a;",
2279 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
2280 def ISSPACEP_SHARED_32
2281 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2282 "isspacep.shared \t$d, $a;",
2283 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
2284 def ISSPACEP_SHARED_64
2285 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2286 "isspacep.shared \t$d, $a;",
2287 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2290 // Special register reads
// MOV_SPECIAL copies a SpecialRegs operand into a 32-bit register with
// mov.b32. It has no selection pattern of its own ([]); it is reached only
// through the Pat records below.
2291 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
2292 (ins SpecialRegs:$r),
2293 "mov.b32 \t$d, $r;", []>;
// One pattern per environment register: int_nvvm_read_ptx_sreg_envregN is
// selected to MOV_SPECIAL on the matching ENVREGN, for N = 0 .. 31.
2295 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
2296 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
2297 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
2298 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
2299 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
2300 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
2301 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
2302 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
2303 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
2304 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
2305 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
2306 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2307 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2308 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2309 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2310 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2311 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2312 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2313 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2314 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2315 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2316 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2317 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2318 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2319 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2320 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2321 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2322 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2323 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2324 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2325 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2326 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2329 // rotate builtin support
// Selection of int_nvvm_rotate_b32.
// With hasHWROT32 the rotate is a single shf.l.wrap.b32 in which $src is
// passed as both data operands; one definition takes an immediate amount and
// one takes a register amount.
2331 def ROTATE_B32_HW_IMM
2332 : NVPTXInst<(outs Int32Regs:$dst),
2333 (ins Int32Regs:$src, i32imm:$amt),
2334 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2335 [(set Int32Regs:$dst,
2336 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2337 Requires<[hasHWROT32]> ;
2339 def ROTATE_B32_HW_REG
2340 : NVPTXInst<(outs Int32Regs:$dst),
2341 (ins Int32Regs:$src, Int32Regs:$amt),
2342 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2343 [(set Int32Regs:$dst,
2344 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2345 Requires<[hasHWROT32]> ;
// With noHWROT32 the same intrinsic is selected to the software rotate
// instructions ROT32imm_sw / ROTL32reg_sw, which are defined outside this
// chunk. The immediate form passes both $amt and (SUB_FRM_32 $amt).
// NOTE(review): SUB_FRM_32 presumably yields 32 - amt; confirm at its
// definition.
2347 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2348 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2349 Requires<[noHWROT32]> ;
2351 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2352 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2353 Requires<[noHWROT32]> ;
2355 let hasSideEffects = false in {
2356 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2357 !strconcat("{{\n\t",
2358 ".reg .b32 %dummy;\n\t",
2359 "mov.b64 \t{$dst,%dummy}, $src;\n\t",
2363 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2364 !strconcat("{{\n\t",
2365 ".reg .b32 %dummy;\n\t",
2366 "mov.b64 \t{%dummy,$dst}, $src;\n\t",
2371 let hasSideEffects = false in {
2373 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2374 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
2377 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2378 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2379 (GET_LO_INT64 Int64Regs:$src))> ;
2381 // Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
2383 let hasSideEffects = false in {
// Funnel-shift instructions: shf.l.wrap.b32 and shf.r.wrap.b32 over the
// operands ($lo, $hi), each with an immediate-amount and a register-amount
// form. None has a selection pattern of its own ([]), so they are only
// emitted when another pattern names them explicitly. All four are gated on
// hasHWROT32.
2384 def SHF_L_WRAP_B32_IMM
2385 : NVPTXInst<(outs Int32Regs:$dst),
2386 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2387 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2388 Requires<[hasHWROT32]>;
2390 def SHF_L_WRAP_B32_REG
2391 : NVPTXInst<(outs Int32Regs:$dst),
2392 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2393 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2394 Requires<[hasHWROT32]>;
2396 def SHF_R_WRAP_B32_IMM
2397 : NVPTXInst<(outs Int32Regs:$dst),
2398 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2399 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2400 Requires<[hasHWROT32]>;
2402 def SHF_R_WRAP_B32_REG
2403 : NVPTXInst<(outs Int32Regs:$dst),
2404 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2405 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2406 Requires<[hasHWROT32]>;
2409 // HW version of rotate 64
2410 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2412 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2413 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2414 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2415 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2416 Requires<[hasHWROT32]>;
2418 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2420 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2421 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2422 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2423 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2424 Requires<[hasHWROT32]>;
2427 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2429 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2430 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2431 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2432 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2433 Requires<[hasHWROT32]>;
2435 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2437 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2438 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2439 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2440 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2441 Requires<[hasHWROT32]>;
2443 // SW version of rotate 64
// Software 64-bit rotate-left by an immediate, used when noHWROT32 holds.
// ROT64imm_sw receives the rotate amount and its complement. For a 64-bit
// value the complement must be taken relative to 64 (SUB_FRM_64), matching
// the rotate-right immediate pattern; SUB_FRM_32 belongs only to the 32-bit
// rotate and would shift the wrapped-around bits by the wrong amount.
// NOTE(review): ROT64imm_sw and SUB_FRM_64 are defined outside this chunk;
// the operand order (left-shift amount, then right-shift amount) is inferred
// from the mirrored rotate-right pattern.
2444 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2445 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
2446 Requires<[noHWROT32]>;
// Software 64-bit rotates selected when noHWROT32 holds.
// Rotate-left by a register amount.
2447 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2448 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2449 Requires<[noHWROT32]>;
// Rotate-right by an immediate: ROT64imm_sw is given (SUB_FRM_64 $amt) first
// and $amt second.
// NOTE(review): SUB_FRM_64 presumably yields 64 - amt; confirm at its
// definition.
2450 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2451 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2452 Requires<[noHWROT32]>;
// Rotate-right by a register amount.
2453 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2454 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2455 Requires<[noHWROT32]>;
2458 //-----------------------------------
2459 // Texture Intrinsics
2460 //-----------------------------------
2462 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2463 // also defined in NVPTXReplaceImageHandles.cpp
2465 // texmode_independent
2466 let IsTex = true, IsTexModeUnified = false in {
2467 // Texture fetch instructions using handles
2469 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2470 Float32Regs:$b, Float32Regs:$a),
2471 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2472 "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2475 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2476 Float32Regs:$b, Float32Regs:$a),
2477 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2478 "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2480 def TEX_1D_F32_F32_LEVEL
2481 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2482 Float32Regs:$b, Float32Regs:$a),
2483 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
2484 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2485 "[$t, $s, \\{$x\\}], $lod;",
2487 def TEX_1D_F32_F32_GRAD
2488 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2489 Float32Regs:$b, Float32Regs:$a),
2490 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2491 Float32Regs:$gradx, Float32Regs:$grady),
2492 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2493 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2496 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2497 Int32Regs:$b, Int32Regs:$a),
2498 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2499 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2502 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2503 Int32Regs:$b, Int32Regs:$a),
2504 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2505 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2507 def TEX_1D_S32_F32_LEVEL
2508 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2509 Int32Regs:$b, Int32Regs:$a),
2510 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2512 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2513 "[$t, $s, \\{$x\\}], $lod;",
2515 def TEX_1D_S32_F32_GRAD
2516 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2517 Int32Regs:$b, Int32Regs:$a),
2518 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2519 Float32Regs:$gradx, Float32Regs:$grady),
2520 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2521 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2524 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2525 Int32Regs:$b, Int32Regs:$a),
2526 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2527 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2530 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2531 Int32Regs:$b, Int32Regs:$a),
2532 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2533 "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2535 def TEX_1D_U32_F32_LEVEL
2536 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2537 Int32Regs:$b, Int32Regs:$a),
2538 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2540 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2541 "[$t, $s, \\{$x\\}], $lod;",
2543 def TEX_1D_U32_F32_GRAD
2544 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2545 Int32Regs:$b, Int32Regs:$a),
2546 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2547 Float32Regs:$gradx, Float32Regs:$grady),
2548 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2549 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2552 def TEX_1D_ARRAY_F32_S32
2553 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2554 Float32Regs:$b, Float32Regs:$a),
2555 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2556 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2557 "[$t, $s, \\{$l, $x\\}];",
2559 def TEX_1D_ARRAY_F32_F32
2560 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2561 Float32Regs:$b, Float32Regs:$a),
2562 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2563 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2564 "[$t, $s, \\{$l, $x\\}];",
2566 def TEX_1D_ARRAY_F32_F32_LEVEL
2567 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2568 Float32Regs:$b, Float32Regs:$a),
2569 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2571 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2572 "[$t, $s, \\{$l, $x\\}], $lod;",
2574 def TEX_1D_ARRAY_F32_F32_GRAD
2575 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2576 Float32Regs:$b, Float32Regs:$a),
2577 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2578 Float32Regs:$gradx, Float32Regs:$grady),
2579 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2580 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2582 def TEX_1D_ARRAY_S32_S32
2583 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2584 Int32Regs:$b, Int32Regs:$a),
2585 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2586 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2587 "[$t, $s, \\{$l, $x\\}];",
2589 def TEX_1D_ARRAY_S32_F32
2590 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2591 Int32Regs:$b, Int32Regs:$a),
2592 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2593 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2594 "[$t, $s, \\{$l, $x\\}];",
2596 def TEX_1D_ARRAY_S32_F32_LEVEL
2597 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2598 Int32Regs:$b, Int32Regs:$a),
2599 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2601 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2602 "[$t, $s, \\{$l, $x\\}], $lod;",
2604 def TEX_1D_ARRAY_S32_F32_GRAD
2605 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2606 Int32Regs:$b, Int32Regs:$a),
2607 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2608 Float32Regs:$gradx, Float32Regs:$grady),
2609 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2610 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2612 def TEX_1D_ARRAY_U32_S32
2613 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2614 Int32Regs:$b, Int32Regs:$a),
2615 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2616 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2617 "[$t, $s, \\{$l, $x\\}];",
2619 def TEX_1D_ARRAY_U32_F32
2620 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2621 Int32Regs:$b, Int32Regs:$a),
2622 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2623 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2624 "[$t, $s, \\{$l, $x\\}];",
2626 def TEX_1D_ARRAY_U32_F32_LEVEL
2627 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2628 Int32Regs:$b, Int32Regs:$a),
2629 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2631 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2632 "[$t, $s, \\{$l, $x\\}], $lod;",
2634 def TEX_1D_ARRAY_U32_F32_GRAD
2635 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2636 Int32Regs:$b, Int32Regs:$a),
2637 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2638 Float32Regs:$gradx, Float32Regs:$grady),
2639 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2640 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2644 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2645 Float32Regs:$b, Float32Regs:$a),
2646 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2647 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2648 "[$t, $s, \\{$x, $y\\}];",
2651 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2652 Float32Regs:$b, Float32Regs:$a),
2653 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2654 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2655 "[$t, $s, \\{$x, $y\\}];",
2657 def TEX_2D_F32_F32_LEVEL
2658 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2659 Float32Regs:$b, Float32Regs:$a),
2660 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2662 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2663 "[$t, $s, \\{$x, $y\\}], $lod;",
2665 def TEX_2D_F32_F32_GRAD
2666 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2667 Float32Regs:$b, Float32Regs:$a),
2668 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2669 Float32Regs:$gradx0, Float32Regs:$gradx1,
2670 Float32Regs:$grady0, Float32Regs:$grady1),
2671 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2672 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2673 "\\{$grady0, $grady1\\};",
2676 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2677 Int32Regs:$b, Int32Regs:$a),
2678 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2679 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2680 "[$t, $s, \\{$x, $y\\}];",
2683 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2684 Int32Regs:$b, Int32Regs:$a),
2685 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2686 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2687 "[$t, $s, \\{$x, $y\\}];",
2689 def TEX_2D_S32_F32_LEVEL
2690 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2691 Int32Regs:$b, Int32Regs:$a),
2692 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2694 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2695 "[$t, $s, \\{$x, $y\\}], $lod;",
2697 def TEX_2D_S32_F32_GRAD
2698 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2699 Int32Regs:$b, Int32Regs:$a),
2700 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2701 Float32Regs:$gradx0, Float32Regs:$gradx1,
2702 Float32Regs:$grady0, Float32Regs:$grady1),
2703 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2704 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2705 "\\{$grady0, $grady1\\};",
2708 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2709 Int32Regs:$b, Int32Regs:$a),
2710 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2711 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2712 "[$t, $s, \\{$x, $y\\}];",
2715 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2716 Int32Regs:$b, Int32Regs:$a),
2717 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2718 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2719 "[$t, $s, \\{$x, $y\\}];",
2721 def TEX_2D_U32_F32_LEVEL
2722 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2723 Int32Regs:$b, Int32Regs:$a),
2724 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2726 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2727 "[$t, $s, \\{$x, $y\\}], $lod;",
2729 def TEX_2D_U32_F32_GRAD
2730 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2731 Int32Regs:$b, Int32Regs:$a),
2732 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2733 Float32Regs:$gradx0, Float32Regs:$gradx1,
2734 Float32Regs:$grady0, Float32Regs:$grady1),
2735 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2736 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2737 "\\{$grady0, $grady1\\};",
2740 def TEX_2D_ARRAY_F32_S32
2741 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2742 Float32Regs:$b, Float32Regs:$a),
2743 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2745 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2746 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2748 def TEX_2D_ARRAY_F32_F32
2749 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2750 Float32Regs:$b, Float32Regs:$a),
2751 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2753 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2754 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2756 def TEX_2D_ARRAY_F32_F32_LEVEL
2757 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2758 Float32Regs:$b, Float32Regs:$a),
2759 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2760 Float32Regs:$y, Float32Regs:$lod),
2761 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2762 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2764 def TEX_2D_ARRAY_F32_F32_GRAD
2765 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2766 Float32Regs:$b, Float32Regs:$a),
2767 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2768 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2769 Float32Regs:$grady0, Float32Regs:$grady1),
2770 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2771 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2772 "\\{$grady0, $grady1\\};",
2774 def TEX_2D_ARRAY_S32_S32
2775 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2776 Int32Regs:$b, Int32Regs:$a),
2777 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2779 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2780 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2782 def TEX_2D_ARRAY_S32_F32
2783 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2784 Int32Regs:$b, Int32Regs:$a),
2785 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2787 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2788 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2790 def TEX_2D_ARRAY_S32_F32_LEVEL
2791 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2792 Int32Regs:$b, Int32Regs:$a),
2793 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2794 Float32Regs:$y, Float32Regs:$lod),
2795 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2796 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2798 def TEX_2D_ARRAY_S32_F32_GRAD
2799 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2800 Int32Regs:$b, Int32Regs:$a),
2801 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2803 Float32Regs:$gradx0, Float32Regs:$gradx1,
2804 Float32Regs:$grady0, Float32Regs:$grady1),
2805 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2806 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2807 "\\{$grady0, $grady1\\};",
2809 def TEX_2D_ARRAY_U32_S32
2810 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2811 Int32Regs:$b, Int32Regs:$a),
2812 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2814 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2815 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2817 def TEX_2D_ARRAY_U32_F32
2818 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2819 Int32Regs:$b, Int32Regs:$a),
2820 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2822 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2823 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2825 def TEX_2D_ARRAY_U32_F32_LEVEL
2826 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2827 Int32Regs:$b, Int32Regs:$a),
2828 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2829 Float32Regs:$y, Float32Regs:$lod),
2830 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2831 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2833 def TEX_2D_ARRAY_U32_F32_GRAD
2834 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2835 Int32Regs:$b, Int32Regs:$a),
2836 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2838 Float32Regs:$gradx0, Float32Regs:$gradx1,
2839 Float32Regs:$grady0, Float32Regs:$grady1),
2840 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2841 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2842 "\\{$grady0, $grady1\\};",
2846 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2847 Float32Regs:$b, Float32Regs:$a),
2848 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2850 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2851 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2854 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2855 Float32Regs:$b, Float32Regs:$a),
2856 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2858 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2859 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2861 def TEX_3D_F32_F32_LEVEL
2862 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2863 Float32Regs:$b, Float32Regs:$a),
2864 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2865 Float32Regs:$z, Float32Regs:$lod),
2866 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2867 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2869 def TEX_3D_F32_F32_GRAD
2870 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2871 Float32Regs:$b, Float32Regs:$a),
2872 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2874 Float32Regs:$gradx0, Float32Regs:$gradx1,
2875 Float32Regs:$gradx2, Float32Regs:$grady0,
2876 Float32Regs:$grady1, Float32Regs:$grady2),
2877 "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2878 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2879 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2880 "\\{$grady0, $grady1, $grady2, $grady2\\};",
2883 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2884 Int32Regs:$b, Int32Regs:$a),
2885 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2887 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2888 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2891 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2892 Int32Regs:$b, Int32Regs:$a),
2893 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2895 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2896 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2898 def TEX_3D_S32_F32_LEVEL
2899 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2900 Int32Regs:$b, Int32Regs:$a),
2901 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2902 Float32Regs:$z, Float32Regs:$lod),
2903 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2904 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2906 def TEX_3D_S32_F32_GRAD
2907 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2908 Int32Regs:$b, Int32Regs:$a),
2909 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2911 Float32Regs:$gradx0, Float32Regs:$gradx1,
2912 Float32Regs:$gradx2, Float32Regs:$grady0,
2913 Float32Regs:$grady1, Float32Regs:$grady2),
2914 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2915 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2916 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2917 "\\{$grady0, $grady1, $grady2, $grady2\\};",
2920 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2921 Int32Regs:$b, Int32Regs:$a),
2922 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2924 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2925 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2928 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2929 Int32Regs:$b, Int32Regs:$a),
2930 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2932 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2933 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2935 def TEX_3D_U32_F32_LEVEL
2936 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2937 Int32Regs:$b, Int32Regs:$a),
2938 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2939 Float32Regs:$z, Float32Regs:$lod),
2940 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2941 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.grad.3d.v4.u32.f32" with four Int32Regs results. $z, $gradx2 and
// $grady2 are each printed twice, so every braced operand list has four entries.
2943 def TEX_3D_U32_F32_GRAD
2944 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2945 Int32Regs:$b, Int32Regs:$a),
2946 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2948 Float32Regs:$gradx0, Float32Regs:$gradx1,
2949 Float32Regs:$gradx2, Float32Regs:$grady0,
2950 Float32Regs:$grady1, Float32Regs:$grady2),
2951 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2952 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2953 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2954 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Prints "tex.cube.v4.f32.f32" with four Float32Regs results addressed as
// [$t, $s, {$x, $y, $z, $z}]; $z is printed twice.
2957 def TEX_CUBE_F32_F32
2958 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2959 Float32Regs:$b, Float32Regs:$a),
2960 (ins Int64Regs:$t, Int64Regs:$s,
2961 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2962 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2963 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.cube.v4.f32.f32" with four Float32Regs results; $lod
// follows [$t, $s, {$x, $y, $z, $z}], where $z is printed twice.
2965 def TEX_CUBE_F32_F32_LEVEL
2966 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2967 Float32Regs:$b, Float32Regs:$a),
2968 (ins Int64Regs:$t, Int64Regs:$s,
2969 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2971 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2972 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.cube.v4.s32.f32" with four Int32Regs results addressed as
// [$t, $s, {$x, $y, $z, $z}]; $z is printed twice.
2974 def TEX_CUBE_S32_F32
2975 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2976 Int32Regs:$b, Int32Regs:$a),
2977 (ins Int64Regs:$t, Int64Regs:$s,
2978 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2979 "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2980 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.cube.v4.s32.f32" with four Int32Regs results; $lod
// follows [$t, $s, {$x, $y, $z, $z}], where $z is printed twice.
2982 def TEX_CUBE_S32_F32_LEVEL
2983 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2984 Int32Regs:$b, Int32Regs:$a),
2985 (ins Int64Regs:$t, Int64Regs:$s,
2986 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2988 "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2989 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.cube.v4.u32.f32" with four Int32Regs results addressed as
// [$t, $s, {$x, $y, $z, $z}]; $z is printed twice.
2991 def TEX_CUBE_U32_F32
2992 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2993 Int32Regs:$b, Int32Regs:$a),
2994 (ins Int64Regs:$t, Int64Regs:$s,
2995 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2996 "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2997 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.cube.v4.u32.f32" with four Int32Regs results; $lod
// follows [$t, $s, {$x, $y, $z, $z}], where $z is printed twice.
2999 def TEX_CUBE_U32_F32_LEVEL
3000 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3001 Int32Regs:$b, Int32Regs:$a),
3002 (ins Int64Regs:$t, Int64Regs:$s,
3003 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3005 "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3006 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.acube.v4.f32.f32" with four Float32Regs results; the Int32Regs
// $l operand is the first entry of the coordinate list {$l, $x, $y, $z}.
3009 def TEX_CUBE_ARRAY_F32_F32
3010 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3011 Float32Regs:$b, Float32Regs:$a),
3012 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
3013 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3014 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3015 "[$t, $s, \\{$l, $x, $y, $z\\}];",
// Prints "tex.level.acube.v4.f32.f32" with four Float32Regs results; $l leads
// the coordinate list {$l, $x, $y, $z} and $lod follows the address operand.
3017 def TEX_CUBE_ARRAY_F32_F32_LEVEL
3018 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3019 Float32Regs:$b, Float32Regs:$a),
3020 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
3021 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3023 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3024 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
// Prints "tex.acube.v4.s32.f32" with four Int32Regs results; the Int32Regs
// $l operand is the first entry of the coordinate list {$l, $x, $y, $z}.
3026 def TEX_CUBE_ARRAY_S32_F32
3027 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3028 Int32Regs:$b, Int32Regs:$a),
3029 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
3030 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3031 "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3032 "[$t, $s, \\{$l, $x, $y, $z\\}];",
// Prints "tex.level.acube.v4.s32.f32" with four Int32Regs results; $l leads
// the coordinate list {$l, $x, $y, $z} and $lod follows the address operand.
3034 def TEX_CUBE_ARRAY_S32_F32_LEVEL
3035 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3036 Int32Regs:$b, Int32Regs:$a),
3037 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
3038 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3040 "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3041 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
// Prints "tex.acube.v4.u32.f32" with four Int32Regs results; the Int32Regs
// $l operand is the first entry of the coordinate list {$l, $x, $y, $z}.
3043 def TEX_CUBE_ARRAY_U32_F32
3044 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3045 Int32Regs:$b, Int32Regs:$a),
3046 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
3047 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3048 "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3049 "[$t, $s, \\{$l, $x, $y, $z\\}];",
// Prints "tex.level.acube.v4.u32.f32" with four Int32Regs results; $l leads
// the coordinate list {$l, $x, $y, $z} and $lod follows the address operand.
3051 def TEX_CUBE_ARRAY_U32_F32_LEVEL
3052 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3053 Int32Regs:$b, Int32Regs:$a),
3054 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
3055 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3057 "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3058 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
// Prints "tld4.r.2d.v4.f32.f32": four Float32Regs results $v0-$v3 addressed as
// [$t, $s, {$x, $y}] with Float32Regs coordinates.
3061 def TLD4_R_2D_F32_F32
3062 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3063 Float32Regs:$v2, Float32Regs:$v3),
3064 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3065 "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3066 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.g.2d.v4.f32.f32": four Float32Regs results $v0-$v3 addressed as
// [$t, $s, {$x, $y}] with Float32Regs coordinates.
3068 def TLD4_G_2D_F32_F32
3069 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3070 Float32Regs:$v2, Float32Regs:$v3),
3071 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3072 "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3073 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.b.2d.v4.f32.f32": four Float32Regs results $v0-$v3 addressed as
// [$t, $s, {$x, $y}] with Float32Regs coordinates.
3075 def TLD4_B_2D_F32_F32
3076 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3077 Float32Regs:$v2, Float32Regs:$v3),
3078 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3079 "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3080 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.a.2d.v4.f32.f32": four Float32Regs results $v0-$v3 addressed as
// [$t, $s, {$x, $y}] with Float32Regs coordinates.
3082 def TLD4_A_2D_F32_F32
3083 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3084 Float32Regs:$v2, Float32Regs:$v3),
3085 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3086 "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3087 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.r.2d.v4.s32.f32": four Int32Regs results $v0-$v3 addressed as
// [$t, $s, {$x, $y}] with Float32Regs coordinates.
3089 def TLD4_R_2D_S32_F32
3090 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3091 Int32Regs:$v2, Int32Regs:$v3),
3092 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3093 "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3094 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.g.2d.v4.s32.f32": four Int32Regs results $v0-$v3 addressed as
// [$t, $s, {$x, $y}] with Float32Regs coordinates.
3096 def TLD4_G_2D_S32_F32
3097 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3098 Int32Regs:$v2, Int32Regs:$v3),
3099 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3100 "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3101 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.b.2d.v4.s32.f32": four Int32Regs results $v0-$v3 addressed as
// [$t, $s, {$x, $y}] with Float32Regs coordinates.
3103 def TLD4_B_2D_S32_F32
3104 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3105 Int32Regs:$v2, Int32Regs:$v3),
3106 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3107 "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3108 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.a.2d.v4.s32.f32": four Int32Regs results $v0-$v3 addressed as
// [$t, $s, {$x, $y}] with Float32Regs coordinates.
3110 def TLD4_A_2D_S32_F32
3111 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3112 Int32Regs:$v2, Int32Regs:$v3),
3113 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3114 "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3115 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.r.2d.v4.u32.f32": four Int32Regs results $v0-$v3 addressed as
// [$t, $s, {$x, $y}] with Float32Regs coordinates.
3117 def TLD4_R_2D_U32_F32
3118 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3119 Int32Regs:$v2, Int32Regs:$v3),
3120 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3121 "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3122 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.g.2d.v4.u32.f32": four Int32Regs results $v0-$v3 addressed as
// [$t, $s, {$x, $y}] with Float32Regs coordinates.
3124 def TLD4_G_2D_U32_F32
3125 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3126 Int32Regs:$v2, Int32Regs:$v3),
3127 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3128 "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3129 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.b.2d.v4.u32.f32": four Int32Regs results $v0-$v3 addressed as
// [$t, $s, {$x, $y}] with Float32Regs coordinates.
3131 def TLD4_B_2D_U32_F32
3132 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3133 Int32Regs:$v2, Int32Regs:$v3),
3134 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3135 "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3136 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.a.2d.v4.u32.f32": four Int32Regs results $v0-$v3 addressed as
// [$t, $s, {$x, $y}] with Float32Regs coordinates.
3138 def TLD4_A_2D_U32_F32
3139 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3140 Int32Regs:$v2, Int32Regs:$v3),
3141 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3142 "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3143 "[$t, $s, \\{$x, $y\\}];",
3149 let IsTex = true, IsTexModeUnified = true in {
3150 // Texture fetch instructions using handles: each takes a single $t handle operand (no separate $s).
// Prints "tex.1d.v4.f32.s32": four Float32Regs results addressed as [$t, {$x}]
// with an Int32Regs coordinate.
3151 def TEX_UNIFIED_1D_F32_S32
3152 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3153 Float32Regs:$b, Float32Regs:$a),
3154 (ins Int64Regs:$t, Int32Regs:$x),
3155 "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Prints "tex.1d.v4.f32.f32": four Float32Regs results addressed as [$t, {$x}]
// with a Float32Regs coordinate.
3157 def TEX_UNIFIED_1D_F32_F32
3158 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3159 Float32Regs:$b, Float32Regs:$a),
3160 (ins Int64Regs:$t, Float32Regs:$x),
3161 "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Prints "tex.level.1d.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$x}], followed by the Float32Regs $lod operand.
3163 def TEX_UNIFIED_1D_F32_F32_LEVEL
3164 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3165 Float32Regs:$b, Float32Regs:$a),
3166 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
3167 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3168 "[$t, \\{$x\\}], $lod;",
// Prints "tex.grad.1d.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$x}], followed by the one-element lists {$gradx} and {$grady}.
3170 def TEX_UNIFIED_1D_F32_F32_GRAD
3171 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3172 Float32Regs:$b, Float32Regs:$a),
3173 (ins Int64Regs:$t, Float32Regs:$x,
3174 Float32Regs:$gradx, Float32Regs:$grady),
3175 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3176 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Prints "tex.1d.v4.s32.s32": four Int32Regs results addressed as [$t, {$x}]
// with an Int32Regs coordinate.
3178 def TEX_UNIFIED_1D_S32_S32
3179 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3180 Int32Regs:$b, Int32Regs:$a),
3181 (ins Int64Regs:$t, Int32Regs:$x),
3182 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Prints "tex.1d.v4.s32.f32": four Int32Regs results addressed as [$t, {$x}]
// with a Float32Regs coordinate.
3184 def TEX_UNIFIED_1D_S32_F32
3185 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3186 Int32Regs:$b, Int32Regs:$a),
3187 (ins Int64Regs:$t, Float32Regs:$x),
3188 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Prints "tex.level.1d.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$x}], followed by the $lod operand.
3190 def TEX_UNIFIED_1D_S32_F32_LEVEL
3191 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3192 Int32Regs:$b, Int32Regs:$a),
3193 (ins Int64Regs:$t, Float32Regs:$x,
3195 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3196 "[$t, \\{$x\\}], $lod;",
// Prints "tex.grad.1d.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$x}], followed by the one-element lists {$gradx} and {$grady}.
3198 def TEX_UNIFIED_1D_S32_F32_GRAD
3199 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3200 Int32Regs:$b, Int32Regs:$a),
3201 (ins Int64Regs:$t, Float32Regs:$x,
3202 Float32Regs:$gradx, Float32Regs:$grady),
3203 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3204 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Prints "tex.1d.v4.u32.s32": four Int32Regs results addressed as [$t, {$x}]
// with an Int32Regs coordinate.
3206 def TEX_UNIFIED_1D_U32_S32
3207 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3208 Int32Regs:$b, Int32Regs:$a),
3209 (ins Int64Regs:$t, Int32Regs:$x),
3210 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Prints "tex.1d.v4.u32.f32": four Int32Regs results addressed as [$t, {$x}]
// with a Float32Regs coordinate.
3212 def TEX_UNIFIED_1D_U32_F32
3213 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3214 Int32Regs:$b, Int32Regs:$a),
3215 (ins Int64Regs:$t, Float32Regs:$x),
3216 "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Prints "tex.level.1d.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$x}], followed by the $lod operand.
3218 def TEX_UNIFIED_1D_U32_F32_LEVEL
3219 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3220 Int32Regs:$b, Int32Regs:$a),
3221 (ins Int64Regs:$t, Float32Regs:$x,
3223 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3224 "[$t, \\{$x\\}], $lod;",
// Prints "tex.grad.1d.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$x}], followed by the one-element lists {$gradx} and {$grady}.
3226 def TEX_UNIFIED_1D_U32_F32_GRAD
3227 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3228 Int32Regs:$b, Int32Regs:$a),
3229 (ins Int64Regs:$t, Float32Regs:$x,
3230 Float32Regs:$gradx, Float32Regs:$grady),
3231 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3232 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Prints "tex.a1d.v4.f32.s32": four Float32Regs results addressed as
// [$t, {$l, $x}]; both $l and $x are Int32Regs.
3235 def TEX_UNIFIED_1D_ARRAY_F32_S32
3236 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3237 Float32Regs:$b, Float32Regs:$a),
3238 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3239 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3240 "[$t, \\{$l, $x\\}];",
// Prints "tex.a1d.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$l, $x}]; $l is Int32Regs and $x is Float32Regs.
3242 def TEX_UNIFIED_1D_ARRAY_F32_F32
3243 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3244 Float32Regs:$b, Float32Regs:$a),
3245 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3246 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3247 "[$t, \\{$l, $x\\}];",
// Prints "tex.level.a1d.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$l, $x}], followed by the $lod operand.
3249 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3250 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3251 Float32Regs:$b, Float32Regs:$a),
3252 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3254 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3255 "[$t, \\{$l, $x\\}], $lod;",
// Prints "tex.grad.a1d.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$l, $x}], followed by the one-element lists {$gradx} and {$grady}.
3257 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3258 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3259 Float32Regs:$b, Float32Regs:$a),
3260 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3261 Float32Regs:$gradx, Float32Regs:$grady),
3262 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3263 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Prints "tex.a1d.v4.s32.s32": four Int32Regs results addressed as
// [$t, {$l, $x}]; both $l and $x are Int32Regs.
3265 def TEX_UNIFIED_1D_ARRAY_S32_S32
3266 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3267 Int32Regs:$b, Int32Regs:$a),
3268 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3269 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3270 "[$t, \\{$l, $x\\}];",
// Prints "tex.a1d.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$l, $x}]; $l is Int32Regs and $x is Float32Regs.
3272 def TEX_UNIFIED_1D_ARRAY_S32_F32
3273 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3274 Int32Regs:$b, Int32Regs:$a),
3275 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3276 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3277 "[$t, \\{$l, $x\\}];",
// Prints "tex.level.a1d.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$l, $x}], followed by the $lod operand.
3279 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3280 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3281 Int32Regs:$b, Int32Regs:$a),
3282 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3284 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3285 "[$t, \\{$l, $x\\}], $lod;",
// Prints "tex.grad.a1d.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$l, $x}], followed by the one-element lists {$gradx} and {$grady}.
3287 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3288 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3289 Int32Regs:$b, Int32Regs:$a),
3290 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3291 Float32Regs:$gradx, Float32Regs:$grady),
3292 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3293 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Prints "tex.a1d.v4.u32.s32": four Int32Regs results addressed as
// [$t, {$l, $x}]; both $l and $x are Int32Regs.
3295 def TEX_UNIFIED_1D_ARRAY_U32_S32
3296 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3297 Int32Regs:$b, Int32Regs:$a),
3298 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3299 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3300 "[$t, \\{$l, $x\\}];",
// Prints "tex.a1d.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$l, $x}]; $l is Int32Regs and $x is Float32Regs.
3302 def TEX_UNIFIED_1D_ARRAY_U32_F32
3303 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3304 Int32Regs:$b, Int32Regs:$a),
3305 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3306 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3307 "[$t, \\{$l, $x\\}];",
// Prints "tex.level.a1d.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$l, $x}], followed by the $lod operand.
3309 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3310 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3311 Int32Regs:$b, Int32Regs:$a),
3312 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3314 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3315 "[$t, \\{$l, $x\\}], $lod;",
// Prints "tex.grad.a1d.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$l, $x}], followed by the one-element lists {$gradx} and {$grady}.
3317 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3318 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3319 Int32Regs:$b, Int32Regs:$a),
3320 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3321 Float32Regs:$gradx, Float32Regs:$grady),
3322 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3323 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Prints "tex.2d.v4.f32.s32": four Float32Regs results addressed as
// [$t, {$x, $y}] with Int32Regs coordinates.
3326 def TEX_UNIFIED_2D_F32_S32
3327 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3328 Float32Regs:$b, Float32Regs:$a),
3329 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3330 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3331 "[$t, \\{$x, $y\\}];",
// Prints "tex.2d.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$x, $y}] with Float32Regs coordinates.
3333 def TEX_UNIFIED_2D_F32_F32
3334 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3335 Float32Regs:$b, Float32Regs:$a),
3336 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3337 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3338 "[$t, \\{$x, $y\\}];",
// Prints "tex.level.2d.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$x, $y}], followed by the $lod operand.
3340 def TEX_UNIFIED_2D_F32_F32_LEVEL
3341 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3342 Float32Regs:$b, Float32Regs:$a),
3343 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3345 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3346 "[$t, \\{$x, $y\\}], $lod;",
// Prints "tex.grad.2d.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$x, $y}], followed by {$gradx0, $gradx1} and {$grady0, $grady1}.
3348 def TEX_UNIFIED_2D_F32_F32_GRAD
3349 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3350 Float32Regs:$b, Float32Regs:$a),
3351 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3352 Float32Regs:$gradx0, Float32Regs:$gradx1,
3353 Float32Regs:$grady0, Float32Regs:$grady1),
3354 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3355 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3356 "\\{$grady0, $grady1\\};",
// Prints "tex.2d.v4.s32.s32": four Int32Regs results addressed as
// [$t, {$x, $y}] with Int32Regs coordinates.
3358 def TEX_UNIFIED_2D_S32_S32
3359 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3360 Int32Regs:$b, Int32Regs:$a),
3361 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3362 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3363 "[$t, \\{$x, $y\\}];",
// Prints "tex.2d.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$x, $y}] with Float32Regs coordinates.
3365 def TEX_UNIFIED_2D_S32_F32
3366 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3367 Int32Regs:$b, Int32Regs:$a),
3368 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3369 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3370 "[$t, \\{$x, $y\\}];",
// Prints "tex.level.2d.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$x, $y}], followed by the $lod operand.
3372 def TEX_UNIFIED_2D_S32_F32_LEVEL
3373 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3374 Int32Regs:$b, Int32Regs:$a),
3375 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3377 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3378 "[$t, \\{$x, $y\\}], $lod;",
// Prints "tex.grad.2d.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$x, $y}], followed by {$gradx0, $gradx1} and {$grady0, $grady1}.
3380 def TEX_UNIFIED_2D_S32_F32_GRAD
3381 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3382 Int32Regs:$b, Int32Regs:$a),
3383 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3384 Float32Regs:$gradx0, Float32Regs:$gradx1,
3385 Float32Regs:$grady0, Float32Regs:$grady1),
3386 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3387 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3388 "\\{$grady0, $grady1\\};",
// Prints "tex.2d.v4.u32.s32": four Int32Regs results addressed as
// [$t, {$x, $y}] with Int32Regs coordinates.
3390 def TEX_UNIFIED_2D_U32_S32
3391 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3392 Int32Regs:$b, Int32Regs:$a),
3393 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3394 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3395 "[$t, \\{$x, $y\\}];",
// Prints "tex.2d.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$x, $y}] with Float32Regs coordinates.
3397 def TEX_UNIFIED_2D_U32_F32
3398 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3399 Int32Regs:$b, Int32Regs:$a),
3400 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3401 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3402 "[$t, \\{$x, $y\\}];",
// Prints "tex.level.2d.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$x, $y}], followed by the $lod operand.
3404 def TEX_UNIFIED_2D_U32_F32_LEVEL
3405 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3406 Int32Regs:$b, Int32Regs:$a),
3407 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3409 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3410 "[$t, \\{$x, $y\\}], $lod;",
// Prints "tex.grad.2d.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$x, $y}], followed by {$gradx0, $gradx1} and {$grady0, $grady1}.
3412 def TEX_UNIFIED_2D_U32_F32_GRAD
3413 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3414 Int32Regs:$b, Int32Regs:$a),
3415 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3416 Float32Regs:$gradx0, Float32Regs:$gradx1,
3417 Float32Regs:$grady0, Float32Regs:$grady1),
3418 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3419 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3420 "\\{$grady0, $grady1\\};",
// Prints "tex.a2d.v4.f32.s32": four Float32Regs results addressed as
// [$t, {$l, $x, $y, $y}]; $y is printed twice.
3423 def TEX_UNIFIED_2D_ARRAY_F32_S32
3424 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3425 Float32Regs:$b, Float32Regs:$a),
3426 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3428 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3429 "[$t, \\{$l, $x, $y, $y\\}];",
// Prints "tex.a2d.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$l, $x, $y, $y}]; $y is printed twice.
3431 def TEX_UNIFIED_2D_ARRAY_F32_F32
3432 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3433 Float32Regs:$b, Float32Regs:$a),
3434 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3436 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3437 "[$t, \\{$l, $x, $y, $y\\}];",
// Prints "tex.level.a2d.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$l, $x, $y, $y}] ($y printed twice), followed by $lod.
3439 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3440 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3441 Float32Regs:$b, Float32Regs:$a),
3442 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3443 Float32Regs:$y, Float32Regs:$lod),
3444 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3445 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// Prints "tex.grad.a2d.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$l, $x, $y, $y}] ($y printed twice), followed by the two-element
// lists {$gradx0, $gradx1} and {$grady0, $grady1}.
3447 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3448 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3449 Float32Regs:$b, Float32Regs:$a),
3450 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3451 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
3452 Float32Regs:$grady0, Float32Regs:$grady1),
3453 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3454 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3455 "\\{$grady0, $grady1\\};",
// Prints "tex.a2d.v4.s32.s32": four Int32Regs results addressed as
// [$t, {$l, $x, $y, $y}]; $y is printed twice.
3457 def TEX_UNIFIED_2D_ARRAY_S32_S32
3458 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3459 Int32Regs:$b, Int32Regs:$a),
3460 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3462 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3463 "[$t, \\{$l, $x, $y, $y\\}];",
// Prints "tex.a2d.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$l, $x, $y, $y}]; $y is printed twice.
3465 def TEX_UNIFIED_2D_ARRAY_S32_F32
3466 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3467 Int32Regs:$b, Int32Regs:$a),
3468 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3470 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3471 "[$t, \\{$l, $x, $y, $y\\}];",
// Prints "tex.level.a2d.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$l, $x, $y, $y}] ($y printed twice), followed by $lod.
3473 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3474 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3475 Int32Regs:$b, Int32Regs:$a),
3476 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3477 Float32Regs:$y, Float32Regs:$lod),
3478 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3479 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// Prints "tex.grad.a2d.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$l, $x, $y, $y}] ($y printed twice), followed by the two-element
// lists {$gradx0, $gradx1} and {$grady0, $grady1}.
3481 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3482 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3483 Int32Regs:$b, Int32Regs:$a),
3484 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3486 Float32Regs:$gradx0, Float32Regs:$gradx1,
3487 Float32Regs:$grady0, Float32Regs:$grady1),
3488 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3489 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3490 "\\{$grady0, $grady1\\};",
// Prints "tex.a2d.v4.u32.s32": four Int32Regs results addressed as
// [$t, {$l, $x, $y, $y}]; $y is printed twice.
3492 def TEX_UNIFIED_2D_ARRAY_U32_S32
3493 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3494 Int32Regs:$b, Int32Regs:$a),
3495 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3497 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3498 "[$t, \\{$l, $x, $y, $y\\}];",
// Prints "tex.a2d.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$l, $x, $y, $y}]; $y is printed twice.
3500 def TEX_UNIFIED_2D_ARRAY_U32_F32
3501 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3502 Int32Regs:$b, Int32Regs:$a),
3503 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3505 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3506 "[$t, \\{$l, $x, $y, $y\\}];",
// Prints "tex.level.a2d.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$l, $x, $y, $y}] ($y printed twice), followed by $lod.
3508 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3509 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3510 Int32Regs:$b, Int32Regs:$a),
3511 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3512 Float32Regs:$y, Float32Regs:$lod),
3513 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3514 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// Prints "tex.grad.a2d.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$l, $x, $y, $y}] ($y printed twice), followed by the two-element
// lists {$gradx0, $gradx1} and {$grady0, $grady1}.
3516 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3517 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3518 Int32Regs:$b, Int32Regs:$a),
3519 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3521 Float32Regs:$gradx0, Float32Regs:$gradx1,
3522 Float32Regs:$grady0, Float32Regs:$grady1),
3523 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3524 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3525 "\\{$grady0, $grady1\\};",
// Prints "tex.3d.v4.f32.s32": four Float32Regs results addressed as
// [$t, {$x, $y, $z, $z}]; $z is printed twice.
3528 def TEX_UNIFIED_3D_F32_S32
3529 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3530 Float32Regs:$b, Float32Regs:$a),
3531 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3533 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3534 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.3d.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$x, $y, $z, $z}]; $z is printed twice.
3536 def TEX_UNIFIED_3D_F32_F32
3537 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3538 Float32Regs:$b, Float32Regs:$a),
3539 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3541 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3542 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.3d.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$x, $y, $z, $z}] ($z printed twice), followed by $lod.
3544 def TEX_UNIFIED_3D_F32_F32_LEVEL
3545 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3546 Float32Regs:$b, Float32Regs:$a),
3547 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3548 Float32Regs:$z, Float32Regs:$lod),
3549 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3550 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.grad.3d.v4.f32.f32" with four Float32Regs results. $z, $gradx2
// and $grady2 are each printed twice, so every braced list has four entries.
3552 def TEX_UNIFIED_3D_F32_F32_GRAD
3553 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3554 Float32Regs:$b, Float32Regs:$a),
3555 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3557 Float32Regs:$gradx0, Float32Regs:$gradx1,
3558 Float32Regs:$gradx2, Float32Regs:$grady0,
3559 Float32Regs:$grady1, Float32Regs:$grady2),
3560 "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3561 "[$t, \\{$x, $y, $z, $z\\}], "
3562 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3563 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Prints "tex.3d.v4.s32.s32": four Int32Regs results addressed as
// [$t, {$x, $y, $z, $z}]; $z is printed twice.
3565 def TEX_UNIFIED_3D_S32_S32
3566 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3567 Int32Regs:$b, Int32Regs:$a),
3568 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3570 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3571 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.3d.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$x, $y, $z, $z}]; $z is printed twice.
3573 def TEX_UNIFIED_3D_S32_F32
3574 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3575 Int32Regs:$b, Int32Regs:$a),
3576 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3578 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3579 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.3d.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$x, $y, $z, $z}] ($z printed twice), followed by $lod.
3581 def TEX_UNIFIED_3D_S32_F32_LEVEL
3582 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3583 Int32Regs:$b, Int32Regs:$a),
3584 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3585 Float32Regs:$z, Float32Regs:$lod),
3586 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3587 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.grad.3d.v4.s32.f32" with four Int32Regs results. $z, $gradx2
// and $grady2 are each printed twice, so every braced list has four entries.
3589 def TEX_UNIFIED_3D_S32_F32_GRAD
3590 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3591 Int32Regs:$b, Int32Regs:$a),
3592 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3594 Float32Regs:$gradx0, Float32Regs:$gradx1,
3595 Float32Regs:$gradx2, Float32Regs:$grady0,
3596 Float32Regs:$grady1, Float32Regs:$grady2),
3597 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3598 "[$t, \\{$x, $y, $z, $z\\}], "
3599 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3600 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Prints "tex.3d.v4.u32.s32": four Int32Regs results addressed as
// [$t, {$x, $y, $z, $z}]; $z is printed twice.
3602 def TEX_UNIFIED_3D_U32_S32
3603 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3604 Int32Regs:$b, Int32Regs:$a),
3605 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3607 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3608 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.3d.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$x, $y, $z, $z}]; $z is printed twice.
3610 def TEX_UNIFIED_3D_U32_F32
3611 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3612 Int32Regs:$b, Int32Regs:$a),
3613 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3615 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3616 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.3d.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$x, $y, $z, $z}] ($z printed twice), followed by $lod.
3618 def TEX_UNIFIED_3D_U32_F32_LEVEL
3619 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3620 Int32Regs:$b, Int32Regs:$a),
3621 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3622 Float32Regs:$z, Float32Regs:$lod),
3623 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3624 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.grad.3d.v4.u32.f32" with four Int32Regs results. $z, $gradx2
// and $grady2 are each printed twice, so every braced list has four entries.
3626 def TEX_UNIFIED_3D_U32_F32_GRAD
3627 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3628 Int32Regs:$b, Int32Regs:$a),
3629 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3631 Float32Regs:$gradx0, Float32Regs:$gradx1,
3632 Float32Regs:$gradx2, Float32Regs:$grady0,
3633 Float32Regs:$grady1, Float32Regs:$grady2),
3634 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3635 "[$t, \\{$x, $y, $z, $z\\}], "
3636 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3637 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Prints "tex.cube.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$x, $y, $z, $z}]; $z is printed twice.
3640 def TEX_UNIFIED_CUBE_F32_F32
3641 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3642 Float32Regs:$b, Float32Regs:$a),
3644 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3645 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3646 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.cube.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$x, $y, $z, $z}] ($z printed twice), followed by $lod.
3648 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
3649 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3650 Float32Regs:$b, Float32Regs:$a),
3652 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3654 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3655 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.cube.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$x, $y, $z, $z}]; $z is printed twice.
3657 def TEX_UNIFIED_CUBE_S32_F32
3658 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3659 Int32Regs:$b, Int32Regs:$a),
3661 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3662 "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3663 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.cube.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$x, $y, $z, $z}] ($z printed twice), followed by $lod.
3665 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
3666 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3667 Int32Regs:$b, Int32Regs:$a),
3669 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3671 "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3672 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.cube.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$x, $y, $z, $z}]; $z is printed twice.
3674 def TEX_UNIFIED_CUBE_U32_F32
3675 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3676 Int32Regs:$b, Int32Regs:$a),
3678 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3679 "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3680 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.cube.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$x, $y, $z, $z}] ($z printed twice), followed by $lod.
3682 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
3683 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3684 Int32Regs:$b, Int32Regs:$a),
3686 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3688 "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3689 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.acube.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$l, $x, $y, $z}], with the Int32Regs $l operand listed first.
3692 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
3693 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3694 Float32Regs:$b, Float32Regs:$a),
3695 (ins Int64Regs:$t, Int32Regs:$l,
3696 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3697 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3698 "[$t, \\{$l, $x, $y, $z\\}];",
// Prints "tex.level.acube.v4.f32.f32": four Float32Regs results addressed as
// [$t, {$l, $x, $y, $z}], followed by the $lod operand.
3700 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3701 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3702 Float32Regs:$b, Float32Regs:$a),
3703 (ins Int64Regs:$t, Int32Regs:$l,
3704 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3706 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3707 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// Prints "tex.acube.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$l, $x, $y, $z}], with the Int32Regs $l operand listed first.
3709 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
3710 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3711 Int32Regs:$b, Int32Regs:$a),
3712 (ins Int64Regs:$t, Int32Regs:$l,
3713 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3714 "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3715 "[$t, \\{$l, $x, $y, $z\\}];",
// Prints "tex.level.acube.v4.s32.f32": four Int32Regs results addressed as
// [$t, {$l, $x, $y, $z}], followed by the $lod operand.
3717 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3718 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3719 Int32Regs:$b, Int32Regs:$a),
3720 (ins Int64Regs:$t, Int32Regs:$l,
3721 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3723 "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3724 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// Prints "tex.acube.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$l, $x, $y, $z}], with the Int32Regs $l operand listed first.
3726 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
3727 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3728 Int32Regs:$b, Int32Regs:$a),
3729 (ins Int64Regs:$t, Int32Regs:$l,
3730 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3731 "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3732 "[$t, \\{$l, $x, $y, $z\\}];",
// Prints "tex.level.acube.v4.u32.f32": four Int32Regs results addressed as
// [$t, {$l, $x, $y, $z}], followed by the $lod operand.
3734 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3735 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3736 Int32Regs:$b, Int32Regs:$a),
3737 (ins Int64Regs:$t, Int32Regs:$l,
3738 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3740 "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3741 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// Prints "tld4.r.2d.v4.f32.f32": four Float32Regs results $v0-$v3 addressed
// as [$t, {$x, $y}] with Float32Regs coordinates.
3744 def TLD4_UNIFIED_R_2D_F32_F32
3745 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3746 Float32Regs:$v2, Float32Regs:$v3),
3747 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3748 "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3749 "[$t, \\{$x, $y\\}];",
// Prints "tld4.g.2d.v4.f32.f32": four Float32Regs results $v0-$v3 addressed
// as [$t, {$x, $y}] with Float32Regs coordinates.
3751 def TLD4_UNIFIED_G_2D_F32_F32
3752 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3753 Float32Regs:$v2, Float32Regs:$v3),
3754 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3755 "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3756 "[$t, \\{$x, $y\\}];",
// Prints "tld4.b.2d.v4.f32.f32": four Float32Regs results $v0-$v3 addressed
// as [$t, {$x, $y}] with Float32Regs coordinates.
3758 def TLD4_UNIFIED_B_2D_F32_F32
3759 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3760 Float32Regs:$v2, Float32Regs:$v3),
3761 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3762 "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3763 "[$t, \\{$x, $y\\}];",
// Prints "tld4.a.2d.v4.f32.f32": four Float32Regs results $v0-$v3 addressed
// as [$t, {$x, $y}] with Float32Regs coordinates.
3765 def TLD4_UNIFIED_A_2D_F32_F32
3766 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3767 Float32Regs:$v2, Float32Regs:$v3),
3768 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3769 "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3770 "[$t, \\{$x, $y\\}];",
// Prints "tld4.r.2d.v4.s32.f32": four Int32Regs results $v0-$v3 addressed
// as [$t, {$x, $y}] with Float32Regs coordinates.
3772 def TLD4_UNIFIED_R_2D_S32_F32
3773 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3774 Int32Regs:$v2, Int32Regs:$v3),
3775 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3776 "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3777 "[$t, \\{$x, $y\\}];",
// Prints "tld4.g.2d.v4.s32.f32": four Int32Regs results $v0-$v3 addressed
// as [$t, {$x, $y}] with Float32Regs coordinates.
3779 def TLD4_UNIFIED_G_2D_S32_F32
3780 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3781 Int32Regs:$v2, Int32Regs:$v3),
3782 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3783 "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3784 "[$t, \\{$x, $y\\}];",
// Prints "tld4.b.2d.v4.s32.f32": four Int32Regs results $v0-$v3 addressed
// as [$t, {$x, $y}] with Float32Regs coordinates.
3786 def TLD4_UNIFIED_B_2D_S32_F32
3787 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3788 Int32Regs:$v2, Int32Regs:$v3),
3789 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3790 "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3791 "[$t, \\{$x, $y\\}];",
// Prints "tld4.a.2d.v4.s32.f32": four Int32Regs results $v0-$v3 addressed
// as [$t, {$x, $y}] with Float32Regs coordinates.
3793 def TLD4_UNIFIED_A_2D_S32_F32
3794 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3795 Int32Regs:$v2, Int32Regs:$v3),
3796 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3797 "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3798 "[$t, \\{$x, $y\\}];",
// Prints "tld4.r.2d.v4.u32.f32": four Int32Regs results $v0-$v3 addressed
// as [$t, {$x, $y}] with Float32Regs coordinates.
3800 def TLD4_UNIFIED_R_2D_U32_F32
3801 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3802 Int32Regs:$v2, Int32Regs:$v3),
3803 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3804 "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3805 "[$t, \\{$x, $y\\}];",
// Prints "tld4.g.2d.v4.u32.f32": four Int32Regs results $v0-$v3 addressed
// as [$t, {$x, $y}] with Float32Regs coordinates.
3807 def TLD4_UNIFIED_G_2D_U32_F32
3808 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3809 Int32Regs:$v2, Int32Regs:$v3),
3810 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3811 "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3812 "[$t, \\{$x, $y\\}];",
// Prints "tld4.b.2d.v4.u32.f32": four Int32Regs results $v0-$v3 addressed
// as [$t, {$x, $y}] with Float32Regs coordinates.
3814 def TLD4_UNIFIED_B_2D_U32_F32
3815 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3816 Int32Regs:$v2, Int32Regs:$v3),
3817 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3818 "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3819 "[$t, \\{$x, $y\\}];",
// Prints "tld4.a.2d.v4.u32.f32": four Int32Regs results $v0-$v3 addressed
// as [$t, {$x, $y}] with Float32Regs coordinates.
3821 def TLD4_UNIFIED_A_2D_U32_F32
3822 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3823 Int32Regs:$v2, Int32Regs:$v3),
3824 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3825 "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3826 "[$t, \\{$x, $y\\}];",
3832 //=== Surface load instructions
// Single-element surface loads (`suld.b`) with the `.clamp` mode suffix.
// Every record in this `let` scope has IsSuld set to true.
//
// Naming: SULD_<geometry>_I<bits>_CLAMP, where <bits> matches the `.b<bits>`
// width in the asm string.
// Operands: $s is the surface operand (64-bit register); $x/$y/$z are 32-bit
// integer coordinates; $l is the extra leading coordinate of the a1d/a2d
// forms (presumably the array index -- confirm against the PTX ISA); $r is
// the loaded value. Both b8 and b16 results use Int16Regs.
//
// The a2d and 3d forms print a four-element coordinate vector by repeating
// the last coordinate ($y or $z).
// NOTE(review): PTX specifies a four-element vector for these geometries with
// the fourth element ignored, so the repeat looks like deliberate padding --
// confirm.
3834 let IsSuld = true in {
// 1D surface.
3835 def SULD_1D_I8_CLAMP
3836 : NVPTXInst<(outs Int16Regs:$r),
3837 (ins Int64Regs:$s, Int32Regs:$x),
3838 "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
3840 def SULD_1D_I16_CLAMP
3841 : NVPTXInst<(outs Int16Regs:$r),
3842 (ins Int64Regs:$s, Int32Regs:$x),
3843 "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
3845 def SULD_1D_I32_CLAMP
3846 : NVPTXInst<(outs Int32Regs:$r),
3847 (ins Int64Regs:$s, Int32Regs:$x),
3848 "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
3850 def SULD_1D_I64_CLAMP
3851 : NVPTXInst<(outs Int64Regs:$r),
3852 (ins Int64Regs:$s, Int32Regs:$x),
3853 "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
// 1D array surface: coordinate vector is {$l, $x}.
3856 def SULD_1D_ARRAY_I8_CLAMP
3857 : NVPTXInst<(outs Int16Regs:$r),
3858 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3859 "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3861 def SULD_1D_ARRAY_I16_CLAMP
3862 : NVPTXInst<(outs Int16Regs:$r),
3863 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3864 "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3866 def SULD_1D_ARRAY_I32_CLAMP
3867 : NVPTXInst<(outs Int32Regs:$r),
3868 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3869 "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3871 def SULD_1D_ARRAY_I64_CLAMP
3872 : NVPTXInst<(outs Int64Regs:$r),
3873 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3874 "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// 2D surface.
3877 def SULD_2D_I8_CLAMP
3878 : NVPTXInst<(outs Int16Regs:$r),
3879 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3880 "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3882 def SULD_2D_I16_CLAMP
3883 : NVPTXInst<(outs Int16Regs:$r),
3884 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3885 "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3887 def SULD_2D_I32_CLAMP
3888 : NVPTXInst<(outs Int32Regs:$r),
3889 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3890 "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3892 def SULD_2D_I64_CLAMP
3893 : NVPTXInst<(outs Int64Regs:$r),
3894 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3895 "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// 2D array surface: coordinate vector is {$l, $x, $y, $y} (last slot repeats $y).
3898 def SULD_2D_ARRAY_I8_CLAMP
3899 : NVPTXInst<(outs Int16Regs:$r),
3900 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3901 "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3903 def SULD_2D_ARRAY_I16_CLAMP
3904 : NVPTXInst<(outs Int16Regs:$r),
3905 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3906 "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3908 def SULD_2D_ARRAY_I32_CLAMP
3909 : NVPTXInst<(outs Int32Regs:$r),
3910 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3911 "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3913 def SULD_2D_ARRAY_I64_CLAMP
3914 : NVPTXInst<(outs Int64Regs:$r),
3915 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3916 "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// 3D surface: coordinate vector is {$x, $y, $z, $z} (last slot repeats $z).
3919 def SULD_3D_I8_CLAMP
3920 : NVPTXInst<(outs Int16Regs:$r),
3921 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3922 "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3924 def SULD_3D_I16_CLAMP
3925 : NVPTXInst<(outs Int16Regs:$r),
3926 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3927 "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3929 def SULD_3D_I32_CLAMP
3930 : NVPTXInst<(outs Int32Regs:$r),
3931 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3932 "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3934 def SULD_3D_I64_CLAMP
3935 : NVPTXInst<(outs Int64Regs:$r),
3936 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3937 "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Two-element (`.v2`) surface loads with the `.clamp` mode suffix.
// Each record writes two results ($r, $g) of the same register class; b8 and
// b16 elements both use Int16Regs. Input operands follow the scalar forms:
// surface operand $s (64-bit register) and 32-bit integer coordinates, with
// $l as the leading coordinate of the array geometries.
// The a2d and 3d forms repeat the last coordinate to fill a four-element
// coordinate vector.
// NOTE(review): presumably padding for a slot PTX ignores -- confirm.

// 1D surface.
3942 def SULD_1D_V2I8_CLAMP
3943 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3944 (ins Int64Regs:$s, Int32Regs:$x),
3945 "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3947 def SULD_1D_V2I16_CLAMP
3948 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3949 (ins Int64Regs:$s, Int32Regs:$x),
3950 "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3952 def SULD_1D_V2I32_CLAMP
3953 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3954 (ins Int64Regs:$s, Int32Regs:$x),
3955 "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3957 def SULD_1D_V2I64_CLAMP
3958 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3959 (ins Int64Regs:$s, Int32Regs:$x),
3960 "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// 1D array surface.
3963 def SULD_1D_ARRAY_V2I8_CLAMP
3964 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3965 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3966 "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3968 def SULD_1D_ARRAY_V2I16_CLAMP
3969 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3970 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3971 "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3973 def SULD_1D_ARRAY_V2I32_CLAMP
3974 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3975 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3976 "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3978 def SULD_1D_ARRAY_V2I64_CLAMP
3979 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3980 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3981 "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// 2D surface.
3984 def SULD_2D_V2I8_CLAMP
3985 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3986 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3987 "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3989 def SULD_2D_V2I16_CLAMP
3990 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3991 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3992 "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3994 def SULD_2D_V2I32_CLAMP
3995 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3996 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3997 "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3999 def SULD_2D_V2I64_CLAMP
4000 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4001 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4002 "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// 2D array surface: coordinate vector is {$l, $x, $y, $y}.
4005 def SULD_2D_ARRAY_V2I8_CLAMP
4006 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4007 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4008 "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
4009 "[$s, \\{$l, $x, $y, $y\\}];",
4011 def SULD_2D_ARRAY_V2I16_CLAMP
4012 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4013 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4014 "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
4015 "[$s, \\{$l, $x, $y, $y\\}];",
4017 def SULD_2D_ARRAY_V2I32_CLAMP
4018 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4019 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4020 "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
4021 "[$s, \\{$l, $x, $y, $y\\}];",
4023 def SULD_2D_ARRAY_V2I64_CLAMP
4024 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4025 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4026 "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
4027 "[$s, \\{$l, $x, $y, $y\\}];",
// 3D surface: coordinate vector is {$x, $y, $z, $z}.
4030 def SULD_3D_V2I8_CLAMP
4031 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4032 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4033 "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4035 def SULD_3D_V2I16_CLAMP
4036 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4037 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4038 "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4040 def SULD_3D_V2I32_CLAMP
4041 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4042 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4043 "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4045 def SULD_3D_V2I64_CLAMP
4046 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4047 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4048 "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Four-element (`.v4`) surface loads with the `.clamp` mode suffix.
// Each record writes four results ($r, $g, $b, $a) of the same register class;
// b8 and b16 elements both use Int16Regs. Only 8-, 16- and 32-bit element
// widths are defined in this group (there is no V4I64 record here).
// The a2d and 3d forms repeat the last coordinate to fill a four-element
// coordinate vector.
// NOTE(review): presumably padding for a slot PTX ignores -- confirm.

// 1D surface.
4053 def SULD_1D_V4I8_CLAMP
4054 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4055 (ins Int64Regs:$s, Int32Regs:$x),
4056 "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4058 def SULD_1D_V4I16_CLAMP
4059 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4060 (ins Int64Regs:$s, Int32Regs:$x),
4061 "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4063 def SULD_1D_V4I32_CLAMP
4064 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4065 (ins Int64Regs:$s, Int32Regs:$x),
4066 "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// 1D array surface.
4069 def SULD_1D_ARRAY_V4I8_CLAMP
4070 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4071 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4072 "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
4073 "[$s, \\{$l, $x\\}];",
4075 def SULD_1D_ARRAY_V4I16_CLAMP
4076 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4077 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4078 "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
4079 "[$s, \\{$l, $x\\}];",
4081 def SULD_1D_ARRAY_V4I32_CLAMP
4082 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4083 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4084 "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
4085 "[$s, \\{$l, $x\\}];",
// 2D surface.
4088 def SULD_2D_V4I8_CLAMP
4089 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4090 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4091 "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4093 def SULD_2D_V4I16_CLAMP
4094 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4095 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4096 "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4098 def SULD_2D_V4I32_CLAMP
4099 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4100 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4101 "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// 2D array surface: coordinate vector is {$l, $x, $y, $y}.
4104 def SULD_2D_ARRAY_V4I8_CLAMP
4105 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4106 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4107 "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
4108 "[$s, \\{$l, $x, $y, $y\\}];",
4110 def SULD_2D_ARRAY_V4I16_CLAMP
4111 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4112 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4113 "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
4114 "[$s, \\{$l, $x, $y, $y\\}];",
4116 def SULD_2D_ARRAY_V4I32_CLAMP
4117 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4118 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4119 "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
4120 "[$s, \\{$l, $x, $y, $y\\}];",
// 3D surface: coordinate vector is {$x, $y, $z, $z}.
4124 def SULD_3D_V4I8_CLAMP
4125 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4126 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4127 "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
4128 "[$s, \\{$x, $y, $z, $z\\}];",
4130 def SULD_3D_V4I16_CLAMP
4131 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4132 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4133 "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
4134 "[$s, \\{$x, $y, $z, $z\\}];",
4136 def SULD_3D_V4I32_CLAMP
4137 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4138 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4139 "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
4140 "[$s, \\{$x, $y, $z, $z\\}];",
// Single-element surface loads (`suld.b`) with the `.trap` mode suffix.
// Every record in this `let` scope has IsSuld set to true.
//
// Naming: SULD_<geometry>_I<bits>_TRAP, where <bits> matches the `.b<bits>`
// width in the asm string.
// Operands: $s is the surface operand (64-bit register); $x/$y/$z are 32-bit
// integer coordinates; $l is the extra leading coordinate of the a1d/a2d
// forms (presumably the array index -- confirm against the PTX ISA); $r is
// the loaded value. Both b8 and b16 results use Int16Regs.
//
// The a2d and 3d forms print a four-element coordinate vector by repeating
// the last coordinate ($y or $z).
// NOTE(review): PTX specifies a four-element vector for these geometries with
// the fourth element ignored, so the repeat looks like deliberate padding --
// confirm.
4146 let IsSuld = true in {
// 1D surface.
// NOTE(review): no `def <name>` line precedes this record; by analogy with
// its siblings it would be SULD_1D_I8_TRAP -- confirm.
4148 : NVPTXInst<(outs Int16Regs:$r),
4149 (ins Int64Regs:$s, Int32Regs:$x),
4150 "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
4152 def SULD_1D_I16_TRAP
4153 : NVPTXInst<(outs Int16Regs:$r),
4154 (ins Int64Regs:$s, Int32Regs:$x),
4155 "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
4157 def SULD_1D_I32_TRAP
4158 : NVPTXInst<(outs Int32Regs:$r),
4159 (ins Int64Regs:$s, Int32Regs:$x),
4160 "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
4162 def SULD_1D_I64_TRAP
4163 : NVPTXInst<(outs Int64Regs:$r),
4164 (ins Int64Regs:$s, Int32Regs:$x),
4165 "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
// 1D array surface: coordinate vector is {$l, $x}.
4168 def SULD_1D_ARRAY_I8_TRAP
4169 : NVPTXInst<(outs Int16Regs:$r),
4170 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4171 "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4173 def SULD_1D_ARRAY_I16_TRAP
4174 : NVPTXInst<(outs Int16Regs:$r),
4175 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4176 "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4178 def SULD_1D_ARRAY_I32_TRAP
4179 : NVPTXInst<(outs Int32Regs:$r),
4180 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4181 "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4183 def SULD_1D_ARRAY_I64_TRAP
4184 : NVPTXInst<(outs Int64Regs:$r),
4185 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4186 "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
// 2D surface.
// NOTE(review): no `def <name>` line precedes this record; by analogy with
// its siblings it would be SULD_2D_I8_TRAP -- confirm.
4190 : NVPTXInst<(outs Int16Regs:$r),
4191 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4192 "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4194 def SULD_2D_I16_TRAP
4195 : NVPTXInst<(outs Int16Regs:$r),
4196 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4197 "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4199 def SULD_2D_I32_TRAP
4200 : NVPTXInst<(outs Int32Regs:$r),
4201 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4202 "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4204 def SULD_2D_I64_TRAP
4205 : NVPTXInst<(outs Int64Regs:$r),
4206 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4207 "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
// 2D array surface: coordinate vector is {$l, $x, $y, $y} (last slot repeats $y).
4210 def SULD_2D_ARRAY_I8_TRAP
4211 : NVPTXInst<(outs Int16Regs:$r),
4212 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4213 "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4215 def SULD_2D_ARRAY_I16_TRAP
4216 : NVPTXInst<(outs Int16Regs:$r),
4217 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4218 "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4220 def SULD_2D_ARRAY_I32_TRAP
4221 : NVPTXInst<(outs Int32Regs:$r),
4222 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4223 "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4225 def SULD_2D_ARRAY_I64_TRAP
4226 : NVPTXInst<(outs Int64Regs:$r),
4227 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4228 "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// 3D surface: coordinate vector is {$x, $y, $z, $z} (last slot repeats $z).
// NOTE(review): no `def <name>` line precedes this record; by analogy with
// its siblings it would be SULD_3D_I8_TRAP -- confirm.
4232 : NVPTXInst<(outs Int16Regs:$r),
4233 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4234 "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4236 def SULD_3D_I16_TRAP
4237 : NVPTXInst<(outs Int16Regs:$r),
4238 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4239 "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4241 def SULD_3D_I32_TRAP
4242 : NVPTXInst<(outs Int32Regs:$r),
4243 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4244 "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4246 def SULD_3D_I64_TRAP
4247 : NVPTXInst<(outs Int64Regs:$r),
4248 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4249 "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Two-element (`.v2`) surface loads with the `.trap` mode suffix.
// Each record writes two results ($r, $g) of the same register class; b8 and
// b16 elements both use Int16Regs. Input operands follow the scalar forms:
// surface operand $s (64-bit register) and 32-bit integer coordinates, with
// $l as the leading coordinate of the array geometries.
// The a2d and 3d forms repeat the last coordinate to fill a four-element
// coordinate vector.
// NOTE(review): presumably padding for a slot PTX ignores -- confirm.

// 1D surface.
4254 def SULD_1D_V2I8_TRAP
4255 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4256 (ins Int64Regs:$s, Int32Regs:$x),
4257 "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4259 def SULD_1D_V2I16_TRAP
4260 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4261 (ins Int64Regs:$s, Int32Regs:$x),
4262 "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4264 def SULD_1D_V2I32_TRAP
4265 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4266 (ins Int64Regs:$s, Int32Regs:$x),
4267 "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4269 def SULD_1D_V2I64_TRAP
4270 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4271 (ins Int64Regs:$s, Int32Regs:$x),
4272 "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
// 1D array surface.
4275 def SULD_1D_ARRAY_V2I8_TRAP
4276 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4277 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4278 "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4280 def SULD_1D_ARRAY_V2I16_TRAP
4281 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4282 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4283 "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4285 def SULD_1D_ARRAY_V2I32_TRAP
4286 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4287 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4288 "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4290 def SULD_1D_ARRAY_V2I64_TRAP
4291 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4292 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4293 "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// 2D surface.
4296 def SULD_2D_V2I8_TRAP
4297 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4298 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4299 "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4301 def SULD_2D_V2I16_TRAP
4302 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4303 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4304 "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4306 def SULD_2D_V2I32_TRAP
4307 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4308 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4309 "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4311 def SULD_2D_V2I64_TRAP
4312 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4313 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4314 "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// 2D array surface: coordinate vector is {$l, $x, $y, $y}.
4317 def SULD_2D_ARRAY_V2I8_TRAP
4318 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4319 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4320 "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
4321 "[$s, \\{$l, $x, $y, $y\\}];",
4323 def SULD_2D_ARRAY_V2I16_TRAP
4324 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4325 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4326 "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
4327 "[$s, \\{$l, $x, $y, $y\\}];",
4329 def SULD_2D_ARRAY_V2I32_TRAP
4330 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4331 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4332 "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
4333 "[$s, \\{$l, $x, $y, $y\\}];",
4335 def SULD_2D_ARRAY_V2I64_TRAP
4336 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4337 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4338 "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
4339 "[$s, \\{$l, $x, $y, $y\\}];",
// 3D surface: coordinate vector is {$x, $y, $z, $z}.
4342 def SULD_3D_V2I8_TRAP
4343 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4344 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4345 "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4347 def SULD_3D_V2I16_TRAP
4348 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4349 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4350 "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4352 def SULD_3D_V2I32_TRAP
4353 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4354 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4355 "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4357 def SULD_3D_V2I64_TRAP
4358 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4359 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4360 "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Four-element (`.v4`) surface loads with the `.trap` mode suffix.
// Each record writes four results ($r, $g, $b, $a) of the same register class;
// b8 and b16 elements both use Int16Regs. Only 8-, 16- and 32-bit element
// widths are defined in this group (there is no V4I64 record here).
// The a2d and 3d forms repeat the last coordinate to fill a four-element
// coordinate vector.
// NOTE(review): presumably padding for a slot PTX ignores -- confirm.

// 1D surface.
4365 def SULD_1D_V4I8_TRAP
4366 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4367 (ins Int64Regs:$s, Int32Regs:$x),
4368 "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4370 def SULD_1D_V4I16_TRAP
4371 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4372 (ins Int64Regs:$s, Int32Regs:$x),
4373 "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4375 def SULD_1D_V4I32_TRAP
4376 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4377 (ins Int64Regs:$s, Int32Regs:$x),
4378 "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// 1D array surface.
4381 def SULD_1D_ARRAY_V4I8_TRAP
4382 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4383 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4384 "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4385 "[$s, \\{$l, $x\\}];",
4387 def SULD_1D_ARRAY_V4I16_TRAP
4388 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4389 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4390 "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4391 "[$s, \\{$l, $x\\}];",
4393 def SULD_1D_ARRAY_V4I32_TRAP
4394 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4395 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4396 "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4397 "[$s, \\{$l, $x\\}];",
// 2D surface.
4400 def SULD_2D_V4I8_TRAP
4401 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4402 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4403 "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4405 def SULD_2D_V4I16_TRAP
4406 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4407 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4408 "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4410 def SULD_2D_V4I32_TRAP
4411 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4412 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4413 "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// 2D array surface: coordinate vector is {$l, $x, $y, $y}.
4416 def SULD_2D_ARRAY_V4I8_TRAP
4417 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4418 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4419 "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4420 "[$s, \\{$l, $x, $y, $y\\}];",
4422 def SULD_2D_ARRAY_V4I16_TRAP
4423 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4424 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4425 "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4426 "[$s, \\{$l, $x, $y, $y\\}];",
4428 def SULD_2D_ARRAY_V4I32_TRAP
4429 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4430 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4431 "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4432 "[$s, \\{$l, $x, $y, $y\\}];",
// 3D surface: coordinate vector is {$x, $y, $z, $z}.
4436 def SULD_3D_V4I8_TRAP
4437 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4438 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4439 "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4440 "[$s, \\{$x, $y, $z, $z\\}];",
4442 def SULD_3D_V4I16_TRAP
4443 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4444 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4445 "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4446 "[$s, \\{$x, $y, $z, $z\\}];",
4448 def SULD_3D_V4I32_TRAP
4449 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4450 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4451 "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4452 "[$s, \\{$x, $y, $z, $z\\}];",
// Single-element surface loads (`suld.b`) with the `.zero` mode suffix.
// Every record in this `let` scope has IsSuld set to true.
//
// Naming: SULD_<geometry>_I<bits>_ZERO, where <bits> matches the `.b<bits>`
// width in the asm string.
// Operands: $s is the surface operand (64-bit register); $x/$y/$z are 32-bit
// integer coordinates; $l is the extra leading coordinate of the a1d/a2d
// forms (presumably the array index -- confirm against the PTX ISA); $r is
// the loaded value. Both b8 and b16 results use Int16Regs.
//
// The a2d and 3d forms print a four-element coordinate vector by repeating
// the last coordinate ($y or $z).
// NOTE(review): PTX specifies a four-element vector for these geometries with
// the fourth element ignored, so the repeat looks like deliberate padding --
// confirm.
4457 let IsSuld = true in {
// 1D surface.
// NOTE(review): no `def <name>` line precedes this record; by analogy with
// its siblings it would be SULD_1D_I8_ZERO -- confirm.
4459 : NVPTXInst<(outs Int16Regs:$r),
4460 (ins Int64Regs:$s, Int32Regs:$x),
4461 "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
4463 def SULD_1D_I16_ZERO
4464 : NVPTXInst<(outs Int16Regs:$r),
4465 (ins Int64Regs:$s, Int32Regs:$x),
4466 "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
4468 def SULD_1D_I32_ZERO
4469 : NVPTXInst<(outs Int32Regs:$r),
4470 (ins Int64Regs:$s, Int32Regs:$x),
4471 "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
4473 def SULD_1D_I64_ZERO
4474 : NVPTXInst<(outs Int64Regs:$r),
4475 (ins Int64Regs:$s, Int32Regs:$x),
4476 "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
// 1D array surface: coordinate vector is {$l, $x}.
4479 def SULD_1D_ARRAY_I8_ZERO
4480 : NVPTXInst<(outs Int16Regs:$r),
4481 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4482 "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4484 def SULD_1D_ARRAY_I16_ZERO
4485 : NVPTXInst<(outs Int16Regs:$r),
4486 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4487 "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4489 def SULD_1D_ARRAY_I32_ZERO
4490 : NVPTXInst<(outs Int32Regs:$r),
4491 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4492 "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4494 def SULD_1D_ARRAY_I64_ZERO
4495 : NVPTXInst<(outs Int64Regs:$r),
4496 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4497 "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
// 2D surface.
// NOTE(review): no `def <name>` line precedes this record; by analogy with
// its siblings it would be SULD_2D_I8_ZERO -- confirm.
4501 : NVPTXInst<(outs Int16Regs:$r),
4502 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4503 "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4505 def SULD_2D_I16_ZERO
4506 : NVPTXInst<(outs Int16Regs:$r),
4507 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4508 "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4510 def SULD_2D_I32_ZERO
4511 : NVPTXInst<(outs Int32Regs:$r),
4512 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4513 "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4515 def SULD_2D_I64_ZERO
4516 : NVPTXInst<(outs Int64Regs:$r),
4517 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4518 "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
// 2D array surface: coordinate vector is {$l, $x, $y, $y} (last slot repeats $y).
4521 def SULD_2D_ARRAY_I8_ZERO
4522 : NVPTXInst<(outs Int16Regs:$r),
4523 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4524 "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4526 def SULD_2D_ARRAY_I16_ZERO
4527 : NVPTXInst<(outs Int16Regs:$r),
4528 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4529 "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4531 def SULD_2D_ARRAY_I32_ZERO
4532 : NVPTXInst<(outs Int32Regs:$r),
4533 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4534 "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4536 def SULD_2D_ARRAY_I64_ZERO
4537 : NVPTXInst<(outs Int64Regs:$r),
4538 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4539 "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// 3D surface: coordinate vector is {$x, $y, $z, $z} (last slot repeats $z).
// NOTE(review): no `def <name>` line precedes this record; by analogy with
// its siblings it would be SULD_3D_I8_ZERO -- confirm.
4543 : NVPTXInst<(outs Int16Regs:$r),
4544 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4545 "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4547 def SULD_3D_I16_ZERO
4548 : NVPTXInst<(outs Int16Regs:$r),
4549 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4550 "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4552 def SULD_3D_I32_ZERO
4553 : NVPTXInst<(outs Int32Regs:$r),
4554 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4555 "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4557 def SULD_3D_I64_ZERO
4558 : NVPTXInst<(outs Int64Regs:$r),
4559 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4560 "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Two-element (`.v2`) surface loads with the `.zero` mode suffix.
// Each record writes two results ($r, $g) of the same register class; b8 and
// b16 elements both use Int16Regs. Input operands follow the scalar forms:
// surface operand $s (64-bit register) and 32-bit integer coordinates, with
// $l as the leading coordinate of the array geometries.
// The a2d and 3d forms repeat the last coordinate to fill a four-element
// coordinate vector.
// NOTE(review): presumably padding for a slot PTX ignores -- confirm.

// 1D surface.
4565 def SULD_1D_V2I8_ZERO
4566 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4567 (ins Int64Regs:$s, Int32Regs:$x),
4568 "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4570 def SULD_1D_V2I16_ZERO
4571 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4572 (ins Int64Regs:$s, Int32Regs:$x),
4573 "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4575 def SULD_1D_V2I32_ZERO
4576 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4577 (ins Int64Regs:$s, Int32Regs:$x),
4578 "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4580 def SULD_1D_V2I64_ZERO
4581 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4582 (ins Int64Regs:$s, Int32Regs:$x),
4583 "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
// 1D array surface.
4586 def SULD_1D_ARRAY_V2I8_ZERO
4587 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4588 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4589 "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4591 def SULD_1D_ARRAY_V2I16_ZERO
4592 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4593 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4594 "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4596 def SULD_1D_ARRAY_V2I32_ZERO
4597 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4598 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4599 "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4601 def SULD_1D_ARRAY_V2I64_ZERO
4602 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4603 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4604 "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// 2D surface.
4607 def SULD_2D_V2I8_ZERO
4608 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4609 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4610 "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4612 def SULD_2D_V2I16_ZERO
4613 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4614 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4615 "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4617 def SULD_2D_V2I32_ZERO
4618 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4619 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4620 "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4622 def SULD_2D_V2I64_ZERO
4623 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4624 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4625 "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// 2D array surface: coordinate vector is {$l, $x, $y, $y}.
4628 def SULD_2D_ARRAY_V2I8_ZERO
4629 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4630 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4631 "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4632 "[$s, \\{$l, $x, $y, $y\\}];",
4634 def SULD_2D_ARRAY_V2I16_ZERO
4635 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4636 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4637 "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4638 "[$s, \\{$l, $x, $y, $y\\}];",
4640 def SULD_2D_ARRAY_V2I32_ZERO
4641 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4642 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4643 "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4644 "[$s, \\{$l, $x, $y, $y\\}];",
4646 def SULD_2D_ARRAY_V2I64_ZERO
4647 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4648 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4649 "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4650 "[$s, \\{$l, $x, $y, $y\\}];",
// 3D surface: coordinate vector is {$x, $y, $z, $z}.
4653 def SULD_3D_V2I8_ZERO
4654 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4655 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4656 "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4658 def SULD_3D_V2I16_ZERO
4659 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4660 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4661 "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4663 def SULD_3D_V2I32_ZERO
4664 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4665 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4666 "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4668 def SULD_3D_V2I64_ZERO
4669 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4670 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4671 "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Four-element (`.v4`) surface loads with the `.zero` mode suffix.
// Each record writes four results ($r, $g, $b, $a) of the same register class;
// b8 and b16 elements both use Int16Regs.
// The a2d and 3d forms repeat the last coordinate to fill a four-element
// coordinate vector.
// NOTE(review): presumably padding for a slot PTX ignores -- confirm.

// 1D surface.
4676 def SULD_1D_V4I8_ZERO
4677 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4678 (ins Int64Regs:$s, Int32Regs:$x),
4679 "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4681 def SULD_1D_V4I16_ZERO
4682 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4683 (ins Int64Regs:$s, Int32Regs:$x),
4684 "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4686 def SULD_1D_V4I32_ZERO
4687 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4688 (ins Int64Regs:$s, Int32Regs:$x),
4689 "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// 1D array surface.
4692 def SULD_1D_ARRAY_V4I8_ZERO
4693 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4694 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4695 "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4696 "[$s, \\{$l, $x\\}];",
4698 def SULD_1D_ARRAY_V4I16_ZERO
4699 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4700 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4701 "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4702 "[$s, \\{$l, $x\\}];",
4704 def SULD_1D_ARRAY_V4I32_ZERO
4705 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4706 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4707 "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4708 "[$s, \\{$l, $x\\}];",
// 2D surface.
4711 def SULD_2D_V4I8_ZERO
4712 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4713 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4714 "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4716 def SULD_2D_V4I16_ZERO
4717 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4718 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4719 "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4721 def SULD_2D_V4I32_ZERO
4722 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4723 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4724 "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// 2D array surface: coordinate vector is {$l, $x, $y, $y}.
4727 def SULD_2D_ARRAY_V4I8_ZERO
4728 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4729 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4730 "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4731 "[$s, \\{$l, $x, $y, $y\\}];",
4733 def SULD_2D_ARRAY_V4I16_ZERO
4734 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4735 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4736 "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4737 "[$s, \\{$l, $x, $y, $y\\}];",
4739 def SULD_2D_ARRAY_V4I32_ZERO
4740 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4741 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4742 "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4743 "[$s, \\{$l, $x, $y, $y\\}];",
// 3D surface: coordinate vector is {$x, $y, $z, $z}.
4747 def SULD_3D_V4I8_ZERO
4748 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4749 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4750 "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4751 "[$s, \\{$x, $y, $z, $z\\}];",
4753 def SULD_3D_V4I16_ZERO
4754 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4755 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4756 "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4757 "[$s, \\{$x, $y, $z, $z\\}];",
4759 def SULD_3D_V4I32_ZERO
4760 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4761 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4762 "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4763 "[$s, \\{$x, $y, $z, $z\\}];",
4767 //-----------------------------------
4768 // Texture Query Intrinsics
4769 //-----------------------------------
// Texture query instructions. Every record defined inside this scope gets
// IsSurfTexQuery = true.
// All visible forms share one shape: a single Int64Regs input $a, printed in
// square brackets, and a single Int32Regs output $d; the mnemonic is
// "txq.<property>.b32".
4771 let IsSurfTexQuery = true in {
// Property: channel_order.
4772 def TXQ_CHANNEL_ORDER
4773 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4774 "txq.channel_order.b32 \t$d, [$a];",
// Property: channel_data_type.
4776 def TXQ_CHANNEL_DATA_TYPE
4777 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4778 "txq.channel_data_type.b32 \t$d, [$a];",
// Property: width.
4781 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4782 "txq.width.b32 \t$d, [$a];",
// Property: height.
4785 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4786 "txq.height.b32 \t$d, [$a];",
// Property: depth.
4789 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4790 "txq.depth.b32 \t$d, [$a];",
// Property: array_size.
4793 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4794 "txq.array_size.b32 \t$d, [$a];",
// Property: num_samples.
4797 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4798 "txq.num_samples.b32 \t$d, [$a];",
// Property: num_mipmap_levels.
4800 def TXQ_NUM_MIPMAP_LEVELS
4801 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4802 "txq.num_mipmap_levels.b32 \t$d, [$a];",
// Patterns for the texture query intrinsics. Each int_nvvm_txq_<property>
// intrinsic applied to an Int64Regs operand $a is rewritten to the
// TXQ_<PROPERTY> instruction with that same operand passed through unchanged.
// There is one pattern per property: channel_order, channel_data_type,
// width, height, depth, array_size, num_samples and num_mipmap_levels.
4806 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4807 (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
4808 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4809 (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4810 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4811 (TXQ_WIDTH Int64Regs:$a)>;
4812 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4813 (TXQ_HEIGHT Int64Regs:$a)>;
4814 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4815 (TXQ_DEPTH Int64Regs:$a)>;
4816 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4817 (TXQ_ARRAY_SIZE Int64Regs:$a)>;
4818 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4819 (TXQ_NUM_SAMPLES Int64Regs:$a)>;
4820 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4821 (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4824 //-----------------------------------
4825 // Surface Query Intrinsics
4826 //-----------------------------------
// Surface query instructions. Every record defined inside this scope gets
// IsSurfTexQuery = true, the same flag the texture queries use.
// All visible forms share one shape: a single Int64Regs input $a, printed in
// square brackets, and a single Int32Regs output $d; the mnemonic is
// "suq.<property>.b32".
4828 let IsSurfTexQuery = true in {
// Property: channel_order.
4829 def SUQ_CHANNEL_ORDER
4830 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4831 "suq.channel_order.b32 \t$d, [$a];",
// Property: channel_data_type.
4833 def SUQ_CHANNEL_DATA_TYPE
4834 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4835 "suq.channel_data_type.b32 \t$d, [$a];",
// Property: width.
4838 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4839 "suq.width.b32 \t$d, [$a];",
// Property: height.
4842 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4843 "suq.height.b32 \t$d, [$a];",
// Property: depth.
4846 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4847 "suq.depth.b32 \t$d, [$a];",
// Property: array_size.
4850 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4851 "suq.array_size.b32 \t$d, [$a];",
// Patterns for the surface query intrinsics. Each int_nvvm_suq_<property>
// intrinsic applied to an Int64Regs operand $a is rewritten to the
// SUQ_<PROPERTY> instruction with that same operand passed through unchanged.
// There is one pattern per property: channel_order, channel_data_type,
// width, height, depth and array_size.
4855 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4856 (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
4857 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4858 (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4859 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4860 (SUQ_WIDTH Int64Regs:$a)>;
4861 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4862 (SUQ_HEIGHT Int64Regs:$a)>;
4863 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4864 (SUQ_DEPTH Int64Regs:$a)>;
4865 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4866 (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4869 //===- Handle Query -------------------------------------------------------===//
4871 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Three handle-type predicates follow. Each takes one Int64Regs operand $a
// (printed bare, without brackets) and produces an Int1Regs result $d. Unlike
// the txq/suq queries, the selection pattern is attached directly to the
// instruction rather than written as a separate Pat.

// istypep.samplerref, selected from int_nvvm_istypep_sampler.
4873 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4874 "istypep.samplerref \t$d, $a;",
4875 [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
// istypep.surfref, selected from int_nvvm_istypep_surface.
4877 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4878 "istypep.surfref \t$d, $a;",
4879 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
// istypep.texref, selected from int_nvvm_istypep_texture.
4881 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4882 "istypep.texref \t$d, $a;",
4883 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4885 //===- Surface Stores -----------------------------------------------------===//
4887 let IsSust = true in {
4890 def SUST_B_1D_B8_CLAMP
4892 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4893 "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4895 def SUST_B_1D_B16_CLAMP
4897 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4898 "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4900 def SUST_B_1D_B32_CLAMP
4902 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4903 "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4905 def SUST_B_1D_B64_CLAMP
4907 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4908 "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4910 def SUST_B_1D_V2B8_CLAMP
4912 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4913 "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4915 def SUST_B_1D_V2B16_CLAMP
4917 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4918 "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4920 def SUST_B_1D_V2B32_CLAMP
4922 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4923 "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4925 def SUST_B_1D_V2B64_CLAMP
4927 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4928 "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4930 def SUST_B_1D_V4B8_CLAMP
4932 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4933 Int16Regs:$b, Int16Regs:$a),
4934 "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4936 def SUST_B_1D_V4B16_CLAMP
4938 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4939 Int16Regs:$b, Int16Regs:$a),
4940 "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4942 def SUST_B_1D_V4B32_CLAMP
4944 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4945 Int32Regs:$b, Int32Regs:$a),
4946 "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4950 def SUST_B_1D_ARRAY_B8_CLAMP
4952 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4953 "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4955 def SUST_B_1D_ARRAY_B16_CLAMP
4957 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4958 "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4960 def SUST_B_1D_ARRAY_B32_CLAMP
4962 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4963 "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4965 def SUST_B_1D_ARRAY_B64_CLAMP
4967 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4968 "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4970 def SUST_B_1D_ARRAY_V2B8_CLAMP
4972 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4974 "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4976 def SUST_B_1D_ARRAY_V2B16_CLAMP
4978 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4980 "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4982 def SUST_B_1D_ARRAY_V2B32_CLAMP
4984 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4986 "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4988 def SUST_B_1D_ARRAY_V2B64_CLAMP
4990 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4992 "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4994 def SUST_B_1D_ARRAY_V4B8_CLAMP
4996 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4997 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4998 "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
4999 "\\{$r, $g, $b, $a\\};",
5001 def SUST_B_1D_ARRAY_V4B16_CLAMP
5003 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5004 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5005 "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
5006 "\\{$r, $g, $b, $a\\};",
5008 def SUST_B_1D_ARRAY_V4B32_CLAMP
5010 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5011 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5012 "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
5013 "\\{$r, $g, $b, $a\\};",
5017 def SUST_B_2D_B8_CLAMP
5019 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5020 "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
5022 def SUST_B_2D_B16_CLAMP
5024 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5025 "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
5027 def SUST_B_2D_B32_CLAMP
5029 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5030 "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
5032 def SUST_B_2D_B64_CLAMP
5034 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5035 "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
5037 def SUST_B_2D_V2B8_CLAMP
5039 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5041 "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5043 def SUST_B_2D_V2B16_CLAMP
5045 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5047 "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5049 def SUST_B_2D_V2B32_CLAMP
5051 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5053 "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5055 def SUST_B_2D_V2B64_CLAMP
5057 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5059 "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5061 def SUST_B_2D_V4B8_CLAMP
5063 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5064 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5065 "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
5066 "\\{$r, $g, $b, $a\\};",
5068 def SUST_B_2D_V4B16_CLAMP
5070 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5071 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5072 "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
5073 "\\{$r, $g, $b, $a\\};",
5075 def SUST_B_2D_V4B32_CLAMP
5077 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5078 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5079 "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
5080 "\\{$r, $g, $b, $a\\};",
5084 def SUST_B_2D_ARRAY_B8_CLAMP
5086 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5088 "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5090 def SUST_B_2D_ARRAY_B16_CLAMP
5092 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5094 "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5096 def SUST_B_2D_ARRAY_B32_CLAMP
5098 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5100 "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5102 def SUST_B_2D_ARRAY_B64_CLAMP
5104 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5106 "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5108 def SUST_B_2D_ARRAY_V2B8_CLAMP
5110 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5111 Int16Regs:$r, Int16Regs:$g),
5112 "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
5115 def SUST_B_2D_ARRAY_V2B16_CLAMP
5117 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5118 Int16Regs:$r, Int16Regs:$g),
5119 "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
5122 def SUST_B_2D_ARRAY_V2B32_CLAMP
5124 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5125 Int32Regs:$r, Int32Regs:$g),
5126 "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
5129 def SUST_B_2D_ARRAY_V2B64_CLAMP
5131 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5132 Int64Regs:$r, Int64Regs:$g),
5133 "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
5136 def SUST_B_2D_ARRAY_V4B8_CLAMP
5138 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5139 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5140 "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
5141 "\\{$r, $g, $b, $a\\};",
5143 def SUST_B_2D_ARRAY_V4B16_CLAMP
5145 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5146 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5147 "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
5148 "\\{$r, $g, $b, $a\\};",
5150 def SUST_B_2D_ARRAY_V4B32_CLAMP
5152 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5153 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5154 "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
5155 "\\{$r, $g, $b, $a\\};",
5159 def SUST_B_3D_B8_CLAMP
5161 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5163 "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5165 def SUST_B_3D_B16_CLAMP
5167 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5169 "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5171 def SUST_B_3D_B32_CLAMP
5173 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5175 "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5177 def SUST_B_3D_B64_CLAMP
5179 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5181 "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5183 def SUST_B_3D_V2B8_CLAMP
5185 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5186 Int16Regs:$r, Int16Regs:$g),
5187 "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5190 def SUST_B_3D_V2B16_CLAMP
5192 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5193 Int16Regs:$r, Int16Regs:$g),
5194 "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5197 def SUST_B_3D_V2B32_CLAMP
5199 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5200 Int32Regs:$r, Int32Regs:$g),
5201 "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5204 def SUST_B_3D_V2B64_CLAMP
5206 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5207 Int64Regs:$r, Int64Regs:$g),
5208 "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5211 def SUST_B_3D_V4B8_CLAMP
5213 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5214 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5215 "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5216 "\\{$r, $g, $b, $a\\};",
5218 def SUST_B_3D_V4B16_CLAMP
5220 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5221 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5222 "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5223 "\\{$r, $g, $b, $a\\};",
5225 def SUST_B_3D_V4B32_CLAMP
5227 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5228 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5229 "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5230 "\\{$r, $g, $b, $a\\};",
5235 def SUST_B_1D_B8_TRAP
5237 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5238 "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5240 def SUST_B_1D_B16_TRAP
5242 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5243 "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5245 def SUST_B_1D_B32_TRAP
5247 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5248 "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5250 def SUST_B_1D_B64_TRAP
5252 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5253 "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
5255 def SUST_B_1D_V2B8_TRAP
5257 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5258 "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5260 def SUST_B_1D_V2B16_TRAP
5262 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5263 "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5265 def SUST_B_1D_V2B32_TRAP
5267 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5268 "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5270 def SUST_B_1D_V2B64_TRAP
5272 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5273 "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5275 def SUST_B_1D_V4B8_TRAP
5277 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5278 Int16Regs:$b, Int16Regs:$a),
5279 "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5281 def SUST_B_1D_V4B16_TRAP
5283 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5284 Int16Regs:$b, Int16Regs:$a),
5285 "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5287 def SUST_B_1D_V4B32_TRAP
5289 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5290 Int32Regs:$b, Int32Regs:$a),
5291 "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5295 def SUST_B_1D_ARRAY_B8_TRAP
5297 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5298 "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5300 def SUST_B_1D_ARRAY_B16_TRAP
5302 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5303 "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5305 def SUST_B_1D_ARRAY_B32_TRAP
5307 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5308 "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5310 def SUST_B_1D_ARRAY_B64_TRAP
5312 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5313 "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5315 def SUST_B_1D_ARRAY_V2B8_TRAP
5317 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5319 "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5321 def SUST_B_1D_ARRAY_V2B16_TRAP
5323 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5325 "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5327 def SUST_B_1D_ARRAY_V2B32_TRAP
5329 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5331 "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5333 def SUST_B_1D_ARRAY_V2B64_TRAP
5335 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5337 "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5339 def SUST_B_1D_ARRAY_V4B8_TRAP
5341 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5342 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5343 "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5344 "\\{$r, $g, $b, $a\\};",
5346 def SUST_B_1D_ARRAY_V4B16_TRAP
5348 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5349 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5350 "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5351 "\\{$r, $g, $b, $a\\};",
5353 def SUST_B_1D_ARRAY_V4B32_TRAP
5355 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5356 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5357 "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5358 "\\{$r, $g, $b, $a\\};",
5362 def SUST_B_2D_B8_TRAP
5364 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5365 "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5367 def SUST_B_2D_B16_TRAP
5369 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5370 "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5372 def SUST_B_2D_B32_TRAP
5374 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5375 "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5377 def SUST_B_2D_B64_TRAP
5379 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5380 "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5382 def SUST_B_2D_V2B8_TRAP
5384 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5386 "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5388 def SUST_B_2D_V2B16_TRAP
5390 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5392 "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5394 def SUST_B_2D_V2B32_TRAP
5396 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5398 "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5400 def SUST_B_2D_V2B64_TRAP
5402 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5404 "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5406 def SUST_B_2D_V4B8_TRAP
5408 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5409 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5410 "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5411 "\\{$r, $g, $b, $a\\};",
5413 def SUST_B_2D_V4B16_TRAP
5415 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5416 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5417 "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5418 "\\{$r, $g, $b, $a\\};",
5420 def SUST_B_2D_V4B32_TRAP
5422 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5423 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5424 "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5425 "\\{$r, $g, $b, $a\\};",
5429 def SUST_B_2D_ARRAY_B8_TRAP
5431 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5433 "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5435 def SUST_B_2D_ARRAY_B16_TRAP
5437 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5439 "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5441 def SUST_B_2D_ARRAY_B32_TRAP
5443 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5445 "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5447 def SUST_B_2D_ARRAY_B64_TRAP
5449 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5451 "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5453 def SUST_B_2D_ARRAY_V2B8_TRAP
5455 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5456 Int16Regs:$r, Int16Regs:$g),
5457 "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5460 def SUST_B_2D_ARRAY_V2B16_TRAP
5462 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5463 Int16Regs:$r, Int16Regs:$g),
5464 "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5467 def SUST_B_2D_ARRAY_V2B32_TRAP
5469 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5470 Int32Regs:$r, Int32Regs:$g),
5471 "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5474 def SUST_B_2D_ARRAY_V2B64_TRAP
5476 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5477 Int64Regs:$r, Int64Regs:$g),
5478 "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5481 def SUST_B_2D_ARRAY_V4B8_TRAP
5483 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5484 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5485 "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5486 "\\{$r, $g, $b, $a\\};",
5488 def SUST_B_2D_ARRAY_V4B16_TRAP
5490 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5491 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5492 "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5493 "\\{$r, $g, $b, $a\\};",
5495 def SUST_B_2D_ARRAY_V4B32_TRAP
5497 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5498 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5499 "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5500 "\\{$r, $g, $b, $a\\};",
5504 def SUST_B_3D_B8_TRAP
5506 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5508 "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5510 def SUST_B_3D_B16_TRAP
5512 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5514 "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5516 def SUST_B_3D_B32_TRAP
5518 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5520 "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5522 def SUST_B_3D_B64_TRAP
5524 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5526 "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5528 def SUST_B_3D_V2B8_TRAP
5530 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5531 Int16Regs:$r, Int16Regs:$g),
5532 "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5535 def SUST_B_3D_V2B16_TRAP
5537 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5538 Int16Regs:$r, Int16Regs:$g),
5539 "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5542 def SUST_B_3D_V2B32_TRAP
5544 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5545 Int32Regs:$r, Int32Regs:$g),
5546 "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5549 def SUST_B_3D_V2B64_TRAP
5551 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5552 Int64Regs:$r, Int64Regs:$g),
5553 "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5556 def SUST_B_3D_V4B8_TRAP
5558 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5559 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5560 "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5561 "\\{$r, $g, $b, $a\\};",
5563 def SUST_B_3D_V4B16_TRAP
5565 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5566 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5567 "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5568 "\\{$r, $g, $b, $a\\};",
5570 def SUST_B_3D_V4B32_TRAP
5572 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5573 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5574 "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5575 "\\{$r, $g, $b, $a\\};",
5580 def SUST_B_1D_B8_ZERO
5582 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5583 "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
5585 def SUST_B_1D_B16_ZERO
5587 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5588 "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
5590 def SUST_B_1D_B32_ZERO
5592 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5593 "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
5595 def SUST_B_1D_B64_ZERO
5597 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5598 "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
5600 def SUST_B_1D_V2B8_ZERO
5602 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5603 "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5605 def SUST_B_1D_V2B16_ZERO
5607 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5608 "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5610 def SUST_B_1D_V2B32_ZERO
5612 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5613 "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5615 def SUST_B_1D_V2B64_ZERO
5617 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5618 "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5620 def SUST_B_1D_V4B8_ZERO
5622 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5623 Int16Regs:$b, Int16Regs:$a),
5624 "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5626 def SUST_B_1D_V4B16_ZERO
5628 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5629 Int16Regs:$b, Int16Regs:$a),
5630 "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5632 def SUST_B_1D_V4B32_ZERO
5634 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5635 Int32Regs:$b, Int32Regs:$a),
5636 "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5640 def SUST_B_1D_ARRAY_B8_ZERO
5642 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5643 "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5645 def SUST_B_1D_ARRAY_B16_ZERO
5647 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5648 "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5650 def SUST_B_1D_ARRAY_B32_ZERO
5652 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5653 "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5655 def SUST_B_1D_ARRAY_B64_ZERO
5657 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5658 "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5660 def SUST_B_1D_ARRAY_V2B8_ZERO
5662 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5664 "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5666 def SUST_B_1D_ARRAY_V2B16_ZERO
5668 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5670 "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5672 def SUST_B_1D_ARRAY_V2B32_ZERO
5674 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5676 "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5678 def SUST_B_1D_ARRAY_V2B64_ZERO
5680 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5682 "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5684 def SUST_B_1D_ARRAY_V4B8_ZERO
5686 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5687 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5688 "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
5689 "\\{$r, $g, $b, $a\\};",
5691 def SUST_B_1D_ARRAY_V4B16_ZERO
5693 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5694 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5695 "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
5696 "\\{$r, $g, $b, $a\\};",
5698 def SUST_B_1D_ARRAY_V4B32_ZERO
5700 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5701 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5702 "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
5703 "\\{$r, $g, $b, $a\\};",
// sust.b.2d surface stores with the .zero suffix.
// Variants: scalar and .v2 for b8/b16/b32/b64, and .v4 for b8/b16/b32.
// Operands: $s (Int64Regs), coordinates $x and $y (Int32Regs), then one, two
// or four value registers ($r, $g, $b, $a). Both b8 and b16 values are carried
// in Int16Regs; b32 uses Int32Regs and b64 uses Int64Regs.
5707 def SUST_B_2D_B8_ZERO
5709 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5710 "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5712 def SUST_B_2D_B16_ZERO
5714 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5715 "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5717 def SUST_B_2D_B32_ZERO
5719 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5720 "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5722 def SUST_B_2D_B64_ZERO
5724 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5725 "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5727 def SUST_B_2D_V2B8_ZERO
5729 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5731 "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5733 def SUST_B_2D_V2B16_ZERO
5735 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5737 "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5739 def SUST_B_2D_V2B32_ZERO
5741 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5743 "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5745 def SUST_B_2D_V2B64_ZERO
5747 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5749 "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5751 def SUST_B_2D_V4B8_ZERO
5753 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5754 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5755 "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
5756 "\\{$r, $g, $b, $a\\};",
5758 def SUST_B_2D_V4B16_ZERO
5760 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5761 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5762 "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
5763 "\\{$r, $g, $b, $a\\};",
5765 def SUST_B_2D_V4B32_ZERO
5767 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5768 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5769 "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
5770 "\\{$r, $g, $b, $a\\};",
// sust.b.a2d surface stores with the .zero suffix.
// Variants: scalar and .v2 for b8/b16/b32/b64, and .v4 for b8/b16/b32.
// Operands: $s (Int64Regs), $idx, $x and $y (Int32Regs), then the value
// register(s).
// The asm string emits a four-element coordinate vector {$idx, $x, $y, $y},
// i.e. $y is printed twice.
// NOTE(review): the repeated $y presumably pads the coordinate to the .v4
// form PTX expects for a2d, with the last element ignored -- confirm against
// the PTX ISA before changing it.
5774 def SUST_B_2D_ARRAY_B8_ZERO
5776 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5778 "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5780 def SUST_B_2D_ARRAY_B16_ZERO
5782 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5784 "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5786 def SUST_B_2D_ARRAY_B32_ZERO
5788 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5790 "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5792 def SUST_B_2D_ARRAY_B64_ZERO
5794 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5796 "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5798 def SUST_B_2D_ARRAY_V2B8_ZERO
5800 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5801 Int16Regs:$r, Int16Regs:$g),
5802 "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5805 def SUST_B_2D_ARRAY_V2B16_ZERO
5807 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5808 Int16Regs:$r, Int16Regs:$g),
5809 "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5812 def SUST_B_2D_ARRAY_V2B32_ZERO
5814 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5815 Int32Regs:$r, Int32Regs:$g),
5816 "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5819 def SUST_B_2D_ARRAY_V2B64_ZERO
5821 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5822 Int64Regs:$r, Int64Regs:$g),
5823 "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5826 def SUST_B_2D_ARRAY_V4B8_ZERO
5828 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5829 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5830 "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5831 "\\{$r, $g, $b, $a\\};",
5833 def SUST_B_2D_ARRAY_V4B16_ZERO
5835 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5836 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5837 "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5838 "\\{$r, $g, $b, $a\\};",
5840 def SUST_B_2D_ARRAY_V4B32_ZERO
5842 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5843 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5844 "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5845 "\\{$r, $g, $b, $a\\};",
// sust.b.3d surface stores with the .zero suffix.
// Variants: scalar and .v2 for b8/b16/b32/b64, and .v4 for b8/b16/b32.
// Operands: $s (Int64Regs), coordinates $x, $y and $z (Int32Regs), then the
// value register(s).
// The asm string emits a four-element coordinate vector {$x, $y, $z, $z},
// i.e. $z is printed twice.
// NOTE(review): the repeated $z presumably pads the coordinate to the .v4
// form PTX expects for 3d, with the last element ignored -- confirm against
// the PTX ISA before changing it.
5849 def SUST_B_3D_B8_ZERO
5851 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5853 "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5855 def SUST_B_3D_B16_ZERO
5857 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5859 "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5861 def SUST_B_3D_B32_ZERO
5863 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5865 "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5867 def SUST_B_3D_B64_ZERO
5869 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5871 "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5873 def SUST_B_3D_V2B8_ZERO
5875 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5876 Int16Regs:$r, Int16Regs:$g),
5877 "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5880 def SUST_B_3D_V2B16_ZERO
5882 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5883 Int16Regs:$r, Int16Regs:$g),
5884 "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5887 def SUST_B_3D_V2B32_ZERO
5889 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5890 Int32Regs:$r, Int32Regs:$g),
5891 "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5894 def SUST_B_3D_V2B64_ZERO
5896 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5897 Int64Regs:$r, Int64Regs:$g),
5898 "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5901 def SUST_B_3D_V4B8_ZERO
5903 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5904 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5905 "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5906 "\\{$r, $g, $b, $a\\};",
5908 def SUST_B_3D_V4B16_ZERO
5910 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5911 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5912 "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5913 "\\{$r, $g, $b, $a\\};",
5915 def SUST_B_3D_V4B32_ZERO
5917 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5918 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5919 "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5920 "\\{$r, $g, $b, $a\\};",
// sust.p.1d surface stores with the .trap suffix.
// Variants: scalar, .v2 and .v4, each for b8/b16/b32 (this group defines no
// b64 forms).
// Operands: $s (Int64Regs), coordinate $x (Int32Regs), then one, two or four
// value registers. Both b8 and b16 values are carried in Int16Regs.
5927 def SUST_P_1D_B8_TRAP
5929 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5930 "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5932 def SUST_P_1D_B16_TRAP
5934 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5935 "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5937 def SUST_P_1D_B32_TRAP
5939 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5940 "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5942 def SUST_P_1D_V2B8_TRAP
5944 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5945 "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5947 def SUST_P_1D_V2B16_TRAP
5949 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5950 "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5952 def SUST_P_1D_V2B32_TRAP
5954 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5955 "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5957 def SUST_P_1D_V4B8_TRAP
5959 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5960 Int16Regs:$b, Int16Regs:$a),
5961 "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5963 def SUST_P_1D_V4B16_TRAP
5965 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5966 Int16Regs:$b, Int16Regs:$a),
5967 "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5969 def SUST_P_1D_V4B32_TRAP
5971 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5972 Int32Regs:$b, Int32Regs:$a),
5973 "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.p.a1d surface stores with the .trap suffix.
// Variants: scalar, .v2 and .v4, each for b8/b16/b32 (this group defines no
// b64 forms).
// Operands: $s (Int64Regs), $idx and $x (Int32Regs), then the value
// register(s). The asm string prints the coordinate vector as {$idx, $x}.
5977 def SUST_P_1D_ARRAY_B8_TRAP
5979 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5980 "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5982 def SUST_P_1D_ARRAY_B16_TRAP
5984 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5985 "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5987 def SUST_P_1D_ARRAY_B32_TRAP
5989 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5990 "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5992 def SUST_P_1D_ARRAY_V2B8_TRAP
5994 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5996 "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5998 def SUST_P_1D_ARRAY_V2B16_TRAP
6000 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
6002 "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
6004 def SUST_P_1D_ARRAY_V2B32_TRAP
6006 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
6008 "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
6010 def SUST_P_1D_ARRAY_V4B8_TRAP
6012 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
6013 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6014 "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
6015 "\\{$r, $g, $b, $a\\};",
6017 def SUST_P_1D_ARRAY_V4B16_TRAP
6019 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
6020 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6021 "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
6022 "\\{$r, $g, $b, $a\\};",
6024 def SUST_P_1D_ARRAY_V4B32_TRAP
6026 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
6027 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6028 "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
6029 "\\{$r, $g, $b, $a\\};",
// sust.p.2d surface stores with the .trap suffix.
// Variants: scalar, .v2 and .v4, each for b8/b16/b32 (this group defines no
// b64 forms).
// Operands: $s (Int64Regs), coordinates $x and $y (Int32Regs), then the value
// register(s). Both b8 and b16 values are carried in Int16Regs.
6033 def SUST_P_2D_B8_TRAP
6035 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6036 "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
6038 def SUST_P_2D_B16_TRAP
6040 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6041 "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
6043 def SUST_P_2D_B32_TRAP
6045 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6046 "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
6048 def SUST_P_2D_V2B8_TRAP
6050 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
6052 "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
6054 def SUST_P_2D_V2B16_TRAP
6056 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
6058 "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
6060 def SUST_P_2D_V2B32_TRAP
6062 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6064 "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
6066 def SUST_P_2D_V4B8_TRAP
6068 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
6069 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6070 "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
6071 "\\{$r, $g, $b, $a\\};",
6073 def SUST_P_2D_V4B16_TRAP
6075 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
6076 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6077 "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
6078 "\\{$r, $g, $b, $a\\};",
6080 def SUST_P_2D_V4B32_TRAP
6082 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6083 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6084 "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
6085 "\\{$r, $g, $b, $a\\};",
// sust.p.a2d surface stores with the .trap suffix.
// Variants: scalar, .v2 and .v4, each for b8/b16/b32 (this group defines no
// b64 forms).
// Operands: $s (Int64Regs), $idx, $x and $y (Int32Regs), then the value
// register(s).
// The asm string emits a four-element coordinate vector {$idx, $x, $y, $y},
// i.e. $y is printed twice.
// NOTE(review): the repeated $y presumably pads the coordinate to the .v4
// form PTX expects for a2d, with the last element ignored -- confirm against
// the PTX ISA before changing it.
6089 def SUST_P_2D_ARRAY_B8_TRAP
6091 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6093 "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
6095 def SUST_P_2D_ARRAY_B16_TRAP
6097 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6099 "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
6101 def SUST_P_2D_ARRAY_B32_TRAP
6103 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6105 "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
6107 def SUST_P_2D_ARRAY_V2B8_TRAP
6109 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6110 Int16Regs:$r, Int16Regs:$g),
6111 "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
6114 def SUST_P_2D_ARRAY_V2B16_TRAP
6116 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6117 Int16Regs:$r, Int16Regs:$g),
6118 "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
6121 def SUST_P_2D_ARRAY_V2B32_TRAP
6123 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6124 Int32Regs:$r, Int32Regs:$g),
6125 "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
6128 def SUST_P_2D_ARRAY_V4B8_TRAP
6130 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6131 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6132 "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
6133 "\\{$r, $g, $b, $a\\};",
6135 def SUST_P_2D_ARRAY_V4B16_TRAP
6137 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6138 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6139 "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
6140 "\\{$r, $g, $b, $a\\};",
6142 def SUST_P_2D_ARRAY_V4B32_TRAP
6144 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6145 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6146 "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
6147 "\\{$r, $g, $b, $a\\};",
// sust.p.3d surface stores with the .trap suffix.
// Variants: scalar, .v2 and .v4, each for b8/b16/b32 (this group defines no
// b64 forms).
// Operands: $s (Int64Regs), coordinates $x, $y and $z (Int32Regs), then the
// value register(s).
// The asm string emits a four-element coordinate vector {$x, $y, $z, $z},
// i.e. $z is printed twice.
// NOTE(review): the repeated $z presumably pads the coordinate to the .v4
// form PTX expects for 3d, with the last element ignored -- confirm against
// the PTX ISA before changing it.
6151 def SUST_P_3D_B8_TRAP
6153 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6155 "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
6157 def SUST_P_3D_B16_TRAP
6159 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6161 "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
6163 def SUST_P_3D_B32_TRAP
6165 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6167 "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
6169 def SUST_P_3D_V2B8_TRAP
6171 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6172 Int16Regs:$r, Int16Regs:$g),
6173 "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6176 def SUST_P_3D_V2B16_TRAP
6178 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6179 Int16Regs:$r, Int16Regs:$g),
6180 "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6183 def SUST_P_3D_V2B32_TRAP
6185 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6186 Int32Regs:$r, Int32Regs:$g),
6187 "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6190 def SUST_P_3D_V4B8_TRAP
6192 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6193 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6194 "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6195 "\\{$r, $g, $b, $a\\};",
6197 def SUST_P_3D_V4B16_TRAP
6199 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6200 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6201 "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6202 "\\{$r, $g, $b, $a\\};",
6204 def SUST_P_3D_V4B32_TRAP
6206 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6207 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6208 "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6209 "\\{$r, $g, $b, $a\\};",
6213 // Surface store instruction selection patterns.
6214 // These are written as standalone Pat<> records instead of being attached to
6215 // the instruction definitions, because TableGen reports type errors for the inline form.
// Selection patterns for the int_nvvm_sust_b_1d_*_clamp intrinsics.
// Each anonymous Pat matches one intrinsic (i8/i16/i32/i64, scalar, v2 and
// v4 forms; v4 has no i64 form) and rewrites it to the SUST_B_1D_*_CLAMP
// instruction with the matching element width, passing $s, $x and the value
// register(s) through in the same order.
// i8 and i16 values are both matched in Int16Regs.
6218 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
6219 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6220 (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6222 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
6223 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6224 (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6226 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
6227 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6228 (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6230 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
6231 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6232 (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6234 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
6235 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6236 (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
6237 Int16Regs:$r, Int16Regs:$g)>;
6239 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
6240 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6241 (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
6242 Int16Regs:$r, Int16Regs:$g)>;
6244 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
6245 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6246 (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
6247 Int32Regs:$r, Int32Regs:$g)>;
6249 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
6250 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6251 (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
6252 Int64Regs:$r, Int64Regs:$g)>;
6254 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
6255 Int64Regs:$s, Int32Regs:$x,
6256 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6257 (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
6258 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6260 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
6261 Int64Regs:$s, Int32Regs:$x,
6262 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6263 (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
6264 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6266 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
6267 Int64Regs:$s, Int32Regs:$x,
6268 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6269 (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
6270 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_array_*_clamp intrinsics.
// Each anonymous Pat rewrites one intrinsic to the SUST_B_1D_ARRAY_*_CLAMP
// instruction with the matching element width. The intrinsic operands are
// bound as $s, $l (second operand, Int32Regs) and $x, followed by the value
// register(s), and are handed to the instruction in that order.
6274 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
6275 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6276 (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6279 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
6280 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6281 (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6284 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
6285 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6286 (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6289 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
6290 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6291 (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6294 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
6295 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6296 (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6297 Int16Regs:$r, Int16Regs:$g)>;
6299 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
6300 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6301 (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6302 Int16Regs:$r, Int16Regs:$g)>;
6304 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
6305 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6306 (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6307 Int32Regs:$r, Int32Regs:$g)>;
6309 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
6310 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6311 (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6312 Int64Regs:$r, Int64Regs:$g)>;
6314 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
6315 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6316 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6317 (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6318 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6320 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
6321 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6322 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6323 (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6324 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6326 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
6327 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6328 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6329 (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6330 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_*_clamp intrinsics.
// Each anonymous Pat rewrites one intrinsic to the SUST_B_2D_*_CLAMP
// instruction with the matching element width. The intrinsic operands are
// bound as $s, $x and $y, followed by the value register(s), and are handed
// to the instruction in that order.
6334 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
6335 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6336 (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6339 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
6340 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6341 (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6344 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
6345 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6346 (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6349 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
6350 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6351 (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6354 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
6355 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6356 (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6357 Int16Regs:$r, Int16Regs:$g)>;
6359 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
6360 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6361 (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6362 Int16Regs:$r, Int16Regs:$g)>;
6364 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
6365 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6366 (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6367 Int32Regs:$r, Int32Regs:$g)>;
6369 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
6370 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6371 (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6372 Int64Regs:$r, Int64Regs:$g)>;
6374 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
6375 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6376 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6377 (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6378 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6380 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
6381 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6382 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6383 (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6384 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6386 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
6387 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6388 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6389 (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6390 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_array_*_clamp intrinsics.
// Each anonymous Pat rewrites one intrinsic to the SUST_B_2D_ARRAY_*_CLAMP
// instruction with the matching element width. The intrinsic operands are
// bound as $s, $l (second operand, Int32Regs), $x and $y, followed by the
// value register(s), and are handed to the instruction in that order.
6394 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
6395 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6396 (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
6397 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6400 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
6401 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6402 (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
6403 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6406 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
6407 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6408 (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
6409 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6412 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
6413 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6414 (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
6415 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6418 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
6419 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6420 Int16Regs:$r, Int16Regs:$g),
6421 (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
6422 Int32Regs:$x, Int32Regs:$y,
6423 Int16Regs:$r, Int16Regs:$g)>;
6425 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
6426 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6427 Int16Regs:$r, Int16Regs:$g),
6428 (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
6429 Int32Regs:$x, Int32Regs:$y,
6430 Int16Regs:$r, Int16Regs:$g)>;
6432 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
6433 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6435 (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6436 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6438 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
6439 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6441 (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
6442 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6444 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
6445 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6446 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6447 (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
6448 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6449 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6451 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
6452 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6453 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6454 (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
6455 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6456 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6458 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
6459 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6460 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6461 (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6462 Int32Regs:$x, Int32Regs:$y,
6463 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_3d_*_clamp intrinsics.
// Each anonymous Pat rewrites one intrinsic to the SUST_B_3D_*_CLAMP
// instruction with the matching element width. The intrinsic operands are
// bound as $s, $x, $y and $z, followed by the value register(s), and are
// handed to the instruction in that order.
6467 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
6468 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6470 (SUST_B_3D_B8_CLAMP Int64Regs:$s,
6471 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6474 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
6475 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6477 (SUST_B_3D_B16_CLAMP Int64Regs:$s,
6478 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6481 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
6482 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6484 (SUST_B_3D_B32_CLAMP Int64Regs:$s,
6485 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6488 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
6489 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6491 (SUST_B_3D_B64_CLAMP Int64Regs:$s,
6492 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6495 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
6496 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6497 Int16Regs:$r, Int16Regs:$g),
6498 (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
6499 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6500 Int16Regs:$r, Int16Regs:$g)>;
6502 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
6503 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6504 Int16Regs:$r, Int16Regs:$g),
6505 (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
6506 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6507 Int16Regs:$r, Int16Regs:$g)>;
6509 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
6510 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6511 Int32Regs:$r, Int32Regs:$g),
6512 (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
6513 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6514 Int32Regs:$r, Int32Regs:$g)>;
6516 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
6517 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6518 Int64Regs:$r, Int64Regs:$g),
6519 (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
6520 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6521 Int64Regs:$r, Int64Regs:$g)>;
6523 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
6524 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6525 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6526 (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
6527 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6528 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6530 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
6531 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6532 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6533 (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
6534 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6535 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6537 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
6538 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6539 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6540 (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
6541 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6542 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_*_trap intrinsics.
// Each anonymous Pat matches one intrinsic (i8/i16/i32/i64, scalar, v2 and
// v4 forms; v4 has no i64 form) and rewrites it to the SUST_B_1D_*_TRAP
// instruction with the matching element width, passing $s, $x and the value
// register(s) through in the same order.
// i8 and i16 values are both matched in Int16Regs.
6546 def : Pat<(int_nvvm_sust_b_1d_i8_trap
6547 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6548 (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6550 def : Pat<(int_nvvm_sust_b_1d_i16_trap
6551 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6552 (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6554 def : Pat<(int_nvvm_sust_b_1d_i32_trap
6555 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6556 (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6558 def : Pat<(int_nvvm_sust_b_1d_i64_trap
6559 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6560 (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6562 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
6563 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6564 (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6565 Int16Regs:$r, Int16Regs:$g)>;
6567 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
6568 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6569 (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6570 Int16Regs:$r, Int16Regs:$g)>;
6572 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
6573 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6574 (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6575 Int32Regs:$r, Int32Regs:$g)>;
6577 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
6578 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6579 (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
6580 Int64Regs:$r, Int64Regs:$g)>;
6582 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
6583 Int64Regs:$s, Int32Regs:$x,
6584 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6585 (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6586 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6588 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
6589 Int64Regs:$s, Int32Regs:$x,
6590 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6591 (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6592 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6594 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
6595 Int64Regs:$s, Int32Regs:$x,
6596 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6597 (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6598 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_array_*_trap intrinsics.
// Each anonymous Pat rewrites one intrinsic to the SUST_B_1D_ARRAY_*_TRAP
// instruction with the matching element width. The intrinsic operands are
// bound as $s, $l (second operand, Int32Regs) and $x, followed by the value
// register(s), and are handed to the instruction in that order.
6602 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
6603 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6604 (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6607 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
6608 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6609 (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6612 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
6613 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6614 (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6617 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
6618 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6619 (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6622 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6623 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6624 (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6625 Int16Regs:$r, Int16Regs:$g)>;
6627 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
6628 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6629 (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6630 Int16Regs:$r, Int16Regs:$g)>;
6632 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
6633 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6634 (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6635 Int32Regs:$r, Int32Regs:$g)>;
6637 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
6638 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6639 (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6640 Int64Regs:$r, Int64Regs:$g)>;
6642 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
6643 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6644 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6645 (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6646 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6648 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6649 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6650 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6651 (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6652 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6654 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6655 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6656 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6657 (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6658 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_*_trap intrinsics.
// Each anonymous Pat rewrites one intrinsic to the SUST_B_2D_*_TRAP
// instruction with the matching element width. The intrinsic operands are
// bound as $s, $x and $y, followed by the value register(s), and are handed
// to the instruction in that order.
6662 def : Pat<(int_nvvm_sust_b_2d_i8_trap
6663 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6664 (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6667 def : Pat<(int_nvvm_sust_b_2d_i16_trap
6668 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6669 (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6672 def : Pat<(int_nvvm_sust_b_2d_i32_trap
6673 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6674 (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6677 def : Pat<(int_nvvm_sust_b_2d_i64_trap
6678 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6679 (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6682 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6683 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6684 (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6685 Int16Regs:$r, Int16Regs:$g)>;
6687 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6688 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6689 (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6690 Int16Regs:$r, Int16Regs:$g)>;
6692 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6693 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6694 (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6695 Int32Regs:$r, Int32Regs:$g)>;
6697 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6698 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6699 (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6700 Int64Regs:$r, Int64Regs:$g)>;
6702 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6703 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6704 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6705 (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6706 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6708 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6709 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6710 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6711 (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6712 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6714 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6715 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6716 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6717 (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6718 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns mapping int_nvvm_sust_b_2d_array_*_trap onto the
// SUST_B_2D_ARRAY_*_TRAP instructions. Operands are forwarded in order: a
// 64-bit $s, the 32-bit $l, $x and $y, then the value registers. i8 and i16
// values are both carried in Int16Regs.
// NOTE(review): in this listing the i8/i16/i32/i64 patterns end on a trailing
// comma before the next `def`, and the v2i32/v2i64 patterns show the source
// dag ending on `$r,` with the embedded line numbers skipping one line, so
// those operands/closers are not visible here -- confirm against the full
// file before editing them.
6722 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6723 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6724 (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6725 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6728 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6729 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6730 (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6731 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6734 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6735 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6736 (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6737 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6740 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6741 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6742 (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6743 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6746 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6747 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6748 Int16Regs:$r, Int16Regs:$g),
6749 (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6750 Int32Regs:$x, Int32Regs:$y,
6751 Int16Regs:$r, Int16Regs:$g)>;
6753 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6754 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6755 Int16Regs:$r, Int16Regs:$g),
6756 (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6757 Int32Regs:$x, Int32Regs:$y,
6758 Int16Regs:$r, Int16Regs:$g)>;
6760 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6761 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6763 (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6764 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6766 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6767 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6769 (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6770 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6772 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6773 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6774 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6775 (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6776 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6777 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6779 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6780 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6781 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6782 (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6783 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6784 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6786 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6787 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6788 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6789 (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6790 Int32Regs:$x, Int32Regs:$y,
6791 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns mapping int_nvvm_sust_b_3d_*_trap onto the
// SUST_B_3D_*_TRAP instructions. Operands are forwarded in order: a 64-bit
// $s, the 32-bit $x, $y and $z, then the value registers. i8 and i16 values
// are both carried in Int16Regs.
// NOTE(review): in this listing the i8/i16/i32/i64 patterns show both dags
// ending on `$z,` with the embedded line numbers skipping a line after each,
// so their value operand and the closing `)>;` are not visible here --
// confirm against the full file before editing them.
6795 def : Pat<(int_nvvm_sust_b_3d_i8_trap
6796 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6798 (SUST_B_3D_B8_TRAP Int64Regs:$s,
6799 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6802 def : Pat<(int_nvvm_sust_b_3d_i16_trap
6803 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6805 (SUST_B_3D_B16_TRAP Int64Regs:$s,
6806 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6809 def : Pat<(int_nvvm_sust_b_3d_i32_trap
6810 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6812 (SUST_B_3D_B32_TRAP Int64Regs:$s,
6813 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6816 def : Pat<(int_nvvm_sust_b_3d_i64_trap
6817 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6819 (SUST_B_3D_B64_TRAP Int64Regs:$s,
6820 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6823 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6824 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6825 Int16Regs:$r, Int16Regs:$g),
6826 (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6827 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6828 Int16Regs:$r, Int16Regs:$g)>;
6830 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6831 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6832 Int16Regs:$r, Int16Regs:$g),
6833 (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6834 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6835 Int16Regs:$r, Int16Regs:$g)>;
6837 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6838 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6839 Int32Regs:$r, Int32Regs:$g),
6840 (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6841 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6842 Int32Regs:$r, Int32Regs:$g)>;
6844 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6845 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6846 Int64Regs:$r, Int64Regs:$g),
6847 (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6848 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6849 Int64Regs:$r, Int64Regs:$g)>;
6851 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6852 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6853 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6854 (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6855 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6856 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6858 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6859 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6860 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6861 (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6862 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6863 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6865 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6866 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6867 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6868 (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6869 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6870 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns mapping int_nvvm_sust_b_1d_*_zero onto the
// SUST_B_1D_*_ZERO instructions. Operands are forwarded in order: a 64-bit
// $s, the 32-bit $x, then one, two or four value registers ($r, $g, $b, $a).
// i8 and i16 values are both carried in Int16Regs; i32 and i64 use Int32Regs
// and Int64Regs respectively.
6874 def : Pat<(int_nvvm_sust_b_1d_i8_zero
6875 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6876 (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6878 def : Pat<(int_nvvm_sust_b_1d_i16_zero
6879 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6880 (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6882 def : Pat<(int_nvvm_sust_b_1d_i32_zero
6883 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6884 (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6886 def : Pat<(int_nvvm_sust_b_1d_i64_zero
6887 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6888 (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6890 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6891 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6892 (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6893 Int16Regs:$r, Int16Regs:$g)>;
6895 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6896 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6897 (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6898 Int16Regs:$r, Int16Regs:$g)>;
6900 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6901 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6902 (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6903 Int32Regs:$r, Int32Regs:$g)>;
6905 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6906 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6907 (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6908 Int64Regs:$r, Int64Regs:$g)>;
6910 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6911 Int64Regs:$s, Int32Regs:$x,
6912 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6913 (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6914 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6916 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6917 Int64Regs:$s, Int32Regs:$x,
6918 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6919 (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6920 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6922 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6923 Int64Regs:$s, Int32Regs:$x,
6924 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6925 (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6926 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns mapping int_nvvm_sust_b_1d_array_*_zero onto the
// SUST_B_1D_ARRAY_*_ZERO instructions. Operands are forwarded in order: a
// 64-bit $s, the 32-bit $l and $x, then the value registers. i8 and i16
// values are both carried in Int16Regs.
// NOTE(review): in this listing the i8/i16/i32/i64 patterns end on a trailing
// comma right before the next `def` (the embedded line numbers skip there),
// so their final value operand and closing `)>;` are not visible here --
// confirm against the full file before editing them.
6930 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6931 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6932 (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6935 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6936 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6937 (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6940 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6941 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6942 (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6945 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6946 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6947 (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6950 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6951 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6952 (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6953 Int16Regs:$r, Int16Regs:$g)>;
6955 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6956 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6957 (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6958 Int16Regs:$r, Int16Regs:$g)>;
6960 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6961 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6962 (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6963 Int32Regs:$r, Int32Regs:$g)>;
6965 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6966 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6967 (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6968 Int64Regs:$r, Int64Regs:$g)>;
6970 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6971 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6972 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6973 (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6974 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6976 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6977 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6978 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6979 (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6980 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6982 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6983 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6984 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6985 (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6986 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns mapping int_nvvm_sust_b_2d_*_zero onto the
// SUST_B_2D_*_ZERO instructions. Operands are forwarded in order: a 64-bit
// $s, the 32-bit $x and $y, then the value registers. i8 and i16 values are
// both carried in Int16Regs.
// NOTE(review): in this listing the i8/i16/i32/i64 patterns end on a trailing
// comma right before the next `def` (the embedded line numbers skip there),
// so their final value operand and closing `)>;` are not visible here --
// confirm against the full file before editing them.
6990 def : Pat<(int_nvvm_sust_b_2d_i8_zero
6991 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6992 (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6995 def : Pat<(int_nvvm_sust_b_2d_i16_zero
6996 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6997 (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7000 def : Pat<(int_nvvm_sust_b_2d_i32_zero
7001 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7002 (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7005 def : Pat<(int_nvvm_sust_b_2d_i64_zero
7006 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
7007 (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7010 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
7011 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7012 (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7013 Int16Regs:$r, Int16Regs:$g)>;
7015 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
7016 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7017 (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7018 Int16Regs:$r, Int16Regs:$g)>;
7020 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
7021 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
7022 (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7023 Int32Regs:$r, Int32Regs:$g)>;
7025 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
7026 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
7027 (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7028 Int64Regs:$r, Int64Regs:$g)>;
7030 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
7031 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7032 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7033 (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7034 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7036 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
7037 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7038 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7039 (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7040 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7042 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
7043 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7044 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7045 (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7046 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns mapping int_nvvm_sust_b_2d_array_*_zero onto the
// SUST_B_2D_ARRAY_*_ZERO instructions. Operands are forwarded in order: a
// 64-bit $s, the 32-bit $l, $x and $y, then the value registers. i8 and i16
// values are both carried in Int16Regs.
// NOTE(review): in this listing the i8/i16/i32/i64 patterns end on a trailing
// comma before the next `def`, and the v2i32/v2i64 patterns show the source
// dag ending on `$r,` with the embedded line numbers skipping one line, so
// those operands/closers are not visible here -- confirm against the full
// file before editing them.
7050 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
7051 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7052 (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
7053 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7056 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
7057 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7058 (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
7059 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7062 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
7063 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7064 (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
7065 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7068 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
7069 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
7070 (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
7071 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7074 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
7075 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7076 Int16Regs:$r, Int16Regs:$g),
7077 (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
7078 Int32Regs:$x, Int32Regs:$y,
7079 Int16Regs:$r, Int16Regs:$g)>;
7081 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
7082 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7083 Int16Regs:$r, Int16Regs:$g),
7084 (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
7085 Int32Regs:$x, Int32Regs:$y,
7086 Int16Regs:$r, Int16Regs:$g)>;
7088 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
7089 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
7091 (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
7092 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
7094 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
7095 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
7097 (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
7098 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
7100 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
7101 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7102 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7103 (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
7104 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7105 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7107 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
7108 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7109 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7110 (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
7111 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7112 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7114 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
7115 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7116 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7117 (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
7118 Int32Regs:$x, Int32Regs:$y,
7119 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns mapping int_nvvm_sust_b_3d_*_zero onto the
// SUST_B_3D_*_ZERO instructions. Operands are forwarded in order: a 64-bit
// $s, the 32-bit $x, $y and $z, then the value registers. i8 and i16 values
// are both carried in Int16Regs.
// NOTE(review): in this listing the i8/i16/i32/i64 patterns show both dags
// ending on `$z,` with the embedded line numbers skipping a line after each,
// so their value operand and the closing `)>;` are not visible here --
// confirm against the full file before editing them.
7123 def : Pat<(int_nvvm_sust_b_3d_i8_zero
7124 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7126 (SUST_B_3D_B8_ZERO Int64Regs:$s,
7127 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7130 def : Pat<(int_nvvm_sust_b_3d_i16_zero
7131 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7133 (SUST_B_3D_B16_ZERO Int64Regs:$s,
7134 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7137 def : Pat<(int_nvvm_sust_b_3d_i32_zero
7138 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7140 (SUST_B_3D_B32_ZERO Int64Regs:$s,
7141 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7144 def : Pat<(int_nvvm_sust_b_3d_i64_zero
7145 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7147 (SUST_B_3D_B64_ZERO Int64Regs:$s,
7148 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7151 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
7152 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7153 Int16Regs:$r, Int16Regs:$g),
7154 (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
7155 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7156 Int16Regs:$r, Int16Regs:$g)>;
7158 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
7159 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7160 Int16Regs:$r, Int16Regs:$g),
7161 (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
7162 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7163 Int16Regs:$r, Int16Regs:$g)>;
7165 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
7166 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7167 Int32Regs:$r, Int32Regs:$g),
7168 (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
7169 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7170 Int32Regs:$r, Int32Regs:$g)>;
7172 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
7173 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7174 Int64Regs:$r, Int64Regs:$g),
7175 (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
7176 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7177 Int64Regs:$r, Int64Regs:$g)>;
7179 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
7180 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7181 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7182 (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
7183 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7184 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7186 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
7187 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7188 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7189 (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
7190 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7191 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7193 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
7194 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7195 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7196 (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
7197 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7198 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns mapping int_nvvm_sust_p_1d_*_trap onto the
// SUST_P_1D_*_TRAP instructions. Operands are forwarded in order: a 64-bit
// $s, the 32-bit $x, then one, two or four value registers ($r, $g, $b, $a).
// i8 and i16 values are both carried in Int16Regs. Only i8/i16/i32 element
// types appear in this group (there is no i64 form here).
7203 def : Pat<(int_nvvm_sust_p_1d_i8_trap
7204 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
7205 (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
7207 def : Pat<(int_nvvm_sust_p_1d_i16_trap
7208 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
7209 (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
7211 def : Pat<(int_nvvm_sust_p_1d_i32_trap
7212 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
7213 (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
7215 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
7216 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7217 (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
7218 Int16Regs:$r, Int16Regs:$g)>;
7220 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
7221 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7222 (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
7223 Int16Regs:$r, Int16Regs:$g)>;
7225 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
7226 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7227 (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
7228 Int32Regs:$r, Int32Regs:$g)>;
7230 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
7231 Int64Regs:$s, Int32Regs:$x,
7232 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7233 (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
7234 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7236 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
7237 Int64Regs:$s, Int32Regs:$x,
7238 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7239 (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
7240 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7242 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
7243 Int64Regs:$s, Int32Regs:$x,
7244 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7245 (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
7246 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns mapping int_nvvm_sust_p_1d_array_*_trap onto the
// SUST_P_1D_ARRAY_*_TRAP instructions. Operands are forwarded in order: a
// 64-bit $s, the 32-bit $l and $x, then the value registers. i8 and i16
// values are both carried in Int16Regs.
// NOTE(review): in this listing the i8/i16/i32 patterns end on a trailing
// comma right before the next `def` (the embedded line numbers skip there),
// so their final value operand and closing `)>;` are not visible here --
// confirm against the full file before editing them.
7250 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
7251 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7252 (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7255 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
7256 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7257 (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7260 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
7261 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
7262 (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7265 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
7266 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7267 (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7268 Int16Regs:$r, Int16Regs:$g)>;
7270 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
7271 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7272 (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7273 Int16Regs:$r, Int16Regs:$g)>;
7275 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
7276 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7277 (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7278 Int32Regs:$r, Int32Regs:$g)>;
7280 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
7281 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7282 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7283 (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7284 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7286 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
7287 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7288 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7289 (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7290 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7292 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
7293 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7294 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7295 (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7296 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns mapping int_nvvm_sust_p_2d_*_trap onto the
// SUST_P_2D_*_TRAP instructions. Operands are forwarded in order: a 64-bit
// $s, the 32-bit $x and $y, then the value registers. i8 and i16 values are
// both carried in Int16Regs.
// NOTE(review): in this listing the i8/i16/i32 patterns end on a trailing
// comma right before the next `def` (the embedded line numbers skip there),
// so their final value operand and closing `)>;` are not visible here --
// confirm against the full file before editing them.
7300 def : Pat<(int_nvvm_sust_p_2d_i8_trap
7301 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7302 (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7305 def : Pat<(int_nvvm_sust_p_2d_i16_trap
7306 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7307 (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7310 def : Pat<(int_nvvm_sust_p_2d_i32_trap
7311 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7312 (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7315 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
7316 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7317 (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7318 Int16Regs:$r, Int16Regs:$g)>;
7320 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
7321 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7322 (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7323 Int16Regs:$r, Int16Regs:$g)>;
7325 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
7326 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
7327 (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7328 Int32Regs:$r, Int32Regs:$g)>;
7330 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
7331 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7332 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7333 (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7334 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7336 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
7337 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7338 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7339 (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7340 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7342 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
7343 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7344 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7345 (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7346 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns mapping int_nvvm_sust_p_2d_array_*_trap onto the
// SUST_P_2D_ARRAY_*_TRAP instructions. Operands are forwarded in order: a
// 64-bit $s, the 32-bit $l, $x and $y, then the value registers. i8 and i16
// values are both carried in Int16Regs.
// NOTE(review): in this listing the i8/i16/i32 patterns end on a trailing
// comma before the next `def`, and the v2i32 pattern shows the source dag
// ending on `$r,` with the embedded line numbers skipping one line, so those
// operands/closers are not visible here -- confirm against the full file
// before editing them.
7350 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
7351 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7352 (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
7353 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7356 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
7357 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7358 (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
7359 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7362 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
7363 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7364 (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
7365 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7368 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
7369 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7370 Int16Regs:$r, Int16Regs:$g),
7371 (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
7372 Int32Regs:$x, Int32Regs:$y,
7373 Int16Regs:$r, Int16Regs:$g)>;
7375 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
7376 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7377 Int16Regs:$r, Int16Regs:$g),
7378 (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
7379 Int32Regs:$x, Int32Regs:$y,
7380 Int16Regs:$r, Int16Regs:$g)>;
7382 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
7383 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
7385 (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
7386 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
7388 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
7389 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7390 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7391 (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
7392 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7393 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7395 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
7396 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7397 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7398 (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
7399 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7400 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7402 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
7403 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7404 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7405 (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
7406 Int32Regs:$x, Int32Regs:$y,
7407 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns mapping int_nvvm_sust_p_3d_*_trap onto the
// SUST_P_3D_*_TRAP instructions. Operands are forwarded in order: a 64-bit
// $s, the 32-bit $x, $y and $z, then the value registers. i8 and i16 values
// are both carried in Int16Regs.
// NOTE(review): in this listing the i8/i16/i32 patterns show both dags ending
// on `$z,` with the embedded line numbers skipping a line after each, so
// their value operand and the closing `)>;` are not visible here -- confirm
// against the full file before editing them.
7411 def : Pat<(int_nvvm_sust_p_3d_i8_trap
7412 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7414 (SUST_P_3D_B8_TRAP Int64Regs:$s,
7415 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7418 def : Pat<(int_nvvm_sust_p_3d_i16_trap
7419 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7421 (SUST_P_3D_B16_TRAP Int64Regs:$s,
7422 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7425 def : Pat<(int_nvvm_sust_p_3d_i32_trap
7426 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7428 (SUST_P_3D_B32_TRAP Int64Regs:$s,
7429 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7432 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
7433 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7434 Int16Regs:$r, Int16Regs:$g),
7435 (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
7436 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7437 Int16Regs:$r, Int16Regs:$g)>;
7439 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
7440 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7441 Int16Regs:$r, Int16Regs:$g),
7442 (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
7443 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7444 Int16Regs:$r, Int16Regs:$g)>;
7446 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
7447 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7448 Int32Regs:$r, Int32Regs:$g),
7449 (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
7450 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7451 Int32Regs:$r, Int32Regs:$g)>;
7453 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
7454 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7455 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7456 (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
7457 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7458 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7460 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
7461 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7462 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7463 (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
7464 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7465 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7467 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
7468 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7469 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7470 (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
7471 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7472 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7474 //-----------------------------------
7475 // Read Special Registers
7476 //-----------------------------------
// Instruction class for reading a special register into a 64-bit register.
//   regname - register name spliced into the assembly after the '%' sigil.
//   intop   - zero-operand intrinsic whose result is written to $d.
// The assembly string is assembled with the paste operator and expands to
// "mov.u64 \t$d, %<regname>;".
7478 class PTX_READ_SREG_R64<string regname, Intrinsic intop>
7479 : NVPTXInst<(outs Int64Regs:$d), (ins),
7480 "mov.u64 \t$d, %" # regname # ";",
7481 [(set Int64Regs:$d, (intop))]>;
// Instruction class for reading a special register into a 32-bit register.
//   regname - register name spliced into the assembly after the '%' sigil.
//   intop   - zero-operand intrinsic whose result is written to $d.
// The assembly string is assembled with the paste operator and expands to
// "mov.u32 \t$d, %<regname>;".
7483 class PTX_READ_SREG_R32<string regname, Intrinsic intop>
7484 : NVPTXInst<(outs Int32Regs:$d), (ins),
7485 "mov.u32 \t$d, %" # regname # ";",
7486 [(set Int32Regs:$d, (intop))]>;
7488 // TODO: Add reads of the vector versions of the special registers.
// One instruction per special register. Each def pairs the register name that
// is printed after '%' with the int_nvvm_read_ptx_sreg_* intrinsic it selects.

// %tid.{x,y,z,w}
def INT_PTX_SREG_TID_X
    : PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y
    : PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z
    : PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
def INT_PTX_SREG_TID_W
    : PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;

// %ntid.{x,y,z,w}
def INT_PTX_SREG_NTID_X
    : PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y
    : PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z
    : PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
def INT_PTX_SREG_NTID_W
    : PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;

// %laneid, %warpid, %nwarpid
def INT_PTX_SREG_LANEID
    : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID
    : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID
    : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;

// %ctaid.{x,y,z,w}
def INT_PTX_SREG_CTAID_X
    : PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y
    : PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z
    : PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
def INT_PTX_SREG_CTAID_W
    : PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;

// %nctaid.{x,y,z,w}
def INT_PTX_SREG_NCTAID_X
    : PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y
    : PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z
    : PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
def INT_PTX_SREG_NCTAID_W
    : PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;

// %smid, %nsmid, %gridid
def INT_PTX_SREG_SMID
    : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID
    : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID
    : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;

// %lanemask_{eq,le,lt,ge,gt}
def INT_PTX_SREG_LANEMASK_EQ
    : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE
    : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT
    : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE
    : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT
    : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;

// %clock is read into a 32-bit register, %clock64 into a 64-bit register.
def INT_PTX_SREG_CLOCK
    : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64
    : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;

// %pm0 .. %pm3
def INT_PTX_SREG_PM0
    : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1
    : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2
    : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3
    : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
// TODO: It would be nice to build this from the PTX_READ_SREG_* classes, but
// they always print a '%' before the name and so cannot express the WARP_SZ
// constant used here.
def INT_PTX_SREG_WARPSIZE
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins),
                "mov.u32 \t$dst, WARP_SZ;",
                [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
7567 // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
7568 // In addition to target-independent fields provided by WMMA_REGS, it adds
7569 // the fields commonly used to implement specific PTX instruction -- register
7570 // types and names, constraints, parts of assembly, etc.
//
// Template arguments:
//   r  - fragment description; its geom, frag and ptx_elt_type fields are
//        forwarded to the WMMA_REGS base class.
//   op - name of the operation the fragment is used with. It is consulted by
//        some of the Predicates clauses below (e.g. the "ldmatrix" clause).
7571 class WMMA_REGINFO<WMMA_REGS r, string op>
7572 : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
7573 // NVPTX register types used to carry fragment data.
// f16 maps to Float16x2Regs, f32/f64 to the float register class of the same
// width, and every other listed element type to Int32Regs.
7574 NVPTXRegClass regclass = !cond(
7575 !eq(ptx_elt_type, "f16") : Float16x2Regs,
7576 !eq(ptx_elt_type, "f32") : Float32Regs,
7577 !eq(ptx_elt_type, "f64") : Float64Regs,
7578 !eq(ptx_elt_type, "bf16") : Int32Regs,
7579 !eq(ptx_elt_type, "tf32") : Int32Regs,
7580 !eq(ptx_elt_type, "s32") : Int32Regs,
7581 !eq(ptx_elt_type, "b16") : Int32Regs,
7582 !eq(ptx_elt_type, "s8") : Int32Regs,
7583 !eq(ptx_elt_type, "u8") : Int32Regs,
7584 !eq(ptx_elt_type, "s4") : Int32Regs,
7585 !eq(ptx_elt_type, "u4") : Int32Regs,
7586 !eq(ptx_elt_type, "b1") : Int32Regs);
7588 // Instruction input/output arguments for the fragment.
// One entry of `regclass` per element of `regs`.
7589 list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));
7591 // List of register names for the fragment -- ["ra0", "ra1",...]
// The name prefix is "r" followed by the fragment name.
7592 list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
7594 // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
7595 string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";
7597 // Predicates for particular fragment variant. Technically those are
7598 // per-instruction predicates, but currently all fragments that can be used in
7599 // a given instruction are subject to the same constraints, so an instruction
7600 // can use predicates from any of its fragments. If/when this is no
7601 // longer the case, we can concat all per-fragment predicates to enforce that
7602 // all fragments of the instruction are viable.
// !cond takes the value of the first clause whose condition holds, so the
// order of the clauses below is significant.
7603 list<Predicate> Predicates = !cond(
7604 // fp16 -> fp16/fp32 @ m16n16k16
7605 !and(!eq(geom, "m16n16k16"),
7606 !or(!eq(ptx_elt_type, "f16"),
7607 !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],
// f64 @ m8n8k4
7609 !and(!eq(geom,"m8n8k4"),
7610 !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],
7612 // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
7613 !and(!or(!eq(geom, "m8n32k16"),
7614 !eq(geom, "m32n8k16")),
7615 !or(!eq(ptx_elt_type, "f16"),
7616 !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],
7618 // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
7619 !and(!or(!eq(geom,"m16n16k16"),
7620 !eq(geom,"m8n32k16"),
7621 !eq(geom,"m32n8k16")),
7622 !or(!eq(ptx_elt_type, "u8"),
7623 !eq(ptx_elt_type, "s8"),
7624 !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],
// bf16 @ m16n16k16/m8n32k16/m32n8k16
7626 !and(!or(!eq(geom,"m16n16k16"),
7627 !eq(geom,"m8n32k16"),
7628 !eq(geom,"m32n8k16")),
7629 !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],
// tf32 @ m16n16k8
7631 !and(!eq(geom,"m16n16k8"),
7632 !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70],
// f32 @ m16n16k8
7634 !and(!eq(geom,"m16n16k8"),
7635 !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70],
7637 // b1 -> s32 @ m8n8k128(b1)
7639 !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63],
7641 // u4/s4 -> s32 @ m8n8k32 (u4/s4)
7643 !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],
// m16n8k8/m8n8k16, regardless of element type
7645 !or(!eq(geom,"m16n8k8"),
7646 !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65],
// any element type other than f64 @ m8n8k4
7648 !and(!ne(ptx_elt_type,"f64"),
7649 !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],
7651 // mma m8n8k32 requires higher PTX version
7653 !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65],
// NOTE(review): same condition and result as the f64 @ m8n8k4 clause near the
// top of this !cond; that earlier clause is the one that takes effect.
7655 !and(!eq(ptx_elt_type,"f64"),
7656 !eq(geom, "m8n8k4")) : [hasSM80, hasPTX70],
7659 !or(!eq(geom, "m16n8k16"),
7660 !eq(geom, "m16n8k4"),
7661 !eq(geom, "m16n8k32"),
7662 !eq(geom, "m16n8k64"),
7663 !eq(geom, "m8n8k128"),
7664 !eq(geom, "m16n8k128"),
7665 !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],
// ldmatrix: b16 @ m8n8
7667 !and(!eq(op,"ldmatrix"),
7668 !eq(ptx_elt_type,"b16"),
7669 !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);
7671 // template DAGs for instruction inputs/output.
// Both list ptx_regs under reg_names; only the dag operator (outs vs ins)
// differs.
7672 dag Outs = !dag(outs, ptx_regs, reg_names);
7673 dag Ins = !dag(ins, ptx_regs, reg_names);
7676 // Convert dag of arguments into a dag to match given intrinsic.
//   Intr - intrinsic that becomes the operator of the resulting dag.
//   Ins  - dag of instruction operands, headed by `ins`.
7677 class BuildPatternI<Intrinsic Intr, dag Ins> {
7678 // Build a dag pattern that matches the intrinsic call.
// `ret` is `Ins` with the `ins` operator replaced by `Intr` and the operand
// classes MEMri, MEMri64 and imem replaced by ADDRri, ADDRri64 and ADDRvar
// respectively. The substitutions are nested, so `ins` is rewritten first
// and `imem` last.
7679 dag ret = !foreach(tmp, Ins,
7680 !subst(imem, ADDRvar,
7681 !subst(MEMri64, ADDRri64,
7682 !subst(MEMri, ADDRri,
7683 !subst(ins, Intr, tmp)))));
7686 // Same as above, but uses PatFrag instead of an Intrinsic.
//   Intr - PatFrag that becomes the operator of the resulting dag.
//   Ins  - dag of instruction operands, headed by `ins`.
7687 class BuildPatternPF<PatFrag Intr, dag Ins> {
7688 // Build a dag pattern that matches the intrinsic call.
// `ret` is `Ins` with the `ins` operator replaced by `Intr` and the operand
// classes MEMri, MEMri64 and imem replaced by ADDRri, ADDRri64 and ADDRvar
// respectively.
7689 dag ret = !foreach(tmp, Ins,
7690 !subst(imem, ADDRvar,
7691 !subst(MEMri64, ADDRri64,
7692 !subst(MEMri, ADDRri,
7693 !subst(ins, Intr, tmp)))));
7696 // Common WMMA-related fields used for building patterns for all MMA instructions.
//   _Intr - name of the intrinsic record; it is looked up with !cast.
//   _Args - list of operand dags that are concatenated into `Args`.
// The NVPTXInst base is given empty operand lists, a "?" asm string and an
// empty pattern list here; the subclasses in this file set OutOperandList,
// InOperandList and AsmString with `let`.
7697 class WMMA_INSTR<string _Intr, list<dag> _Args>
7698 : NVPTXInst<(outs), (ins), "?", []> {
7699 Intrinsic Intr = !cast<Intrinsic>(_Intr);
7700 // Concatenate all arguments into a single dag.
// The fold starts from an empty (ins) dag, so the result is headed by `ins`.
7701 dag Args = !foldl((ins), _Args, a, b, !con(a,b));
7702 // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
// Subclasses that need an address-space-constrained match override this
// field with a BuildPatternPF result.
7703 dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
7707 // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
// Inputs are $src, followed by $ldm (Int32Regs) only when WithStride is set.
// The instruction is guarded by the predicates of the loaded fragment.
7710 class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
7712 : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
7713 [!con((ins SrcOp:$src),
7714 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7715 Requires<Frag.Predicates> {
7716 // Load/store intrinsics are overloaded on pointer's address space.
7717 // To match the right intrinsic, we need to build AS-constrained PatFrag.
7718 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
7719 dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
// Same operand list as PFOperands, headed by Intr instead of `ops`.
7720 dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
7721 // Build PatFrag that only matches particular address space.
// Any Space other than ".shared" or ".global" selects AS_match.generic.
7722 PatFrag IntrFrag = PatFrag<PFOperands,
7724 !cond(!eq(Space, ".shared"): AS_match.shared,
7725 !eq(Space, ".global"): AS_match.global,
7726 true: AS_match.generic)>;
7727 // Build AS-constrained pattern.
7728 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
// The fragment registers are the outputs; the inputs are Args followed by an
// extra MmaCode:$ptx operand.
7730 let OutOperandList = Frag.Outs;
7731 let InOperandList = !con(Args, (ins MmaCode:$ptx));
7732 let AsmString = "wmma.load."
7739 # "." # Frag.ptx_elt_type # " \t"
// The stride operand is printed only for the strided variants.
7742 # !if(WithStride, ", $ldm", "")
7747 // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
// Inputs start with $dst and end with $ldm (Int32Regs) when WithStride is set.
// The instruction is guarded by the predicates of the stored fragment.
7749 class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
7750 bit WithStride, DAGOperand DstOp>
7751 : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
7752 [!con((ins DstOp:$dst),
7754 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7755 Requires<Frag.Predicates> {
7757 // Load/store intrinsics are overloaded on pointer's address space.
7758 // To match the right intrinsic, we need to build AS-constrained PatFrag.
7759 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
// Order: $dst, one node per fragment register (named after Frag.reg_names),
// then $ldm when WithStride is set.
7760 dag PFOperands = !con((ops node:$dst),
7761 !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
7762 !if(WithStride, (ops node:$ldm), (ops)));
7763 // Build PatFrag that only matches particular address space.
// The fragment body is PFOperands with `ops` replaced by the intrinsic. Any
// Space other than ".shared" or ".global" selects AS_match.generic.
7764 PatFrag IntrFrag = PatFrag<PFOperands,
7765 !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7766 !cond(!eq(Space, ".shared"): AS_match.shared,
7767 !eq(Space, ".global"): AS_match.global,
7768 true: AS_match.generic)>;
7769 // Build AS-constrained pattern.
7770 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
// Inputs are Args followed by an extra MmaCode:$ptx operand; the store has
// no output operands.
7772 let InOperandList = !con(Args, (ins MmaCode:$ptx));
7773 let OutOperandList = (outs);
7774 let AsmString = "wmma.store.d.sync"
7779 # "." # Frag.ptx_elt_type
// The stride operand is printed only for the strided variants.
7782 # !if(WithStride, ", $ldm", "")
7786 // Create all load/store variants
// The defs created here are collected in the MMA_LDSTs list, which is later
// concatenated with MMAs, WMMAs and LDMATRIXs to build the patterns.
7787 defset list<WMMA_INSTR> MMA_LDSTs = {
7788 foreach layout = ["row", "col"] in {
7789 foreach stride = [false, true] in {
// The empty space string falls through to the generic address-space matcher
// in WMMA_LOAD / WMMA_STORE_D.
7790 foreach space = [".global", ".shared", ""] in {
// `addr` is passed as the last template argument of WMMA_LOAD/WMMA_STORE_D,
// i.e. the operand class of the pointer operand.
7791 foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
// Only fragment/layout pairs accepted by NVVM_WMMA_LDST_SUPPORTED get a def.
7792 foreach frag = NVVM_MMA_OPS.all_ld_ops in
7793 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
7794 def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
7795 foreach frag = NVVM_MMA_OPS.all_st_ops in
7796 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
7797 def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
7804 // B1 instruction variants need extra constraints.
//   FragA - fragment whose description is stored in `Frag`.
//   b1op  - b1 operation suffix; only ".and.popc" adds predicates here.
7805 class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
7807 WMMA_REGINFO Frag = FragA;
7808 list<Predicate> ret = !listconcat(
// ".and.popc" contributes [hasSM80, hasPTX71]; any other b1op contributes an
// empty list.
7810 !if(!eq(b1op, ".and.popc"), [hasSM80,hasPTX71],[])
// wmma.mma instruction. The inputs are the registers of fragments A, B and C
// (in that order) followed by an extra MmaCode:$ptx operand; the outputs are
// the registers of fragment D.
7814 class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
7815 WMMA_REGINFO FragC, WMMA_REGINFO FragD,
7816 string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
7817 : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
7818 [FragA.Ins, FragB.Ins, FragC.Ins]>,
7819 // Requires does not seem to have effect on Instruction w/o Patterns.
7820 // We set it here anyways and propagate to the Pat<> we construct below.
7821 Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
7822 let OutOperandList = FragD.Outs;
7823 let InOperandList = !con(Args, (ins MmaCode:$ptx));
// Type suffix of the instruction: for f16 A fragments only the D and C
// element types are spelled out; otherwise the D, A, B and C element types
// are listed in that order.
7824 string TypeList = !cond(
7825 !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
7826 # "." # FragC.ptx_elt_type,
7827 1: "." # FragD.ptx_elt_type
7828 # "." # FragA.ptx_elt_type
7829 # "." # FragB.ptx_elt_type
7830 # "." # FragC.ptx_elt_type,
7832 let AsmString = "wmma.mma"
// An empty rnd adds no rounding-mode suffix.
7839 # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
7841 # !if(Satfinite, ".satfinite", "") # "\n\t\t"
// Register groups are printed in D, A, B, C order, one group per line.
7842 # FragD.regstring # ",\n\t\t"
7843 # FragA.regstring # ",\n\t\t"
7844 # FragB.regstring # ",\n\t\t"
7845 # FragC.regstring # ";";
// Create the wmma.mma variants. The defs created here are collected in the
// WMMAs list, which is later concatenated with the other instruction lists
// to build the patterns.
7848 defset list<WMMA_INSTR> WMMAs = {
7849 foreach layout_a = ["row", "col"] in {
7850 foreach layout_b = ["row", "col"] in {
7851 foreach satf = [0, 1] in {
// The empty string is the variant without a rounding-mode suffix.
7852 foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
7853 foreach op = NVVM_MMA_OPS.all_wmma_ops in {
7854 foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
// Only combinations accepted by NVVM_WMMA_SUPPORTED get a def.
7855 if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
// op[0], op[1], op[2] and op[3] become fragments A, B, C and D.
7856 def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
7857 WMMA_REGINFO<op[1], "wmma.mma">,
7858 WMMA_REGINFO<op[2], "wmma.mma">,
7859 WMMA_REGINFO<op[3], "wmma.mma">,
7860 layout_a, layout_b, satf, rnd, b1op>;
// mma.sync.aligned instruction. The inputs are the registers of fragments A,
// B and C (in that order) followed by an extra MmaCode:$ptx operand; the
// outputs are the registers of fragment D.
7871 class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
7872 WMMA_REGINFO FragC, WMMA_REGINFO FragD,
7873 string ALayout, string BLayout, int Satfinite, string b1op>
7874 : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
7875 [FragA.Ins, FragB.Ins, FragC.Ins]>,
7876 // Requires does not seem to have effect on Instruction w/o Patterns.
7877 // We set it here anyways and propagate to the Pat<> we construct below.
7878 Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
7879 let OutOperandList = FragD.Outs;
7880 let InOperandList = !con(Args, (ins MmaCode:$ptx));
// Type suffix: the D, A, B and C element types are always all listed, in
// that order.
7881 string TypeList = "." # FragD.ptx_elt_type
7882 # "." # FragA.ptx_elt_type
7883 # "." # FragB.ptx_elt_type
7884 # "." # FragC.ptx_elt_type;
7885 let AsmString = "mma.sync.aligned."
7889 # !if(Satfinite, ".satfinite", "")
// Register groups are printed in D, A, B, C order, one group per line.
7892 # FragD.regstring # ",\n\t\t"
7893 # FragA.regstring # ",\n\t\t"
7894 # FragB.regstring # ",\n\t\t"
7895 # FragC.regstring # ";";
// Create the mma variants. The defs created here are collected in the MMAs
// list, which is later concatenated with the other instruction lists to
// build the patterns.
7898 defset list<WMMA_INSTR> MMAs = {
7899 foreach layout_a = ["row", "col"] in {
7900 foreach layout_b = ["row", "col"] in {
7901 foreach satf = [0, 1] in {
7902 foreach op = NVVM_MMA_OPS.all_mma_ops in {
7903 foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
// Only combinations accepted by NVVM_MMA_SUPPORTED get a def.
7904 if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
// op[0], op[1], op[2] and op[3] become fragments A, B, C and D.
7905 def : MMA<WMMA_REGINFO<op[0], "mma">,
7906 WMMA_REGINFO<op[1], "mma">,
7907 WMMA_REGINFO<op[2], "mma">,
7908 WMMA_REGINFO<op[3], "mma">,
7909 layout_a, layout_b, satf, b1op>;
7919 // ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
// The only argument taken from the intrinsic is $src. The instruction is
// guarded by the predicates of the loaded fragment.
7921 class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
7923 : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
7924 Requires<Frag.Predicates> {
7925 // Build PatFrag that only matches particular address space.
// Any Space other than ".shared" selects AS_match.generic.
7926 PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
7927 !cond(!eq(Space, ".shared"): AS_match.shared,
7928 true: AS_match.generic)>;
7929 // Build AS-constrained pattern.
7930 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
// The fragment registers are the outputs; the inputs are Args followed by an
// extra MmaCode:$ptx operand.
7932 let OutOperandList = Frag.Outs;
7933 let InOperandList = !con(Args, (ins MmaCode:$ptx));
7934 let AsmString = "ldmatrix.sync.aligned."
// ".trans" is emitted only for the transposed variants.
7937 # !if(Transposed, ".trans", "")
7939 # "." # Frag.ptx_elt_type
7940 # " " # Frag.regstring # ", [$src];";
7943 // Create all ldmatrix variants
// The defs created here are collected in the LDMATRIXs list, which is later
// concatenated with the other instruction lists to build the patterns.
7944 defset list<WMMA_INSTR> LDMATRIXs = {
7945 foreach transposed = [false, true] in {
// The empty space string falls through to the generic address-space matcher
// in LDMATRIX.
7946 foreach space = [".shared", ""] in {
7947 foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
// Only fragments accepted by NVVM_LDMATRIX_SUPPORTED get a def.
7948 foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
7949 if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
7950 def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
7957 // Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
7958 // dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
7959 // the instruction record.
//
// Selection pattern for one MMA-family instruction `wi`: the source side is
// wi.IntrinsicPattern, and the result side starts from wi.Args with the `ins`
// operator replaced by the instruction record itself. The pattern carries
// the same predicates as the instruction.
7960 class MMA_PAT<WMMA_INSTR wi>
7961 : Pat<wi.IntrinsicPattern,
7962 !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
7964 Requires<wi.Predicates>;
7966 // Build intrinsic->instruction patterns for all MMA instructions.
7967 foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in