1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 def immFloat0 : PatLeaf<(fpimm), [{
10 float f = (float)N->getValueAPF().convertToFloat();
14 def immFloat1 : PatLeaf<(fpimm), [{
15 float f = (float)N->getValueAPF().convertToFloat();
19 def immDouble0 : PatLeaf<(fpimm), [{
20 double d = (double)N->getValueAPF().convertToDouble();
24 def immDouble1 : PatLeaf<(fpimm), [{
25 double d = (double)N->getValueAPF().convertToDouble();
31 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
34 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
37 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
41 // A node that will be replaced with the current PTX version.
43 SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
44 return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
46 // (i32 0) will be XForm'ed to the currently used PTX version.
47 dag version = (PTXVerXform (i32 0));
51 // Generates list of n sequential register names.
52 // E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2"]
53 class RegSeq<int n, string prefix> {
54 list<string> ret = !if(n, !listconcat(RegSeq<!add(n,-1), prefix>.ret,
55 [prefix # !add(n, -1)]),
59 //-----------------------------------
60 // Synchronization and shuffle functions
61 //-----------------------------------
62 let isConvergent = 1 in {
63 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
65 [(int_nvvm_barrier0)]>;
66 def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
68 [(int_nvvm_barrier_n Int32Regs:$src1)]>;
// Two-register form of bar.sync: takes both operands in Int32Regs, prints
// "bar.sync $src1, $src2;" and is selected for the int_nvvm_barrier intrinsic.
// It produces no result (empty outs).
69 def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
70 "bar.sync \t$src1, $src2;",
71 [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
72 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
74 ".reg .pred \t%p1; \n\t",
75 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
76 "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
78 [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
79 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
81 ".reg .pred \t%p1; \n\t",
82 ".reg .pred \t%p2; \n\t",
83 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
84 "bar.red.and.pred \t%p2, 0, %p1; \n\t",
85 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
87 [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
88 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
90 ".reg .pred \t%p1; \n\t",
91 ".reg .pred \t%p2; \n\t",
92 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
93 "bar.red.or.pred \t%p2, 0, %p1; \n\t",
94 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
96 [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// Immediate-operand form of bar.sync: matches int_nvvm_bar_sync when its
// argument is an immediate and prints it directly as "bar.sync $i;".
98 def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
99 [(int_nvvm_bar_sync imm:$i)]>;
// bar.warp.sync for the int_nvvm_bar_warp_sync intrinsic.
// Two variants differ only in how the single operand is supplied:
//   _I : operand is an immediate (i32imm / imm).
//   _R : operand lives in an Int32Regs register.
// Both are guarded by the hasPTX60 and hasSM30 predicates.
101 def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
102 [(int_nvvm_bar_warp_sync imm:$i)]>,
103 Requires<[hasPTX60, hasSM30]>;
104 def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
105 [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
106 Requires<[hasPTX60, hasSM30]>;
// Single-operand barrier.sync for the int_nvvm_barrier_sync intrinsic.
//   _I : operand is an immediate.
//   _R : operand is an Int32Regs register.
// Both are guarded by the hasPTX60 and hasSM30 predicates.
108 def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
109 [(int_nvvm_barrier_sync imm:$i)]>,
110 Requires<[hasPTX60, hasSM30]>;
111 def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
112 [(int_nvvm_barrier_sync Int32Regs:$i)]>,
113 Requires<[hasPTX60, hasSM30]>;
// Two-operand barrier.sync ("barrier.sync $id, $cnt;") for the
// int_nvvm_barrier_sync_cnt intrinsic.
// The suffix encodes the operand kinds in order ($id, $cnt):
//   R = Int32Regs register, I = i32imm immediate.
// All four register/immediate combinations are provided, and every variant is
// guarded by the hasPTX60 and hasSM30 predicates.
115 def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
116 "barrier.sync \t$id, $cnt;",
117 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
118 Requires<[hasPTX60, hasSM30]>;
// $id in a register, $cnt immediate.
119 def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
120 "barrier.sync \t$id, $cnt;",
121 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
122 Requires<[hasPTX60, hasSM30]>;
// $id immediate, $cnt in a register.
123 def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
124 "barrier.sync \t$id, $cnt;",
125 [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
126 Requires<[hasPTX60, hasSM30]>;
// Both operands immediate.
127 def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
128 "barrier.sync \t$id, $cnt;",
129 [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
130 Requires<[hasPTX60, hasSM30]>;
133 // shfl.{up,down,bfly,idx}.b32
134 multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
135 // The last two parameters to shfl can be regs or imms. ptxas is smart
136 // enough to inline constant registers, so strictly speaking we don't need to
137 // handle immediates here. But it's easy enough, and it makes our ptx more
140 (outs regclass:$dst),
141 (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
142 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
143 [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>;
145 def imm1 : NVPTXInst<
146 (outs regclass:$dst),
147 (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
148 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
149 [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>;
151 def imm2 : NVPTXInst<
152 (outs regclass:$dst),
153 (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
154 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
155 [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>;
157 def imm3 : NVPTXInst<
158 (outs regclass:$dst),
159 (ins regclass:$src, i32imm:$offset, i32imm:$mask),
160 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
161 [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, imm:$mask))]>;
// Instantiate the SHFL multiclass once per (mode, value register class) pair.
// Arguments are: register class of the shuffled value, the mode string spliced
// into "shfl.<mode>.b32", and the intrinsic to match.
// The _I32 variants use Int32Regs and the _F32 variants use Float32Regs; both
// print the same ".b32" mnemonic.
164 defm INT_SHFL_DOWN_I32 : SHFL<Int32Regs, "down", int_nvvm_shfl_down_i32>;
165 defm INT_SHFL_DOWN_F32 : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;
166 defm INT_SHFL_UP_I32 : SHFL<Int32Regs, "up", int_nvvm_shfl_up_i32>;
167 defm INT_SHFL_UP_F32 : SHFL<Float32Regs, "up", int_nvvm_shfl_up_f32>;
168 defm INT_SHFL_BFLY_I32 : SHFL<Int32Regs, "bfly", int_nvvm_shfl_bfly_i32>;
169 defm INT_SHFL_BFLY_F32 : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;
170 defm INT_SHFL_IDX_I32 : SHFL<Int32Regs, "idx", int_nvvm_shfl_idx_i32>;
171 defm INT_SHFL_IDX_F32 : SHFL<Float32Regs, "idx", int_nvvm_shfl_idx_f32>;
173 multiclass SHFL_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
174 // Threadmask and the last two parameters to shfl.sync can be regs or imms.
175 // ptxas is smart enough to inline constant registers, so strictly speaking we
176 // don't need to handle immediates here. But it's easy enough, and it makes
177 // our ptx more readable.
179 (outs regclass:$dst),
180 (ins Int32Regs:$threadmask, regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
181 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
182 [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
183 Int32Regs:$offset, Int32Regs:$mask))]>;
186 (outs regclass:$dst),
187 (ins Int32Regs:$threadmask, regclass:$src, Int32Regs:$offset, i32imm:$mask),
188 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
189 [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
190 Int32Regs:$offset, imm:$mask))]>;
193 (outs regclass:$dst),
194 (ins Int32Regs:$threadmask, regclass:$src, i32imm:$offset, Int32Regs:$mask),
195 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
196 [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
197 imm:$offset, Int32Regs:$mask))]>;
200 (outs regclass:$dst),
201 (ins Int32Regs:$threadmask, regclass:$src, i32imm:$offset, i32imm:$mask),
202 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
203 [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
204 imm:$offset, imm:$mask))]>;
207 (outs regclass:$dst),
208 (ins i32imm:$threadmask, regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
209 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
210 [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
211 Int32Regs:$offset, Int32Regs:$mask))]>;
214 (outs regclass:$dst),
215 (ins i32imm:$threadmask, regclass:$src, Int32Regs:$offset, i32imm:$mask),
216 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
217 [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
218 Int32Regs:$offset, imm:$mask))]>;
221 (outs regclass:$dst),
222 (ins i32imm:$threadmask, regclass:$src, i32imm:$offset, Int32Regs:$mask),
223 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
224 [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
225 imm:$offset, Int32Regs:$mask))]>;
228 (outs regclass:$dst),
229 (ins i32imm:$threadmask, regclass:$src, i32imm:$offset, i32imm:$mask),
230 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
231 [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
232 imm:$offset, imm:$mask))]>;
// Instantiate the SHFL_SYNC multiclass once per (mode, value register class)
// pair. Arguments are: register class of the shuffled value, the mode string
// spliced into "shfl.sync.<mode>.b32", and the intrinsic to match.
//
// On sm_70 these don't have to be convergent, so we may eventually want to
// implement a non-convergent variant of these intrinsics.
237 defm INT_SHFL_SYNC_DOWN_I32 : SHFL_SYNC<Int32Regs, "down", int_nvvm_shfl_sync_down_i32>;
238 defm INT_SHFL_SYNC_DOWN_F32 : SHFL_SYNC<Float32Regs, "down", int_nvvm_shfl_sync_down_f32>;
239 defm INT_SHFL_SYNC_UP_I32 : SHFL_SYNC<Int32Regs, "up", int_nvvm_shfl_sync_up_i32>;
240 defm INT_SHFL_SYNC_UP_F32 : SHFL_SYNC<Float32Regs, "up", int_nvvm_shfl_sync_up_f32>;
241 defm INT_SHFL_SYNC_BFLY_I32 : SHFL_SYNC<Int32Regs, "bfly", int_nvvm_shfl_sync_bfly_i32>;
242 defm INT_SHFL_SYNC_BFLY_F32 : SHFL_SYNC<Float32Regs, "bfly", int_nvvm_shfl_sync_bfly_f32>;
243 defm INT_SHFL_SYNC_IDX_I32 : SHFL_SYNC<Int32Regs, "idx", int_nvvm_shfl_sync_idx_i32>;
244 defm INT_SHFL_SYNC_IDX_F32 : SHFL_SYNC<Float32Regs, "idx", int_nvvm_shfl_sync_idx_f32>;
247 // vote.{all,any,uni,ballot}
248 multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
249 def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
250 "vote." # mode # " \t$dest, $pred;",
251 [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
252 Requires<[hasPTX60, hasSM30]>;
// Instantiate the VOTE multiclass for each vote mode. Arguments are: the
// result register class, the mode string appended to "vote.", and the
// intrinsic to match. all/any/uni produce a predicate (Int1Regs); ballot
// produces a 32-bit value (Int32Regs).
255 defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
256 defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
257 defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
258 defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
260 // vote.sync.{all,any,uni,ballot}
261 multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
262 def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
263 "vote.sync." # mode # " \t$dest, $pred, $mask;",
264 [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
265 Requires<[hasPTX60, hasSM30]>;
266 def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
267 "vote.sync." # mode #" \t$dest, $pred, $mask;",
268 [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
269 Requires<[hasPTX60, hasSM30]>;
// Instantiate the VOTE_SYNC multiclass for each vote mode. Arguments are: the
// result register class, the mode string appended to "vote.sync.", and the
// intrinsic to match. Each instantiation yields an immediate-mask ("i") and a
// register-mask ("r") instruction.
272 defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
273 defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
274 defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
275 defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
277 multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
279 def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
280 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
281 [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
282 Requires<[hasPTX60, hasSM70]>;
283 def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
284 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
285 [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
286 Requires<[hasPTX60, hasSM70]>;
287 def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
288 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
289 [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
290 Requires<[hasPTX60, hasSM70]>;
291 def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
292 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
293 [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
294 Requires<[hasPTX60, hasSM70]>;
297 defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
299 defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
302 multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
304 def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
305 (ins i32imm:$mask, ImmOp:$value),
306 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
307 [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
308 Requires<[hasPTX60, hasSM70]>;
309 def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
310 (ins Int32Regs:$mask, ImmOp:$value),
311 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
312 [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
313 Requires<[hasPTX60, hasSM70]>;
314 def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
315 (ins i32imm:$mask, regclass:$value),
316 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
317 [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
318 Requires<[hasPTX60, hasSM70]>;
319 def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
320 (ins Int32Regs:$mask, regclass:$value),
321 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
322 [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
323 Requires<[hasPTX60, hasSM70]>;
325 defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
327 defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
330 } // isConvergent = 1
332 //-----------------------------------
333 // Explicit Memory Fence Functions
334 //-----------------------------------
335 class MEMBAR<string StrOp, Intrinsic IntOP> :
336 NVPTXInst<(outs), (ins),
// One MEMBAR instruction per fence variant. Each passes the complete PTX text
// (including the trailing ';') and the intrinsic it corresponds to:
//   membar.cta, membar.gl, membar.sys.
339 def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
340 def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
341 def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
344 //-----------------------------------
346 //-----------------------------------
348 // Map min(1.0, max(0.0, x)) to sat(x)
349 // Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
351 // max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
352 // Same story for fmax, fmin.
// f32: fold fmin(immFloat1, fmax(immFloat0, a)) into a single saturating
// convert, CVT_f32_f32 with the CvtSAT modifier. The four patterns cover every
// operand ordering of the outer fmin and the inner fmax.
// Constant first in both fmin and fmax.
354 def : Pat<(int_nvvm_fmin_f immFloat1,
355 (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
356 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// Constant first in fmin, second in fmax.
357 def : Pat<(int_nvvm_fmin_f immFloat1,
358 (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
359 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// Constant second in fmin, first in fmax.
360 def : Pat<(int_nvvm_fmin_f
361 (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
362 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// Constant second in both fmin and fmax.
363 def : Pat<(int_nvvm_fmin_f
364 (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
365 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// f64: fold fmin(immDouble1, fmax(immDouble0, a)) into CVT_f64_f64 with the
// CvtSAT modifier. As for f32, all four operand orderings of the outer fmin
// and inner fmax are matched.
// Constant first in both fmin and fmax.
367 def : Pat<(int_nvvm_fmin_d immDouble1,
368 (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
369 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// Constant first in fmin, second in fmax.
370 def : Pat<(int_nvvm_fmin_d immDouble1,
371 (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
372 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// Constant second in fmin, first in fmax.
373 def : Pat<(int_nvvm_fmin_d
374 (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
375 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// Constant second in both fmin and fmax.
376 def : Pat<(int_nvvm_fmin_d
377 (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
378 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
381 // We need a full string for OpcStr here because we need to deal with case like
383 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
384 NVPTXRegClass src_regclass, Intrinsic IntOP>
385 : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
387 [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
389 // We need a full string for OpcStr here because we need to deal with cases
390 // like INT_PTX_NATIVE_POWR_F.
391 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
392 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
393 : NVPTXInst<(outs t_regclass:$dst),
394 (ins s0_regclass:$src0, s1_regclass:$src1),
396 [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
398 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
399 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
400 NVPTXRegClass s2_regclass, Intrinsic IntOP>
401 : NVPTXInst<(outs t_regclass:$dst),
402 (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
404 [(set t_regclass:$dst,
405 (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
// prmt.b32: three Int32Regs sources, one Int32Regs result, selected for the
// int_nvvm_prmt intrinsic.
411 def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
412 Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
// Floating-point min/max, two sources and one result of the same register
// class. f32 has a plain and an ".ftz" form, each bound to its own intrinsic.
418 def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
419 Float32Regs, Float32Regs, int_nvvm_fmin_f>;
420 def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
421 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
423 def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
424 Float32Regs, Float32Regs, int_nvvm_fmax_f>;
425 def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
426 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
// f64 min/max; only the non-ftz form is defined here.
428 def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
429 Float64Regs, Float64Regs, int_nvvm_fmin_d>;
430 def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
431 Float64Regs, Float64Regs, int_nvvm_fmax_d>;
// mul.hi for 32- and 64-bit integers. The signed (s32/s64) and unsigned
// (u32/u64) forms use the same register class and differ only in the PTX type
// suffix and the intrinsic matched.
438 def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
439 Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
440 def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
441 Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
443 def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
444 Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
445 def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
446 Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
// Floating-point multiply with an explicit rounding-mode suffix
// (.rn, .rz, .rm, .rp). Each f32 rounding mode has an ".ftz" and a plain form;
// every definition is bound to the identically named int_nvvm_mul_* intrinsic.
448 def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
449 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
450 def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
451 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
452 def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
453 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
454 def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
455 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
456 def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
457 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
458 def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
459 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
460 def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
461 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
462 def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
463 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
// f64 multiply: one definition per rounding mode, no ftz forms.
465 def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
466 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
467 def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
468 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
469 def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
470 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
471 def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
472 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
// mul24.lo in signed (s32) and unsigned (u32) forms, both on Int32Regs.
474 def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
475 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
476 def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
477 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
// Floating-point divide.
// f32 approximate divide (div.approx), with and without ".ftz".
483 def INT_NVVM_DIV_APPROX_FTZ_F
484 : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
485 Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
486 def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
487 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
// f32 divide with an explicit rounding mode (.rn, .rz, .rm, .rp), each with
// an ".ftz" and a plain form.
489 def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
490 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
491 def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
492 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
493 def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
494 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
495 def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
496 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
497 def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
498 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
499 def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
500 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
501 def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
502 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
503 def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
504 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
// f64 divide: one definition per rounding mode, no ftz or approx forms.
506 def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
507 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
508 def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
509 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
510 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
511 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
512 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
513 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
// sad (three Int32Regs sources, one Int32Regs result) in signed (s32) and
// unsigned (u32) forms.
519 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
520 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
521 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
522 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
// floor is not a separate instruction here: each floor intrinsic is rewritten
// to a same-type CVT with the CvtRMI modifier (CvtRMI_FTZ for the ftz form).
528 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
529 (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
530 def : Pat<(int_nvvm_floor_f Float32Regs:$a),
531 (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
532 def : Pat<(int_nvvm_floor_d Float64Regs:$a),
533 (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
// ceil likewise maps to a same-type CVT, using CvtRPI / CvtRPI_FTZ.
535 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
536 (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
537 def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
538 (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
539 def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
540 (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
// Absolute value: abs.ftz.f32, abs.f32 and abs.f64, each with one source and
// one result of the same register class.
546 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
547 Float32Regs, int_nvvm_fabs_ftz_f>;
548 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
549 Float32Regs, int_nvvm_fabs_f>;
551 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
552 Float64Regs, int_nvvm_fabs_d>;
// The round intrinsics are rewritten to a same-type CVT with the CvtRNI
// modifier (CvtRNI_FTZ for the ftz form).
558 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
559 (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
560 def : Pat<(int_nvvm_round_f Float32Regs:$a),
561 (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
562 def : Pat<(int_nvvm_round_d Float64Regs:$a),
563 (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
// The trunc intrinsics are rewritten to a same-type CVT with the CvtRZI
// modifier (CvtRZI_FTZ for the ftz form).
569 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
570 (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
571 def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
572 (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
573 def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
574 (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
// The saturate intrinsics are rewritten to a same-type CVT with the CvtSAT
// modifier (CvtSAT_FTZ for the ftz form).
580 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
581 (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
582 def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
583 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
584 def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
585 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// ex2.approx: f32 with and without ".ftz", plus an f64 form.
591 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
592 Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
593 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
594 Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
595 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
596 Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
// lg2.approx: same three variants as ex2 above.
598 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
599 Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
600 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
601 Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
602 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
603 Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
// sin.approx and cos.approx, f32 only, each with an ".ftz" and a plain form.
609 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
610 Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
611 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
612 Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
614 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
615 Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
616 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
617 Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
// Fused multiply-add with an explicit rounding mode (.rn, .rz, .rm, .rp).
// Three sources and one result share a register class. f32 has an ".ftz" and a
// plain form per rounding mode.
623 def INT_NVVM_FMA_RN_FTZ_F
624 : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
625 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
626 def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
627 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
628 def INT_NVVM_FMA_RZ_FTZ_F
629 : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
630 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
631 def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
632 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
633 def INT_NVVM_FMA_RM_FTZ_F
634 : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
635 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
636 def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
637 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
638 def INT_NVVM_FMA_RP_FTZ_F
639 : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
640 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
641 def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
642 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
// f64 fma: one definition per rounding mode, no ftz forms.
644 def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
645 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
646 def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
647 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
648 def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
649 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
650 def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
651 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
// Reciprocal with an explicit rounding mode (.rn, .rz, .rm, .rp).
// f32 has an ".ftz" and a plain form per rounding mode.
657 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
658 Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
659 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
660 Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
661 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
662 Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
663 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
664 Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
665 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
666 Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
667 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
668 Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
669 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
670 Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
671 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
672 Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
// f64 reciprocal: one definition per rounding mode.
674 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
675 Float64Regs, int_nvvm_rcp_rn_d>;
676 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
677 Float64Regs, int_nvvm_rcp_rz_d>;
678 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
679 Float64Regs, int_nvvm_rcp_rm_d>;
680 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
681 Float64Regs, int_nvvm_rcp_rp_d>;
// The only approximate reciprocal defined in this group: f64 with ".ftz".
683 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
684 Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
// Square root. f32 offers each rounding mode (.rn, .rz, .rm, .rp) in an
// ".ftz" and a plain form, plus sqrt.approx in both forms.
690 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
691 Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
692 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
693 Float32Regs, int_nvvm_sqrt_rn_f>;
694 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
695 Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
696 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
697 Float32Regs, int_nvvm_sqrt_rz_f>;
698 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
699 Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
700 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
701 Float32Regs, int_nvvm_sqrt_rm_f>;
702 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
703 Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
704 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
705 Float32Regs, int_nvvm_sqrt_rp_f>;
// Approximate f32 square root.
706 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
707 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
708 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
709 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
// f64 square root: one definition per rounding mode.
711 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
712 Float64Regs, int_nvvm_sqrt_rn_d>;
713 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
714 Float64Regs, int_nvvm_sqrt_rz_d>;
715 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
716 Float64Regs, int_nvvm_sqrt_rm_d>;
717 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
718 Float64Regs, int_nvvm_sqrt_rp_d>;
// The generic int_nvvm_sqrt_f intrinsic has no instruction of its own; it is
// mapped onto one of the four f32 sqrt instructions according to the
// predicates attached to each pattern:
//   doF32FTZ + do_SQRTF32_RN -> INT_NVVM_SQRT_RN_FTZ_F
//   do_SQRTF32_RN            -> INT_NVVM_SQRT_RN_F
//   doF32FTZ                 -> INT_NVVM_SQRT_APPROX_FTZ_F
//   (no predicate)           -> INT_NVVM_SQRT_APPROX_F
// NOTE(review): several patterns can be eligible at once; presumably the more
// heavily predicated ones are intended to win - confirm how the selector
// prioritises them.
721 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
722 (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
723 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
724 (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
725 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
726 (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
727 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
728 (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
// rsqrt.approx: f32 with and without ".ftz", plus an f64 form.
734 def INT_NVVM_RSQRT_APPROX_FTZ_F
735 : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
736 int_nvvm_rsqrt_approx_ftz_f>;
737 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
738 Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
739 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
740 Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
// PTX add instruction records with an explicit rounding suffix. Each def
// hands F_MATH_2 an assembly string and the NVVM intrinsic with the matching
// suffix. F_MATH_2 is declared outside this group; from the uses here its
// arguments appear to be (asm string, result reg class, first operand reg
// class, second operand reg class, intrinsic) -- TODO confirm.
//
// f32 forms: .rn, .rz, .rm, .rp, each with and without .ftz.
746 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
747 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
748 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
749 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
750 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
751 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
752 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
753 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
754 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
755 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
756 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
757 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
758 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
759 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
760 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
761 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
// f64 forms: .rn, .rz, .rm, .rp on Float64Regs; no .ftz f64 record is defined
// in this group.
763 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
764 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
765 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
766 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
767 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
768 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
769 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
770 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
// f64 -> f32 conversion intrinsics. No dedicated instruction record is
// defined here: each anonymous pattern rewrites int_nvvm_d2f_<mode> into
// CVT_f32_f64 with a conversion-mode operand whose name mirrors the
// intrinsic suffix (rn -> CvtRN, rn_ftz -> CvtRN_FTZ, and likewise for
// rz / rm / rp). CVT_f32_f64 and the Cvt* operands are defined outside this
// group.
776 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
777 (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
778 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
779 (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
780 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
781 (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
782 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
783 (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
784 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
785 (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
786 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
787 (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
788 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
789 (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
790 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
791 (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
// Conversions between f64 and 32-bit integers, expressed as patterns onto
// CVT_* records defined outside this group. Float-to-integer patterns use
// the Cvt*I mode operands (CvtRNI, CvtRZI, CvtRMI, CvtRPI); integer-to-float
// patterns use CvtRN / CvtRZ / CvtRM / CvtRP.
//
// f64 -> signed i32 (d2i): CVT_s32_f64.
793 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
794 (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
795 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
796 (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
797 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
798 (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
799 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
800 (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
// f64 -> unsigned i32 (d2ui): CVT_u32_f64.
802 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
803 (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
804 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
805 (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
806 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
807 (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
808 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
809 (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
// signed i32 -> f64 (i2d): CVT_f64_s32.
811 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
812 (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
813 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
814 (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
815 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
816 (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
817 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
818 (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
// unsigned i32 -> f64 (ui2d): CVT_f64_u32.
820 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
821 (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
822 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
823 (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
824 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
825 (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
826 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
827 (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
// Conversions between f32 and 32-bit integers, expressed as patterns onto
// CVT_* records defined outside this group. The f32 -> integer intrinsics
// come in plain and _ftz flavours, mapped to Cvt*I and Cvt*I_FTZ mode
// operands respectively; the integer -> f32 intrinsics have no _ftz flavour
// here and use CvtRN / CvtRZ / CvtRM / CvtRP.
//
// f32 -> signed i32 (f2i): CVT_s32_f32.
829 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
830 (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
831 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
832 (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
833 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
834 (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
835 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
836 (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
837 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
838 (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
839 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
840 (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
841 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
842 (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
843 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
844 (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
// f32 -> unsigned i32 (f2ui): CVT_u32_f32.
846 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
847 (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
848 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
849 (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
850 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
851 (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
852 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
853 (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
854 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
855 (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
856 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
857 (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
858 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
859 (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
860 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
861 (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
// signed i32 -> f32 (i2f): CVT_f32_s32.
863 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
864 (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
865 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
866 (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
867 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
868 (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
869 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
870 (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
// unsigned i32 -> f32 (ui2f): CVT_f32_u32.
872 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
873 (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
874 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
875 (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
876 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
877 (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
878 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
879 (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
// Packing / unpacking between one Float64Regs value and two Int32Regs halves,
// all done with mov.b64 and a brace-enclosed two-element operand.
//
// INT_NVVM_LOHI_I2D: builds the Float64Regs result from the pair
// {$src0, $src1}.
// NOTE(review): the intrinsic name suggests $src0 is the low word and $src1
// the high word -- confirm against the PTX mov.b64 pack semantics.
881 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
882 Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
// INT_NVVM_D2I_LO: declares a scratch .b32 register %temp and unpacks $src0
// into {$dst, %temp}, i.e. $dst receives the FIRST element of the pair and
// the second is discarded.
884 def INT_NVVM_D2I_LO : F_MATH_1<
886 ".reg .b32 %temp; \n\t",
887 "mov.b64 \t{$dst, %temp}, $src0;\n\t",
889 Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
// INT_NVVM_D2I_HI: same shape, but unpacks into {%temp, $dst}, so $dst
// receives the SECOND element of the pair.
890 def INT_NVVM_D2I_HI : F_MATH_1<
892 ".reg .b32 %temp; \n\t",
893 "mov.b64 \t{%temp, $dst}, $src0;\n\t",
895 Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
// f32 -> 64-bit integer conversions, expressed as patterns onto CVT_* records
// defined outside this group. Plain intrinsics use the Cvt*I mode operands,
// _ftz intrinsics use Cvt*I_FTZ.
//
// f32 -> signed i64 (f2ll): CVT_s64_f32.
897 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
898 (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
899 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
900 (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
901 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
902 (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
903 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
904 (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
905 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
906 (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
907 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
908 (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
909 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
910 (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
911 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
912 (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
// f32 -> unsigned i64 (f2ull): CVT_u64_f32.
914 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
915 (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
916 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
917 (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
918 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
919 (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
920 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
921 (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
922 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
923 (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
924 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
925 (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
926 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
927 (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
928 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
929 (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
// f64 -> 64-bit integer conversions, expressed as patterns onto CVT_* records
// defined outside this group. Only the four Cvt*I mode operands appear; no
// _ftz variant is listed for the f64 source.
//
// f64 -> signed i64 (d2ll): CVT_s64_f64.
931 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
932 (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
933 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
934 (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
935 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
936 (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
937 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
938 (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
// f64 -> unsigned i64 (d2ull): CVT_u64_f64.
940 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
941 (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
942 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
943 (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
944 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
945 (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
946 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
947 (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
// 64-bit integer -> floating-point conversions, expressed as patterns onto
// CVT_* records defined outside this group. Every family lists the four mode
// operands CvtRN / CvtRZ / CvtRM / CvtRP; the source is always Int64Regs and
// the signed/unsigned distinction is carried by the CVT record name.
//
// signed i64 -> f32 (ll2f): CVT_f32_s64.
949 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
950 (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
951 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
952 (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
953 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
954 (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
955 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
956 (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
// unsigned i64 -> f32 (ull2f): CVT_f32_u64.
958 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
959 (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
960 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
961 (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
962 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
963 (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
964 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
965 (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
// signed i64 -> f64 (ll2d): CVT_f64_s64.
967 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
968 (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
969 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
970 (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
971 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
972 (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
973 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
974 (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
// unsigned i64 -> f64 (ull2d): CVT_f64_u64.
976 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
977 (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
978 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
979 (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
980 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
981 (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
982 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
983 (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
// f32 -> f16 conversion intrinsics (rn only, with and without _ftz). The
// result of CVT_f16_f32 is additionally wrapped in BITCONVERT_16_F2I.
// NOTE(review): presumably the intrinsic returns the half value in an integer
// register, hence the extra bit-convert -- confirm against the intrinsic
// declaration. Both CVT_f16_f32 and BITCONVERT_16_F2I are defined outside
// this group.
986 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
987 (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
988 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
989 (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
// Bitcast intrinsics, emitted as a plain register move of matching width.
// In each record the first register class is the result and the second the
// operand (assuming the F_MATH_1 argument order -- TODO confirm).
//
// 32-bit: mov.b32 between Float32Regs and Int32Regs, one record per direction.
995 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
996 Float32Regs, int_nvvm_bitcast_f2i>;
997 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
998 Int32Regs, int_nvvm_bitcast_i2f>;
// 64-bit: mov.b64 between Float64Regs and Int64Regs, one record per direction.
1000 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
1001 Int64Regs, int_nvvm_bitcast_ll2d>;
1002 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
1003 Float64Regs, int_nvvm_bitcast_d2ll>;
// Base class for the fns.b32 instruction records.
//   ins      - input operand dag; it must name its operands $mask, $base and
//              $offset because the assembly string refers to them.
//   Operands - the dag whose value is assigned to $dst in the selection
//              pattern.
// The result is always Int32Regs:$dst, and every record derived from this
// class is guarded by Requires<[hasPTX60, hasSM30]>.
1009 class INT_FNS_MBO<dag ins, dag Operands>
1010 : NVPTXInst<(outs Int32Regs:$dst), ins,
1011 "fns.b32 \t$dst, $mask, $base, $offset;",
1012 [(set Int32Regs:$dst, Operands )]>,
1013 Requires<[hasPTX60, hasSM30]>;
// All eight register/immediate combinations of int_nvvm_fns. The three-letter
// suffix gives the kind of (mask, base, offset) in that order:
// 'r' = Int32Regs operand, 'i' = i32imm operand matched with imm.
1015 def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
1016 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1017 def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
1018 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
1019 def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
1020 (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
1021 def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
1022 (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
1023 def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
1024 (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1025 def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
1026 (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
1027 def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
1028 (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
1029 def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
1030 (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
1032 //-----------------------------------
1034 //-----------------------------------
// PatFrag wrappers that attach an address-space check to an atomic fragment.
// Each class forwards (ops, frag) unchanged and supplies one field of
// AS_match as the PatFrag's third argument: .global, .shared or .generic.
// AS_match itself is defined outside this group.
// NOTE(review): presumably each field is a predicate on the memory node's
// address space -- confirm against the AS_match definition.
1036 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
1037 : PatFrag<ops, frag, AS_match.global>;
1038 class ATOMIC_SHARED_CHK <dag ops, dag frag>
1039 : PatFrag<ops, frag, AS_match.shared>;
1040 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
1041 : PatFrag<ops, frag, AS_match.generic>;
// Two-operand atomic (address + one value) for a single pointer width.
//   ptrclass - register class of the address operand $addr
//   regclass - register class of the result $dst and of a register $b
//   SpaceStr, OpcStr, TypeStr - concatenated after "atom", in that order, to
//              form the mnemonic
//   IntOp    - PatFrag matched as (IntOp addr, b)
//   IMMType  - operand type used for $b in the immediate variant
//   IMM      - node used to match $b in the immediate variant
//   Pred     - predicate list supplied by the caller
// Defines two records: "reg" ($b in a register) and "imm" ($b an immediate).
// Both emit the same assembly text; the extra "" argument in the imm
// strconcat adds nothing to the string.
// NOTE(review): how Pred is attached is not visible in these lines --
// presumably via Requires<Pred>; confirm.
1043 multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1044 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1045 Operand IMMType, SDNode IMM, list<Predicate> Pred> {
1046 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1047 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
1048 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
1050 def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
1051 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
1052 [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
// Pointer-width fan-out for F_ATOMIC_2_imp: instantiates it once with
// Int32Regs addresses (sub-name "p32") and once with Int64Regs addresses
// (sub-name "p64"), forwarding every other argument unchanged.
// Pred defaults to the empty list, so callers may omit it.
1055 multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1056 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
1057 list<Predicate> Pred = []> {
1058 defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1059 IntOp, IMMType, IMM, Pred>;
1060 defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1061 IntOp, IMMType, IMM, Pred>;
1064 // has 2 operands, neg the second one
// Two-operand atomic whose value operand is negated before use, for a single
// pointer width. The emitted text:
//   1. declares a scratch register "temp" of type .s<TypeStr>,
//   2. writes neg.s<TypeStr> of $b into temp,
//   3. issues atom<SpaceStr><OpcStr>.u<TypeStr> on [$addr] with temp.
// Because ".s" / ".u" are prepended here, TypeStr must be the bare bit width
// (e.g. "32"), not a dotted PTX type.
// Only a register variant ("reg") appears in these lines; IMMType is accepted
// but not referenced by it.
// NOTE(review): how Pred is attached is not visible in these lines --
// presumably via Requires<Pred>; confirm.
1065 multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1066 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1067 Operand IMMType, list<Predicate> Pred> {
1068 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1071 ".reg \t.s", TypeStr, " temp; \n\t",
1072 "neg.s", TypeStr, " \ttemp, $b; \n\t",
1073 "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
1075 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
// Pointer-width fan-out for F_ATOMIC_2_NEG_imp: one instantiation with
// Int32Regs addresses ("p32") and one with Int64Regs addresses ("p64"); all
// other arguments are forwarded unchanged. Pred defaults to the empty list.
1078 multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
1079 string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
1080 list<Predicate> Pred = []> {
1081 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1082 IntOp, IMMType, Pred> ;
1083 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1084 IntOp, IMMType, Pred> ;
// Three-operand atomic (address + two values $b, $c) for a single pointer
// width. The mnemonic is "atom" followed by SpaceStr, OpcStr and TypeStr.
// Four records cover every register/immediate combination of the values:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediate operands are declared with IMMType and matched with the generic
// `imm` node (this multiclass takes no separate matcher-node parameter).
// All four variants emit the same assembly text; the extra "" argument in the
// imm2 strconcat adds nothing to the string.
// NOTE(review): how Pred is attached is not visible in these lines --
// presumably via Requires<Pred>; confirm.
1088 multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1089 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1090 Operand IMMType, list<Predicate> Pred> {
1091 def reg : NVPTXInst<(outs regclass:$dst),
1092 (ins ptrclass:$addr, regclass:$b, regclass:$c),
1093 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1094 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
1097 def imm1 : NVPTXInst<(outs regclass:$dst),
1098 (ins ptrclass:$addr, IMMType:$b, regclass:$c),
1099 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1100 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
1103 def imm2 : NVPTXInst<(outs regclass:$dst),
1104 (ins ptrclass:$addr, regclass:$b, IMMType:$c),
1105 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
1106 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
1109 def imm3 : NVPTXInst<(outs regclass:$dst),
1110 (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
1111 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1112 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
// Pointer-width fan-out for F_ATOMIC_3_imp: one instantiation with Int32Regs
// addresses ("p32") and one with Int64Regs addresses ("p64"); all other
// arguments are forwarded unchanged. Pred defaults to the empty list.
1115 multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1116 string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
1117 defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1118 IntOp, IMMType, Pred>;
1119 defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1120 IntOp, IMMType, Pred>;
// Address-space-qualified fragments for atomic add. Suffix _g / _s / _gen
// selects the ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK
// wrapper respectively; the wrapped node is the same within each trio.
//
// Integer add, 32-bit: wraps atomic_load_add_32.
1125 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1126 (atomic_load_add_32 node:$a, node:$b)>;
1127 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1128 (atomic_load_add_32 node:$a, node:$b)>;
1129 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1130 (atomic_load_add_32 node:$a, node:$b)>;
// Integer add, 64-bit: wraps atomic_load_add_64.
1131 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1132 (atomic_load_add_64 node:$a, node:$b)>;
1133 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1134 (atomic_load_add_64 node:$a, node:$b)>;
1135 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1136 (atomic_load_add_64 node:$a, node:$b)>;
// Floating-point add: despite the width-less "atomic_load_add_*" names these
// three wrap atomic_load_fadd, not an integer add node.
1137 def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1138 (atomic_load_fadd node:$a, node:$b)>;
1139 def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1140 (atomic_load_fadd node:$a, node:$b)>;
1141 def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1142 (atomic_load_fadd node:$a, node:$b)>;
// atom.add instantiations. Name infix _G_ / _S_ / _GEN_ pairs the ".global",
// ".shared" or empty space string with the matching _g / _s / _gen fragment.
// NOTE(review): each *_GEN_*_USE_G record matches the same generic-space
// fragment as its *_GEN_* sibling and differs only in emitting ".global"; no
// predicate separating the two is visible here -- confirm which one is meant
// to be selected.
//
// 32-bit integer add (.u32, i32imm immediates).
1144 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
1145 atomic_load_add_32_g, i32imm, imm>;
1146 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
1147 atomic_load_add_32_s, i32imm, imm>;
1148 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
1149 atomic_load_add_32_gen, i32imm, imm>;
1150 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1151 ".add", atomic_load_add_32_gen, i32imm, imm>;
// 64-bit integer add (.u64, i64imm immediates).
1153 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
1154 atomic_load_add_64_g, i64imm, imm>;
1155 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
1156 atomic_load_add_64_s, i64imm, imm>;
1157 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
1158 atomic_load_add_64_gen, i64imm, imm>;
1159 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1160 ".add", atomic_load_add_64_gen, i64imm, imm>;
// f32 add (.f32, f32imm immediates matched with fpimm); no predicate list is
// passed, so Pred takes its default.
1162 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
1163 atomic_load_add_g, f32imm, fpimm>;
1164 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
1165 atomic_load_add_s, f32imm, fpimm>;
1166 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
1167 atomic_load_add_gen, f32imm, fpimm>;
// f64 add (.f64, f64imm immediates matched with fpimm); these three pass the
// predicate list [hasAtomAddF64]. They reuse the same float fragments as the
// f32 records above, with Float64Regs as the register class.
1169 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
1170 atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
1171 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
1172 atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
1173 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
1174 atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
// Address-space-qualified fragments for atomic subtract. Suffix _g / _s /
// _gen selects the global / shared / generic check wrapper; the 32-bit trio
// wraps atomic_load_sub_32 and the 64-bit trio wraps atomic_load_sub_64.
1178 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1179 (atomic_load_sub_32 node:$a, node:$b)>;
1180 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1181 (atomic_load_sub_32 node:$a, node:$b)>;
1182 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1183 (atomic_load_sub_32 node:$a, node:$b)>;
1184 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1185 (atomic_load_sub_64 node:$a, node:$b)>;
1186 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1187 (atomic_load_sub_64 node:$a, node:$b)>;
1188 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1189 (atomic_load_sub_64 node:$a, node:$b)>;
// Atomic subtract instantiations. These go through F_ATOMIC_2_NEG with the
// ".add" opcode string, i.e. subtraction is emitted as an atomic add of the
// negated operand. TypeStr is passed as a bare width ("32" / "64") because
// F_ATOMIC_2_NEG_imp prepends ".s" / ".u" itself.
// NOTE(review): each *_GEN_*_USE_G record matches the same generic-space
// fragment as its *_GEN_* sibling and differs only in emitting ".global"; no
// predicate separating the two is visible here -- confirm which is selected.
1191 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
1192 atomic_load_sub_32_g, i32imm>;
1193 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
1194 atomic_load_sub_64_g, i64imm>;
1195 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
1196 atomic_load_sub_32_gen, i32imm>;
1197 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
1198 ".add", atomic_load_sub_32_gen, i32imm>;
1199 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
1200 atomic_load_sub_32_s, i32imm>;
1201 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
1202 atomic_load_sub_64_s, i64imm>;
1203 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
1204 atomic_load_sub_64_gen, i64imm>;
1205 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
1206 ".add", atomic_load_sub_64_gen, i64imm>;
// Address-space-qualified fragments for atomic swap. Suffix _g / _s / _gen
// selects the global / shared / generic check wrapper; the 32-bit trio wraps
// atomic_swap_32 and the 64-bit trio wraps atomic_swap_64.
1210 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1211 (atomic_swap_32 node:$a, node:$b)>;
1212 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1213 (atomic_swap_32 node:$a, node:$b)>;
1214 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1215 (atomic_swap_32 node:$a, node:$b)>;
1216 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1217 (atomic_swap_64 node:$a, node:$b)>;
1218 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1219 (atomic_swap_64 node:$a, node:$b)>;
1220 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1221 (atomic_swap_64 node:$a, node:$b)>;
// Atomic swap instantiations: opcode string ".exch" with the untyped-bits
// type strings ".b32" / ".b64". Name infix _G_ / _S_ / _GEN_ pairs the
// ".global", ".shared" or empty space string with the matching fragment.
// NOTE(review): each *_GEN_*_USE_G record matches the same generic-space
// fragment as its *_GEN_* sibling and differs only in emitting ".global"; no
// predicate separating the two is visible here -- confirm which is selected.
1223 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
1224 atomic_swap_32_g, i32imm, imm>;
1225 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
1226 atomic_swap_32_s, i32imm, imm>;
1227 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
1228 atomic_swap_32_gen, i32imm, imm>;
1229 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1230 ".exch", atomic_swap_32_gen, i32imm, imm>;
1231 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
1232 atomic_swap_64_g, i64imm, imm>;
1233 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
1234 atomic_swap_64_s, i64imm, imm>;
1235 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
1236 atomic_swap_64_gen, i64imm, imm>;
1237 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1238 ".exch", atomic_swap_64_gen, i64imm, imm>;
// Address-space-qualified fragments for atomic max. Suffix _g / _s / _gen
// selects the global / shared / generic check wrapper.
//
// Signed max: wraps atomic_load_max_32 / atomic_load_max_64.
1242 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1243 , (atomic_load_max_32 node:$a, node:$b)>;
1244 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1245 (atomic_load_max_32 node:$a, node:$b)>;
1246 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1247 (atomic_load_max_32 node:$a, node:$b)>;
1248 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1249 , (atomic_load_max_64 node:$a, node:$b)>;
1250 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1251 (atomic_load_max_64 node:$a, node:$b)>;
1252 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1253 (atomic_load_max_64 node:$a, node:$b)>;
// Unsigned max: wraps atomic_load_umax_32 / atomic_load_umax_64.
1254 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1255 (atomic_load_umax_32 node:$a, node:$b)>;
1256 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1257 (atomic_load_umax_32 node:$a, node:$b)>;
1258 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1259 (atomic_load_umax_32 node:$a, node:$b)>;
1260 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1261 (atomic_load_umax_64 node:$a, node:$b)>;
1262 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1263 (atomic_load_umax_64 node:$a, node:$b)>;
1264 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1265 (atomic_load_umax_64 node:$a, node:$b)>;
// Atomic max instantiations, all with opcode string ".max". Signedness is
// carried by the type string: the LOAD_MAX records use ".s32" / ".s64" with
// the atomic_load_max_* fragments, the LOAD_UMAX records use ".u32" / ".u64"
// with the atomic_load_umax_* fragments.
// NOTE(review): each *_GEN_*_USE_G record matches the same generic-space
// fragment as its *_GEN_* sibling and differs only in emitting ".global"; no
// predicate separating the two is visible here -- confirm which is selected.
//
// Signed max.
1267 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1268 ".max", atomic_load_max_32_g, i32imm, imm>;
1269 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1270 ".max", atomic_load_max_32_s, i32imm, imm>;
1271 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
1272 atomic_load_max_32_gen, i32imm, imm>;
1273 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1274 ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
1275 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1276 ".max", atomic_load_max_64_g, i64imm, imm>;
1277 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1278 ".max", atomic_load_max_64_s, i64imm, imm>;
1279 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
1280 atomic_load_max_64_gen, i64imm, imm>;
1281 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1282 ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
// Unsigned max.
1283 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1284 ".max", atomic_load_umax_32_g, i32imm, imm>;
1285 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1286 ".max", atomic_load_umax_32_s, i32imm, imm>;
1287 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
1288 atomic_load_umax_32_gen, i32imm, imm>;
1289 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1290 ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
1291 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1292 ".max", atomic_load_umax_64_g, i64imm, imm>;
1293 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1294 ".max", atomic_load_umax_64_s, i64imm, imm>;
1295 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
1296 atomic_load_umax_64_gen, i64imm, imm>;
1297 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1298 ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
// Address-space-qualified fragments for atomic min. Suffix _g / _s / _gen
// selects the global / shared / generic check wrapper.
//
// Signed min: wraps atomic_load_min_32 / atomic_load_min_64.
1302 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1303 (atomic_load_min_32 node:$a, node:$b)>;
1304 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1305 (atomic_load_min_32 node:$a, node:$b)>;
1306 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1307 (atomic_load_min_32 node:$a, node:$b)>;
1308 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1309 (atomic_load_min_64 node:$a, node:$b)>;
1310 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1311 (atomic_load_min_64 node:$a, node:$b)>;
1312 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1313 (atomic_load_min_64 node:$a, node:$b)>;
// Unsigned min: wraps atomic_load_umin_32 / atomic_load_umin_64.
1314 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1315 (atomic_load_umin_32 node:$a, node:$b)>;
1316 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1317 (atomic_load_umin_32 node:$a, node:$b)>;
1318 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1319 (atomic_load_umin_32 node:$a, node:$b)>;
1320 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1321 (atomic_load_umin_64 node:$a, node:$b)>;
1322 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1323 (atomic_load_umin_64 node:$a, node:$b)>;
1324 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1325 (atomic_load_umin_64 node:$a, node:$b)>;
// Atomic min instantiations, all with opcode string ".min". Signedness is
// carried by the type string: the LOAD_MIN records use ".s32" / ".s64" with
// the atomic_load_min_* fragments, the LOAD_UMIN records use ".u32" / ".u64"
// with the atomic_load_umin_* fragments.
// NOTE(review): each *_GEN_*_USE_G record matches the same generic-space
// fragment as its *_GEN_* sibling and differs only in emitting ".global"; no
// predicate separating the two is visible here -- confirm which is selected.
//
// Signed min.
1327 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1328 ".min", atomic_load_min_32_g, i32imm, imm>;
1329 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1330 ".min", atomic_load_min_32_s, i32imm, imm>;
1331 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
1332 atomic_load_min_32_gen, i32imm, imm>;
1333 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1334 ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
1335 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1336 ".min", atomic_load_min_64_g, i64imm, imm>;
1337 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1338 ".min", atomic_load_min_64_s, i64imm, imm>;
1339 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
1340 atomic_load_min_64_gen, i64imm, imm>;
1341 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1342 ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
// Unsigned min.
1343 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1344 ".min", atomic_load_umin_32_g, i32imm, imm>;
1345 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1346 ".min", atomic_load_umin_32_s, i32imm, imm>;
1347 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
1348 atomic_load_umin_32_gen, i32imm, imm>;
1349 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1350 ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
1351 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1352 ".min", atomic_load_umin_64_g, i64imm, imm>;
1353 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1354 ".min", atomic_load_umin_64_s, i64imm, imm>;
1355 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
1356 atomic_load_umin_64_gen, i64imm, imm>;
1357 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1358 ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
1360 // atom_inc atom_dec
// Address-space-qualified fragments for atomic inc and dec. Unlike the other
// atomic families in this file, the wrapped node is an NVVM intrinsic
// (int_nvvm_atomic_load_inc_32 / int_nvvm_atomic_load_dec_32) rather than an
// atomic_load_* node. Only 32-bit fragments are defined here.
// Suffix _g / _s / _gen selects the global / shared / generic check wrapper.
1362 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1363 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1364 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1365 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1366 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1367 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1368 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1369 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1370 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1371 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1372 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1373 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
// Atomic inc / dec instantiations: opcode strings ".inc" and ".dec", always
// with type string ".u32" on Int32Regs and i32imm immediates.
// NOTE(review): each *_GEN_32_USE_G record matches the same generic-space
// fragment as its *_GEN_32 sibling and differs only in emitting ".global"; no
// predicate separating the two is visible here -- confirm which is selected.
1375 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
1376 atomic_load_inc_32_g, i32imm, imm>;
1377 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
1378 atomic_load_inc_32_s, i32imm, imm>;
1379 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
1380 atomic_load_inc_32_gen, i32imm, imm>;
1381 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1382 ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1383 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
1384 atomic_load_dec_32_g, i32imm, imm>;
1385 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
1386 atomic_load_dec_32_s, i32imm, imm>;
1387 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
1388 atomic_load_dec_32_gen, i32imm, imm>;
1389 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1390 ".dec", atomic_load_dec_32_gen, i32imm, imm>;
// Address-space-qualified fragments for atomic bitwise AND. Suffix _g / _s /
// _gen selects the global / shared / generic check wrapper; the 32-bit trio
// wraps atomic_load_and_32 and the 64-bit trio wraps atomic_load_and_64.
1394 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1395 (atomic_load_and_32 node:$a, node:$b)>;
1396 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1397 (atomic_load_and_32 node:$a, node:$b)>;
1398 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1399 (atomic_load_and_32 node:$a, node:$b)>;
1400 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1401 (atomic_load_and_64 node:$a, node:$b)>;
1402 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1403 (atomic_load_and_64 node:$a, node:$b)>;
1404 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1405 (atomic_load_and_64 node:$a, node:$b)>;
1407 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
1408 atomic_load_and_32_g, i32imm, imm>;
1409 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
1410 atomic_load_and_32_s, i32imm, imm>;
1411 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
1412 atomic_load_and_32_gen, i32imm, imm>;
1413 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1414 ".and", atomic_load_and_32_gen, i32imm, imm>;
1415 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
1416 atomic_load_and_64_g, i64imm, imm>;
1417 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
1418 atomic_load_and_64_s, i64imm, imm>;
1419 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
1420 atomic_load_and_64_gen, i64imm, imm>;
1421 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1422 ".and", atomic_load_and_64_gen, i64imm, imm>;
// Address-space-qualified fragments wrapping atomic_load_or_32 and
// atomic_load_or_64 (global, shared, generic).
1426 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1427 (atomic_load_or_32 node:$a, node:$b)>;
1428 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1429 (atomic_load_or_32 node:$a, node:$b)>;
1430 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1431 (atomic_load_or_32 node:$a, node:$b)>;
1432 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1433 (atomic_load_or_64 node:$a, node:$b)>;
1434 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1435 (atomic_load_or_64 node:$a, node:$b)>;
1436 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1437 (atomic_load_or_64 node:$a, node:$b)>;
// ".or" instruction variants for 32- and 64-bit operands.
// Note the listing order here is G, GEN, GEN_USE_G, S, whereas the
// neighbouring and/xor/cas groups list G, S, GEN, GEN_USE_G.
1439 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
1440 atomic_load_or_32_g, i32imm, imm>;
1441 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
1442 atomic_load_or_32_gen, i32imm, imm>;
1443 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1444 ".or", atomic_load_or_32_gen, i32imm, imm>;
1445 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
1446 atomic_load_or_32_s, i32imm, imm>;
1447 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
1448 atomic_load_or_64_g, i64imm, imm>;
1449 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
1450 atomic_load_or_64_gen, i64imm, imm>;
1451 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1452 ".or", atomic_load_or_64_gen, i64imm, imm>;
1453 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
1454 atomic_load_or_64_s, i64imm, imm>;
// Address-space-qualified fragments wrapping atomic_load_xor_32 and
// atomic_load_xor_64 (global, shared, generic).
1458 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1459 (atomic_load_xor_32 node:$a, node:$b)>;
1460 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1461 (atomic_load_xor_32 node:$a, node:$b)>;
1462 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1463 (atomic_load_xor_32 node:$a, node:$b)>;
1464 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1465 (atomic_load_xor_64 node:$a, node:$b)>;
1466 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1467 (atomic_load_xor_64 node:$a, node:$b)>;
1468 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1469 (atomic_load_xor_64 node:$a, node:$b)>;
// ".xor" instruction variants: ".b32" with Int32Regs / i32imm and ".b64" with
// Int64Regs / i64imm, for the global, shared, generic and generic-as-global
// combinations.
1471 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
1472 atomic_load_xor_32_g, i32imm, imm>;
1473 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
1474 atomic_load_xor_32_s, i32imm, imm>;
1475 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
1476 atomic_load_xor_32_gen, i32imm, imm>;
1477 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1478 ".xor", atomic_load_xor_32_gen, i32imm, imm>;
1479 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
1480 atomic_load_xor_64_g, i64imm, imm>;
1481 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
1482 atomic_load_xor_64_s, i64imm, imm>;
1483 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
1484 atomic_load_xor_64_gen, i64imm, imm>;
1485 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1486 ".xor", atomic_load_xor_64_gen, i64imm, imm>;
// Three-operand fragments ($a, $b, $c) wrapping atomic_cmp_swap_32 and
// atomic_cmp_swap_64 for the global, shared and generic check classes.
1490 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1491 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1492 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1493 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1494 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1495 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1496 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1497 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1498 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1499 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1500 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1501 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
// ".cas" instruction variants built with F_ATOMIC_3. Unlike the F_ATOMIC_2
// users above, only the immediate operand type (i32imm / i64imm) is passed;
// no separate immediate node argument is supplied.
1503 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
1504 atomic_cmp_swap_32_g, i32imm>;
1505 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
1506 atomic_cmp_swap_32_s, i32imm>;
1507 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
1508 atomic_cmp_swap_32_gen, i32imm>;
1509 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
1510 ".cas", atomic_cmp_swap_32_gen, i32imm>;
1511 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
1512 atomic_cmp_swap_64_g, i64imm>;
1513 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
1514 atomic_cmp_swap_64_s, i64imm>;
1515 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
1516 atomic_cmp_swap_64_gen, i64imm>;
1517 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
1518 ".cas", atomic_cmp_swap_64_gen, i64imm>;
1520 // Support for scoped atomic operations. Matches
1521 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1522 // and converts it into the appropriate instruction.
1523 // NOTE: not all possible combinations are implemented
1524 // 'space' is limited to generic as it's the only one needed to support CUDA.
1525 // 'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Base class shared by the two- and three-operand scoped atomic records
// below. It declares a single $result output of class `regclass`, takes the
// input dag from the caller, and uses one pattern that sets $result from
// `Operands`.
// NOTE(review): the lines that consume AsmStr and Preds are not visible in
// this view (the argument list has a gap and the last line ends with a
// trailing comma) -- check the complete definition before editing.
1526 class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
1527 dag ins, dag Operands>
1528 : NVPTXInst<(outs regclass:$result), ins,
1530 [(set regclass:$result, Operands)]>,
1533 // Define instruction variants for all addressing modes.
// Emits four anonymous ATOM23_impl records for a two-operand atomic:
// the address $src in Int32Regs or Int64Regs, crossed with $b supplied either
// as a register of `regclass` or as an immediate (operand ImmType, matched as
// `(ImmTy Imm:$b)`).
// NOTE(review): the closing braces of the `let AddedComplexity` scope and of
// the multiclass are not visible here, so which records the `let` covers
// should be confirmed in the full file.
1534 multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
1535 NVPTXRegClass regclass, Operand ImmType,
1536 SDNode Imm, ValueType ImmTy,
1537 list<Predicate> Preds> {
1538 let AddedComplexity = 1 in {
1539 def : ATOM23_impl<AsmStr, regclass, Preds,
1540 (ins Int32Regs:$src, regclass:$b),
1541 (Intr Int32Regs:$src, regclass:$b)>;
1542 def : ATOM23_impl<AsmStr, regclass, Preds,
1543 (ins Int64Regs:$src, regclass:$b),
1544 (Intr Int64Regs:$src, regclass:$b)>;
1546 // tablegen can't infer argument types from Intrinsic (though it can
1547 // from Instruction) so we have to enforce specific type on
1548 // immediates via explicit cast to ImmTy.
1549 def : ATOM23_impl<AsmStr, regclass, Preds,
1550 (ins Int32Regs:$src, ImmType:$b),
1551 (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
1552 def : ATOM23_impl<AsmStr, regclass, Preds,
1553 (ins Int64Regs:$src, ImmType:$b),
1554 (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
// Emits eight anonymous ATOM23_impl records for a three-operand atomic:
// address $src in Int32Regs or Int64Regs, crossed with every
// register/immediate combination of $b and $c (reg/reg, imm/reg, reg/imm,
// imm/imm). Immediates are matched as `(ImmTy Imm:$x)`.
// The reg/reg group opens a `let AddedComplexity = 2` scope and the
// single-immediate group a `let AddedComplexity = 1` scope.
// NOTE(review): the closing braces are not visible in this view; confirm the
// extent of each `let` in the full file.
1557 multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
1558 NVPTXRegClass regclass, Operand ImmType,
1559 SDNode Imm, ValueType ImmTy,
1560 list<Predicate> Preds> {
1561 // Variants for register/immediate permutations of $b and $c
1562 let AddedComplexity = 2 in {
1563 def : ATOM23_impl<AsmStr, regclass, Preds,
1564 (ins Int32Regs:$src, regclass:$b, regclass:$c),
1565 (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
1566 def : ATOM23_impl<AsmStr, regclass, Preds,
1567 (ins Int64Regs:$src, regclass:$b, regclass:$c),
1568 (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
1570 let AddedComplexity = 1 in {
1571 def : ATOM23_impl<AsmStr, regclass, Preds,
1572 (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1573 (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1574 def : ATOM23_impl<AsmStr, regclass, Preds,
1575 (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1576 (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1577 def : ATOM23_impl<AsmStr, regclass, Preds,
1578 (ins Int32Regs:$src, regclass:$b, ImmType:$c),
1579 (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1580 def : ATOM23_impl<AsmStr, regclass, Preds,
1581 (ins Int64Regs:$src, regclass:$b, ImmType:$c),
1582 (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1584 def : ATOM23_impl<AsmStr, regclass, Preds,
1585 (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
1586 (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1587 def : ATOM23_impl<AsmStr, regclass, Preds,
1588 (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
1589 (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1592 // Constructs intrinsic name and instruction asm strings.
// Builds the two strings a two-operand scoped atomic needs and forwards them
// to ATOM2P_impl:
//  * asm:  "atom" [ "." SpaceStr unless it is "gen" ]
//                 [ "." ScopeStr unless it is "gpu" ]
//                 "." OpStr "." TypeStr " \t$result, [$src], $b;"
//  * name: "int_nvvm_atomic_" OpStr "_" SpaceStr "_" IntTypeStr
//                 [ "_" ScopeStr unless it is empty ]
// Note the two scope tests differ: the asm string drops a "gpu" scope while
// the intrinsic name drops an empty scope.
// NOTE(review): the line that converts the name string into an Intrinsic is
// not visible in this view.
1593 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
1594 string ScopeStr, string SpaceStr,
1595 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1596 ValueType ImmTy, list<Predicate> Preds> {
1597 defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1598 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1599 # "." # OpStr # "." # TypeStr
1600 # " \t$result, [$src], $b;",
1602 "int_nvvm_atomic_" # OpStr
1603 # "_" # SpaceStr # "_" # IntTypeStr
1604 # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1605 regclass, ImmType, Imm, ImmTy, Preds>;
// Three-operand counterpart of ATOM2N_impl: builds the same asm prefix and
// intrinsic name, but the asm operand list is " \t$result, [$src], $b, $c;"
// and the result is forwarded to ATOM3P_impl.
// As in ATOM2N_impl, the asm string omits a "gpu" scope while the intrinsic
// name omits an empty scope.
// NOTE(review): the line that converts the name string into an Intrinsic is
// not visible in this view.
1607 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
1608 string ScopeStr, string SpaceStr,
1609 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1610 ValueType ImmTy, list<Predicate> Preds> {
1611 defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1612 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1613 # "." # OpStr # "." # TypeStr
1614 # " \t$result, [$src], $b, $c;",
1616 "int_nvvm_atomic_" # OpStr
1617 # "_" # SpaceStr # "_" # IntTypeStr
1618 # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1619 regclass, ImmType, Imm, ImmTy, Preds>;
1622 // Constructs variants for different address spaces.
1623 // For now we only need variants for generic space pointers.
// Address-space layer for two-operand scoped atomics. Only one variant is
// instantiated: ATOM2N_impl with SpaceStr fixed to "gen", under the `_gen_`
// name suffix. All other arguments are passed through unchanged.
1624 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
1625 string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1626 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1627 defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1628 regclass, ImmType, Imm, ImmTy, Preds>;
// Address-space layer for three-operand scoped atomics. Mirrors ATOM2A_impl:
// instantiates ATOM3N_impl once with SpaceStr fixed to "gen" under the
// `_gen_` name suffix.
1630 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
1631 string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1632 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1633 defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1634 regclass, ImmType, Imm, ImmTy, Preds>;
1637 // Constructs variants for different scopes of atomic op.
// Scope layer for two-operand atomics. Instantiates ATOM2A_impl twice, with
// ScopeStr "cta" (suffix `_cta`) and "sys" (suffix `_sys`). In both cases the
// hasAtomScope predicate is appended to the caller's Preds list.
1638 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
1639 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1640 ValueType ImmTy, list<Predicate> Preds> {
1641 // .gpu scope is default and is currently covered by existing
1642 // atomics w/o explicitly specified scope.
1643 defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1644 regclass, ImmType, Imm, ImmTy,
1645 !listconcat(Preds,[hasAtomScope])>;
1646 defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1647 regclass, ImmType, Imm, ImmTy,
1648 !listconcat(Preds,[hasAtomScope])>;
// Scope layer for three-operand atomics. Instantiates ATOM3A_impl for the
// "cta" and "sys" scopes (suffixes `_cta` / `_sys`), appending hasAtomScope
// to the caller's Preds list each time.
1650 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
1651 NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
1652 list<Predicate> Preds> {
1653 // No need to define ".gpu"-scoped atomics. They do the same thing
1654 // as the regular, non-scoped atomics defined elsewhere.
1655 defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1656 regclass, ImmType, Imm, ImmTy,
1657 !listconcat(Preds,[hasAtomScope])>;
1658 defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1659 regclass, ImmType, Imm, ImmTy,
1660 !listconcat(Preds,[hasAtomScope])>;
// Type fan-out for a two-operand add-style atomic: s32, u32 and u64 integer
// forms (intrinsic type tag "i") plus f32 and f64 floating-point forms
// (intrinsic type tag "f", fpimm immediates).
// NOTE(review): the predicate lists of the _f32 and _f64 entries are on lines
// not visible in this view (both entries end with a trailing comma here).
1664 multiclass ATOM2_add_impl<string OpStr> {
1665 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1666 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1667 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
1668 defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
1670 defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
1674 // atom.{and,or,xor}
// Type fan-out for bitwise atomics: a b32 form with no extra predicates and a
// b64 form that additionally requires hasAtomBitwise64.
1675 multiclass ATOM2_bitwise_impl<string OpStr> {
1676 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1677 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
1678 [hasAtomBitwise64]>;
// Type fan-out for exchange atomics: b32 and b64 forms, neither with extra
// predicates.
1682 multiclass ATOM2_exch_impl<string OpStr> {
1683 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1684 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
// Type fan-out for min/max atomics: s32, u32, s64 and u64 forms.
// NOTE(review): the predicate lists of the _s64 and _u64 entries are on lines
// not visible in this view (both entries end with a trailing comma here).
1688 multiclass ATOM2_minmax_impl<string OpStr> {
1689 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1690 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1691 defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
1693 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
// Type fan-out for inc/dec atomics: only a u32 form is defined.
1698 multiclass ATOM2_incdec_impl<string OpStr> {
1699 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
// Type fan-out for compare-and-swap: b32 and b64 forms via the three-operand
// ATOM3S_impl, neither with extra predicates.
1703 multiclass ATOM3_cas_impl<string OpStr> {
1704 defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1705 defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
// Top-level instantiations of the scoped atomics, one per operation. The
// string argument is the OpStr that ends up in both the asm mnemonic and the
// intrinsic name assembled by ATOM2N_impl / ATOM3N_impl.
1708 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
1709 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
1710 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
1711 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
1712 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
1713 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
1714 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
1715 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
1716 defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
1717 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
1719 //-----------------------------------
1720 // Support for ldu on sm_20 or later
1721 //-----------------------------------
1723 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
1724 // read-only in a kernel.
// Scalar `ldu.global` instruction family. For a given result register class
// it defines one record per address operand form:
//   areg   - Int32Regs address      areg64 - Int64Regs address
//   avar   - imemAny address
//   ari    - MEMri address          ari64  - MEMri64 address
// The asm string is "ldu.global." followed by TyStr, which the caller must
// supply complete with type suffix and operand list.
// Every record has an empty pattern list and requires hasLDU.
// NOTE(review): with no patterns, selection presumably happens in C++ code --
// confirm where these opcodes are chosen.
1728 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1729 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1730 !strconcat("ldu.global.", TyStr),
1731 []>, Requires<[hasLDU]>;
1732 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1733 !strconcat("ldu.global.", TyStr),
1734 []>, Requires<[hasLDU]>;
1735 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1736 !strconcat("ldu.global.", TyStr),
1737 []>, Requires<[hasLDU]>;
1738 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1739 !strconcat("ldu.global.", TyStr),
1740 []>, Requires<[hasLDU]>;
1741 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1742 !strconcat("ldu.global.", TyStr),
1743 []>, Requires<[hasLDU]>;
// Scalar ldu instantiations, one per value type. Points worth noticing in the
// arguments below:
//  * i8 uses the "u8" type string but Int16Regs as the result class;
//  * f16 and f16x2 use the untyped "b16" / "b32" type strings;
//  * p32 / p64 repeat the "u32" / "u64" strings and register classes of
//    i32 / i64 under separate names.
1746 defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1747 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1748 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1749 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1750 defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
1751 defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
1752 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1753 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1754 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1755 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1759 // Elementized vector ldu
// Two-element vector `ldu.global` family: each record produces two results
// ($dst1, $dst2) of `regclass` and has an empty pattern list. Unlike the
// scalar LDU_G records, no Requires<> clause is attached here.
// NOTE(review): the `(ins ...)` lines of the _ari32, _ari64 and _avar records
// are not visible in this view; by analogy with VLDU_G_ELE_V4 they presumably
// use MEMri, MEMri64 and imemAny -- confirm in the full file.
1760 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1761 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1762 (ins Int32Regs:$src),
1763 !strconcat("ldu.global.", TyStr), []>;
1764 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1765 (ins Int64Regs:$src),
1766 !strconcat("ldu.global.", TyStr), []>;
1767 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1769 !strconcat("ldu.global.", TyStr), []>;
1770 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1772 !strconcat("ldu.global.", TyStr), []>;
1773 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1775 !strconcat("ldu.global.", TyStr), []>;
// Four-element vector `ldu.global` family: each record produces four results
// ($dst1..$dst4) of `regclass`, has an empty pattern list, and differs only in
// the address operand form (Int32Regs, Int64Regs, MEMri, MEMri64, imemAny).
// No Requires<> clause is attached to these records.
1778 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1779 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1780 regclass:$dst4), (ins Int32Regs:$src),
1781 !strconcat("ldu.global.", TyStr), []>;
1782 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1783 regclass:$dst4), (ins Int64Regs:$src),
1784 !strconcat("ldu.global.", TyStr), []>;
1785 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1786 regclass:$dst4), (ins MEMri:$src),
1787 !strconcat("ldu.global.", TyStr), []>;
1788 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1789 regclass:$dst4), (ins MEMri64:$src),
1790 !strconcat("ldu.global.", TyStr), []>;
1791 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1792 regclass:$dst4), (ins imemAny:$src),
1793 !strconcat("ldu.global.", TyStr), []>;
// Vector ldu instantiations. The doubled braces "{{" / "}}" in the asm strings
// are TableGen escapes for literal braces around the destination list.
// 8-bit element variants use Int16Regs as the result class.
// NOTE(review): for v4i16, v4i32, v4f16, v4f16x2 and v4f32 the line carrying
// the register-class argument is not visible in this view.
1796 defm INT_PTX_LDU_G_v2i8_ELE
1797 : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1798 defm INT_PTX_LDU_G_v2i16_ELE
1799 : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1800 defm INT_PTX_LDU_G_v2i32_ELE
1801 : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1802 defm INT_PTX_LDU_G_v2f16_ELE
1803 : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1804 defm INT_PTX_LDU_G_v2f16x2_ELE
1805 : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1806 defm INT_PTX_LDU_G_v2f32_ELE
1807 : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1808 defm INT_PTX_LDU_G_v2i64_ELE
1809 : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1810 defm INT_PTX_LDU_G_v2f64_ELE
1811 : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1812 defm INT_PTX_LDU_G_v4i8_ELE
1813 : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1814 defm INT_PTX_LDU_G_v4i16_ELE
1815 : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1817 defm INT_PTX_LDU_G_v4i32_ELE
1818 : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1820 defm INT_PTX_LDU_G_v4f16_ELE
1821 : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1823 defm INT_PTX_LDU_G_v4f16x2_ELE
1824 : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1826 defm INT_PTX_LDU_G_v4f32_ELE
1827 : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1831 //-----------------------------------
1832 // Support for ldg on sm_35 or later
1833 //-----------------------------------
1835 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
1836 // non-coherent texture cache, and therefore the values read must be read-only
1837 // during the lifetime of the kernel.
// Scalar `ld.global.nc` instruction family, structured exactly like LDU_G:
// one record per address operand form (areg: Int32Regs, areg64: Int64Regs,
// avar: imemAny, ari: MEMri, ari64: MEMri64). The asm string is
// "ld.global.nc." followed by the caller-supplied TyStr. Every record has an
// empty pattern list and requires hasLDG.
1839 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
1840 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1841 !strconcat("ld.global.nc.", TyStr),
1842 []>, Requires<[hasLDG]>;
1843 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1844 !strconcat("ld.global.nc.", TyStr),
1845 []>, Requires<[hasLDG]>;
1846 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1847 !strconcat("ld.global.nc.", TyStr),
1848 []>, Requires<[hasLDG]>;
1849 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1850 !strconcat("ld.global.nc.", TyStr),
1851 []>, Requires<[hasLDG]>;
1852 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1853 !strconcat("ld.global.nc.", TyStr),
1854 []>, Requires<[hasLDG]>;
// Scalar ldg instantiations, one per value type, using the same type-string /
// register-class pairing as the scalar ldu instantiations: i8 loads into
// Int16Regs, f16 / f16x2 use "b16" / "b32", and p32 / p64 repeat the i32 / i64
// arguments under separate names.
1857 defm INT_PTX_LDG_GLOBAL_i8
1858 : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
1859 defm INT_PTX_LDG_GLOBAL_i16
1860 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
1861 defm INT_PTX_LDG_GLOBAL_i32
1862 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1863 defm INT_PTX_LDG_GLOBAL_i64
1864 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1865 defm INT_PTX_LDG_GLOBAL_f16
1866 : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
1867 defm INT_PTX_LDG_GLOBAL_f16x2
1868 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
1869 defm INT_PTX_LDG_GLOBAL_f32
1870 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
1871 defm INT_PTX_LDG_GLOBAL_f64
1872 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
1873 defm INT_PTX_LDG_GLOBAL_p32
1874 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1875 defm INT_PTX_LDG_GLOBAL_p64
1876 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1880 // Elementized vector ldg
// Two-element vector `ld.global.nc` family: each record produces two results
// ($dst1, $dst2) of `regclass` and has an empty pattern list. No Requires<>
// clause is attached, unlike the scalar LDG_G records.
// NOTE(review): the `(ins ...)` lines of the _ari32, _ari64 and _avar records
// are not visible in this view; by analogy with VLDG_G_ELE_V4 they presumably
// use MEMri, MEMri64 and imemAny -- confirm in the full file.
1881 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1882 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1883 (ins Int32Regs:$src),
1884 !strconcat("ld.global.nc.", TyStr), []>;
1885 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1886 (ins Int64Regs:$src),
1887 !strconcat("ld.global.nc.", TyStr), []>;
1888 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1890 !strconcat("ld.global.nc.", TyStr), []>;
1891 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1893 !strconcat("ld.global.nc.", TyStr), []>;
1894 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1896 !strconcat("ld.global.nc.", TyStr), []>;
// Four-element vector `ld.global.nc` family: each record produces four
// results ($dst1..$dst4) of `regclass`, has an empty pattern list, and differs
// only in the address operand form (Int32Regs, Int64Regs, MEMri, MEMri64,
// imemAny). No Requires<> clause is attached to these records.
1899 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1900 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1901 regclass:$dst4), (ins Int32Regs:$src),
1902 !strconcat("ld.global.nc.", TyStr), []>;
1903 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1904 regclass:$dst4), (ins Int64Regs:$src),
1905 !strconcat("ld.global.nc.", TyStr), []>;
1906 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1907 regclass:$dst4), (ins MEMri:$src),
1908 !strconcat("ld.global.nc.", TyStr), []>;
1909 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1910 regclass:$dst4), (ins MEMri64:$src),
1911 !strconcat("ld.global.nc.", TyStr), []>;
1912 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1913 regclass:$dst4), (ins imemAny:$src),
1914 !strconcat("ld.global.nc.", TyStr), []>;
1917 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ldg instantiations for 2- and 4-element loads. The doubled braces in
// the asm strings are TableGen escapes for literal braces around the
// destination list. 8-bit element variants use Int16Regs as the result class;
// f16 / f16x2 use the untyped "b16" / "b32" element strings.
1918 defm INT_PTX_LDG_G_v2i8_ELE
1919 : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1920 defm INT_PTX_LDG_G_v2i16_ELE
1921 : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1922 defm INT_PTX_LDG_G_v2i32_ELE
1923 : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1924 defm INT_PTX_LDG_G_v2f16_ELE
1925 : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1926 defm INT_PTX_LDG_G_v2f16x2_ELE
1927 : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1928 defm INT_PTX_LDG_G_v2f32_ELE
1929 : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1930 defm INT_PTX_LDG_G_v2i64_ELE
1931 : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1932 defm INT_PTX_LDG_G_v2f64_ELE
1933 : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1934 defm INT_PTX_LDG_G_v4i8_ELE
1935 : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1936 defm INT_PTX_LDG_G_v4i16_ELE
1937 : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1938 defm INT_PTX_LDG_G_v4i32_ELE
1939 : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
1940 defm INT_PTX_LDG_G_v4f16_ELE
1941 : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
1942 defm INT_PTX_LDG_G_v4f16x2_ELE
1943 : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
1944 defm INT_PTX_LDG_G_v4f32_ELE
1945 : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
// `cvta.<Str>` family, each record selected from intrinsic `Intrin`:
//   _yes      - 32-bit source and result: "cvta.<Str>.u32"
//   _yes_64   - 64-bit source and result: "cvta.<Str>.u64"
//   _yes_6432 - 32-bit source, 64-bit result. Emits a braced PTX block that
//               declares a .b64 temporary, widens $src into it with
//               cvt.u64.u32, then applies "cvta.<Str>.u64" to the temporary.
//               Only this record carries Requires<[useShortPtr]>.
1948 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
1949 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1950 !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
1951 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1952 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1953 !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
1954 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1955 def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
1956 "{{ .reg .b64 %tmp;\n\t"
1957 #" cvt.u64.u32 \t%tmp, $src;\n\t"
1958 #" cvta." # Str # ".u64 \t$result, %tmp; }}",
1959 [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
1960 Requires<[useShortPtr]>;
// `cvta.to.<Str>` family, each record selected from intrinsic `Intrin`:
//   _yes      - 32-bit source and result: "cvta.to.<Str>.u32"
//   _yes_64   - 64-bit source and result: "cvta.to.<Str>.u64"
//   _yes_3264 - 64-bit source, 32-bit result. Emits a braced PTX block that
//               declares a .b64 temporary, applies "cvta.to.<Str>.u64" into
//               it, then narrows the temporary with cvt.u32.u64.
//               Only this record carries Requires<[useShortPtr]>.
1963 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
1964 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1965 !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
1966 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1967 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1968 !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
1969 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1970 def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
1971 "{{ .reg .b64 %tmp;\n\t"
1972 #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
1973 #" cvt.u32.u64 \t$result, %tmp; }}",
1974 [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
1975 Requires<[useShortPtr]>;
// Instantiations of the address-space conversions for the local, shared,
// global and const spaces. Note that the "const" asm string is paired with
// the `..._constant_...` intrinsic names.
// First group: specific space -> generic (cvta.<space>).
1978 defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
1979 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
1980 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
1981 defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
// Second group: generic -> specific space (cvta.to.<space>).
1983 defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
1984 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
1985 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
1986 defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
1989 // nvvm.ptr.gen.to.param
// int_nvvm_ptr_gen_to_param is selected to a plain register move: mov.u32 for
// an Int32Regs operand and mov.u64 for an Int64Regs operand. No cvta
// instruction is emitted for this intrinsic.
1990 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
1991 (ins Int32Regs:$src),
1992 "mov.u32 \t$result, $src;",
1993 [(set Int32Regs:$result,
1994 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
1995 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
1996 (ins Int64Regs:$src),
1997 "mov.u64 \t$result, $src;",
1998 [(set Int64Regs:$result,
1999 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2002 // nvvm.move intrinsics
// Register-to-register moves for the int_nvvm_move_* intrinsics. Integer
// variants use the untyped mov.b16 / mov.b32 / mov.b64, float and double use
// mov.f32 / mov.f64, and the pointer variants use mov.u32 / mov.u64.
// nvvm_move_ptr32 and nvvm_move_ptr64 both match int_nvvm_move_ptr and differ
// only in operand register class.
// NOTE(review): for the i16, i32, i64, ptr32 and ptr64 records the opening
// line of the pattern list is not visible in this view.
2003 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
2004 "mov.b16 \t$r, $s;",
2006 (int_nvvm_move_i16 Int16Regs:$s))]>;
2007 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2008 "mov.b32 \t$r, $s;",
2010 (int_nvvm_move_i32 Int32Regs:$s))]>;
2011 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2012 "mov.b64 \t$r, $s;",
2014 (int_nvvm_move_i64 Int64Regs:$s))]>;
2015 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
2016 "mov.f32 \t$r, $s;",
2017 [(set Float32Regs:$r,
2018 (int_nvvm_move_float Float32Regs:$s))]>;
2019 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
2020 "mov.f64 \t$r, $s;",
2021 [(set Float64Regs:$r,
2022 (int_nvvm_move_double Float64Regs:$s))]>;
2023 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2024 "mov.u32 \t$r, $s;",
2026 (int_nvvm_move_ptr Int32Regs:$s))]>;
2027 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2028 "mov.u64 \t$r, $s;",
2030 (int_nvvm_move_ptr Int64Regs:$s))]>;
2032 // @TODO: Are these actually needed, or will we always just see symbols
2033 // copied to registers first?
2034 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2035 "mov.u32 \t$r, $s;",
2037 (int_nvvm_move_ptr texternalsym:$s))]>;
2038 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2039 "mov.u64 \t$r, $s;",
2041 (int_nvvm_move_ptr texternalsym:$s))]>;*/
2044 // MoveParam %r1, param
2045 // ptr_local_to_gen %r2, %r1
2046 // ptr_gen_to_local %r3, %r2
2050 // @TODO: Revisit this. There is a type
2051 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2052 // instructions are not currently defined. However, we can use the ptr
2053 // variants and the asm printer will do the right thing.
// Folds a local->generic->local round trip applied to a MoveParam of an
// external symbol into a single pointer move. The i64 result form selects
// nvvm_move_ptr64 and the i32 form nvvm_move_ptr32, with the symbol passed
// directly as the move's source operand.
2054 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2055 (MoveParam texternalsym:$src)))),
2056 (nvvm_move_ptr64 texternalsym:$src)>;
2057 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2058 (MoveParam texternalsym:$src)))),
2059 (nvvm_move_ptr32 texternalsym:$src)>;
2062 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
2063 "mov.u64 \t$result, $src;", []>;
2065 //-----------------------------------
2066 // Compiler Error Warn
2067 // - Just ignore them in codegen
2068 //-----------------------------------
// The compiler.warn / compiler.error intrinsics produce no result and are
// selected to records whose asm string is only a `//` comment, so nothing
// executable is printed for them. The _32 and _64 variants differ only in the
// register class of the (otherwise unused in the asm string) $a operand.
2070 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2071 "// llvm.nvvm.compiler.warn()",
2072 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
2073 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2074 "// llvm.nvvm.compiler.warn()",
2075 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
2076 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2077 "// llvm.nvvm.compiler.error()",
2078 [(int_nvvm_compiler_error Int32Regs:$a)]>;
2079 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2080 "// llvm.nvvm.compiler.error()",
2081 [(int_nvvm_compiler_error Int64Regs:$a)]>;
// `isspacep.<space>` address-space tests for the const, global, local and
// shared spaces. Each record writes an Int1Regs result $d and comes in a
// _32 (Int32Regs address) and _64 (Int64Regs address) form selected from the
// matching int_nvvm_isspacep_<space> intrinsic.
// Only the two const variants carry Requires<[hasPTX31]>; the others have no
// Requires<> clause.
2086 def ISSPACEP_CONST_32
2087 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2088 "isspacep.const \t$d, $a;",
2089 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
2090 Requires<[hasPTX31]>;
2091 def ISSPACEP_CONST_64
2092 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2093 "isspacep.const \t$d, $a;",
2094 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
2095 Requires<[hasPTX31]>;
2096 def ISSPACEP_GLOBAL_32
2097 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2098 "isspacep.global \t$d, $a;",
2099 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
2100 def ISSPACEP_GLOBAL_64
2101 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2102 "isspacep.global \t$d, $a;",
2103 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
2104 def ISSPACEP_LOCAL_32
2105 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2106 "isspacep.local \t$d, $a;",
2107 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
2108 def ISSPACEP_LOCAL_64
2109 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2110 "isspacep.local \t$d, $a;",
2111 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
2112 def ISSPACEP_SHARED_32
2113 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2114 "isspacep.shared \t$d, $a;",
2115 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
2116 def ISSPACEP_SHARED_64
2117 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2118 "isspacep.shared \t$d, $a;",
2119 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2122 // Special register reads
// 32-bit move from a SpecialRegs operand into an Int32Regs destination.
// The record itself has an empty pattern list; it is only selected through
// the anonymous Pat records that follow.
2123 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
2124 (ins SpecialRegs:$r),
2125 "mov.b32 \t$d, $r;", []>;
// One pattern per environment register, N = 0..31:
// int_nvvm_read_ptx_sreg_envregN is selected as MOV_SPECIAL reading ENVREGN.
// The intrinsic index and the register index must stay in step on every line.
2127 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
2128 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
2129 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
2130 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
2131 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
2132 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
2133 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
2134 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
2135 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
2136 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
2137 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
2138 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2139 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2140 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2141 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2142 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2143 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2144 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2145 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2146 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2147 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2148 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2149 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2150 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2151 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2152 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2153 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2154 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2155 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2156 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2157 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2158 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2161 // rotate builtin support
// int_nvvm_rotate_b32 with an immediate amount, when hasHWROT32 holds.
// Emitted as shf.l.wrap.b32 with $src supplied for both source operands.
2163 def ROTATE_B32_HW_IMM
2164 : NVPTXInst<(outs Int32Regs:$dst),
2165 (ins Int32Regs:$src, i32imm:$amt),
2166 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2167 [(set Int32Regs:$dst,
2168 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2169 Requires<[hasHWROT32]> ;
// Same as ROTATE_B32_HW_IMM, but the rotate amount is in a register.
2171 def ROTATE_B32_HW_REG
2172 : NVPTXInst<(outs Int32Regs:$dst),
2173 (ins Int32Regs:$src, Int32Regs:$amt),
2174 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2175 [(set Int32Regs:$dst,
2176 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2177 Requires<[hasHWROT32]> ;
// Fallback for an immediate amount when noHWROT32 holds: select ROT32imm_sw,
// passing the amount and a second immediate derived from it by SUB_FRM_32.
// NOTE(review): SUB_FRM_32 is defined outside this section; presumably it
// yields 32 - amt -- confirm against its definition.
2179 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2180 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2181 Requires<[noHWROT32]> ;
// Fallback for a register amount when noHWROT32 holds: select ROTL32reg_sw.
2183 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2184 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2185 Requires<[noHWROT32]> ;
2187 let hasSideEffects = 0 in {
2188 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2189 !strconcat("{{\n\t",
2190 ".reg .b32 %dummy;\n\t",
2191 "mov.b64 \t{$dst,%dummy}, $src;\n\t",
2195 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2196 !strconcat("{{\n\t",
2197 ".reg .b32 %dummy;\n\t",
2198 "mov.b64 \t{%dummy,$dst}, $src;\n\t",
2203 let hasSideEffects = 0 in {
2205 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2206 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
// int_nvvm_swap_lo_hi_b64: rebuild the 64-bit value with its halves exchanged.
// The result of GET_HI_INT64 is passed as the first operand of PACK_TWO_INT32
// and the result of GET_LO_INT64 as the second.
2209 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2210 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2211 (GET_LO_INT64 Int64Regs:$src))> ;
2213 // Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
2215 let hasSideEffects = 0 in {
// shf.{l,r}.wrap.b32 with separate $lo and $hi inputs. All four records have
// an empty pattern list, so they are selected only where other patterns name
// them explicitly, and all four require hasHWROT32.

// Left funnel shift, immediate amount.
2216 def SHF_L_WRAP_B32_IMM
2217 : NVPTXInst<(outs Int32Regs:$dst),
2218 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2219 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2220 Requires<[hasHWROT32]>;
// Left funnel shift, register amount.
2222 def SHF_L_WRAP_B32_REG
2223 : NVPTXInst<(outs Int32Regs:$dst),
2224 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2225 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2226 Requires<[hasHWROT32]>;
// Right funnel shift, immediate amount.
2228 def SHF_R_WRAP_B32_IMM
2229 : NVPTXInst<(outs Int32Regs:$dst),
2230 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2231 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2232 Requires<[hasHWROT32]>;
// Right funnel shift, register amount.
2234 def SHF_R_WRAP_B32_REG
2235 : NVPTXInst<(outs Int32Regs:$dst),
2236 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2237 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2238 Requires<[hasHWROT32]>;
2241 // HW version of rotate 64
2242 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2244 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2245 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2246 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2247 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2248 Requires<[hasHWROT32]>;
2250 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2252 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2253 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2254 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2255 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2256 Requires<[hasHWROT32]>;
2259 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2261 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2262 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2263 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2264 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2265 Requires<[hasHWROT32]>;
2267 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2269 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2270 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2271 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2272 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2273 Requires<[hasHWROT32]>;
2275 // SW version of rotate 64
// Software fallback (noHWROT32) for int_nvvm_rotate_b64 with an immediate
// amount. ROT64imm_sw receives the amount itself plus a second immediate
// derived from it. Because this is a 64-bit rotate, that second immediate
// must come from the 64-bit transform SUB_FRM_64, the same one the
// rotate_right_b64 immediate pattern uses with ROT64imm_sw; SUB_FRM_32
// belongs only to the 32-bit rotate.
// NOTE(review): SUB_FRM_64 is defined outside this section; presumably it
// yields 64 - amt -- confirm against its definition.
2276 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2277 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
2278 Requires<[noHWROT32]>;
// Remaining software fallbacks for the 64-bit rotates; all require noHWROT32.

// Rotate left by a register amount.
2279 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2280 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2281 Requires<[noHWROT32]>;
// Rotate right by an immediate amount. Here the SUB_FRM_64-derived immediate
// is passed to ROT64imm_sw first and the raw amount second.
2282 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2283 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2284 Requires<[noHWROT32]>;
// Rotate right by a register amount.
2285 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2286 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2287 Requires<[noHWROT32]>;
2290 //-----------------------------------
2291 // Texture Intrinsics
2292 //-----------------------------------
2294 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2295 // also defined in NVPTXReplaceImageHandles.cpp
2297 // texmode_independent
2298 let IsTex = 1, IsTexModeUnified = 0 in {
2299 // Texture fetch instructions using handles
2301 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2302 Float32Regs:$b, Float32Regs:$a),
2303 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2304 "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2307 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2308 Float32Regs:$b, Float32Regs:$a),
2309 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2310 "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2312 def TEX_1D_F32_F32_LEVEL
2313 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2314 Float32Regs:$b, Float32Regs:$a),
2315 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
2316 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2317 "[$t, $s, \\{$x\\}], $lod;",
2319 def TEX_1D_F32_F32_GRAD
2320 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2321 Float32Regs:$b, Float32Regs:$a),
2322 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2323 Float32Regs:$gradx, Float32Regs:$grady),
2324 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2325 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2328 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2329 Int32Regs:$b, Int32Regs:$a),
2330 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2331 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2334 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2335 Int32Regs:$b, Int32Regs:$a),
2336 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2337 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2339 def TEX_1D_S32_F32_LEVEL
2340 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2341 Int32Regs:$b, Int32Regs:$a),
2342 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2344 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2345 "[$t, $s, \\{$x\\}], $lod;",
2347 def TEX_1D_S32_F32_GRAD
2348 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2349 Int32Regs:$b, Int32Regs:$a),
2350 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2351 Float32Regs:$gradx, Float32Regs:$grady),
2352 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2353 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2356 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2357 Int32Regs:$b, Int32Regs:$a),
2358 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2359 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2362 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2363 Int32Regs:$b, Int32Regs:$a),
2364 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2365 "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2367 def TEX_1D_U32_F32_LEVEL
2368 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2369 Int32Regs:$b, Int32Regs:$a),
2370 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2372 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2373 "[$t, $s, \\{$x\\}], $lod;",
2375 def TEX_1D_U32_F32_GRAD
2376 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2377 Int32Regs:$b, Int32Regs:$a),
2378 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2379 Float32Regs:$gradx, Float32Regs:$grady),
2380 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2381 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2384 def TEX_1D_ARRAY_F32_S32
2385 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2386 Float32Regs:$b, Float32Regs:$a),
2387 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2388 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2389 "[$t, $s, \\{$l, $x\\}];",
2391 def TEX_1D_ARRAY_F32_F32
2392 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2393 Float32Regs:$b, Float32Regs:$a),
2394 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2395 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2396 "[$t, $s, \\{$l, $x\\}];",
2398 def TEX_1D_ARRAY_F32_F32_LEVEL
2399 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2400 Float32Regs:$b, Float32Regs:$a),
2401 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2403 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2404 "[$t, $s, \\{$l, $x\\}], $lod;",
2406 def TEX_1D_ARRAY_F32_F32_GRAD
2407 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2408 Float32Regs:$b, Float32Regs:$a),
2409 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2410 Float32Regs:$gradx, Float32Regs:$grady),
2411 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2412 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2414 def TEX_1D_ARRAY_S32_S32
2415 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2416 Int32Regs:$b, Int32Regs:$a),
2417 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2418 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2419 "[$t, $s, \\{$l, $x\\}];",
2421 def TEX_1D_ARRAY_S32_F32
2422 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2423 Int32Regs:$b, Int32Regs:$a),
2424 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2425 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2426 "[$t, $s, \\{$l, $x\\}];",
2428 def TEX_1D_ARRAY_S32_F32_LEVEL
2429 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2430 Int32Regs:$b, Int32Regs:$a),
2431 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2433 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2434 "[$t, $s, \\{$l, $x\\}], $lod;",
2436 def TEX_1D_ARRAY_S32_F32_GRAD
2437 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2438 Int32Regs:$b, Int32Regs:$a),
2439 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2440 Float32Regs:$gradx, Float32Regs:$grady),
2441 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2442 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2444 def TEX_1D_ARRAY_U32_S32
2445 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2446 Int32Regs:$b, Int32Regs:$a),
2447 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2448 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2449 "[$t, $s, \\{$l, $x\\}];",
2451 def TEX_1D_ARRAY_U32_F32
2452 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2453 Int32Regs:$b, Int32Regs:$a),
2454 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2455 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2456 "[$t, $s, \\{$l, $x\\}];",
2458 def TEX_1D_ARRAY_U32_F32_LEVEL
2459 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2460 Int32Regs:$b, Int32Regs:$a),
2461 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2463 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2464 "[$t, $s, \\{$l, $x\\}], $lod;",
2466 def TEX_1D_ARRAY_U32_F32_GRAD
2467 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2468 Int32Regs:$b, Int32Regs:$a),
2469 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2470 Float32Regs:$gradx, Float32Regs:$grady),
2471 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2472 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2476 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2477 Float32Regs:$b, Float32Regs:$a),
2478 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2479 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2480 "[$t, $s, \\{$x, $y\\}];",
2483 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2484 Float32Regs:$b, Float32Regs:$a),
2485 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2486 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2487 "[$t, $s, \\{$x, $y\\}];",
2489 def TEX_2D_F32_F32_LEVEL
2490 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2491 Float32Regs:$b, Float32Regs:$a),
2492 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2494 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2495 "[$t, $s, \\{$x, $y\\}], $lod;",
2497 def TEX_2D_F32_F32_GRAD
2498 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2499 Float32Regs:$b, Float32Regs:$a),
2500 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2501 Float32Regs:$gradx0, Float32Regs:$gradx1,
2502 Float32Regs:$grady0, Float32Regs:$grady1),
2503 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2504 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2505 "\\{$grady0, $grady1\\};",
2508 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2509 Int32Regs:$b, Int32Regs:$a),
2510 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2511 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2512 "[$t, $s, \\{$x, $y\\}];",
2515 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2516 Int32Regs:$b, Int32Regs:$a),
2517 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2518 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2519 "[$t, $s, \\{$x, $y\\}];",
2521 def TEX_2D_S32_F32_LEVEL
2522 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2523 Int32Regs:$b, Int32Regs:$a),
2524 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2526 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2527 "[$t, $s, \\{$x, $y\\}], $lod;",
2529 def TEX_2D_S32_F32_GRAD
2530 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2531 Int32Regs:$b, Int32Regs:$a),
2532 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2533 Float32Regs:$gradx0, Float32Regs:$gradx1,
2534 Float32Regs:$grady0, Float32Regs:$grady1),
2535 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2536 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2537 "\\{$grady0, $grady1\\};",
2540 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2541 Int32Regs:$b, Int32Regs:$a),
2542 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2543 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2544 "[$t, $s, \\{$x, $y\\}];",
2547 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2548 Int32Regs:$b, Int32Regs:$a),
2549 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2550 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2551 "[$t, $s, \\{$x, $y\\}];",
2553 def TEX_2D_U32_F32_LEVEL
2554 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2555 Int32Regs:$b, Int32Regs:$a),
2556 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2558 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2559 "[$t, $s, \\{$x, $y\\}], $lod;",
2561 def TEX_2D_U32_F32_GRAD
2562 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2563 Int32Regs:$b, Int32Regs:$a),
2564 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2565 Float32Regs:$gradx0, Float32Regs:$gradx1,
2566 Float32Regs:$grady0, Float32Regs:$grady1),
2567 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2568 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2569 "\\{$grady0, $grady1\\};",
2572 def TEX_2D_ARRAY_F32_S32
2573 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2574 Float32Regs:$b, Float32Regs:$a),
2575 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2577 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2578 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2580 def TEX_2D_ARRAY_F32_F32
2581 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2582 Float32Regs:$b, Float32Regs:$a),
2583 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2585 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2586 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2588 def TEX_2D_ARRAY_F32_F32_LEVEL
2589 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2590 Float32Regs:$b, Float32Regs:$a),
2591 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2592 Float32Regs:$y, Float32Regs:$lod),
2593 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2594 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2596 def TEX_2D_ARRAY_F32_F32_GRAD
2597 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2598 Float32Regs:$b, Float32Regs:$a),
2599 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2600 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2601 Float32Regs:$grady0, Float32Regs:$grady1),
2602 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2603 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2604 "\\{$grady0, $grady1\\};",
2606 def TEX_2D_ARRAY_S32_S32
2607 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2608 Int32Regs:$b, Int32Regs:$a),
2609 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2611 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2612 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2614 def TEX_2D_ARRAY_S32_F32
2615 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2616 Int32Regs:$b, Int32Regs:$a),
2617 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2619 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2620 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2622 def TEX_2D_ARRAY_S32_F32_LEVEL
2623 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2624 Int32Regs:$b, Int32Regs:$a),
2625 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2626 Float32Regs:$y, Float32Regs:$lod),
2627 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2628 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2630 def TEX_2D_ARRAY_S32_F32_GRAD
2631 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2632 Int32Regs:$b, Int32Regs:$a),
2633 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2635 Float32Regs:$gradx0, Float32Regs:$gradx1,
2636 Float32Regs:$grady0, Float32Regs:$grady1),
2637 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2638 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2639 "\\{$grady0, $grady1\\};",
2641 def TEX_2D_ARRAY_U32_S32
2642 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2643 Int32Regs:$b, Int32Regs:$a),
2644 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2646 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2647 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2649 def TEX_2D_ARRAY_U32_F32
2650 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2651 Int32Regs:$b, Int32Regs:$a),
2652 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2654 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2655 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2657 def TEX_2D_ARRAY_U32_F32_LEVEL
2658 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2659 Int32Regs:$b, Int32Regs:$a),
2660 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2661 Float32Regs:$y, Float32Regs:$lod),
2662 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2663 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2665 def TEX_2D_ARRAY_U32_F32_GRAD
2666 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2667 Int32Regs:$b, Int32Regs:$a),
2668 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2670 Float32Regs:$gradx0, Float32Regs:$gradx1,
2671 Float32Regs:$grady0, Float32Regs:$grady1),
2672 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2673 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2674 "\\{$grady0, $grady1\\};",
2678 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2679 Float32Regs:$b, Float32Regs:$a),
2680 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2682 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2683 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2686 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2687 Float32Regs:$b, Float32Regs:$a),
2688 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2690 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2691 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2693 def TEX_3D_F32_F32_LEVEL
2694 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2695 Float32Regs:$b, Float32Regs:$a),
2696 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2697 Float32Regs:$z, Float32Regs:$lod),
2698 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2699 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2701 def TEX_3D_F32_F32_GRAD
2702 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2703 Float32Regs:$b, Float32Regs:$a),
2704 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2706 Float32Regs:$gradx0, Float32Regs:$gradx1,
2707 Float32Regs:$gradx2, Float32Regs:$grady0,
2708 Float32Regs:$grady1, Float32Regs:$grady2),
2709 "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2710 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2711 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2712 "\\{$grady0, $grady1, $grady2, $grady2\\};",
2715 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2716 Int32Regs:$b, Int32Regs:$a),
2717 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2719 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2720 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2723 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2724 Int32Regs:$b, Int32Regs:$a),
2725 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2727 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2728 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2730 def TEX_3D_S32_F32_LEVEL
2731 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2732 Int32Regs:$b, Int32Regs:$a),
2733 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2734 Float32Regs:$z, Float32Regs:$lod),
2735 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2736 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2738 def TEX_3D_S32_F32_GRAD
2739 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2740 Int32Regs:$b, Int32Regs:$a),
2741 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2743 Float32Regs:$gradx0, Float32Regs:$gradx1,
2744 Float32Regs:$gradx2, Float32Regs:$grady0,
2745 Float32Regs:$grady1, Float32Regs:$grady2),
2746 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2747 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2748 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2749 "\\{$grady0, $grady1, $grady2, $grady2\\};",
2752 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2753 Int32Regs:$b, Int32Regs:$a),
2754 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2756 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2757 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2760 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2761 Int32Regs:$b, Int32Regs:$a),
2762 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2764 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2765 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2767 def TEX_3D_U32_F32_LEVEL
2768 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2769 Int32Regs:$b, Int32Regs:$a),
2770 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2771 Float32Regs:$z, Float32Regs:$lod),
2772 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2773 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2775 def TEX_3D_U32_F32_GRAD
2776 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2777 Int32Regs:$b, Int32Regs:$a),
2778 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2780 Float32Regs:$gradx0, Float32Regs:$gradx1,
2781 Float32Regs:$gradx2, Float32Regs:$grady0,
2782 Float32Regs:$grady1, Float32Regs:$grady2),
2783 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2784 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2785 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2786 "\\{$grady0, $grady1, $grady2, $grady2\\};",
2789 def TEX_CUBE_F32_F32
2790 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2791 Float32Regs:$b, Float32Regs:$a),
2792 (ins Int64Regs:$t, Int64Regs:$s,
2793 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2794 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2795 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2797 def TEX_CUBE_F32_F32_LEVEL
2798 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2799 Float32Regs:$b, Float32Regs:$a),
2800 (ins Int64Regs:$t, Int64Regs:$s,
2801 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2803 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2804 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2806 def TEX_CUBE_S32_F32
2807 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2808 Int32Regs:$b, Int32Regs:$a),
2809 (ins Int64Regs:$t, Int64Regs:$s,
2810 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2811 "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2812 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2814 def TEX_CUBE_S32_F32_LEVEL
2815 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2816 Int32Regs:$b, Int32Regs:$a),
2817 (ins Int64Regs:$t, Int64Regs:$s,
2818 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2820 "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2821 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2823 def TEX_CUBE_U32_F32
2824 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2825 Int32Regs:$b, Int32Regs:$a),
2826 (ins Int64Regs:$t, Int64Regs:$s,
2827 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2828 "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2829 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2831 def TEX_CUBE_U32_F32_LEVEL
2832 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2833 Int32Regs:$b, Int32Regs:$a),
2834 (ins Int64Regs:$t, Int64Regs:$s,
2835 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2837 "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2838 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2841 def TEX_CUBE_ARRAY_F32_F32
2842 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2843 Float32Regs:$b, Float32Regs:$a),
2844 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2845 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2846 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2847 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2849 def TEX_CUBE_ARRAY_F32_F32_LEVEL
2850 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2851 Float32Regs:$b, Float32Regs:$a),
2852 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2853 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2855 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2856 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2858 def TEX_CUBE_ARRAY_S32_F32
2859 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2860 Int32Regs:$b, Int32Regs:$a),
2861 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2862 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2863 "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2864 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2866 def TEX_CUBE_ARRAY_S32_F32_LEVEL
2867 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2868 Int32Regs:$b, Int32Regs:$a),
2869 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2870 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2872 "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2873 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2875 def TEX_CUBE_ARRAY_U32_F32
2876 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2877 Int32Regs:$b, Int32Regs:$a),
2878 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2879 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2880 "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2881 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2883 def TEX_CUBE_ARRAY_U32_F32_LEVEL
2884 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2885 Int32Regs:$b, Int32Regs:$a),
2886 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2887 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2889 "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2890 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2893 def TLD4_R_2D_F32_F32
2894 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2895 Float32Regs:$v2, Float32Regs:$v3),
2896 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2897 "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2898 "[$t, $s, \\{$x, $y\\}];",
2900 def TLD4_G_2D_F32_F32
2901 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2902 Float32Regs:$v2, Float32Regs:$v3),
2903 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2904 "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2905 "[$t, $s, \\{$x, $y\\}];",
2907 def TLD4_B_2D_F32_F32
2908 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2909 Float32Regs:$v2, Float32Regs:$v3),
2910 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2911 "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2912 "[$t, $s, \\{$x, $y\\}];",
2914 def TLD4_A_2D_F32_F32
2915 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2916 Float32Regs:$v2, Float32Regs:$v3),
2917 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2918 "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2919 "[$t, $s, \\{$x, $y\\}];",
2921 def TLD4_R_2D_S32_F32
2922 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2923 Int32Regs:$v2, Int32Regs:$v3),
2924 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2925 "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2926 "[$t, $s, \\{$x, $y\\}];",
2928 def TLD4_G_2D_S32_F32
2929 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2930 Int32Regs:$v2, Int32Regs:$v3),
2931 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2932 "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2933 "[$t, $s, \\{$x, $y\\}];",
2935 def TLD4_B_2D_S32_F32
2936 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2937 Int32Regs:$v2, Int32Regs:$v3),
2938 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2939 "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2940 "[$t, $s, \\{$x, $y\\}];",
2942 def TLD4_A_2D_S32_F32
2943 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2944 Int32Regs:$v2, Int32Regs:$v3),
2945 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2946 "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2947 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.r.2d.v4.u32.f32": same operand lists as the S32 `.r` form (Int32Regs
// results, Float32Regs coordinates); only the result type suffix in the mnemonic is `.u32`.
2949 def TLD4_R_2D_U32_F32
2950 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2951 Int32Regs:$v2, Int32Regs:$v3),
2952 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2953 "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2954 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.g.2d.v4.u32.f32": same operand lists as the S32 `.g` form (Int32Regs
// results, Float32Regs coordinates); only the result type suffix in the mnemonic is `.u32`.
2956 def TLD4_G_2D_U32_F32
2957 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2958 Int32Regs:$v2, Int32Regs:$v3),
2959 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2960 "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2961 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.b.2d.v4.u32.f32": same operand lists as the S32 `.b` form (Int32Regs
// results, Float32Regs coordinates); only the result type suffix in the mnemonic is `.u32`.
2963 def TLD4_B_2D_U32_F32
2964 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2965 Int32Regs:$v2, Int32Regs:$v3),
2966 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2967 "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2968 "[$t, $s, \\{$x, $y\\}];",
// Prints "tld4.a.2d.v4.u32.f32": same operand lists as the S32 `.a` form (Int32Regs
// results, Float32Regs coordinates); only the result type suffix in the mnemonic is `.u32`.
2970 def TLD4_A_2D_U32_F32
2971 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2972 Int32Regs:$v2, Int32Regs:$v3),
2973 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2974 "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2975 "[$t, $s, \\{$x, $y\\}];",
2981 let IsTex = 1, IsTexModeUnified = 1 in {
2982 // Texture fetch instructions using handles (unified mode: each address list carries a single $t handle and no $s operand)
// Prints "tex.1d.v4.f32.s32": four Float32Regs results ($r, $g, $b, $a) addressed by
// [$t, {$x}] with an Int32Regs coordinate.
2983 def TEX_UNIFIED_1D_F32_S32
2984 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2985 Float32Regs:$b, Float32Regs:$a),
2986 (ins Int64Regs:$t, Int32Regs:$x),
2987 "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Prints "tex.1d.v4.f32.f32": four Float32Regs results ($r, $g, $b, $a) addressed by
// [$t, {$x}] with a Float32Regs coordinate.
2989 def TEX_UNIFIED_1D_F32_F32
2990 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2991 Float32Regs:$b, Float32Regs:$a),
2992 (ins Int64Regs:$t, Float32Regs:$x),
2993 "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Prints "tex.level.1d.v4.f32.f32": Float32Regs results and coordinate, with an extra
// Float32Regs:$lod operand printed after the address [$t, {$x}].
2995 def TEX_UNIFIED_1D_F32_F32_LEVEL
2996 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2997 Float32Regs:$b, Float32Regs:$a),
2998 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
2999 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3000 "[$t, \\{$x\\}], $lod;",
// Prints "tex.grad.1d.v4.f32.f32": Float32Regs results and coordinate; the address
// [$t, {$x}] is followed by two one-element braced operands {$gradx} and {$grady}.
3002 def TEX_UNIFIED_1D_F32_F32_GRAD
3003 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3004 Float32Regs:$b, Float32Regs:$a),
3005 (ins Int64Regs:$t, Float32Regs:$x,
3006 Float32Regs:$gradx, Float32Regs:$grady),
3007 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3008 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Prints "tex.1d.v4.s32.s32": four Int32Regs results ($r, $g, $b, $a) addressed by
// [$t, {$x}] with an Int32Regs coordinate.
3010 def TEX_UNIFIED_1D_S32_S32
3011 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3012 Int32Regs:$b, Int32Regs:$a),
3013 (ins Int64Regs:$t, Int32Regs:$x),
3014 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Prints "tex.1d.v4.s32.f32": four Int32Regs results ($r, $g, $b, $a) addressed by
// [$t, {$x}] with a Float32Regs coordinate.
3016 def TEX_UNIFIED_1D_S32_F32
3017 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3018 Int32Regs:$b, Int32Regs:$a),
3019 (ins Int64Regs:$t, Float32Regs:$x),
3020 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Prints "tex.level.1d.v4.s32.f32": Int32Regs results, Float32Regs coordinate $x, and
// a $lod operand printed after the address [$t, {$x}].
3022 def TEX_UNIFIED_1D_S32_F32_LEVEL
3023 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3024 Int32Regs:$b, Int32Regs:$a),
3025 (ins Int64Regs:$t, Float32Regs:$x,
3027 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3028 "[$t, \\{$x\\}], $lod;",
// Prints "tex.grad.1d.v4.s32.f32": Int32Regs results, Float32Regs coordinate; the
// address [$t, {$x}] is followed by the braced operands {$gradx} and {$grady}.
3030 def TEX_UNIFIED_1D_S32_F32_GRAD
3031 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3032 Int32Regs:$b, Int32Regs:$a),
3033 (ins Int64Regs:$t, Float32Regs:$x,
3034 Float32Regs:$gradx, Float32Regs:$grady),
3035 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3036 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Prints "tex.1d.v4.u32.s32": same operand lists as the S32_S32 form (Int32Regs results
// and coordinate); only the result type suffix in the mnemonic is `.u32`.
3038 def TEX_UNIFIED_1D_U32_S32
3039 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3040 Int32Regs:$b, Int32Regs:$a),
3041 (ins Int64Regs:$t, Int32Regs:$x),
3042 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Prints "tex.1d.v4.u32.f32": Int32Regs results with a Float32Regs coordinate at
// [$t, {$x}]; the mnemonic's result type suffix is `.u32`.
3044 def TEX_UNIFIED_1D_U32_F32
3045 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3046 Int32Regs:$b, Int32Regs:$a),
3047 (ins Int64Regs:$t, Float32Regs:$x),
3048 "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// Prints "tex.level.1d.v4.u32.f32": Int32Regs results, Float32Regs coordinate $x, and
// a $lod operand printed after the address [$t, {$x}].
3050 def TEX_UNIFIED_1D_U32_F32_LEVEL
3051 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3052 Int32Regs:$b, Int32Regs:$a),
3053 (ins Int64Regs:$t, Float32Regs:$x,
3055 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3056 "[$t, \\{$x\\}], $lod;",
// Prints "tex.grad.1d.v4.u32.f32": Int32Regs results, Float32Regs coordinate; the
// address [$t, {$x}] is followed by the braced operands {$gradx} and {$grady}.
3058 def TEX_UNIFIED_1D_U32_F32_GRAD
3059 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3060 Int32Regs:$b, Int32Regs:$a),
3061 (ins Int64Regs:$t, Float32Regs:$x,
3062 Float32Regs:$gradx, Float32Regs:$grady),
3063 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3064 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Prints "tex.a1d.v4.f32.s32": Float32Regs results; the coordinate vector is {$l, $x},
// both Int32Regs ($l is presumably the array layer index -- confirm).
3067 def TEX_UNIFIED_1D_ARRAY_F32_S32
3068 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3069 Float32Regs:$b, Float32Regs:$a),
3070 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3071 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3072 "[$t, \\{$l, $x\\}];",
// Prints "tex.a1d.v4.f32.f32": Float32Regs results; the coordinate vector {$l, $x}
// mixes Int32Regs:$l (presumably the array layer index -- confirm) with Float32Regs:$x.
3074 def TEX_UNIFIED_1D_ARRAY_F32_F32
3075 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3076 Float32Regs:$b, Float32Regs:$a),
3077 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3078 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3079 "[$t, \\{$l, $x\\}];",
// Prints "tex.level.a1d.v4.f32.f32": Float32Regs results, coordinate vector {$l, $x},
// and a $lod operand printed after the address.
3081 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3082 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3083 Float32Regs:$b, Float32Regs:$a),
3084 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3086 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3087 "[$t, \\{$l, $x\\}], $lod;",
// Prints "tex.grad.a1d.v4.f32.f32": Float32Regs results; the address [$t, {$l, $x}]
// is followed by the braced Float32Regs operands {$gradx} and {$grady}.
3089 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3090 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3091 Float32Regs:$b, Float32Regs:$a),
3092 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3093 Float32Regs:$gradx, Float32Regs:$grady),
3094 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3095 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Prints "tex.a1d.v4.s32.s32": Int32Regs results; the coordinate vector is {$l, $x},
// both Int32Regs ($l is presumably the array layer index -- confirm).
3097 def TEX_UNIFIED_1D_ARRAY_S32_S32
3098 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3099 Int32Regs:$b, Int32Regs:$a),
3100 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3101 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3102 "[$t, \\{$l, $x\\}];",
// Prints "tex.a1d.v4.s32.f32": Int32Regs results; the coordinate vector {$l, $x}
// mixes Int32Regs:$l with Float32Regs:$x.
3104 def TEX_UNIFIED_1D_ARRAY_S32_F32
3105 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3106 Int32Regs:$b, Int32Regs:$a),
3107 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3108 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3109 "[$t, \\{$l, $x\\}];",
// Prints "tex.level.a1d.v4.s32.f32": Int32Regs results, coordinate vector {$l, $x},
// and a $lod operand printed after the address.
3111 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3112 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3113 Int32Regs:$b, Int32Regs:$a),
3114 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3116 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3117 "[$t, \\{$l, $x\\}], $lod;",
// Prints "tex.grad.a1d.v4.s32.f32": Int32Regs results; the address [$t, {$l, $x}]
// is followed by the braced Float32Regs operands {$gradx} and {$grady}.
3119 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3120 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3121 Int32Regs:$b, Int32Regs:$a),
3122 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3123 Float32Regs:$gradx, Float32Regs:$grady),
3124 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3125 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Prints "tex.a1d.v4.u32.s32": same operand lists as the S32_S32 array form; only the
// result type suffix in the mnemonic is `.u32`.
3127 def TEX_UNIFIED_1D_ARRAY_U32_S32
3128 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3129 Int32Regs:$b, Int32Regs:$a),
3130 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3131 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3132 "[$t, \\{$l, $x\\}];",
// Prints "tex.a1d.v4.u32.f32": Int32Regs results; the coordinate vector {$l, $x}
// mixes Int32Regs:$l with Float32Regs:$x. Result type suffix is `.u32`.
3134 def TEX_UNIFIED_1D_ARRAY_U32_F32
3135 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3136 Int32Regs:$b, Int32Regs:$a),
3137 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3138 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3139 "[$t, \\{$l, $x\\}];",
// Prints "tex.level.a1d.v4.u32.f32": Int32Regs results, coordinate vector {$l, $x},
// and a $lod operand printed after the address.
3141 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3142 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3143 Int32Regs:$b, Int32Regs:$a),
3144 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3146 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3147 "[$t, \\{$l, $x\\}], $lod;",
// Prints "tex.grad.a1d.v4.u32.f32": Int32Regs results; the address [$t, {$l, $x}]
// is followed by the braced Float32Regs operands {$gradx} and {$grady}.
3149 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3150 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3151 Int32Regs:$b, Int32Regs:$a),
3152 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3153 Float32Regs:$gradx, Float32Regs:$grady),
3154 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3155 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Prints "tex.2d.v4.f32.s32": four Float32Regs results addressed by [$t, {$x, $y}]
// with Int32Regs coordinates.
3158 def TEX_UNIFIED_2D_F32_S32
3159 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3160 Float32Regs:$b, Float32Regs:$a),
3161 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3162 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3163 "[$t, \\{$x, $y\\}];",
// Prints "tex.2d.v4.f32.f32": four Float32Regs results addressed by [$t, {$x, $y}]
// with Float32Regs coordinates.
3165 def TEX_UNIFIED_2D_F32_F32
3166 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3167 Float32Regs:$b, Float32Regs:$a),
3168 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3169 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3170 "[$t, \\{$x, $y\\}];",
// Prints "tex.level.2d.v4.f32.f32": Float32Regs results and coordinates, with a $lod
// operand printed after the address [$t, {$x, $y}].
3172 def TEX_UNIFIED_2D_F32_F32_LEVEL
3173 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3174 Float32Regs:$b, Float32Regs:$a),
3175 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3177 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3178 "[$t, \\{$x, $y\\}], $lod;",
// Prints "tex.grad.2d.v4.f32.f32": Float32Regs results and coordinates; the address
// [$t, {$x, $y}] is followed by two 2-element vectors {$gradx0, $gradx1} and
// {$grady0, $grady1}.
3180 def TEX_UNIFIED_2D_F32_F32_GRAD
3181 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3182 Float32Regs:$b, Float32Regs:$a),
3183 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3184 Float32Regs:$gradx0, Float32Regs:$gradx1,
3185 Float32Regs:$grady0, Float32Regs:$grady1),
3186 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3187 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3188 "\\{$grady0, $grady1\\};",
// Prints "tex.2d.v4.s32.s32": four Int32Regs results addressed by [$t, {$x, $y}]
// with Int32Regs coordinates.
3190 def TEX_UNIFIED_2D_S32_S32
3191 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3192 Int32Regs:$b, Int32Regs:$a),
3193 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3194 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3195 "[$t, \\{$x, $y\\}];",
// Prints "tex.2d.v4.s32.f32": four Int32Regs results addressed by [$t, {$x, $y}]
// with Float32Regs coordinates.
3197 def TEX_UNIFIED_2D_S32_F32
3198 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3199 Int32Regs:$b, Int32Regs:$a),
3200 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3201 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3202 "[$t, \\{$x, $y\\}];",
// Prints "tex.level.2d.v4.s32.f32": Int32Regs results, Float32Regs coordinates, and a
// $lod operand printed after the address [$t, {$x, $y}].
3204 def TEX_UNIFIED_2D_S32_F32_LEVEL
3205 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3206 Int32Regs:$b, Int32Regs:$a),
3207 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3209 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3210 "[$t, \\{$x, $y\\}], $lod;",
// Prints "tex.grad.2d.v4.s32.f32": Int32Regs results, Float32Regs coordinates; the
// address is followed by {$gradx0, $gradx1} and {$grady0, $grady1}.
3212 def TEX_UNIFIED_2D_S32_F32_GRAD
3213 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3214 Int32Regs:$b, Int32Regs:$a),
3215 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3216 Float32Regs:$gradx0, Float32Regs:$gradx1,
3217 Float32Regs:$grady0, Float32Regs:$grady1),
3218 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3219 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3220 "\\{$grady0, $grady1\\};",
// Prints "tex.2d.v4.u32.s32": same operand lists as the S32_S32 2d form; only the
// result type suffix in the mnemonic is `.u32`.
3222 def TEX_UNIFIED_2D_U32_S32
3223 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3224 Int32Regs:$b, Int32Regs:$a),
3225 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3226 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3227 "[$t, \\{$x, $y\\}];",
// Prints "tex.2d.v4.u32.f32": Int32Regs results addressed by [$t, {$x, $y}] with
// Float32Regs coordinates; result type suffix is `.u32`.
3229 def TEX_UNIFIED_2D_U32_F32
3230 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3231 Int32Regs:$b, Int32Regs:$a),
3232 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3233 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3234 "[$t, \\{$x, $y\\}];",
// Prints "tex.level.2d.v4.u32.f32": Int32Regs results, Float32Regs coordinates, and a
// $lod operand printed after the address [$t, {$x, $y}].
3236 def TEX_UNIFIED_2D_U32_F32_LEVEL
3237 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3238 Int32Regs:$b, Int32Regs:$a),
3239 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3241 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3242 "[$t, \\{$x, $y\\}], $lod;",
// Prints "tex.grad.2d.v4.u32.f32": Int32Regs results, Float32Regs coordinates; the
// address is followed by {$gradx0, $gradx1} and {$grady0, $grady1}.
3244 def TEX_UNIFIED_2D_U32_F32_GRAD
3245 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3246 Int32Regs:$b, Int32Regs:$a),
3247 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3248 Float32Regs:$gradx0, Float32Regs:$gradx1,
3249 Float32Regs:$grady0, Float32Regs:$grady1),
3250 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3251 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3252 "\\{$grady0, $grady1\\};",
// Prints "tex.a2d.v4.f32.s32": Float32Regs results; the coordinate vector is
// {$l, $x, $y, $y} -- the fourth slot repeats $y (presumably padding; confirm in PTX ISA).
3255 def TEX_UNIFIED_2D_ARRAY_F32_S32
3256 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3257 Float32Regs:$b, Float32Regs:$a),
3258 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3260 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3261 "[$t, \\{$l, $x, $y, $y\\}];",
// Prints "tex.a2d.v4.f32.f32": Float32Regs results; the coordinate vector is
// {$l, $x, $y, $y} -- the fourth slot repeats $y (presumably padding; confirm in PTX ISA).
3263 def TEX_UNIFIED_2D_ARRAY_F32_F32
3264 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3265 Float32Regs:$b, Float32Regs:$a),
3266 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3268 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3269 "[$t, \\{$l, $x, $y, $y\\}];",
// Prints "tex.level.a2d.v4.f32.f32": Float32Regs results; coordinate vector
// {$l, $x, $y, $y} (fourth slot repeats $y), then Float32Regs:$lod after the address.
3271 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3272 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3273 Float32Regs:$b, Float32Regs:$a),
3274 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3275 Float32Regs:$y, Float32Regs:$lod),
3276 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3277 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// Prints "tex.grad.a2d.v4.f32.f32": Float32Regs results; coordinate vector
// {$l, $x, $y, $y} (fourth slot repeats $y), followed by the 2-element vectors
// {$gradx0, $gradx1} and {$grady0, $grady1}.
3279 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3280 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3281 Float32Regs:$b, Float32Regs:$a),
3282 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3283 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
3284 Float32Regs:$grady0, Float32Regs:$grady1),
3285 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3286 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3287 "\\{$grady0, $grady1\\};",
// Prints "tex.a2d.v4.s32.s32": Int32Regs results; the coordinate vector is
// {$l, $x, $y, $y} -- the fourth slot repeats $y (presumably padding; confirm in PTX ISA).
3289 def TEX_UNIFIED_2D_ARRAY_S32_S32
3290 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3291 Int32Regs:$b, Int32Regs:$a),
3292 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3294 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3295 "[$t, \\{$l, $x, $y, $y\\}];",
// Prints "tex.a2d.v4.s32.f32": Int32Regs results; the coordinate vector is
// {$l, $x, $y, $y} -- the fourth slot repeats $y (presumably padding; confirm in PTX ISA).
3297 def TEX_UNIFIED_2D_ARRAY_S32_F32
3298 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3299 Int32Regs:$b, Int32Regs:$a),
3300 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3302 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3303 "[$t, \\{$l, $x, $y, $y\\}];",
// Prints "tex.level.a2d.v4.s32.f32": Int32Regs results; coordinate vector
// {$l, $x, $y, $y} (fourth slot repeats $y), then Float32Regs:$lod after the address.
3305 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3306 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3307 Int32Regs:$b, Int32Regs:$a),
3308 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3309 Float32Regs:$y, Float32Regs:$lod),
3310 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3311 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// Prints "tex.grad.a2d.v4.s32.f32": Int32Regs results; coordinate vector
// {$l, $x, $y, $y} (fourth slot repeats $y), followed by {$gradx0, $gradx1} and
// {$grady0, $grady1}.
3313 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3314 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3315 Int32Regs:$b, Int32Regs:$a),
3316 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3318 Float32Regs:$gradx0, Float32Regs:$gradx1,
3319 Float32Regs:$grady0, Float32Regs:$grady1),
3320 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3321 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3322 "\\{$grady0, $grady1\\};",
// Prints "tex.a2d.v4.u32.s32": Int32Regs results; the coordinate vector is
// {$l, $x, $y, $y} (fourth slot repeats $y). Result type suffix is `.u32`.
3324 def TEX_UNIFIED_2D_ARRAY_U32_S32
3325 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3326 Int32Regs:$b, Int32Regs:$a),
3327 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3329 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3330 "[$t, \\{$l, $x, $y, $y\\}];",
// Prints "tex.a2d.v4.u32.f32": Int32Regs results; the coordinate vector is
// {$l, $x, $y, $y} (fourth slot repeats $y). Result type suffix is `.u32`.
3332 def TEX_UNIFIED_2D_ARRAY_U32_F32
3333 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3334 Int32Regs:$b, Int32Regs:$a),
3335 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3337 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3338 "[$t, \\{$l, $x, $y, $y\\}];",
// Prints "tex.level.a2d.v4.u32.f32": Int32Regs results; coordinate vector
// {$l, $x, $y, $y} (fourth slot repeats $y), then Float32Regs:$lod after the address.
3340 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3341 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3342 Int32Regs:$b, Int32Regs:$a),
3343 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3344 Float32Regs:$y, Float32Regs:$lod),
3345 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3346 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// Prints "tex.grad.a2d.v4.u32.f32": Int32Regs results; coordinate vector
// {$l, $x, $y, $y} (fourth slot repeats $y), followed by {$gradx0, $gradx1} and
// {$grady0, $grady1}.
3348 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3349 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3350 Int32Regs:$b, Int32Regs:$a),
3351 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3353 Float32Regs:$gradx0, Float32Regs:$gradx1,
3354 Float32Regs:$grady0, Float32Regs:$grady1),
3355 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3356 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3357 "\\{$grady0, $grady1\\};",
// Prints "tex.3d.v4.f32.s32": Float32Regs results; the coordinate vector is
// {$x, $y, $z, $z} -- the fourth slot repeats $z (presumably padding; confirm in PTX ISA).
3360 def TEX_UNIFIED_3D_F32_S32
3361 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3362 Float32Regs:$b, Float32Regs:$a),
3363 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3365 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3366 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.3d.v4.f32.f32": Float32Regs results; the coordinate vector is
// {$x, $y, $z, $z} -- the fourth slot repeats $z (presumably padding; confirm in PTX ISA).
3368 def TEX_UNIFIED_3D_F32_F32
3369 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3370 Float32Regs:$b, Float32Regs:$a),
3371 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3373 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3374 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.3d.v4.f32.f32": Float32Regs results; coordinate vector
// {$x, $y, $z, $z} (fourth slot repeats $z), then Float32Regs:$lod after the address.
3376 def TEX_UNIFIED_3D_F32_F32_LEVEL
3377 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3378 Float32Regs:$b, Float32Regs:$a),
3379 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3380 Float32Regs:$z, Float32Regs:$lod),
3381 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3382 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.grad.3d.v4.f32.f32": Float32Regs results. The coordinate vector and both
// gradient vectors are printed with four elements, each repeating its third operand
// ($z, $gradx2, $grady2) in the fourth slot (presumably padding; confirm in PTX ISA).
3384 def TEX_UNIFIED_3D_F32_F32_GRAD
3385 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3386 Float32Regs:$b, Float32Regs:$a),
3387 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3389 Float32Regs:$gradx0, Float32Regs:$gradx1,
3390 Float32Regs:$gradx2, Float32Regs:$grady0,
3391 Float32Regs:$grady1, Float32Regs:$grady2),
3392 "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3393 "[$t, \\{$x, $y, $z, $z\\}], "
3394 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3395 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Prints "tex.3d.v4.s32.s32": Int32Regs results; the coordinate vector is
// {$x, $y, $z, $z} -- the fourth slot repeats $z (presumably padding; confirm in PTX ISA).
3397 def TEX_UNIFIED_3D_S32_S32
3398 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3399 Int32Regs:$b, Int32Regs:$a),
3400 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3402 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3403 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.3d.v4.s32.f32": Int32Regs results; the coordinate vector is
// {$x, $y, $z, $z} -- the fourth slot repeats $z (presumably padding; confirm in PTX ISA).
3405 def TEX_UNIFIED_3D_S32_F32
3406 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3407 Int32Regs:$b, Int32Regs:$a),
3408 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3410 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3411 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.3d.v4.s32.f32": Int32Regs results; coordinate vector
// {$x, $y, $z, $z} (fourth slot repeats $z), then Float32Regs:$lod after the address.
3413 def TEX_UNIFIED_3D_S32_F32_LEVEL
3414 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3415 Int32Regs:$b, Int32Regs:$a),
3416 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3417 Float32Regs:$z, Float32Regs:$lod),
3418 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3419 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.grad.3d.v4.s32.f32": Int32Regs results. The coordinate vector and both
// gradient vectors are printed with four elements, each repeating its third operand
// ($z, $gradx2, $grady2) in the fourth slot (presumably padding; confirm in PTX ISA).
3421 def TEX_UNIFIED_3D_S32_F32_GRAD
3422 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3423 Int32Regs:$b, Int32Regs:$a),
3424 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3426 Float32Regs:$gradx0, Float32Regs:$gradx1,
3427 Float32Regs:$gradx2, Float32Regs:$grady0,
3428 Float32Regs:$grady1, Float32Regs:$grady2),
3429 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3430 "[$t, \\{$x, $y, $z, $z\\}], "
3431 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3432 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Prints "tex.3d.v4.u32.s32": Int32Regs results; the coordinate vector is
// {$x, $y, $z, $z} (fourth slot repeats $z). Result type suffix is `.u32`.
3434 def TEX_UNIFIED_3D_U32_S32
3435 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3436 Int32Regs:$b, Int32Regs:$a),
3437 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3439 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3440 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.3d.v4.u32.f32": Int32Regs results; the coordinate vector is
// {$x, $y, $z, $z} (fourth slot repeats $z). Result type suffix is `.u32`.
3442 def TEX_UNIFIED_3D_U32_F32
3443 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3444 Int32Regs:$b, Int32Regs:$a),
3445 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3447 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3448 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.3d.v4.u32.f32": Int32Regs results; coordinate vector
// {$x, $y, $z, $z} (fourth slot repeats $z), then Float32Regs:$lod after the address.
3450 def TEX_UNIFIED_3D_U32_F32_LEVEL
3451 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3452 Int32Regs:$b, Int32Regs:$a),
3453 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3454 Float32Regs:$z, Float32Regs:$lod),
3455 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3456 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.grad.3d.v4.u32.f32": Int32Regs results. The coordinate vector and both
// gradient vectors are printed with four elements, each repeating its third operand
// ($z, $gradx2, $grady2) in the fourth slot (presumably padding; confirm in PTX ISA).
3458 def TEX_UNIFIED_3D_U32_F32_GRAD
3459 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3460 Int32Regs:$b, Int32Regs:$a),
3461 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3463 Float32Regs:$gradx0, Float32Regs:$gradx1,
3464 Float32Regs:$gradx2, Float32Regs:$grady0,
3465 Float32Regs:$grady1, Float32Regs:$grady2),
3466 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3467 "[$t, \\{$x, $y, $z, $z\\}], "
3468 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3469 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Prints "tex.cube.v4.f32.f32": Float32Regs results and Float32Regs coordinates; the
// coordinate vector {$x, $y, $z, $z} repeats $z in its fourth slot.
3472 def TEX_UNIFIED_CUBE_F32_F32
3473 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3474 Float32Regs:$b, Float32Regs:$a),
3476 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3477 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3478 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.cube.v4.f32.f32": Float32Regs results; coordinate vector
// {$x, $y, $z, $z} (fourth slot repeats $z), then $lod after the address.
3480 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
3481 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3482 Float32Regs:$b, Float32Regs:$a),
3484 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3486 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3487 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.cube.v4.s32.f32": Int32Regs results and Float32Regs coordinates; the
// coordinate vector {$x, $y, $z, $z} repeats $z in its fourth slot.
3489 def TEX_UNIFIED_CUBE_S32_F32
3490 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3491 Int32Regs:$b, Int32Regs:$a),
3493 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3494 "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3495 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.cube.v4.s32.f32": Int32Regs results; coordinate vector
// {$x, $y, $z, $z} (fourth slot repeats $z), then $lod after the address.
3497 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
3498 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3499 Int32Regs:$b, Int32Regs:$a),
3501 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3503 "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3504 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.cube.v4.u32.f32": Int32Regs results and Float32Regs coordinates; the
// coordinate vector {$x, $y, $z, $z} repeats $z. Result type suffix is `.u32`.
3506 def TEX_UNIFIED_CUBE_U32_F32
3507 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3508 Int32Regs:$b, Int32Regs:$a),
3510 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3511 "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3512 "[$t, \\{$x, $y, $z, $z\\}];",
// Prints "tex.level.cube.v4.u32.f32": Int32Regs results; coordinate vector
// {$x, $y, $z, $z} (fourth slot repeats $z), then $lod after the address.
3514 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
3515 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3516 Int32Regs:$b, Int32Regs:$a),
3518 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3520 "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3521 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Prints "tex.acube.v4.f32.f32": Float32Regs results; the coordinate vector
// {$l, $x, $y, $z} has four distinct operands (Int32Regs:$l plus three Float32Regs).
3524 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
3525 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3526 Float32Regs:$b, Float32Regs:$a),
3527 (ins Int64Regs:$t, Int32Regs:$l,
3528 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3529 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3530 "[$t, \\{$l, $x, $y, $z\\}];",
// Prints "tex.level.acube.v4.f32.f32": Float32Regs results; coordinate vector
// {$l, $x, $y, $z}, then $lod after the address.
3532 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3533 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3534 Float32Regs:$b, Float32Regs:$a),
3535 (ins Int64Regs:$t, Int32Regs:$l,
3536 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3538 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3539 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// Prints "tex.acube.v4.s32.f32": Int32Regs results; the coordinate vector
// {$l, $x, $y, $z} has four distinct operands (Int32Regs:$l plus three Float32Regs).
3541 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
3542 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3543 Int32Regs:$b, Int32Regs:$a),
3544 (ins Int64Regs:$t, Int32Regs:$l,
3545 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3546 "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3547 "[$t, \\{$l, $x, $y, $z\\}];",
// Prints "tex.level.acube.v4.s32.f32": Int32Regs results; coordinate vector
// {$l, $x, $y, $z}, then $lod after the address.
3549 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3550 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3551 Int32Regs:$b, Int32Regs:$a),
3552 (ins Int64Regs:$t, Int32Regs:$l,
3553 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3555 "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3556 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// Prints "tex.acube.v4.u32.f32": Int32Regs results; the coordinate vector
// {$l, $x, $y, $z} has four distinct operands. Result type suffix is `.u32`.
3558 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
3559 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3560 Int32Regs:$b, Int32Regs:$a),
3561 (ins Int64Regs:$t, Int32Regs:$l,
3562 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3563 "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3564 "[$t, \\{$l, $x, $y, $z\\}];",
// Prints "tex.level.acube.v4.u32.f32": Int32Regs results; coordinate vector
// {$l, $x, $y, $z}, then $lod after the address.
3566 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3567 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3568 Int32Regs:$b, Int32Regs:$a),
3569 (ins Int64Regs:$t, Int32Regs:$l,
3570 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3572 "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3573 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// Prints "tld4.r.2d.v4.f32.f32": `.r` variant with four Float32Regs results ($v0-$v3);
// the address list is [$t, {$x, $y}] (single Int64Regs operand, no $s).
3576 def TLD4_UNIFIED_R_2D_F32_F32
3577 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3578 Float32Regs:$v2, Float32Regs:$v3),
3579 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3580 "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3581 "[$t, \\{$x, $y\\}];",
// Prints "tld4.g.2d.v4.f32.f32": `.g` variant with four Float32Regs results ($v0-$v3);
// the address list is [$t, {$x, $y}] (single Int64Regs operand, no $s).
3583 def TLD4_UNIFIED_G_2D_F32_F32
3584 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3585 Float32Regs:$v2, Float32Regs:$v3),
3586 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3587 "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3588 "[$t, \\{$x, $y\\}];",
// Prints "tld4.b.2d.v4.f32.f32": `.b` variant with four Float32Regs results ($v0-$v3);
// the address list is [$t, {$x, $y}] (single Int64Regs operand, no $s).
3590 def TLD4_UNIFIED_B_2D_F32_F32
3591 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3592 Float32Regs:$v2, Float32Regs:$v3),
3593 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3594 "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3595 "[$t, \\{$x, $y\\}];",
// Prints "tld4.a.2d.v4.f32.f32": `.a` variant with four Float32Regs results ($v0-$v3);
// the address list is [$t, {$x, $y}] (single Int64Regs operand, no $s).
3597 def TLD4_UNIFIED_A_2D_F32_F32
3598 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3599 Float32Regs:$v2, Float32Regs:$v3),
3600 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3601 "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3602 "[$t, \\{$x, $y\\}];",
// Prints "tld4.r.2d.v4.s32.f32": `.r` variant with four Int32Regs results ($v0-$v3),
// Float32Regs coordinates, and the address list [$t, {$x, $y}].
3604 def TLD4_UNIFIED_R_2D_S32_F32
3605 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3606 Int32Regs:$v2, Int32Regs:$v3),
3607 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3608 "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3609 "[$t, \\{$x, $y\\}];",
// Prints "tld4.g.2d.v4.s32.f32": `.g` variant with four Int32Regs results ($v0-$v3),
// Float32Regs coordinates, and the address list [$t, {$x, $y}].
3611 def TLD4_UNIFIED_G_2D_S32_F32
3612 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3613 Int32Regs:$v2, Int32Regs:$v3),
3614 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3615 "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3616 "[$t, \\{$x, $y\\}];",
// Prints "tld4.b.2d.v4.s32.f32": `.b` variant with four Int32Regs results ($v0-$v3),
// Float32Regs coordinates, and the address list [$t, {$x, $y}].
3618 def TLD4_UNIFIED_B_2D_S32_F32
3619 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3620 Int32Regs:$v2, Int32Regs:$v3),
3621 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3622 "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3623 "[$t, \\{$x, $y\\}];",
// Prints "tld4.a.2d.v4.s32.f32": `.a` variant with four Int32Regs results ($v0-$v3),
// Float32Regs coordinates, and the address list [$t, {$x, $y}].
3625 def TLD4_UNIFIED_A_2D_S32_F32
3626 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3627 Int32Regs:$v2, Int32Regs:$v3),
3628 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3629 "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3630 "[$t, \\{$x, $y\\}];",
// Prints "tld4.r.2d.v4.u32.f32": same operand lists as the unified S32 `.r` form; only
// the result type suffix in the mnemonic is `.u32`.
3632 def TLD4_UNIFIED_R_2D_U32_F32
3633 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3634 Int32Regs:$v2, Int32Regs:$v3),
3635 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3636 "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3637 "[$t, \\{$x, $y\\}];",
// Prints "tld4.g.2d.v4.u32.f32": same operand lists as the unified S32 `.g` form; only
// the result type suffix in the mnemonic is `.u32`.
3639 def TLD4_UNIFIED_G_2D_U32_F32
3640 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3641 Int32Regs:$v2, Int32Regs:$v3),
3642 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3643 "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3644 "[$t, \\{$x, $y\\}];",
// Prints "tld4.b.2d.v4.u32.f32": same operand lists as the unified S32 `.b` form; only
// the result type suffix in the mnemonic is `.u32`.
3646 def TLD4_UNIFIED_B_2D_U32_F32
3647 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3648 Int32Regs:$v2, Int32Regs:$v3),
3649 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3650 "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3651 "[$t, \\{$x, $y\\}];",
// Prints "tld4.a.2d.v4.u32.f32": same operand lists as the unified S32 `.a` form; only
// the result type suffix in the mnemonic is `.u32`.
3653 def TLD4_UNIFIED_A_2D_U32_F32
3654 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3655 Int32Regs:$v2, Int32Regs:$v3),
3656 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3657 "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3658 "[$t, \\{$x, $y\\}];",
3664 //=== Surface load instructions
// Prints "suld.b.1d.b8.clamp": one result in Int16Regs:$r (note the `.b8` element is
// declared with the Int16Regs class), addressed by [$s, {$x}].
3667 def SULD_1D_I8_CLAMP
3668 : NVPTXInst<(outs Int16Regs:$r),
3669 (ins Int64Regs:$s, Int32Regs:$x),
3670 "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
// Prints "suld.b.1d.b16.clamp": one Int16Regs result $r addressed by [$s, {$x}].
3672 def SULD_1D_I16_CLAMP
3673 : NVPTXInst<(outs Int16Regs:$r),
3674 (ins Int64Regs:$s, Int32Regs:$x),
3675 "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
// Prints "suld.b.1d.b32.clamp": one Int32Regs result $r addressed by [$s, {$x}].
3677 def SULD_1D_I32_CLAMP
3678 : NVPTXInst<(outs Int32Regs:$r),
3679 (ins Int64Regs:$s, Int32Regs:$x),
3680 "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
// Prints "suld.b.1d.b64.clamp": one Int64Regs result $r addressed by [$s, {$x}].
3682 def SULD_1D_I64_CLAMP
3683 : NVPTXInst<(outs Int64Regs:$r),
3684 (ins Int64Regs:$s, Int32Regs:$x),
3685 "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
// Prints "suld.b.a1d.b8.clamp": one result in Int16Regs:$r (the `.b8` element uses the
// Int16Regs class), addressed by [$s, {$l, $x}].
3688 def SULD_1D_ARRAY_I8_CLAMP
3689 : NVPTXInst<(outs Int16Regs:$r),
3690 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3691 "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// Prints "suld.b.a1d.b16.clamp": one Int16Regs result $r addressed by [$s, {$l, $x}].
3693 def SULD_1D_ARRAY_I16_CLAMP
3694 : NVPTXInst<(outs Int16Regs:$r),
3695 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3696 "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// Prints "suld.b.a1d.b32.clamp": one Int32Regs result $r addressed by [$s, {$l, $x}].
3698 def SULD_1D_ARRAY_I32_CLAMP
3699 : NVPTXInst<(outs Int32Regs:$r),
3700 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3701 "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// Prints "suld.b.a1d.b64.clamp": one Int64Regs result $r addressed by [$s, {$l, $x}].
3703 def SULD_1D_ARRAY_I64_CLAMP
3704 : NVPTXInst<(outs Int64Regs:$r),
3705 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3706 "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// Prints "suld.b.2d.b8.clamp": one result in Int16Regs:$r (the `.b8` element uses the
// Int16Regs class), addressed by [$s, {$x, $y}].
3709 def SULD_2D_I8_CLAMP
3710 : NVPTXInst<(outs Int16Regs:$r),
3711 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3712 "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// Prints "suld.b.2d.b16.clamp": one Int16Regs result $r addressed by [$s, {$x, $y}].
3714 def SULD_2D_I16_CLAMP
3715 : NVPTXInst<(outs Int16Regs:$r),
3716 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3717 "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// Prints "suld.b.2d.b32.clamp": one Int32Regs result $r addressed by [$s, {$x, $y}].
3719 def SULD_2D_I32_CLAMP
3720 : NVPTXInst<(outs Int32Regs:$r),
3721 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3722 "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// Prints "suld.b.2d.b64.clamp": one Int64Regs result $r addressed by [$s, {$x, $y}].
3724 def SULD_2D_I64_CLAMP
3725 : NVPTXInst<(outs Int64Regs:$r),
3726 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3727 "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// Prints "suld.b.a2d.b8.clamp": one result in Int16Regs:$r; the coordinate vector
// {$l, $x, $y, $y} repeats $y in its fourth slot (presumably padding; confirm in PTX ISA).
3730 def SULD_2D_ARRAY_I8_CLAMP
3731 : NVPTXInst<(outs Int16Regs:$r),
3732 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3733 "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// Prints "suld.b.a2d.b16.clamp": one Int16Regs result $r; the coordinate vector
// {$l, $x, $y, $y} repeats $y in its fourth slot.
3735 def SULD_2D_ARRAY_I16_CLAMP
3736 : NVPTXInst<(outs Int16Regs:$r),
3737 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3738 "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// Prints "suld.b.a2d.b32.clamp": one Int32Regs result $r; the coordinate vector
// {$l, $x, $y, $y} repeats $y in its fourth slot.
3740 def SULD_2D_ARRAY_I32_CLAMP
3741 : NVPTXInst<(outs Int32Regs:$r),
3742 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3743 "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// Prints "suld.b.a2d.b64.clamp": one Int64Regs result $r; the coordinate vector
// {$l, $x, $y, $y} repeats $y in its fourth slot.
3745 def SULD_2D_ARRAY_I64_CLAMP
3746 : NVPTXInst<(outs Int64Regs:$r),
3747 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3748 "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// Prints "suld.b.3d.b8.clamp": one result in Int16Regs:$r; the coordinate vector
// {$x, $y, $z, $z} repeats $z in its fourth slot (presumably padding; confirm in PTX ISA).
3751 def SULD_3D_I8_CLAMP
3752 : NVPTXInst<(outs Int16Regs:$r),
3753 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3754 "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Prints "suld.b.3d.b16.clamp": one Int16Regs result $r; the coordinate vector
// {$x, $y, $z, $z} repeats $z in its fourth slot.
3756 def SULD_3D_I16_CLAMP
3757 : NVPTXInst<(outs Int16Regs:$r),
3758 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3759 "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Prints "suld.b.3d.b32.clamp": one Int32Regs result $r; the coordinate vector
// {$x, $y, $z, $z} repeats $z in its fourth slot.
3761 def SULD_3D_I32_CLAMP
3762 : NVPTXInst<(outs Int32Regs:$r),
3763 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3764 "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Prints "suld.b.3d.b64.clamp": one Int64Regs result $r; the coordinate vector
// {$x, $y, $z, $z} repeats $z in its fourth slot.
3766 def SULD_3D_I64_CLAMP
3767 : NVPTXInst<(outs Int64Regs:$r),
3768 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3769 "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Prints "suld.b.1d.v2.b8.clamp": two results ($r, $g) in Int16Regs (the `.b8` elements
// use the Int16Regs class), addressed by [$s, {$x}].
3774 def SULD_1D_V2I8_CLAMP
3775 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3776 (ins Int64Regs:$s, Int32Regs:$x),
3777 "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// Prints "suld.b.1d.v2.b16.clamp": two Int16Regs results ($r, $g) addressed by [$s, {$x}].
3779 def SULD_1D_V2I16_CLAMP
3780 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3781 (ins Int64Regs:$s, Int32Regs:$x),
3782 "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// Prints "suld.b.1d.v2.b32.clamp": two Int32Regs results ($r, $g) addressed by [$s, {$x}].
3784 def SULD_1D_V2I32_CLAMP
3785 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3786 (ins Int64Regs:$s, Int32Regs:$x),
3787 "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// Prints "suld.b.1d.v2.b64.clamp": two Int64Regs results ($r, $g) addressed by [$s, {$x}].
3789 def SULD_1D_V2I64_CLAMP
3790 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3791 (ins Int64Regs:$s, Int32Regs:$x),
3792 "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// Prints "suld.b.a1d.v2.b8.clamp": two results ($r, $g) in Int16Regs (the `.b8`
// elements use the Int16Regs class), addressed by [$s, {$l, $x}].
3795 def SULD_1D_ARRAY_V2I8_CLAMP
3796 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3797 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3798 "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// Prints "suld.b.a1d.v2.b16.clamp": two Int16Regs results ($r, $g) addressed by
// [$s, {$l, $x}].
3800 def SULD_1D_ARRAY_V2I16_CLAMP
3801 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3802 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3803 "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// Prints "suld.b.a1d.v2.b32.clamp": two Int32Regs results ($r, $g) addressed by
// [$s, {$l, $x}].
3805 def SULD_1D_ARRAY_V2I32_CLAMP
3806 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3807 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3808 "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// Prints "suld.b.a1d.v2.b64.clamp": two Int64Regs results ($r, $g) addressed by
// [$s, {$l, $x}].
3810 def SULD_1D_ARRAY_V2I64_CLAMP
3811 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3812 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3813 "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// Prints "suld.b.2d.v2.b8.clamp": two results ($r, $g) in Int16Regs (the `.b8` elements
// use the Int16Regs class), addressed by [$s, {$x, $y}].
3816 def SULD_2D_V2I8_CLAMP
3817 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3818 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3819 "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// Prints "suld.b.2d.v2.b16.clamp": two Int16Regs results ($r, $g) addressed by
// [$s, {$x, $y}].
3821 def SULD_2D_V2I16_CLAMP
3822 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3823 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3824 "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// Prints "suld.b.2d.v2.b32.clamp": two Int32Regs results ($r, $g) addressed by
// [$s, {$x, $y}].
3826 def SULD_2D_V2I32_CLAMP
3827 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3828 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3829 "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// Prints "suld.b.2d.v2.b64.clamp": two Int64Regs results ($r, $g) addressed by
// [$s, {$x, $y}].
3831 def SULD_2D_V2I64_CLAMP
3832 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3833 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3834 "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// Prints "suld.b.a2d.v2.b8.clamp": two results ($r, $g) in Int16Regs; the coordinate
// vector {$l, $x, $y, $y} repeats $y in its fourth slot (presumably padding; confirm).
3837 def SULD_2D_ARRAY_V2I8_CLAMP
3838 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3839 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3840 "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
3841 "[$s, \\{$l, $x, $y, $y\\}];",
3843 def SULD_2D_ARRAY_V2I16_CLAMP
3844 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3845 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3846 "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
3847 "[$s, \\{$l, $x, $y, $y\\}];",
3849 def SULD_2D_ARRAY_V2I32_CLAMP
3850 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3851 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3852 "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
3853 "[$s, \\{$l, $x, $y, $y\\}];",
3855 def SULD_2D_ARRAY_V2I64_CLAMP
3856 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3857 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3858 "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
3859 "[$s, \\{$l, $x, $y, $y\\}];",
3862 def SULD_3D_V2I8_CLAMP
3863 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3864 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3865 "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3867 def SULD_3D_V2I16_CLAMP
3868 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3869 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3870 "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3872 def SULD_3D_V2I32_CLAMP
3873 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3874 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3875 "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3877 def SULD_3D_V2I64_CLAMP
3878 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3879 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3880 "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Surface loads (suld.b) returning four elements {$r, $g, $b, $a} with the
// .clamp out-of-bounds mode, for 1D, 1D-array, 2D, 2D-array and 3D surfaces.
// Only b8, b16 and b32 element widths are defined in this group; b8 and b16
// results both use Int16Regs. $s is a 64-bit register (presumably the surface
// handle - confirm) and all coordinates are 32-bit registers.
3885 def SULD_1D_V4I8_CLAMP
3886 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3887 (ins Int64Regs:$s, Int32Regs:$x),
3888 "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3890 def SULD_1D_V4I16_CLAMP
3891 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3892 (ins Int64Regs:$s, Int32Regs:$x),
3893 "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3895 def SULD_1D_V4I32_CLAMP
3896 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3897 (ins Int64Regs:$s, Int32Regs:$x),
3898 "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// 1D array: the coordinate vector is {layer, x}.
3901 def SULD_1D_ARRAY_V4I8_CLAMP
3902 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3903 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3904 "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3905 "[$s, \\{$l, $x\\}];",
3907 def SULD_1D_ARRAY_V4I16_CLAMP
3908 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3909 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3910 "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3911 "[$s, \\{$l, $x\\}];",
3913 def SULD_1D_ARRAY_V4I32_CLAMP
3914 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3915 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3916 "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3917 "[$s, \\{$l, $x\\}];",
// 2D: the coordinate vector is {x, y}.
3920 def SULD_2D_V4I8_CLAMP
3921 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3922 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3923 "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3925 def SULD_2D_V4I16_CLAMP
3926 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3927 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3928 "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3930 def SULD_2D_V4I32_CLAMP
3931 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3932 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3933 "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// 2D array: the emitted coordinate vector has four slots, with $y repeated in
// the last one. NOTE(review): presumably the final slot is padding that PTX
// ignores - confirm against the PTX ISA.
3936 def SULD_2D_ARRAY_V4I8_CLAMP
3937 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3938 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3939 "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3940 "[$s, \\{$l, $x, $y, $y\\}];",
3942 def SULD_2D_ARRAY_V4I16_CLAMP
3943 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3944 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3945 "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3946 "[$s, \\{$l, $x, $y, $y\\}];",
3948 def SULD_2D_ARRAY_V4I32_CLAMP
3949 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3950 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3951 "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3952 "[$s, \\{$l, $x, $y, $y\\}];",
// 3D: same four-slot coordinate vector, with $z repeated in the last slot.
3956 def SULD_3D_V4I8_CLAMP
3957 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3958 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3959 "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3960 "[$s, \\{$x, $y, $z, $z\\}];",
3962 def SULD_3D_V4I16_CLAMP
3963 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3964 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3965 "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3966 "[$s, \\{$x, $y, $z, $z\\}];",
3968 def SULD_3D_V4I32_CLAMP
3969 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3970 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3971 "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3972 "[$s, \\{$x, $y, $z, $z\\}];",
// Scalar surface loads (suld.b) with the .trap out-of-bounds mode, for 1D,
// 1D-array, 2D, 2D-array and 3D surfaces and b8/b16/b32/b64 element widths.
// Each def yields a single result $r; b8 and b16 results both use Int16Regs.
// $s is a 64-bit register (presumably the surface handle - confirm) and all
// coordinates are 32-bit registers.
3980 : NVPTXInst<(outs Int16Regs:$r),
3981 (ins Int64Regs:$s, Int32Regs:$x),
3982 "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
3984 def SULD_1D_I16_TRAP
3985 : NVPTXInst<(outs Int16Regs:$r),
3986 (ins Int64Regs:$s, Int32Regs:$x),
3987 "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
3989 def SULD_1D_I32_TRAP
3990 : NVPTXInst<(outs Int32Regs:$r),
3991 (ins Int64Regs:$s, Int32Regs:$x),
3992 "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
3994 def SULD_1D_I64_TRAP
3995 : NVPTXInst<(outs Int64Regs:$r),
3996 (ins Int64Regs:$s, Int32Regs:$x),
3997 "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
// 1D array: the coordinate vector is {layer, x}.
4000 def SULD_1D_ARRAY_I8_TRAP
4001 : NVPTXInst<(outs Int16Regs:$r),
4002 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4003 "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4005 def SULD_1D_ARRAY_I16_TRAP
4006 : NVPTXInst<(outs Int16Regs:$r),
4007 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4008 "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4010 def SULD_1D_ARRAY_I32_TRAP
4011 : NVPTXInst<(outs Int32Regs:$r),
4012 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4013 "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4015 def SULD_1D_ARRAY_I64_TRAP
4016 : NVPTXInst<(outs Int64Regs:$r),
4017 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4018 "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
// 2D: the coordinate vector is {x, y}.
4022 : NVPTXInst<(outs Int16Regs:$r),
4023 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4024 "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4026 def SULD_2D_I16_TRAP
4027 : NVPTXInst<(outs Int16Regs:$r),
4028 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4029 "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4031 def SULD_2D_I32_TRAP
4032 : NVPTXInst<(outs Int32Regs:$r),
4033 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4034 "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4036 def SULD_2D_I64_TRAP
4037 : NVPTXInst<(outs Int64Regs:$r),
4038 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4039 "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
// 2D array: the emitted coordinate vector has four slots, with $y repeated in
// the last one. NOTE(review): presumably the final slot is padding that PTX
// ignores - confirm against the PTX ISA.
4042 def SULD_2D_ARRAY_I8_TRAP
4043 : NVPTXInst<(outs Int16Regs:$r),
4044 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4045 "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4047 def SULD_2D_ARRAY_I16_TRAP
4048 : NVPTXInst<(outs Int16Regs:$r),
4049 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4050 "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4052 def SULD_2D_ARRAY_I32_TRAP
4053 : NVPTXInst<(outs Int32Regs:$r),
4054 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4055 "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4057 def SULD_2D_ARRAY_I64_TRAP
4058 : NVPTXInst<(outs Int64Regs:$r),
4059 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4060 "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// 3D: same four-slot coordinate vector, with $z repeated in the last slot.
4064 : NVPTXInst<(outs Int16Regs:$r),
4065 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4066 "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4068 def SULD_3D_I16_TRAP
4069 : NVPTXInst<(outs Int16Regs:$r),
4070 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4071 "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4073 def SULD_3D_I32_TRAP
4074 : NVPTXInst<(outs Int32Regs:$r),
4075 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4076 "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4078 def SULD_3D_I64_TRAP
4079 : NVPTXInst<(outs Int64Regs:$r),
4080 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4081 "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Surface loads (suld.b) returning two elements {$r, $g} with the .trap
// out-of-bounds mode, for 1D, 1D-array, 2D, 2D-array and 3D surfaces and
// b8/b16/b32/b64 element widths. b8 and b16 results both use Int16Regs.
// $s is a 64-bit register (presumably the surface handle - confirm) and all
// coordinates are 32-bit registers.
4086 def SULD_1D_V2I8_TRAP
4087 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4088 (ins Int64Regs:$s, Int32Regs:$x),
4089 "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4091 def SULD_1D_V2I16_TRAP
4092 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4093 (ins Int64Regs:$s, Int32Regs:$x),
4094 "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4096 def SULD_1D_V2I32_TRAP
4097 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4098 (ins Int64Regs:$s, Int32Regs:$x),
4099 "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4101 def SULD_1D_V2I64_TRAP
4102 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4103 (ins Int64Regs:$s, Int32Regs:$x),
4104 "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
// 1D array: the coordinate vector is {layer, x}.
4107 def SULD_1D_ARRAY_V2I8_TRAP
4108 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4109 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4110 "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4112 def SULD_1D_ARRAY_V2I16_TRAP
4113 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4114 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4115 "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4117 def SULD_1D_ARRAY_V2I32_TRAP
4118 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4119 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4120 "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4122 def SULD_1D_ARRAY_V2I64_TRAP
4123 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4124 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4125 "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// 2D: the coordinate vector is {x, y}.
4128 def SULD_2D_V2I8_TRAP
4129 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4130 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4131 "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4133 def SULD_2D_V2I16_TRAP
4134 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4135 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4136 "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4138 def SULD_2D_V2I32_TRAP
4139 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4140 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4141 "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4143 def SULD_2D_V2I64_TRAP
4144 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4145 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4146 "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// 2D array: the emitted coordinate vector has four slots, with $y repeated in
// the last one. NOTE(review): presumably the final slot is padding that PTX
// ignores - confirm against the PTX ISA.
4149 def SULD_2D_ARRAY_V2I8_TRAP
4150 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4151 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4152 "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
4153 "[$s, \\{$l, $x, $y, $y\\}];",
4155 def SULD_2D_ARRAY_V2I16_TRAP
4156 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4157 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4158 "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
4159 "[$s, \\{$l, $x, $y, $y\\}];",
4161 def SULD_2D_ARRAY_V2I32_TRAP
4162 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4163 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4164 "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
4165 "[$s, \\{$l, $x, $y, $y\\}];",
4167 def SULD_2D_ARRAY_V2I64_TRAP
4168 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4169 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4170 "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
4171 "[$s, \\{$l, $x, $y, $y\\}];",
// 3D: same four-slot coordinate vector, with $z repeated in the last slot.
4174 def SULD_3D_V2I8_TRAP
4175 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4176 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4177 "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4179 def SULD_3D_V2I16_TRAP
4180 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4181 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4182 "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4184 def SULD_3D_V2I32_TRAP
4185 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4186 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4187 "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4189 def SULD_3D_V2I64_TRAP
4190 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4191 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4192 "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Surface loads (suld.b) returning four elements {$r, $g, $b, $a} with the
// .trap out-of-bounds mode, for 1D, 1D-array, 2D, 2D-array and 3D surfaces.
// Only b8, b16 and b32 element widths are defined in this group; b8 and b16
// results both use Int16Regs. $s is a 64-bit register (presumably the surface
// handle - confirm) and all coordinates are 32-bit registers.
4197 def SULD_1D_V4I8_TRAP
4198 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4199 (ins Int64Regs:$s, Int32Regs:$x),
4200 "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4202 def SULD_1D_V4I16_TRAP
4203 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4204 (ins Int64Regs:$s, Int32Regs:$x),
4205 "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4207 def SULD_1D_V4I32_TRAP
4208 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4209 (ins Int64Regs:$s, Int32Regs:$x),
4210 "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// 1D array: the coordinate vector is {layer, x}.
4213 def SULD_1D_ARRAY_V4I8_TRAP
4214 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4215 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4216 "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4217 "[$s, \\{$l, $x\\}];",
4219 def SULD_1D_ARRAY_V4I16_TRAP
4220 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4221 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4222 "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4223 "[$s, \\{$l, $x\\}];",
4225 def SULD_1D_ARRAY_V4I32_TRAP
4226 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4227 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4228 "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4229 "[$s, \\{$l, $x\\}];",
// 2D: the coordinate vector is {x, y}.
4232 def SULD_2D_V4I8_TRAP
4233 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4234 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4235 "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4237 def SULD_2D_V4I16_TRAP
4238 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4239 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4240 "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4242 def SULD_2D_V4I32_TRAP
4243 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4244 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4245 "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// 2D array: the emitted coordinate vector has four slots, with $y repeated in
// the last one. NOTE(review): presumably the final slot is padding that PTX
// ignores - confirm against the PTX ISA.
4248 def SULD_2D_ARRAY_V4I8_TRAP
4249 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4250 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4251 "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4252 "[$s, \\{$l, $x, $y, $y\\}];",
4254 def SULD_2D_ARRAY_V4I16_TRAP
4255 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4256 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4257 "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4258 "[$s, \\{$l, $x, $y, $y\\}];",
4260 def SULD_2D_ARRAY_V4I32_TRAP
4261 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4262 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4263 "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4264 "[$s, \\{$l, $x, $y, $y\\}];",
// 3D: same four-slot coordinate vector, with $z repeated in the last slot.
4268 def SULD_3D_V4I8_TRAP
4269 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4270 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4271 "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4272 "[$s, \\{$x, $y, $z, $z\\}];",
4274 def SULD_3D_V4I16_TRAP
4275 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4276 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4277 "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4278 "[$s, \\{$x, $y, $z, $z\\}];",
4280 def SULD_3D_V4I32_TRAP
4281 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4282 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4283 "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4284 "[$s, \\{$x, $y, $z, $z\\}];",
// Scalar surface loads (suld.b) with the .zero out-of-bounds mode, for 1D,
// 1D-array, 2D, 2D-array and 3D surfaces and b8/b16/b32/b64 element widths.
// Each def yields a single result $r; b8 and b16 results both use Int16Regs.
// $s is a 64-bit register (presumably the surface handle - confirm) and all
// coordinates are 32-bit registers.
4291 : NVPTXInst<(outs Int16Regs:$r),
4292 (ins Int64Regs:$s, Int32Regs:$x),
4293 "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
4295 def SULD_1D_I16_ZERO
4296 : NVPTXInst<(outs Int16Regs:$r),
4297 (ins Int64Regs:$s, Int32Regs:$x),
4298 "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
4300 def SULD_1D_I32_ZERO
4301 : NVPTXInst<(outs Int32Regs:$r),
4302 (ins Int64Regs:$s, Int32Regs:$x),
4303 "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
4305 def SULD_1D_I64_ZERO
4306 : NVPTXInst<(outs Int64Regs:$r),
4307 (ins Int64Regs:$s, Int32Regs:$x),
4308 "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
// 1D array: the coordinate vector is {layer, x}.
4311 def SULD_1D_ARRAY_I8_ZERO
4312 : NVPTXInst<(outs Int16Regs:$r),
4313 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4314 "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4316 def SULD_1D_ARRAY_I16_ZERO
4317 : NVPTXInst<(outs Int16Regs:$r),
4318 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4319 "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4321 def SULD_1D_ARRAY_I32_ZERO
4322 : NVPTXInst<(outs Int32Regs:$r),
4323 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4324 "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4326 def SULD_1D_ARRAY_I64_ZERO
4327 : NVPTXInst<(outs Int64Regs:$r),
4328 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4329 "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
// 2D: the coordinate vector is {x, y}.
4333 : NVPTXInst<(outs Int16Regs:$r),
4334 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4335 "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4337 def SULD_2D_I16_ZERO
4338 : NVPTXInst<(outs Int16Regs:$r),
4339 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4340 "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4342 def SULD_2D_I32_ZERO
4343 : NVPTXInst<(outs Int32Regs:$r),
4344 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4345 "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4347 def SULD_2D_I64_ZERO
4348 : NVPTXInst<(outs Int64Regs:$r),
4349 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4350 "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
// 2D array: the emitted coordinate vector has four slots, with $y repeated in
// the last one. NOTE(review): presumably the final slot is padding that PTX
// ignores - confirm against the PTX ISA.
4353 def SULD_2D_ARRAY_I8_ZERO
4354 : NVPTXInst<(outs Int16Regs:$r),
4355 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4356 "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4358 def SULD_2D_ARRAY_I16_ZERO
4359 : NVPTXInst<(outs Int16Regs:$r),
4360 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4361 "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4363 def SULD_2D_ARRAY_I32_ZERO
4364 : NVPTXInst<(outs Int32Regs:$r),
4365 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4366 "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4368 def SULD_2D_ARRAY_I64_ZERO
4369 : NVPTXInst<(outs Int64Regs:$r),
4370 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4371 "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// 3D: same four-slot coordinate vector, with $z repeated in the last slot.
4375 : NVPTXInst<(outs Int16Regs:$r),
4376 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4377 "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4379 def SULD_3D_I16_ZERO
4380 : NVPTXInst<(outs Int16Regs:$r),
4381 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4382 "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4384 def SULD_3D_I32_ZERO
4385 : NVPTXInst<(outs Int32Regs:$r),
4386 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4387 "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4389 def SULD_3D_I64_ZERO
4390 : NVPTXInst<(outs Int64Regs:$r),
4391 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4392 "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Surface loads (suld.b) returning two elements {$r, $g} with the .zero
// out-of-bounds mode, for 1D, 1D-array, 2D, 2D-array and 3D surfaces and
// b8/b16/b32/b64 element widths. b8 and b16 results both use Int16Regs.
// $s is a 64-bit register (presumably the surface handle - confirm) and all
// coordinates are 32-bit registers.
4397 def SULD_1D_V2I8_ZERO
4398 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4399 (ins Int64Regs:$s, Int32Regs:$x),
4400 "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4402 def SULD_1D_V2I16_ZERO
4403 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4404 (ins Int64Regs:$s, Int32Regs:$x),
4405 "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4407 def SULD_1D_V2I32_ZERO
4408 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4409 (ins Int64Regs:$s, Int32Regs:$x),
4410 "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4412 def SULD_1D_V2I64_ZERO
4413 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4414 (ins Int64Regs:$s, Int32Regs:$x),
4415 "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
// 1D array: the coordinate vector is {layer, x}.
4418 def SULD_1D_ARRAY_V2I8_ZERO
4419 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4420 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4421 "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4423 def SULD_1D_ARRAY_V2I16_ZERO
4424 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4425 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4426 "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4428 def SULD_1D_ARRAY_V2I32_ZERO
4429 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4430 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4431 "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4433 def SULD_1D_ARRAY_V2I64_ZERO
4434 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4435 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4436 "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// 2D: the coordinate vector is {x, y}.
4439 def SULD_2D_V2I8_ZERO
4440 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4441 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4442 "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4444 def SULD_2D_V2I16_ZERO
4445 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4446 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4447 "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4449 def SULD_2D_V2I32_ZERO
4450 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4451 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4452 "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4454 def SULD_2D_V2I64_ZERO
4455 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4456 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4457 "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// 2D array: the emitted coordinate vector has four slots, with $y repeated in
// the last one. NOTE(review): presumably the final slot is padding that PTX
// ignores - confirm against the PTX ISA.
4460 def SULD_2D_ARRAY_V2I8_ZERO
4461 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4462 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4463 "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4464 "[$s, \\{$l, $x, $y, $y\\}];",
4466 def SULD_2D_ARRAY_V2I16_ZERO
4467 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4468 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4469 "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4470 "[$s, \\{$l, $x, $y, $y\\}];",
4472 def SULD_2D_ARRAY_V2I32_ZERO
4473 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4474 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4475 "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4476 "[$s, \\{$l, $x, $y, $y\\}];",
4478 def SULD_2D_ARRAY_V2I64_ZERO
4479 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4480 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4481 "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4482 "[$s, \\{$l, $x, $y, $y\\}];",
// 3D: same four-slot coordinate vector, with $z repeated in the last slot.
4485 def SULD_3D_V2I8_ZERO
4486 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4487 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4488 "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4490 def SULD_3D_V2I16_ZERO
4491 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4492 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4493 "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4495 def SULD_3D_V2I32_ZERO
4496 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4497 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4498 "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4500 def SULD_3D_V2I64_ZERO
4501 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4502 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4503 "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Surface loads (suld.b) returning four elements {$r, $g, $b, $a} with the
// .zero out-of-bounds mode, for 1D, 1D-array, 2D, 2D-array and 3D surfaces.
// Only b8, b16 and b32 element widths are defined in this group; b8 and b16
// results both use Int16Regs. $s is a 64-bit register (presumably the surface
// handle - confirm) and all coordinates are 32-bit registers.
4508 def SULD_1D_V4I8_ZERO
4509 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4510 (ins Int64Regs:$s, Int32Regs:$x),
4511 "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4513 def SULD_1D_V4I16_ZERO
4514 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4515 (ins Int64Regs:$s, Int32Regs:$x),
4516 "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4518 def SULD_1D_V4I32_ZERO
4519 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4520 (ins Int64Regs:$s, Int32Regs:$x),
4521 "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// 1D array: the coordinate vector is {layer, x}.
4524 def SULD_1D_ARRAY_V4I8_ZERO
4525 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4526 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4527 "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4528 "[$s, \\{$l, $x\\}];",
4530 def SULD_1D_ARRAY_V4I16_ZERO
4531 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4532 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4533 "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4534 "[$s, \\{$l, $x\\}];",
4536 def SULD_1D_ARRAY_V4I32_ZERO
4537 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4538 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4539 "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4540 "[$s, \\{$l, $x\\}];",
// 2D: the coordinate vector is {x, y}.
4543 def SULD_2D_V4I8_ZERO
4544 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4545 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4546 "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4548 def SULD_2D_V4I16_ZERO
4549 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4550 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4551 "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4553 def SULD_2D_V4I32_ZERO
4554 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4555 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4556 "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// 2D array: the emitted coordinate vector has four slots, with $y repeated in
// the last one. NOTE(review): presumably the final slot is padding that PTX
// ignores - confirm against the PTX ISA.
4559 def SULD_2D_ARRAY_V4I8_ZERO
4560 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4561 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4562 "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4563 "[$s, \\{$l, $x, $y, $y\\}];",
4565 def SULD_2D_ARRAY_V4I16_ZERO
4566 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4567 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4568 "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4569 "[$s, \\{$l, $x, $y, $y\\}];",
4571 def SULD_2D_ARRAY_V4I32_ZERO
4572 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4573 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4574 "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4575 "[$s, \\{$l, $x, $y, $y\\}];",
// 3D: same four-slot coordinate vector, with $z repeated in the last slot.
4579 def SULD_3D_V4I8_ZERO
4580 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4581 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4582 "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4583 "[$s, \\{$x, $y, $z, $z\\}];",
4585 def SULD_3D_V4I16_ZERO
4586 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4587 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4588 "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4589 "[$s, \\{$x, $y, $z, $z\\}];",
4591 def SULD_3D_V4I32_ZERO
4592 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4593 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4594 "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4595 "[$s, \\{$x, $y, $z, $z\\}];",
4599 //-----------------------------------
4600 // Texture Query Intrinsics
4601 //-----------------------------------
// Texture query instructions. Each def takes one 64-bit operand $a, printed
// in brackets as the txq operand, and writes a 32-bit result $d. The queried
// attribute is the one named in the mnemonic: channel order, channel data
// type, width, height, depth, array size, sample count, mipmap level count.
// Every def in this scope has IsSurfTexQuery set.
// NOTE(review): that flag is presumably consumed by a later pass that
// rewrites handle operands - confirm where it is read.
4603 let IsSurfTexQuery = 1 in {
4604 def TXQ_CHANNEL_ORDER
4605 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4606 "txq.channel_order.b32 \t$d, [$a];",
4608 def TXQ_CHANNEL_DATA_TYPE
4609 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4610 "txq.channel_data_type.b32 \t$d, [$a];",
4613 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4614 "txq.width.b32 \t$d, [$a];",
4617 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4618 "txq.height.b32 \t$d, [$a];",
4621 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4622 "txq.depth.b32 \t$d, [$a];",
4625 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4626 "txq.array_size.b32 \t$d, [$a];",
4629 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4630 "txq.num_samples.b32 \t$d, [$a];",
4632 def TXQ_NUM_MIPMAP_LEVELS
4633 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4634 "txq.num_mipmap_levels.b32 \t$d, [$a];",
// Selection patterns for the texture queries: each int_nvvm_txq_* intrinsic
// applied to a 64-bit register operand is selected to the TXQ_* instruction
// of the same name, passing the operand through unchanged.
4638 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4639 (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
4640 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4641 (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4642 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4643 (TXQ_WIDTH Int64Regs:$a)>;
4644 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4645 (TXQ_HEIGHT Int64Regs:$a)>;
4646 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4647 (TXQ_DEPTH Int64Regs:$a)>;
4648 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4649 (TXQ_ARRAY_SIZE Int64Regs:$a)>;
4650 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4651 (TXQ_NUM_SAMPLES Int64Regs:$a)>;
4652 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4653 (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4656 //-----------------------------------
4657 // Surface Query Intrinsics
4658 //-----------------------------------
// Surface query instructions. Each def takes one 64-bit operand $a, printed
// in brackets as the suq operand, and writes a 32-bit result $d. The queried
// attribute is the one named in the mnemonic: channel order, channel data
// type, width, height, depth, array size.
// Every def in this scope has IsSurfTexQuery set.
// NOTE(review): that flag is presumably consumed by a later pass that
// rewrites handle operands - confirm where it is read.
4660 let IsSurfTexQuery = 1 in {
4661 def SUQ_CHANNEL_ORDER
4662 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4663 "suq.channel_order.b32 \t$d, [$a];",
4665 def SUQ_CHANNEL_DATA_TYPE
4666 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4667 "suq.channel_data_type.b32 \t$d, [$a];",
4670 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4671 "suq.width.b32 \t$d, [$a];",
4674 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4675 "suq.height.b32 \t$d, [$a];",
4678 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4679 "suq.depth.b32 \t$d, [$a];",
4682 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4683 "suq.array_size.b32 \t$d, [$a];",
// Selection patterns for the surface queries: each int_nvvm_suq_* intrinsic
// applied to a 64-bit register operand is selected to the SUQ_* instruction
// of the same name, passing the operand through unchanged.
4687 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4688 (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
4689 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4690 (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4691 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4692 (SUQ_WIDTH Int64Regs:$a)>;
4693 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4694 (SUQ_HEIGHT Int64Regs:$a)>;
4695 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4696 (SUQ_DEPTH Int64Regs:$a)>;
4697 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4698 (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4701 //===- Handle Query -------------------------------------------------------===//
4703 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Handle-type predicates. Each instruction takes a 64-bit operand $a and
// writes a predicate (Int1Regs) result $d, using istypep with the
// .samplerref, .surfref and .texref type suffix respectively. Unlike the
// txq/suq queries, $a is printed bare (no brackets), and selection is done by
// the inline pattern on the matching int_nvvm_istypep_* intrinsic.
4705 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4706 "istypep.samplerref \t$d, $a;",
4707 [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4709 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4710 "istypep.surfref \t$d, $a;",
4711 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4713 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4714 "istypep.texref \t$d, $a;",
4715 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4717 //===- Surface Stores -----------------------------------------------------===//
// Surface stores (sust.b) to 1D surfaces with the .clamp out-of-bounds mode.
// Input operand order is: 64-bit $s (presumably the surface handle -
// confirm), the 32-bit coordinate $x, then the data registers. Scalar stores
// take $r, two-element stores take {$r, $g}, and four-element stores take
// {$r, $g, $b, $a}. b8 and b16 data are both supplied in Int16Regs; the
// four-element form is defined only for b8, b16 and b32.
4722 def SUST_B_1D_B8_CLAMP
4724 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4725 "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4727 def SUST_B_1D_B16_CLAMP
4729 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4730 "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4732 def SUST_B_1D_B32_CLAMP
4734 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4735 "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4737 def SUST_B_1D_B64_CLAMP
4739 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4740 "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
// Two-element stores.
4742 def SUST_B_1D_V2B8_CLAMP
4744 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4745 "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4747 def SUST_B_1D_V2B16_CLAMP
4749 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4750 "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4752 def SUST_B_1D_V2B32_CLAMP
4754 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4755 "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4757 def SUST_B_1D_V2B64_CLAMP
4759 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4760 "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
// Four-element stores.
4762 def SUST_B_1D_V4B8_CLAMP
4764 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4765 Int16Regs:$b, Int16Regs:$a),
4766 "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4768 def SUST_B_1D_V4B16_CLAMP
4770 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4771 Int16Regs:$b, Int16Regs:$a),
4772 "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4774 def SUST_B_1D_V4B32_CLAMP
4776 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4777 Int32Regs:$b, Int32Regs:$a),
4778 "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.b.a1d.*.clamp: 1D-array surface stores with the `.clamp` qualifier.
// Asm shape: `[$s, {$idx, $x}], {data}`. $idx is printed ahead of $x in the
// coordinate vector (presumably the array layer index -- TODO confirm); both
// are Int32Regs. $s is an Int64Regs operand (presumably the surface handle).
// The b8 forms carry their data in Int16Regs, the same class as b16.
// There is no v4.b64 form in this group.
4782 def SUST_B_1D_ARRAY_B8_CLAMP
4784 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4785 "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4787 def SUST_B_1D_ARRAY_B16_CLAMP
4789 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4790 "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4792 def SUST_B_1D_ARRAY_B32_CLAMP
4794 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4795 "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4797 def SUST_B_1D_ARRAY_B64_CLAMP
4799 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4800 "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4802 def SUST_B_1D_ARRAY_V2B8_CLAMP
4804 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4806 "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4808 def SUST_B_1D_ARRAY_V2B16_CLAMP
4810 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4812 "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4814 def SUST_B_1D_ARRAY_V2B32_CLAMP
4816 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4818 "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4820 def SUST_B_1D_ARRAY_V2B64_CLAMP
4822 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4824 "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4826 def SUST_B_1D_ARRAY_V4B8_CLAMP
4828 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4829 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4830 "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
4831 "\\{$r, $g, $b, $a\\};",
4833 def SUST_B_1D_ARRAY_V4B16_CLAMP
4835 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4836 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4837 "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
4838 "\\{$r, $g, $b, $a\\};",
4840 def SUST_B_1D_ARRAY_V4B32_CLAMP
4842 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4843 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4844 "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
4845 "\\{$r, $g, $b, $a\\};",
// sust.b.2d.*.clamp: 2D surface stores with the `.clamp` qualifier.
// Asm shape: `[$s, {$x, $y}], {data}`. $x and $y are Int32Regs coordinates;
// $s is an Int64Regs operand (presumably the surface handle -- TODO confirm).
// $r/$g/$b/$a are the stored components. The b8 forms carry their data in
// Int16Regs, the same class as b16. There is no v4.b64 form in this group.
4849 def SUST_B_2D_B8_CLAMP
4851 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4852 "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4854 def SUST_B_2D_B16_CLAMP
4856 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4857 "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4859 def SUST_B_2D_B32_CLAMP
4861 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4862 "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4864 def SUST_B_2D_B64_CLAMP
4866 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4867 "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4869 def SUST_B_2D_V2B8_CLAMP
4871 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4873 "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4875 def SUST_B_2D_V2B16_CLAMP
4877 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4879 "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4881 def SUST_B_2D_V2B32_CLAMP
4883 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4885 "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4887 def SUST_B_2D_V2B64_CLAMP
4889 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4891 "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4893 def SUST_B_2D_V4B8_CLAMP
4895 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4896 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4897 "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
4898 "\\{$r, $g, $b, $a\\};",
4900 def SUST_B_2D_V4B16_CLAMP
4902 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4903 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4904 "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
4905 "\\{$r, $g, $b, $a\\};",
4907 def SUST_B_2D_V4B32_CLAMP
4909 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4910 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4911 "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
4912 "\\{$r, $g, $b, $a\\};",
// sust.b.a2d.*.clamp: 2D-array surface stores with the `.clamp` qualifier.
// Asm shape: `[$s, {$idx, $x, $y, $y}], {data}`. $idx is printed first in the
// coordinate vector (presumably the array layer index -- TODO confirm).
// NOTE: $y is deliberately printed twice so the coordinate vector has four
// elements; presumably PTX requires a 4-element vector for a2d and ignores
// the last one -- confirm against the PTX ISA `sust` description before
// "fixing" the repetition.
// The b8 forms carry their data in Int16Regs, the same class as b16.
4916 def SUST_B_2D_ARRAY_B8_CLAMP
4918 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4920 "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4922 def SUST_B_2D_ARRAY_B16_CLAMP
4924 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4926 "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4928 def SUST_B_2D_ARRAY_B32_CLAMP
4930 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4932 "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4934 def SUST_B_2D_ARRAY_B64_CLAMP
4936 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4938 "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4940 def SUST_B_2D_ARRAY_V2B8_CLAMP
4942 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4943 Int16Regs:$r, Int16Regs:$g),
4944 "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4947 def SUST_B_2D_ARRAY_V2B16_CLAMP
4949 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4950 Int16Regs:$r, Int16Regs:$g),
4951 "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4954 def SUST_B_2D_ARRAY_V2B32_CLAMP
4956 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4957 Int32Regs:$r, Int32Regs:$g),
4958 "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4961 def SUST_B_2D_ARRAY_V2B64_CLAMP
4963 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4964 Int64Regs:$r, Int64Regs:$g),
4965 "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4968 def SUST_B_2D_ARRAY_V4B8_CLAMP
4970 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4971 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4972 "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4973 "\\{$r, $g, $b, $a\\};",
4975 def SUST_B_2D_ARRAY_V4B16_CLAMP
4977 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4978 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4979 "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4980 "\\{$r, $g, $b, $a\\};",
4982 def SUST_B_2D_ARRAY_V4B32_CLAMP
4984 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4985 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4986 "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4987 "\\{$r, $g, $b, $a\\};",
// sust.b.3d.*.clamp: 3D surface stores with the `.clamp` qualifier.
// Asm shape: `[$s, {$x, $y, $z, $z}], {data}`. $x/$y/$z are Int32Regs
// coordinates; $s is an Int64Regs operand (presumably the surface handle).
// NOTE: $z is deliberately printed twice so the coordinate vector has four
// elements; presumably PTX requires a 4-element vector for 3d and ignores
// the last one -- confirm against the PTX ISA `sust` description before
// "fixing" the repetition.
// The b8 forms carry their data in Int16Regs, the same class as b16.
4991 def SUST_B_3D_B8_CLAMP
4993 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4995 "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4997 def SUST_B_3D_B16_CLAMP
4999 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5001 "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5003 def SUST_B_3D_B32_CLAMP
5005 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5007 "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5009 def SUST_B_3D_B64_CLAMP
5011 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5013 "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5015 def SUST_B_3D_V2B8_CLAMP
5017 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5018 Int16Regs:$r, Int16Regs:$g),
5019 "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5022 def SUST_B_3D_V2B16_CLAMP
5024 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5025 Int16Regs:$r, Int16Regs:$g),
5026 "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5029 def SUST_B_3D_V2B32_CLAMP
5031 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5032 Int32Regs:$r, Int32Regs:$g),
5033 "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5036 def SUST_B_3D_V2B64_CLAMP
5038 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5039 Int64Regs:$r, Int64Regs:$g),
5040 "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5043 def SUST_B_3D_V4B8_CLAMP
5045 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5046 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5047 "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5048 "\\{$r, $g, $b, $a\\};",
5050 def SUST_B_3D_V4B16_CLAMP
5052 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5053 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5054 "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5055 "\\{$r, $g, $b, $a\\};",
5057 def SUST_B_3D_V4B32_CLAMP
5059 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5060 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5061 "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5062 "\\{$r, $g, $b, $a\\};",
// sust.b.1d.*.trap: 1D surface stores with the `.trap` qualifier.
// Asm shape: `[$s, {$x}], {data}`. $s is an Int64Regs operand (presumably the
// surface handle -- TODO confirm), $x is the Int32Regs coordinate, and
// $r/$g/$b/$a are the stored components (1, 2 or 4 of them for the scalar,
// .v2 and .v4 forms). The b8 forms carry their data in Int16Regs, the same
// register class as b16. There is no v4.b64 form in this group.
5067 def SUST_B_1D_B8_TRAP
5069 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5070 "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5072 def SUST_B_1D_B16_TRAP
5074 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5075 "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5077 def SUST_B_1D_B32_TRAP
5079 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5080 "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5082 def SUST_B_1D_B64_TRAP
5084 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5085 "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
5087 def SUST_B_1D_V2B8_TRAP
5089 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5090 "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5092 def SUST_B_1D_V2B16_TRAP
5094 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5095 "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5097 def SUST_B_1D_V2B32_TRAP
5099 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5100 "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5102 def SUST_B_1D_V2B64_TRAP
5104 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5105 "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5107 def SUST_B_1D_V4B8_TRAP
5109 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5110 Int16Regs:$b, Int16Regs:$a),
5111 "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5113 def SUST_B_1D_V4B16_TRAP
5115 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5116 Int16Regs:$b, Int16Regs:$a),
5117 "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5119 def SUST_B_1D_V4B32_TRAP
5121 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5122 Int32Regs:$b, Int32Regs:$a),
5123 "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.b.a1d.*.trap: 1D-array surface stores with the `.trap` qualifier.
// Asm shape: `[$s, {$idx, $x}], {data}`. $idx is printed ahead of $x in the
// coordinate vector (presumably the array layer index -- TODO confirm); both
// are Int32Regs. $s is an Int64Regs operand (presumably the surface handle).
// The b8 forms carry their data in Int16Regs, the same class as b16.
// There is no v4.b64 form in this group.
5127 def SUST_B_1D_ARRAY_B8_TRAP
5129 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5130 "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5132 def SUST_B_1D_ARRAY_B16_TRAP
5134 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5135 "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5137 def SUST_B_1D_ARRAY_B32_TRAP
5139 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5140 "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5142 def SUST_B_1D_ARRAY_B64_TRAP
5144 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5145 "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5147 def SUST_B_1D_ARRAY_V2B8_TRAP
5149 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5151 "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5153 def SUST_B_1D_ARRAY_V2B16_TRAP
5155 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5157 "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5159 def SUST_B_1D_ARRAY_V2B32_TRAP
5161 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5163 "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5165 def SUST_B_1D_ARRAY_V2B64_TRAP
5167 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5169 "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5171 def SUST_B_1D_ARRAY_V4B8_TRAP
5173 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5174 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5175 "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5176 "\\{$r, $g, $b, $a\\};",
5178 def SUST_B_1D_ARRAY_V4B16_TRAP
5180 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5181 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5182 "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5183 "\\{$r, $g, $b, $a\\};",
5185 def SUST_B_1D_ARRAY_V4B32_TRAP
5187 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5188 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5189 "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5190 "\\{$r, $g, $b, $a\\};",
// sust.b.2d.*.trap: 2D surface stores with the `.trap` qualifier.
// Asm shape: `[$s, {$x, $y}], {data}`. $x and $y are Int32Regs coordinates;
// $s is an Int64Regs operand (presumably the surface handle -- TODO confirm).
// $r/$g/$b/$a are the stored components. The b8 forms carry their data in
// Int16Regs, the same class as b16. There is no v4.b64 form in this group.
5194 def SUST_B_2D_B8_TRAP
5196 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5197 "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5199 def SUST_B_2D_B16_TRAP
5201 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5202 "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5204 def SUST_B_2D_B32_TRAP
5206 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5207 "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5209 def SUST_B_2D_B64_TRAP
5211 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5212 "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5214 def SUST_B_2D_V2B8_TRAP
5216 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5218 "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5220 def SUST_B_2D_V2B16_TRAP
5222 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5224 "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5226 def SUST_B_2D_V2B32_TRAP
5228 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5230 "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5232 def SUST_B_2D_V2B64_TRAP
5234 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5236 "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5238 def SUST_B_2D_V4B8_TRAP
5240 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5241 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5242 "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5243 "\\{$r, $g, $b, $a\\};",
5245 def SUST_B_2D_V4B16_TRAP
5247 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5248 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5249 "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5250 "\\{$r, $g, $b, $a\\};",
5252 def SUST_B_2D_V4B32_TRAP
5254 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5255 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5256 "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5257 "\\{$r, $g, $b, $a\\};",
// sust.b.a2d.*.trap: 2D-array surface stores with the `.trap` qualifier.
// Asm shape: `[$s, {$idx, $x, $y, $y}], {data}`. $idx is printed first in the
// coordinate vector (presumably the array layer index -- TODO confirm).
// NOTE: $y is deliberately printed twice so the coordinate vector has four
// elements; presumably PTX requires a 4-element vector for a2d and ignores
// the last one -- confirm against the PTX ISA `sust` description before
// "fixing" the repetition.
// The b8 forms carry their data in Int16Regs, the same class as b16.
5261 def SUST_B_2D_ARRAY_B8_TRAP
5263 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5265 "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5267 def SUST_B_2D_ARRAY_B16_TRAP
5269 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5271 "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5273 def SUST_B_2D_ARRAY_B32_TRAP
5275 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5277 "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5279 def SUST_B_2D_ARRAY_B64_TRAP
5281 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5283 "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5285 def SUST_B_2D_ARRAY_V2B8_TRAP
5287 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5288 Int16Regs:$r, Int16Regs:$g),
5289 "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5292 def SUST_B_2D_ARRAY_V2B16_TRAP
5294 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5295 Int16Regs:$r, Int16Regs:$g),
5296 "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5299 def SUST_B_2D_ARRAY_V2B32_TRAP
5301 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5302 Int32Regs:$r, Int32Regs:$g),
5303 "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5306 def SUST_B_2D_ARRAY_V2B64_TRAP
5308 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5309 Int64Regs:$r, Int64Regs:$g),
5310 "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5313 def SUST_B_2D_ARRAY_V4B8_TRAP
5315 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5316 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5317 "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5318 "\\{$r, $g, $b, $a\\};",
5320 def SUST_B_2D_ARRAY_V4B16_TRAP
5322 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5323 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5324 "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5325 "\\{$r, $g, $b, $a\\};",
5327 def SUST_B_2D_ARRAY_V4B32_TRAP
5329 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5330 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5331 "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5332 "\\{$r, $g, $b, $a\\};",
// sust.b.3d.*.trap: 3D surface stores with the `.trap` qualifier.
// Asm shape: `[$s, {$x, $y, $z, $z}], {data}`. $x/$y/$z are Int32Regs
// coordinates; $s is an Int64Regs operand (presumably the surface handle).
// NOTE: $z is deliberately printed twice so the coordinate vector has four
// elements; presumably PTX requires a 4-element vector for 3d and ignores
// the last one -- confirm against the PTX ISA `sust` description before
// "fixing" the repetition.
// The b8 forms carry their data in Int16Regs, the same class as b16.
5336 def SUST_B_3D_B8_TRAP
5338 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5340 "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5342 def SUST_B_3D_B16_TRAP
5344 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5346 "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5348 def SUST_B_3D_B32_TRAP
5350 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5352 "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5354 def SUST_B_3D_B64_TRAP
5356 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5358 "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5360 def SUST_B_3D_V2B8_TRAP
5362 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5363 Int16Regs:$r, Int16Regs:$g),
5364 "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5367 def SUST_B_3D_V2B16_TRAP
5369 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5370 Int16Regs:$r, Int16Regs:$g),
5371 "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5374 def SUST_B_3D_V2B32_TRAP
5376 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5377 Int32Regs:$r, Int32Regs:$g),
5378 "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5381 def SUST_B_3D_V2B64_TRAP
5383 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5384 Int64Regs:$r, Int64Regs:$g),
5385 "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5388 def SUST_B_3D_V4B8_TRAP
5390 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5391 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5392 "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5393 "\\{$r, $g, $b, $a\\};",
5395 def SUST_B_3D_V4B16_TRAP
5397 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5398 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5399 "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5400 "\\{$r, $g, $b, $a\\};",
5402 def SUST_B_3D_V4B32_TRAP
5404 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5405 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5406 "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5407 "\\{$r, $g, $b, $a\\};",
// sust.b.1d.*.zero: 1D surface stores with the `.zero` qualifier.
// Asm shape: `[$s, {$x}], {data}`. $s is an Int64Regs operand (presumably the
// surface handle -- TODO confirm), $x is the Int32Regs coordinate, and
// $r/$g/$b/$a are the stored components (1, 2 or 4 of them for the scalar,
// .v2 and .v4 forms). The b8 forms carry their data in Int16Regs, the same
// register class as b16. There is no v4.b64 form in this group.
5412 def SUST_B_1D_B8_ZERO
5414 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5415 "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
5417 def SUST_B_1D_B16_ZERO
5419 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5420 "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
5422 def SUST_B_1D_B32_ZERO
5424 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5425 "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
5427 def SUST_B_1D_B64_ZERO
5429 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5430 "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
5432 def SUST_B_1D_V2B8_ZERO
5434 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5435 "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5437 def SUST_B_1D_V2B16_ZERO
5439 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5440 "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5442 def SUST_B_1D_V2B32_ZERO
5444 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5445 "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5447 def SUST_B_1D_V2B64_ZERO
5449 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5450 "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5452 def SUST_B_1D_V4B8_ZERO
5454 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5455 Int16Regs:$b, Int16Regs:$a),
5456 "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5458 def SUST_B_1D_V4B16_ZERO
5460 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5461 Int16Regs:$b, Int16Regs:$a),
5462 "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5464 def SUST_B_1D_V4B32_ZERO
5466 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5467 Int32Regs:$b, Int32Regs:$a),
5468 "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.b.a1d.*.zero: 1D-array surface stores with the `.zero` qualifier.
// Asm shape: `[$s, {$idx, $x}], {data}`. $idx is printed ahead of $x in the
// coordinate vector (presumably the array layer index -- TODO confirm); both
// are Int32Regs. $s is an Int64Regs operand (presumably the surface handle).
// The b8 forms carry their data in Int16Regs, the same class as b16.
// There is no v4.b64 form in this group.
5472 def SUST_B_1D_ARRAY_B8_ZERO
5474 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5475 "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5477 def SUST_B_1D_ARRAY_B16_ZERO
5479 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5480 "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5482 def SUST_B_1D_ARRAY_B32_ZERO
5484 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5485 "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5487 def SUST_B_1D_ARRAY_B64_ZERO
5489 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5490 "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5492 def SUST_B_1D_ARRAY_V2B8_ZERO
5494 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5496 "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5498 def SUST_B_1D_ARRAY_V2B16_ZERO
5500 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5502 "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5504 def SUST_B_1D_ARRAY_V2B32_ZERO
5506 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5508 "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5510 def SUST_B_1D_ARRAY_V2B64_ZERO
5512 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5514 "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5516 def SUST_B_1D_ARRAY_V4B8_ZERO
5518 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5519 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5520 "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
5521 "\\{$r, $g, $b, $a\\};",
5523 def SUST_B_1D_ARRAY_V4B16_ZERO
5525 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5526 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5527 "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
5528 "\\{$r, $g, $b, $a\\};",
5530 def SUST_B_1D_ARRAY_V4B32_ZERO
5532 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5533 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5534 "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
5535 "\\{$r, $g, $b, $a\\};",
// sust.b.2d.*.zero: 2D surface stores with the `.zero` qualifier.
// Asm shape: `[$s, {$x, $y}], {data}`. $x and $y are Int32Regs coordinates;
// $s is an Int64Regs operand (presumably the surface handle -- TODO confirm).
// $r/$g/$b/$a are the stored components. The b8 forms carry their data in
// Int16Regs, the same class as b16. There is no v4.b64 form in this group.
5539 def SUST_B_2D_B8_ZERO
5541 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5542 "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5544 def SUST_B_2D_B16_ZERO
5546 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5547 "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5549 def SUST_B_2D_B32_ZERO
5551 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5552 "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5554 def SUST_B_2D_B64_ZERO
5556 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5557 "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5559 def SUST_B_2D_V2B8_ZERO
5561 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5563 "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5565 def SUST_B_2D_V2B16_ZERO
5567 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5569 "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5571 def SUST_B_2D_V2B32_ZERO
5573 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5575 "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5577 def SUST_B_2D_V2B64_ZERO
5579 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5581 "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5583 def SUST_B_2D_V4B8_ZERO
5585 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5586 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5587 "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
5588 "\\{$r, $g, $b, $a\\};",
5590 def SUST_B_2D_V4B16_ZERO
5592 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5593 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5594 "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
5595 "\\{$r, $g, $b, $a\\};",
5597 def SUST_B_2D_V4B32_ZERO
5599 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5600 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5601 "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
5602 "\\{$r, $g, $b, $a\\};",
// sust.b.a2d.*.zero: 2D-array surface stores with the `.zero` qualifier.
// Asm shape: `[$s, {$idx, $x, $y, $y}], {data}`. $idx is printed first in the
// coordinate vector (presumably the array layer index -- TODO confirm).
// NOTE: $y is deliberately printed twice so the coordinate vector has four
// elements; presumably PTX requires a 4-element vector for a2d and ignores
// the last one -- confirm against the PTX ISA `sust` description before
// "fixing" the repetition.
// The b8 forms carry their data in Int16Regs, the same class as b16.
5606 def SUST_B_2D_ARRAY_B8_ZERO
5608 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5610 "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5612 def SUST_B_2D_ARRAY_B16_ZERO
5614 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5616 "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5618 def SUST_B_2D_ARRAY_B32_ZERO
5620 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5622 "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5624 def SUST_B_2D_ARRAY_B64_ZERO
5626 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5628 "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5630 def SUST_B_2D_ARRAY_V2B8_ZERO
5632 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5633 Int16Regs:$r, Int16Regs:$g),
5634 "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5637 def SUST_B_2D_ARRAY_V2B16_ZERO
5639 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5640 Int16Regs:$r, Int16Regs:$g),
5641 "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5644 def SUST_B_2D_ARRAY_V2B32_ZERO
5646 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5647 Int32Regs:$r, Int32Regs:$g),
5648 "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5651 def SUST_B_2D_ARRAY_V2B64_ZERO
5653 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5654 Int64Regs:$r, Int64Regs:$g),
5655 "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5658 def SUST_B_2D_ARRAY_V4B8_ZERO
5660 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5661 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5662 "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5663 "\\{$r, $g, $b, $a\\};",
5665 def SUST_B_2D_ARRAY_V4B16_ZERO
5667 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5668 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5669 "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5670 "\\{$r, $g, $b, $a\\};",
5672 def SUST_B_2D_ARRAY_V4B32_ZERO
5674 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5675 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5676 "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5677 "\\{$r, $g, $b, $a\\};",
// sust.b.3d.*.zero: 3D surface stores with the `.zero` qualifier.
// Asm shape: `[$s, {$x, $y, $z, $z}], {data}`. $x/$y/$z are Int32Regs
// coordinates; $s is an Int64Regs operand (presumably the surface handle).
// NOTE: $z is deliberately printed twice so the coordinate vector has four
// elements; presumably PTX requires a 4-element vector for 3d and ignores
// the last one -- confirm against the PTX ISA `sust` description before
// "fixing" the repetition.
// The b8 forms carry their data in Int16Regs, the same class as b16.
5681 def SUST_B_3D_B8_ZERO
5683 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5685 "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5687 def SUST_B_3D_B16_ZERO
5689 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5691 "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5693 def SUST_B_3D_B32_ZERO
5695 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5697 "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5699 def SUST_B_3D_B64_ZERO
5701 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5703 "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5705 def SUST_B_3D_V2B8_ZERO
5707 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5708 Int16Regs:$r, Int16Regs:$g),
5709 "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5712 def SUST_B_3D_V2B16_ZERO
5714 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5715 Int16Regs:$r, Int16Regs:$g),
5716 "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5719 def SUST_B_3D_V2B32_ZERO
5721 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5722 Int32Regs:$r, Int32Regs:$g),
5723 "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5726 def SUST_B_3D_V2B64_ZERO
5728 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5729 Int64Regs:$r, Int64Regs:$g),
5730 "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5733 def SUST_B_3D_V4B8_ZERO
5735 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5736 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5737 "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5738 "\\{$r, $g, $b, $a\\};",
5740 def SUST_B_3D_V4B16_ZERO
5742 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5743 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5744 "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5745 "\\{$r, $g, $b, $a\\};",
5747 def SUST_B_3D_V4B32_ZERO
5749 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5750 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5751 "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5752 "\\{$r, $g, $b, $a\\};",
// sust.p.1d.*.trap: the `sust.p` form of the 1D surface store, `.trap`
// qualifier only. Asm shape: `[$s, {$x}], {data}`. $s is an Int64Regs operand
// (presumably the surface handle -- TODO confirm), $x is the Int32Regs
// coordinate, and $r/$g/$b/$a are the stored components.
// Element widths in this group are b8, b16 and b32 (no b64 forms); the b8
// forms carry their data in Int16Regs, the same class as b16.
5759 def SUST_P_1D_B8_TRAP
5761 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5762 "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5764 def SUST_P_1D_B16_TRAP
5766 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5767 "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5769 def SUST_P_1D_B32_TRAP
5771 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5772 "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5774 def SUST_P_1D_V2B8_TRAP
5776 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5777 "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5779 def SUST_P_1D_V2B16_TRAP
5781 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5782 "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5784 def SUST_P_1D_V2B32_TRAP
5786 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5787 "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5789 def SUST_P_1D_V4B8_TRAP
5791 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5792 Int16Regs:$b, Int16Regs:$a),
5793 "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5795 def SUST_P_1D_V4B16_TRAP
5797 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5798 Int16Regs:$b, Int16Regs:$a),
5799 "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5801 def SUST_P_1D_V4B32_TRAP
5803 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5804 Int32Regs:$b, Int32Regs:$a),
5805 "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.p.a1d.*.trap: the `sust.p` form of the 1D-array surface store, `.trap`
// qualifier only. Asm shape: `[$s, {$idx, $x}], {data}`. $idx is printed
// ahead of $x in the coordinate vector (presumably the array layer index --
// TODO confirm); both are Int32Regs. $s is an Int64Regs operand (presumably
// the surface handle).
// Element widths in this group are b8, b16 and b32 (no b64 forms); the b8
// forms carry their data in Int16Regs, the same class as b16.
5809 def SUST_P_1D_ARRAY_B8_TRAP
5811 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5812 "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5814 def SUST_P_1D_ARRAY_B16_TRAP
5816 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5817 "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5819 def SUST_P_1D_ARRAY_B32_TRAP
5821 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5822 "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5824 def SUST_P_1D_ARRAY_V2B8_TRAP
5826 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5828 "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5830 def SUST_P_1D_ARRAY_V2B16_TRAP
5832 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5834 "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5836 def SUST_P_1D_ARRAY_V2B32_TRAP
5838 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5840 "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5842 def SUST_P_1D_ARRAY_V4B8_TRAP
5844 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5845 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5846 "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5847 "\\{$r, $g, $b, $a\\};",
5849 def SUST_P_1D_ARRAY_V4B16_TRAP
5851 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5852 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5853 "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5854 "\\{$r, $g, $b, $a\\};",
5856 def SUST_P_1D_ARRAY_V4B32_TRAP
5858 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5859 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5860 "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5861 "\\{$r, $g, $b, $a\\};",
// sust.p 2D surface stores in .trap mode.
// The address is printed as [$s, {$x, $y}] with both coordinates in
// Int32Regs. Data operands: Int16Regs for b8/b16, Int32Regs for b32.
// NOTE(review): embedded listing numbers skip around every record, so
// header/terminator lines are not visible here. In the v2 records the visible
// operand list ends on "$r," while the asm string also prints $g -- the line
// declaring $g (listing 5883/5889/5895) is missing from this view; verify
// against the upstream file.

// Scalar stores.
5865 def SUST_P_2D_B8_TRAP
5867 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5868 "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5870 def SUST_P_2D_B16_TRAP
5872 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5873 "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5875 def SUST_P_2D_B32_TRAP
5877 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5878 "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
// Two-element vector stores (operand list truncated in this view, see note).
5880 def SUST_P_2D_V2B8_TRAP
5882 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5884 "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5886 def SUST_P_2D_V2B16_TRAP
5888 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5890 "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5892 def SUST_P_2D_V2B32_TRAP
5894 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5896 "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
// Four-element vector stores; the asm string is split across two literals.
5898 def SUST_P_2D_V4B8_TRAP
5900 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5901 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5902 "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5903 "\\{$r, $g, $b, $a\\};",
5905 def SUST_P_2D_V4B16_TRAP
5907 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5908 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5909 "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5910 "\\{$r, $g, $b, $a\\};",
5912 def SUST_P_2D_V4B32_TRAP
5914 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5915 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5916 "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5917 "\\{$r, $g, $b, $a\\};",
// sust.p 2D-array (a2d) surface stores in .trap mode.
// The address is printed as [$s, {$idx, $x, $y, $y}]: $idx first, then the
// coordinates, with $y emitted twice so the vector has four entries
// (presumably a PTX operand-shape requirement for a2d -- confirm against the
// PTX ISA).
// NOTE(review): embedded listing numbers skip around every record, so
// header/terminator lines are not visible here. In addition:
//  - the scalar records' operand lists end on "$y," (the line declaring $r,
//    listing 5924/5930/5936, is missing from this view);
//  - the v2 records show only the first half of the asm string (the
//    continuation literal, listing 5944/5951/5958, is missing from this view).
// Verify against the upstream file.

// Scalar stores.
5921 def SUST_P_2D_ARRAY_B8_TRAP
5923 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5925 "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5927 def SUST_P_2D_ARRAY_B16_TRAP
5929 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5931 "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5933 def SUST_P_2D_ARRAY_B32_TRAP
5935 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5937 "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
// Two-element vector stores (asm string truncated in this view, see note).
5939 def SUST_P_2D_ARRAY_V2B8_TRAP
5941 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5942 Int16Regs:$r, Int16Regs:$g),
5943 "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5946 def SUST_P_2D_ARRAY_V2B16_TRAP
5948 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5949 Int16Regs:$r, Int16Regs:$g),
5950 "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5953 def SUST_P_2D_ARRAY_V2B32_TRAP
5955 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5956 Int32Regs:$r, Int32Regs:$g),
5957 "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
// Four-element vector stores; the asm string is split across two literals.
5960 def SUST_P_2D_ARRAY_V4B8_TRAP
5962 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5963 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5964 "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5965 "\\{$r, $g, $b, $a\\};",
5967 def SUST_P_2D_ARRAY_V4B16_TRAP
5969 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5970 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5971 "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5972 "\\{$r, $g, $b, $a\\};",
5974 def SUST_P_2D_ARRAY_V4B32_TRAP
5976 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5977 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5978 "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5979 "\\{$r, $g, $b, $a\\};",
// sust.p 3D surface stores in .trap mode.
// The address is printed as [$s, {$x, $y, $z, $z}]: $z is emitted twice so
// the coordinate vector has four entries (presumably a PTX operand-shape
// requirement for 3d geometry -- confirm against the PTX ISA).
// NOTE(review): embedded listing numbers skip around every record, so
// header/terminator lines are not visible here. In addition:
//  - the scalar records' operand lists end on "$z," (the line declaring $r,
//    listing 5986/5992/5998, is missing from this view);
//  - the v2 records show only the first half of the asm string (the
//    continuation literal, listing 6006/6013/6020, is missing from this view).
// Verify against the upstream file.

// Scalar stores.
5983 def SUST_P_3D_B8_TRAP
5985 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5987 "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5989 def SUST_P_3D_B16_TRAP
5991 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5993 "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5995 def SUST_P_3D_B32_TRAP
5997 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5999 "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
// Two-element vector stores (asm string truncated in this view, see note).
6001 def SUST_P_3D_V2B8_TRAP
6003 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6004 Int16Regs:$r, Int16Regs:$g),
6005 "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6008 def SUST_P_3D_V2B16_TRAP
6010 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6011 Int16Regs:$r, Int16Regs:$g),
6012 "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6015 def SUST_P_3D_V2B32_TRAP
6017 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6018 Int32Regs:$r, Int32Regs:$g),
6019 "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
// Four-element vector stores; the asm string is split across two literals.
6022 def SUST_P_3D_V4B8_TRAP
6024 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6025 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6026 "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6027 "\\{$r, $g, $b, $a\\};",
6029 def SUST_P_3D_V4B16_TRAP
6031 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6032 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6033 "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6034 "\\{$r, $g, $b, $a\\};",
6036 def SUST_P_3D_V4B32_TRAP
6038 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6039 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6040 "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6041 "\\{$r, $g, $b, $a\\};",
6045 // Surface store instruction patterns
6046 // I'm not sure why we can't just include these in the instruction definitions,
6047 // but TableGen complains of type errors :(
// Selection patterns for the 1D sust.b .clamp intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_1d_<ty>_clamp onto the record
// SUST_B_1D_<TY>_CLAMP, forwarding the operands unchanged and in the same
// order: $s (Int64Regs), $x (Int32Regs), then the data registers.
// i8 and i16 data both use Int16Regs; i32 uses Int32Regs; i64 uses Int64Regs.
// There are no v4i64 patterns in this group.
6050 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
6051 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6052 (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6054 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
6055 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6056 (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6058 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
6059 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6060 (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6062 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
6063 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6064 (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6066 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
6067 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6068 (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
6069 Int16Regs:$r, Int16Regs:$g)>;
6071 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
6072 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6073 (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
6074 Int16Regs:$r, Int16Regs:$g)>;
6076 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
6077 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6078 (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
6079 Int32Regs:$r, Int32Regs:$g)>;
6081 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
6082 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6083 (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
6084 Int64Regs:$r, Int64Regs:$g)>;
6086 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
6087 Int64Regs:$s, Int32Regs:$x,
6088 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6089 (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
6090 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6092 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
6093 Int64Regs:$s, Int32Regs:$x,
6094 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6095 (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
6096 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6098 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
6099 Int64Regs:$s, Int32Regs:$x,
6100 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6101 (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
6102 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D-array sust.b .clamp intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_1d_array_<ty>_clamp onto
// SUST_B_1D_ARRAY_<TY>_CLAMP, forwarding $s (Int64Regs), $l and $x
// (Int32Regs) and the data registers unchanged and in the same order.
// i8 and i16 data both use Int16Regs.
// NOTE(review): the four scalar patterns (i8/i16/i32/i64) end on a trailing
// comma in this view -- their final operand line is not visible (embedded
// numbers skip, e.g. 6108 -> 6111). Verify against the upstream file.
6106 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
6107 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6108 (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6111 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
6112 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6113 (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6116 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
6117 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6118 (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6121 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
6122 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6123 (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6126 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
6127 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6128 (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6129 Int16Regs:$r, Int16Regs:$g)>;
6131 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
6132 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6133 (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6134 Int16Regs:$r, Int16Regs:$g)>;
6136 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
6137 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6138 (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6139 Int32Regs:$r, Int32Regs:$g)>;
6141 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
6142 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6143 (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6144 Int64Regs:$r, Int64Regs:$g)>;
6146 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
6147 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6148 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6149 (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6150 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6152 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
6153 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6154 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6155 (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6156 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6158 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
6159 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6160 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6161 (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6162 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D sust.b .clamp intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_2d_<ty>_clamp onto
// SUST_B_2D_<TY>_CLAMP, forwarding $s (Int64Regs), $x and $y (Int32Regs) and
// the data registers unchanged and in the same order.
// i8 and i16 data both use Int16Regs.
// NOTE(review): the four scalar patterns (i8/i16/i32/i64) end on a trailing
// comma in this view -- their final operand line is not visible (embedded
// numbers skip, e.g. 6168 -> 6171). Verify against the upstream file.
6166 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
6167 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6168 (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6171 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
6172 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6173 (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6176 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
6177 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6178 (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6181 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
6182 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6183 (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6186 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
6187 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6188 (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6189 Int16Regs:$r, Int16Regs:$g)>;
6191 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
6192 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6193 (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6194 Int16Regs:$r, Int16Regs:$g)>;
6196 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
6197 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6198 (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6199 Int32Regs:$r, Int32Regs:$g)>;
6201 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
6202 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6203 (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6204 Int64Regs:$r, Int64Regs:$g)>;
6206 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
6207 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6208 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6209 (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6210 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6212 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
6213 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6214 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6215 (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6216 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6218 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
6219 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6220 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6221 (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6222 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D-array sust.b .clamp intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_2d_array_<ty>_clamp onto
// SUST_B_2D_ARRAY_<TY>_CLAMP, forwarding $s (Int64Regs), $l, $x and $y
// (Int32Regs) and the data registers unchanged and in the same order.
// i8 and i16 data both use Int16Regs.
// NOTE(review): lines are missing from this view (embedded numbers skip):
//  - the scalar patterns (i8/i16/i32/i64) end on a trailing comma, so their
//    final operand line is not visible (e.g. 6229 -> 6232);
//  - the v2i32/v2i64 source patterns end on "$r," with the line declaring $g
//    not visible (listing 6266/6272).
// Verify against the upstream file.
6226 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
6227 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6228 (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
6229 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6232 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
6233 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6234 (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
6235 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6238 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
6239 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6240 (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
6241 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6244 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
6245 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6246 (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
6247 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6250 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
6251 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6252 Int16Regs:$r, Int16Regs:$g),
6253 (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
6254 Int32Regs:$x, Int32Regs:$y,
6255 Int16Regs:$r, Int16Regs:$g)>;
6257 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
6258 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6259 Int16Regs:$r, Int16Regs:$g),
6260 (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
6261 Int32Regs:$x, Int32Regs:$y,
6262 Int16Regs:$r, Int16Regs:$g)>;
6264 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
6265 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6267 (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6268 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6270 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
6271 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6273 (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
6274 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6276 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
6277 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6278 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6279 (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
6280 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6281 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6283 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
6284 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6285 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6286 (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
6287 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6288 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6290 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
6291 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6292 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6293 (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6294 Int32Regs:$x, Int32Regs:$y,
6295 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 3D sust.b .clamp intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_3d_<ty>_clamp onto
// SUST_B_3D_<TY>_CLAMP, forwarding $s (Int64Regs), $x, $y and $z (Int32Regs)
// and the data registers unchanged and in the same order.
// i8 and i16 data both use Int16Regs.
// NOTE(review): the four scalar patterns (i8/i16/i32/i64) are truncated in
// this view -- both the source pattern and the result end on "$z," with the
// data-operand lines not visible (embedded numbers skip, e.g. 6300 -> 6302
// and 6303 -> 6306). Verify against the upstream file.
6299 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
6300 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6302 (SUST_B_3D_B8_CLAMP Int64Regs:$s,
6303 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6306 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
6307 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6309 (SUST_B_3D_B16_CLAMP Int64Regs:$s,
6310 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6313 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
6314 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6316 (SUST_B_3D_B32_CLAMP Int64Regs:$s,
6317 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6320 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
6321 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6323 (SUST_B_3D_B64_CLAMP Int64Regs:$s,
6324 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6327 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
6328 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6329 Int16Regs:$r, Int16Regs:$g),
6330 (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
6331 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6332 Int16Regs:$r, Int16Regs:$g)>;
6334 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
6335 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6336 Int16Regs:$r, Int16Regs:$g),
6337 (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
6338 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6339 Int16Regs:$r, Int16Regs:$g)>;
6341 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
6342 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6343 Int32Regs:$r, Int32Regs:$g),
6344 (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
6345 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6346 Int32Regs:$r, Int32Regs:$g)>;
6348 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
6349 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6350 Int64Regs:$r, Int64Regs:$g),
6351 (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
6352 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6353 Int64Regs:$r, Int64Regs:$g)>;
6355 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
6356 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6357 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6358 (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
6359 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6360 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6362 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
6363 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6364 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6365 (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
6366 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6367 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6369 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
6370 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6371 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6372 (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
6373 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6374 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D sust.b .trap intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_1d_<ty>_trap onto the record
// SUST_B_1D_<TY>_TRAP, forwarding the operands unchanged and in the same
// order: $s (Int64Regs), $x (Int32Regs), then the data registers.
// i8 and i16 data both use Int16Regs; i32 uses Int32Regs; i64 uses Int64Regs.
// There are no v4i64 patterns in this group.
6378 def : Pat<(int_nvvm_sust_b_1d_i8_trap
6379 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6380 (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6382 def : Pat<(int_nvvm_sust_b_1d_i16_trap
6383 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6384 (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6386 def : Pat<(int_nvvm_sust_b_1d_i32_trap
6387 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6388 (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6390 def : Pat<(int_nvvm_sust_b_1d_i64_trap
6391 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6392 (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6394 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
6395 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6396 (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6397 Int16Regs:$r, Int16Regs:$g)>;
6399 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
6400 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6401 (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6402 Int16Regs:$r, Int16Regs:$g)>;
6404 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
6405 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6406 (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6407 Int32Regs:$r, Int32Regs:$g)>;
6409 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
6410 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6411 (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
6412 Int64Regs:$r, Int64Regs:$g)>;
6414 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
6415 Int64Regs:$s, Int32Regs:$x,
6416 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6417 (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6418 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6420 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
6421 Int64Regs:$s, Int32Regs:$x,
6422 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6423 (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6424 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6426 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
6427 Int64Regs:$s, Int32Regs:$x,
6428 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6429 (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6430 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D-array sust.b .trap intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_1d_array_<ty>_trap onto
// SUST_B_1D_ARRAY_<TY>_TRAP, forwarding $s (Int64Regs), $l and $x
// (Int32Regs) and the data registers unchanged and in the same order.
// i8 and i16 data both use Int16Regs.
// NOTE(review): the four scalar patterns (i8/i16/i32/i64) end on a trailing
// comma in this view -- their final operand line is not visible (embedded
// numbers skip, e.g. 6436 -> 6439). Verify against the upstream file.
6434 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
6435 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6436 (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6439 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
6440 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6441 (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6444 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
6445 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6446 (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6449 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
6450 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6451 (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6454 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6455 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6456 (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6457 Int16Regs:$r, Int16Regs:$g)>;
6459 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
6460 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6461 (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6462 Int16Regs:$r, Int16Regs:$g)>;
6464 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
6465 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6466 (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6467 Int32Regs:$r, Int32Regs:$g)>;
6469 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
6470 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6471 (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6472 Int64Regs:$r, Int64Regs:$g)>;
6474 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
6475 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6476 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6477 (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6478 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6480 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6481 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6482 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6483 (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6484 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6486 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6487 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6488 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6489 (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6490 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D sust.b .trap intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_2d_<ty>_trap onto
// SUST_B_2D_<TY>_TRAP, forwarding $s (Int64Regs), $x and $y (Int32Regs) and
// the data registers unchanged and in the same order.
// i8 and i16 data both use Int16Regs.
// NOTE(review): the four scalar patterns (i8/i16/i32/i64) end on a trailing
// comma in this view -- their final operand line is not visible (embedded
// numbers skip, e.g. 6496 -> 6499). Verify against the upstream file.
6494 def : Pat<(int_nvvm_sust_b_2d_i8_trap
6495 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6496 (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6499 def : Pat<(int_nvvm_sust_b_2d_i16_trap
6500 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6501 (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6504 def : Pat<(int_nvvm_sust_b_2d_i32_trap
6505 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6506 (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6509 def : Pat<(int_nvvm_sust_b_2d_i64_trap
6510 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6511 (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6514 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6515 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6516 (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6517 Int16Regs:$r, Int16Regs:$g)>;
6519 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6520 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6521 (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6522 Int16Regs:$r, Int16Regs:$g)>;
6524 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6525 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6526 (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6527 Int32Regs:$r, Int32Regs:$g)>;
6529 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6530 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6531 (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6532 Int64Regs:$r, Int64Regs:$g)>;
6534 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6535 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6536 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6537 (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6538 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6540 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6541 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6542 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6543 (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6544 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6546 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6547 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6548 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6549 (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6550 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D-array sust.b .trap intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_2d_array_<ty>_trap onto
// SUST_B_2D_ARRAY_<TY>_TRAP, forwarding $s (Int64Regs), $l, $x and $y
// (Int32Regs) and the data registers unchanged and in the same order.
// i8 and i16 data both use Int16Regs.
// NOTE(review): lines are missing from this view (embedded numbers skip):
//  - the scalar patterns (i8/i16/i32/i64) end on a trailing comma, so their
//    final operand line is not visible (e.g. 6557 -> 6560);
//  - the v2i32/v2i64 source patterns end on "$r," with the line declaring $g
//    not visible (listing 6594/6600).
// Verify against the upstream file.
6554 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6555 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6556 (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6557 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6560 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6561 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6562 (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6563 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6566 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6567 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6568 (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6569 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6572 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6573 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6574 (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6575 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6578 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6579 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6580 Int16Regs:$r, Int16Regs:$g),
6581 (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6582 Int32Regs:$x, Int32Regs:$y,
6583 Int16Regs:$r, Int16Regs:$g)>;
6585 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6586 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6587 Int16Regs:$r, Int16Regs:$g),
6588 (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6589 Int32Regs:$x, Int32Regs:$y,
6590 Int16Regs:$r, Int16Regs:$g)>;
6592 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6593 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6595 (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6596 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6598 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6599 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6601 (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6602 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6604 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6605 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6606 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6607 (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6608 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6609 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6611 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6612 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6613 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6614 (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6615 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6616 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6618 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6619 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6620 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6621 (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6622 Int32Regs:$x, Int32Regs:$y,
6623 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 3D sust.b .trap intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_3d_<ty>_trap onto
// SUST_B_3D_<TY>_TRAP, forwarding $s (Int64Regs), $x, $y and $z (Int32Regs)
// and the data registers unchanged and in the same order.
// i8 and i16 data both use Int16Regs.
// NOTE(review): the four scalar patterns (i8/i16/i32/i64) are truncated in
// this view -- both the source pattern and the result end on "$z," with the
// data-operand lines not visible (embedded numbers skip, e.g. 6628 -> 6630
// and 6631 -> 6634). Verify against the upstream file.
6627 def : Pat<(int_nvvm_sust_b_3d_i8_trap
6628 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6630 (SUST_B_3D_B8_TRAP Int64Regs:$s,
6631 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6634 def : Pat<(int_nvvm_sust_b_3d_i16_trap
6635 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6637 (SUST_B_3D_B16_TRAP Int64Regs:$s,
6638 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6641 def : Pat<(int_nvvm_sust_b_3d_i32_trap
6642 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6644 (SUST_B_3D_B32_TRAP Int64Regs:$s,
6645 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6648 def : Pat<(int_nvvm_sust_b_3d_i64_trap
6649 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6651 (SUST_B_3D_B64_TRAP Int64Regs:$s,
6652 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6655 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6656 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6657 Int16Regs:$r, Int16Regs:$g),
6658 (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6659 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6660 Int16Regs:$r, Int16Regs:$g)>;
6662 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6663 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6664 Int16Regs:$r, Int16Regs:$g),
6665 (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6666 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6667 Int16Regs:$r, Int16Regs:$g)>;
6669 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6670 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6671 Int32Regs:$r, Int32Regs:$g),
6672 (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6673 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6674 Int32Regs:$r, Int32Regs:$g)>;
6676 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6677 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6678 Int64Regs:$r, Int64Regs:$g),
6679 (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6680 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6681 Int64Regs:$r, Int64Regs:$g)>;
6683 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6684 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6685 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6686 (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6687 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6688 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6690 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6691 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6692 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6693 (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6694 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6695 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6697 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6698 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6699 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6700 (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6701 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6702 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_*_zero intrinsics.
// Each intrinsic selects the SUST_B_1D_*_ZERO instruction with the same
// element width and vector length; operands are forwarded in order:
// $s (Int64Regs), $x (Int32Regs), then the value operand(s) $r[, $g[, $b, $a]].
// i8 and i16 values both use Int16Regs, i32 uses Int32Regs, i64 uses Int64Regs.
// NOTE(review): the _zero suffix presumably corresponds to PTX's .zero
// out-of-range mode -- confirm against the SUST_B_1D_*_ZERO definitions.
6706 def : Pat<(int_nvvm_sust_b_1d_i8_zero
6707 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6708 (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6710 def : Pat<(int_nvvm_sust_b_1d_i16_zero
6711 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6712 (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6714 def : Pat<(int_nvvm_sust_b_1d_i32_zero
6715 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6716 (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6718 def : Pat<(int_nvvm_sust_b_1d_i64_zero
6719 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6720 (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6722 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6723 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6724 (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6725 Int16Regs:$r, Int16Regs:$g)>;
6727 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6728 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6729 (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6730 Int16Regs:$r, Int16Regs:$g)>;
6732 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6733 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6734 (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6735 Int32Regs:$r, Int32Regs:$g)>;
6737 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6738 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6739 (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6740 Int64Regs:$r, Int64Regs:$g)>;
6742 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6743 Int64Regs:$s, Int32Regs:$x,
6744 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6745 (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6746 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6748 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6749 Int64Regs:$s, Int32Regs:$x,
6750 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6751 (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6752 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6754 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6755 Int64Regs:$s, Int32Regs:$x,
6756 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6757 (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6758 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_array_*_zero intrinsics.
// Each intrinsic selects the matching SUST_B_1D_ARRAY_*_ZERO instruction.
// Compared with the non-array 1D forms there is one extra Int32Regs operand,
// $l, placed between $s and $x; the value operand(s) follow $x.
// NOTE(review): $l is presumably the array layer index -- confirm against
// the SUST_B_1D_ARRAY_* instruction definitions.
6762 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6763 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6764 (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6767 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6768 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6769 (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6772 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6773 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6774 (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6777 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6778 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6779 (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6782 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6783 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6784 (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6785 Int16Regs:$r, Int16Regs:$g)>;
6787 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6788 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6789 (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6790 Int16Regs:$r, Int16Regs:$g)>;
6792 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6793 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6794 (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6795 Int32Regs:$r, Int32Regs:$g)>;
6797 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6798 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6799 (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6800 Int64Regs:$r, Int64Regs:$g)>;
6802 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6803 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6804 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6805 (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6806 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6808 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6809 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6810 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6811 (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6812 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6814 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6815 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6816 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6817 (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6818 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_*_zero intrinsics.
// Each intrinsic selects the matching SUST_B_2D_*_ZERO instruction. Operands
// are $s (Int64Regs), the two Int32Regs operands $x and $y, and then the
// value operand(s); i8/i16 values use Int16Regs, i32 uses Int32Regs and i64
// uses Int64Regs.
6822 def : Pat<(int_nvvm_sust_b_2d_i8_zero
6823 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6824 (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6827 def : Pat<(int_nvvm_sust_b_2d_i16_zero
6828 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6829 (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6832 def : Pat<(int_nvvm_sust_b_2d_i32_zero
6833 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6834 (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6837 def : Pat<(int_nvvm_sust_b_2d_i64_zero
6838 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6839 (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6842 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
6843 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6844 (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6845 Int16Regs:$r, Int16Regs:$g)>;
6847 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
6848 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6849 (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6850 Int16Regs:$r, Int16Regs:$g)>;
6852 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
6853 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6854 (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6855 Int32Regs:$r, Int32Regs:$g)>;
6857 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
6858 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6859 (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6860 Int64Regs:$r, Int64Regs:$g)>;
6862 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
6863 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6864 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6865 (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6866 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6868 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
6869 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6870 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6871 (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6872 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6874 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
6875 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6876 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6877 (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6878 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_array_*_zero intrinsics.
// Each intrinsic selects the matching SUST_B_2D_ARRAY_*_ZERO instruction.
// Operand order is $s (Int64Regs), then the Int32Regs operands $l, $x, $y,
// then the value operand(s) $r[, $g[, $b, $a]].
// NOTE(review): $l is presumably the array layer index -- confirm against
// the SUST_B_2D_ARRAY_* instruction definitions.
6882 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
6883 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6884 (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
6885 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6888 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
6889 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6890 (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
6891 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6894 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
6895 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6896 (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
6897 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6900 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
6901 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6902 (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
6903 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6906 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
6907 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6908 Int16Regs:$r, Int16Regs:$g),
6909 (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
6910 Int32Regs:$x, Int32Regs:$y,
6911 Int16Regs:$r, Int16Regs:$g)>;
6913 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
6914 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6915 Int16Regs:$r, Int16Regs:$g),
6916 (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
6917 Int32Regs:$x, Int32Regs:$y,
6918 Int16Regs:$r, Int16Regs:$g)>;
6920 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
6921 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6923 (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
6924 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6926 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
6927 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6929 (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
6930 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6932 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
6933 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6934 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6935 (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
6936 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6937 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6939 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
6940 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6941 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6942 (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
6943 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6944 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6946 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
6947 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6948 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6949 (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
6950 Int32Regs:$x, Int32Regs:$y,
6951 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_3d_*_zero intrinsics.
// Each intrinsic selects the matching SUST_B_3D_*_ZERO instruction. Operands
// start with $s (Int64Regs) and the three Int32Regs operands $x, $y, $z,
// followed by the value operand(s). In the vector forms, 8- and 16-bit
// elements use Int16Regs, 32-bit use Int32Regs and 64-bit use Int64Regs.
6955 def : Pat<(int_nvvm_sust_b_3d_i8_zero
6956 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6958 (SUST_B_3D_B8_ZERO Int64Regs:$s,
6959 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6962 def : Pat<(int_nvvm_sust_b_3d_i16_zero
6963 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6965 (SUST_B_3D_B16_ZERO Int64Regs:$s,
6966 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6969 def : Pat<(int_nvvm_sust_b_3d_i32_zero
6970 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6972 (SUST_B_3D_B32_ZERO Int64Regs:$s,
6973 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6976 def : Pat<(int_nvvm_sust_b_3d_i64_zero
6977 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6979 (SUST_B_3D_B64_ZERO Int64Regs:$s,
6980 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6983 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6984 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6985 Int16Regs:$r, Int16Regs:$g),
6986 (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
6987 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6988 Int16Regs:$r, Int16Regs:$g)>;
6990 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6991 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6992 Int16Regs:$r, Int16Regs:$g),
6993 (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
6994 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6995 Int16Regs:$r, Int16Regs:$g)>;
6997 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6998 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6999 Int32Regs:$r, Int32Regs:$g),
7000 (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
7001 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7002 Int32Regs:$r, Int32Regs:$g)>;
7004 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
7005 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7006 Int64Regs:$r, Int64Regs:$g),
7007 (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
7008 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7009 Int64Regs:$r, Int64Regs:$g)>;
7011 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
7012 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7013 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7014 (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
7015 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7016 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7018 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
7019 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7020 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7021 (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
7022 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7023 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7025 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
7026 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7027 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7028 (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
7029 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7030 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_1d_*_trap intrinsics.
// Each intrinsic selects the SUST_P_1D_*_TRAP instruction with the same
// element width and vector length; operands are forwarded in order:
// $s (Int64Regs), $x (Int32Regs), then the value operand(s) $r[, $g[, $b, $a]].
// Only 8-, 16- and 32-bit element forms appear in this group; i8 and i16
// values use Int16Regs and i32 values use Int32Regs.
// NOTE(review): sust_p presumably denotes the formatted (sust.p) store
// family, as opposed to sust_b -- confirm against the SUST_P_* definitions.
7035 def : Pat<(int_nvvm_sust_p_1d_i8_trap
7036 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
7037 (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
7039 def : Pat<(int_nvvm_sust_p_1d_i16_trap
7040 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
7041 (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
7043 def : Pat<(int_nvvm_sust_p_1d_i32_trap
7044 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
7045 (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
7047 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
7048 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7049 (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
7050 Int16Regs:$r, Int16Regs:$g)>;
7052 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
7053 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7054 (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
7055 Int16Regs:$r, Int16Regs:$g)>;
7057 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
7058 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7059 (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
7060 Int32Regs:$r, Int32Regs:$g)>;
7062 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
7063 Int64Regs:$s, Int32Regs:$x,
7064 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7065 (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
7066 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7068 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
7069 Int64Regs:$s, Int32Regs:$x,
7070 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7071 (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
7072 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7074 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
7075 Int64Regs:$s, Int32Regs:$x,
7076 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7077 (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
7078 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_1d_array_*_trap intrinsics.
// Each intrinsic selects the matching SUST_P_1D_ARRAY_*_TRAP instruction.
// Operand order is $s (Int64Regs), then the Int32Regs operands $l and $x,
// then the value operand(s). Only 8-, 16- and 32-bit element forms appear.
// NOTE(review): $l is presumably the array layer index -- confirm against
// the SUST_P_1D_ARRAY_* instruction definitions.
7082 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
7083 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7084 (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7087 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
7088 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7089 (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7092 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
7093 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
7094 (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7097 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
7098 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7099 (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7100 Int16Regs:$r, Int16Regs:$g)>;
7102 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
7103 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7104 (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7105 Int16Regs:$r, Int16Regs:$g)>;
7107 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
7108 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7109 (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7110 Int32Regs:$r, Int32Regs:$g)>;
7112 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
7113 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7114 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7115 (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7116 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7118 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
7119 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7120 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7121 (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7122 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7124 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
7125 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7126 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7127 (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7128 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_2d_*_trap intrinsics.
// Each intrinsic selects the matching SUST_P_2D_*_TRAP instruction. Operands
// are $s (Int64Regs), the two Int32Regs operands $x and $y, and then the
// value operand(s). Only 8-, 16- and 32-bit element forms appear; i8 and i16
// values use Int16Regs and i32 values use Int32Regs.
7132 def : Pat<(int_nvvm_sust_p_2d_i8_trap
7133 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7134 (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7137 def : Pat<(int_nvvm_sust_p_2d_i16_trap
7138 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7139 (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7142 def : Pat<(int_nvvm_sust_p_2d_i32_trap
7143 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7144 (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7147 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
7148 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7149 (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7150 Int16Regs:$r, Int16Regs:$g)>;
7152 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
7153 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7154 (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7155 Int16Regs:$r, Int16Regs:$g)>;
7157 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
7158 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
7159 (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7160 Int32Regs:$r, Int32Regs:$g)>;
7162 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
7163 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7164 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7165 (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7166 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7168 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
7169 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7170 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7171 (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7172 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7174 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
7175 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7176 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7177 (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7178 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_2d_array_*_trap intrinsics.
// Each intrinsic selects the matching SUST_P_2D_ARRAY_*_TRAP instruction.
// Operand order is $s (Int64Regs), then the Int32Regs operands $l, $x, $y,
// then the value operand(s). Only 8-, 16- and 32-bit element forms appear.
// NOTE(review): $l is presumably the array layer index -- confirm against
// the SUST_P_2D_ARRAY_* instruction definitions.
7182 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
7183 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7184 (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
7185 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7188 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
7189 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7190 (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
7191 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7194 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
7195 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7196 (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
7197 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7200 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
7201 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7202 Int16Regs:$r, Int16Regs:$g),
7203 (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
7204 Int32Regs:$x, Int32Regs:$y,
7205 Int16Regs:$r, Int16Regs:$g)>;
7207 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
7208 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7209 Int16Regs:$r, Int16Regs:$g),
7210 (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
7211 Int32Regs:$x, Int32Regs:$y,
7212 Int16Regs:$r, Int16Regs:$g)>;
7214 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
7215 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
7217 (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
7218 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
7220 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
7221 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7222 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7223 (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
7224 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7225 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7227 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
7228 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7229 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7230 (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
7231 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7232 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7234 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
7235 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7236 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7237 (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
7238 Int32Regs:$x, Int32Regs:$y,
7239 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_3d_*_trap intrinsics.
// Each intrinsic selects the matching SUST_P_3D_*_TRAP instruction. Operands
// start with $s (Int64Regs) and the three Int32Regs operands $x, $y, $z,
// followed by the value operand(s). Only 8-, 16- and 32-bit element forms
// appear; in the vector forms 8- and 16-bit elements use Int16Regs and
// 32-bit elements use Int32Regs.
7243 def : Pat<(int_nvvm_sust_p_3d_i8_trap
7244 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7246 (SUST_P_3D_B8_TRAP Int64Regs:$s,
7247 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7250 def : Pat<(int_nvvm_sust_p_3d_i16_trap
7251 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7253 (SUST_P_3D_B16_TRAP Int64Regs:$s,
7254 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7257 def : Pat<(int_nvvm_sust_p_3d_i32_trap
7258 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7260 (SUST_P_3D_B32_TRAP Int64Regs:$s,
7261 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7264 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
7265 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7266 Int16Regs:$r, Int16Regs:$g),
7267 (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
7268 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7269 Int16Regs:$r, Int16Regs:$g)>;
7271 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
7272 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7273 Int16Regs:$r, Int16Regs:$g),
7274 (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
7275 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7276 Int16Regs:$r, Int16Regs:$g)>;
7278 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
7279 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7280 Int32Regs:$r, Int32Regs:$g),
7281 (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
7282 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7283 Int32Regs:$r, Int32Regs:$g)>;
7285 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
7286 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7287 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7288 (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
7289 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7290 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7292 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
7293 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7294 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7295 (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
7296 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7297 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7299 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
7300 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7301 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7302 (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
7303 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7304 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7306 //-----------------------------------
7307 // Read Special Registers
7308 //-----------------------------------
// Instruction class for reading a special register into a 64-bit register.
//   regname - register name without the leading '%'; the asm string adds it.
//   intop   - intrinsic matched by the selection pattern; it takes no operands.
// Emits "mov.u64 $d, %<regname>;" and defines the result in Int64Regs:$d.
7310 class PTX_READ_SREG_R64<string regname, Intrinsic intop>
7311 : NVPTXInst<(outs Int64Regs:$d), (ins),
7312 !strconcat("mov.u64 \t$d, %", regname, ";"),
7313 [(set Int64Regs:$d, (intop))]>;
// Instruction class for reading a special register into a 32-bit register.
//   regname - register name without the leading '%'; the asm string adds it.
//   intop   - intrinsic matched by the selection pattern; it takes no operands.
// Emits "mov.u32 $d, %<regname>;" and defines the result in Int32Regs:$d.
7315 class PTX_READ_SREG_R32<string regname, Intrinsic intop>
7316 : NVPTXInst<(outs Int32Regs:$d), (ins),
7317 !strconcat("mov.u32 \t$d, %", regname, ";"),
7318 [(set Int32Regs:$d, (intop))]>;
7320 // TODO: Add vector versions of the special-register reads.
// One instruction per special register. Each def pairs the PTX register name
// (written without the '%') with the int_nvvm_read_ptx_sreg_* intrinsic that
// selects it. All are 32-bit reads except clock64, which uses the R64 class.

// %tid.{x,y,z,w}
7322 def INT_PTX_SREG_TID_X :
7323 PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
7324 def INT_PTX_SREG_TID_Y :
7325 PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
7326 def INT_PTX_SREG_TID_Z :
7327 PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
7328 def INT_PTX_SREG_TID_W :
7329 PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
// %ntid.{x,y,z,w}
7331 def INT_PTX_SREG_NTID_X :
7332 PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
7333 def INT_PTX_SREG_NTID_Y :
7334 PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
7335 def INT_PTX_SREG_NTID_Z :
7336 PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
7337 def INT_PTX_SREG_NTID_W :
7338 PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
// %laneid, %warpid, %nwarpid
7340 def INT_PTX_SREG_LANEID :
7341 PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
7342 def INT_PTX_SREG_WARPID :
7343 PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
7344 def INT_PTX_SREG_NWARPID :
7345 PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
// %ctaid.{x,y,z,w}
7347 def INT_PTX_SREG_CTAID_X :
7348 PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
7349 def INT_PTX_SREG_CTAID_Y :
7350 PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
7351 def INT_PTX_SREG_CTAID_Z :
7352 PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
7353 def INT_PTX_SREG_CTAID_W :
7354 PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
// %nctaid.{x,y,z,w}
7356 def INT_PTX_SREG_NCTAID_X :
7357 PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
7358 def INT_PTX_SREG_NCTAID_Y :
7359 PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
7360 def INT_PTX_SREG_NCTAID_Z :
7361 PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
7362 def INT_PTX_SREG_NCTAID_W :
7363 PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
// %smid, %nsmid, %gridid
7365 def INT_PTX_SREG_SMID :
7366 PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
7367 def INT_PTX_SREG_NSMID :
7368 PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
7369 def INT_PTX_SREG_GRIDID :
7370 PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
// %lanemask_{eq,le,lt,ge,gt}
7372 def INT_PTX_SREG_LANEMASK_EQ :
7373 PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
7374 def INT_PTX_SREG_LANEMASK_LE :
7375 PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
7376 def INT_PTX_SREG_LANEMASK_LT :
7377 PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
7378 def INT_PTX_SREG_LANEMASK_GE :
7379 PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
7380 def INT_PTX_SREG_LANEMASK_GT :
7381 PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
// %clock (32-bit) and %clock64 (64-bit)
7383 def INT_PTX_SREG_CLOCK :
7384 PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
7385 def INT_PTX_SREG_CLOCK64 :
7386 PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
// %pm0 .. %pm3
7388 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
7389 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
7390 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
7391 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
7393 // TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
7394 // emits a '%' before the register name, and WARP_SZ is written without one.
// Selected for int_nvvm_read_ptx_sreg_warpsize; moves WARP_SZ into a 32-bit
// register.
7395 def INT_PTX_SREG_WARPSIZE :
7396 NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
7397 [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
7399 // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
7400 // In addition to target-independent fields provided by WMMA_REGS, it adds
7401 // the fields commonly used to implement specific PTX instruction -- register
7402 // types and names, constraints, parts of assembly, etc.
// The template argument r supplies the geometry, fragment name and PTX
// element type, which are forwarded unchanged to the WMMA_REGS base class.
7403 class WMMA_REGINFO<WMMA_REGS r>
7404 : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
7405 // NVPTX register types used to carry fragment data.
// f16 fragments use Float16x2Regs and f32 fragments use Float32Regs; every
// integer and bit element type (s32, s8, u8, s4, u4, b1) uses Int32Regs.
7406 NVPTXRegClass regclass = !cond(
7407 !eq(ptx_elt_type, "f16") : Float16x2Regs,
7408 !eq(ptx_elt_type, "f32") : Float32Regs,
7409 !eq(ptx_elt_type, "s32") : Int32Regs,
7410 !eq(ptx_elt_type, "s8") : Int32Regs,
7411 !eq(ptx_elt_type, "u8") : Int32Regs,
7412 !eq(ptx_elt_type, "s4") : Int32Regs,
7413 !eq(ptx_elt_type, "u4") : Int32Regs,
7414 !eq(ptx_elt_type, "b1") : Int32Regs);
7416 // Instruction input/output arguments for the fragment.
// One regclass entry per element of the inherited `regs` list.
7417 list<NVPTXRegClass> ptx_regs = !foreach(tmp, regs, regclass);
7419 // List of register names for the fragment -- ["ra0", "ra1",...]
// The name prefix is "r" followed by the fragment name (frag).
7420 list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
7422 // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
// The first name is emitted directly; !foldl appends ", $<name>" for each
// remaining one.
7423 string regstring = "{{$" # !head(reg_names)
7424 # !foldl("", !tail(reg_names), a, b,
7425 !strconcat(a, ", $", b))
7428 // Predicates for particular fragment variant. Technically those are
7429 // per-instruction predicates, but currently all fragments that can be used in
7430 // a given instruction are subject to the same constraints, so an instruction
7431 // can use predicates from any of its fragments. If/when this is no
7432 // longer the case, we can concat all per-fragment predicates to enforce that
7433 // all fragments of the instruction are viable.
// The cases below are keyed on geom and ptx_elt_type; !cond yields the value
// of the first case whose condition holds.
7434 list<Predicate> Predicates = !cond(
7435 // fp16 -> fp16/fp32 @ m16n16k16
7436 !and(!eq(geom, "m16n16k16"),
7437 !or(!eq(ptx_elt_type, "f16"),
7438 !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],
7440 // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
7441 !and(!or(!eq(geom, "m8n32k16"),
7442 !eq(geom, "m32n8k16")),
7443 !or(!eq(ptx_elt_type, "f16"),
7444 !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],
7446 // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
7447 !and(!or(!eq(geom,"m16n16k16"),
7448 !eq(geom,"m8n32k16"),
7449 !eq(geom,"m32n8k16")),
7450 !or(!eq(ptx_elt_type, "u8"),
7451 !eq(ptx_elt_type, "s8"),
7452 !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],
7454 // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1)
7455 !or(!eq(geom,"m8n8k128"),
7456 !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63]);
7458 // template DAGs for instruction inputs/output.
// Both pair each register class in ptx_regs with the name at the same
// position in reg_names; they differ only in the operator (outs vs. ins).
7459 dag Outs = !dag(outs, ptx_regs, reg_names);
7460 dag Ins = !dag(ins, ptx_regs, reg_names);
7463 // Convert dag of arguments into a dag to match given intrinsic.
//   Intr - intrinsic that becomes the operator of the resulting dag.
//   Ins  - instruction input dag, with `ins` as its operator.
// The result is exposed as the `ret` field.
7464 class BuildPatternI<Intrinsic Intr, dag Ins> {
7465 // Build a dag pattern that matches the intrinsic call.
// Substitutions are applied innermost first: the `ins` operator becomes Intr,
// then the memory operand classes MEMri, MEMri64 and imem are replaced by
// ADDRri, ADDRri64 and ADDRvar respectively.
7466 dag ret = !foreach(tmp, Ins,
7467 !subst(imem, ADDRvar,
7468 !subst(MEMri64, ADDRri64,
7469 !subst(MEMri, ADDRri,
7470 !subst(ins, Intr, tmp)))));
7473 // Same as BuildPatternI, but uses a PatFrag instead of an Intrinsic.
//   Intr - PatFrag that becomes the operator of the resulting dag.
//   Ins  - instruction input dag, with `ins` as its operator.
// The result is exposed as the `ret` field.
7474 class BuildPatternPF<PatFrag Intr, dag Ins> {
7475 // Build a dag pattern that matches the PatFrag.
// Substitutions are applied innermost first: the `ins` operator becomes Intr,
// then the memory operand classes MEMri, MEMri64 and imem are replaced by
// ADDRri, ADDRri64 and ADDRvar respectively.
7476 dag ret = !foreach(tmp, Ins,
7477 !subst(imem, ADDRvar,
7478 !subst(MEMri64, ADDRri64,
7479 !subst(MEMri, ADDRri,
7480 !subst(ins, Intr, tmp)))));
7483 // Common WMMA-related fields used for building patterns for all MMA instructions.
// _Intr: name of the intrinsic record this instruction corresponds to.
// _Args: list of (ins ...) dags that together form the instruction arguments.
// The NVPTXInst base is instantiated with empty (outs)/(ins), a "?" asm string
// and an empty pattern list; the WMMA_LOAD, WMMA_STORE_D and WMMA_MMA
// subclasses set OutOperandList, InOperandList and AsmString with `let`.
7484 class WMMA_INSTR<string _Intr, list<dag> _Args>
7485 : NVPTXInst<(outs), (ins), "?", []> {
// Look up the intrinsic record by name.
7486 Intrinsic Intr = !cast<Intrinsic>(_Intr);
7487 // Concatenate all arguments into a single dag, starting from an empty (ins).
7488 dag Args = !foldl((ins), _Args, a, b, !con(a,b));
7489 // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
// NOTE(review): Intr is already declared as Intrinsic, so the !cast below
// looks redundant -- confirm before removing.
7490 dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
7494 // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
// Frag:       register info of the loaded fragment; supplies Outs, Predicates
//             and ptx_elt_type.
// Layout:     forwarded to WMMA_NAME_LDST to select the intrinsic record.
// Space:      ".shared", ".global", or anything else for the generic match.
// WithStride: when set, adds the Int32Regs:$ldm operand and ", $ldm" to the
//             asm string.
// SrcOp is the operand class used for the $src address operand.
7497 class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
7499 : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
7500 [!con((ins SrcOp:$src),
7501 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7502 Requires<Frag.Predicates> {
7503 // Load/store intrinsics are overloaded on pointer's address space.
7504 // To match the right intrinsic, we need to build AS-constrained PatFrag.
7505 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
7506 dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
7507 // Build PatFrag that only matches particular address space.
// The fragment body is PFOperands with `ops` replaced by the intrinsic; the
// third PatFrag argument is the AS_match entry chosen by Space, with the
// final `1:` clause of !cond acting as the generic fallback.
7508 PatFrag IntrFrag = PatFrag<PFOperands,
7509 !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7510 !cond(!eq(Space, ".shared"): AS_match.shared,
7511 !eq(Space, ".global"): AS_match.global,
7512 1: AS_match.generic)>;
7513 // Build AS-constrained pattern. This overrides the IntrinsicPattern
// inherited from WMMA_INSTR, which matches the intrinsic directly.
7514 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
// The results are the fragment registers; the inputs are the arguments
// followed by an extra MmaCode:$ptx operand that is not part of Args (and
// therefore not part of IntrinsicPattern).
7516 let OutOperandList = Frag.Outs;
7517 let InOperandList = !con(Args, (ins MmaCode:$ptx));
7518 let AsmString = "wmma.load."
7525 # "." # Frag.ptx_elt_type # " \t"
7528 # !if(WithStride, ", $ldm", "")
7533 // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
// Frag:       register info of the stored fragment; supplies regs, reg_names,
//             Predicates and ptx_elt_type.
// Layout:     forwarded to WMMA_NAME_LDST to select the intrinsic record.
// Space:      ".shared", ".global", or anything else for the generic match.
// WithStride: when set, adds the Int32Regs:$ldm operand and ", $ldm" to the
//             asm string.
// DstOp:      operand class used for the $dst address operand.
7535 class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
7536 bit WithStride, DAGOperand DstOp>
7537 : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
7538 [!con((ins DstOp:$dst),
7540 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7541 Requires<Frag.Predicates> {
7543 // Load/store intrinsics are overloaded on pointer's address space.
7544 // To match the right intrinsic, we need to build AS-constrained PatFrag.
7545 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
// Here that is: $dst, then one `node` per fragment register (named after
// Frag.reg_names), then $ldm when WithStride is set.
7546 dag PFOperands = !con((ops node:$dst),
7547 !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
7548 !if(WithStride, (ops node:$ldm), (ops)));
7549 // Build PatFrag that only matches particular address space.
// The final `1:` clause of !cond is the fallback that selects the generic match.
7550 PatFrag IntrFrag = PatFrag<PFOperands,
7551 !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7552 !cond(!eq(Space, ".shared"): AS_match.shared,
7553 !eq(Space, ".global"): AS_match.global,
7554 1: AS_match.generic)>;
7555 // Build AS-constrained pattern. This overrides the IntrinsicPattern
// inherited from WMMA_INSTR, which matches the intrinsic directly.
7556 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
// Inputs are the arguments followed by an extra MmaCode:$ptx operand; the
// store defines no result registers, so the output list is empty.
7558 let InOperandList = !con(Args, (ins MmaCode:$ptx));
7559 let OutOperandList = (outs);
7560 let AsmString = "wmma.store.d.sync"
7565 # "." # Frag.ptx_elt_type
7568 # !if(WithStride, ", $ldm", "")
7572 // Create all load/store variants.
// The anonymous defs below are collected into the MMA_LDSTs list by defset.
// Variants are the cross product of layout, stride, address space, address
// operand class and fragment.
7573 defset list<WMMA_INSTR> MMA_LDSTs = {
7574 foreach layout = ["row", "col"] in {
7575 foreach stride = [0, 1] in {
// "" is the case that WMMA_LOAD/WMMA_STORE_D map to AS_match.generic.
7576 foreach space = [".global", ".shared", ""] in {
7577 foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
7578 foreach frag = NVVM_MMA_OPS.all_ld_ops in
// The loop variable `_` is unused: this foreach emits one def per element of
// NVVM_MMA_SUPPORTED<...>.ret. NOTE(review): presumably .ret is empty for
// unsupported combinations so the loop acts as a guard -- confirm at its definition.
7579 foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
7580 def : WMMA_LOAD<WMMA_REGINFO<frag>, layout, space, stride, addr>;
7581 foreach frag = NVVM_MMA_OPS.all_st_ops in
7582 foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
7583 def : WMMA_STORE_D<WMMA_REGINFO<frag>, layout, space, stride, addr>;
// wmma.mma instruction built from four fragments.
// FragA, FragB, FragC: input fragments; their Ins dags form the arguments.
// FragD:               result fragment; its Outs dag forms the output list.
// ALayout, BLayout:    forwarded to WMMA_NAME_MMA to select the intrinsic record.
// Satfinite:           when non-zero, ".satfinite" is added to the asm string.
7591 class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
7592 WMMA_REGINFO FragC, WMMA_REGINFO FragD,
7593 string ALayout, string BLayout, int Satfinite>
7594 : WMMA_INSTR<WMMA_NAME_MMA<ALayout, BLayout, Satfinite, FragA, FragB, FragC, FragD>.record,
7595 [FragA.Ins, FragB.Ins, FragC.Ins]>,
7596 // Requires does not seem to have effect on Instruction w/o Patterns.
7597 // We set it here anyways and propagate to the Pat<> we construct below.
// Only FragA's predicates are used for the whole instruction.
7598 Requires<FragA.Predicates> {
7599 let OutOperandList = FragD.Outs;
// The extra MmaCode:$ptx operand follows the arguments and is not part of Args.
7600 let InOperandList = !con(Args, (ins MmaCode:$ptx));
// Type suffix of the instruction: when the D fragment is s32 the list starts
// with ".s32" followed by the A and B element types; otherwise it is the D
// element type followed by the C element type.
7601 string TypeList = !cond(
7602 !eq(FragD.ptx_elt_type, "s32") : ".s32"
7603 # "." # FragA.ptx_elt_type
7604 # "." # FragB.ptx_elt_type
7606 1: "." # FragD.ptx_elt_type # "." # FragC.ptx_elt_type,
// ".xor.popc" is added only when the A fragment's element type is "b1".
// The register lists are printed in the order D, A, B, C, one per line.
7608 let AsmString = "wmma.mma"
7609 # !if(!eq(FragA.ptx_elt_type, "b1"), ".xor.popc", "")
7616 # !if(Satfinite, ".satfinite", "") # "\n\t\t"
7617 # FragD.regstring # ",\n\t\t"
7618 # FragA.regstring # ",\n\t\t"
7619 # FragB.regstring # ",\n\t\t"
7620 # FragC.regstring # ";";
// Create all wmma.mma variants. The anonymous defs below are collected into
// the MMAs list by defset. Variants are the cross product of A layout,
// B layout, satfinite flag and fragment combination.
7623 defset list<WMMA_INSTR> MMAs = {
7624 foreach layout_a = ["row", "col"] in {
7625 foreach layout_b = ["row", "col"] in {
7626 foreach satf = [0, 1] in {
7627 foreach op = NVVM_MMA_OPS.all_mma_ops in {
// The loop variable `_` is unused: this foreach emits one def per element of
// NVVM_MMA_SUPPORTED<...>.ret. NOTE(review): presumably .ret is empty for
// unsupported combinations so the loop acts as a guard -- confirm at its definition.
7628 foreach _ = NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret in {
// op[0]..op[3] are passed as the A, B, C and D fragments, in that order.
7629 def : WMMA_MMA<WMMA_REGINFO<op[0]>,
7630 WMMA_REGINFO<op[1]>,
7631 WMMA_REGINFO<op[2]>,
7632 WMMA_REGINFO<op[3]>,
7633 layout_a, layout_b, satf>;
7642 // Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
7643 // dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
7644 // the instruction record.
// wi: the WMMA instruction to build a selection pattern for.
// The source pattern is wi.IntrinsicPattern. The result pattern is wi.Args
// with the `ins` operator replaced by the instruction record wi, concatenated
// (via !con) with a further dag. The pattern carries the same predicates as wi.
7645 class WMMA_PAT<WMMA_INSTR wi>
7646 : Pat<wi.IntrinsicPattern,
7647 !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
7649 Requires<wi.Predicates>;
7651 // Build intrinsic->instruction patterns for all MMA instructions.
// One anonymous WMMA_PAT is emitted for every record collected in the MMAs
// and MMA_LDSTs lists.
7652 foreach mma = !listconcat(MMAs, MMA_LDSTs) in
7653 def : WMMA_PAT<mma>;