[obj2yaml] - Fix BB after r373315.
[llvm-complete.git] / lib / Target / NVPTX / NVPTXIntrinsics.td
blob1752d3e0575e63eb0e198790acb09a25d7b9f5ca
1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// Pattern leaf: accepts an fpimm node whose value, read as a float, compares
// equal to 0.0f (the == test is also true for -0.0f).
9 def immFloat0 : PatLeaf<(fpimm), [{
10     const float Val = N->getValueAPF().convertToFloat();
11     return Val == 0.0f;
12 }]>;
// Pattern leaf: accepts an fpimm node whose value, read as a float, compares
// equal to 1.0f.
14 def immFloat1 : PatLeaf<(fpimm), [{
15     const float Val = N->getValueAPF().convertToFloat();
16     return Val == 1.0f;
17 }]>;
// Pattern leaf: accepts an fpimm node whose value, read as a double, compares
// equal to 0.0 (the == test is also true for -0.0).
19 def immDouble0 : PatLeaf<(fpimm), [{
20     const double Val = N->getValueAPF().convertToDouble();
21     return Val == 0.0;
22 }]>;
// Pattern leaf: accepts an fpimm node whose value, read as a double, compares
// equal to 1.0.
24 def immDouble1 : PatLeaf<(fpimm), [{
25     const double Val = N->getValueAPF().convertToDouble();
26     return Val == 1.0;
27 }]>;
// Named C++ predicate bodies, one per address space.  Each returns the result
// of ChkMemSDNodeAddressSpace(N, <space>) for the node N being matched.
29 def AS_match {
30   code generic = [{
31    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
32   }];
33   code shared = [{
34    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
35   }];
36   code global = [{
37    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
38   }];
39 }
41 // A node that will be replaced with the current PTX version.
42 class PTX {
// PTXVerXform rewrites the matched immediate into an i32 immediate holding
// Subtarget->getPTXVersion().
43   SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
44     return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
45   }]>;
46   // (i32 0) will be XForm'ed to the currently used PTX version.
47   dag version = (PTXVerXform (i32 0));
48 }
// Single instance of the class, so the placeholder dag is reachable as
// ptx.version.
49 def ptx : PTX;
51 // Generates list of n sequential register names.
52 // E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
// Built recursively: RegSeq<n-1, prefix>.ret followed by prefix # (n-1).
// When n is 0 the !if yields the empty list, which ends the recursion.
53 class RegSeq<int n, string prefix> {
54   list<string> ret = !if(n, !listconcat(RegSeq<!add(n,-1), prefix>.ret,
55                                         [prefix # !add(n, -1)]),
56                             []);
57 }
59 //-----------------------------------
60 // Synchronization and shuffle functions
61 //-----------------------------------
62 let isConvergent = 1 in {
// bar.sync wrappers with no results.  Each record is selected for the
// intrinsic in its pattern and prints `bar.sync` with that intrinsic's
// operands:
//   INT_BARRIER0 - int_nvvm_barrier0, literal barrier 0, no operands.
//   INT_BARRIERN - int_nvvm_barrier_n, one register operand ($src1).
//   INT_BARRIER  - int_nvvm_barrier, two register operands ($src1, $src2).
63 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
64                   "bar.sync \t0;",
65       [(int_nvvm_barrier0)]>;
66 def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
67                   "bar.sync \t$src1;",
68       [(int_nvvm_barrier_n Int32Regs:$src1)]>;
69 def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
70                   "bar.sync \t$src1, $src2;",
71       [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// int_nvvm_barrier0_popc: printed as a `{{ ... }}`-wrapped sequence that
//   1. declares a scratch predicate register %p1,
//   2. sets %p1 to ($pred != 0) with setp.ne.u32,
//   3. issues bar.red.popc.u32 on barrier 0 with %p1, writing $dst.
72 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
73   !strconcat("{{ \n\t",
74              ".reg .pred \t%p1; \n\t",
75              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
76              "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
77              "}}"),
78       [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
// int_nvvm_barrier0_and: printed as a `{{ ... }}`-wrapped sequence that
//   1. declares scratch predicate registers %p1 and %p2,
//   2. sets %p1 to ($pred != 0) with setp.ne.u32,
//   3. issues bar.red.and.pred on barrier 0 with %p1, result in %p2,
//   4. turns %p2 into 1 or 0 in $dst with selp.u32.
79 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
80   !strconcat("{{ \n\t",
81              ".reg .pred \t%p1; \n\t",
82              ".reg .pred \t%p2; \n\t",
83              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
84              "bar.red.and.pred \t%p2, 0, %p1; \n\t",
85              "selp.u32 \t$dst, 1, 0, %p2; \n\t",
86              "}}"),
87       [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
// int_nvvm_barrier0_or: same sequence shape as INT_BARRIER0_AND, but the
// barrier reduction is bar.red.or.pred.  $pred is converted to predicate %p1,
// the reduction result lands in %p2, and selp.u32 writes 1 or 0 to $dst.
88 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
89   !strconcat("{{ \n\t",
90              ".reg .pred \t%p1; \n\t",
91              ".reg .pred \t%p2; \n\t",
92              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
93              "bar.red.or.pred \t%p2, 0, %p1; \n\t",
94              "selp.u32 \t$dst, 1, 0, %p2; \n\t",
95              "}}"),
96       [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// int_nvvm_bar_sync with an immediate argument: prints `bar.sync $i;`.
98 def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
99                              [(int_nvvm_bar_sync imm:$i)]>;
// int_nvvm_bar_warp_sync, printed as `bar.warp.sync $i;`.
// _I takes $i as an immediate, _R as a register.  Both are gated on
// hasPTX60 and hasSM30.
101 def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
102                              [(int_nvvm_bar_warp_sync imm:$i)]>,
103         Requires<[hasPTX60, hasSM30]>;
104 def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
105                              [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
106         Requires<[hasPTX60, hasSM30]>;
// int_nvvm_barrier_sync, printed as `barrier.sync $i;`.
// _I takes $i as an immediate, _R as a register.  Both are gated on
// hasPTX60 and hasSM30.
108 def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
109                                    [(int_nvvm_barrier_sync imm:$i)]>,
110         Requires<[hasPTX60, hasSM30]>;
111 def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
112                                    [(int_nvvm_barrier_sync Int32Regs:$i)]>,
113         Requires<[hasPTX60, hasSM30]>;
// int_nvvm_barrier_sync_cnt, printed as `barrier.sync $id, $cnt;`.
// The two-letter suffix gives the operand kinds of ($id, $cnt): R = register,
// I = immediate.  All four variants are gated on hasPTX60 and hasSM30.
115 def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
116                  "barrier.sync \t$id, $cnt;",
117                  [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
118         Requires<[hasPTX60, hasSM30]>;
119 def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
120                  "barrier.sync \t$id, $cnt;",
121                  [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
122         Requires<[hasPTX60, hasSM30]>;
123 def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
124                  "barrier.sync \t$id, $cnt;",
125                  [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
126         Requires<[hasPTX60, hasSM30]>;
127 def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
128                  "barrier.sync \t$id, $cnt;",
129                  [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
130         Requires<[hasPTX60, hasSM30]>;
133 // shfl.{up,down,bfly,idx}.b32
// Each instantiation defines four records that differ only in whether $offset
// and $mask are registers or immediates:
//   reg  - both registers          imm1 - immediate $offset, register $mask
//   imm2 - register $offset, immediate $mask      imm3 - both immediates
// `mode` is spliced into the mnemonic; `regclass` is the class of $dst/$src.
134 multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
135   // The last two parameters to shfl can be regs or imms.  ptxas is smart
136   // enough to inline constant registers, so strictly speaking we don't need to
137   // handle immediates here.  But it's easy enough, and it makes our ptx more
138   // readable.
139   def reg : NVPTXInst<
140       (outs regclass:$dst),
141       (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
142       !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
143       [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>;
145   def imm1 : NVPTXInst<
146       (outs regclass:$dst),
147       (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
148       !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
149       [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>;
151   def imm2 : NVPTXInst<
152       (outs regclass:$dst),
153       (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
154       !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
155       [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>;
157   def imm3 : NVPTXInst<
158       (outs regclass:$dst),
159       (ins regclass:$src, i32imm:$offset, i32imm:$mask),
160       !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
161       [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, imm:$mask))]>;
162 }
// Instantiate SHFL once per mode (down, up, bfly, idx) and per data register
// class (Int32Regs for the _i32 intrinsics, Float32Regs for the _f32 ones).
164 defm INT_SHFL_DOWN_I32 : SHFL<Int32Regs, "down", int_nvvm_shfl_down_i32>;
165 defm INT_SHFL_DOWN_F32 : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;
166 defm INT_SHFL_UP_I32 : SHFL<Int32Regs, "up", int_nvvm_shfl_up_i32>;
167 defm INT_SHFL_UP_F32 : SHFL<Float32Regs, "up", int_nvvm_shfl_up_f32>;
168 defm INT_SHFL_BFLY_I32 : SHFL<Int32Regs, "bfly", int_nvvm_shfl_bfly_i32>;
169 defm INT_SHFL_BFLY_F32 : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;
170 defm INT_SHFL_IDX_I32 : SHFL<Int32Regs, "idx", int_nvvm_shfl_idx_i32>;
171 defm INT_SHFL_IDX_F32 : SHFL<Float32Regs, "idx", int_nvvm_shfl_idx_f32>;
// shfl.sync.{up,down,bfly,idx}.b32
// Each instantiation defines eight records.  The three-letter name gives the
// operand kinds of ($threadmask, $offset, $mask) in that order: r = register,
// i = immediate.  The intrinsic takes $threadmask first, while the printed
// instruction places it last.
173 multiclass SHFL_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
174   // Threadmask and the last two parameters to shfl.sync can be regs or imms.
175   // ptxas is smart enough to inline constant registers, so strictly speaking we
176   // don't need to handle immediates here.  But it's easy enough, and it makes
177   // our ptx more readable.
178   def rrr : NVPTXInst<
179       (outs regclass:$dst),
180       (ins Int32Regs:$threadmask, regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
181       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
182       [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
183                             Int32Regs:$offset, Int32Regs:$mask))]>;
185   def rri : NVPTXInst<
186       (outs regclass:$dst),
187       (ins Int32Regs:$threadmask, regclass:$src, Int32Regs:$offset, i32imm:$mask),
188       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
189       [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
190                             Int32Regs:$offset, imm:$mask))]>;
192   def rir : NVPTXInst<
193       (outs regclass:$dst),
194       (ins Int32Regs:$threadmask, regclass:$src, i32imm:$offset, Int32Regs:$mask),
195       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
196       [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
197                             imm:$offset, Int32Regs:$mask))]>;
199   def rii : NVPTXInst<
200       (outs regclass:$dst),
201       (ins Int32Regs:$threadmask, regclass:$src, i32imm:$offset, i32imm:$mask),
202       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
203       [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
204                             imm:$offset, imm:$mask))]>;
206   def irr : NVPTXInst<
207       (outs regclass:$dst),
208       (ins i32imm:$threadmask, regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
209       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
210       [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
211                             Int32Regs:$offset, Int32Regs:$mask))]>;
213   def iri : NVPTXInst<
214       (outs regclass:$dst),
215       (ins i32imm:$threadmask, regclass:$src, Int32Regs:$offset, i32imm:$mask),
216       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
217       [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
218                             Int32Regs:$offset, imm:$mask))]>;
220   def iir : NVPTXInst<
221       (outs regclass:$dst),
222       (ins i32imm:$threadmask, regclass:$src, i32imm:$offset, Int32Regs:$mask),
223       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
224       [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
225                             imm:$offset, Int32Regs:$mask))]>;
227   def iii : NVPTXInst<
228       (outs regclass:$dst),
229       (ins i32imm:$threadmask, regclass:$src, i32imm:$offset, i32imm:$mask),
230       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
231       [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
232                             imm:$offset, imm:$mask))]>;
233 }
235 // On sm_70 these don't have to be convergent, so we may eventually want to
236 // implement non-convergent variant of this intrinsic.
// Instantiate SHFL_SYNC once per mode (down, up, bfly, idx) and per data
// register class (Int32Regs for _i32, Float32Regs for _f32).
237 defm INT_SHFL_SYNC_DOWN_I32 : SHFL_SYNC<Int32Regs, "down", int_nvvm_shfl_sync_down_i32>;
238 defm INT_SHFL_SYNC_DOWN_F32 : SHFL_SYNC<Float32Regs, "down", int_nvvm_shfl_sync_down_f32>;
239 defm INT_SHFL_SYNC_UP_I32 : SHFL_SYNC<Int32Regs, "up", int_nvvm_shfl_sync_up_i32>;
240 defm INT_SHFL_SYNC_UP_F32 : SHFL_SYNC<Float32Regs, "up", int_nvvm_shfl_sync_up_f32>;
241 defm INT_SHFL_SYNC_BFLY_I32 : SHFL_SYNC<Int32Regs, "bfly", int_nvvm_shfl_sync_bfly_i32>;
242 defm INT_SHFL_SYNC_BFLY_F32 : SHFL_SYNC<Float32Regs, "bfly", int_nvvm_shfl_sync_bfly_f32>;
243 defm INT_SHFL_SYNC_IDX_I32 : SHFL_SYNC<Int32Regs, "idx", int_nvvm_shfl_sync_idx_i32>;
244 defm INT_SHFL_SYNC_IDX_F32 : SHFL_SYNC<Float32Regs, "idx", int_nvvm_shfl_sync_idx_f32>;
247 // vote.{all,any,uni,ballot}
// Defines one anonymous record per instantiation: an Int1Regs predicate in,
// a `regclass` result out, printed as `vote.<mode> $dest, $pred;`.
// Gated on hasPTX60 and hasSM30.
248 multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
249   def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
250               "vote." # mode # " \t$dest, $pred;",
251               [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
252         Requires<[hasPTX60, hasSM30]>;
253 }
// all/any/uni produce a predicate (Int1Regs); ballot produces an Int32Regs
// value and uses the ".b32" mode suffix.
255 defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
256 defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
257 defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
258 defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
260 // vote.sync.{all,any,uni,ballot}
// Defines two records per instantiation: `i` takes $mask as an immediate,
// `r` takes it in a register.  The intrinsic takes $mask first; the printed
// instruction places it after $pred.  Gated on hasPTX60 and hasSM30.
261 multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
262   def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
263               "vote.sync." # mode # " \t$dest, $pred, $mask;",
264               [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
265           Requires<[hasPTX60, hasSM30]>;
266   def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
267               "vote.sync." # mode # " \t$dest, $pred, $mask;",
268               [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
269           Requires<[hasPTX60, hasSM30]>;
270 }
// all/any/uni produce a predicate (Int1Regs); ballot produces an Int32Regs
// value and uses the ".b32" mode suffix.
272 defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
273 defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
274 defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
275 defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
// match.any.sync.{b32,b64}
// Defines four records per instantiation.  The two-letter name gives the
// operand kinds of ($value, $mask), i.e. the order in which they are printed:
// r = register, i = immediate.  $mask is always 32-bit; $value uses `regclass`
// or `ImmOp`.  Gated on hasPTX60 and hasSM70.
277 multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
278                           Operand ImmOp> {
279   def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
280               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
281               [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
282            Requires<[hasPTX60, hasSM70]>;
283   def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
284               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
285               [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
286            Requires<[hasPTX60, hasSM70]>;
287   def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
288               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
289               [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
290            Requires<[hasPTX60, hasSM70]>;
291   def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
292               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
293               [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
294            Requires<[hasPTX60, hasSM70]>;
295 }
// 32-bit and 64-bit instantiations; the immediate operand type follows the
// value width (i32imm / i64imm).
297 defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
298                                         i32imm>;
299 defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
300                                         i64imm>;
// match.all.sync.{b32,b64} with a predicate result
// Same four operand-kind variants as MATCH_ANY_SYNC (name letters describe
// $value then $mask; r = register, i = immediate), but each record has two
// outputs: $dest in `regclass` and $pred in Int1Regs, printed as
// `$dest|$pred`.  Gated on hasPTX60 and hasSM70.
302 multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
303                           Operand ImmOp> {
304   def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
305                      (ins i32imm:$mask, ImmOp:$value),
306               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
307               [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
308            Requires<[hasPTX60, hasSM70]>;
309   def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
310                      (ins Int32Regs:$mask, ImmOp:$value),
311               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
312               [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
313            Requires<[hasPTX60, hasSM70]>;
314   def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
315                      (ins i32imm:$mask, regclass:$value),
316               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
317               [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
318            Requires<[hasPTX60, hasSM70]>;
319   def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
320                      (ins Int32Regs:$mask, regclass:$value),
321               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
322               [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
323            Requires<[hasPTX60, hasSM70]>;
324 }
// 32-bit and 64-bit instantiations; the immediate operand type follows the
// value width (i32imm / i64imm).
325 defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
326                                          i32imm>;
327 defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
328                                          i64imm>;
330 } // isConvergent = 1
332 //-----------------------------------
333 // Explicit Memory Fence Functions
334 //-----------------------------------
// Operand-less, result-less instruction: StrOp is the complete text to print
// and IntOP is the intrinsic the record is selected for.
335 class MEMBAR<string StrOp, Intrinsic IntOP> :
336               NVPTXInst<(outs), (ins),
337             StrOp, [(IntOP)]>;
// One record per membar level: cta, gl, sys.
339 def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
340 def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
341 def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
344 //-----------------------------------
345 // Math Functions
346 //-----------------------------------
348 // Map min(1.0, max(0.0, x)) to sat(x).
349 // Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x), because
350 // when x is NaN,
351 // max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
352 // Same story for fmax, fmin.
// f32: fmin(1.0, fmax(0.0, a)) in all four operand orders (constant first or
// second in each of fmin and fmax) becomes CVT_f32_f32 with CvtSAT.
354 def : Pat<(int_nvvm_fmin_f immFloat1,
355             (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
356           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
357 def : Pat<(int_nvvm_fmin_f immFloat1,
358             (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
359           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
360 def : Pat<(int_nvvm_fmin_f
361             (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
362           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
363 def : Pat<(int_nvvm_fmin_f
364             (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
365           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// f64: fmin(1.0, fmax(0.0, a)) in all four operand orders (constant first or
// second in each of fmin and fmax) becomes CVT_f64_f64 with CvtSAT.
367 def : Pat<(int_nvvm_fmin_d immDouble1,
368             (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
369           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
370 def : Pat<(int_nvvm_fmin_d immDouble1,
371             (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
372           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
373 def : Pat<(int_nvvm_fmin_d
374             (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
375           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
376 def : Pat<(int_nvvm_fmin_d
377             (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
378           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
381 // We need a full string for OpcStr here because we need to deal with cases
382 // like INT_PTX_RECIP.
// One-source instruction: result $dst in target_regclass, operand $src0 in
// src_regclass.  OpcStr is printed as given; the record is selected for IntOP
// applied to $src0.
383 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
384   NVPTXRegClass src_regclass, Intrinsic IntOP>
385             : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
386             OpcStr,
387         [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
389 // We need a full string for OpcStr here because we need to deal with cases
390 // like INT_PTX_NATIVE_POWR_F.
// Two-source instruction: result $dst in t_regclass, operands $src0 and $src1
// in s0_regclass and s1_regclass.  OpcStr is printed as given; the record is
// selected for IntOP applied to ($src0, $src1).
391 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
392   NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
393             : NVPTXInst<(outs t_regclass:$dst),
394               (ins s0_regclass:$src0, s1_regclass:$src1),
395             OpcStr,
396         [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
// Three-source instruction: result $dst in t_regclass, operands $src0, $src1
// and $src2 in s0/s1/s2_regclass.  OpcStr is printed as given; the record is
// selected for IntOP applied to ($src0, $src1, $src2).
398 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
399   NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
400   NVPTXRegClass s2_regclass, Intrinsic IntOP>
401             : NVPTXInst<(outs t_regclass:$dst),
402               (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
403             OpcStr,
404         [(set t_regclass:$dst,
405           (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
408 // MISC
// int_nvvm_prmt: prmt.b32 with three Int32Regs sources and an Int32Regs result.
411 def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
412   Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
415 // Min Max
// Two-operand min/max records: f32 with and without .ftz, and f64.
418 def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
419   Float32Regs, Float32Regs, int_nvvm_fmin_f>;
420 def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
421   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
423 def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
424   Float32Regs, Float32Regs, int_nvvm_fmax_f>;
425 def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
426   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
428 def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
429   Float64Regs, Float64Regs, int_nvvm_fmin_d>;
430 def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
431   Float64Regs, Float64Regs, int_nvvm_fmax_d>;
435 // Multiplication
// Three groups of two-operand records:
//   mul.hi   - signed/unsigned, 32- and 64-bit integer.
//   mul.<rnd> - f32 (with and without .ftz) and f64, one record per rounding
//               suffix rn/rz/rm/rp.
//   mul24.lo - signed/unsigned 32-bit integer.
438 def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
439   Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
440 def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
441   Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
443 def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
444   Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
445 def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
446   Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
448 def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
449   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
450 def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
451   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
452 def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
453   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
454 def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
455   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
456 def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
457   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
458 def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
459   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
460 def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
461   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
462 def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
463   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
465 def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
466   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
467 def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
468   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
469 def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
470   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
471 def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
472   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
474 def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
475   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
476 def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
477   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
480 // Div
// Two-operand division records: div.approx (f32 only, with and without
// .ftz), then one record per rounding suffix rn/rz/rm/rp for f32 (with and
// without .ftz) and for f64.
483 def INT_NVVM_DIV_APPROX_FTZ_F
484   : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
485     Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
486 def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
487   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
489 def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
490   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
491 def INT_NVVM_DIV_RN_F     : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
492   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
493 def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
494   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
495 def INT_NVVM_DIV_RZ_F     : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
496   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
497 def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
498   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
499 def INT_NVVM_DIV_RM_F     : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
500   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
501 def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
502   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
503 def INT_NVVM_DIV_RP_F     : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
504   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
506 def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
507   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
508 def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
509   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
510 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
511   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
512 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
513   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
516 // Sad
// Three-operand sad records on Int32Regs: signed (.s32) and unsigned (.u32).
519 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
520   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
521 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
522   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
525 // Floor  Ceil
// No dedicated instruction records: floor and ceil are rewritten to
// CVT_f32_f32 / CVT_f64_f64 with a mode operand.  floor uses CvtRMI, ceil uses
// CvtRPI; the _ftz_f intrinsics use the corresponding *_FTZ mode.
528 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
529           (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
530 def : Pat<(int_nvvm_floor_f Float32Regs:$a),
531           (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
532 def : Pat<(int_nvvm_floor_d Float64Regs:$a),
533           (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
535 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
536           (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
537 def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
538           (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
539 def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
540           (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
543 // Abs
// One-operand abs records: f32 with and without .ftz, and f64.
546 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
547   Float32Regs, int_nvvm_fabs_ftz_f>;
548 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
549   Float32Regs, int_nvvm_fabs_f>;
551 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
552   Float64Regs, int_nvvm_fabs_d>;
555 // Round
// round is rewritten to CVT_f32_f32 / CVT_f64_f64 with the CvtRNI mode
// (CvtRNI_FTZ for the _ftz_f intrinsic).
558 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
559           (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
560 def : Pat<(int_nvvm_round_f Float32Regs:$a),
561           (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
562 def : Pat<(int_nvvm_round_d Float64Regs:$a),
563           (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
566 // Trunc
// trunc is rewritten to CVT_f32_f32 / CVT_f64_f64 with the CvtRZI mode
// (CvtRZI_FTZ for the _ftz_f intrinsic).
569 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
570           (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
571 def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
572           (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
573 def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
574           (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
577 // Saturate
// saturate is rewritten to CVT_f32_f32 / CVT_f64_f64 with the CvtSAT mode
// (CvtSAT_FTZ for the _ftz_f intrinsic).
580 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
581           (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
582 def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
583           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
584 def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
585           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
588 // Exp2  Log2
// One-operand ex2.approx and lg2.approx records: f32 with and without .ftz,
// and f64.
591 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
592   Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
593 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
594   Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
595 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
596   Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
598 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
599   Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
600 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
601   Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
602 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
603   Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
606 // Sin  Cos
// One-operand sin.approx and cos.approx records, f32 only, with and without
// .ftz.
609 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
610   Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
611 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
612   Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
614 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
615   Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
616 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
617   Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
620 // Fma
// Three-operand fma records, one per rounding suffix rn/rz/rm/rp: f32 (with
// and without .ftz) and f64.
623 def INT_NVVM_FMA_RN_FTZ_F
624   : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
625     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
626 def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
627   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
628 def INT_NVVM_FMA_RZ_FTZ_F
629   : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
630     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
631 def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
632   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
633 def INT_NVVM_FMA_RM_FTZ_F
634   : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
635     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
636 def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
637   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
638 def INT_NVVM_FMA_RP_FTZ_F
639   : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
640     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
641 def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
642   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
644 def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
645   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
646 def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
647   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
648 def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
649   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
650 def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
651   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
654 // Rcp
// One-operand rcp records, one per rounding suffix rn/rz/rm/rp: f32 (with and
// without .ftz) and f64.  The last record is the f64 rcp.approx.ftz form.
657 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
658   Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
659 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
660   Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
661 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
662   Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
663 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
664   Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
665 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
666   Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
667 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
668   Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
669 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
670   Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
671 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
672   Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
674 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
675   Float64Regs, int_nvvm_rcp_rn_d>;
676 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
677   Float64Regs, int_nvvm_rcp_rz_d>;
678 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
679   Float64Regs, int_nvvm_rcp_rm_d>;
680 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
681   Float64Regs, int_nvvm_rcp_rp_d>;
683 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
684   Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
687 // Sqrt
// One F_MATH_1 record per int_nvvm_sqrt_* intrinsic; asm text is
// "sqrt.<mode>[.ftz].<type> $dst, $src0;". The f32 group covers the four
// rounding suffixes plus ".approx", each with and without ".ftz".
690 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
691   Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
692 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
693   Float32Regs, int_nvvm_sqrt_rn_f>;
694 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
695   Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
696 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
697   Float32Regs, int_nvvm_sqrt_rz_f>;
698 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
699   Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
700 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
701   Float32Regs, int_nvvm_sqrt_rm_f>;
702 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
703   Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
704 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
705   Float32Regs, int_nvvm_sqrt_rp_f>;
706 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
707   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
708 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
709   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
// f64 group: the four rounding suffixes only; no ".ftz" or ".approx" records.
711 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
712   Float64Regs, int_nvvm_sqrt_rn_d>;
713 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
714   Float64Regs, int_nvvm_sqrt_rz_d>;
715 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
716   Float64Regs, int_nvvm_sqrt_rm_d>;
717 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
718   Float64Regs, int_nvvm_sqrt_rp_d>;
720 // nvvm_sqrt intrinsic
// int_nvvm_sqrt_f has no instruction record of its own; it is selected to one
// of the SQRT records above depending on which predicates hold:
//   doF32FTZ and do_SQRTF32_RN -> INT_NVVM_SQRT_RN_FTZ_F
//   do_SQRTF32_RN              -> INT_NVVM_SQRT_RN_F
//   doF32FTZ                   -> INT_NVVM_SQRT_APPROX_FTZ_F
//   (no predicate)             -> INT_NVVM_SQRT_APPROX_F
// NOTE(review): several of these can be eligible at once; how the selector
// ranks them is not visible in this file — confirm the more specific
// predicate lists win as the ordering here suggests.
721 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
722           (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
723 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
724           (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
725 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
726           (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
727 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
728           (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
731 // Rsqrt
// Only ".approx" forms are declared: f32 with and without ".ftz", and f64
// without ".ftz". Each binds the asm text to the like-named
// int_nvvm_rsqrt_approx_* intrinsic through F_MATH_1.
734 def INT_NVVM_RSQRT_APPROX_FTZ_F
735   : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
736     int_nvvm_rsqrt_approx_ftz_f>;
737 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
738   Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
739 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
740   Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
743 // Add
// Two-source add with an explicit rounding suffix, one F_MATH_2 record per
// int_nvvm_add_* intrinsic. Asm text: "add.<rnd>[.ftz].<type> $dst, $src0, $src1;".
// f32 group: four rounding suffixes, each with and without ".ftz".
746 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
747   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
748 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
749   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
750 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
751   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
752 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
753   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
754 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
755   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
756 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
757   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
758 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
759   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
760 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
761   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
// f64 group: four rounding suffixes, no ".ftz" variants.
763 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
764   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
765 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
766   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
767 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
768   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
769 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
770   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
773 // Convert
// The conversion intrinsics in this section do not get dedicated instruction
// records; each is a selection pattern that rewrites the intrinsic into an
// existing CVT_<dst>_<src> instruction plus a Cvt* mode operand taken from
// the intrinsic's suffix.
//
// int_nvvm_d2f_*: Float64Regs source -> CVT_f32_f64, modes CvtR{N,Z,M,P} with
// an _FTZ twin for each.
776 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
777           (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
778 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
779           (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
780 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
781           (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
782 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
783           (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
784 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
785           (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
786 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
787           (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
788 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
789           (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
790 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
791           (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
// int_nvvm_d2i_*: Float64Regs source -> CVT_s32_f64. Patterns that produce an
// integer use the "I"-suffixed modes (CvtRNI, CvtRZI, CvtRMI, CvtRPI).
793 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
794           (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
795 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
796           (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
797 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
798           (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
799 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
800           (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
// int_nvvm_d2ui_*: same source and modes, selected to CVT_u32_f64.
802 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
803           (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
804 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
805           (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
806 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
807           (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
808 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
809           (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
// int_nvvm_i2d_*: Int32Regs source -> CVT_f64_s32 with the plain CvtR{N,Z,M,P}
// modes (no "I" suffix, no _FTZ twins).
811 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
812           (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
813 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
814           (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
815 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
816           (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
817 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
818           (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
// int_nvvm_ui2d_*: same shape, selected to CVT_f64_u32.
820 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
821           (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
822 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
823           (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
824 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
825           (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
826 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
827           (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
// int_nvvm_f2i_*: Float32Regs source -> CVT_s32_f32. Each rounding suffix has
// a plain mode (CvtR?I) and an "_ftz" intrinsic mapped to CvtR?I_FTZ.
829 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
830           (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
831 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
832           (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
833 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
834           (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
835 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
836           (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
837 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
838           (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
839 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
840           (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
841 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
842           (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
843 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
844           (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
// int_nvvm_f2ui_*: same source and modes, selected to CVT_u32_f32.
846 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
847           (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
848 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
849           (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
850 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
851           (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
852 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
853           (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
854 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
855           (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
856 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
857           (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
858 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
859           (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
860 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
861           (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
// int_nvvm_i2f_*: Int32Regs source -> CVT_f32_s32 with modes CvtR{N,Z,M,P}.
863 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
864           (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
865 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
866           (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
867 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
868           (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
869 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
870           (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
// int_nvvm_ui2f_*: same shape, selected to CVT_f32_u32.
872 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
873           (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
874 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
875           (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
876 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
877           (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
878 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
879           (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
// int_nvvm_lohi_i2d: packs two Int32Regs sources into one Float64Regs result
// with a single mov.b64 whose source is the vector "{$src0, $src1}".
// NOTE(review): the doubled braces here are presumably how a literal brace is
// written in the asm string — confirm against the asm-writer rules.
881 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
882   Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
// int_nvvm_d2i_lo / int_nvvm_d2i_hi: unpack a Float64Regs source into two
// 32-bit pieces with one mov.b64 and keep only one of them in $dst. The other
// piece lands in a scratch register %temp declared inside the emitted block.
// LO takes the first vector element, HI the second.
// NOTE(review): that the first element is the low half is inferred from the
// record names; confirm against the PTX mov specification.
884 def INT_NVVM_D2I_LO : F_MATH_1<
885   !strconcat("{{\n\t",
886              ".reg .b32 %temp; \n\t",
887              "mov.b64 \t{$dst, %temp}, $src0;\n\t",
888              "}}"),
889   Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
890 def INT_NVVM_D2I_HI : F_MATH_1<
891   !strconcat("{{\n\t",
892              ".reg .b32 %temp; \n\t",
893              "mov.b64 \t{%temp, $dst}, $src0;\n\t",
894              "}}"),
895   Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
// int_nvvm_f2ll_*: Float32Regs source -> CVT_s64_f32, modes CvtR?I with an
// _FTZ twin for each rounding suffix.
897 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
898           (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
899 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
900           (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
901 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
902           (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
903 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
904           (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
905 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
906           (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
907 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
908           (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
909 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
910           (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
911 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
912           (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
// int_nvvm_f2ull_*: same source and modes, selected to CVT_u64_f32.
914 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
915           (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
916 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
917           (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
918 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
919           (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
920 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
921           (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
922 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
923           (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
924 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
925           (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
926 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
927           (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
928 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
929           (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
// int_nvvm_d2ll_*: Float64Regs source -> CVT_s64_f64, modes CvtR?I (no _FTZ).
931 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
932           (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
933 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
934           (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
935 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
936           (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
937 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
938           (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
// int_nvvm_d2ull_*: same source and modes, selected to CVT_u64_f64.
940 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
941           (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
942 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
943           (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
944 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
945           (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
946 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
947           (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
// int_nvvm_ll2f_*: Int64Regs source -> CVT_f32_s64, modes CvtR{N,Z,M,P}.
949 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
950           (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
951 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
952           (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
953 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
954           (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
955 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
956           (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
// int_nvvm_ull2f_*: same shape, selected to CVT_f32_u64.
958 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
959           (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
960 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
961           (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
962 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
963           (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
964 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
965           (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
// int_nvvm_ll2d_*: Int64Regs source -> CVT_f64_s64, modes CvtR{N,Z,M,P}.
967 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
968           (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
969 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
970           (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
971 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
972           (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
973 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
974           (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
// int_nvvm_ull2d_*: same shape, selected to CVT_f64_u64.
976 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
977           (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
978 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
979           (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
980 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
981           (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
982 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
983           (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
// int_nvvm_f2h_rn[_ftz]: the Float32Regs source goes through CVT_f16_f32
// (mode CvtRN or CvtRN_FTZ) and the result is then wrapped in
// BITCONVERT_16_F2I.
// NOTE(review): the wrapper presumably exists because the intrinsic returns
// the half value as an integer — confirm against the intrinsic's declaration.
986 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
987           (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
988 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
989           (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
992 // Bitcast
// Each int_nvvm_bitcast_* intrinsic becomes a plain register move between an
// integer and a floating-point register class of the same width:
// mov.b32 for Int32Regs <-> Float32Regs.
995 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
996   Float32Regs, int_nvvm_bitcast_f2i>;
997 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
998   Int32Regs, int_nvvm_bitcast_i2f>;
// mov.b64 for Int64Regs <-> Float64Regs.
1000 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
1001   Int64Regs, int_nvvm_bitcast_ll2d>;
1002 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
1003   Float64Regs, int_nvvm_bitcast_d2ll>;
1006 // FNS
// Common base for every fns.b32 record.
//   ins      - input operand list; it must name $mask, $base and $offset
//              because the asm string refers to them.
//   Operands - DAG matched for selection; its value is stored to $dst.
// The result is always an Int32Regs register, and every record derived from
// this class is gated on hasPTX60 and hasSM30.
1009 class INT_FNS_MBO<dag ins, dag Operands>
1010   : NVPTXInst<(outs Int32Regs:$dst), ins,
1011                "fns.b32 \t$dst, $mask, $base, $offset;",
1012                [(set Int32Regs:$dst, Operands )]>,
1013     Requires<[hasPTX60, hasSM30]>;
// All eight register/immediate combinations of int_nvvm_fns's three operands.
// The three-letter suffix reads mask, base, offset in that order: 'r' means
// the operand is an Int32Regs register, 'i' means it is an i32imm operand
// matched by `imm`.
1015 def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
1016                      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1017 def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base,    i32imm:$offset),
1018                      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,       imm:$offset)>;
1019 def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base, Int32Regs:$offset),
1020                      (int_nvvm_fns Int32Regs:$mask,       imm:$base, Int32Regs:$offset)>;
1021 def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base,    i32imm:$offset),
1022                      (int_nvvm_fns Int32Regs:$mask,       imm:$base,       imm:$offset)>;
1023 def INT_FNS_irr : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
1024                      (int_nvvm_fns       imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1025 def INT_FNS_iri : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base,    i32imm:$offset),
1026                      (int_nvvm_fns       imm:$mask, Int32Regs:$base,       imm:$offset)>;
1027 def INT_FNS_iir : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base, Int32Regs:$offset),
1028                      (int_nvvm_fns       imm:$mask,       imm:$base, Int32Regs:$offset)>;
1029 def INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$offset),
1030                      (int_nvvm_fns       imm:$mask,       imm:$base,       imm:$offset)>;
1032 //-----------------------------------
1033 // Atomic Functions
1034 //-----------------------------------
// Pattern-fragment wrappers that attach an address-space predicate to an
// atomic DAG fragment. The predicate bodies come from AS_match near the top of
// this file; each calls ChkMemSDNodeAddressSpace(N, ...) with
// ADDRESS_SPACE_GLOBAL, ADDRESS_SPACE_SHARED or ADDRESS_SPACE_GENERIC.
//   ops  - operand list of the fragment.
//   frag - the DAG the fragment expands to.
1036 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
1037  : PatFrag<ops, frag, AS_match.global>;
1038 class ATOMIC_SHARED_CHK <dag ops, dag frag>
1039  : PatFrag<ops, frag, AS_match.shared>;
1040 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
1041  : PatFrag<ops, frag, AS_match.generic>;
// Two-operand atomic instruction templates for one pointer register class.
//   ptrclass  - register class of the $addr operand.
//   regclass  - register class of $dst and of the register form of $b.
//   SpaceStr, OpcStr, TypeStr - appended to "atom", in that order, to build
//               the mnemonic.
//   IntOp     - pattern fragment matched for selection.
//   IMMType   - operand type of $b in the immediate form.
//   IMM       - DAG node that matches $b in the immediate form.
//   Pred      - predicates attached to both records through Requires<>.
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, SDNode IMM, list<Predicate> Pred> {
  // $b is held in a register.
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
  Requires<Pred>;
  // $b is an immediate; the emitted text is the same as for the register form.
  def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
  Requires<Pred>;
}
// Instantiates F_ATOMIC_2_imp twice, once with Int32Regs and once with
// Int64Regs as the pointer register class (sub-records "p32" and "p64"). All
// other arguments are forwarded unchanged. Pred defaults to the empty list.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
  list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, IMM, Pred>;
}
// has 2 operands, neg the second one
//
// Emits a braced block that declares a scratch register `temp` of type
// .s<TypeStr>, writes the negation of $b into it, and then issues
// "atom<SpaceStr><OpcStr>.u<TypeStr>" with `temp` as the data operand.
//   ptrclass - register class of the $addr operand.
//   regclass - register class of $dst and $b.
//   TypeStr  - bare bit width (callers in this file pass "32" or "64"); the
//              ".s" / ".u" prefixes are added here.
//   IntOp    - pattern fragment matched for selection.
//   IMMType  - accepted for signature parity with F_ATOMIC_2_imp but unused:
//              only the register form is defined.
//   Pred     - predicates attached through Requires<>.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
    !strconcat(
      "{{ \n\t",
      ".reg \t.s", TypeStr, " temp; \n\t",
      "neg.s", TypeStr, " \ttemp, $b; \n\t",
      "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
      "}}"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
  Requires<Pred>;
}
// Instantiates F_ATOMIC_2_NEG_imp twice, once with Int32Regs and once with
// Int64Regs as the pointer register class (sub-records "p32" and "p64"). All
// other arguments are forwarded unchanged. Pred defaults to the empty list.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
  string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
  list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, Pred>;
}
// has 3 operands
//
// Three-operand atomic instruction templates for one pointer register class.
// Four records cover every register/immediate combination of $b and $c; all
// of them emit "atom<SpaceStr><OpcStr><TypeStr> $dst, [$addr], $b, $c;".
//   ptrclass - register class of the $addr operand.
//   regclass - register class of $dst and of the register forms of $b / $c.
//   IntOp    - pattern fragment matched for selection.
//   IMMType  - operand type used for immediate $b / $c; the pattern always
//              matches them with the `imm` node.
//   Pred     - predicates attached through Requires<>.
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, list<Predicate> Pred> {
  // $b register, $c register.
  def reg : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, regclass:$b, regclass:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
  Requires<Pred>;

  // $b immediate, $c register.
  def imm1 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, IMMType:$b, regclass:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
  Requires<Pred>;

  // $b register, $c immediate.
  def imm2 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, regclass:$b, IMMType:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
  Requires<Pred>;

  // $b immediate, $c immediate.
  def imm3 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
  Requires<Pred>;
}
// Instantiates F_ATOMIC_3_imp twice, once with Int32Regs and once with
// Int64Regs as the pointer register class (sub-records "p32" and "p64"). All
// other arguments are forwarded unchanged. Pred defaults to the empty list.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, Pred>;
}
1123 // atom_add
// Address-space-qualified fragments. Suffix _g / _s / _gen selects the
// ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK wrapper. The
// _32 and _64 fragments wrap atomic_load_add_32 / atomic_load_add_64.
1125 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1126   (atomic_load_add_32 node:$a, node:$b)>;
1127 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1128   (atomic_load_add_32 node:$a, node:$b)>;
1129 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1130   (atomic_load_add_32 node:$a, node:$b)>;
1131 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1132   (atomic_load_add_64 node:$a, node:$b)>;
1133 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1134   (atomic_load_add_64 node:$a, node:$b)>;
1135 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1136   (atomic_load_add_64 node:$a, node:$b)>;
// Despite the width-less name, these three wrap atomic_load_fadd; they are the
// fragments used by the F32 and F64 instantiations below.
1137 def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1138   (atomic_load_fadd node:$a, node:$b)>;
1139 def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1140   (atomic_load_fadd node:$a, node:$b)>;
1141 def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1142   (atomic_load_fadd node:$a, node:$b)>;
// 32-bit integer add, emitted as atom<space>.add.u32. The _GEN record passes
// an empty space string; _GEN_*_USE_G matches the same generic fragment but
// emits ".global".
// NOTE(review): nothing in this chunk shows what chooses between the _GEN and
// _GEN_*_USE_G records — confirm where that selection is made.
1144 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
1145   atomic_load_add_32_g, i32imm, imm>;
1146 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
1147   atomic_load_add_32_s, i32imm, imm>;
1148 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
1149   atomic_load_add_32_gen, i32imm, imm>;
1150 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1151   ".add", atomic_load_add_32_gen, i32imm, imm>;
// 64-bit integer add, emitted as atom<space>.add.u64.
1153 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
1154   atomic_load_add_64_g, i64imm, imm>;
1155 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
1156   atomic_load_add_64_s, i64imm, imm>;
1157 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
1158   atomic_load_add_64_gen, i64imm, imm>;
1159 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1160   ".add", atomic_load_add_64_gen, i64imm, imm>;
// f32 add: immediates use f32imm / fpimm. No _USE_G variant and no predicate.
1162 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
1163   atomic_load_add_g, f32imm, fpimm>;
1164 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
1165   atomic_load_add_s, f32imm, fpimm>;
1166 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
1167   atomic_load_add_gen, f32imm, fpimm>;
// f64 add: same fragments as f32, gated on the hasAtomAddF64 predicate.
1169 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
1170   atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
1171 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
1172   atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
1173 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
1174   atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
1176 // atom_sub
// Address-space-qualified fragments over atomic_load_sub_32 / _64.
1178 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1179   (atomic_load_sub_32 node:$a, node:$b)>;
1180 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1181   (atomic_load_sub_32 node:$a, node:$b)>;
1182 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1183   (atomic_load_sub_32 node:$a, node:$b)>;
1184 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1185   (atomic_load_sub_64 node:$a, node:$b)>;
1186 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1187   (atomic_load_sub_64 node:$a, node:$b)>;
1188 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1189   (atomic_load_sub_64 node:$a, node:$b)>;
// Subtraction is emitted through F_ATOMIC_2_NEG with opcode ".add": that
// multiclass negates $b into a scratch register and then issues atom.add.
// TypeStr is the bare width ("32" / "64") because F_ATOMIC_2_NEG_imp adds the
// ".s" / ".u" prefixes itself. Only register forms exist for these records.
1191 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
1192   atomic_load_sub_32_g, i32imm>;
1193 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
1194   atomic_load_sub_64_g, i64imm>;
1195 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
1196   atomic_load_sub_32_gen, i32imm>;
1197 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
1198   ".add", atomic_load_sub_32_gen, i32imm>;
1199 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
1200   atomic_load_sub_32_s, i32imm>;
1201 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
1202   atomic_load_sub_64_s, i64imm>;
1203 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
1204   atomic_load_sub_64_gen, i64imm>;
1205 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
1206   ".add", atomic_load_sub_64_gen, i64imm>;
1208 // atom_swap
// Address-space-qualified fragments over atomic_swap_32 / atomic_swap_64.
1210 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1211   (atomic_swap_32 node:$a, node:$b)>;
1212 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1213   (atomic_swap_32 node:$a, node:$b)>;
1214 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1215   (atomic_swap_32 node:$a, node:$b)>;
1216 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1217   (atomic_swap_64 node:$a, node:$b)>;
1218 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1219   (atomic_swap_64 node:$a, node:$b)>;
1220 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1221   (atomic_swap_64 node:$a, node:$b)>;
// Swap is emitted as atom<space>.exch with the untyped .b32 / .b64 suffix.
// _GEN passes an empty space string; _GEN_*_USE_G matches the generic
// fragment but emits ".global".
1223 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
1224   atomic_swap_32_g, i32imm, imm>;
1225 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
1226   atomic_swap_32_s, i32imm, imm>;
1227 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
1228   atomic_swap_32_gen, i32imm, imm>;
1229 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1230   ".exch", atomic_swap_32_gen, i32imm, imm>;
1231 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
1232   atomic_swap_64_g, i64imm, imm>;
1233 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
1234   atomic_swap_64_s, i64imm, imm>;
1235 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
1236   atomic_swap_64_gen, i64imm, imm>;
1237 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1238   ".exch", atomic_swap_64_gen, i64imm, imm>;
1240 // atom_max
// Address-space-qualified fragments over atomic_load_max_32 / _64.
1242 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1243   , (atomic_load_max_32 node:$a, node:$b)>;
1244 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1245   (atomic_load_max_32 node:$a, node:$b)>;
1246 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1247   (atomic_load_max_32 node:$a, node:$b)>;
1248 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1249   , (atomic_load_max_64 node:$a, node:$b)>;
1250 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1251   (atomic_load_max_64 node:$a, node:$b)>;
1252 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1253   (atomic_load_max_64 node:$a, node:$b)>;
// The same set over atomic_load_umax_32 / _64.
1254 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1255   (atomic_load_umax_32 node:$a, node:$b)>;
1256 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1257   (atomic_load_umax_32 node:$a, node:$b)>;
1258 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1259   (atomic_load_umax_32 node:$a, node:$b)>;
1260 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1261   (atomic_load_umax_64 node:$a, node:$b)>;
1262 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1263   (atomic_load_umax_64 node:$a, node:$b)>;
1264 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1265   (atomic_load_umax_64 node:$a, node:$b)>;
// atomic_load_max_* fragments are emitted as atom<space>.max.s32 / .s64.
1267 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1268   ".max", atomic_load_max_32_g, i32imm, imm>;
1269 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1270   ".max", atomic_load_max_32_s, i32imm, imm>;
1271 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
1272   atomic_load_max_32_gen, i32imm, imm>;
1273 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1274   ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
1275 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1276   ".max", atomic_load_max_64_g, i64imm, imm>;
1277 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1278   ".max", atomic_load_max_64_s, i64imm, imm>;
1279 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
1280   atomic_load_max_64_gen, i64imm, imm>;
1281 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1282   ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
// atomic_load_umax_* fragments use the same ".max" opcode with .u32 / .u64.
1283 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1284   ".max", atomic_load_umax_32_g, i32imm, imm>;
1285 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1286   ".max", atomic_load_umax_32_s, i32imm, imm>;
1287 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
1288   atomic_load_umax_32_gen, i32imm, imm>;
1289 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1290   ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
1291 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1292   ".max", atomic_load_umax_64_g, i64imm, imm>;
1293 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1294   ".max", atomic_load_umax_64_s, i64imm, imm>;
1295 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
1296   atomic_load_umax_64_gen, i64imm, imm>;
1297 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1298   ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
1300 // atom_min
// Fragments for signed (atomic_load_min_*) and unsigned (atomic_load_umin_*)
// atomic min, in 32- and 64-bit widths.  Each width gets a _g, _s and _gen
// fragment built from ATOMIC_GLOBAL_CHK, ATOMIC_SHARED_CHK and
// ATOMIC_GENERIC_CHK respectively, all forwarding ($a, $b) unchanged.
// NOTE(review): the ATOMIC_*_CHK classes are defined outside this chunk;
// presumably they add an address-space check -- confirm.
1302 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1303   (atomic_load_min_32 node:$a, node:$b)>;
1304 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1305   (atomic_load_min_32 node:$a, node:$b)>;
1306 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1307   (atomic_load_min_32 node:$a, node:$b)>;
1308 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1309   (atomic_load_min_64 node:$a, node:$b)>;
1310 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1311   (atomic_load_min_64 node:$a, node:$b)>;
1312 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1313   (atomic_load_min_64 node:$a, node:$b)>;
1314 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1315   (atomic_load_umin_32 node:$a, node:$b)>;
1316 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1317   (atomic_load_umin_32 node:$a, node:$b)>;
1318 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1319   (atomic_load_umin_32 node:$a, node:$b)>;
1320 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1321   (atomic_load_umin_64 node:$a, node:$b)>;
1322 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1323   (atomic_load_umin_64 node:$a, node:$b)>;
1324 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1325   (atomic_load_umin_64 node:$a, node:$b)>;
// Instruction definitions for ".min".  Signed variants pass .s32/.s64 with the
// atomic_load_min_* fragments; unsigned variants pass .u32/.u64 with the
// atomic_load_umin_* fragments.
// Name suffixes: _G -> ".global", _S -> ".shared", _GEN -> empty space string,
// _GEN_*_USE_G -> ".global" paired with the generic (_gen) fragment.
// NOTE(review): F_ATOMIC_2 is defined outside this chunk -- confirm how the
// _GEN and _GEN_*_USE_G variants are disambiguated.
1327 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1328   ".min", atomic_load_min_32_g, i32imm, imm>;
1329 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1330   ".min", atomic_load_min_32_s, i32imm, imm>;
1331 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
1332   atomic_load_min_32_gen, i32imm, imm>;
1333 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1334   ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
1335 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1336   ".min", atomic_load_min_64_g, i64imm, imm>;
1337 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1338   ".min", atomic_load_min_64_s, i64imm, imm>;
1339 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
1340   atomic_load_min_64_gen, i64imm, imm>;
1341 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1342   ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
1343 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1344   ".min", atomic_load_umin_32_g, i32imm, imm>;
1345 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1346   ".min", atomic_load_umin_32_s, i32imm, imm>;
1347 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
1348   atomic_load_umin_32_gen, i32imm, imm>;
1349 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1350   ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
1351 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1352   ".min", atomic_load_umin_64_g, i64imm, imm>;
1353 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1354   ".min", atomic_load_umin_64_s, i64imm, imm>;
1355 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
1356   atomic_load_umin_64_gen, i64imm, imm>;
1357 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1358   ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
1360 // atom_inc  atom_dec
// Fragments for atomic increment/decrement.  Unlike the other atomics in this
// section, these wrap the int_nvvm_atomic_load_inc_32 / _dec_32 intrinsics
// rather than an atomic_load_* node, and only 32-bit forms are defined here.
// NOTE(review): the ATOMIC_*_CHK classes are defined outside this chunk;
// presumably they add an address-space check -- confirm.
1362 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1363   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1364 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1365   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1366 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1367   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1368 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1369   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1370 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1371   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1372 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1373   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
// Instruction definitions for ".inc" and ".dec"; all use Int32Regs with the
// .u32 type suffix.
// Name suffixes: _G -> ".global", _S -> ".shared", _GEN -> empty space string,
// _GEN_32_USE_G -> ".global" paired with the generic (_gen) fragment.
// NOTE(review): F_ATOMIC_2 is defined outside this chunk -- confirm how the
// _GEN and _GEN_32_USE_G variants are disambiguated.
1375 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
1376   atomic_load_inc_32_g, i32imm, imm>;
1377 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
1378   atomic_load_inc_32_s, i32imm, imm>;
1379 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
1380   atomic_load_inc_32_gen, i32imm, imm>;
1381 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1382   ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1383 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
1384   atomic_load_dec_32_g, i32imm, imm>;
1385 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
1386   atomic_load_dec_32_s, i32imm, imm>;
1387 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
1388   atomic_load_dec_32_gen, i32imm, imm>;
1389 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1390   ".dec", atomic_load_dec_32_gen, i32imm, imm>;
1392 // atom_and
// Fragments for atomic bitwise AND in 32- and 64-bit widths.  Each width gets
// a _g, _s and _gen fragment that forwards ($a, $b) to atomic_load_and_*.
// NOTE(review): the ATOMIC_*_CHK classes are defined outside this chunk;
// presumably they add an address-space check -- confirm.
1394 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1395   (atomic_load_and_32 node:$a, node:$b)>;
1396 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1397   (atomic_load_and_32 node:$a, node:$b)>;
1398 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1399   (atomic_load_and_32 node:$a, node:$b)>;
1400 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1401   (atomic_load_and_64 node:$a, node:$b)>;
1402 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1403   (atomic_load_and_64 node:$a, node:$b)>;
1404 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1405   (atomic_load_and_64 node:$a, node:$b)>;
// Instruction definitions for ".and", using the untyped .b32/.b64 suffixes.
// Name suffixes: _G -> ".global", _S -> ".shared", _GEN -> empty space string,
// _GEN_*_USE_G -> ".global" paired with the generic (_gen) fragment.
// NOTE(review): F_ATOMIC_2 is defined outside this chunk -- confirm how the
// _GEN and _GEN_*_USE_G variants are disambiguated.
1407 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
1408   atomic_load_and_32_g, i32imm, imm>;
1409 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
1410   atomic_load_and_32_s, i32imm, imm>;
1411 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
1412   atomic_load_and_32_gen, i32imm, imm>;
1413 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1414   ".and", atomic_load_and_32_gen, i32imm, imm>;
1415 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
1416   atomic_load_and_64_g, i64imm, imm>;
1417 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
1418   atomic_load_and_64_s, i64imm, imm>;
1419 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
1420   atomic_load_and_64_gen, i64imm, imm>;
1421 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1422   ".and", atomic_load_and_64_gen, i64imm, imm>;
1424 // atom_or
// Fragments for atomic bitwise OR in 32- and 64-bit widths.  Each width gets
// a _g, _s and _gen fragment that forwards ($a, $b) to atomic_load_or_*.
// NOTE(review): the ATOMIC_*_CHK classes are defined outside this chunk;
// presumably they add an address-space check -- confirm.
1426 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1427   (atomic_load_or_32 node:$a, node:$b)>;
1428 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1429   (atomic_load_or_32 node:$a, node:$b)>;
1430 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1431   (atomic_load_or_32 node:$a, node:$b)>;
1432 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1433   (atomic_load_or_64 node:$a, node:$b)>;
1434 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1435   (atomic_load_or_64 node:$a, node:$b)>;
1436 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1437   (atomic_load_or_64 node:$a, node:$b)>;
// Instruction definitions for ".or", using the untyped .b32/.b64 suffixes.
// Note the listing order here is _G, _GEN, _GEN_*_USE_G, _S, which differs
// from the _G, _S, _GEN, _GEN_*_USE_G order used by the other operations.
// Name suffixes: _G -> ".global", _S -> ".shared", _GEN -> empty space string,
// _GEN_*_USE_G -> ".global" paired with the generic (_gen) fragment.
// NOTE(review): F_ATOMIC_2 is defined outside this chunk -- confirm how the
// _GEN and _GEN_*_USE_G variants are disambiguated.
1439 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
1440   atomic_load_or_32_g, i32imm, imm>;
1441 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
1442   atomic_load_or_32_gen, i32imm, imm>;
1443 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1444   ".or", atomic_load_or_32_gen, i32imm, imm>;
1445 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
1446   atomic_load_or_32_s, i32imm, imm>;
1447 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
1448   atomic_load_or_64_g, i64imm, imm>;
1449 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
1450   atomic_load_or_64_gen, i64imm, imm>;
1451 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1452   ".or", atomic_load_or_64_gen, i64imm, imm>;
1453 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
1454   atomic_load_or_64_s, i64imm, imm>;
1456 // atom_xor
// Fragments for atomic bitwise XOR in 32- and 64-bit widths.  Each width gets
// a _g, _s and _gen fragment that forwards ($a, $b) to atomic_load_xor_*.
// NOTE(review): the ATOMIC_*_CHK classes are defined outside this chunk;
// presumably they add an address-space check -- confirm.
1458 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1459   (atomic_load_xor_32 node:$a, node:$b)>;
1460 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1461   (atomic_load_xor_32 node:$a, node:$b)>;
1462 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1463   (atomic_load_xor_32 node:$a, node:$b)>;
1464 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1465   (atomic_load_xor_64 node:$a, node:$b)>;
1466 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1467   (atomic_load_xor_64 node:$a, node:$b)>;
1468 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1469   (atomic_load_xor_64 node:$a, node:$b)>;
// Instruction definitions for ".xor", using the untyped .b32/.b64 suffixes.
// Name suffixes: _G -> ".global", _S -> ".shared", _GEN -> empty space string,
// _GEN_*_USE_G -> ".global" paired with the generic (_gen) fragment.
// NOTE(review): F_ATOMIC_2 is defined outside this chunk -- confirm how the
// _GEN and _GEN_*_USE_G variants are disambiguated.
1471 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
1472   atomic_load_xor_32_g, i32imm, imm>;
1473 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
1474   atomic_load_xor_32_s, i32imm, imm>;
1475 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
1476   atomic_load_xor_32_gen, i32imm, imm>;
1477 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1478   ".xor", atomic_load_xor_32_gen, i32imm, imm>;
1479 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
1480   atomic_load_xor_64_g, i64imm, imm>;
1481 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
1482   atomic_load_xor_64_s, i64imm, imm>;
1483 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
1484   atomic_load_xor_64_gen, i64imm, imm>;
1485 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1486   ".xor", atomic_load_xor_64_gen, i64imm, imm>;
1488 // atom_cas
// Fragments for atomic compare-and-swap in 32- and 64-bit widths.  These take
// three operands ($a, $b, $c) and forward them unchanged to atomic_cmp_swap_*.
// NOTE(review): the ATOMIC_*_CHK classes are defined outside this chunk;
// presumably they add an address-space check -- confirm.
1490 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1491   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1492 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1493   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1494 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1495   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1496 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1497   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1498 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1499   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1500 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1501   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
// Instruction definitions for ".cas", using the untyped .b32/.b64 suffixes.
// These instantiate F_ATOMIC_3, which here receives only the immediate operand
// type (i32imm/i64imm) and no separate immediate leaf argument.
// Name suffixes: _G -> ".global", _S -> ".shared", _GEN -> empty space string,
// _GEN_*_USE_G -> ".global" paired with the generic (_gen) fragment.
// NOTE(review): F_ATOMIC_3 is defined outside this chunk -- confirm how the
// _GEN and _GEN_*_USE_G variants are disambiguated.
1503 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
1504   atomic_cmp_swap_32_g, i32imm>;
1505 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
1506   atomic_cmp_swap_32_s, i32imm>;
1507 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
1508   atomic_cmp_swap_32_gen, i32imm>;
1509 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
1510   ".cas", atomic_cmp_swap_32_gen, i32imm>;
1511 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
1512   atomic_cmp_swap_64_g, i64imm>;
1513 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
1514   atomic_cmp_swap_64_s, i64imm>;
1515 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
1516   atomic_cmp_swap_64_gen, i64imm>;
1517 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
1518   ".cas", atomic_cmp_swap_64_gen, i64imm>;
1520 // Support for scoped atomic operations.  Matches
1521 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1522 // and converts it into the appropriate instruction.
1523 // NOTE: not all possible combinations are implemented
1524 //  'space' is limited to generic as it's the only one needed to support CUDA.
1525 //  'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Common instruction shape shared by the two- and three-operand scoped atomics.
//   AsmStr   - assembly string for the instruction.
//   regclass - register class of the single $result output.
//   Preds    - predicates attached through Requires<>.
//   ins      - input operand dag.
//   Operands - dag whose value is assigned to $result in the selection pattern.
1526 class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
1527                   dag ins, dag Operands>
1528       : NVPTXInst<(outs regclass:$result), ins,
1529                   AsmStr,
1530                   [(set regclass:$result, Operands)]>,
1531         Requires<Preds>;
1533 // Define instruction variants for all addressing modes.
// Emits four anonymous ATOM23_impl instructions for a two-operand atomic
// intrinsic Intr: $src taken from Int32Regs or Int64Regs, crossed with $b
// taken from a register (regclass) or an immediate (ImmType).
// The register/register forms get AddedComplexity = 1; the immediate forms
// are left at the default.
1534 multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
1535                        NVPTXRegClass regclass, Operand ImmType,
1536                        SDNode Imm, ValueType ImmTy,
1537                        list<Predicate> Preds> {
1538   let AddedComplexity = 1 in {
1539     def : ATOM23_impl<AsmStr, regclass, Preds,
1540                       (ins Int32Regs:$src, regclass:$b),
1541                       (Intr Int32Regs:$src, regclass:$b)>;
1542     def : ATOM23_impl<AsmStr, regclass, Preds,
1543                       (ins Int64Regs:$src, regclass:$b),
1544                       (Intr Int64Regs:$src, regclass:$b)>;
1545   }
1546   // tablegen can't infer argument types from Intrinsic (though it can
1547   // from Instruction) so we have to enforce specific type on
1548   // immediates via explicit cast to ImmTy.
1549   def : ATOM23_impl<AsmStr, regclass, Preds,
1550                     (ins Int32Regs:$src, ImmType:$b),
1551                     (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
1552   def : ATOM23_impl<AsmStr, regclass, Preds,
1553                     (ins Int64Regs:$src, ImmType:$b),
1554                     (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
// Three-operand counterpart of ATOM2P_impl: emits eight anonymous ATOM23_impl
// instructions covering $src in Int32Regs/Int64Regs crossed with every
// register/immediate combination of $b and $c.
// AddedComplexity decreases with the number of immediates: 2 for reg/reg,
// 1 when exactly one of $b/$c is an immediate, default for imm/imm.
// Immediates are wrapped in an explicit (ImmTy ...) cast.
1557 multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
1558                        NVPTXRegClass regclass, Operand ImmType,
1559                        SDNode Imm, ValueType ImmTy,
1560                        list<Predicate> Preds> {
1561   // Variants for register/immediate permutations of $b and $c
1562   let AddedComplexity = 2 in {
1563     def : ATOM23_impl<AsmStr, regclass, Preds,
1564                       (ins Int32Regs:$src, regclass:$b, regclass:$c),
1565                       (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
1566     def : ATOM23_impl<AsmStr, regclass, Preds,
1567                       (ins Int64Regs:$src, regclass:$b, regclass:$c),
1568                       (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
1569   }
1570   let AddedComplexity = 1 in {
1571     def : ATOM23_impl<AsmStr, regclass, Preds,
1572                       (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1573                       (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1574     def : ATOM23_impl<AsmStr, regclass, Preds,
1575                       (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1576                       (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1577     def : ATOM23_impl<AsmStr, regclass, Preds,
1578                       (ins Int32Regs:$src, regclass:$b, ImmType:$c),
1579                       (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1580     def : ATOM23_impl<AsmStr, regclass, Preds,
1581                       (ins Int64Regs:$src, regclass:$b, ImmType:$c),
1582                       (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1583   }
1584   def : ATOM23_impl<AsmStr, regclass, Preds,
1585                     (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
1586                     (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1587   def : ATOM23_impl<AsmStr, regclass, Preds,
1588                     (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
1589                     (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1592 // Constructs intrinsic name and instruction asm strings.
// Builds the asm string and looks up the intrinsic record for a two-operand
// scoped atomic, then forwards both to ATOM2P_impl.
//   asm:       "atom" [.SpaceStr unless "gen"] [.ScopeStr unless "gpu"]
//              .OpStr.TypeStr " \t$result, [$src], $b;"
//   intrinsic: int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]
// NOTE(review): the asm string drops the scope suffix when ScopeStr is "gpu",
// but the intrinsic name drops it only when ScopeStr is empty.  The scopes
// instantiated in this part of the file are "cta" and "sys", so neither
// special case is exercised by them -- confirm before adding a "gpu" user.
1593 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
1594                        string ScopeStr, string SpaceStr,
1595                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1596                        ValueType ImmTy, list<Predicate> Preds> {
1597   defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1598                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1599                             # "." # OpStr # "." # TypeStr
1600                             # " \t$result, [$src], $b;",
1601                      !cast<Intrinsic>(
1602                             "int_nvvm_atomic_" # OpStr
1603                             # "_" # SpaceStr # "_" # IntTypeStr
1604                             # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1605                      regclass, ImmType, Imm, ImmTy, Preds>;
// Three-operand counterpart of ATOM2N_impl: builds the asm string and looks up
// the intrinsic record, then forwards both to ATOM3P_impl.
//   asm:       "atom" [.SpaceStr unless "gen"] [.ScopeStr unless "gpu"]
//              .OpStr.TypeStr " \t$result, [$src], $b, $c;"
//   intrinsic: int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]
// NOTE(review): the asm string drops the scope suffix when ScopeStr is "gpu",
// but the intrinsic name drops it only when ScopeStr is empty -- confirm
// before adding a "gpu" user.
1607 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
1608                        string ScopeStr, string SpaceStr,
1609                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1610                        ValueType ImmTy, list<Predicate> Preds> {
1611   defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1612                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1613                             # "." # OpStr # "." # TypeStr
1614                             # " \t$result, [$src], $b, $c;",
1615                      !cast<Intrinsic>(
1616                             "int_nvvm_atomic_" # OpStr
1617                             # "_" # SpaceStr # "_" # IntTypeStr
1618                             # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1619                      regclass, ImmType, Imm, ImmTy, Preds>;
1622 // Constructs variants for different address spaces.
1623 // For now we only need variants for generic space pointers.
// Address-space layer for two-operand scoped atomics.  Only the "gen" space is
// instantiated; every other argument is passed through to ATOM2N_impl.
1624 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
1625                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1626                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1627    defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1628                             regclass, ImmType, Imm, ImmTy, Preds>;
// Address-space layer for three-operand scoped atomics.  Only the "gen" space
// is instantiated; every other argument is passed through to ATOM3N_impl.
1630 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
1631                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1632                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1633    defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1634                             regclass, ImmType, Imm, ImmTy, Preds>;
1637 // Constructs variants for different scopes of atomic op.
// Scope layer for two-operand atomics: instantiates ATOM2A_impl once for the
// "cta" scope and once for the "sys" scope.  Both instantiations append
// hasAtomScope to the caller-supplied predicate list.
1638 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
1639                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1640                        ValueType ImmTy, list<Predicate> Preds> {
1641    // .gpu scope is default and is currently covered by existing
1642    // atomics w/o explicitly specified scope.
1643    defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1644                            regclass, ImmType, Imm, ImmTy,
1645                            !listconcat(Preds,[hasAtomScope])>;
1646    defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1647                            regclass, ImmType, Imm, ImmTy,
1648                            !listconcat(Preds,[hasAtomScope])>;
// Scope layer for three-operand atomics: instantiates ATOM3A_impl once for the
// "cta" scope and once for the "sys" scope.  Both instantiations append
// hasAtomScope to the caller-supplied predicate list.
1650 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
1651            NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
1652            list<Predicate> Preds> {
1653    // No need to define ".gpu"-scoped atomics.  They do the same thing
1654    // as the regular, non-scoped atomics defined elsewhere.
1655    defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1656                            regclass, ImmType, Imm, ImmTy,
1657                            !listconcat(Preds,[hasAtomScope])>;
1658    defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1659                            regclass, ImmType, Imm, ImmTy,
1660                            !listconcat(Preds,[hasAtomScope])>;
1663 // atom.add
// Type variants for scoped add: s32, u32, u64, f32 and f64 (no s64 variant is
// defined).  Integer types use intrinsic type string "i" with imm leaves;
// floating-point types use "f" with fpimm leaves.  Only the f64 variant adds a
// predicate (hasAtomAddF64).
1664 multiclass ATOM2_add_impl<string OpStr> {
1665    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1666    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1667    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
1668    defm _f32  : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
1669                             []>;
1670    defm _f64  : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
1671                             [hasAtomAddF64]>;
1674 // atom.{and,or,xor}
// Type variants for scoped bitwise ops: b32 with no extra predicate, and b64
// gated on hasAtomBitwise64.
1675 multiclass ATOM2_bitwise_impl<string OpStr> {
1676    defm _b32  : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1677    defm _b64  : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
1678                             [hasAtomBitwise64]>;
1681 // atom.exch
// Type variants for scoped exchange: b32 and b64, neither with an extra
// predicate.
1682 multiclass ATOM2_exch_impl<string OpStr> {
1683    defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1684    defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1687 // atom.{min,max}
// Type variants for scoped min/max: s32 and u32 with no extra predicate, and
// s64/u64 gated on hasAtomMinMax64.
1688 multiclass ATOM2_minmax_impl<string OpStr> {
1689    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1690    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1691    defm _s64  : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
1692                             [hasAtomMinMax64]>;
1693    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
1694                             [hasAtomMinMax64]>;
1697 // atom.{inc,dec}
// Type variants for scoped inc/dec: only u32 is defined.
1698 multiclass ATOM2_incdec_impl<string OpStr> {
1699    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1702 // atom.cas
// Type variants for scoped compare-and-swap: b32 and b64 through the
// three-operand ATOM3S_impl, neither with an extra predicate.
1703 multiclass ATOM3_cas_impl<string OpStr> {
1704    defm _b32  : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1705    defm _b64  : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
// Top-level instantiations of the scoped atomics, one per operation.  The
// string argument is the OpStr that the per-type multiclasses pass down into
// both the asm mnemonic and the intrinsic name.
1708 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
1709 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
1710 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
1711 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
1712 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
1713 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
1714 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
1715 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
1716 defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
1717 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
1719 //-----------------------------------
1720 // Support for ldu on sm_20 or later
1721 //-----------------------------------
1723 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
1724 // read-only in a kernel.
1726 // Scalar
// Scalar "ldu.global." instructions, one per form of the $src operand:
//   areg (Int32Regs), areg64 (Int64Regs), avar (imemAny),
//   ari (MEMri), ari64 (MEMri64).
// TyStr is appended directly after "ldu.global.", so it must carry the type
// suffix and the operand text.  All forms have an empty pattern list and
// require hasLDU.
1728 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1729   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1730                !strconcat("ldu.global.", TyStr),
1731                       []>, Requires<[hasLDU]>;
1732   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1733                !strconcat("ldu.global.", TyStr),
1734                         []>, Requires<[hasLDU]>;
1735  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1736                !strconcat("ldu.global.", TyStr),
1737                       []>, Requires<[hasLDU]>;
1738  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1739                !strconcat("ldu.global.", TyStr),
1740                       []>, Requires<[hasLDU]>;
1741  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1742                !strconcat("ldu.global.", TyStr),
1743                         []>, Requires<[hasLDU]>;
// Scalar ldu instantiations, one per value type.  The i8 form uses the u8 type
// suffix with Int16Regs as its result class; f16 and f16x2 use the untyped
// b16/b32 suffixes; the p32/p64 forms reuse the u32/u64 suffixes.
1746 defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1747 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1748 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1749 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1750 defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
1751 defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
1752 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1753 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1754 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1755 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1757 // vector
1759 // Elementized vector ldu
// Two-result "ldu.global." instructions ($dst1, $dst2), one per form of the
// $src operand: _areg32, _areg64, _ari32, _ari64, _avar.
// TyStr is appended directly after "ldu.global.".  Pattern lists are empty.
// NOTE(review): unlike the scalar LDU_G multiclass, these definitions carry no
// Requires<[hasLDU]> -- confirm that is intentional.
1760 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1761  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1762                      (ins Int32Regs:$src),
1763                      !strconcat("ldu.global.", TyStr), []>;
1764  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1765                      (ins Int64Regs:$src),
1766                      !strconcat("ldu.global.", TyStr), []>;
1767  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1768                      (ins MEMri:$src),
1769                      !strconcat("ldu.global.", TyStr), []>;
1770  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1771                      (ins MEMri64:$src),
1772                      !strconcat("ldu.global.", TyStr), []>;
1773  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1774                      (ins imemAny:$src),
1775                      !strconcat("ldu.global.", TyStr), []>;
// Four-result "ldu.global." instructions ($dst1..$dst4), one per form of the
// $src operand: _areg32, _areg64, _ari32, _ari64, _avar.
// TyStr is appended directly after "ldu.global.".  Pattern lists are empty.
// NOTE(review): unlike the scalar LDU_G multiclass, these definitions carry no
// Requires<[hasLDU]> -- confirm that is intentional.
1778 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { 
1779  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1780                             regclass:$dst4), (ins Int32Regs:$src), 
1781                !strconcat("ldu.global.", TyStr), []>;
1782  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1783                             regclass:$dst4), (ins Int64Regs:$src), 
1784                !strconcat("ldu.global.", TyStr), []>;
1785  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1786                             regclass:$dst4), (ins MEMri:$src), 
1787                !strconcat("ldu.global.", TyStr), []>;
1788  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1789                             regclass:$dst4), (ins MEMri64:$src), 
1790                !strconcat("ldu.global.", TyStr), []>;
1791  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1792                             regclass:$dst4), (ins imemAny:$src), 
1793                !strconcat("ldu.global.", TyStr), []>;
// Vector ldu instantiations.  v2 forms cover i8, i16, i32, f16, f16x2, f32,
// i64 and f64; v4 forms cover i8, i16, i32, f16, f16x2 and f32 (no 64-bit
// element v4 forms are defined here).
// The i8 forms use the u8 type suffix with Int16Regs as the result class.
// The doubled braces "{{" / "}}" appear verbatim in the asm strings.
1796 defm INT_PTX_LDU_G_v2i8_ELE
1797   : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
1798 defm INT_PTX_LDU_G_v2i16_ELE
1799   : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1800 defm INT_PTX_LDU_G_v2i32_ELE
1801   : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1802 defm INT_PTX_LDU_G_v2f16_ELE
1803   : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1804 defm INT_PTX_LDU_G_v2f16x2_ELE
1805   : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1806 defm INT_PTX_LDU_G_v2f32_ELE
1807   : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1808 defm INT_PTX_LDU_G_v2i64_ELE
1809   : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1810 defm INT_PTX_LDU_G_v2f64_ELE
1811   : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1812 defm INT_PTX_LDU_G_v4i8_ELE
1813   : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1814 defm INT_PTX_LDU_G_v4i16_ELE
1815   : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1816     Int16Regs>;
1817 defm INT_PTX_LDU_G_v4i32_ELE
1818   : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1819     Int32Regs>;
1820 defm INT_PTX_LDU_G_v4f16_ELE
1821   : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1822     Float16Regs>;
1823 defm INT_PTX_LDU_G_v4f16x2_ELE
1824   : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1825     Float16x2Regs>;
1826 defm INT_PTX_LDU_G_v4f32_ELE
1827   : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1828     Float32Regs>;
1831 //-----------------------------------
1832 // Support for ldg on sm_35 or later 
1833 //-----------------------------------
1835 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
1836 // non-coherent texture cache, and therefore the values read must be read-only
1837 // during the lifetime of the kernel.
// Scalar "ld.global.nc." instructions, one per form of the $src operand:
//   areg (Int32Regs), areg64 (Int64Regs), avar (imemAny),
//   ari (MEMri), ari64 (MEMri64).
// TyStr is appended directly after "ld.global.nc.", so it must carry the type
// suffix and the operand text.  All forms have an empty pattern list and
// require hasLDG.
1839 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
1840   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1841                !strconcat("ld.global.nc.", TyStr),
1842                       []>, Requires<[hasLDG]>;
1843   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1844                !strconcat("ld.global.nc.", TyStr),
1845                         []>, Requires<[hasLDG]>;
1846  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1847                !strconcat("ld.global.nc.", TyStr),
1848                       []>, Requires<[hasLDG]>;
1849  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1850                !strconcat("ld.global.nc.", TyStr),
1851                       []>, Requires<[hasLDG]>;
1852  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1853                !strconcat("ld.global.nc.", TyStr),
1854                         []>, Requires<[hasLDG]>;
// Scalar ldg instantiations, one per value type.  The i8 form uses the u8 type
// suffix with Int16Regs as its result class; f16 and f16x2 use the untyped
// b16/b32 suffixes; the p32/p64 forms reuse the u32/u64 suffixes.
1857 defm INT_PTX_LDG_GLOBAL_i8
1858   : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
1859 defm INT_PTX_LDG_GLOBAL_i16
1860   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
1861 defm INT_PTX_LDG_GLOBAL_i32
1862   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1863 defm INT_PTX_LDG_GLOBAL_i64
1864   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1865 defm INT_PTX_LDG_GLOBAL_f16
1866   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
1867 defm INT_PTX_LDG_GLOBAL_f16x2
1868   : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
1869 defm INT_PTX_LDG_GLOBAL_f32
1870   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
1871 defm INT_PTX_LDG_GLOBAL_f64
1872   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
1873 defm INT_PTX_LDG_GLOBAL_p32
1874   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1875 defm INT_PTX_LDG_GLOBAL_p64
1876   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1878 // vector
1880 // Elementized vector ldg 
// Two-result "ld.global.nc." instructions ($dst1, $dst2), one per form of the
// $src operand: _areg32, _areg64, _ari32, _ari64, _avar.
// TyStr is appended directly after "ld.global.nc.".  Pattern lists are empty.
// NOTE(review): unlike the scalar LDG_G multiclass, these definitions carry no
// Requires<[hasLDG]> -- confirm that is intentional.
1881 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1882  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1883                      (ins Int32Regs:$src),
1884                      !strconcat("ld.global.nc.", TyStr), []>;
1885  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1886                      (ins Int64Regs:$src),
1887                      !strconcat("ld.global.nc.", TyStr), []>;
1888  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1889                      (ins MEMri:$src),
1890                      !strconcat("ld.global.nc.", TyStr), []>;
1891  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1892                      (ins MEMri64:$src),
1893                      !strconcat("ld.global.nc.", TyStr), []>;
1894  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1895                      (ins imemAny:$src),
1896                      !strconcat("ld.global.nc.", TyStr), []>;
// Elementized four-element vector ldg.
//
// Every variant prints "ld.global.nc." followed by TyStr and defines four
// results, $dst1..$dst4, of class `regclass`.  The variants differ only in
// the operand type used for the address $src.  The pattern list is empty, so
// these records carry no selection pattern of their own.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  // Address in an Int32Regs register.
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  // Address in an Int64Regs register.
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  // Address given as a MEMri operand.
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri:$src),
                         "ld.global.nc." # TyStr, []>;
  // Address given as a MEMri64 operand.
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri64:$src),
                         "ld.global.nc." # TyStr, []>;
  // Address given as an imemAny operand.
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                              regclass:$dst3, regclass:$dst4),
                        (ins imemAny:$src),
                        "ld.global.nc." # TyStr, []>;
}
// Vector ld.global.nc instantiations.  v2 forms go through VLDG_G_ELE_V2 and
// v4 forms through VLDG_G_ELE_V4; the string is the TyStr appended to
// "ld.global.nc." and the register class is the class of every $dstN.
// The i8 element forms use Int16Regs.
//
// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper
// loads.
defm INT_PTX_LDG_G_v2i8_ELE :
  VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE :
  VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE :
  VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f16_ELE :
  VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDG_G_v2f16x2_ELE :
  VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE :
  VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE :
  VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE :
  VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDG_G_v4i8_ELE :
  VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE :
  VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE :
  VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int32Regs>;
defm INT_PTX_LDG_G_v4f16_ELE :
  VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float16Regs>;
defm INT_PTX_LDG_G_v4f16x2_ELE :
  VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE :
  VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float32Regs>;
// cvta.<Str> instructions selected for the intrinsic `Intrin`.
//   Str    - address-space name spliced into the mnemonic ("cvta.<Str>.uNN").
//   Intrin - intrinsic matched by each variant's selection pattern.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  // 32-bit source, 32-bit result.
  def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                       "cvta." # Str # ".u32 \t$result, $src;",
                       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  // 64-bit source, 64-bit result.
  def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                          "cvta." # Str # ".u64 \t$result, $src;",
                          [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  // 32-bit source, 64-bit result; only available with useShortPtr.  The
  // source is first widened into a scratch .b64 register with cvt.u64.u32 and
  // the 64-bit cvta is applied to that scratch register.
  def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                            "{{ .reg .b64 %tmp;\n\t"
                            # "  cvt.u64.u32 \t%tmp, $src;\n\t"
                            # "  cvta." # Str # ".u64 \t$result, %tmp; }}",
                            [(set Int64Regs:$result,
                                  (Intrin Int32Regs:$src))]>,
                  Requires<[useShortPtr]>;
}
// cvta.to.<Str> instructions selected for the intrinsic `Intrin`.
//   Str    - address-space name spliced into the mnemonic
//            ("cvta.to.<Str>.uNN").
//   Intrin - intrinsic matched by each variant's selection pattern.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  // 32-bit source, 32-bit result.
  def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                       "cvta.to." # Str # ".u32 \t$result, $src;",
                       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  // 64-bit source, 64-bit result.
  def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                          "cvta.to." # Str # ".u64 \t$result, $src;",
                          [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  // 64-bit source, 32-bit result; only available with useShortPtr.  The
  // 64-bit cvta.to result goes to a scratch .b64 register, which is then
  // narrowed into $result with cvt.u32.u64.
  def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                            "{{ .reg .b64 %tmp;\n\t"
                            # "  cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
                            # "  cvt.u32.u64 \t$result, %tmp; }}",
                            [(set Int32Regs:$result,
                                  (Intrin Int64Regs:$src))]>,
                  Requires<[useShortPtr]>;
}
// cvta.<space>: instantiated for the int_nvvm_ptr_*_to_gen intrinsics.
defm cvta_local  : NG_TO_G<"local",  int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const  : NG_TO_G<"const",  int_nvvm_ptr_constant_to_gen>;

// cvta.to.<space>: instantiated for the int_nvvm_ptr_gen_to_* intrinsics.
defm cvta_to_local  : G_TO_NG<"local",  int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const",  int_nvvm_ptr_gen_to_constant>;
// nvvm.ptr.gen.to.param
// Selected to a plain register move of the matching width (mov.u32/mov.u64).
def nvvm_ptr_gen_to_param : NVPTXInst<
    (outs Int32Regs:$result), (ins Int32Regs:$src),
    "mov.u32 \t$result, $src;",
    [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64 : NVPTXInst<
    (outs Int64Regs:$result), (ins Int64Regs:$src),
    "mov.u64 \t$result, $src;",
    [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
// nvvm.move intrinsics
// Each int_nvvm_move_* intrinsic is selected to one register-to-register mov.
// The integer variants print mov.bNN, the floating-point variants mov.fNN and
// the pointer variants mov.uNN.
def nvvm_move_i16 : NVPTXInst<
    (outs Int16Regs:$r), (ins Int16Regs:$s),
    "mov.b16 \t$r, $s;",
    [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32 : NVPTXInst<
    (outs Int32Regs:$r), (ins Int32Regs:$s),
    "mov.b32 \t$r, $s;",
    [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64 : NVPTXInst<
    (outs Int64Regs:$r), (ins Int64Regs:$s),
    "mov.b64 \t$r, $s;",
    [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float : NVPTXInst<
    (outs Float32Regs:$r), (ins Float32Regs:$s),
    "mov.f32 \t$r, $s;",
    [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double : NVPTXInst<
    (outs Float64Regs:$r), (ins Float64Regs:$s),
    "mov.f64 \t$r, $s;",
    [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
// int_nvvm_move_ptr is matched at both pointer widths.
def nvvm_move_ptr32 : NVPTXInst<
    (outs Int32Regs:$r), (ins Int32Regs:$s),
    "mov.u32 \t$r, $s;",
    [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64 : NVPTXInst<
    (outs Int64Regs:$r), (ins Int64Regs:$s),
    "mov.u64 \t$r, $s;",
    [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;
2032 // @TODO: Are these actually needed, or will we always just see symbols
2033 // copied to registers first?
2034 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2035                              "mov.u32 \t$r, $s;",
2036                              [(set Int32Regs:$r,
2037                              (int_nvvm_move_ptr texternalsym:$s))]>;
2038 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2039                              "mov.u64 \t$r, $s;",
2040                              [(set Int64Regs:$r,
2041                              (int_nvvm_move_ptr texternalsym:$s))]>;*/
// Fold a local->generic->local round trip applied to a parameter symbol:
//
//   MoveParam        %r1, param
//   ptr_local_to_gen %r2, %r1
//   ptr_gen_to_local %r3, %r2
// ->
//   mov %r1, param
//
// @TODO: Revisit this.  There is a type contradiction between iPTRAny and
// iPTR for the addr defs, so the move_sym instructions are not currently
// defined.  However, we can use the ptr variants and the asm printer will do
// the right thing.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;
// Moves an imem operand into a 64-bit register with mov.u64.  No selection
// pattern is attached.
def texsurf_handles : NVPTXInst<
    (outs Int64Regs:$result), (ins imem:$src),
    "mov.u64 \t$result, $src;",
    []>;
//-----------------------------------
// Compiler Error Warn
// - Just ignore them in codegen
//-----------------------------------

// Both intrinsics are matched with a 32-bit or a 64-bit register operand and
// print only a PTX comment line; the operand $a does not appear in the output.
def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<
    (outs), (ins Int32Regs:$a),
    "// llvm.nvvm.compiler.warn()",
    [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<
    (outs), (ins Int64Regs:$a),
    "// llvm.nvvm.compiler.warn()",
    [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<
    (outs), (ins Int32Regs:$a),
    "// llvm.nvvm.compiler.error()",
    [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<
    (outs), (ins Int64Regs:$a),
    "// llvm.nvvm.compiler.error()",
    [(int_nvvm_compiler_error Int64Regs:$a)]>;
// isspacep
// One instruction per address space (const, global, local, shared) and per
// operand width (32/64-bit).  Each writes an Int1Regs result and is selected
// for the matching int_nvvm_isspacep_* intrinsic.  Only the const variants
// carry a predicate (hasPTX31).
def ISSPACEP_CONST_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.const \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
  Requires<[hasPTX31]>;
def ISSPACEP_CONST_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.const \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
  Requires<[hasPTX31]>;
def ISSPACEP_GLOBAL_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.global \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
def ISSPACEP_GLOBAL_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.global \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
def ISSPACEP_LOCAL_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.local \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
def ISSPACEP_LOCAL_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.local \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
def ISSPACEP_SHARED_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.shared \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
def ISSPACEP_SHARED_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.shared \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
// Special register reads
// Copies a SpecialRegs operand into a 32-bit register with mov.b32.  The
// instruction itself has no pattern; it is referenced from explicit Pat
// records.
def MOV_SPECIAL : NVPTXInst<
    (outs Int32Regs:$d), (ins SpecialRegs:$r),
    "mov.b32 \t$d, $r;",
    []>;
// int_nvvm_read_ptx_sreg_envreg<N> is selected to MOV_SPECIAL of ENVREG<N>,
// for N = 0..31.
def : Pat<(int_nvvm_read_ptx_sreg_envreg0),  (MOV_SPECIAL ENVREG0)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg1),  (MOV_SPECIAL ENVREG1)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg2),  (MOV_SPECIAL ENVREG2)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg3),  (MOV_SPECIAL ENVREG3)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg4),  (MOV_SPECIAL ENVREG4)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg5),  (MOV_SPECIAL ENVREG5)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg6),  (MOV_SPECIAL ENVREG6)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg7),  (MOV_SPECIAL ENVREG7)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg8),  (MOV_SPECIAL ENVREG8)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg9),  (MOV_SPECIAL ENVREG9)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
// rotate builtin support

// int_nvvm_rotate_b32 when hasHWROT32 holds: printed as shf.l.wrap.b32 with
// $src used for both data operands.  Immediate-amount form.
def ROTATE_B32_HW_IMM : NVPTXInst<
    (outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
    "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst,
          (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
  Requires<[hasHWROT32]>;

// Same instruction with the amount in a register.
def ROTATE_B32_HW_REG : NVPTXInst<
    (outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
    "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst,
          (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
  Requires<[hasHWROT32]>;
// int_nvvm_rotate_b32 when noHWROT32 holds.
// Immediate amount: ROT32imm_sw receives the amount and SUB_FRM_32 applied to
// the same amount.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]>;

// Register amount: handled by ROTL32reg_sw.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
// Extract one 32-bit element of a 64-bit register.  Both instructions unpack
// $src with a single mov.b64 into a two-element vector inside a braced PTX
// scope: one element is $dst, the other is a scratch register %dummy declared
// in that scope.  Neither has a selection pattern; both are marked as having
// no side effects.
let hasSideEffects = 0 in {
  // $dst is the first element of the unpacked pair.
  def GET_LO_INT64 : NVPTXInst<
      (outs Int32Regs:$dst), (ins Int64Regs:$src),
      "{{\n\t"
      # ".reg .b32 %dummy;\n\t"
      # "mov.b64 \t{$dst,%dummy}, $src;\n\t"
      # "}}",
      []>;

  // $dst is the second element of the unpacked pair.
  def GET_HI_INT64 : NVPTXInst<
      (outs Int32Regs:$dst), (ins Int64Regs:$src),
      "{{\n\t"
      # ".reg .b32 %dummy;\n\t"
      # "mov.b64 \t{%dummy,$dst}, $src;\n\t"
      # "}}",
      []>;
}
// Build a 64-bit register from two 32-bit registers with one mov.b64 whose
// source is the vector {$lo, $hi}.  No selection pattern; marked as having no
// side effects.
let hasSideEffects = 0 in {
  def PACK_TWO_INT32 : NVPTXInst<
      (outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
      "mov.b64 \t$dst, {{$lo, $hi}};",
      []>;
}
// int_nvvm_swap_lo_hi_b64: repack $src with its two 32-bit elements exchanged
// (GET_HI_INT64 feeds the $lo operand, GET_LO_INT64 the $hi operand).
def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
          (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
                          (GET_LO_INT64 Int64Regs:$src))>;
// Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
// no side effects.
//
// Four pattern-less instructions: left/right (shf.l / shf.r) in wrap mode,
// each with the amount as an immediate or in a register.  All are predicated
// on hasHWROT32.
let hasSideEffects = 0 in {
  def SHF_L_WRAP_B32_IMM : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
      "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",
      []>,
    Requires<[hasHWROT32]>;

  def SHF_L_WRAP_B32_REG : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
      "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",
      []>,
    Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_IMM : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
      "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",
      []>,
    Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_REG : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
      "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",
      []>,
    Requires<[hasHWROT32]>;
}
// HW version of rotate 64
// int_nvvm_rotate_b64 is expanded into two 32-bit left funnel shifts over the
// two elements of $src, repacked with PACK_TWO_INT32.  The first shift takes
// (hi, lo) as its data operands, the second (lo, hi).

// Immediate amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src),
                                imm:$amt),
            (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src),
                                imm:$amt))>,
      Requires<[hasHWROT32]>;

// Register amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src),
                                Int32Regs:$amt),
            (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src),
                                Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;
// HW version of rotate-right 64
// int_nvvm_rotate_right_b64 is expanded into two 32-bit right funnel shifts
// repacked with PACK_TWO_INT32.  The first shift takes (lo, hi) as its data
// operands, the second (hi, lo).

// Immediate amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src),
                                imm:$amt),
            (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src),
                                imm:$amt))>,
      Requires<[hasHWROT32]>;

// Register amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src),
                                Int32Regs:$amt),
            (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src),
                                Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;
// SW version of rotate 64
// Used when noHWROT32 holds.

// Rotate left, immediate amount: ROT64imm_sw gets the amount followed by
// SUB_FRM_32 of the amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]>;

// Rotate left, register amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;

// Rotate right, immediate amount: the same ROT64imm_sw, with SUB_FRM_64 of
// the amount first and the amount itself second.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;

// Rotate right, register amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2290 //-----------------------------------
2291 // Texture Intrinsics
2292 //-----------------------------------
2294 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2295 // also defined in NVPTXReplaceImageHandles.cpp
2297 // texmode_independent
2298 let IsTex = 1, IsTexModeUnified = 0 in {
2299 // Texture fetch instructions using handles
// tex.1d fetches producing four Float32Regs results ($r, $g, $b, $a).
// $t and $s are Int64Regs operands printed first inside the brackets; $x is
// the single coordinate (s32 or f32).  The _LEVEL form adds $lod and the _GRAD
// form adds $gradx/$grady.  None of these carries a selection pattern.
def TEX_1D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
    "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
def TEX_1D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
    "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
def TEX_1D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], $lod;",
    []>;
def TEX_1D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// tex.1d fetches producing four Int32Regs results printed with the .s32
// result type.  Operand layout: $t, $s (Int64Regs), coordinate $x, then $lod
// for _LEVEL or $gradx/$grady for _GRAD.  No selection patterns.
def TEX_1D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
    "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
def TEX_1D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
    "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
def TEX_1D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], $lod;",
    []>;
def TEX_1D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// tex.1d fetches producing four Int32Regs results printed with the .u32
// result type.  Operand layout: $t, $s (Int64Regs), coordinate $x, then $lod
// for _LEVEL or $gradx/$grady for _GRAD.  No selection patterns.
def TEX_1D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
    "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
def TEX_1D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
    "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
def TEX_1D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], $lod;",
    []>;
def TEX_1D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// tex.a1d fetches producing four Float32Regs results.  Compared with the 1D
// forms there is an extra Int32Regs operand $l, printed before $x inside the
// coordinate vector.  No selection patterns.
def TEX_1D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}];",
    []>;
def TEX_1D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}];",
    []>;
def TEX_1D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}], $lod;",
    []>;
def TEX_1D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// tex.a1d fetches producing four Int32Regs results printed with the .s32
// result type.  $l (Int32Regs) precedes $x inside the coordinate vector.
// No selection patterns.
def TEX_1D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}];",
    []>;
def TEX_1D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}];",
    []>;
def TEX_1D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}], $lod;",
    []>;
def TEX_1D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// tex.a1d fetches producing four Int32Regs results printed with the .u32
// result type.  $l (Int32Regs) precedes $x inside the coordinate vector.
// No selection patterns.
def TEX_1D_ARRAY_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}];",
    []>;
def TEX_1D_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}];",
    []>;
def TEX_1D_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}], $lod;",
    []>;
def TEX_1D_ARRAY_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// tex.2d fetches producing four Float32Regs results.  The coordinate vector
// is {$x, $y}; the _GRAD form takes two two-element gradient vectors
// ({$gradx0, $gradx1} and {$grady0, $grady1}).  No selection patterns.
def TEX_2D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TEX_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TEX_2D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y\\}], $lod;",
    []>;
def TEX_2D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
// tex.2d.v4.s32.s32: writes four i32 registers ($r, $g, $b, $a) for the
// i32 coordinate pair ($x, $y). $t and $s are 64-bit register operands.
2507 def TEX_2D_S32_S32
2508   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2509                     Int32Regs:$b, Int32Regs:$a),
2510               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2511               "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2512               "[$t, $s, \\{$x, $y\\}];",
2513               []>;
// tex.2d.v4.s32.f32: writes four i32 registers for the f32 coordinate
// pair ($x, $y). The pattern list is empty.
2514 def TEX_2D_S32_F32
2515   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2516                     Int32Regs:$b, Int32Regs:$a),
2517               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2518               "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2519               "[$t, $s, \\{$x, $y\\}];",
2520               []>;
// tex.level.2d.v4.s32.f32: i32 results, f32 coordinates ($x, $y), and a
// trailing f32 operand $lod printed after the bracketed address.
2521 def TEX_2D_S32_F32_LEVEL
2522   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2523                     Int32Regs:$b, Int32Regs:$a),
2524               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2525                    Float32Regs:$lod),
2526               "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2527               "[$t, $s, \\{$x, $y\\}], $lod;",
2528               []>;
// tex.grad.2d.v4.s32.f32: i32 results, f32 coordinates ($x, $y), then the
// two-element f32 vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
2529 def TEX_2D_S32_F32_GRAD
2530   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2531                     Int32Regs:$b, Int32Regs:$a),
2532               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2533                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2534                    Float32Regs:$grady0, Float32Regs:$grady1),
2535               "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2536               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2537               "\\{$grady0, $grady1\\};",
2538               []>;
// tex.2d.v4.u32.s32: writes four i32 registers (printed with the .u32 type
// suffix) for the i32 coordinate pair ($x, $y).
2539 def TEX_2D_U32_S32
2540   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2541                     Int32Regs:$b, Int32Regs:$a),
2542               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2543               "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2544               "[$t, $s, \\{$x, $y\\}];",
2545               []>;
// tex.2d.v4.u32.f32: writes four i32 registers (printed with the .u32 type
// suffix) for the f32 coordinate pair ($x, $y).
2546 def TEX_2D_U32_F32
2547   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2548                     Int32Regs:$b, Int32Regs:$a),
2549               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2550               "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2551               "[$t, $s, \\{$x, $y\\}];",
2552               []>;
// tex.level.2d.v4.u32.f32: i32 results (.u32 suffix), f32 coordinates
// ($x, $y), and a trailing f32 operand $lod after the bracketed address.
2553 def TEX_2D_U32_F32_LEVEL
2554   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2555                     Int32Regs:$b, Int32Regs:$a),
2556               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2557                    Float32Regs:$lod),
2558               "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2559               "[$t, $s, \\{$x, $y\\}], $lod;",
2560               []>;
// tex.grad.2d.v4.u32.f32: i32 results (.u32 suffix), f32 coordinates
// ($x, $y), then the two-element f32 vectors {$gradx0, $gradx1} and
// {$grady0, $grady1}.
2561 def TEX_2D_U32_F32_GRAD
2562   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2563                     Int32Regs:$b, Int32Regs:$a),
2564               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2565                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2566                    Float32Regs:$grady0, Float32Regs:$grady1),
2567               "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2568               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2569               "\\{$grady0, $grady1\\};",
2570               []>;
// tex.a2d.v4.f32.s32: f32 results; inputs are an i32 operand $l followed
// by i32 coordinates ($x, $y). The address vector is printed with four
// elements, repeating $y as the last one.
// NOTE(review): the repeated $y presumably pads to the vector width PTX
// expects for a2d -- confirm against the PTX ISA.
2572 def TEX_2D_ARRAY_F32_S32
2573   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2574                     Float32Regs:$b, Float32Regs:$a),
2575               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2576                    Int32Regs:$y),
2577               "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2578               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2579               []>;
// tex.a2d.v4.f32.f32: f32 results; inputs are an i32 operand $l followed
// by f32 coordinates ($x, $y). The address vector is printed as
// {$l, $x, $y, $y}, i.e. $y appears twice (presumably padding -- confirm).
2580 def TEX_2D_ARRAY_F32_F32
2581   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2582                     Float32Regs:$b, Float32Regs:$a),
2583               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2584                    Float32Regs:$y),
2585               "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2586               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2587               []>;
// tex.level.a2d.v4.f32.f32: f32 results; i32 $l, f32 coordinates ($x, $y)
// and a trailing f32 $lod. The address vector is printed as
// {$l, $x, $y, $y} (repeated $y presumably padding -- confirm).
2588 def TEX_2D_ARRAY_F32_F32_LEVEL
2589   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2590                     Float32Regs:$b, Float32Regs:$a),
2591               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2592                    Float32Regs:$y, Float32Regs:$lod),
2593               "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2594               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2595               []>;
// tex.grad.a2d.v4.f32.f32: f32 results; i32 $l, f32 coordinates ($x, $y),
// then the two-element f32 vectors {$gradx0, $gradx1} and
// {$grady0, $grady1}. The address vector is printed as {$l, $x, $y, $y}
// (repeated $y presumably padding -- confirm).
2596 def TEX_2D_ARRAY_F32_F32_GRAD
2597   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2598                     Float32Regs:$b, Float32Regs:$a),
2599               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2600                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2601                    Float32Regs:$grady0, Float32Regs:$grady1),
2602               "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2603               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2604               "\\{$grady0, $grady1\\};",
2605               []>;
// tex.a2d.v4.s32.s32: i32 results; inputs are i32 $l and i32 coordinates
// ($x, $y). The address vector is printed as {$l, $x, $y, $y} (repeated $y
// presumably padding -- confirm).
2606 def TEX_2D_ARRAY_S32_S32
2607   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2608                     Int32Regs:$b, Int32Regs:$a),
2609               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2610                    Int32Regs:$y),
2611               "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2612               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2613               []>;
// tex.a2d.v4.s32.f32: i32 results; inputs are i32 $l and f32 coordinates
// ($x, $y). The address vector is printed as {$l, $x, $y, $y} (repeated $y
// presumably padding -- confirm).
2614 def TEX_2D_ARRAY_S32_F32
2615   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2616                     Int32Regs:$b, Int32Regs:$a),
2617               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2618                    Float32Regs:$y),
2619               "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2620               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2621               []>;
// tex.level.a2d.v4.s32.f32: i32 results; i32 $l, f32 coordinates ($x, $y)
// and a trailing f32 $lod. The address vector is printed as
// {$l, $x, $y, $y} (repeated $y presumably padding -- confirm).
2622 def TEX_2D_ARRAY_S32_F32_LEVEL
2623   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2624                     Int32Regs:$b, Int32Regs:$a),
2625               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2626                    Float32Regs:$y, Float32Regs:$lod),
2627               "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2628               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2629               []>;
// tex.grad.a2d.v4.s32.f32: i32 results; i32 $l, f32 coordinates ($x, $y),
// then the two-element f32 vectors {$gradx0, $gradx1} and
// {$grady0, $grady1}. The address vector is printed as {$l, $x, $y, $y}
// (repeated $y presumably padding -- confirm).
2630 def TEX_2D_ARRAY_S32_F32_GRAD
2631   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2632                     Int32Regs:$b, Int32Regs:$a),
2633               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2634                    Float32Regs:$y,
2635                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2636                    Float32Regs:$grady0, Float32Regs:$grady1),
2637               "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2638               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2639               "\\{$grady0, $grady1\\};",
2640               []>;
// tex.a2d.v4.u32.s32: i32 results (.u32 suffix); inputs are i32 $l and i32
// coordinates ($x, $y). The address vector is printed as {$l, $x, $y, $y}
// (repeated $y presumably padding -- confirm).
2641 def TEX_2D_ARRAY_U32_S32
2642   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2643                     Int32Regs:$b, Int32Regs:$a),
2644               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2645                    Int32Regs:$y),
2646               "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2647               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2648               []>;
// tex.a2d.v4.u32.f32: i32 results (.u32 suffix); inputs are i32 $l and f32
// coordinates ($x, $y). The address vector is printed as {$l, $x, $y, $y}
// (repeated $y presumably padding -- confirm).
2649 def TEX_2D_ARRAY_U32_F32
2650   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2651                     Int32Regs:$b, Int32Regs:$a),
2652               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2653                    Float32Regs:$y),
2654               "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2655               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2656               []>;
// tex.level.a2d.v4.u32.f32: i32 results (.u32 suffix); i32 $l, f32
// coordinates ($x, $y) and a trailing f32 $lod. The address vector is
// printed as {$l, $x, $y, $y} (repeated $y presumably padding -- confirm).
2657 def TEX_2D_ARRAY_U32_F32_LEVEL
2658   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2659                     Int32Regs:$b, Int32Regs:$a),
2660               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2661                    Float32Regs:$y, Float32Regs:$lod),
2662               "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2663               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2664               []>;
// tex.grad.a2d.v4.u32.f32: i32 results (.u32 suffix); i32 $l, f32
// coordinates ($x, $y), then the two-element f32 vectors
// {$gradx0, $gradx1} and {$grady0, $grady1}. The address vector is printed
// as {$l, $x, $y, $y} (repeated $y presumably padding -- confirm).
2665 def TEX_2D_ARRAY_U32_F32_GRAD
2666   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2667                     Int32Regs:$b, Int32Regs:$a),
2668               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2669                    Float32Regs:$y,
2670                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2671                    Float32Regs:$grady0, Float32Regs:$grady1),
2672               "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2673               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2674               "\\{$grady0, $grady1\\};",
2675               []>;
// tex.3d.v4.f32.s32: f32 results for three i32 coordinates ($x, $y, $z).
// The coordinate vector is printed with four elements, repeating $z as the
// last one.
// NOTE(review): the repeated $z presumably pads to the vector width PTX
// expects for 3d -- confirm against the PTX ISA.
2677 def TEX_3D_F32_S32
2678   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2679                     Float32Regs:$b, Float32Regs:$a),
2680               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2681                    Int32Regs:$z),
2682               "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2683               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2684               []>;
// tex.3d.v4.f32.f32: f32 results for three f32 coordinates ($x, $y, $z).
// The coordinate vector is printed as {$x, $y, $z, $z} (repeated $z
// presumably padding -- confirm).
2685 def TEX_3D_F32_F32
2686   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2687                     Float32Regs:$b, Float32Regs:$a),
2688               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2689                    Float32Regs:$z),
2690               "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2691               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2692               []>;
// tex.level.3d.v4.f32.f32: f32 results; f32 coordinates ($x, $y, $z) and a
// trailing f32 $lod. The coordinate vector is printed as {$x, $y, $z, $z}
// (repeated $z presumably padding -- confirm).
2693 def TEX_3D_F32_F32_LEVEL
2694   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2695                     Float32Regs:$b, Float32Regs:$a),
2696               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2697                    Float32Regs:$z, Float32Regs:$lod),
2698               "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2699               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2700               []>;
// tex.grad.3d.v4.f32.f32: f32 results; f32 coordinates ($x, $y, $z) plus
// six f32 gradient operands ($gradx0..2, $grady0..2). The coordinate
// vector and both gradient vectors are each printed with four elements,
// repeating their third operand as the last one (presumably padding --
// confirm against the PTX ISA).
2701 def TEX_3D_F32_F32_GRAD
2702   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2703                     Float32Regs:$b, Float32Regs:$a),
2704               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2705                    Float32Regs:$z,
2706                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2707                    Float32Regs:$gradx2, Float32Regs:$grady0,
2708                    Float32Regs:$grady1, Float32Regs:$grady2),
2709               "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2710               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2711               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2712               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2713               []>;
// tex.3d.v4.s32.s32: i32 results for three i32 coordinates ($x, $y, $z).
// The coordinate vector is printed as {$x, $y, $z, $z} (repeated $z
// presumably padding -- confirm).
2714 def TEX_3D_S32_S32
2715   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2716                     Int32Regs:$b, Int32Regs:$a),
2717               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2718                    Int32Regs:$z),
2719               "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2720               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2721               []>;
// tex.3d.v4.s32.f32: i32 results for three f32 coordinates ($x, $y, $z).
// The coordinate vector is printed as {$x, $y, $z, $z} (repeated $z
// presumably padding -- confirm).
2722 def TEX_3D_S32_F32
2723   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2724                     Int32Regs:$b, Int32Regs:$a),
2725               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2726                    Float32Regs:$z),
2727               "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2728               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2729               []>;
// tex.level.3d.v4.s32.f32: i32 results; f32 coordinates ($x, $y, $z) and a
// trailing f32 $lod. The coordinate vector is printed as {$x, $y, $z, $z}
// (repeated $z presumably padding -- confirm).
2730 def TEX_3D_S32_F32_LEVEL
2731   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2732                     Int32Regs:$b, Int32Regs:$a),
2733               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2734                    Float32Regs:$z, Float32Regs:$lod),
2735               "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2736               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2737               []>;
// tex.grad.3d.v4.s32.f32: i32 results; f32 coordinates ($x, $y, $z) plus
// six f32 gradient operands ($gradx0..2, $grady0..2). The coordinate
// vector and both gradient vectors are each printed with four elements,
// repeating their third operand as the last one (presumably padding --
// confirm against the PTX ISA).
2738 def TEX_3D_S32_F32_GRAD
2739   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2740                     Int32Regs:$b, Int32Regs:$a),
2741               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2742                    Float32Regs:$z,
2743                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2744                    Float32Regs:$gradx2, Float32Regs:$grady0,
2745                    Float32Regs:$grady1, Float32Regs:$grady2),
2746               "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2747               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2748               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2749               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2750               []>;
// tex.3d.v4.u32.s32: i32 results (.u32 suffix) for three i32 coordinates
// ($x, $y, $z). The coordinate vector is printed as {$x, $y, $z, $z}
// (repeated $z presumably padding -- confirm).
2751 def TEX_3D_U32_S32
2752   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2753                     Int32Regs:$b, Int32Regs:$a),
2754               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2755                    Int32Regs:$z),
2756               "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2757               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2758               []>;
// tex.3d.v4.u32.f32: i32 results (.u32 suffix) for three f32 coordinates
// ($x, $y, $z). The coordinate vector is printed as {$x, $y, $z, $z}
// (repeated $z presumably padding -- confirm).
2759 def TEX_3D_U32_F32
2760   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2761                     Int32Regs:$b, Int32Regs:$a),
2762               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2763                    Float32Regs:$z),
2764               "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2765               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2766               []>;
// tex.level.3d.v4.u32.f32: i32 results (.u32 suffix); f32 coordinates
// ($x, $y, $z) and a trailing f32 $lod. The coordinate vector is printed
// as {$x, $y, $z, $z} (repeated $z presumably padding -- confirm).
2767 def TEX_3D_U32_F32_LEVEL
2768   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2769                     Int32Regs:$b, Int32Regs:$a),
2770               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2771                    Float32Regs:$z, Float32Regs:$lod),
2772               "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2773               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2774               []>;
// tex.grad.3d.v4.u32.f32: i32 results (.u32 suffix); f32 coordinates
// ($x, $y, $z) plus six f32 gradient operands ($gradx0..2, $grady0..2).
// The coordinate vector and both gradient vectors are each printed with
// four elements, repeating their third operand as the last one (presumably
// padding -- confirm against the PTX ISA).
2775 def TEX_3D_U32_F32_GRAD
2776   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2777                     Int32Regs:$b, Int32Regs:$a),
2778               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2779                    Float32Regs:$z,
2780                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2781                    Float32Regs:$gradx2, Float32Regs:$grady0,
2782                    Float32Regs:$grady1, Float32Regs:$grady2),
2783               "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2784               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2785               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2786               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2787               []>;
// tex.cube.v4.f32.f32: f32 results for three f32 coordinates ($x, $y, $z).
// The coordinate vector is printed with four elements, repeating $z as the
// last one.
// NOTE(review): the repeated $z presumably pads to the vector width PTX
// expects for cube -- confirm against the PTX ISA.
2789 def TEX_CUBE_F32_F32
2790   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2791                     Float32Regs:$b, Float32Regs:$a),
2792               (ins Int64Regs:$t, Int64Regs:$s,
2793                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2794               "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2795               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2796               []>;
// tex.level.cube.v4.f32.f32: f32 results; f32 coordinates ($x, $y, $z) and
// a trailing f32 $lod. The coordinate vector is printed as
// {$x, $y, $z, $z} (repeated $z presumably padding -- confirm).
2797 def TEX_CUBE_F32_F32_LEVEL
2798   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2799                     Float32Regs:$b, Float32Regs:$a),
2800               (ins Int64Regs:$t, Int64Regs:$s,
2801                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2802                    Float32Regs:$lod),
2803               "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2804               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2805               []>;
// tex.cube.v4.s32.f32: i32 results for three f32 coordinates ($x, $y, $z).
// The coordinate vector is printed as {$x, $y, $z, $z} (repeated $z
// presumably padding -- confirm).
2806 def TEX_CUBE_S32_F32
2807   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2808                     Int32Regs:$b, Int32Regs:$a),
2809               (ins Int64Regs:$t, Int64Regs:$s,
2810                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2811               "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2812               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2813               []>;
// tex.level.cube.v4.s32.f32: i32 results; f32 coordinates ($x, $y, $z) and
// a trailing f32 $lod. The coordinate vector is printed as
// {$x, $y, $z, $z} (repeated $z presumably padding -- confirm).
2814 def TEX_CUBE_S32_F32_LEVEL
2815   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2816                     Int32Regs:$b, Int32Regs:$a),
2817               (ins Int64Regs:$t, Int64Regs:$s,
2818                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2819                    Float32Regs:$lod),
2820               "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2821               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2822               []>;
// tex.cube.v4.u32.f32: i32 results (.u32 suffix) for three f32 coordinates
// ($x, $y, $z). The coordinate vector is printed as {$x, $y, $z, $z}
// (repeated $z presumably padding -- confirm).
2823 def TEX_CUBE_U32_F32
2824   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2825                     Int32Regs:$b, Int32Regs:$a),
2826               (ins Int64Regs:$t, Int64Regs:$s,
2827                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2828               "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2829               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2830               []>;
// tex.level.cube.v4.u32.f32: i32 results (.u32 suffix); f32 coordinates
// ($x, $y, $z) and a trailing f32 $lod. The coordinate vector is printed
// as {$x, $y, $z, $z} (repeated $z presumably padding -- confirm).
2831 def TEX_CUBE_U32_F32_LEVEL
2832   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2833                     Int32Regs:$b, Int32Regs:$a),
2834               (ins Int64Regs:$t, Int64Regs:$s,
2835                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2836                    Float32Regs:$lod),
2837               "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2838               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2839               []>;
// tex.acube.v4.f32.f32: f32 results; inputs are an i32 operand $l followed
// by three f32 coordinates. The address vector {$l, $x, $y, $z} already has
// four distinct elements, so nothing is repeated here.
2841 def TEX_CUBE_ARRAY_F32_F32
2842   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2843                     Float32Regs:$b, Float32Regs:$a),
2844               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2845                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2846               "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2847               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2848               []>;
// tex.level.acube.v4.f32.f32: f32 results; i32 $l, three f32 coordinates
// printed as {$l, $x, $y, $z}, and a trailing f32 $lod after the address.
2849 def TEX_CUBE_ARRAY_F32_F32_LEVEL
2850   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2851                     Float32Regs:$b, Float32Regs:$a),
2852               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2853                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2854                    Float32Regs:$lod),
2855               "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2856               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2857               []>;
// tex.acube.v4.s32.f32: i32 results; inputs are i32 $l and three f32
// coordinates, printed together as {$l, $x, $y, $z}.
2858 def TEX_CUBE_ARRAY_S32_F32
2859   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2860                     Int32Regs:$b, Int32Regs:$a),
2861               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2862                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2863               "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2864               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2865               []>;
// tex.level.acube.v4.s32.f32: i32 results; i32 $l, three f32 coordinates
// printed as {$l, $x, $y, $z}, and a trailing f32 $lod after the address.
2866 def TEX_CUBE_ARRAY_S32_F32_LEVEL
2867   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2868                     Int32Regs:$b, Int32Regs:$a),
2869               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2870                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2871                    Float32Regs:$lod),
2872               "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2873               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2874               []>;
// tex.acube.v4.u32.f32: i32 results (.u32 suffix); inputs are i32 $l and
// three f32 coordinates, printed together as {$l, $x, $y, $z}.
2875 def TEX_CUBE_ARRAY_U32_F32
2876   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2877                     Int32Regs:$b, Int32Regs:$a),
2878               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2879                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2880               "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2881               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2882               []>;
// tex.level.acube.v4.u32.f32: i32 results (.u32 suffix); i32 $l, three f32
// coordinates printed as {$l, $x, $y, $z}, and a trailing f32 $lod.
2883 def TEX_CUBE_ARRAY_U32_F32_LEVEL
2884   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2885                     Int32Regs:$b, Int32Regs:$a),
2886               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2887                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2888                    Float32Regs:$lod),
2889               "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2890               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2891               []>;
// tld4.r.2d.v4.f32.f32: writes four f32 registers ($v0..$v3) for the f32
// coordinate pair ($x, $y), using the `.r` component selector. $t and $s
// are 64-bit register operands; the pattern list is empty.
2893 def TLD4_R_2D_F32_F32
2894   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2895                     Float32Regs:$v2, Float32Regs:$v3),
2896               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2897               "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2898               "[$t, $s, \\{$x, $y\\}];",
2899               []>;
// tld4.g.2d.v4.f32.f32: four f32 results ($v0..$v3) for f32 coordinates
// ($x, $y), using the `.g` component selector.
2900 def TLD4_G_2D_F32_F32
2901   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2902                     Float32Regs:$v2, Float32Regs:$v3),
2903               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2904               "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2905               "[$t, $s, \\{$x, $y\\}];",
2906               []>;
// tld4.b.2d.v4.f32.f32: four f32 results ($v0..$v3) for f32 coordinates
// ($x, $y), using the `.b` component selector.
2907 def TLD4_B_2D_F32_F32
2908   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2909                     Float32Regs:$v2, Float32Regs:$v3),
2910               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2911               "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2912               "[$t, $s, \\{$x, $y\\}];",
2913               []>;
// tld4.a.2d.v4.f32.f32: four f32 results ($v0..$v3) for f32 coordinates
// ($x, $y), using the `.a` component selector.
2914 def TLD4_A_2D_F32_F32
2915   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2916                     Float32Regs:$v2, Float32Regs:$v3),
2917               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2918               "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2919               "[$t, $s, \\{$x, $y\\}];",
2920               []>;
// tld4.r.2d.v4.s32.f32: four i32 results ($v0..$v3) for f32 coordinates
// ($x, $y), using the `.r` component selector.
2921 def TLD4_R_2D_S32_F32
2922   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2923                     Int32Regs:$v2, Int32Regs:$v3),
2924               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2925               "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2926               "[$t, $s, \\{$x, $y\\}];",
2927               []>;
// tld4.g.2d.v4.s32.f32: four i32 results ($v0..$v3) for f32 coordinates
// ($x, $y), using the `.g` component selector.
2928 def TLD4_G_2D_S32_F32
2929   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2930                     Int32Regs:$v2, Int32Regs:$v3),
2931               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2932               "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2933               "[$t, $s, \\{$x, $y\\}];",
2934               []>;
// tld4.b.2d.v4.s32.f32: four i32 results ($v0..$v3) for f32 coordinates
// ($x, $y), using the `.b` component selector.
2935 def TLD4_B_2D_S32_F32
2936   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2937                     Int32Regs:$v2, Int32Regs:$v3),
2938               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2939               "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2940               "[$t, $s, \\{$x, $y\\}];",
2941               []>;
// tld4.a.2d.v4.s32.f32: four i32 results ($v0..$v3) for f32 coordinates
// ($x, $y), using the `.a` component selector.
2942 def TLD4_A_2D_S32_F32
2943   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2944                     Int32Regs:$v2, Int32Regs:$v3),
2945               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2946               "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2947               "[$t, $s, \\{$x, $y\\}];",
2948               []>;
// tld4.r.2d.v4.u32.f32: four i32 results (.u32 suffix) for f32 coordinates
// ($x, $y), using the `.r` component selector.
2949 def TLD4_R_2D_U32_F32
2950   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2951                     Int32Regs:$v2, Int32Regs:$v3),
2952               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2953               "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2954               "[$t, $s, \\{$x, $y\\}];",
2955               []>;
// tld4.g.2d.v4.u32.f32: four i32 results (.u32 suffix) for f32 coordinates
// ($x, $y), using the `.g` component selector.
2956 def TLD4_G_2D_U32_F32
2957   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2958                     Int32Regs:$v2, Int32Regs:$v3),
2959               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2960               "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2961               "[$t, $s, \\{$x, $y\\}];",
2962               []>;
// tld4.b.2d.v4.u32.f32: four i32 results (.u32 suffix) for f32 coordinates
// ($x, $y), using the `.b` component selector.
2963 def TLD4_B_2D_U32_F32
2964   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2965                     Int32Regs:$v2, Int32Regs:$v3),
2966               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2967               "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2968               "[$t, $s, \\{$x, $y\\}];",
2969               []>;
// tld4.a.2d.v4.u32.f32: four i32 results (.u32 suffix) for f32 coordinates
// ($x, $y), using the `.a` component selector.
2970 def TLD4_A_2D_U32_F32
2971   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2972                     Int32Regs:$v2, Int32Regs:$v3),
2973               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2974               "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2975               "[$t, $s, \\{$x, $y\\}];",
2976               []>;
2980 // texmode_unified
2981 let IsTex = 1, IsTexModeUnified = 1 in {
2982 // Texture fetch instructions using handles
// Unified-mode tex.1d.v4.f32.s32: the address bracket carries only the
// single 64-bit operand $t (no $s operand) plus the one-element coordinate
// vector {$x} from an i32 register. Results are four f32 registers.
2983 def TEX_UNIFIED_1D_F32_S32
2984   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2985                     Float32Regs:$b, Float32Regs:$a),
2986               (ins Int64Regs:$t, Int32Regs:$x),
2987               "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2988               []>;
// Unified-mode tex.1d.v4.f32.f32: single 64-bit operand $t and an f32
// coordinate $x; results are four f32 registers.
2989 def TEX_UNIFIED_1D_F32_F32
2990   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2991                     Float32Regs:$b, Float32Regs:$a),
2992               (ins Int64Regs:$t, Float32Regs:$x),
2993               "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2994               []>;
// Unified-mode tex.level.1d.v4.f32.f32: single 64-bit operand $t, f32
// coordinate $x, and a trailing f32 $lod printed after the address.
2995 def TEX_UNIFIED_1D_F32_F32_LEVEL
2996   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2997                     Float32Regs:$b, Float32Regs:$a),
2998               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
2999               "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3000               "[$t, \\{$x\\}], $lod;",
3001               []>;
// Unified-mode tex.grad.1d.v4.f32.f32: single 64-bit operand $t, f32
// coordinate $x, then the one-element f32 vectors {$gradx} and {$grady}.
3002 def TEX_UNIFIED_1D_F32_F32_GRAD
3003   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3004                     Float32Regs:$b, Float32Regs:$a),
3005               (ins Int64Regs:$t, Float32Regs:$x,
3006                    Float32Regs:$gradx, Float32Regs:$grady),
3007               "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3008               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3009               []>;
// Unified-mode tex.1d.v4.s32.s32: single 64-bit operand $t and an i32
// coordinate $x; results are four i32 registers.
3010 def TEX_UNIFIED_1D_S32_S32
3011   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3012                     Int32Regs:$b, Int32Regs:$a),
3013               (ins Int64Regs:$t, Int32Regs:$x),
3014               "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3015               []>;
// Unified-mode tex.1d.v4.s32.f32: single 64-bit operand $t and an f32
// coordinate $x; results are four i32 registers.
3016 def TEX_UNIFIED_1D_S32_F32
3017   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3018                     Int32Regs:$b, Int32Regs:$a),
3019               (ins Int64Regs:$t, Float32Regs:$x),
3020               "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3021               []>;
// Unified-mode tex.level.1d.v4.s32.f32: i32 results; single 64-bit operand
// $t, f32 coordinate $x, and a trailing f32 $lod after the address.
3022 def TEX_UNIFIED_1D_S32_F32_LEVEL
3023   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3024                     Int32Regs:$b, Int32Regs:$a),
3025               (ins Int64Regs:$t, Float32Regs:$x,
3026                    Float32Regs:$lod),
3027               "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3028               "[$t, \\{$x\\}], $lod;",
3029               []>;
// Unified-mode tex.grad.1d.v4.s32.f32: i32 results; single 64-bit operand
// $t, f32 coordinate $x, then the one-element f32 vectors {$gradx} and
// {$grady}.
3030 def TEX_UNIFIED_1D_S32_F32_GRAD
3031   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3032                     Int32Regs:$b, Int32Regs:$a),
3033               (ins Int64Regs:$t, Float32Regs:$x,
3034                    Float32Regs:$gradx, Float32Regs:$grady),
3035               "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3036               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3037               []>;
// Unified-mode tex.1d.v4.u32.s32: single 64-bit operand $t and an i32
// coordinate $x; results are four i32 registers (.u32 suffix).
3038 def TEX_UNIFIED_1D_U32_S32
3039   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3040                     Int32Regs:$b, Int32Regs:$a),
3041               (ins Int64Regs:$t, Int32Regs:$x),
3042               "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3043               []>;
// Unified-mode tex.1d.v4.u32.f32: single 64-bit operand $t and an f32
// coordinate $x; results are four i32 registers (.u32 suffix).
3044 def TEX_UNIFIED_1D_U32_F32
3045   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3046                     Int32Regs:$b, Int32Regs:$a),
3047               (ins Int64Regs:$t, Float32Regs:$x),
3048               "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3049               []>;
// Unified-mode tex.level.1d.v4.u32.f32: i32 results (.u32 suffix); single
// 64-bit operand $t, f32 coordinate $x, and a trailing f32 $lod.
3050 def TEX_UNIFIED_1D_U32_F32_LEVEL
3051   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3052                     Int32Regs:$b, Int32Regs:$a),
3053               (ins Int64Regs:$t, Float32Regs:$x,
3054                    Float32Regs:$lod),
3055               "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3056               "[$t, \\{$x\\}], $lod;",
3057               []>;
// Unified-mode tex.grad.1d.v4.u32.f32: i32 results (.u32 suffix); single
// 64-bit operand $t, f32 coordinate $x, then the one-element f32 vectors
// {$gradx} and {$grady}.
3058 def TEX_UNIFIED_1D_U32_F32_GRAD
3059   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3060                     Int32Regs:$b, Int32Regs:$a),
3061               (ins Int64Regs:$t, Float32Regs:$x,
3062                    Float32Regs:$gradx, Float32Regs:$grady),
3063               "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3064               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3065               []>;
// Unified-mode tex.a1d.v4.f32.s32: single 64-bit operand $t, then the
// two-element vector {$l, $x} built from an i32 operand $l and an i32
// coordinate $x. Results are four f32 registers.
3067 def TEX_UNIFIED_1D_ARRAY_F32_S32
3068   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3069                     Float32Regs:$b, Float32Regs:$a),
3070               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3071               "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3072               "[$t, \\{$l, $x\\}];",
3073               []>;
// Unified-mode tex.a1d.v4.f32.f32: single 64-bit operand $t, then
// {$l, $x} with $l from an i32 register and $x from an f32 register.
// Results are four f32 registers.
3074 def TEX_UNIFIED_1D_ARRAY_F32_F32
3075   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3076                     Float32Regs:$b, Float32Regs:$a),
3077               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3078               "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3079               "[$t, \\{$l, $x\\}];",
3080               []>;
// Unified-mode tex.level.a1d.v4.f32.f32: f32 results; single 64-bit
// operand $t, the vector {$l, $x} (i32 $l, f32 $x), and a trailing f32
// $lod after the address.
3081 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3082   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3083                     Float32Regs:$b, Float32Regs:$a),
3084               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3085                    Float32Regs:$lod),
3086               "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3087               "[$t, \\{$l, $x\\}], $lod;",
3088               []>;
// Unified-mode tex.grad.a1d.v4.f32.f32: f32 results; single 64-bit operand
// $t, the vector {$l, $x} (i32 $l, f32 $x), then the one-element f32
// vectors {$gradx} and {$grady}.
3089 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3090   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3091                     Float32Regs:$b, Float32Regs:$a),
3092               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3093                    Float32Regs:$gradx, Float32Regs:$grady),
3094               "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3095               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3096               []>;
// Unified-mode tex.a1d.v4.s32.s32: i32 results; single 64-bit operand $t,
// then {$l, $x} with both elements from i32 registers.
3097 def TEX_UNIFIED_1D_ARRAY_S32_S32
3098   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3099                     Int32Regs:$b, Int32Regs:$a),
3100               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3101               "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3102               "[$t, \\{$l, $x\\}];",
3103               []>;
// Unified-mode tex.a1d.v4.s32.f32: i32 results; single 64-bit operand $t,
// then {$l, $x} with $l from an i32 register and $x from an f32 register.
3104 def TEX_UNIFIED_1D_ARRAY_S32_F32
3105   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3106                     Int32Regs:$b, Int32Regs:$a),
3107               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3108               "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3109               "[$t, \\{$l, $x\\}];",
3110               []>;
// Unified-mode tex.level.a1d.v4.s32.f32: i32 results; single 64-bit
// operand $t, the vector {$l, $x} (i32 $l, f32 $x), and a trailing f32
// $lod after the address.
3111 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3112   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3113                     Int32Regs:$b, Int32Regs:$a),
3114               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3115                    Float32Regs:$lod),
3116               "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3117               "[$t, \\{$l, $x\\}], $lod;",
3118               []>;
// Unified-mode tex.grad.a1d.v4.s32.f32: i32 results; single 64-bit operand
// $t, the vector {$l, $x} (i32 $l, f32 $x), then the one-element f32
// vectors {$gradx} and {$grady}.
3119 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3120   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3121                     Int32Regs:$b, Int32Regs:$a),
3122               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3123                    Float32Regs:$gradx, Float32Regs:$grady),
3124               "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3125               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3126               []>;
// Prints `tex.a1d.v4.u32.s32`: four Int32Regs results ($r, $g, $b, $a),
// address [$t, {$l, $x}] with $l and $x in Int32Regs.
3127 def TEX_UNIFIED_1D_ARRAY_U32_S32
3128   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3129                     Int32Regs:$b, Int32Regs:$a),
3130               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3131               "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3132               "[$t, \\{$l, $x\\}];",
3133               []>;
// Prints `tex.a1d.v4.u32.f32`: four Int32Regs results ($r, $g, $b, $a),
// address [$t, {$l, $x}] with $l in Int32Regs and $x in Float32Regs.
3134 def TEX_UNIFIED_1D_ARRAY_U32_F32
3135   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3136                     Int32Regs:$b, Int32Regs:$a),
3137               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3138               "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3139               "[$t, \\{$l, $x\\}];",
3140               []>;
// Prints `tex.level.a1d.v4.u32.f32`: four Int32Regs results, address
// [$t, {$l, $x}], followed by the scalar Float32Regs operand $lod.
3141 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3142   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3143                     Int32Regs:$b, Int32Regs:$a),
3144               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3145                    Float32Regs:$lod),
3146               "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3147               "[$t, \\{$l, $x\\}], $lod;",
3148               []>;
// Prints `tex.grad.a1d.v4.u32.f32`: four Int32Regs results, address
// [$t, {$l, $x}], then one-element vectors {$gradx} and {$grady}.
3149 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3150   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3151                     Int32Regs:$b, Int32Regs:$a),
3152               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3153                    Float32Regs:$gradx, Float32Regs:$grady),
3154               "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3155               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3156               []>;
// Prints `tex.2d.v4.f32.s32`: four Float32Regs results ($r, $g, $b, $a),
// address [$t, {$x, $y}] with $t in Int64Regs and $x, $y in Int32Regs.
3158 def TEX_UNIFIED_2D_F32_S32
3159   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3160                     Float32Regs:$b, Float32Regs:$a),
3161               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3162               "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3163               "[$t, \\{$x, $y\\}];",
3164               []>;
// Prints `tex.2d.v4.f32.f32`: four Float32Regs results ($r, $g, $b, $a),
// address [$t, {$x, $y}] with $x, $y in Float32Regs.
3165 def TEX_UNIFIED_2D_F32_F32
3166   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3167                     Float32Regs:$b, Float32Regs:$a),
3168               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3169               "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3170               "[$t, \\{$x, $y\\}];",
3171               []>;
// Prints `tex.level.2d.v4.f32.f32`: four Float32Regs results, address
// [$t, {$x, $y}], followed by the scalar Float32Regs operand $lod.
3172 def TEX_UNIFIED_2D_F32_F32_LEVEL
3173   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3174                     Float32Regs:$b, Float32Regs:$a),
3175               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3176                    Float32Regs:$lod),
3177               "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3178               "[$t, \\{$x, $y\\}], $lod;",
3179               []>;
// Prints `tex.grad.2d.v4.f32.f32`: four Float32Regs results, address
// [$t, {$x, $y}], then the two-element vectors {$gradx0, $gradx1} and
// {$grady0, $grady1} (all Float32Regs).
3180 def TEX_UNIFIED_2D_F32_F32_GRAD
3181   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3182                     Float32Regs:$b, Float32Regs:$a),
3183               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3184                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3185                    Float32Regs:$grady0, Float32Regs:$grady1),
3186               "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3187               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3188               "\\{$grady0, $grady1\\};",
3189               []>;
// Prints `tex.2d.v4.s32.s32`: four Int32Regs results ($r, $g, $b, $a),
// address [$t, {$x, $y}] with $x, $y in Int32Regs.
3190 def TEX_UNIFIED_2D_S32_S32
3191   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3192                     Int32Regs:$b, Int32Regs:$a),
3193               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3194               "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3195               "[$t, \\{$x, $y\\}];",
3196               []>;
// Prints `tex.2d.v4.s32.f32`: four Int32Regs results ($r, $g, $b, $a),
// address [$t, {$x, $y}] with $x, $y in Float32Regs.
3197 def TEX_UNIFIED_2D_S32_F32
3198   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3199                     Int32Regs:$b, Int32Regs:$a),
3200               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3201               "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3202               "[$t, \\{$x, $y\\}];",
3203               []>;
// Prints `tex.level.2d.v4.s32.f32`: four Int32Regs results, address
// [$t, {$x, $y}], followed by the scalar Float32Regs operand $lod.
3204 def TEX_UNIFIED_2D_S32_F32_LEVEL
3205   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3206                     Int32Regs:$b, Int32Regs:$a),
3207               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3208                    Float32Regs:$lod),
3209               "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3210               "[$t, \\{$x, $y\\}], $lod;",
3211               []>;
// Prints `tex.grad.2d.v4.s32.f32`: four Int32Regs results, address
// [$t, {$x, $y}], then the two-element vectors {$gradx0, $gradx1} and
// {$grady0, $grady1} (all Float32Regs).
3212 def TEX_UNIFIED_2D_S32_F32_GRAD
3213   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3214                     Int32Regs:$b, Int32Regs:$a),
3215               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3216                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3217                    Float32Regs:$grady0, Float32Regs:$grady1),
3218               "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3219               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3220               "\\{$grady0, $grady1\\};",
3221               []>;
// Prints `tex.2d.v4.u32.s32`: four Int32Regs results ($r, $g, $b, $a),
// address [$t, {$x, $y}] with $x, $y in Int32Regs.
3222 def TEX_UNIFIED_2D_U32_S32
3223   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3224                     Int32Regs:$b, Int32Regs:$a),
3225               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3226               "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3227               "[$t, \\{$x, $y\\}];",
3228               []>;
// Prints `tex.2d.v4.u32.f32`: four Int32Regs results ($r, $g, $b, $a),
// address [$t, {$x, $y}] with $x, $y in Float32Regs.
3229 def TEX_UNIFIED_2D_U32_F32
3230   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3231                     Int32Regs:$b, Int32Regs:$a),
3232               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3233               "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3234               "[$t, \\{$x, $y\\}];",
3235               []>;
// Prints `tex.level.2d.v4.u32.f32`: four Int32Regs results, address
// [$t, {$x, $y}], followed by the scalar Float32Regs operand $lod.
3236 def TEX_UNIFIED_2D_U32_F32_LEVEL
3237   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3238                     Int32Regs:$b, Int32Regs:$a),
3239               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3240                    Float32Regs:$lod),
3241               "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3242               "[$t, \\{$x, $y\\}], $lod;",
3243               []>;
// Prints `tex.grad.2d.v4.u32.f32`: four Int32Regs results, address
// [$t, {$x, $y}], then the two-element vectors {$gradx0, $gradx1} and
// {$grady0, $grady1} (all Float32Regs).
3244 def TEX_UNIFIED_2D_U32_F32_GRAD
3245   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3246                     Int32Regs:$b, Int32Regs:$a),
3247               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3248                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3249                    Float32Regs:$grady0, Float32Regs:$grady1),
3250               "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3251               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3252               "\\{$grady0, $grady1\\};",
3253               []>;
// Prints `tex.a2d.v4.f32.s32`: four Float32Regs results, address
// [$t, {$l, $x, $y, $y}] with $l, $x, $y in Int32Regs.
// $y is printed twice, giving a four-element coordinate vector from three
// operands (presumably padding required by the PTX syntax — confirm).
3255 def TEX_UNIFIED_2D_ARRAY_F32_S32
3256   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3257                     Float32Regs:$b, Float32Regs:$a),
3258               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3259                    Int32Regs:$y),
3260               "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3261               "[$t, \\{$l, $x, $y, $y\\}];",
3262               []>;
// Prints `tex.a2d.v4.f32.f32`: four Float32Regs results, address
// [$t, {$l, $x, $y, $y}] with $l in Int32Regs and $x, $y in Float32Regs.
// $y is printed twice to fill the four-element coordinate vector.
3263 def TEX_UNIFIED_2D_ARRAY_F32_F32
3264   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3265                     Float32Regs:$b, Float32Regs:$a),
3266               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3267                    Float32Regs:$y),
3268               "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3269               "[$t, \\{$l, $x, $y, $y\\}];",
3270               []>;
// Prints `tex.level.a2d.v4.f32.f32`: four Float32Regs results, address
// [$t, {$l, $x, $y, $y}] ($y printed twice), then the scalar operand $lod.
3271 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3272   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3273                     Float32Regs:$b, Float32Regs:$a),
3274               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3275                    Float32Regs:$y, Float32Regs:$lod),
3276               "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3277               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3278               []>;
// Prints `tex.grad.a2d.v4.f32.f32`: four Float32Regs results, address
// [$t, {$l, $x, $y, $y}] ($y printed twice), then the two-element vectors
// {$gradx0, $gradx1} and {$grady0, $grady1}.
3279 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3280   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3281                     Float32Regs:$b, Float32Regs:$a),
3282               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3283                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
3284                    Float32Regs:$grady0, Float32Regs:$grady1),
3285               "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3286               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3287               "\\{$grady0, $grady1\\};",
3288               []>;
// Prints `tex.a2d.v4.s32.s32`: four Int32Regs results, address
// [$t, {$l, $x, $y, $y}] with $l, $x, $y in Int32Regs.
// $y is printed twice to fill the four-element coordinate vector.
3289 def TEX_UNIFIED_2D_ARRAY_S32_S32
3290   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3291                     Int32Regs:$b, Int32Regs:$a),
3292               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3293                    Int32Regs:$y),
3294               "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3295               "[$t, \\{$l, $x, $y, $y\\}];",
3296               []>;
// Prints `tex.a2d.v4.s32.f32`: four Int32Regs results, address
// [$t, {$l, $x, $y, $y}] with $l in Int32Regs and $x, $y in Float32Regs.
// $y is printed twice to fill the four-element coordinate vector.
3297 def TEX_UNIFIED_2D_ARRAY_S32_F32
3298   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3299                     Int32Regs:$b, Int32Regs:$a),
3300               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3301                    Float32Regs:$y),
3302               "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3303               "[$t, \\{$l, $x, $y, $y\\}];",
3304               []>;
// Prints `tex.level.a2d.v4.s32.f32`: four Int32Regs results, address
// [$t, {$l, $x, $y, $y}] ($y printed twice), then the scalar operand $lod.
3305 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3306   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3307                     Int32Regs:$b, Int32Regs:$a),
3308               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3309                    Float32Regs:$y, Float32Regs:$lod),
3310               "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3311               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3312               []>;
// Prints `tex.grad.a2d.v4.s32.f32`: four Int32Regs results, address
// [$t, {$l, $x, $y, $y}] ($y printed twice), then the two-element vectors
// {$gradx0, $gradx1} and {$grady0, $grady1}.
3313 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3314   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3315                     Int32Regs:$b, Int32Regs:$a),
3316               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3317                    Float32Regs:$y,
3318                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3319                    Float32Regs:$grady0, Float32Regs:$grady1),
3320               "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3321               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3322               "\\{$grady0, $grady1\\};",
3323               []>;
// Prints `tex.a2d.v4.u32.s32`: four Int32Regs results, address
// [$t, {$l, $x, $y, $y}] with $l, $x, $y in Int32Regs.
// $y is printed twice to fill the four-element coordinate vector.
3324 def TEX_UNIFIED_2D_ARRAY_U32_S32
3325   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3326                     Int32Regs:$b, Int32Regs:$a),
3327               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3328                    Int32Regs:$y),
3329               "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3330               "[$t, \\{$l, $x, $y, $y\\}];",
3331               []>;
// Prints `tex.a2d.v4.u32.f32`: four Int32Regs results, address
// [$t, {$l, $x, $y, $y}] with $l in Int32Regs and $x, $y in Float32Regs.
// $y is printed twice to fill the four-element coordinate vector.
3332 def TEX_UNIFIED_2D_ARRAY_U32_F32
3333   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3334                     Int32Regs:$b, Int32Regs:$a),
3335               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3336                    Float32Regs:$y),
3337               "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3338               "[$t, \\{$l, $x, $y, $y\\}];",
3339               []>;
// Prints `tex.level.a2d.v4.u32.f32`: four Int32Regs results, address
// [$t, {$l, $x, $y, $y}] ($y printed twice), then the scalar operand $lod.
3340 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3341   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3342                     Int32Regs:$b, Int32Regs:$a),
3343               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3344                    Float32Regs:$y, Float32Regs:$lod),
3345               "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3346               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3347               []>;
// Prints `tex.grad.a2d.v4.u32.f32`: four Int32Regs results, address
// [$t, {$l, $x, $y, $y}] ($y printed twice), then the two-element vectors
// {$gradx0, $gradx1} and {$grady0, $grady1}.
3348 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3349   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3350                     Int32Regs:$b, Int32Regs:$a),
3351               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3352                    Float32Regs:$y,
3353                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3354                    Float32Regs:$grady0, Float32Regs:$grady1),
3355               "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3356               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3357               "\\{$grady0, $grady1\\};",
3358               []>;
// Prints `tex.3d.v4.f32.s32`: four Float32Regs results, address
// [$t, {$x, $y, $z, $z}] with $x, $y, $z in Int32Regs.
// $z is printed twice, giving a four-element coordinate vector from three
// operands (presumably padding required by the PTX syntax — confirm).
3360 def TEX_UNIFIED_3D_F32_S32
3361   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3362                     Float32Regs:$b, Float32Regs:$a),
3363               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3364                    Int32Regs:$z),
3365               "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3366               "[$t, \\{$x, $y, $z, $z\\}];",
3367               []>;
// Prints `tex.3d.v4.f32.f32`: four Float32Regs results, address
// [$t, {$x, $y, $z, $z}] with $x, $y, $z in Float32Regs.
// $z is printed twice to fill the four-element coordinate vector.
3368 def TEX_UNIFIED_3D_F32_F32
3369   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3370                     Float32Regs:$b, Float32Regs:$a),
3371               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3372                    Float32Regs:$z),
3373               "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3374               "[$t, \\{$x, $y, $z, $z\\}];",
3375               []>;
// Prints `tex.level.3d.v4.f32.f32`: four Float32Regs results, address
// [$t, {$x, $y, $z, $z}] ($z printed twice), then the scalar operand $lod.
3376 def TEX_UNIFIED_3D_F32_F32_LEVEL
3377   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3378                     Float32Regs:$b, Float32Regs:$a),
3379               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3380                    Float32Regs:$z, Float32Regs:$lod),
3381               "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3382               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3383               []>;
// Prints `tex.grad.3d.v4.f32.f32`: four Float32Regs results, address
// [$t, {$x, $y, $z, $z}], then two four-element gradient vectors.
// Each vector is built from three operands with the last one printed twice
// ($z, $gradx2, $grady2).
3384 def TEX_UNIFIED_3D_F32_F32_GRAD
3385   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3386                     Float32Regs:$b, Float32Regs:$a),
3387               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3388                    Float32Regs:$z,
3389                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3390                    Float32Regs:$gradx2, Float32Regs:$grady0,
3391                    Float32Regs:$grady1, Float32Regs:$grady2),
3392               "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3393               "[$t, \\{$x, $y, $z, $z\\}], "
3394               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3395               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3396               []>;
// Prints `tex.3d.v4.s32.s32`: four Int32Regs results, address
// [$t, {$x, $y, $z, $z}] with $x, $y, $z in Int32Regs.
// $z is printed twice to fill the four-element coordinate vector.
3397 def TEX_UNIFIED_3D_S32_S32
3398   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3399                     Int32Regs:$b, Int32Regs:$a),
3400               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3401                    Int32Regs:$z),
3402               "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3403               "[$t, \\{$x, $y, $z, $z\\}];",
3404               []>;
// Prints `tex.3d.v4.s32.f32`: four Int32Regs results, address
// [$t, {$x, $y, $z, $z}] with $x, $y, $z in Float32Regs.
// $z is printed twice to fill the four-element coordinate vector.
3405 def TEX_UNIFIED_3D_S32_F32
3406   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3407                     Int32Regs:$b, Int32Regs:$a),
3408               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3409                    Float32Regs:$z),
3410               "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3411               "[$t, \\{$x, $y, $z, $z\\}];",
3412               []>;
// Prints `tex.level.3d.v4.s32.f32`: four Int32Regs results, address
// [$t, {$x, $y, $z, $z}] ($z printed twice), then the scalar operand $lod.
3413 def TEX_UNIFIED_3D_S32_F32_LEVEL
3414   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3415                     Int32Regs:$b, Int32Regs:$a),
3416               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3417                    Float32Regs:$z, Float32Regs:$lod),
3418               "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3419               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3420               []>;
// Prints `tex.grad.3d.v4.s32.f32`: four Int32Regs results, address
// [$t, {$x, $y, $z, $z}], then two four-element gradient vectors.
// Each vector is built from three operands with the last one printed twice
// ($z, $gradx2, $grady2).
3421 def TEX_UNIFIED_3D_S32_F32_GRAD
3422   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3423                     Int32Regs:$b, Int32Regs:$a),
3424               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3425                    Float32Regs:$z,
3426                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3427                    Float32Regs:$gradx2, Float32Regs:$grady0,
3428                    Float32Regs:$grady1, Float32Regs:$grady2),
3429               "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3430               "[$t, \\{$x, $y, $z, $z\\}], "
3431               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3432               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3433               []>;
// Prints `tex.3d.v4.u32.s32`: four Int32Regs results, address
// [$t, {$x, $y, $z, $z}] with $x, $y, $z in Int32Regs.
// $z is printed twice to fill the four-element coordinate vector.
3434 def TEX_UNIFIED_3D_U32_S32
3435   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3436                     Int32Regs:$b, Int32Regs:$a),
3437               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3438                    Int32Regs:$z),
3439               "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3440               "[$t, \\{$x, $y, $z, $z\\}];",
3441               []>;
// Prints `tex.3d.v4.u32.f32`: four Int32Regs results, address
// [$t, {$x, $y, $z, $z}] with $x, $y, $z in Float32Regs.
// $z is printed twice to fill the four-element coordinate vector.
3442 def TEX_UNIFIED_3D_U32_F32
3443   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3444                     Int32Regs:$b, Int32Regs:$a),
3445               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3446                    Float32Regs:$z),
3447               "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3448               "[$t, \\{$x, $y, $z, $z\\}];",
3449               []>;
// Prints `tex.level.3d.v4.u32.f32`: four Int32Regs results, address
// [$t, {$x, $y, $z, $z}] ($z printed twice), then the scalar operand $lod.
3450 def TEX_UNIFIED_3D_U32_F32_LEVEL
3451   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3452                     Int32Regs:$b, Int32Regs:$a),
3453               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3454                    Float32Regs:$z, Float32Regs:$lod),
3455               "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3456               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3457               []>;
// Prints `tex.grad.3d.v4.u32.f32`: four Int32Regs results, address
// [$t, {$x, $y, $z, $z}], then two four-element gradient vectors.
// Each vector is built from three operands with the last one printed twice
// ($z, $gradx2, $grady2).
3458 def TEX_UNIFIED_3D_U32_F32_GRAD
3459   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3460                     Int32Regs:$b, Int32Regs:$a),
3461               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3462                    Float32Regs:$z,
3463                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3464                    Float32Regs:$gradx2, Float32Regs:$grady0,
3465                    Float32Regs:$grady1, Float32Regs:$grady2),
3466               "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3467               "[$t, \\{$x, $y, $z, $z\\}], "
3468               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3469               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3470               []>;
// Prints `tex.cube.v4.f32.f32`: four Float32Regs results, address
// [$t, {$x, $y, $z, $z}] with $x, $y, $z in Float32Regs.
// $z is printed twice to fill the four-element coordinate vector.
3472 def TEX_UNIFIED_CUBE_F32_F32
3473   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3474                     Float32Regs:$b, Float32Regs:$a),
3475               (ins Int64Regs:$t,
3476                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3477               "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3478               "[$t, \\{$x, $y, $z, $z\\}];",
3479               []>;
// Prints `tex.level.cube.v4.f32.f32`: four Float32Regs results, address
// [$t, {$x, $y, $z, $z}] ($z printed twice), then the scalar operand $lod.
3480 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
3481   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3482                     Float32Regs:$b, Float32Regs:$a),
3483               (ins Int64Regs:$t,
3484                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3485                    Float32Regs:$lod),
3486               "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3487               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3488               []>;
// Prints `tex.cube.v4.s32.f32`: four Int32Regs results, address
// [$t, {$x, $y, $z, $z}] with $x, $y, $z in Float32Regs.
// $z is printed twice to fill the four-element coordinate vector.
3489 def TEX_UNIFIED_CUBE_S32_F32
3490   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3491                     Int32Regs:$b, Int32Regs:$a),
3492               (ins Int64Regs:$t,
3493                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3494               "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3495               "[$t, \\{$x, $y, $z, $z\\}];",
3496               []>;
// Prints `tex.level.cube.v4.s32.f32`: four Int32Regs results, address
// [$t, {$x, $y, $z, $z}] ($z printed twice), then the scalar operand $lod.
3497 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
3498   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3499                     Int32Regs:$b, Int32Regs:$a),
3500               (ins Int64Regs:$t,
3501                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3502                    Float32Regs:$lod),
3503               "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3504               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3505               []>;
// Prints `tex.cube.v4.u32.f32`: four Int32Regs results, address
// [$t, {$x, $y, $z, $z}] with $x, $y, $z in Float32Regs.
// $z is printed twice to fill the four-element coordinate vector.
3506 def TEX_UNIFIED_CUBE_U32_F32
3507   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3508                     Int32Regs:$b, Int32Regs:$a),
3509               (ins Int64Regs:$t,
3510                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3511               "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3512               "[$t, \\{$x, $y, $z, $z\\}];",
3513               []>;
// Prints `tex.level.cube.v4.u32.f32`: four Int32Regs results, address
// [$t, {$x, $y, $z, $z}] ($z printed twice), then the scalar operand $lod.
3514 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
3515   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3516                     Int32Regs:$b, Int32Regs:$a),
3517               (ins Int64Regs:$t,
3518                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3519                    Float32Regs:$lod),
3520               "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3521               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3522               []>;
// Prints `tex.acube.v4.f32.f32`: four Float32Regs results, address
// [$t, {$l, $x, $y, $z}] with $l in Int32Regs and $x, $y, $z in Float32Regs.
// All four coordinate slots come from distinct operands here.
3524 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
3525   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3526                     Float32Regs:$b, Float32Regs:$a),
3527               (ins Int64Regs:$t, Int32Regs:$l,
3528                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3529               "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3530               "[$t, \\{$l, $x, $y, $z\\}];",
3531               []>;
// Prints `tex.level.acube.v4.f32.f32`: four Float32Regs results, address
// [$t, {$l, $x, $y, $z}], followed by the scalar Float32Regs operand $lod.
3532 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3533   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3534                     Float32Regs:$b, Float32Regs:$a),
3535               (ins Int64Regs:$t, Int32Regs:$l,
3536                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3537                    Float32Regs:$lod),
3538               "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3539               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3540               []>;
// Prints `tex.acube.v4.s32.f32`: four Int32Regs results, address
// [$t, {$l, $x, $y, $z}] with $l in Int32Regs and $x, $y, $z in Float32Regs.
3541 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
3542   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3543                     Int32Regs:$b, Int32Regs:$a),
3544               (ins Int64Regs:$t, Int32Regs:$l,
3545                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3546               "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3547               "[$t, \\{$l, $x, $y, $z\\}];",
3548               []>;
// Prints `tex.level.acube.v4.s32.f32`: four Int32Regs results, address
// [$t, {$l, $x, $y, $z}], followed by the scalar Float32Regs operand $lod.
3549 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3550   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3551                     Int32Regs:$b, Int32Regs:$a),
3552               (ins Int64Regs:$t, Int32Regs:$l,
3553                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3554                    Float32Regs:$lod),
3555               "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3556               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3557               []>;
// Prints `tex.acube.v4.u32.f32`: four Int32Regs results, address
// [$t, {$l, $x, $y, $z}] with $l in Int32Regs and $x, $y, $z in Float32Regs.
3558 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
3559   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3560                     Int32Regs:$b, Int32Regs:$a),
3561               (ins Int64Regs:$t, Int32Regs:$l,
3562                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3563               "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3564               "[$t, \\{$l, $x, $y, $z\\}];",
3565               []>;
// Prints `tex.level.acube.v4.u32.f32`: four Int32Regs results, address
// [$t, {$l, $x, $y, $z}], followed by the scalar Float32Regs operand $lod.
3566 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3567   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3568                     Int32Regs:$b, Int32Regs:$a),
3569               (ins Int64Regs:$t, Int32Regs:$l,
3570                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3571                    Float32Regs:$lod),
3572               "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3573               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3574               []>;
// Prints `tld4.r.2d.v4.f32.f32`: four Float32Regs results ($v0..$v3),
// address [$t, {$x, $y}] with $t in Int64Regs and $x, $y in Float32Regs.
// NOTE(review): `.r` presumably selects the red component — confirm in PTX ISA.
3576 def TLD4_UNIFIED_R_2D_F32_F32
3577   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3578                     Float32Regs:$v2, Float32Regs:$v3),
3579               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3580               "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3581               "[$t, \\{$x, $y\\}];",
3582               []>;
// Prints `tld4.g.2d.v4.f32.f32`: four Float32Regs results ($v0..$v3),
// address [$t, {$x, $y}] with $x, $y in Float32Regs.
3583 def TLD4_UNIFIED_G_2D_F32_F32
3584   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3585                     Float32Regs:$v2, Float32Regs:$v3),
3586               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3587               "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3588               "[$t, \\{$x, $y\\}];",
3589               []>;
// Prints `tld4.b.2d.v4.f32.f32`: four Float32Regs results ($v0..$v3),
// address [$t, {$x, $y}] with $x, $y in Float32Regs.
3590 def TLD4_UNIFIED_B_2D_F32_F32
3591   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3592                     Float32Regs:$v2, Float32Regs:$v3),
3593               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3594               "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3595               "[$t, \\{$x, $y\\}];",
3596               []>;
// Prints `tld4.a.2d.v4.f32.f32`: four Float32Regs results ($v0..$v3),
// address [$t, {$x, $y}] with $x, $y in Float32Regs.
3597 def TLD4_UNIFIED_A_2D_F32_F32
3598   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3599                     Float32Regs:$v2, Float32Regs:$v3),
3600               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3601               "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3602               "[$t, \\{$x, $y\\}];",
3603               []>;
// Prints `tld4.r.2d.v4.s32.f32`: four Int32Regs results ($v0..$v3),
// address [$t, {$x, $y}] with $x, $y in Float32Regs.
3604 def TLD4_UNIFIED_R_2D_S32_F32
3605   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3606                     Int32Regs:$v2, Int32Regs:$v3),
3607               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3608               "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3609               "[$t, \\{$x, $y\\}];",
3610               []>;
// Prints `tld4.g.2d.v4.s32.f32`: four Int32Regs results ($v0..$v3),
// address [$t, {$x, $y}] with $x, $y in Float32Regs.
3611 def TLD4_UNIFIED_G_2D_S32_F32
3612   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3613                     Int32Regs:$v2, Int32Regs:$v3),
3614               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3615               "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3616               "[$t, \\{$x, $y\\}];",
3617               []>;
// Prints `tld4.b.2d.v4.s32.f32`: four Int32Regs results ($v0..$v3),
// address [$t, {$x, $y}] with $x, $y in Float32Regs.
3618 def TLD4_UNIFIED_B_2D_S32_F32
3619   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3620                     Int32Regs:$v2, Int32Regs:$v3),
3621               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3622               "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3623               "[$t, \\{$x, $y\\}];",
3624               []>;
// Prints `tld4.a.2d.v4.s32.f32`: four Int32Regs results ($v0..$v3),
// address [$t, {$x, $y}] with $x, $y in Float32Regs.
3625 def TLD4_UNIFIED_A_2D_S32_F32
3626   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3627                     Int32Regs:$v2, Int32Regs:$v3),
3628               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3629               "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3630               "[$t, \\{$x, $y\\}];",
3631               []>;
// Prints `tld4.r.2d.v4.u32.f32`: four Int32Regs results ($v0..$v3),
// address [$t, {$x, $y}] with $x, $y in Float32Regs.
3632 def TLD4_UNIFIED_R_2D_U32_F32
3633   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3634                     Int32Regs:$v2, Int32Regs:$v3),
3635               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3636               "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3637               "[$t, \\{$x, $y\\}];",
3638               []>;
// Prints `tld4.g.2d.v4.u32.f32`: four Int32Regs results ($v0..$v3),
// address [$t, {$x, $y}] with $x, $y in Float32Regs.
3639 def TLD4_UNIFIED_G_2D_U32_F32
3640   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3641                     Int32Regs:$v2, Int32Regs:$v3),
3642               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3643               "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3644               "[$t, \\{$x, $y\\}];",
3645               []>;
// Prints `tld4.b.2d.v4.u32.f32`: four Int32Regs results ($v0..$v3),
// address [$t, {$x, $y}] with $x, $y in Float32Regs.
3646 def TLD4_UNIFIED_B_2D_U32_F32
3647   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3648                     Int32Regs:$v2, Int32Regs:$v3),
3649               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3650               "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3651               "[$t, \\{$x, $y\\}];",
3652               []>;
// Prints `tld4.a.2d.v4.u32.f32`: four Int32Regs results ($v0..$v3),
// address [$t, {$x, $y}] with $x, $y in Float32Regs.
3653 def TLD4_UNIFIED_A_2D_U32_F32
3654   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3655                     Int32Regs:$v2, Int32Regs:$v3),
3656               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3657               "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3658               "[$t, \\{$x, $y\\}];",
3659               []>;
3664 //=== Surface load instructions
3665 // .clamp variant
// Opens a `let` scope that sets IsSuld = 1 on the defs that follow; every
// def visible under it has exactly one result register ($r).
// NOTE(review): the brace closing this scope is not visible in this
// listing — confirm it is present in the real file.
3666 let IsSuld = 1 in {
// Prints `suld.b.1d.b8.clamp`: one result $r, address [$s, {$x}] with $s in
// Int64Regs and $x in Int32Regs. The b8 result uses the Int16Regs class.
3667 def SULD_1D_I8_CLAMP
3668   : NVPTXInst<(outs Int16Regs:$r),
3669               (ins Int64Regs:$s, Int32Regs:$x),
3670               "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
3671               []>;
// Prints `suld.b.1d.b16.clamp`: one Int16Regs result $r, address [$s, {$x}]
// with $s in Int64Regs and $x in Int32Regs.
3672 def SULD_1D_I16_CLAMP
3673   : NVPTXInst<(outs Int16Regs:$r),
3674               (ins Int64Regs:$s, Int32Regs:$x),
3675               "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
3676               []>;
// Prints `suld.b.1d.b32.clamp`: one Int32Regs result $r, address [$s, {$x}]
// with $s in Int64Regs and $x in Int32Regs.
3677 def SULD_1D_I32_CLAMP
3678   : NVPTXInst<(outs Int32Regs:$r),
3679               (ins Int64Regs:$s, Int32Regs:$x),
3680               "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
3681               []>;
// Prints `suld.b.1d.b64.clamp`: one Int64Regs result $r, address [$s, {$x}]
// with $s in Int64Regs and $x in Int32Regs.
3682 def SULD_1D_I64_CLAMP
3683   : NVPTXInst<(outs Int64Regs:$r),
3684               (ins Int64Regs:$s, Int32Regs:$x),
3685               "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
3686               []>;
// Prints `suld.b.a1d.b8.clamp`: one result $r, address [$s, {$l, $x}] with
// $l, $x in Int32Regs. The b8 result uses the Int16Regs class.
3688 def SULD_1D_ARRAY_I8_CLAMP
3689   : NVPTXInst<(outs Int16Regs:$r),
3690               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3691               "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3692               []>;
// Prints `suld.b.a1d.b16.clamp`: one Int16Regs result $r, address
// [$s, {$l, $x}] with $l, $x in Int32Regs.
3693 def SULD_1D_ARRAY_I16_CLAMP
3694   : NVPTXInst<(outs Int16Regs:$r),
3695               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3696               "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3697               []>;
// Prints `suld.b.a1d.b32.clamp`: one Int32Regs result $r, address
// [$s, {$l, $x}] with $l, $x in Int32Regs.
3698 def SULD_1D_ARRAY_I32_CLAMP
3699   : NVPTXInst<(outs Int32Regs:$r),
3700               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3701               "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3702               []>;
// Prints `suld.b.a1d.b64.clamp`: one Int64Regs result $r, address
// [$s, {$l, $x}] with $l, $x in Int32Regs.
3703 def SULD_1D_ARRAY_I64_CLAMP
3704   : NVPTXInst<(outs Int64Regs:$r),
3705               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3706               "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3707               []>;
// Prints `suld.b.2d.b8.clamp`: one result $r, address [$s, {$x, $y}] with
// $x, $y in Int32Regs. The b8 result uses the Int16Regs class.
3709 def SULD_2D_I8_CLAMP
3710   : NVPTXInst<(outs Int16Regs:$r),
3711               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3712               "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3713               []>;
// Prints `suld.b.2d.b16.clamp`: one Int16Regs result $r, address
// [$s, {$x, $y}] with $x, $y in Int32Regs.
3714 def SULD_2D_I16_CLAMP
3715   : NVPTXInst<(outs Int16Regs:$r),
3716               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3717               "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3718               []>;
// Prints `suld.b.2d.b32.clamp`: one Int32Regs result $r, address
// [$s, {$x, $y}] with $x, $y in Int32Regs.
3719 def SULD_2D_I32_CLAMP
3720   : NVPTXInst<(outs Int32Regs:$r),
3721               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3722               "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3723               []>;
// Prints `suld.b.2d.b64.clamp`: one Int64Regs result $r, address
// [$s, {$x, $y}] with $x, $y in Int32Regs.
3724 def SULD_2D_I64_CLAMP
3725   : NVPTXInst<(outs Int64Regs:$r),
3726               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3727               "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3728               []>;
// Prints `suld.b.a2d.b8.clamp`: one result $r (Int16Regs class for b8),
// address [$s, {$l, $x, $y, $y}]; $y is printed twice to fill the
// four-element coordinate vector.
3730 def SULD_2D_ARRAY_I8_CLAMP
3731   : NVPTXInst<(outs Int16Regs:$r),
3732               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3733               "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3734               []>;
// Prints `suld.b.a2d.b16.clamp`: one Int16Regs result $r, address
// [$s, {$l, $x, $y, $y}]; $y is printed twice to fill the vector.
3735 def SULD_2D_ARRAY_I16_CLAMP
3736   : NVPTXInst<(outs Int16Regs:$r),
3737               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3738               "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3739               []>;
// Prints `suld.b.a2d.b32.clamp`: one Int32Regs result $r, address
// [$s, {$l, $x, $y, $y}]; $y is printed twice to fill the vector.
3740 def SULD_2D_ARRAY_I32_CLAMP
3741   : NVPTXInst<(outs Int32Regs:$r),
3742               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3743               "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3744               []>;
// Prints `suld.b.a2d.b64.clamp`: one Int64Regs result $r, address
// [$s, {$l, $x, $y, $y}]; $y is printed twice to fill the vector.
3745 def SULD_2D_ARRAY_I64_CLAMP
3746   : NVPTXInst<(outs Int64Regs:$r),
3747               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3748               "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3749               []>;
// Prints `suld.b.3d.b8.clamp`: one result $r (Int16Regs class for b8),
// address [$s, {$x, $y, $z, $z}]; $z is printed twice to fill the
// four-element coordinate vector.
3751 def SULD_3D_I8_CLAMP
3752   : NVPTXInst<(outs Int16Regs:$r),
3753               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3754               "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3755               []>;
// Prints `suld.b.3d.b16.clamp`: one Int16Regs result $r, address
// [$s, {$x, $y, $z, $z}]; $z is printed twice to fill the vector.
3756 def SULD_3D_I16_CLAMP
3757   : NVPTXInst<(outs Int16Regs:$r),
3758               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3759               "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3760               []>;
// Prints `suld.b.3d.b32.clamp`: one Int32Regs result $r, address
// [$s, {$x, $y, $z, $z}]; $z is printed twice to fill the vector.
3761 def SULD_3D_I32_CLAMP
3762   : NVPTXInst<(outs Int32Regs:$r),
3763               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3764               "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3765               []>;
// Prints `suld.b.3d.b64.clamp`: one Int64Regs result $r, address
// [$s, {$x, $y, $z, $z}]; $z is printed twice to fill the vector.
3766 def SULD_3D_I64_CLAMP
3767   : NVPTXInst<(outs Int64Regs:$r),
3768               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3769               "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3770               []>;
// Two-result `.clamp` surface-load records. Every def in this group gets
// IsSuld = 2 and produces $r and $g; the result register class follows the
// element width in the def name (I8/I16 -> Int16Regs, I32 -> Int32Regs,
// I64 -> Int64Regs). All inputs are $s (Int64Regs) followed by Int32Regs
// coordinates. Every pattern list is empty, so no selection pattern is
// attached by these records.
// NOTE(review): IsSuld is declared outside this chunk; the value 2 appears to
// go with the v2 result shape -- confirm at its declaration.
3773 let IsSuld = 2 in {
// 1D: single coordinate $x.
3774 def SULD_1D_V2I8_CLAMP
3775   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3776               (ins Int64Regs:$s, Int32Regs:$x),
3777               "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3778               []>;
3779 def SULD_1D_V2I16_CLAMP
3780   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3781               (ins Int64Regs:$s, Int32Regs:$x),
3782               "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3783               []>;
3784 def SULD_1D_V2I32_CLAMP
3785   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3786               (ins Int64Regs:$s, Int32Regs:$x),
3787               "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3788               []>;
3789 def SULD_1D_V2I64_CLAMP
3790   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3791               (ins Int64Regs:$s, Int32Regs:$x),
3792               "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3793               []>;
// 1D array (a1d): $l is printed before $x (presumably the array layer index
// -- confirm against the PTX ISA).
3795 def SULD_1D_ARRAY_V2I8_CLAMP
3796   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3797               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3798               "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3799               []>;
3800 def SULD_1D_ARRAY_V2I16_CLAMP
3801   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3802               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3803               "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3804               []>;
3805 def SULD_1D_ARRAY_V2I32_CLAMP
3806   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3807               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3808               "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3809               []>;
3810 def SULD_1D_ARRAY_V2I64_CLAMP
3811   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3812               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3813               "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3814               []>;
// 2D: coordinates $x, $y.
3816 def SULD_2D_V2I8_CLAMP
3817   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3818               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3819               "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3820               []>;
3821 def SULD_2D_V2I16_CLAMP
3822   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3823               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3824               "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3825               []>;
3826 def SULD_2D_V2I32_CLAMP
3827   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3828               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3829               "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3830               []>;
3831 def SULD_2D_V2I64_CLAMP
3832   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3833               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3834               "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3835               []>;
// 2D array (a2d): $l, $x, $y with $y printed a second time as the fourth
// entry (looks like padding to four entries -- TODO confirm). The asm text is
// written as two adjacent string literals.
3837 def SULD_2D_ARRAY_V2I8_CLAMP
3838   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3839               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3840               "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
3841               "[$s, \\{$l, $x, $y, $y\\}];",
3842               []>;
3843 def SULD_2D_ARRAY_V2I16_CLAMP
3844   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3845               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3846               "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
3847               "[$s, \\{$l, $x, $y, $y\\}];",
3848               []>;
3849 def SULD_2D_ARRAY_V2I32_CLAMP
3850   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3851               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3852               "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
3853               "[$s, \\{$l, $x, $y, $y\\}];",
3854               []>;
3855 def SULD_2D_ARRAY_V2I64_CLAMP
3856   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3857               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3858               "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
3859               "[$s, \\{$l, $x, $y, $y\\}];",
3860               []>;
// 3D: $x, $y, $z with $z printed a second time as the fourth entry (looks
// like padding to four entries -- TODO confirm).
3862 def SULD_3D_V2I8_CLAMP
3863   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3864               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3865               "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3866               []>;
3867 def SULD_3D_V2I16_CLAMP
3868   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3869               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3870               "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3871               []>;
3872 def SULD_3D_V2I32_CLAMP
3873   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3874               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3875               "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3876               []>;
3877 def SULD_3D_V2I64_CLAMP
3878   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3879               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3880               "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3881               []>;
// Four-result `.clamp` surface-load records. Every def in this group gets
// IsSuld = 3 and produces $r, $g, $b, $a; I8/I16 variants use Int16Regs and
// I32 variants use Int32Regs (this group defines no I64 variant). All inputs
// are $s (Int64Regs) followed by Int32Regs coordinates. Every pattern list is
// empty, so no selection pattern is attached by these records.
// NOTE(review): IsSuld is declared outside this chunk; the value 3 appears to
// go with the v4 result shape -- confirm at its declaration.
3884 let IsSuld = 3 in {
// 1D: single coordinate $x.
3885 def SULD_1D_V4I8_CLAMP
3886   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3887               (ins Int64Regs:$s, Int32Regs:$x),
3888               "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3889               []>;
3890 def SULD_1D_V4I16_CLAMP
3891   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3892               (ins Int64Regs:$s, Int32Regs:$x),
3893               "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3894               []>;
3895 def SULD_1D_V4I32_CLAMP
3896   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3897               (ins Int64Regs:$s, Int32Regs:$x),
3898               "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3899               []>;
// 1D array (a1d): $l is printed before $x (presumably the array layer index
// -- confirm against the PTX ISA). The asm text is written as two adjacent
// string literals.
3901 def SULD_1D_ARRAY_V4I8_CLAMP
3902   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3903               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3904               "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3905               "[$s, \\{$l, $x\\}];",
3906               []>;
3907 def SULD_1D_ARRAY_V4I16_CLAMP
3908   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3909               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3910               "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3911               "[$s, \\{$l, $x\\}];",
3912               []>;
3913 def SULD_1D_ARRAY_V4I32_CLAMP
3914   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3915               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3916               "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3917               "[$s, \\{$l, $x\\}];",
3918               []>;
// 2D: coordinates $x, $y.
3920 def SULD_2D_V4I8_CLAMP
3921   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3922               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3923               "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3924               []>;
3925 def SULD_2D_V4I16_CLAMP
3926   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3927               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3928               "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3929               []>;
3930 def SULD_2D_V4I32_CLAMP
3931   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3932               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3933               "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3934               []>;
// 2D array (a2d): $l, $x, $y with $y printed a second time as the fourth
// entry (looks like padding to four entries -- TODO confirm).
3936 def SULD_2D_ARRAY_V4I8_CLAMP
3937   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3938               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3939               "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3940               "[$s, \\{$l, $x, $y, $y\\}];",
3941               []>;
3942 def SULD_2D_ARRAY_V4I16_CLAMP
3943   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3944               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3945               "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3946               "[$s, \\{$l, $x, $y, $y\\}];",
3947               []>;
3948 def SULD_2D_ARRAY_V4I32_CLAMP
3949   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3950               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3951               "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3952               "[$s, \\{$l, $x, $y, $y\\}];",
3953               []>;
// 3D: $x, $y, $z with $z printed a second time as the fourth entry (looks
// like padding to four entries -- TODO confirm).
3956 def SULD_3D_V4I8_CLAMP
3957   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3958               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3959               "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3960               "[$s, \\{$x, $y, $z, $z\\}];",
3961               []>;
3962 def SULD_3D_V4I16_CLAMP
3963   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3964               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3965               "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3966               "[$s, \\{$x, $y, $z, $z\\}];",
3967               []>;
3968 def SULD_3D_V4I32_CLAMP
3969   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3970               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3971               "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3972               "[$s, \\{$x, $y, $z, $z\\}];",
3973               []>;
3977 // .trap variant
// Single-result `.trap` surface-load records. Every def in this group gets
// IsSuld = 1 and produces one result $r; the result register class follows
// the element width in the def name (I8/I16 -> Int16Regs, I32 -> Int32Regs,
// I64 -> Int64Regs). All inputs are $s (Int64Regs) followed by Int32Regs
// coordinates. Every pattern list is empty, so no selection pattern is
// attached by these records.
// NOTE(review): IsSuld is declared outside this chunk; the value 1 appears to
// go with the single-result shape -- confirm at its declaration.
3978 let IsSuld = 1 in {
// 1D: single coordinate $x.
3979 def SULD_1D_I8_TRAP
3980   : NVPTXInst<(outs Int16Regs:$r),
3981               (ins Int64Regs:$s, Int32Regs:$x),
3982               "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
3983               []>;
3984 def SULD_1D_I16_TRAP
3985   : NVPTXInst<(outs Int16Regs:$r),
3986               (ins Int64Regs:$s, Int32Regs:$x),
3987               "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
3988               []>;
3989 def SULD_1D_I32_TRAP
3990   : NVPTXInst<(outs Int32Regs:$r),
3991               (ins Int64Regs:$s, Int32Regs:$x),
3992               "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
3993               []>;
3994 def SULD_1D_I64_TRAP
3995   : NVPTXInst<(outs Int64Regs:$r),
3996               (ins Int64Regs:$s, Int32Regs:$x),
3997               "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
3998               []>;
// 1D array (a1d): $l is printed before $x (presumably the array layer index
// -- confirm against the PTX ISA).
4000 def SULD_1D_ARRAY_I8_TRAP
4001   : NVPTXInst<(outs Int16Regs:$r),
4002               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4003               "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4004               []>;
4005 def SULD_1D_ARRAY_I16_TRAP
4006   : NVPTXInst<(outs Int16Regs:$r),
4007               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4008               "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4009               []>;
4010 def SULD_1D_ARRAY_I32_TRAP
4011   : NVPTXInst<(outs Int32Regs:$r),
4012               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4013               "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4014               []>;
4015 def SULD_1D_ARRAY_I64_TRAP
4016   : NVPTXInst<(outs Int64Regs:$r),
4017               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4018               "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4019               []>;
// 2D: coordinates $x, $y.
4021 def SULD_2D_I8_TRAP
4022   : NVPTXInst<(outs Int16Regs:$r),
4023               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4024               "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4025               []>;
4026 def SULD_2D_I16_TRAP
4027   : NVPTXInst<(outs Int16Regs:$r),
4028               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4029               "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4030               []>;
4031 def SULD_2D_I32_TRAP
4032   : NVPTXInst<(outs Int32Regs:$r),
4033               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4034               "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4035               []>;
4036 def SULD_2D_I64_TRAP
4037   : NVPTXInst<(outs Int64Regs:$r),
4038               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4039               "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4040               []>;
// 2D array (a2d): $l, $x, $y with $y printed a second time as the fourth
// entry (looks like padding to four entries -- TODO confirm).
4042 def SULD_2D_ARRAY_I8_TRAP
4043   : NVPTXInst<(outs Int16Regs:$r),
4044               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4045               "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4046               []>;
4047 def SULD_2D_ARRAY_I16_TRAP
4048   : NVPTXInst<(outs Int16Regs:$r),
4049               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4050               "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4051               []>;
4052 def SULD_2D_ARRAY_I32_TRAP
4053   : NVPTXInst<(outs Int32Regs:$r),
4054               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4055               "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4056               []>;
4057 def SULD_2D_ARRAY_I64_TRAP
4058   : NVPTXInst<(outs Int64Regs:$r),
4059               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4060               "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4061               []>;
// 3D: $x, $y, $z with $z printed a second time as the fourth entry (looks
// like padding to four entries -- TODO confirm).
4063 def SULD_3D_I8_TRAP
4064   : NVPTXInst<(outs Int16Regs:$r),
4065               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4066               "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4067               []>;
4068 def SULD_3D_I16_TRAP
4069   : NVPTXInst<(outs Int16Regs:$r),
4070               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4071               "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4072               []>;
4073 def SULD_3D_I32_TRAP
4074   : NVPTXInst<(outs Int32Regs:$r),
4075               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4076               "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4077               []>;
4078 def SULD_3D_I64_TRAP
4079   : NVPTXInst<(outs Int64Regs:$r),
4080               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4081               "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4082               []>;
// Two-result `.trap` surface-load records. Every def in this group gets
// IsSuld = 2 and produces $r and $g; the result register class follows the
// element width in the def name (I8/I16 -> Int16Regs, I32 -> Int32Regs,
// I64 -> Int64Regs). All inputs are $s (Int64Regs) followed by Int32Regs
// coordinates. Every pattern list is empty, so no selection pattern is
// attached by these records.
// NOTE(review): IsSuld is declared outside this chunk; the value 2 appears to
// go with the v2 result shape -- confirm at its declaration.
4085 let IsSuld = 2 in {
// 1D: single coordinate $x.
4086 def SULD_1D_V2I8_TRAP
4087   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4088               (ins Int64Regs:$s, Int32Regs:$x),
4089               "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4090               []>;
4091 def SULD_1D_V2I16_TRAP
4092   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4093               (ins Int64Regs:$s, Int32Regs:$x),
4094               "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4095               []>;
4096 def SULD_1D_V2I32_TRAP
4097   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4098               (ins Int64Regs:$s, Int32Regs:$x),
4099               "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4100               []>;
4101 def SULD_1D_V2I64_TRAP
4102   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4103               (ins Int64Regs:$s, Int32Regs:$x),
4104               "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4105               []>;
// 1D array (a1d): $l is printed before $x (presumably the array layer index
// -- confirm against the PTX ISA).
4107 def SULD_1D_ARRAY_V2I8_TRAP
4108   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4109               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4110               "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4111               []>;
4112 def SULD_1D_ARRAY_V2I16_TRAP
4113   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4114               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4115               "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4116               []>;
4117 def SULD_1D_ARRAY_V2I32_TRAP
4118   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4119               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4120               "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4121               []>;
4122 def SULD_1D_ARRAY_V2I64_TRAP
4123   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4124               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4125               "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4126               []>;
// 2D: coordinates $x, $y.
4128 def SULD_2D_V2I8_TRAP
4129   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4130               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4131               "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4132               []>;
4133 def SULD_2D_V2I16_TRAP
4134   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4135               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4136               "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4137               []>;
4138 def SULD_2D_V2I32_TRAP
4139   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4140               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4141               "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4142               []>;
4143 def SULD_2D_V2I64_TRAP
4144   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4145               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4146               "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4147               []>;
// 2D array (a2d): $l, $x, $y with $y printed a second time as the fourth
// entry (looks like padding to four entries -- TODO confirm). The asm text is
// written as two adjacent string literals.
4149 def SULD_2D_ARRAY_V2I8_TRAP
4150   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4151               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4152               "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
4153               "[$s, \\{$l, $x, $y, $y\\}];",
4154               []>;
4155 def SULD_2D_ARRAY_V2I16_TRAP
4156   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4157               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4158               "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
4159               "[$s, \\{$l, $x, $y, $y\\}];",
4160               []>;
4161 def SULD_2D_ARRAY_V2I32_TRAP
4162   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4163               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4164               "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
4165               "[$s, \\{$l, $x, $y, $y\\}];",
4166               []>;
4167 def SULD_2D_ARRAY_V2I64_TRAP
4168   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4169               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4170               "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
4171               "[$s, \\{$l, $x, $y, $y\\}];",
4172               []>;
// 3D: $x, $y, $z with $z printed a second time as the fourth entry (looks
// like padding to four entries -- TODO confirm).
4174 def SULD_3D_V2I8_TRAP
4175   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4176               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4177               "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4178               []>;
4179 def SULD_3D_V2I16_TRAP
4180   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4181               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4182               "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4183               []>;
4184 def SULD_3D_V2I32_TRAP
4185   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4186               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4187               "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4188               []>;
4189 def SULD_3D_V2I64_TRAP
4190   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4191               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4192               "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4193               []>;
// Four-result `.trap` surface-load records. Every def in this group gets
// IsSuld = 3 and produces $r, $g, $b, $a; I8/I16 variants use Int16Regs and
// I32 variants use Int32Regs (this group defines no I64 variant). All inputs
// are $s (Int64Regs) followed by Int32Regs coordinates. Every pattern list is
// empty, so no selection pattern is attached by these records.
// NOTE(review): IsSuld is declared outside this chunk; the value 3 appears to
// go with the v4 result shape -- confirm at its declaration.
4196 let IsSuld = 3 in {
// 1D: single coordinate $x.
4197 def SULD_1D_V4I8_TRAP
4198   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4199               (ins Int64Regs:$s, Int32Regs:$x),
4200               "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4201               []>;
4202 def SULD_1D_V4I16_TRAP
4203   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4204               (ins Int64Regs:$s, Int32Regs:$x),
4205               "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4206               []>;
4207 def SULD_1D_V4I32_TRAP
4208   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4209               (ins Int64Regs:$s, Int32Regs:$x),
4210               "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4211               []>;
// 1D array (a1d): $l is printed before $x (presumably the array layer index
// -- confirm against the PTX ISA). The asm text is written as two adjacent
// string literals.
4213 def SULD_1D_ARRAY_V4I8_TRAP
4214   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4215               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4216               "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4217               "[$s, \\{$l, $x\\}];",
4218               []>;
4219 def SULD_1D_ARRAY_V4I16_TRAP
4220   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4221               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4222               "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4223               "[$s, \\{$l, $x\\}];",
4224               []>;
4225 def SULD_1D_ARRAY_V4I32_TRAP
4226   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4227               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4228               "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4229               "[$s, \\{$l, $x\\}];",
4230               []>;
// 2D: coordinates $x, $y.
4232 def SULD_2D_V4I8_TRAP
4233   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4234               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4235               "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4236               []>;
4237 def SULD_2D_V4I16_TRAP
4238   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4239               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4240               "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4241               []>;
4242 def SULD_2D_V4I32_TRAP
4243   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4244               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4245               "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4246               []>;
// 2D array (a2d): $l, $x, $y with $y printed a second time as the fourth
// entry (looks like padding to four entries -- TODO confirm).
4248 def SULD_2D_ARRAY_V4I8_TRAP
4249   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4250               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4251               "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4252               "[$s, \\{$l, $x, $y, $y\\}];",
4253               []>;
4254 def SULD_2D_ARRAY_V4I16_TRAP
4255   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4256               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4257               "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4258               "[$s, \\{$l, $x, $y, $y\\}];",
4259               []>;
4260 def SULD_2D_ARRAY_V4I32_TRAP
4261   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4262               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4263               "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4264               "[$s, \\{$l, $x, $y, $y\\}];",
4265               []>;
// 3D: $x, $y, $z with $z printed a second time as the fourth entry (looks
// like padding to four entries -- TODO confirm).
4268 def SULD_3D_V4I8_TRAP
4269   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4270               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4271               "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4272               "[$s, \\{$x, $y, $z, $z\\}];",
4273               []>;
4274 def SULD_3D_V4I16_TRAP
4275   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4276               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4277               "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4278               "[$s, \\{$x, $y, $z, $z\\}];",
4279               []>;
4280 def SULD_3D_V4I32_TRAP
4281   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4282               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4283               "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4284               "[$s, \\{$x, $y, $z, $z\\}];",
4285               []>;
4288 // .zero variant
// Single-result `.zero` surface-load records. Every def in this group gets
// IsSuld = 1 and produces one result $r; the result register class follows
// the element width in the def name (I8/I16 -> Int16Regs, I32 -> Int32Regs,
// I64 -> Int64Regs). All inputs are $s (Int64Regs) followed by Int32Regs
// coordinates. Every pattern list is empty, so no selection pattern is
// attached by these records.
// NOTE(review): IsSuld is declared outside this chunk; the value 1 appears to
// go with the single-result shape -- confirm at its declaration.
4289 let IsSuld = 1 in {
// 1D: single coordinate $x.
4290 def SULD_1D_I8_ZERO
4291   : NVPTXInst<(outs Int16Regs:$r),
4292               (ins Int64Regs:$s, Int32Regs:$x),
4293               "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
4294               []>;
4295 def SULD_1D_I16_ZERO
4296   : NVPTXInst<(outs Int16Regs:$r),
4297               (ins Int64Regs:$s, Int32Regs:$x),
4298               "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
4299               []>;
4300 def SULD_1D_I32_ZERO
4301   : NVPTXInst<(outs Int32Regs:$r),
4302               (ins Int64Regs:$s, Int32Regs:$x),
4303               "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
4304               []>;
4305 def SULD_1D_I64_ZERO
4306   : NVPTXInst<(outs Int64Regs:$r),
4307               (ins Int64Regs:$s, Int32Regs:$x),
4308               "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
4309               []>;
// 1D array (a1d): $l is printed before $x (presumably the array layer index
// -- confirm against the PTX ISA).
4311 def SULD_1D_ARRAY_I8_ZERO
4312   : NVPTXInst<(outs Int16Regs:$r),
4313               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4314               "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4315               []>;
4316 def SULD_1D_ARRAY_I16_ZERO
4317   : NVPTXInst<(outs Int16Regs:$r),
4318               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4319               "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4320               []>;
4321 def SULD_1D_ARRAY_I32_ZERO
4322   : NVPTXInst<(outs Int32Regs:$r),
4323               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4324               "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4325               []>;
4326 def SULD_1D_ARRAY_I64_ZERO
4327   : NVPTXInst<(outs Int64Regs:$r),
4328               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4329               "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4330               []>;
// 2D: coordinates $x, $y.
4332 def SULD_2D_I8_ZERO
4333   : NVPTXInst<(outs Int16Regs:$r),
4334               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4335               "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4336               []>;
4337 def SULD_2D_I16_ZERO
4338   : NVPTXInst<(outs Int16Regs:$r),
4339               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4340               "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4341               []>;
4342 def SULD_2D_I32_ZERO
4343   : NVPTXInst<(outs Int32Regs:$r),
4344               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4345               "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4346               []>;
4347 def SULD_2D_I64_ZERO
4348   : NVPTXInst<(outs Int64Regs:$r),
4349               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4350               "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4351               []>;
// 2D array (a2d): $l, $x, $y with $y printed a second time as the fourth
// entry (looks like padding to four entries -- TODO confirm).
4353 def SULD_2D_ARRAY_I8_ZERO
4354   : NVPTXInst<(outs Int16Regs:$r),
4355               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4356               "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4357               []>;
4358 def SULD_2D_ARRAY_I16_ZERO
4359   : NVPTXInst<(outs Int16Regs:$r),
4360               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4361               "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4362               []>;
4363 def SULD_2D_ARRAY_I32_ZERO
4364   : NVPTXInst<(outs Int32Regs:$r),
4365               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4366               "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4367               []>;
4368 def SULD_2D_ARRAY_I64_ZERO
4369   : NVPTXInst<(outs Int64Regs:$r),
4370               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4371               "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4372               []>;
// 3D: $x, $y, $z with $z printed a second time as the fourth entry (looks
// like padding to four entries -- TODO confirm).
4374 def SULD_3D_I8_ZERO
4375   : NVPTXInst<(outs Int16Regs:$r),
4376               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4377               "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4378               []>;
4379 def SULD_3D_I16_ZERO
4380   : NVPTXInst<(outs Int16Regs:$r),
4381               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4382               "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4383               []>;
4384 def SULD_3D_I32_ZERO
4385   : NVPTXInst<(outs Int32Regs:$r),
4386               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4387               "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4388               []>;
4389 def SULD_3D_I64_ZERO
4390   : NVPTXInst<(outs Int64Regs:$r),
4391               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4392               "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4393               []>;
4396 let IsSuld = 2 in {
// Record printing `suld.b.1d.v2.b8.zero`: two results $r, $g in Int16Regs
// (the b8 form uses the Int16Regs class); inputs $s (Int64Regs) and the
// single coordinate $x (Int32Regs). Empty pattern list: no selection pattern
// is attached here.
4397 def SULD_1D_V2I8_ZERO
4398   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4399               (ins Int64Regs:$s, Int32Regs:$x),
4400               "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4401               []>;
// Record printing `suld.b.1d.v2.b16.zero`: two results $r, $g (Int16Regs);
// inputs $s (Int64Regs) and the single coordinate $x (Int32Regs). Empty
// pattern list: no selection pattern is attached here.
4402 def SULD_1D_V2I16_ZERO
4403   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4404               (ins Int64Regs:$s, Int32Regs:$x),
4405               "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4406               []>;
// Record printing `suld.b.1d.v2.b32.zero`: two results $r, $g (Int32Regs);
// inputs $s (Int64Regs) and the single coordinate $x (Int32Regs). Empty
// pattern list: no selection pattern is attached here.
4407 def SULD_1D_V2I32_ZERO
4408   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4409               (ins Int64Regs:$s, Int32Regs:$x),
4410               "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4411               []>;
// Record printing `suld.b.1d.v2.b64.zero`: two results $r, $g (Int64Regs);
// inputs $s (Int64Regs) and the single coordinate $x (Int32Regs). Empty
// pattern list: no selection pattern is attached here.
4412 def SULD_1D_V2I64_ZERO
4413   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4414               (ins Int64Regs:$s, Int32Regs:$x),
4415               "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4416               []>;
// Record printing `suld.b.a1d.v2.b8.zero`: two results $r, $g in Int16Regs
// (the b8 form uses the Int16Regs class); inputs $s (Int64Regs) and $l, $x
// (Int32Regs), with $l printed first (presumably the array layer index --
// confirm against the PTX ISA). Empty pattern list: no selection pattern is
// attached here.
4418 def SULD_1D_ARRAY_V2I8_ZERO
4419   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4420               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4421               "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4422               []>;
// Record printing `suld.b.a1d.v2.b16.zero`: two results $r, $g (Int16Regs);
// inputs $s (Int64Regs) and $l, $x (Int32Regs), with $l printed first
// (presumably the array layer index -- confirm against the PTX ISA). Empty
// pattern list: no selection pattern is attached here.
4423 def SULD_1D_ARRAY_V2I16_ZERO
4424   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4425               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4426               "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4427               []>;
// Record printing `suld.b.a1d.v2.b32.zero`: two results $r, $g (Int32Regs);
// inputs $s (Int64Regs) and $l, $x (Int32Regs), with $l printed first
// (presumably the array layer index -- confirm against the PTX ISA). Empty
// pattern list: no selection pattern is attached here.
4428 def SULD_1D_ARRAY_V2I32_ZERO
4429   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4430               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4431               "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4432               []>;
// Record printing `suld.b.a1d.v2.b64.zero`: two results $r, $g (Int64Regs);
// inputs $s (Int64Regs) and $l, $x (Int32Regs), with $l printed first
// (presumably the array layer index -- confirm against the PTX ISA). Empty
// pattern list: no selection pattern is attached here.
4433 def SULD_1D_ARRAY_V2I64_ZERO
4434   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4435               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4436               "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4437               []>;
// Record printing `suld.b.2d.v2.b8.zero`: two results $r, $g in Int16Regs
// (the b8 form uses the Int16Regs class); inputs $s (Int64Regs) and
// coordinates $x, $y (Int32Regs). Empty pattern list: no selection pattern is
// attached here.
4439 def SULD_2D_V2I8_ZERO
4440   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4441               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4442               "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4443               []>;
// Record printing `suld.b.2d.v2.b16.zero`: two results $r, $g (Int16Regs);
// inputs $s (Int64Regs) and coordinates $x, $y (Int32Regs). Empty pattern
// list: no selection pattern is attached here.
4444 def SULD_2D_V2I16_ZERO
4445   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4446               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4447               "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4448               []>;
// Record printing `suld.b.2d.v2.b32.zero`: two results $r, $g (Int32Regs);
// inputs $s (Int64Regs) and coordinates $x, $y (Int32Regs). Empty pattern
// list: no selection pattern is attached here.
4449 def SULD_2D_V2I32_ZERO
4450   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4451               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4452               "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4453               []>;
4454 def SULD_2D_V2I64_ZERO
4455   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4456               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4457               "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4458               []>;
// suld.b.a2d.v2.{b8,b16,b32,b64}.zero: two results ($r, $g) read at
// [$s, {$l, $x, $y, $y}]. The asm string is split across two adjacent string
// literals. All pattern lists are empty.
// NOTE(review): $y is printed twice, so the coordinate list has four
// elements although only three operands exist. This looks like deliberate
// padding for a four-element coordinate vector -- confirm against the PTX
// ISA before "fixing" it.

// b8 results are held in Int16Regs, the same class the b16 form uses.
4460 def SULD_2D_ARRAY_V2I8_ZERO
4461   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4462               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4463               "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4464               "[$s, \\{$l, $x, $y, $y\\}];",
4465               []>;
4466 def SULD_2D_ARRAY_V2I16_ZERO
4467   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4468               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4469               "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4470               "[$s, \\{$l, $x, $y, $y\\}];",
4471               []>;
4472 def SULD_2D_ARRAY_V2I32_ZERO
4473   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4474               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4475               "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4476               "[$s, \\{$l, $x, $y, $y\\}];",
4477               []>;
4478 def SULD_2D_ARRAY_V2I64_ZERO
4479   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4480               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4481               "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4482               "[$s, \\{$l, $x, $y, $y\\}];",
4483               []>;
// suld.b.3d.v2.{b8,b16,b32,b64}.zero: two results ($r, $g) read at
// [$s, {$x, $y, $z, $z}]. All pattern lists are empty.
// NOTE(review): $z is printed twice, so the coordinate list has four
// elements although only three coordinate operands exist. This looks like
// deliberate padding for a four-element coordinate vector -- confirm against
// the PTX ISA before "fixing" it.

// b8 results are held in Int16Regs, the same class the b16 form uses.
4485 def SULD_3D_V2I8_ZERO
4486   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4487               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4488               "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4489               []>;
4490 def SULD_3D_V2I16_ZERO
4491   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4492               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4493               "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4494               []>;
4495 def SULD_3D_V2I32_ZERO
4496   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4497               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4498               "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4499               []>;
4500 def SULD_3D_V2I64_ZERO
4501   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4502               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4503               "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4504               []>;
// Opens a `let` scope that sets IsSuld = 3 on the records defined inside it.
// NOTE(review): the records in this scope all return four values; presumably
// 3 is the encoding for the four-element form -- confirm where IsSuld is
// declared.
4507 let IsSuld = 3 in {
// suld.b.1d.v4.{b8,b16,b32}.zero: four results ($r, $g, $b, $a) read at
// [$s, {$x}]. No b64 form is defined in this group. Pattern lists are empty.
// b8 results are held in Int16Regs, the same class the b16 form uses.
4508 def SULD_1D_V4I8_ZERO
4509   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4510               (ins Int64Regs:$s, Int32Regs:$x),
4511               "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4512               []>;
4513 def SULD_1D_V4I16_ZERO
4514   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4515               (ins Int64Regs:$s, Int32Regs:$x),
4516               "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4517               []>;
4518 def SULD_1D_V4I32_ZERO
4519   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4520               (ins Int64Regs:$s, Int32Regs:$x),
4521               "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4522               []>;
// suld.b.a1d.v4.{b8,b16,b32}.zero: four results ($r, $g, $b, $a) read at
// [$s, {$l, $x}], with $l printed first (presumably the array layer index --
// confirm). The asm string is split across two adjacent string literals.
// Pattern lists are empty; b8 results are held in Int16Regs.
4524 def SULD_1D_ARRAY_V4I8_ZERO
4525   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4526               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4527               "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4528               "[$s, \\{$l, $x\\}];",
4529               []>;
4530 def SULD_1D_ARRAY_V4I16_ZERO
4531   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4532               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4533               "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4534               "[$s, \\{$l, $x\\}];",
4535               []>;
4536 def SULD_1D_ARRAY_V4I32_ZERO
4537   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4538               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4539               "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4540               "[$s, \\{$l, $x\\}];",
4541               []>;
// suld.b.2d.v4.{b8,b16,b32}.zero: four results ($r, $g, $b, $a) read at
// [$s, {$x, $y}]. Pattern lists are empty; b8 results are held in Int16Regs.
4543 def SULD_2D_V4I8_ZERO
4544   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4545               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4546               "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4547               []>;
4548 def SULD_2D_V4I16_ZERO
4549   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4550               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4551               "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4552               []>;
4553 def SULD_2D_V4I32_ZERO
4554   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4555               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4556               "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4557               []>;
// suld.b.a2d.v4.{b8,b16,b32}.zero: four results ($r, $g, $b, $a) read at
// [$s, {$l, $x, $y, $y}]. Pattern lists are empty; b8 results are held in
// Int16Regs.
// NOTE(review): $y is printed twice to make a four-element coordinate list;
// presumably deliberate padding -- confirm against the PTX ISA.
4559 def SULD_2D_ARRAY_V4I8_ZERO
4560   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4561               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4562               "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4563               "[$s, \\{$l, $x, $y, $y\\}];",
4564               []>;
4565 def SULD_2D_ARRAY_V4I16_ZERO
4566   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4567               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4568               "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4569               "[$s, \\{$l, $x, $y, $y\\}];",
4570               []>;
4571 def SULD_2D_ARRAY_V4I32_ZERO
4572   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4573               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4574               "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4575               "[$s, \\{$l, $x, $y, $y\\}];",
4576               []>;
// suld.b.3d.v4.{b8,b16,b32}.zero: four results ($r, $g, $b, $a) read at
// [$s, {$x, $y, $z, $z}]. Pattern lists are empty; b8 results are held in
// Int16Regs.
// NOTE(review): $z is printed twice to make a four-element coordinate list;
// presumably deliberate padding -- confirm against the PTX ISA.
4579 def SULD_3D_V4I8_ZERO
4580   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4581               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4582               "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4583               "[$s, \\{$x, $y, $z, $z\\}];",
4584               []>;
4585 def SULD_3D_V4I16_ZERO
4586   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4587               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4588               "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4589               "[$s, \\{$x, $y, $z, $z\\}];",
4590               []>;
4591 def SULD_3D_V4I32_ZERO
4592   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4593               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4594               "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4595               "[$s, \\{$x, $y, $z, $z\\}];",
4596               []>;
4599 //-----------------------------------
4600 // Texture Query Intrinsics
4601 //-----------------------------------
// Opens a `let` scope that sets IsSurfTexQuery = 1 on the records defined
// inside it (the consumer of that flag is not visible in this part of the
// file).
4603 let IsSurfTexQuery = 1 in {
// Texture query instructions. Each record prints `txq.<attribute>.b32 $d,
// [$a];`, producing a 32-bit result $d from the 64-bit register operand $a.
// The records carry no inline pattern; the anonymous Pat records for the
// int_nvvm_txq_* intrinsics map onto them.
4604 def TXQ_CHANNEL_ORDER
4605   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4606               "txq.channel_order.b32 \t$d, [$a];",
4607               []>;
4608 def TXQ_CHANNEL_DATA_TYPE
4609   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4610               "txq.channel_data_type.b32 \t$d, [$a];",
4611               []>;
4612 def TXQ_WIDTH
4613   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4614               "txq.width.b32 \t$d, [$a];",
4615               []>;
4616 def TXQ_HEIGHT
4617   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4618               "txq.height.b32 \t$d, [$a];",
4619               []>;
4620 def TXQ_DEPTH
4621   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4622               "txq.depth.b32 \t$d, [$a];",
4623               []>;
4624 def TXQ_ARRAY_SIZE
4625   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4626               "txq.array_size.b32 \t$d, [$a];",
4627               []>;
4628 def TXQ_NUM_SAMPLES
4629   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4630               "txq.num_samples.b32 \t$d, [$a];",
4631               []>;
4632 def TXQ_NUM_MIPMAP_LEVELS
4633   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4634               "txq.num_mipmap_levels.b32 \t$d, [$a];",
4635               []>;
// Selection patterns for the texture query intrinsics: each int_nvvm_txq_*
// intrinsic applied to a 64-bit register operand is rewritten to the TXQ_*
// instruction of the same name, passing the operand through unchanged.
4638 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4639           (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
4640 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4641           (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4642 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4643           (TXQ_WIDTH Int64Regs:$a)>;
4644 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4645           (TXQ_HEIGHT Int64Regs:$a)>;
4646 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4647           (TXQ_DEPTH Int64Regs:$a)>;
4648 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4649           (TXQ_ARRAY_SIZE Int64Regs:$a)>;
4650 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4651           (TXQ_NUM_SAMPLES Int64Regs:$a)>;
4652 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4653           (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4656 //-----------------------------------
4657 // Surface Query Intrinsics
4658 //-----------------------------------
// Opens a `let` scope that sets IsSurfTexQuery = 1 on the records defined
// inside it, the same flag value the TXQ_* records use.
4660 let IsSurfTexQuery = 1 in {
// Surface query instructions. Each record prints `suq.<attribute>.b32 $d,
// [$a];`, producing a 32-bit result $d from the 64-bit register operand $a.
// Unlike the TXQ_* set there is no num_samples or num_mipmap_levels record.
// The records carry no inline pattern; the anonymous Pat records for the
// int_nvvm_suq_* intrinsics map onto them.
4661 def SUQ_CHANNEL_ORDER
4662   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4663               "suq.channel_order.b32 \t$d, [$a];",
4664               []>;
4665 def SUQ_CHANNEL_DATA_TYPE
4666   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4667               "suq.channel_data_type.b32 \t$d, [$a];",
4668               []>;
4669 def SUQ_WIDTH
4670   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4671               "suq.width.b32 \t$d, [$a];",
4672               []>;
4673 def SUQ_HEIGHT
4674   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4675               "suq.height.b32 \t$d, [$a];",
4676               []>;
4677 def SUQ_DEPTH
4678   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4679               "suq.depth.b32 \t$d, [$a];",
4680               []>;
4681 def SUQ_ARRAY_SIZE
4682   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4683               "suq.array_size.b32 \t$d, [$a];",
4684               []>;
// Selection patterns for the surface query intrinsics: each int_nvvm_suq_*
// intrinsic applied to a 64-bit register operand is rewritten to the SUQ_*
// instruction of the same name, passing the operand through unchanged.
4687 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4688           (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
4689 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4690           (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4691 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4692           (SUQ_WIDTH Int64Regs:$a)>;
4693 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4694           (SUQ_HEIGHT Int64Regs:$a)>;
4695 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4696           (SUQ_DEPTH Int64Regs:$a)>;
4697 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4698           (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4701 //===- Handle Query -------------------------------------------------------===//
4703 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Handle type predicates. Each record prints `istypep.<kind> $d, $a;` and
// produces a 1-bit result in Int1Regs from the 64-bit register operand $a.
// Unlike the query instructions, the operand is printed bare (not in
// brackets) and the selection pattern is attached inline: the matching
// int_nvvm_istypep_* intrinsic selects directly to the record.

// .samplerref form, selected from int_nvvm_istypep_sampler.
4704 def ISTYPEP_SAMPLER
4705   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4706               "istypep.samplerref \t$d, $a;",
4707               [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
// .surfref form, selected from int_nvvm_istypep_surface.
4708 def ISTYPEP_SURFACE
4709   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4710               "istypep.surfref \t$d, $a;",
4711               [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
// .texref form, selected from int_nvvm_istypep_texture.
4712 def ISTYPEP_TEXTURE
4713   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4714               "istypep.texref \t$d, $a;",
4715               [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4717 //===- Surface Stores -----------------------------------------------------===//
// Opens a `let` scope that sets IsSust = 1 on the surface-store records
// defined inside it.
4719 let IsSust = 1 in {
4720 // Unformatted
4721 // .clamp variant
// sust.b.1d.{b8,b16,b32,b64}.clamp [$s, {$x}], {$r}: stores one value.
// These records have no outputs; $s (64-bit register), $x (32-bit register)
// and the stored value $r are all inputs. Pattern lists are empty, so no
// selection pattern is attached here. b8 values are passed in Int16Regs,
// the same class the b16 form uses.
4722 def SUST_B_1D_B8_CLAMP
4723   : NVPTXInst<(outs),
4724               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4725               "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4726               []>;
4727 def SUST_B_1D_B16_CLAMP
4728   : NVPTXInst<(outs),
4729               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4730               "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4731               []>;
4732 def SUST_B_1D_B32_CLAMP
4733   : NVPTXInst<(outs),
4734               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4735               "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4736               []>;
4737 def SUST_B_1D_B64_CLAMP
4738   : NVPTXInst<(outs),
4739               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4740               "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4741               []>;
// sust.b.1d.v2.{b8,b16,b32,b64}.clamp [$s, {$x}], {$r, $g}: stores two
// values. No outputs; every operand is an input. Pattern lists are empty.
// b8 values are passed in Int16Regs, the same class the b16 form uses.
4742 def SUST_B_1D_V2B8_CLAMP
4743   : NVPTXInst<(outs),
4744               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4745               "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4746               []>;
4747 def SUST_B_1D_V2B16_CLAMP
4748   : NVPTXInst<(outs),
4749               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4750               "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4751               []>;
4752 def SUST_B_1D_V2B32_CLAMP
4753   : NVPTXInst<(outs),
4754               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4755               "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4756               []>;
4757 def SUST_B_1D_V2B64_CLAMP
4758   : NVPTXInst<(outs),
4759               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4760               "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4761               []>;
// sust.b.1d.v4.{b8,b16,b32}.clamp [$s, {$x}], {$r, $g, $b, $a}: stores four
// values. No b64 form is defined in this group. No outputs; every operand is
// an input. Pattern lists are empty; b8 values are passed in Int16Regs.
4762 def SUST_B_1D_V4B8_CLAMP
4763   : NVPTXInst<(outs),
4764               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4765                    Int16Regs:$b, Int16Regs:$a),
4766               "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4767               []>;
4768 def SUST_B_1D_V4B16_CLAMP
4769   : NVPTXInst<(outs),
4770               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4771                    Int16Regs:$b, Int16Regs:$a),
4772               "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4773               []>;
4774 def SUST_B_1D_V4B32_CLAMP
4775   : NVPTXInst<(outs),
4776               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4777                    Int32Regs:$b, Int32Regs:$a),
4778               "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4779               []>;
// sust.b.a1d.{b8,b16,b32,b64}.clamp [$s, {$idx, $x}], {$r}: stores one
// value. $idx is printed ahead of $x in the coordinate list (presumably the
// array layer index -- confirm). No outputs; pattern lists are empty.
// b8 values are passed in Int16Regs, the same class the b16 form uses.
4782 def SUST_B_1D_ARRAY_B8_CLAMP
4783   : NVPTXInst<(outs),
4784               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4785               "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4786               []>;
4787 def SUST_B_1D_ARRAY_B16_CLAMP
4788   : NVPTXInst<(outs),
4789               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4790               "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4791               []>;
4792 def SUST_B_1D_ARRAY_B32_CLAMP
4793   : NVPTXInst<(outs),
4794               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4795               "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4796               []>;
4797 def SUST_B_1D_ARRAY_B64_CLAMP
4798   : NVPTXInst<(outs),
4799               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4800               "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4801               []>;
// sust.b.a1d.v2.{b8,b16,b32,b64}.clamp [$s, {$idx, $x}], {$r, $g}: stores
// two values. No outputs; every operand is an input. Pattern lists are
// empty; b8 values are passed in Int16Regs.
4802 def SUST_B_1D_ARRAY_V2B8_CLAMP
4803   : NVPTXInst<(outs),
4804               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4805                    Int16Regs:$g),
4806               "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4807               []>;
4808 def SUST_B_1D_ARRAY_V2B16_CLAMP
4809   : NVPTXInst<(outs),
4810               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4811                    Int16Regs:$g),
4812               "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4813               []>;
4814 def SUST_B_1D_ARRAY_V2B32_CLAMP
4815   : NVPTXInst<(outs),
4816               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4817                    Int32Regs:$g),
4818               "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4819               []>;
4820 def SUST_B_1D_ARRAY_V2B64_CLAMP
4821   : NVPTXInst<(outs),
4822               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4823                    Int64Regs:$g),
4824               "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4825               []>;
// sust.b.a1d.v4.{b8,b16,b32}.clamp [$s, {$idx, $x}], {$r, $g, $b, $a}:
// stores four values. The asm string is split across two adjacent string
// literals. No outputs; pattern lists are empty; b8 values are passed in
// Int16Regs.
4826 def SUST_B_1D_ARRAY_V4B8_CLAMP
4827   : NVPTXInst<(outs),
4828               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4829                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4830               "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
4831               "\\{$r, $g, $b, $a\\};",
4832               []>;
4833 def SUST_B_1D_ARRAY_V4B16_CLAMP
4834   : NVPTXInst<(outs),
4835               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4836                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4837              "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
4838              "\\{$r, $g, $b, $a\\};",
4839               []>;
4840 def SUST_B_1D_ARRAY_V4B32_CLAMP
4841   : NVPTXInst<(outs),
4842               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4843                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4844              "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
4845              "\\{$r, $g, $b, $a\\};",
4846               []>;
// sust.b.2d.{b8,b16,b32,b64}.clamp [$s, {$x, $y}], {$r}: stores one value.
// No outputs; every operand is an input. Pattern lists are empty; b8 values
// are passed in Int16Regs, the same class the b16 form uses.
4849 def SUST_B_2D_B8_CLAMP
4850   : NVPTXInst<(outs),
4851               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4852               "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4853               []>;
4854 def SUST_B_2D_B16_CLAMP
4855   : NVPTXInst<(outs),
4856               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4857               "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4858               []>;
4859 def SUST_B_2D_B32_CLAMP
4860   : NVPTXInst<(outs),
4861               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4862               "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4863               []>;
4864 def SUST_B_2D_B64_CLAMP
4865   : NVPTXInst<(outs),
4866               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4867               "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4868               []>;
// sust.b.2d.v2.{b8,b16,b32,b64}.clamp [$s, {$x, $y}], {$r, $g}: stores two
// values. No outputs; every operand is an input. Pattern lists are empty;
// b8 values are passed in Int16Regs.
4869 def SUST_B_2D_V2B8_CLAMP
4870   : NVPTXInst<(outs),
4871               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4872                    Int16Regs:$g),
4873               "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4874               []>;
4875 def SUST_B_2D_V2B16_CLAMP
4876   : NVPTXInst<(outs),
4877               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4878                    Int16Regs:$g),
4879               "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4880               []>;
4881 def SUST_B_2D_V2B32_CLAMP
4882   : NVPTXInst<(outs),
4883               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4884                    Int32Regs:$g),
4885               "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4886               []>;
4887 def SUST_B_2D_V2B64_CLAMP
4888   : NVPTXInst<(outs),
4889               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4890                    Int64Regs:$g),
4891               "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4892               []>;
// sust.b.2d.v4.{b8,b16,b32}.clamp [$s, {$x, $y}], {$r, $g, $b, $a}: stores
// four values. The asm string is split across two adjacent string literals.
// No outputs; pattern lists are empty; b8 values are passed in Int16Regs.
4893 def SUST_B_2D_V4B8_CLAMP
4894   : NVPTXInst<(outs),
4895               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4896                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4897               "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
4898               "\\{$r, $g, $b, $a\\};",
4899               []>;
4900 def SUST_B_2D_V4B16_CLAMP
4901   : NVPTXInst<(outs),
4902               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4903                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4904              "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
4905              "\\{$r, $g, $b, $a\\};",
4906               []>;
4907 def SUST_B_2D_V4B32_CLAMP
4908   : NVPTXInst<(outs),
4909               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4910                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4911              "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
4912              "\\{$r, $g, $b, $a\\};",
4913               []>;
// sust.b.a2d.{b8,b16,b32,b64}.clamp [$s, {$idx, $x, $y, $y}], {$r}: stores
// one value. No outputs; pattern lists are empty; b8 values are passed in
// Int16Regs.
// NOTE(review): $y is printed twice to make a four-element coordinate list;
// presumably deliberate padding -- confirm against the PTX ISA.
4916 def SUST_B_2D_ARRAY_B8_CLAMP
4917   : NVPTXInst<(outs),
4918               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4919                    Int16Regs:$r),
4920               "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4921               []>;
4922 def SUST_B_2D_ARRAY_B16_CLAMP
4923   : NVPTXInst<(outs),
4924               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4925                    Int16Regs:$r),
4926               "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4927               []>;
4928 def SUST_B_2D_ARRAY_B32_CLAMP
4929   : NVPTXInst<(outs),
4930               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4931                    Int32Regs:$r),
4932               "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4933               []>;
4934 def SUST_B_2D_ARRAY_B64_CLAMP
4935   : NVPTXInst<(outs),
4936               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4937                    Int64Regs:$r),
4938               "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4939               []>;
// sust.b.a2d.v2.{b8,b16,b32,b64}.clamp [$s, {$idx, $x, $y, $y}], {$r, $g}:
// stores two values. The asm string is split across two adjacent string
// literals. No outputs; pattern lists are empty; b8 values are passed in
// Int16Regs.
// NOTE(review): $y is printed twice to make a four-element coordinate list;
// presumably deliberate padding -- confirm against the PTX ISA.
4940 def SUST_B_2D_ARRAY_V2B8_CLAMP
4941   : NVPTXInst<(outs),
4942               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4943                    Int16Regs:$r, Int16Regs:$g),
4944               "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4945               "\\{$r, $g\\};",
4946               []>;
4947 def SUST_B_2D_ARRAY_V2B16_CLAMP
4948   : NVPTXInst<(outs),
4949               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4950                    Int16Regs:$r, Int16Regs:$g),
4951              "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4952              "\\{$r, $g\\};",
4953               []>;
4954 def SUST_B_2D_ARRAY_V2B32_CLAMP
4955   : NVPTXInst<(outs),
4956               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4957                    Int32Regs:$r, Int32Regs:$g),
4958              "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4959              "\\{$r, $g\\};",
4960               []>;
4961 def SUST_B_2D_ARRAY_V2B64_CLAMP
4962   : NVPTXInst<(outs),
4963               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4964                    Int64Regs:$r, Int64Regs:$g),
4965              "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4966              "\\{$r, $g\\};",
4967               []>;
// sust.b.a2d.v4.{b8,b16,b32}.clamp [$s, {$idx, $x, $y, $y}],
// {$r, $g, $b, $a}: stores four values. The asm string is split across two
// adjacent string literals. No outputs; pattern lists are empty; b8 values
// are passed in Int16Regs.
// NOTE(review): $y is printed twice to make a four-element coordinate list;
// presumably deliberate padding -- confirm against the PTX ISA.
4968 def SUST_B_2D_ARRAY_V4B8_CLAMP
4969   : NVPTXInst<(outs),
4970               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4971                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4972       "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4973       "\\{$r, $g, $b, $a\\};",
4974               []>;
4975 def SUST_B_2D_ARRAY_V4B16_CLAMP
4976   : NVPTXInst<(outs),
4977               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4978                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4979      "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4980      "\\{$r, $g, $b, $a\\};",
4981               []>;
4982 def SUST_B_2D_ARRAY_V4B32_CLAMP
4983   : NVPTXInst<(outs),
4984               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4985                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4986      "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4987      "\\{$r, $g, $b, $a\\};",
4988               []>;
// sust.b.3d.{b8,b16,b32,b64}.clamp [$s, {$x, $y, $z, $z}], {$r}: stores one
// value. No outputs; pattern lists are empty; b8 values are passed in
// Int16Regs.
// NOTE(review): $z is printed twice to make a four-element coordinate list;
// presumably deliberate padding -- confirm against the PTX ISA.
4991 def SUST_B_3D_B8_CLAMP
4992   : NVPTXInst<(outs),
4993               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4994                    Int16Regs:$r),
4995               "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4996               []>;
4997 def SUST_B_3D_B16_CLAMP
4998   : NVPTXInst<(outs),
4999               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5000                    Int16Regs:$r),
5001               "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5002               []>;
5003 def SUST_B_3D_B32_CLAMP
5004   : NVPTXInst<(outs),
5005               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5006                    Int32Regs:$r),
5007               "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5008               []>;
5009 def SUST_B_3D_B64_CLAMP
5010   : NVPTXInst<(outs),
5011               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5012                    Int64Regs:$r),
5013               "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5014               []>;
// sust.b.3d.v2.{b8,b16,b32,b64}.clamp [$s, {$x, $y, $z, $z}], {$r, $g}:
// stores two values. The asm string is split across two adjacent string
// literals. No outputs; pattern lists are empty; b8 values are passed in
// Int16Regs.
// NOTE(review): $z is printed twice to make a four-element coordinate list;
// presumably deliberate padding -- confirm against the PTX ISA.
5015 def SUST_B_3D_V2B8_CLAMP
5016   : NVPTXInst<(outs),
5017               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5018                    Int16Regs:$r, Int16Regs:$g),
5019               "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5020               "\\{$r, $g\\};",
5021               []>;
5022 def SUST_B_3D_V2B16_CLAMP
5023   : NVPTXInst<(outs),
5024               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5025                    Int16Regs:$r, Int16Regs:$g),
5026               "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5027               "\\{$r, $g\\};",
5028               []>;
5029 def SUST_B_3D_V2B32_CLAMP
5030   : NVPTXInst<(outs),
5031               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5032                    Int32Regs:$r, Int32Regs:$g),
5033               "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5034               "\\{$r, $g\\};",
5035               []>;
5036 def SUST_B_3D_V2B64_CLAMP
5037   : NVPTXInst<(outs),
5038               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5039                    Int64Regs:$r, Int64Regs:$g),
5040               "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5041               "\\{$r, $g\\};",
5042               []>;
// sust.b.3d.v4.{b8,b16,b32}.clamp [$s, {$x, $y, $z, $z}], {$r, $g, $b, $a}:
// stores four values. The asm string is split across two adjacent string
// literals. No outputs; pattern lists are empty; b8 values are passed in
// Int16Regs.
// NOTE(review): $z is printed twice to make a four-element coordinate list;
// presumably deliberate padding -- confirm against the PTX ISA.
5043 def SUST_B_3D_V4B8_CLAMP
5044   : NVPTXInst<(outs),
5045               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5046                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5047          "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5048          "\\{$r, $g, $b, $a\\};",
5049               []>;
5050 def SUST_B_3D_V4B16_CLAMP
5051   : NVPTXInst<(outs),
5052               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5053                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5054         "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5055         "\\{$r, $g, $b, $a\\};",
5056               []>;
5057 def SUST_B_3D_V4B32_CLAMP
5058   : NVPTXInst<(outs),
5059               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5060                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5061         "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5062         "\\{$r, $g, $b, $a\\};",
5063               []>;
5066 // .trap variant
// sust.b.1d.{b8,b16,b32,b64}.trap [$s, {$x}], {$r}: stores one value.
// Operand lists mirror the SUST_B_1D_B*_CLAMP records; only the mnemonic
// suffix differs. No outputs; pattern lists are empty; b8 values are passed
// in Int16Regs.
5067 def SUST_B_1D_B8_TRAP
5068   : NVPTXInst<(outs),
5069               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5070               "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5071               []>;
5072 def SUST_B_1D_B16_TRAP
5073   : NVPTXInst<(outs),
5074               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5075               "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5076               []>;
5077 def SUST_B_1D_B32_TRAP
5078   : NVPTXInst<(outs),
5079               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5080               "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5081               []>;
5082 def SUST_B_1D_B64_TRAP
5083   : NVPTXInst<(outs),
5084               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5085               "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
5086               []>;
// sust.b.1d.v2.{b8,b16,b32,b64}.trap [$s, {$x}], {$r, $g}: stores two
// values. No outputs; every operand is an input. Pattern lists are empty;
// b8 values are passed in Int16Regs.
5087 def SUST_B_1D_V2B8_TRAP
5088   : NVPTXInst<(outs),
5089               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5090               "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5091               []>;
5092 def SUST_B_1D_V2B16_TRAP
5093   : NVPTXInst<(outs),
5094               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5095               "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5096               []>;
5097 def SUST_B_1D_V2B32_TRAP
5098   : NVPTXInst<(outs),
5099               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5100               "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5101               []>;
5102 def SUST_B_1D_V2B64_TRAP
5103   : NVPTXInst<(outs),
5104               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5105               "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5106               []>;
// sust.b.1d.v4.{b8,b16,b32}.trap [$s, {$x}], {$r, $g, $b, $a}: stores four
// values. No b64 form is defined in this group. No outputs; pattern lists
// are empty; b8 values are passed in Int16Regs.
5107 def SUST_B_1D_V4B8_TRAP
5108   : NVPTXInst<(outs),
5109               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5110                    Int16Regs:$b, Int16Regs:$a),
5111               "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5112               []>;
5113 def SUST_B_1D_V4B16_TRAP
5114   : NVPTXInst<(outs),
5115               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5116                    Int16Regs:$b, Int16Regs:$a),
5117               "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5118               []>;
5119 def SUST_B_1D_V4B32_TRAP
5120   : NVPTXInst<(outs),
5121               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5122                    Int32Regs:$b, Int32Regs:$a),
5123               "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5124               []>;
// sust.b.a1d.{b8,b16,b32,b64}.trap [$s, {$idx, $x}], {$r}: stores one value.
// $idx is printed ahead of $x in the coordinate list (presumably the array
// layer index -- confirm). No outputs; pattern lists are empty; b8 values
// are passed in Int16Regs.
5127 def SUST_B_1D_ARRAY_B8_TRAP
5128   : NVPTXInst<(outs),
5129               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5130               "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5131               []>;
5132 def SUST_B_1D_ARRAY_B16_TRAP
5133   : NVPTXInst<(outs),
5134               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5135               "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5136               []>;
5137 def SUST_B_1D_ARRAY_B32_TRAP
5138   : NVPTXInst<(outs),
5139               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5140               "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5141               []>;
5142 def SUST_B_1D_ARRAY_B64_TRAP
5143   : NVPTXInst<(outs),
5144               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5145               "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5146               []>;
// Record for "sust.b.a1d.v2.b8.trap": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g (Int16Regs,
// though the store is b8).
5147 def SUST_B_1D_ARRAY_V2B8_TRAP
5148   : NVPTXInst<(outs),
5149               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5150                    Int16Regs:$g),
5151               "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5152               []>;
// Record for "sust.b.a1d.v2.b16.trap": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g (Int16Regs).
5153 def SUST_B_1D_ARRAY_V2B16_TRAP
5154   : NVPTXInst<(outs),
5155               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5156                    Int16Regs:$g),
5157               "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5158               []>;
// Record for "sust.b.a1d.v2.b32.trap": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g (Int32Regs).
5159 def SUST_B_1D_ARRAY_V2B32_TRAP
5160   : NVPTXInst<(outs),
5161               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5162                    Int32Regs:$g),
5163               "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5164               []>;
// Record for "sust.b.a1d.v2.b64.trap": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g (Int64Regs).
5165 def SUST_B_1D_ARRAY_V2B64_TRAP
5166   : NVPTXInst<(outs),
5167               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5168                    Int64Regs:$g),
5169               "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5170               []>;
// Record for "sust.b.a1d.v4.b8.trap": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g, $b, $a
// (Int16Regs, though the store is b8). The asm string is split over two
// adjacent literals.
5171 def SUST_B_1D_ARRAY_V4B8_TRAP
5172   : NVPTXInst<(outs),
5173               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5174                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5175               "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5176               "\\{$r, $g, $b, $a\\};",
5177               []>;
// Record for "sust.b.a1d.v4.b16.trap": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g, $b, $a
// (Int16Regs). The asm string is split over two adjacent literals.
5178 def SUST_B_1D_ARRAY_V4B16_TRAP
5179   : NVPTXInst<(outs),
5180               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5181                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5182              "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5183              "\\{$r, $g, $b, $a\\};",
5184               []>;
// Record for "sust.b.a1d.v4.b32.trap": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g, $b, $a
// (Int32Regs). The asm string is split over two adjacent literals.
5185 def SUST_B_1D_ARRAY_V4B32_TRAP
5186   : NVPTXInst<(outs),
5187               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5188                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5189              "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5190              "\\{$r, $g, $b, $a\\};",
5191               []>;
// Record for "sust.b.2d.b8.trap": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r (Int16Regs, though
// the store is b8).
5194 def SUST_B_2D_B8_TRAP
5195   : NVPTXInst<(outs),
5196               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5197               "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5198               []>;
// Record for "sust.b.2d.b16.trap": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r (Int16Regs).
5199 def SUST_B_2D_B16_TRAP
5200   : NVPTXInst<(outs),
5201               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5202               "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5203               []>;
// Record for "sust.b.2d.b32.trap": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r (Int32Regs).
5204 def SUST_B_2D_B32_TRAP
5205   : NVPTXInst<(outs),
5206               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5207               "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5208               []>;
// Record for "sust.b.2d.b64.trap": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r (Int64Regs).
5209 def SUST_B_2D_B64_TRAP
5210   : NVPTXInst<(outs),
5211               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5212               "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5213               []>;
// Record for "sust.b.2d.v2.b8.trap": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r, $g (Int16Regs,
// though the store is b8).
5214 def SUST_B_2D_V2B8_TRAP
5215   : NVPTXInst<(outs),
5216               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5217                    Int16Regs:$g),
5218               "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5219               []>;
// Record for "sust.b.2d.v2.b16.trap": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r, $g (Int16Regs).
5220 def SUST_B_2D_V2B16_TRAP
5221   : NVPTXInst<(outs),
5222               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5223                    Int16Regs:$g),
5224               "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5225               []>;
// Record for "sust.b.2d.v2.b32.trap": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r, $g (Int32Regs).
5226 def SUST_B_2D_V2B32_TRAP
5227   : NVPTXInst<(outs),
5228               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5229                    Int32Regs:$g),
5230               "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5231               []>;
// Record for "sust.b.2d.v2.b64.trap": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r, $g (Int64Regs).
5232 def SUST_B_2D_V2B64_TRAP
5233   : NVPTXInst<(outs),
5234               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5235                    Int64Regs:$g),
5236               "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5237               []>;
// Record for "sust.b.2d.v4.b8.trap": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r, $g, $b, $a
// (Int16Regs, though the store is b8). The asm string is split over two
// adjacent literals.
5238 def SUST_B_2D_V4B8_TRAP
5239   : NVPTXInst<(outs),
5240               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5241                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5242               "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5243               "\\{$r, $g, $b, $a\\};",
5244               []>;
// Record for "sust.b.2d.v4.b16.trap": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r, $g, $b, $a
// (Int16Regs). The asm string is split over two adjacent literals.
5245 def SUST_B_2D_V4B16_TRAP
5246   : NVPTXInst<(outs),
5247               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5248                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5249              "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5250              "\\{$r, $g, $b, $a\\};",
5251               []>;
// Record for "sust.b.2d.v4.b32.trap": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r, $g, $b, $a
// (Int32Regs). The asm string is split over two adjacent literals.
5252 def SUST_B_2D_V4B32_TRAP
5253   : NVPTXInst<(outs),
5254               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5255                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5256              "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5257              "\\{$r, $g, $b, $a\\};",
5258               []>;
// Record for "sust.b.a2d.b8.trap": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r (Int16Regs, though the store is b8).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5261 def SUST_B_2D_ARRAY_B8_TRAP
5262   : NVPTXInst<(outs),
5263               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5264                    Int16Regs:$r),
5265               "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5266               []>;
// Record for "sust.b.a2d.b16.trap": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r (Int16Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5267 def SUST_B_2D_ARRAY_B16_TRAP
5268   : NVPTXInst<(outs),
5269               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5270                    Int16Regs:$r),
5271               "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5272               []>;
// Record for "sust.b.a2d.b32.trap": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r (Int32Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5273 def SUST_B_2D_ARRAY_B32_TRAP
5274   : NVPTXInst<(outs),
5275               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5276                    Int32Regs:$r),
5277               "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5278               []>;
// Record for "sust.b.a2d.b64.trap": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r (Int64Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5279 def SUST_B_2D_ARRAY_B64_TRAP
5280   : NVPTXInst<(outs),
5281               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5282                    Int64Regs:$r),
5283               "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5284               []>;
// Record for "sust.b.a2d.v2.b8.trap": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r, $g (Int16Regs, though the store is
// b8).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5285 def SUST_B_2D_ARRAY_V2B8_TRAP
5286   : NVPTXInst<(outs),
5287               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5288                    Int16Regs:$r, Int16Regs:$g),
5289               "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5290               "\\{$r, $g\\};",
5291               []>;
// Record for "sust.b.a2d.v2.b16.trap": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r, $g (Int16Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5292 def SUST_B_2D_ARRAY_V2B16_TRAP
5293   : NVPTXInst<(outs),
5294               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5295                    Int16Regs:$r, Int16Regs:$g),
5296              "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5297              "\\{$r, $g\\};",
5298               []>;
// Record for "sust.b.a2d.v2.b32.trap": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r, $g (Int32Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5299 def SUST_B_2D_ARRAY_V2B32_TRAP
5300   : NVPTXInst<(outs),
5301               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5302                    Int32Regs:$r, Int32Regs:$g),
5303              "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5304              "\\{$r, $g\\};",
5305               []>;
// Record for "sust.b.a2d.v2.b64.trap": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r, $g (Int64Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5306 def SUST_B_2D_ARRAY_V2B64_TRAP
5307   : NVPTXInst<(outs),
5308               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5309                    Int64Regs:$r, Int64Regs:$g),
5310              "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5311              "\\{$r, $g\\};",
5312               []>;
// Record for "sust.b.a2d.v4.b8.trap": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r, $g, $b, $a (Int16Regs, though the
// store is b8).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5313 def SUST_B_2D_ARRAY_V4B8_TRAP
5314   : NVPTXInst<(outs),
5315               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5316                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5317       "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5318       "\\{$r, $g, $b, $a\\};",
5319               []>;
// Record for "sust.b.a2d.v4.b16.trap": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r, $g, $b, $a (Int16Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5320 def SUST_B_2D_ARRAY_V4B16_TRAP
5321   : NVPTXInst<(outs),
5322               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5323                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5324      "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5325      "\\{$r, $g, $b, $a\\};",
5326               []>;
// Record for "sust.b.a2d.v4.b32.trap": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r, $g, $b, $a (Int32Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5327 def SUST_B_2D_ARRAY_V4B32_TRAP
5328   : NVPTXInst<(outs),
5329               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5330                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5331      "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5332      "\\{$r, $g, $b, $a\\};",
5333               []>;
// Record for "sust.b.3d.b8.trap": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r (Int16Regs, though the store is b8).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5336 def SUST_B_3D_B8_TRAP
5337   : NVPTXInst<(outs),
5338               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5339                    Int16Regs:$r),
5340               "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5341               []>;
// Record for "sust.b.3d.b16.trap": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r (Int16Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5342 def SUST_B_3D_B16_TRAP
5343   : NVPTXInst<(outs),
5344               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5345                    Int16Regs:$r),
5346               "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5347               []>;
// Record for "sust.b.3d.b32.trap": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r (Int32Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5348 def SUST_B_3D_B32_TRAP
5349   : NVPTXInst<(outs),
5350               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5351                    Int32Regs:$r),
5352               "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5353               []>;
// Record for "sust.b.3d.b64.trap": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r (Int64Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5354 def SUST_B_3D_B64_TRAP
5355   : NVPTXInst<(outs),
5356               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5357                    Int64Regs:$r),
5358               "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5359               []>;
// Record for "sust.b.3d.v2.b8.trap": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r, $g (Int16Regs, though the store is b8).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5360 def SUST_B_3D_V2B8_TRAP
5361   : NVPTXInst<(outs),
5362               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5363                    Int16Regs:$r, Int16Regs:$g),
5364               "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5365               "\\{$r, $g\\};",
5366               []>;
// Record for "sust.b.3d.v2.b16.trap": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r, $g (Int16Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5367 def SUST_B_3D_V2B16_TRAP
5368   : NVPTXInst<(outs),
5369               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5370                    Int16Regs:$r, Int16Regs:$g),
5371               "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5372               "\\{$r, $g\\};",
5373               []>;
// Record for "sust.b.3d.v2.b32.trap": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r, $g (Int32Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5374 def SUST_B_3D_V2B32_TRAP
5375   : NVPTXInst<(outs),
5376               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5377                    Int32Regs:$r, Int32Regs:$g),
5378               "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5379               "\\{$r, $g\\};",
5380               []>;
// Record for "sust.b.3d.v2.b64.trap": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r, $g (Int64Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5381 def SUST_B_3D_V2B64_TRAP
5382   : NVPTXInst<(outs),
5383               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5384                    Int64Regs:$r, Int64Regs:$g),
5385               "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5386               "\\{$r, $g\\};",
5387               []>;
// Record for "sust.b.3d.v4.b8.trap": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r, $g, $b, $a (Int16Regs, though the
// store is b8).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5388 def SUST_B_3D_V4B8_TRAP
5389   : NVPTXInst<(outs),
5390               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5391                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5392          "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5393          "\\{$r, $g, $b, $a\\};",
5394               []>;
// Record for "sust.b.3d.v4.b16.trap": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r, $g, $b, $a (Int16Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5395 def SUST_B_3D_V4B16_TRAP
5396   : NVPTXInst<(outs),
5397               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5398                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5399         "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5400         "\\{$r, $g, $b, $a\\};",
5401               []>;
// Record for "sust.b.3d.v4.b32.trap": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r, $g, $b, $a (Int32Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5402 def SUST_B_3D_V4B32_TRAP
5403   : NVPTXInst<(outs),
5404               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5405                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5406         "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5407         "\\{$r, $g, $b, $a\\};",
5408               []>;
5411 // .zero variants of the sust.b instructions (same operand shapes as the .trap ones above)
// Record for "sust.b.1d.b8.zero": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r (Int16Regs, though the store is b8).
5412 def SUST_B_1D_B8_ZERO
5413   : NVPTXInst<(outs),
5414               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5415               "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
5416               []>;
// Record for "sust.b.1d.b16.zero": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r (Int16Regs).
5417 def SUST_B_1D_B16_ZERO
5418   : NVPTXInst<(outs),
5419               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5420               "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
5421               []>;
// Record for "sust.b.1d.b32.zero": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r (Int32Regs).
5422 def SUST_B_1D_B32_ZERO
5423   : NVPTXInst<(outs),
5424               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5425               "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
5426               []>;
// Record for "sust.b.1d.b64.zero": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r (Int64Regs).
5427 def SUST_B_1D_B64_ZERO
5428   : NVPTXInst<(outs),
5429               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5430               "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
5431               []>;
// Record for "sust.b.1d.v2.b8.zero": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r, $g (Int16Regs, though the store is
// b8).
5432 def SUST_B_1D_V2B8_ZERO
5433   : NVPTXInst<(outs),
5434               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5435               "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5436               []>;
// Record for "sust.b.1d.v2.b16.zero": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r, $g (Int16Regs).
5437 def SUST_B_1D_V2B16_ZERO
5438   : NVPTXInst<(outs),
5439               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5440               "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5441               []>;
// Record for "sust.b.1d.v2.b32.zero": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r, $g (Int32Regs).
5442 def SUST_B_1D_V2B32_ZERO
5443   : NVPTXInst<(outs),
5444               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5445               "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5446               []>;
// Record for "sust.b.1d.v2.b64.zero": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r, $g (Int64Regs).
5447 def SUST_B_1D_V2B64_ZERO
5448   : NVPTXInst<(outs),
5449               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5450               "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5451               []>;
// Record for "sust.b.1d.v4.b8.zero": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r, $g, $b, $a (Int16Regs, though the
// store is b8).
5452 def SUST_B_1D_V4B8_ZERO
5453   : NVPTXInst<(outs),
5454               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5455                    Int16Regs:$b, Int16Regs:$a),
5456               "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5457               []>;
// Record for "sust.b.1d.v4.b16.zero": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r, $g, $b, $a (Int16Regs).
5458 def SUST_B_1D_V4B16_ZERO
5459   : NVPTXInst<(outs),
5460               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5461                    Int16Regs:$b, Int16Regs:$a),
5462               "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5463               []>;
// Record for "sust.b.1d.v4.b32.zero": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r, $g, $b, $a (Int32Regs).
5464 def SUST_B_1D_V4B32_ZERO
5465   : NVPTXInst<(outs),
5466               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5467                    Int32Regs:$b, Int32Regs:$a),
5468               "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5469               []>;
// Record for "sust.b.a1d.b8.zero": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r (Int16Regs,
// though the store is b8).
5472 def SUST_B_1D_ARRAY_B8_ZERO
5473   : NVPTXInst<(outs),
5474               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5475               "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5476               []>;
// Record for "sust.b.a1d.b16.zero": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r (Int16Regs).
5477 def SUST_B_1D_ARRAY_B16_ZERO
5478   : NVPTXInst<(outs),
5479               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5480               "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5481               []>;
// Record for "sust.b.a1d.b32.zero": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r (Int32Regs).
5482 def SUST_B_1D_ARRAY_B32_ZERO
5483   : NVPTXInst<(outs),
5484               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5485               "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5486               []>;
// Record for "sust.b.a1d.b64.zero": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r (Int64Regs).
5487 def SUST_B_1D_ARRAY_B64_ZERO
5488   : NVPTXInst<(outs),
5489               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5490               "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5491               []>;
// Record for "sust.b.a1d.v2.b8.zero": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g (Int16Regs,
// though the store is b8).
5492 def SUST_B_1D_ARRAY_V2B8_ZERO
5493   : NVPTXInst<(outs),
5494               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5495                    Int16Regs:$g),
5496               "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5497               []>;
// Record for "sust.b.a1d.v2.b16.zero": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g (Int16Regs).
5498 def SUST_B_1D_ARRAY_V2B16_ZERO
5499   : NVPTXInst<(outs),
5500               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5501                    Int16Regs:$g),
5502               "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5503               []>;
// Record for "sust.b.a1d.v2.b32.zero": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g (Int32Regs).
5504 def SUST_B_1D_ARRAY_V2B32_ZERO
5505   : NVPTXInst<(outs),
5506               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5507                    Int32Regs:$g),
5508               "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5509               []>;
// Record for "sust.b.a1d.v2.b64.zero": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g (Int64Regs).
5510 def SUST_B_1D_ARRAY_V2B64_ZERO
5511   : NVPTXInst<(outs),
5512               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5513                    Int64Regs:$g),
5514               "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5515               []>;
// Record for "sust.b.a1d.v4.b8.zero": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g, $b, $a
// (Int16Regs, though the store is b8). The asm string is split over two
// adjacent literals.
5516 def SUST_B_1D_ARRAY_V4B8_ZERO
5517   : NVPTXInst<(outs),
5518               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5519                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5520               "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
5521               "\\{$r, $g, $b, $a\\};",
5522               []>;
// Record for "sust.b.a1d.v4.b16.zero": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g, $b, $a
// (Int16Regs). The asm string is split over two adjacent literals.
5523 def SUST_B_1D_ARRAY_V4B16_ZERO
5524   : NVPTXInst<(outs),
5525               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5526                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5527              "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
5528              "\\{$r, $g, $b, $a\\};",
5529               []>;
// Record for "sust.b.a1d.v4.b32.zero": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g, $b, $a
// (Int32Regs). The asm string is split over two adjacent literals.
5530 def SUST_B_1D_ARRAY_V4B32_ZERO
5531   : NVPTXInst<(outs),
5532               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5533                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5534              "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
5535              "\\{$r, $g, $b, $a\\};",
5536               []>;
// Record for "sust.b.2d.b8.zero": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r (Int16Regs, though
// the store is b8).
5539 def SUST_B_2D_B8_ZERO
5540   : NVPTXInst<(outs),
5541               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5542               "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5543               []>;
// Record for "sust.b.2d.b16.zero": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r (Int16Regs).
5544 def SUST_B_2D_B16_ZERO
5545   : NVPTXInst<(outs),
5546               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5547               "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5548               []>;
// Record for "sust.b.2d.b32.zero": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r (Int32Regs).
5549 def SUST_B_2D_B32_ZERO
5550   : NVPTXInst<(outs),
5551               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5552               "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5553               []>;
// Record for "sust.b.2d.b64.zero": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r (Int64Regs).
5554 def SUST_B_2D_B64_ZERO
5555   : NVPTXInst<(outs),
5556               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5557               "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5558               []>;
// Record for "sust.b.2d.v2.b8.zero": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r, $g (Int16Regs,
// though the store is b8).
5559 def SUST_B_2D_V2B8_ZERO
5560   : NVPTXInst<(outs),
5561               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5562                    Int16Regs:$g),
5563               "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5564               []>;
// Record for "sust.b.2d.v2.b16.zero": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r, $g (Int16Regs).
5565 def SUST_B_2D_V2B16_ZERO
5566   : NVPTXInst<(outs),
5567               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5568                    Int16Regs:$g),
5569               "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5570               []>;
// Record for "sust.b.2d.v2.b32.zero": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r, $g (Int32Regs).
5571 def SUST_B_2D_V2B32_ZERO
5572   : NVPTXInst<(outs),
5573               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5574                    Int32Regs:$g),
5575               "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5576               []>;
// Record for "sust.b.2d.v2.b64.zero": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r, $g (Int64Regs).
5577 def SUST_B_2D_V2B64_ZERO
5578   : NVPTXInst<(outs),
5579               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5580                    Int64Regs:$g),
5581               "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5582               []>;
// Record for "sust.b.2d.v4.b8.zero": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r, $g, $b, $a
// (Int16Regs, though the store is b8). The asm string is split over two
// adjacent literals.
5583 def SUST_B_2D_V4B8_ZERO
5584   : NVPTXInst<(outs),
5585               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5586                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5587               "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
5588               "\\{$r, $g, $b, $a\\};",
5589               []>;
// Record for "sust.b.2d.v4.b16.zero": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r, $g, $b, $a
// (Int16Regs). The asm string is split over two adjacent literals.
5590 def SUST_B_2D_V4B16_ZERO
5591   : NVPTXInst<(outs),
5592               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5593                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5594              "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
5595              "\\{$r, $g, $b, $a\\};",
5596               []>;
// Record for "sust.b.2d.v4.b32.zero": no outputs; inputs are $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and data $r, $g, $b, $a
// (Int32Regs). The asm string is split over two adjacent literals.
5597 def SUST_B_2D_V4B32_ZERO
5598   : NVPTXInst<(outs),
5599               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5600                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5601              "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
5602              "\\{$r, $g, $b, $a\\};",
5603               []>;
// Record for "sust.b.a2d.b8.zero": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r (Int16Regs, though the store is b8).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5606 def SUST_B_2D_ARRAY_B8_ZERO
5607   : NVPTXInst<(outs),
5608               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5609                    Int16Regs:$r),
5610               "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5611               []>;
// Record for "sust.b.a2d.b16.zero": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r (Int16Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5612 def SUST_B_2D_ARRAY_B16_ZERO
5613   : NVPTXInst<(outs),
5614               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5615                    Int16Regs:$r),
5616               "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5617               []>;
// Record for "sust.b.a2d.b32.zero": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r (Int32Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5618 def SUST_B_2D_ARRAY_B32_ZERO
5619   : NVPTXInst<(outs),
5620               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5621                    Int32Regs:$r),
5622               "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5623               []>;
// Record for "sust.b.a2d.b64.zero": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r (Int64Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5624 def SUST_B_2D_ARRAY_B64_ZERO
5625   : NVPTXInst<(outs),
5626               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5627                    Int64Regs:$r),
5628               "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5629               []>;
// Record for "sust.b.a2d.v2.b8.zero": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r, $g (Int16Regs, though the store is
// b8).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5630 def SUST_B_2D_ARRAY_V2B8_ZERO
5631   : NVPTXInst<(outs),
5632               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5633                    Int16Regs:$r, Int16Regs:$g),
5634               "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5635               "\\{$r, $g\\};",
5636               []>;
// Record for "sust.b.a2d.v2.b16.zero": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r, $g (Int16Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5637 def SUST_B_2D_ARRAY_V2B16_ZERO
5638   : NVPTXInst<(outs),
5639               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5640                    Int16Regs:$r, Int16Regs:$g),
5641              "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5642              "\\{$r, $g\\};",
5643               []>;
// Record for "sust.b.a2d.v2.b32.zero": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r, $g (Int32Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5644 def SUST_B_2D_ARRAY_V2B32_ZERO
5645   : NVPTXInst<(outs),
5646               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5647                    Int32Regs:$r, Int32Regs:$g),
5648              "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5649              "\\{$r, $g\\};",
5650               []>;
// Record for "sust.b.a2d.v2.b64.zero": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r, $g (Int64Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5651 def SUST_B_2D_ARRAY_V2B64_ZERO
5652   : NVPTXInst<(outs),
5653               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5654                    Int64Regs:$r, Int64Regs:$g),
5655              "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5656              "\\{$r, $g\\};",
5657               []>;
// Record for "sust.b.a2d.v4.b8.zero": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r, $g, $b, $a (Int16Regs, though the
// store is b8).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5658 def SUST_B_2D_ARRAY_V4B8_ZERO
5659   : NVPTXInst<(outs),
5660               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5661                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5662       "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5663       "\\{$r, $g, $b, $a\\};",
5664               []>;
// Record for "sust.b.a2d.v4.b16.zero": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r, $g, $b, $a (Int16Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5665 def SUST_B_2D_ARRAY_V4B16_ZERO
5666   : NVPTXInst<(outs),
5667               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5668                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5669      "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5670      "\\{$r, $g, $b, $a\\};",
5671               []>;
// Record for "sust.b.a2d.v4.b32.zero": no outputs; inputs are $s (Int64Regs),
// $idx, $x, $y (Int32Regs) and data $r, $g, $b, $a (Int32Regs).
// The coordinate tuple is printed as {$idx, $x, $y, $y}: $y is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for a2d
// -- confirm against the PTX ISA.
5672 def SUST_B_2D_ARRAY_V4B32_ZERO
5673   : NVPTXInst<(outs),
5674               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5675                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5676      "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5677      "\\{$r, $g, $b, $a\\};",
5678               []>;
// Record for "sust.b.3d.b8.zero": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r (Int16Regs, though the store is b8).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5681 def SUST_B_3D_B8_ZERO
5682   : NVPTXInst<(outs),
5683               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5684                    Int16Regs:$r),
5685               "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5686               []>;
// Record for "sust.b.3d.b16.zero": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r (Int16Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5687 def SUST_B_3D_B16_ZERO
5688   : NVPTXInst<(outs),
5689               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5690                    Int16Regs:$r),
5691               "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5692               []>;
// Record for "sust.b.3d.b32.zero": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r (Int32Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5693 def SUST_B_3D_B32_ZERO
5694   : NVPTXInst<(outs),
5695               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5696                    Int32Regs:$r),
5697               "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5698               []>;
// Record for "sust.b.3d.b64.zero": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r (Int64Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5699 def SUST_B_3D_B64_ZERO
5700   : NVPTXInst<(outs),
5701               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5702                    Int64Regs:$r),
5703               "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5704               []>;
// Record for "sust.b.3d.v2.b8.zero": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r, $g (Int16Regs, though the store is b8).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5705 def SUST_B_3D_V2B8_ZERO
5706   : NVPTXInst<(outs),
5707               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5708                    Int16Regs:$r, Int16Regs:$g),
5709               "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5710               "\\{$r, $g\\};",
5711               []>;
// Record for "sust.b.3d.v2.b16.zero": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r, $g (Int16Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5712 def SUST_B_3D_V2B16_ZERO
5713   : NVPTXInst<(outs),
5714               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5715                    Int16Regs:$r, Int16Regs:$g),
5716               "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5717               "\\{$r, $g\\};",
5718               []>;
// Record for "sust.b.3d.v2.b32.zero": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r, $g (Int32Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5719 def SUST_B_3D_V2B32_ZERO
5720   : NVPTXInst<(outs),
5721               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5722                    Int32Regs:$r, Int32Regs:$g),
5723               "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5724               "\\{$r, $g\\};",
5725               []>;
// Record for "sust.b.3d.v2.b64.zero": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r, $g (Int64Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5726 def SUST_B_3D_V2B64_ZERO
5727   : NVPTXInst<(outs),
5728               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5729                    Int64Regs:$r, Int64Regs:$g),
5730               "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5731               "\\{$r, $g\\};",
5732               []>;
// Record for "sust.b.3d.v4.b8.zero": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r, $g, $b, $a (Int16Regs, though the
// store is b8).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5733 def SUST_B_3D_V4B8_ZERO
5734   : NVPTXInst<(outs),
5735               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5736                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5737          "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5738          "\\{$r, $g, $b, $a\\};",
5739               []>;
// Record for "sust.b.3d.v4.b16.zero": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r, $g, $b, $a (Int16Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5740 def SUST_B_3D_V4B16_ZERO
5741   : NVPTXInst<(outs),
5742               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5743                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5744         "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5745         "\\{$r, $g, $b, $a\\};",
5746               []>;
// Record for "sust.b.3d.v4.b32.zero": no outputs; inputs are $s (Int64Regs),
// $x, $y, $z (Int32Regs) and data $r, $g, $b, $a (Int32Regs).
// The coordinate tuple is printed as {$x, $y, $z, $z}: $z is repeated to
// fill a fourth slot. NOTE(review): presumably padding PTX requires for 3d
// -- confirm against the PTX ISA.
5747 def SUST_B_3D_V4B32_ZERO
5748   : NVPTXInst<(outs),
5749               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5750                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5751         "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5752         "\\{$r, $g, $b, $a\\};",
5753               []>;
5757 // Formatted stores (sust.p)
// Record for "sust.p.1d.b8.trap": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r (Int16Regs, though the store is b8).
5759 def SUST_P_1D_B8_TRAP
5760   : NVPTXInst<(outs),
5761               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5762               "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5763               []>;
// Record for "sust.p.1d.b16.trap": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r (Int16Regs).
5764 def SUST_P_1D_B16_TRAP
5765   : NVPTXInst<(outs),
5766               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5767               "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5768               []>;
// Record for "sust.p.1d.b32.trap": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r (Int32Regs).
5769 def SUST_P_1D_B32_TRAP
5770   : NVPTXInst<(outs),
5771               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5772               "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5773               []>;
// Record for "sust.p.1d.v2.b8.trap": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r, $g (Int16Regs, though the store is
// b8).
5774 def SUST_P_1D_V2B8_TRAP
5775   : NVPTXInst<(outs),
5776               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5777               "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5778               []>;
// Record for "sust.p.1d.v2.b16.trap": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r, $g (Int16Regs).
5779 def SUST_P_1D_V2B16_TRAP
5780   : NVPTXInst<(outs),
5781               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5782               "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5783               []>;
// Record for "sust.p.1d.v2.b32.trap": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r, $g (Int32Regs).
5784 def SUST_P_1D_V2B32_TRAP
5785   : NVPTXInst<(outs),
5786               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5787               "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5788               []>;
// Record for "sust.p.1d.v4.b8.trap": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r, $g, $b, $a (Int16Regs, though the
// store is b8).
5789 def SUST_P_1D_V4B8_TRAP
5790   : NVPTXInst<(outs),
5791               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5792                    Int16Regs:$b, Int16Regs:$a),
5793               "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5794               []>;
// Record for "sust.p.1d.v4.b16.trap": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r, $g, $b, $a (Int16Regs).
5795 def SUST_P_1D_V4B16_TRAP
5796   : NVPTXInst<(outs),
5797               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5798                    Int16Regs:$b, Int16Regs:$a),
5799               "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5800               []>;
// Record for "sust.p.1d.v4.b32.trap": no outputs; inputs are $s (Int64Regs),
// coordinate $x (Int32Regs) and data $r, $g, $b, $a (Int32Regs).
5801 def SUST_P_1D_V4B32_TRAP
5802   : NVPTXInst<(outs),
5803               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5804                    Int32Regs:$b, Int32Regs:$a),
5805               "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5806               []>;
// Record for "sust.p.a1d.b8.trap": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r (Int16Regs,
// though the store is b8).
5809 def SUST_P_1D_ARRAY_B8_TRAP
5810   : NVPTXInst<(outs),
5811               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5812               "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5813               []>;
// Record for "sust.p.a1d.b16.trap": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r (Int16Regs).
5814 def SUST_P_1D_ARRAY_B16_TRAP
5815   : NVPTXInst<(outs),
5816               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5817               "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5818               []>;
// Record for "sust.p.a1d.b32.trap": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r (Int32Regs).
5819 def SUST_P_1D_ARRAY_B32_TRAP
5820   : NVPTXInst<(outs),
5821               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5822               "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5823               []>;
// Record for "sust.p.a1d.v2.b8.trap": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g (Int16Regs,
// though the store is b8).
5824 def SUST_P_1D_ARRAY_V2B8_TRAP
5825   : NVPTXInst<(outs),
5826               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5827                    Int16Regs:$g),
5828               "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5829               []>;
// Record for "sust.p.a1d.v2.b16.trap": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g (Int16Regs).
5830 def SUST_P_1D_ARRAY_V2B16_TRAP
5831   : NVPTXInst<(outs),
5832               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5833                    Int16Regs:$g),
5834               "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5835               []>;
// Record for "sust.p.a1d.v2.b32.trap": no outputs; inputs are $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and data $r, $g (Int32Regs).
5836 def SUST_P_1D_ARRAY_V2B32_TRAP
5837   : NVPTXInst<(outs),
5838               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5839                    Int32Regs:$g),
5840               "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5841               []>;
// Assembly form: sust.p.a1d.v4.b8.trap [$s, {$idx, $x}], {$r, $g, $b, $a};
// No results, empty pattern list; data operands are Int16Regs.
// The asm string is written as two adjacent literals (address, then data).
def SUST_P_1D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Assembly form: sust.p.a1d.v4.b16.trap [$s, {$idx, $x}], {$r, $g, $b, $a};
// No results, empty pattern list; data operands are Int16Regs.
// The asm string is written as two adjacent literals (address, then data).
def SUST_P_1D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Assembly form: sust.p.a1d.v4.b32.trap [$s, {$idx, $x}], {$r, $g, $b, $a};
// No results, empty pattern list; data operands are Int32Regs.
// The asm string is written as two adjacent literals (address, then data).
def SUST_P_1D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Assembly form: sust.p.2d.b8.trap [$s, {$x, $y}], {$r};
// No results, empty pattern list; $r is an Int16Regs operand.
def SUST_P_2D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
// Assembly form: sust.p.2d.b16.trap [$s, {$x, $y}], {$r};
// No results, empty pattern list; $r is an Int16Regs operand.
def SUST_P_2D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
// Assembly form: sust.p.2d.b32.trap [$s, {$x, $y}], {$r};
// No results, empty pattern list; $r is an Int32Regs operand.
def SUST_P_2D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
// Assembly form: sust.p.2d.v2.b8.trap [$s, {$x, $y}], {$r, $g};
// No results, empty pattern list; data operands are Int16Regs.
def SUST_P_2D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
// Assembly form: sust.p.2d.v2.b16.trap [$s, {$x, $y}], {$r, $g};
// No results, empty pattern list; data operands are Int16Regs.
def SUST_P_2D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
// Assembly form: sust.p.2d.v2.b32.trap [$s, {$x, $y}], {$r, $g};
// No results, empty pattern list; data operands are Int32Regs.
def SUST_P_2D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
// Assembly form: sust.p.2d.v4.b8.trap [$s, {$x, $y}], {$r, $g, $b, $a};
// No results, empty pattern list; data operands are Int16Regs.
// The asm string is written as two adjacent literals (address, then data).
def SUST_P_2D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Assembly form: sust.p.2d.v4.b16.trap [$s, {$x, $y}], {$r, $g, $b, $a};
// No results, empty pattern list; data operands are Int16Regs.
// The asm string is written as two adjacent literals (address, then data).
def SUST_P_2D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Assembly form: sust.p.2d.v4.b32.trap [$s, {$x, $y}], {$r, $g, $b, $a};
// No results, empty pattern list; data operands are Int32Regs.
// The asm string is written as two adjacent literals (address, then data).
def SUST_P_2D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Assembly form: sust.p.a2d.b8.trap [$s, {$idx, $x, $y, $y}], {$r};
// No results, empty pattern list; $r is an Int16Regs operand.
// NOTE(review): $y is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_2D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
// Assembly form: sust.p.a2d.b16.trap [$s, {$idx, $x, $y, $y}], {$r};
// No results, empty pattern list; $r is an Int16Regs operand.
// NOTE(review): $y is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_2D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
// Assembly form: sust.p.a2d.b32.trap [$s, {$idx, $x, $y, $y}], {$r};
// No results, empty pattern list; $r is an Int32Regs operand.
// NOTE(review): $y is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_2D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
// Assembly form: sust.p.a2d.v2.b8.trap [$s, {$idx, $x, $y, $y}], {$r, $g};
// No results, empty pattern list; data operands are Int16Regs.
// NOTE(review): $y is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_2D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
// Assembly form: sust.p.a2d.v2.b16.trap [$s, {$idx, $x, $y, $y}], {$r, $g};
// No results, empty pattern list; data operands are Int16Regs.
// NOTE(review): $y is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_2D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
// Assembly form: sust.p.a2d.v2.b32.trap [$s, {$idx, $x, $y, $y}], {$r, $g};
// No results, empty pattern list; data operands are Int32Regs.
// NOTE(review): $y is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_2D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
// Assembly form:
//   sust.p.a2d.v4.b8.trap [$s, {$idx, $x, $y, $y}], {$r, $g, $b, $a};
// No results, empty pattern list; data operands are Int16Regs.
// NOTE(review): $y is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_2D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Assembly form:
//   sust.p.a2d.v4.b16.trap [$s, {$idx, $x, $y, $y}], {$r, $g, $b, $a};
// No results, empty pattern list; data operands are Int16Regs.
// NOTE(review): $y is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_2D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Assembly form:
//   sust.p.a2d.v4.b32.trap [$s, {$idx, $x, $y, $y}], {$r, $g, $b, $a};
// No results, empty pattern list; data operands are Int32Regs.
// NOTE(review): $y is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_2D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Assembly form: sust.p.3d.b8.trap [$s, {$x, $y, $z, $z}], {$r};
// No results, empty pattern list; $r is an Int16Regs operand.
// NOTE(review): $z is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_3D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
// Assembly form: sust.p.3d.b16.trap [$s, {$x, $y, $z, $z}], {$r};
// No results, empty pattern list; $r is an Int16Regs operand.
// NOTE(review): $z is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_3D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
// Assembly form: sust.p.3d.b32.trap [$s, {$x, $y, $z, $z}], {$r};
// No results, empty pattern list; $r is an Int32Regs operand.
// NOTE(review): $z is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_3D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
// Assembly form: sust.p.3d.v2.b8.trap [$s, {$x, $y, $z, $z}], {$r, $g};
// No results, empty pattern list; data operands are Int16Regs.
// NOTE(review): $z is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_3D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
// Assembly form: sust.p.3d.v2.b16.trap [$s, {$x, $y, $z, $z}], {$r, $g};
// No results, empty pattern list; data operands are Int16Regs.
// NOTE(review): $z is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_3D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
// Assembly form: sust.p.3d.v2.b32.trap [$s, {$x, $y, $z, $z}], {$r, $g};
// No results, empty pattern list; data operands are Int32Regs.
// NOTE(review): $z is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_3D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
// Assembly form:
//   sust.p.3d.v4.b8.trap [$s, {$x, $y, $z, $z}], {$r, $g, $b, $a};
// No results, empty pattern list; data operands are Int16Regs.
// NOTE(review): $z is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_3D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Assembly form:
//   sust.p.3d.v4.b16.trap [$s, {$x, $y, $z, $z}], {$r, $g, $b, $a};
// No results, empty pattern list; data operands are Int16Regs.
// NOTE(review): $z is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_3D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Assembly form:
//   sust.p.3d.v4.b32.trap [$s, {$x, $y, $z, $z}], {$r, $g, $b, $a};
// No results, empty pattern list; data operands are Int32Regs.
// NOTE(review): $z is printed twice, presumably to pad the coordinate list
// to four elements -- confirm against the PTX ISA.
def SUST_P_3D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
6045 // Surface store instruction patterns
6046 // I'm not sure why we can't just include these in the instruction definitions,
6047 // but TableGen complains of type errors :(
6049 // .clamp variant
// int_nvvm_sust_b_1d_*_clamp -> SUST_B_1D_*_CLAMP.
// Each pattern forwards the intrinsic's operands to the instruction unchanged
// and in the same order: $s, $x, then the data registers.
def : Pat<
  (int_nvvm_sust_b_1d_i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i8_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i16_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i32_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i64_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i8_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i16_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i32_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_b_1d_array_*_clamp -> SUST_B_1D_ARRAY_*_CLAMP.
// Each pattern forwards the intrinsic's operands to the instruction unchanged
// and in the same order: $s, $l, $x, then the data registers.
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_ARRAY_B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i64_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_ARRAY_B64_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_ARRAY_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i64_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_ARRAY_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_ARRAY_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_b_2d_*_clamp -> SUST_B_2D_*_CLAMP.
// Each pattern forwards the intrinsic's operands to the instruction unchanged
// and in the same order: $s, $x, $y, then the data registers.
def : Pat<
  (int_nvvm_sust_b_2d_i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  (SUST_B_2D_B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  (SUST_B_2D_B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_b_2d_array_*_clamp -> SUST_B_2D_ARRAY_*_CLAMP.
// Each pattern forwards the intrinsic's operands to the instruction unchanged
// and in the same order: $s, $l, $x, $y, then the data registers.
def : Pat<
  (int_nvvm_sust_b_2d_array_i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_ARRAY_B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_ARRAY_B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  (SUST_B_2D_ARRAY_B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i64_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  (SUST_B_2D_ARRAY_B64_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_ARRAY_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i64_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_ARRAY_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_ARRAY_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_b_3d_*_clamp -> SUST_B_3D_*_CLAMP.
// Each pattern forwards the intrinsic's operands to the instruction unchanged
// and in the same order: $s, $x, $y, $z, then the data registers.
def : Pat<
  (int_nvvm_sust_b_3d_i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
  (SUST_B_3D_B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
  (SUST_B_3D_B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r),
  (SUST_B_3D_B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r),
  (SUST_B_3D_B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_3D_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_3D_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_3D_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6377 // .trap variant
// int_nvvm_sust_b_1d_*_trap -> SUST_B_1D_*_TRAP.
// Each pattern forwards the intrinsic's operands to the instruction unchanged
// and in the same order: $s, $x, then the data registers.
def : Pat<
  (int_nvvm_sust_b_1d_i8_trap
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B8_TRAP
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i16_trap
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B16_TRAP
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i32_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_B32_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i64_trap
     Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_B64_TRAP
     Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i8_trap
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i16_trap
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i32_trap
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i64_trap
     Int64Regs:$s, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i8_trap
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i16_trap
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i32_trap
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_b_1d_array_*_trap -> SUST_B_1D_ARRAY_*_TRAP.
// Each pattern forwards the intrinsic's operands to the instruction unchanged
// and in the same order: $s, $l, $x, then the data registers.
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i16_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i32_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_ARRAY_B32_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i64_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_ARRAY_B64_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i8_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B8_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i16_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B16_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i32_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_ARRAY_V2B32_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i64_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_ARRAY_V2B64_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i8_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B8_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i16_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B16_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i32_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_ARRAY_V4B32_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6494 def : Pat<(int_nvvm_sust_b_2d_i8_trap
6495            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6496           (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6497            Int16Regs:$r)>;
6499 def : Pat<(int_nvvm_sust_b_2d_i16_trap
6500            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6501           (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6502            Int16Regs:$r)>;
6504 def : Pat<(int_nvvm_sust_b_2d_i32_trap
6505            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6506           (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6507            Int32Regs:$r)>;
6509 def : Pat<(int_nvvm_sust_b_2d_i64_trap
6510            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6511           (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6512            Int64Regs:$r)>;
// int_nvvm_sust_b_2d_v2{i8,i16,i32,i64}_trap
//   -> SUST_B_2D_V2B{8,16,32,64}_TRAP.
// Operands ($s, $x, $y, $r, $g) are forwarded unchanged and in the same
// order. The two data operands share one register class per pattern:
// Int16Regs for v2i8 and v2i16, Int32Regs for v2i32, Int64Regs for v2i64.
// NOTE(review): $r/$g are presumably the two stored components -- confirm.
6514 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6515           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6516           (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6517            Int16Regs:$r, Int16Regs:$g)>;
6519 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6520           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6521           (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6522            Int16Regs:$r, Int16Regs:$g)>;
6524 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6525           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6526           (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6527            Int32Regs:$r, Int32Regs:$g)>;
6529 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6530           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6531           (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6532            Int64Regs:$r, Int64Regs:$g)>;
// int_nvvm_sust_b_2d_v4{i8,i16,i32}_trap -> SUST_B_2D_V4B{8,16,32}_TRAP.
// Operands ($s, $x, $y, $r, $g, $b, $a) are forwarded unchanged and in the
// same order. The four data operands use Int16Regs for v4i8 and v4i16 and
// Int32Regs for v4i32; this group has no 64-bit pattern.
// NOTE(review): $r/$g/$b/$a are presumably the four stored components --
// confirm.
6534 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6535            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6536            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6537           (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6538            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6540 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6541            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6542            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6543           (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6544            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6546 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6547            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6548            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6549           (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6550            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_b_2d_array_{i8,i16,i32,i64}_trap
//   -> SUST_B_2D_ARRAY_B{8,16,32,64}_TRAP.
// Operands ($s, $l, $x, $y, $r) are forwarded unchanged and in the same
// order. $r uses Int16Regs for both i8 and i16, Int32Regs for i32 and
// Int64Regs for i64.
// NOTE(review): $l is presumably the array layer index, placed before the
// $x/$y coordinates -- inferred from naming only, confirm.
6554 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6555           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6556           (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6557            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6558            Int16Regs:$r)>;
6560 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6561           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6562           (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6563            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6564            Int16Regs:$r)>;
6566 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6567           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6568           (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6569            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6570            Int32Regs:$r)>;
6572 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6573           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6574           (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6575            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6576            Int64Regs:$r)>;
// int_nvvm_sust_b_2d_array_v2{i8,i16,i32,i64}_trap
//   -> SUST_B_2D_ARRAY_V2B{8,16,32,64}_TRAP.
// Operands ($s, $l, $x, $y, $r, $g) are forwarded unchanged and in the same
// order. The two data operands use Int16Regs for v2i8 and v2i16, Int32Regs
// for v2i32 and Int64Regs for v2i64.
// NOTE(review): $l is presumably the array layer index and $r/$g the two
// stored components -- confirm.
6578 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6579            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6580            Int16Regs:$r, Int16Regs:$g),
6581           (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6582            Int32Regs:$x, Int32Regs:$y,
6583            Int16Regs:$r, Int16Regs:$g)>;
6585 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6586            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6587            Int16Regs:$r, Int16Regs:$g),
6588           (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6589            Int32Regs:$x, Int32Regs:$y,
6590            Int16Regs:$r, Int16Regs:$g)>;
6592 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6593            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6594            Int32Regs:$g),
6595           (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6596            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6598 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6599            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6600            Int64Regs:$g),
6601           (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6602            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// int_nvvm_sust_b_2d_array_v4{i8,i16,i32}_trap
//   -> SUST_B_2D_ARRAY_V4B{8,16,32}_TRAP.
// Operands ($s, $l, $x, $y, $r, $g, $b, $a) are forwarded unchanged and in
// the same order. The four data operands use Int16Regs for v4i8 and v4i16
// and Int32Regs for v4i32; this group has no 64-bit pattern.
// NOTE(review): $l is presumably the array layer index and $r/$g/$b/$a the
// four stored components -- confirm.
6604 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6605            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6606            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6607           (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6608            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6609            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6611 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6612            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6613            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6614           (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6615            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6616            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6618 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6619            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6620            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6621           (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6622            Int32Regs:$x, Int32Regs:$y,
6623            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_b_3d_{i8,i16,i32,i64}_trap -> SUST_B_3D_B{8,16,32,64}_TRAP.
// Operands ($s, $x, $y, $z, $r) are forwarded unchanged and in the same
// order. $r uses Int16Regs for both i8 and i16, Int32Regs for i32 and
// Int64Regs for i64.
// NOTE(review): $s looks like the surface handle and $x/$y/$z the
// coordinates -- inferred from naming only, confirm.
6627 def : Pat<(int_nvvm_sust_b_3d_i8_trap
6628            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6629            Int16Regs:$r),
6630           (SUST_B_3D_B8_TRAP Int64Regs:$s,
6631            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6632            Int16Regs:$r)>;
6634 def : Pat<(int_nvvm_sust_b_3d_i16_trap
6635            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6636            Int16Regs:$r),
6637           (SUST_B_3D_B16_TRAP Int64Regs:$s,
6638            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6639            Int16Regs:$r)>;
6641 def : Pat<(int_nvvm_sust_b_3d_i32_trap
6642            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6643            Int32Regs:$r),
6644           (SUST_B_3D_B32_TRAP Int64Regs:$s,
6645            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6646            Int32Regs:$r)>;
6648 def : Pat<(int_nvvm_sust_b_3d_i64_trap
6649            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6650            Int64Regs:$r),
6651           (SUST_B_3D_B64_TRAP Int64Regs:$s,
6652            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6653            Int64Regs:$r)>;
// int_nvvm_sust_b_3d_v2{i8,i16,i32,i64}_trap
//   -> SUST_B_3D_V2B{8,16,32,64}_TRAP.
// Operands ($s, $x, $y, $z, $r, $g) are forwarded unchanged and in the same
// order. The two data operands use Int16Regs for v2i8 and v2i16, Int32Regs
// for v2i32 and Int64Regs for v2i64.
// NOTE(review): $r/$g are presumably the two stored components -- confirm.
6655 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6656            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6657            Int16Regs:$r, Int16Regs:$g),
6658           (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6659            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6660            Int16Regs:$r, Int16Regs:$g)>;
6662 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6663            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6664            Int16Regs:$r, Int16Regs:$g),
6665           (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6666            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6667            Int16Regs:$r, Int16Regs:$g)>;
6669 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6670            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6671            Int32Regs:$r, Int32Regs:$g),
6672           (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6673            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6674            Int32Regs:$r, Int32Regs:$g)>;
6676 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6677            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6678            Int64Regs:$r, Int64Regs:$g),
6679           (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6680            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6681            Int64Regs:$r, Int64Regs:$g)>;
// int_nvvm_sust_b_3d_v4{i8,i16,i32}_trap -> SUST_B_3D_V4B{8,16,32}_TRAP.
// Operands ($s, $x, $y, $z, $r, $g, $b, $a) are forwarded unchanged and in
// the same order. The four data operands use Int16Regs for v4i8 and v4i16
// and Int32Regs for v4i32; this group has no 64-bit pattern.
// NOTE(review): $r/$g/$b/$a are presumably the four stored components --
// confirm.
6683 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6684            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6685            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6686           (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6687            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6688            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6690 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6691            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6692            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6693           (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6694            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6695            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6697 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6698            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6699            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6700           (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6701            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6702            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6705 // .zero variant: sust.b patterns that select the SUST_B_*_ZERO instructions
// int_nvvm_sust_b_1d_{i8,i16,i32,i64}_zero -> SUST_B_1D_B{8,16,32,64}_ZERO.
// Operands ($s, $x, $r) are forwarded unchanged and in the same order.
// $r uses Int16Regs for both i8 and i16, Int32Regs for i32 and Int64Regs
// for i64.
// NOTE(review): $s looks like the surface handle and $x the coordinate --
// inferred from naming only, confirm.
6706 def : Pat<(int_nvvm_sust_b_1d_i8_zero
6707            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6708           (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6710 def : Pat<(int_nvvm_sust_b_1d_i16_zero
6711            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6712           (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6714 def : Pat<(int_nvvm_sust_b_1d_i32_zero
6715            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6716           (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6718 def : Pat<(int_nvvm_sust_b_1d_i64_zero
6719            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6720           (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// int_nvvm_sust_b_1d_v2{i8,i16,i32,i64}_zero
//   -> SUST_B_1D_V2B{8,16,32,64}_ZERO.
// Operands ($s, $x, $r, $g) are forwarded unchanged and in the same order.
// The two data operands use Int16Regs for v2i8 and v2i16, Int32Regs for
// v2i32 and Int64Regs for v2i64.
// NOTE(review): $r/$g are presumably the two stored components -- confirm.
6722 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6723            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6724           (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6725            Int16Regs:$r, Int16Regs:$g)>;
6727 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6728            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6729           (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6730            Int16Regs:$r, Int16Regs:$g)>;
6732 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6733            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6734           (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6735            Int32Regs:$r, Int32Regs:$g)>;
6737 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6738            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6739           (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6740            Int64Regs:$r, Int64Regs:$g)>;
// int_nvvm_sust_b_1d_v4{i8,i16,i32}_zero -> SUST_B_1D_V4B{8,16,32}_ZERO.
// Operands ($s, $x, $r, $g, $b, $a) are forwarded unchanged and in the same
// order. The four data operands use Int16Regs for v4i8 and v4i16 and
// Int32Regs for v4i32; this group has no 64-bit pattern.
// NOTE(review): $r/$g/$b/$a are presumably the four stored components --
// confirm.
6742 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6743            Int64Regs:$s, Int32Regs:$x,
6744            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6745           (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6746            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6748 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6749            Int64Regs:$s, Int32Regs:$x,
6750            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6751           (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6752            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6754 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6755            Int64Regs:$s, Int32Regs:$x,
6756            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6757           (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6758            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_b_1d_array_{i8,i16,i32,i64}_zero
//   -> SUST_B_1D_ARRAY_B{8,16,32,64}_ZERO.
// Operands ($s, $l, $x, $r) are forwarded unchanged and in the same order.
// $r uses Int16Regs for both i8 and i16, Int32Regs for i32 and Int64Regs
// for i64.
// NOTE(review): $l is presumably the array layer index, placed before the
// $x coordinate -- inferred from naming only, confirm.
6762 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6763            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6764           (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6765            Int16Regs:$r)>;
6767 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6768            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6769           (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6770            Int16Regs:$r)>;
6772 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6773            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6774           (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6775            Int32Regs:$r)>;
6777 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6778            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6779           (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6780            Int64Regs:$r)>;
// int_nvvm_sust_b_1d_array_v2{i8,i16,i32,i64}_zero
//   -> SUST_B_1D_ARRAY_V2B{8,16,32,64}_ZERO.
// Operands ($s, $l, $x, $r, $g) are forwarded unchanged and in the same
// order. The two data operands use Int16Regs for v2i8 and v2i16, Int32Regs
// for v2i32 and Int64Regs for v2i64.
// NOTE(review): $l is presumably the array layer index and $r/$g the two
// stored components -- confirm.
6782 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6783           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6784           (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6785            Int16Regs:$r, Int16Regs:$g)>;
6787 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6788           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6789           (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6790            Int16Regs:$r, Int16Regs:$g)>;
6792 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6793           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6794           (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6795            Int32Regs:$r, Int32Regs:$g)>;
6797 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6798           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6799           (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6800            Int64Regs:$r, Int64Regs:$g)>;
// int_nvvm_sust_b_1d_array_v4{i8,i16,i32}_zero
//   -> SUST_B_1D_ARRAY_V4B{8,16,32}_ZERO.
// Operands ($s, $l, $x, $r, $g, $b, $a) are forwarded unchanged and in the
// same order. The four data operands use Int16Regs for v4i8 and v4i16 and
// Int32Regs for v4i32; this group has no 64-bit pattern.
// NOTE(review): $l is presumably the array layer index and $r/$g/$b/$a the
// four stored components -- confirm.
6802 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6803            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6804            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6805           (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6806            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6808 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6809            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6810            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6811           (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6812            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6814 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6815            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6816            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6817           (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6818            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_b_2d_{i8,i16,i32,i64}_zero -> SUST_B_2D_B{8,16,32,64}_ZERO.
// Operands ($s, $x, $y, $r) are forwarded unchanged and in the same order.
// $r uses Int16Regs for both i8 and i16, Int32Regs for i32 and Int64Regs
// for i64.
// NOTE(review): $s looks like the surface handle and $x/$y the coordinates
// -- inferred from naming only, confirm.
6822 def : Pat<(int_nvvm_sust_b_2d_i8_zero
6823            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6824           (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6825            Int16Regs:$r)>;
6827 def : Pat<(int_nvvm_sust_b_2d_i16_zero
6828            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6829           (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6830            Int16Regs:$r)>;
6832 def : Pat<(int_nvvm_sust_b_2d_i32_zero
6833            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6834           (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6835            Int32Regs:$r)>;
6837 def : Pat<(int_nvvm_sust_b_2d_i64_zero
6838            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6839           (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6840            Int64Regs:$r)>;
// int_nvvm_sust_b_2d_v2{i8,i16,i32,i64}_zero
//   -> SUST_B_2D_V2B{8,16,32,64}_ZERO.
// Operands ($s, $x, $y, $r, $g) are forwarded unchanged and in the same
// order. The two data operands use Int16Regs for v2i8 and v2i16, Int32Regs
// for v2i32 and Int64Regs for v2i64.
// NOTE(review): $r/$g are presumably the two stored components -- confirm.
6842 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
6843           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6844           (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6845            Int16Regs:$r, Int16Regs:$g)>;
6847 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
6848           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6849           (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6850            Int16Regs:$r, Int16Regs:$g)>;
6852 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
6853           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6854           (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6855            Int32Regs:$r, Int32Regs:$g)>;
6857 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
6858           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6859           (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6860            Int64Regs:$r, Int64Regs:$g)>;
// int_nvvm_sust_b_2d_v4{i8,i16,i32}_zero -> SUST_B_2D_V4B{8,16,32}_ZERO.
// Operands ($s, $x, $y, $r, $g, $b, $a) are forwarded unchanged and in the
// same order. The four data operands use Int16Regs for v4i8 and v4i16 and
// Int32Regs for v4i32; this group has no 64-bit pattern.
// NOTE(review): $r/$g/$b/$a are presumably the four stored components --
// confirm.
6862 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
6863            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6864            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6865           (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6866            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6868 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
6869            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6870            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6871           (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6872            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6874 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
6875            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6876            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6877           (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6878            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_b_2d_array_{i8,i16,i32,i64}_zero
//   -> SUST_B_2D_ARRAY_B{8,16,32,64}_ZERO.
// Operands ($s, $l, $x, $y, $r) are forwarded unchanged and in the same
// order. $r uses Int16Regs for both i8 and i16, Int32Regs for i32 and
// Int64Regs for i64.
// NOTE(review): $l is presumably the array layer index, placed before the
// $x/$y coordinates -- inferred from naming only, confirm.
6882 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
6883           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6884           (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
6885            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6886            Int16Regs:$r)>;
6888 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
6889           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6890           (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
6891            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6892            Int16Regs:$r)>;
6894 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
6895           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6896           (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
6897            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6898            Int32Regs:$r)>;
6900 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
6901           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6902           (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
6903            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6904            Int64Regs:$r)>;
// int_nvvm_sust_b_2d_array_v2{i8,i16,i32,i64}_zero
//   -> SUST_B_2D_ARRAY_V2B{8,16,32,64}_ZERO.
// Operands ($s, $l, $x, $y, $r, $g) are forwarded unchanged and in the same
// order. The two data operands use Int16Regs for v2i8 and v2i16, Int32Regs
// for v2i32 and Int64Regs for v2i64.
// NOTE(review): $l is presumably the array layer index and $r/$g the two
// stored components -- confirm.
6906 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
6907            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6908            Int16Regs:$r, Int16Regs:$g),
6909           (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
6910            Int32Regs:$x, Int32Regs:$y,
6911            Int16Regs:$r, Int16Regs:$g)>;
6913 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
6914            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6915            Int16Regs:$r, Int16Regs:$g),
6916           (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
6917            Int32Regs:$x, Int32Regs:$y,
6918            Int16Regs:$r, Int16Regs:$g)>;
6920 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
6921            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6922            Int32Regs:$g),
6923           (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
6924            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6926 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
6927            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6928            Int64Regs:$g),
6929           (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
6930            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// int_nvvm_sust_b_2d_array_v4{i8,i16,i32}_zero
//   -> SUST_B_2D_ARRAY_V4B{8,16,32}_ZERO.
// Operands ($s, $l, $x, $y, $r, $g, $b, $a) are forwarded unchanged and in
// the same order. The four data operands use Int16Regs for v4i8 and v4i16
// and Int32Regs for v4i32; this group has no 64-bit pattern.
// NOTE(review): $l is presumably the array layer index and $r/$g/$b/$a the
// four stored components -- confirm.
6932 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
6933            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6934            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6935           (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
6936            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6937            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6939 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
6940            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6941            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6942           (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
6943            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6944            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6946 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
6947            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6948            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6949           (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
6950            Int32Regs:$x, Int32Regs:$y,
6951            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_b_3d_{i8,i16,i32,i64}_zero -> SUST_B_3D_B{8,16,32,64}_ZERO.
// Operands ($s, $x, $y, $z, $r) are forwarded unchanged and in the same
// order. $r uses Int16Regs for both i8 and i16, Int32Regs for i32 and
// Int64Regs for i64.
// NOTE(review): $s looks like the surface handle and $x/$y/$z the
// coordinates -- inferred from naming only, confirm.
6955 def : Pat<(int_nvvm_sust_b_3d_i8_zero
6956            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6957            Int16Regs:$r),
6958           (SUST_B_3D_B8_ZERO Int64Regs:$s,
6959            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6960            Int16Regs:$r)>;
6962 def : Pat<(int_nvvm_sust_b_3d_i16_zero
6963            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6964            Int16Regs:$r),
6965           (SUST_B_3D_B16_ZERO Int64Regs:$s,
6966            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6967            Int16Regs:$r)>;
6969 def : Pat<(int_nvvm_sust_b_3d_i32_zero
6970            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6971            Int32Regs:$r),
6972           (SUST_B_3D_B32_ZERO Int64Regs:$s,
6973            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6974            Int32Regs:$r)>;
6976 def : Pat<(int_nvvm_sust_b_3d_i64_zero
6977            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6978            Int64Regs:$r),
6979           (SUST_B_3D_B64_ZERO Int64Regs:$s,
6980            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6981            Int64Regs:$r)>;
// int_nvvm_sust_b_3d_v2{i8,i16,i32,i64}_zero
//   -> SUST_B_3D_V2B{8,16,32,64}_ZERO.
// Operands ($s, $x, $y, $z, $r, $g) are forwarded unchanged and in the same
// order. The two data operands use Int16Regs for v2i8 and v2i16, Int32Regs
// for v2i32 and Int64Regs for v2i64.
// NOTE(review): $r/$g are presumably the two stored components -- confirm.
6983 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6984            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6985            Int16Regs:$r, Int16Regs:$g),
6986           (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
6987            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6988            Int16Regs:$r, Int16Regs:$g)>;
6990 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6991            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6992            Int16Regs:$r, Int16Regs:$g),
6993           (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
6994            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6995            Int16Regs:$r, Int16Regs:$g)>;
6997 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6998            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6999            Int32Regs:$r, Int32Regs:$g),
7000           (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
7001            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7002            Int32Regs:$r, Int32Regs:$g)>;
7004 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
7005            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7006            Int64Regs:$r, Int64Regs:$g),
7007           (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
7008            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7009            Int64Regs:$r, Int64Regs:$g)>;
// int_nvvm_sust_b_3d_v4{i8,i16,i32}_zero -> SUST_B_3D_V4B{8,16,32}_ZERO.
// Operands ($s, $x, $y, $z, $r, $g, $b, $a) are forwarded unchanged and in
// the same order. The four data operands use Int16Regs for v4i8 and v4i16
// and Int32Regs for v4i32; this group has no 64-bit pattern.
// NOTE(review): $r/$g/$b/$a are presumably the four stored components --
// confirm.
7011 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
7012            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7013            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7014           (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
7015            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7016            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7018 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
7019            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7020            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7021           (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
7022            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7023            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7025 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
7026            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7027            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7028           (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
7029            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7030            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_p_1d_{i8,i16,i32}_trap -> SUST_P_1D_B{8,16,32}_TRAP.
// Operands ($s, $x, $r) are forwarded unchanged and in the same order.
// $r uses Int16Regs for both i8 and i16 and Int32Regs for i32; this group
// has no i64 pattern.
// NOTE(review): "sust_p" presumably names a different store flavour from
// the sust_b patterns, and $s/$x the surface handle/coordinate -- inferred
// from naming only, confirm.
7035 def : Pat<(int_nvvm_sust_p_1d_i8_trap
7036            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
7037           (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
7039 def : Pat<(int_nvvm_sust_p_1d_i16_trap
7040            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
7041           (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
7043 def : Pat<(int_nvvm_sust_p_1d_i32_trap
7044            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
7045           (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
// int_nvvm_sust_p_1d_v2{i8,i16,i32}_trap -> SUST_P_1D_V2B{8,16,32}_TRAP.
// Operands ($s, $x, $r, $g) are forwarded unchanged and in the same order.
// The two data operands use Int16Regs for v2i8 and v2i16 and Int32Regs for
// v2i32; this group has no 64-bit pattern.
// NOTE(review): $r/$g are presumably the two stored components -- confirm.
7047 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
7048            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7049           (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
7050            Int16Regs:$r, Int16Regs:$g)>;
7052 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
7053            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7054           (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
7055            Int16Regs:$r, Int16Regs:$g)>;
7057 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
7058            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7059           (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
7060            Int32Regs:$r, Int32Regs:$g)>;
// int_nvvm_sust_p_1d_v4{i8,i16,i32}_trap -> SUST_P_1D_V4B{8,16,32}_TRAP.
// Operands ($s, $x, $r, $g, $b, $a) are forwarded unchanged and in the same
// order. The four data operands use Int16Regs for v4i8 and v4i16 and
// Int32Regs for v4i32.
// NOTE(review): $r/$g/$b/$a are presumably the four stored components --
// confirm.
7062 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
7063            Int64Regs:$s, Int32Regs:$x,
7064            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7065           (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
7066            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7068 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
7069            Int64Regs:$s, Int32Regs:$x,
7070            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7071           (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
7072            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7074 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
7075            Int64Regs:$s, Int32Regs:$x,
7076            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7077           (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
7078            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_p_1d_array_{i8,i16,i32}_trap
//   -> SUST_P_1D_ARRAY_B{8,16,32}_TRAP.
// Operands ($s, $l, $x, $r) are forwarded unchanged and in the same order.
// $r uses Int16Regs for both i8 and i16 and Int32Regs for i32; this group
// has no i64 pattern.
// NOTE(review): $l is presumably the array layer index, placed before the
// $x coordinate -- inferred from naming only, confirm.
7082 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
7083            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7084           (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7085            Int16Regs:$r)>;
7087 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
7088            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7089           (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7090            Int16Regs:$r)>;
7092 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
7093            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
7094           (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7095            Int32Regs:$r)>;
// int_nvvm_sust_p_1d_array_v2{i8,i16,i32}_trap
//   -> SUST_P_1D_ARRAY_V2B{8,16,32}_TRAP.
// Operands ($s, $l, $x, $r, $g) are forwarded unchanged and in the same
// order. The two data operands use Int16Regs for v2i8 and v2i16 and
// Int32Regs for v2i32; this group has no 64-bit pattern.
// NOTE(review): $l is presumably the array layer index and $r/$g the two
// stored components -- confirm.
7097 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
7098           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7099           (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7100            Int16Regs:$r, Int16Regs:$g)>;
7102 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
7103           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7104           (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7105            Int16Regs:$r, Int16Regs:$g)>;
7107 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
7108           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7109           (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7110            Int32Regs:$r, Int32Regs:$g)>;
// int_nvvm_sust_p_1d_array_v4{i8,i16,i32}_trap
//   -> SUST_P_1D_ARRAY_V4B{8,16,32}_TRAP.
// Operands ($s, $l, $x, $r, $g, $b, $a) are forwarded unchanged and in the
// same order. The four data operands use Int16Regs for v4i8 and v4i16 and
// Int32Regs for v4i32.
// NOTE(review): $l is presumably the array layer index and $r/$g/$b/$a the
// four stored components -- confirm.
7112 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
7113            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7114            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7115           (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7116            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7118 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
7119            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7120            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7121           (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7122            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7124 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
7125            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7126            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7127           (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7128            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_p_2d_{i8,i16,i32}_trap -> SUST_P_2D_B{8,16,32}_TRAP.
// Operands ($s, $x, $y, $r) are forwarded unchanged and in the same order.
// $r uses Int16Regs for both i8 and i16 and Int32Regs for i32; this group
// has no i64 pattern.
// NOTE(review): $s looks like the surface handle and $x/$y the coordinates
// -- inferred from naming only, confirm.
7132 def : Pat<(int_nvvm_sust_p_2d_i8_trap
7133            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7134           (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7135            Int16Regs:$r)>;
7137 def : Pat<(int_nvvm_sust_p_2d_i16_trap
7138            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7139           (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7140            Int16Regs:$r)>;
7142 def : Pat<(int_nvvm_sust_p_2d_i32_trap
7143            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7144           (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7145            Int32Regs:$r)>;
// int_nvvm_sust_p_2d_v2{i8,i16,i32}_trap -> SUST_P_2D_V2B{8,16,32}_TRAP.
// Operands ($s, $x, $y, $r, $g) are forwarded unchanged and in the same
// order. The two data operands use Int16Regs for v2i8 and v2i16 and
// Int32Regs for v2i32; this group has no 64-bit pattern.
// NOTE(review): $r/$g are presumably the two stored components -- confirm.
7147 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
7148           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7149           (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7150            Int16Regs:$r, Int16Regs:$g)>;
7152 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
7153           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7154           (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7155            Int16Regs:$r, Int16Regs:$g)>;
7157 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
7158           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
7159           (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7160            Int32Regs:$r, Int32Regs:$g)>;
// int_nvvm_sust_p_2d_v4{i8,i16,i32}_trap -> SUST_P_2D_V4B{8,16,32}_TRAP.
// Operands ($s, $x, $y, $r, $g, $b, $a) are forwarded unchanged and in the
// same order. The four data operands use Int16Regs for v4i8 and v4i16 and
// Int32Regs for v4i32.
// NOTE(review): $r/$g/$b/$a are presumably the four stored components --
// confirm.
7162 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
7163            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7164            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7165           (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7166            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7168 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
7169            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7170            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7171           (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7172            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7174 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
7175            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7176            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7177           (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7178            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_p_2d_array_{i8,i16,i32}_trap
//   -> SUST_P_2D_ARRAY_B{8,16,32}_TRAP.
// Operands ($s, $l, $x, $y, $r) are forwarded unchanged and in the same
// order. $r uses Int16Regs for both i8 and i16 and Int32Regs for i32; this
// group has no i64 pattern.
// NOTE(review): $l is presumably the array layer index, placed before the
// $x/$y coordinates -- inferred from naming only, confirm.
7182 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
7183           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7184           (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
7185            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7186            Int16Regs:$r)>;
7188 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
7189           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7190           (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
7191            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7192            Int16Regs:$r)>;
7194 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
7195           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7196           (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
7197            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7198            Int32Regs:$r)>;
7200 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
7201            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7202            Int16Regs:$r, Int16Regs:$g),
7203           (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
7204            Int32Regs:$x, Int32Regs:$y,
7205            Int16Regs:$r, Int16Regs:$g)>;
7207 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
7208            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7209            Int16Regs:$r, Int16Regs:$g),
7210           (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
7211            Int32Regs:$x, Int32Regs:$y,
7212            Int16Regs:$r, Int16Regs:$g)>;
7214 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
7215            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
7216            Int32Regs:$g),
7217           (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
7218            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
7220 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
7221            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7222            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7223           (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
7224            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7225            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7227 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
7228            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7229            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7230           (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
7231            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7232            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7234 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
7235            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7236            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7237           (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
7238            Int32Regs:$x, Int32Regs:$y,
7239            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 3D surface-store intrinsics of the "_trap"
// family. Every pattern takes $s followed by three Int32Regs operands
// ($x, $y, $z) and then the value operands ($r, $g and, for the 4-element
// forms, $b, $a). Each int_nvvm_sust_p_3d_*_trap call is mapped onto the
// like-named SUST_P_3D_*_TRAP instruction with the operands forwarded
// unchanged and in the same order. The i8 and i16 forms both carry their
// values in Int16Regs; only the i32 forms use Int32Regs.
7243 def : Pat<(int_nvvm_sust_p_3d_i8_trap
7244            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7245            Int16Regs:$r),
7246           (SUST_P_3D_B8_TRAP Int64Regs:$s,
7247            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7248            Int16Regs:$r)>;
7250 def : Pat<(int_nvvm_sust_p_3d_i16_trap
7251            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7252            Int16Regs:$r),
7253           (SUST_P_3D_B16_TRAP Int64Regs:$s,
7254            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7255            Int16Regs:$r)>;
7257 def : Pat<(int_nvvm_sust_p_3d_i32_trap
7258            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7259            Int32Regs:$r),
7260           (SUST_P_3D_B32_TRAP Int64Regs:$s,
7261            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7262            Int32Regs:$r)>;
7264 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
7265            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7266            Int16Regs:$r, Int16Regs:$g),
7267           (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
7268            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7269            Int16Regs:$r, Int16Regs:$g)>;
7271 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
7272            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7273            Int16Regs:$r, Int16Regs:$g),
7274           (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
7275            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7276            Int16Regs:$r, Int16Regs:$g)>;
7278 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
7279            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7280            Int32Regs:$r, Int32Regs:$g),
7281           (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
7282            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7283            Int32Regs:$r, Int32Regs:$g)>;
7285 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
7286            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7287            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7288           (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
7289            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7290            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7292 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
7293            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7294            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7295           (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
7296            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7297            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7299 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
7300            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7301            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7302           (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
7303            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7304            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7306 //-----------------------------------
7307 // Read Special Registers
7308 //-----------------------------------
// Instruction template for reading a special register into a 64-bit register.
//   regname - register name as printed in the assembly; a '%' is prepended.
//   intop   - intrinsic selected to this instruction; it is matched with no
//             operands and its result is placed in Int64Regs:$d.
// The emitted assembly is "mov.u64 \t$d, %<regname>;".
7310 class PTX_READ_SREG_R64<string regname, Intrinsic intop>
7311   : NVPTXInst<(outs Int64Regs:$d), (ins),
7312               !strconcat("mov.u64 \t$d, %", regname, ";"),
7313               [(set Int64Regs:$d, (intop))]>;
// Instruction template for reading a special register into a 32-bit register.
//   regname - register name as printed in the assembly; a '%' is prepended.
//   intop   - intrinsic selected to this instruction; it is matched with no
//             operands and its result is placed in Int32Regs:$d.
// The emitted assembly is "mov.u32 \t$d, %<regname>;".
7315 class PTX_READ_SREG_R32<string regname, Intrinsic intop>
7316   : NVPTXInst<(outs Int32Regs:$d), (ins),
7317               !strconcat("mov.u32 \t$d, %", regname, ";"),
7318               [(set Int32Regs:$d, (intop))]>;
7320 // TODO: Add vector versions of the special-register reads.
// One instruction record per special-register intrinsic. In every def the
// first template argument is the register name printed after '%' in the
// assembly and the second is the int_nvvm_read_ptx_sreg_* intrinsic that
// selects it. All of them are 32-bit reads (PTX_READ_SREG_R32) except
// INT_PTX_SREG_CLOCK64, which uses the 64-bit template.

// tid / ntid, components x, y, z, w.
7322 def INT_PTX_SREG_TID_X :
7323     PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
7324 def INT_PTX_SREG_TID_Y :
7325     PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
7326 def INT_PTX_SREG_TID_Z :
7327     PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
7328 def INT_PTX_SREG_TID_W :
7329     PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
7331 def INT_PTX_SREG_NTID_X :
7332     PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
7333 def INT_PTX_SREG_NTID_Y :
7334     PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
7335 def INT_PTX_SREG_NTID_Z :
7336     PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
7337 def INT_PTX_SREG_NTID_W :
7338     PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
// laneid / warpid / nwarpid.
7340 def INT_PTX_SREG_LANEID :
7341     PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
7342 def INT_PTX_SREG_WARPID :
7343     PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
7344 def INT_PTX_SREG_NWARPID :
7345     PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
// ctaid / nctaid, components x, y, z, w.
7347 def INT_PTX_SREG_CTAID_X :
7348     PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
7349 def INT_PTX_SREG_CTAID_Y :
7350     PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
7351 def INT_PTX_SREG_CTAID_Z :
7352     PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
7353 def INT_PTX_SREG_CTAID_W :
7354     PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
7356 def INT_PTX_SREG_NCTAID_X :
7357     PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
7358 def INT_PTX_SREG_NCTAID_Y :
7359     PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
7360 def INT_PTX_SREG_NCTAID_Z :
7361     PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
7362 def INT_PTX_SREG_NCTAID_W :
7363     PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
// smid / nsmid / gridid.
7365 def INT_PTX_SREG_SMID :
7366     PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
7367 def INT_PTX_SREG_NSMID :
7368     PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
7369 def INT_PTX_SREG_GRIDID :
7370     PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
// lanemask_{eq,le,lt,ge,gt}.
7372 def INT_PTX_SREG_LANEMASK_EQ :
7373     PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
7374 def INT_PTX_SREG_LANEMASK_LE :
7375     PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
7376 def INT_PTX_SREG_LANEMASK_LT :
7377     PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
7378 def INT_PTX_SREG_LANEMASK_GE :
7379     PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
7380 def INT_PTX_SREG_LANEMASK_GT :
7381     PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
// clock (32-bit read) and clock64 (the only 64-bit read in this list).
7383 def INT_PTX_SREG_CLOCK :
7384     PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
7385 def INT_PTX_SREG_CLOCK64 :
7386     PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
// pm0 .. pm3.
7388 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
7389 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
7390 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
7391 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
7393 // TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
7394 // prints the name with a '%' prefix, whereas the WARP_SZ constant is printed bare.
// Selected for int_nvvm_read_ptx_sreg_warpsize; emits "mov.u32 \t$dst, WARP_SZ;".
7395 def INT_PTX_SREG_WARPSIZE :
7396     NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
7397               [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
7399 // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
7400 // In addition to target-independent fields provided by WMMA_REGS, it adds
7401 // the fields commonly used to implement specific PTX instruction -- register
7402 // types and names, constraints, parts of assembly, etc.
// The template argument `r` is re-expanded into the WMMA_REGS base via its
// geom, frag and ptx_elt_type fields. `regs`, used below, is not defined in
// this class (NOTE(review): presumably inherited from WMMA_REGS -- confirm).
7403 class WMMA_REGINFO<WMMA_REGS r>
7404       : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
7405   // NVPTX register types used to carry fragment data.
  // f16 uses Float16x2Regs, f32 uses Float32Regs, and every integer/bit
  // element type (s32, s8, u8, s4, u4, b1) uses Int32Regs.
7406   NVPTXRegClass regclass = !cond(
7407     !eq(ptx_elt_type, "f16") : Float16x2Regs,
7408     !eq(ptx_elt_type, "f32") : Float32Regs,
7409     !eq(ptx_elt_type, "s32") : Int32Regs,
7410     !eq(ptx_elt_type, "s8") : Int32Regs,
7411     !eq(ptx_elt_type, "u8") : Int32Regs,
7412     !eq(ptx_elt_type, "s4") : Int32Regs,
7413     !eq(ptx_elt_type, "u4") : Int32Regs,
7414     !eq(ptx_elt_type, "b1") : Int32Regs);
7416   // Instruction input/output arguments for the fragment.
  // One `regclass` entry per element of `regs`; the element values themselves
  // are ignored (tmp is unused).
7417   list<NVPTXRegClass> ptx_regs = !foreach(tmp, regs, regclass);
7419   // List of register names for the fragment -- ["ra0", "ra1",...]
  // The prefix is "r" followed by the fragment name, so the names differ per
  // fragment.
7420   list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
7422   // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  // The first name is emitted directly; !foldl appends each remaining name
  // preceded by ", $".
7423   string regstring = "{{$" # !head(reg_names)
7424                            # !foldl("", !tail(reg_names), a, b,
7425                                     !strconcat(a, ", $", b))
7426                      # "}}";
7428   // Predicates for particular fragment variant. Technically those are
7429   // per-instruction predicates, but currently all fragments that can be used in
7430   // a given instruction are subject to the same constraints, so an instruction
7431   // can use predicates from any of its fragments. If/when this is no
7432   // longer the case, we can concat all per-fragment predicates to enforce that
7433   // all fragments of the instruction are viable.
  // The cases below are keyed on (geom, ptx_elt_type); there is no catch-all
  // case.
7434   list<Predicate> Predicates = !cond(
7435     // fp16 -> fp16/fp32 @ m16n16k16
7436     !and(!eq(geom, "m16n16k16"),
7437          !or(!eq(ptx_elt_type, "f16"),
7438              !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],
7440     // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
7441     !and(!or(!eq(geom, "m8n32k16"),
7442              !eq(geom, "m32n8k16")),
7443          !or(!eq(ptx_elt_type, "f16"),
7444              !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],
7446     // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
7447     !and(!or(!eq(geom,"m16n16k16"),
7448              !eq(geom,"m8n32k16"),
7449              !eq(geom,"m32n8k16")),
7450          !or(!eq(ptx_elt_type, "u8"),
7451              !eq(ptx_elt_type, "s8"),
7452              !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],
7454     // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1)
    // This case is keyed on the geometry alone; the element type is not checked.
7455     !or(!eq(geom,"m8n8k128"),
7456         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63]);
7458   // template DAGs for instruction inputs/output.
  // Both pair ptx_regs with reg_names; they differ only in the dag operator
  // (outs vs. ins).
7459   dag Outs = !dag(outs, ptx_regs, reg_names);
7460   dag Ins = !dag(ins, ptx_regs, reg_names);
7463 // Convert dag of arguments into a dag to match given intrinsic.
//   Intr - intrinsic that becomes the operator of the resulting dag.
//   Ins  - argument dag whose operator is `ins`.
//   ret  - Ins with `ins` replaced by Intr and the operand classes imem,
//          MEMri64 and MEMri replaced by ADDRvar, ADDRri64 and ADDRri
//          respectively; all other operands are left as they are.
7464 class BuildPatternI<Intrinsic Intr, dag Ins> {
7465   // Build a dag pattern that matches the intrinsic call.
  // The substitutions are nested, so `ins` -> Intr is applied first and
  // imem -> ADDRvar last.
7466   dag ret = !foreach(tmp, Ins,
7467                           !subst(imem, ADDRvar,
7468                           !subst(MEMri64, ADDRri64,
7469                           !subst(MEMri, ADDRri,
7470                           !subst(ins, Intr, tmp)))));
7473 // Same as BuildPatternI, but uses a PatFrag instead of an Intrinsic.
//   Intr - PatFrag that becomes the operator of the resulting dag.
//   Ins  - argument dag whose operator is `ins`.
//   ret  - Ins with `ins` replaced by Intr and the operand classes imem,
//          MEMri64 and MEMri replaced by ADDRvar, ADDRri64 and ADDRri
//          respectively; all other operands are left as they are.
7474 class BuildPatternPF<PatFrag Intr, dag Ins> {
7475   // Build a dag pattern whose root is the PatFrag rather than the intrinsic.
7476   dag ret = !foreach(tmp, Ins,
7477                           !subst(imem, ADDRvar,
7478                           !subst(MEMri64, ADDRri64,
7479                           !subst(MEMri, ADDRri,
7480                           !subst(ins, Intr, tmp)))));
7483 // Common WMMA-related fields used for building patterns for all MMA instructions.
//   _Intr - name of the intrinsic record, as a string; resolved with !cast.
//   _Args - list of argument dags that are concatenated into `Args`.
// The NVPTXInst base is instantiated with empty outs/ins, a "?" asm string and
// no patterns; the subclasses in this file override OutOperandList,
// InOperandList and AsmString with `let`.
7484 class WMMA_INSTR<string _Intr, list<dag> _Args>
7485   : NVPTXInst<(outs), (ins), "?", []> {
7486   Intrinsic Intr = !cast<Intrinsic>(_Intr);
7487   // Concatenate all arguments into a single dag.
  // The fold starts from an empty (ins) dag, so the result keeps `ins` as its
  // operator.
7488   dag Args = !foldl((ins), _Args, a, b, !con(a,b));
7489   // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
  // NOTE(review): Intr is already declared as an Intrinsic, so the !cast here
  // looks redundant.
7490   dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
7494 // wmma.load.[a|b|c].sync.[row|col].<geom>[|.global|.shared].<elt_type>
//   Frag       - fragment being loaded; supplies the result registers,
//                predicates, and the frag/geom/ptx_elt_type parts of the asm.
//   Layout     - layout string, printed after a '.' in the asm.
//   Space      - address-space suffix printed verbatim in the asm (".shared",
//                ".global", or anything else for the generic address space).
//   WithStride - when set, an extra Int32Regs:$ldm operand is added and
//                printed.
//   SrcOp      - operand class used for the $src address operand.
7497 class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
7498                 DAGOperand SrcOp>
7499   : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
7500                               [!con((ins SrcOp:$src),
7501                                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7502     Requires<Frag.Predicates> {
7503   // Load/store intrinsics are overloaded on pointer's address space.
7504   // To match the right intrinsic, we need to build AS-constrained PatFrag.
7505   // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
7506   dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
7507   // Build PatFrag that only matches particular address space.
  // The fragment body is PFOperands with `ops` replaced by the intrinsic; the
  // predicate code comes from AS_match, falling back to the generic check when
  // Space is neither ".shared" nor ".global".
7508   PatFrag IntrFrag = PatFrag<PFOperands,
7509                              !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7510                              !cond(!eq(Space, ".shared"): AS_match.shared,
7511                                    !eq(Space, ".global"): AS_match.global,
7512                                    1: AS_match.generic)>;
7513   // Build AS-constrained pattern.
  // This replaces the plain-intrinsic pattern computed in WMMA_INSTR.
7514   let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
7516   let OutOperandList = Frag.Outs;
  // $ptx is an extra trailing operand that is not part of Args; it is
  // referenced in the asm string through the ${ptx:aligned} modifier.
7517   let InOperandList = !con(Args, (ins MmaCode:$ptx));
7518   let AsmString = "wmma.load."
7519                   # Frag.frag
7520                   # ".sync"
7521                   # "${ptx:aligned}"
7522                   # "." # Layout
7523                   # "." # Frag.geom
7524                   # Space
7525                   # "." # Frag.ptx_elt_type # " \t"
7526                   # Frag.regstring
7527                   # ", [$src]"
7528                   # !if(WithStride, ", $ldm", "")
7529                   # ";";
7533 // wmma.store.d.sync.[row|col].<geom>[|.global|.shared].<elt_type>
//   Frag       - fragment being stored; supplies the input registers,
//                predicates, and the geom/ptx_elt_type parts of the asm.
//   Layout     - layout string, printed after a '.' in the asm.
//   Space      - address-space suffix printed verbatim in the asm (".shared",
//                ".global", or anything else for the generic address space).
//   WithStride - when set, an extra Int32Regs:$ldm operand is added and
//                printed.
//   DstOp      - operand class used for the $dst address operand.
7535 class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
7536                    bit WithStride, DAGOperand DstOp>
7537   : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
7538                [!con((ins DstOp:$dst),
7539                      Frag.Ins,
7540                      !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7541     Requires<Frag.Predicates> {
7543   // Load/store intrinsics are overloaded on pointer's address space.
7544   // To match the right intrinsic, we need to build AS-constrained PatFrag.
7545   // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
  // Order: $dst, one node per fragment register (named after Frag.reg_names),
  // then $ldm when WithStride is set.
7546   dag PFOperands = !con((ops node:$dst),
7547                         !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
7548                         !if(WithStride, (ops node:$ldm), (ops)));
7549   // Build PatFrag that only matches particular address space.
  // The fragment body is PFOperands with `ops` replaced by the intrinsic; the
  // predicate code comes from AS_match, falling back to the generic check when
  // Space is neither ".shared" nor ".global".
7550   PatFrag IntrFrag = PatFrag<PFOperands,
7551                              !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7552                              !cond(!eq(Space, ".shared"): AS_match.shared,
7553                                    !eq(Space, ".global"): AS_match.global,
7554                                    1: AS_match.generic)>;
7555   // Build AS-constrained pattern.
  // This replaces the plain-intrinsic pattern computed in WMMA_INSTR.
7556   let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
  // $ptx is an extra trailing operand that is not part of Args; it is
  // referenced in the asm string through the ${ptx:aligned} modifier.
7558   let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // A store defines no result registers.
7559   let OutOperandList = (outs);
7560   let AsmString = "wmma.store.d.sync"
7561                   # "${ptx:aligned}"
7562                   # "." # Layout
7563                   # "." # Frag.geom
7564                   # Space
7565                   # "." # Frag.ptx_elt_type
7566                   # " \t[$dst],"
7567                   # Frag.regstring
7568                   # !if(WithStride, ", $ldm", "")
7569                   # ";";
7572 // Create all load/store variants
// The anonymous defs are collected into the list MMA_LDSTs. The loops
// enumerate every combination of layout, stride flag, address-space suffix and
// address operand class, and instantiate WMMA_LOAD for each fragment in
// NVVM_MMA_OPS.all_ld_ops and WMMA_STORE_D for each fragment in
// NVVM_MMA_OPS.all_st_ops.
// The innermost `foreach _` ignores its loop variable, so a def is emitted
// once per element of NVVM_MMA_SUPPORTED<...>.ret.
// NOTE(review): .ret is presumably an empty or one-element list acting as a
// guard for unsupported combinations -- confirm against NVVM_MMA_SUPPORTED.
7573 defset list<WMMA_INSTR> MMA_LDSTs  = {
7574   foreach layout = ["row", "col"] in {
7575     foreach stride = [0, 1] in {
7576       foreach space = [".global", ".shared", ""] in {
7577         foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
7578           foreach frag = NVVM_MMA_OPS.all_ld_ops in
7579             foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
7580               def : WMMA_LOAD<WMMA_REGINFO<frag>, layout, space, stride, addr>;
7581           foreach frag = NVVM_MMA_OPS.all_st_ops in
7582             foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
7583               def : WMMA_STORE_D<WMMA_REGINFO<frag>, layout, space, stride, addr>;
7584         } // addr
7585       } // space
7586     } // stride
7587   } // layout
7588 } // defset
7590 // WMMA.MMA
//   FragA, FragB, FragC - input fragments; their Ins dags, in this order, form
//                         the instruction's arguments.
//   FragD               - result fragment; its Outs dag is the output list.
//   ALayout, BLayout    - layout strings, each printed after a '.' in the asm.
//   Satfinite           - when non-zero, ".satfinite" is appended to the
//                         mnemonic.
7591 class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
7592                WMMA_REGINFO FragC, WMMA_REGINFO FragD,
7593                string ALayout, string BLayout, int Satfinite>
7594   : WMMA_INSTR<WMMA_NAME_MMA<ALayout, BLayout, Satfinite, FragA, FragB, FragC, FragD>.record,
7595                              [FragA.Ins, FragB.Ins, FragC.Ins]>,
7596     // Requires does not seem to have effect on Instruction w/o Patterns.
7597     // We set it here anyways and propagate to the Pat<> we construct below.
7598     Requires<FragA.Predicates> {
7599   let OutOperandList = FragD.Outs;
  // $ptx is an extra trailing operand that is not part of Args; it is
  // referenced in the asm string through the ${ptx:aligned} modifier.
7600   let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Type suffix of the mnemonic: when the result is s32 it is
  // ".s32.<A type>.<B type>.s32"; otherwise it is ".<D type>.<C type>".
7601   string TypeList = !cond(
7602     !eq(FragD.ptx_elt_type, "s32") : ".s32"
7603                                      # "." # FragA.ptx_elt_type
7604                                      # "." # FragB.ptx_elt_type
7605                                      # ".s32",
7606     1: "." # FragD.ptx_elt_type # "." # FragC.ptx_elt_type,
7607   );
  // ".xor.popc" is inserted only when the A fragment's element type is b1.
  // Register lists are printed in the order D, A, B, C, one per line.
7608   let AsmString = "wmma.mma"
7609                   # !if(!eq(FragA.ptx_elt_type, "b1"), ".xor.popc", "")
7610                   # ".sync"
7611                   # "${ptx:aligned}"
7612                   # "." # ALayout
7613                   # "." # BLayout
7614                   # "." # FragA.geom
7615                   # TypeList
7616                   # !if(Satfinite, ".satfinite", "") # "\n\t\t"
7617                   # FragD.regstring # ",\n\t\t"
7618                   # FragA.regstring # ",\n\t\t"
7619                   # FragB.regstring # ",\n\t\t"
7620                   # FragC.regstring # ";";
// Create all WMMA_MMA variants and collect the anonymous defs into the list
// MMAs. The loops enumerate every combination of A layout, B layout and
// satfinite flag for each entry of NVVM_MMA_OPS.all_mma_ops; elements 0..3 of
// an entry are used as the A, B, C and D fragments respectively.
// The innermost `foreach _` ignores its loop variable, so a def is emitted
// once per element of NVVM_MMA_SUPPORTED<...>.ret.
// NOTE(review): .ret is presumably an empty or one-element list acting as a
// guard for unsupported combinations -- confirm against NVVM_MMA_SUPPORTED.
7623 defset list<WMMA_INSTR> MMAs  = {
7624   foreach layout_a = ["row", "col"] in {
7625     foreach layout_b = ["row", "col"] in {
7626       foreach satf = [0, 1] in {
7627         foreach op = NVVM_MMA_OPS.all_mma_ops in {
7628           foreach _ = NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret in {
7629             def : WMMA_MMA<WMMA_REGINFO<op[0]>,
7630                            WMMA_REGINFO<op[1]>,
7631                            WMMA_REGINFO<op[2]>,
7632                            WMMA_REGINFO<op[3]>,
7633                            layout_a, layout_b, satf>;
7634           }
7635         } // op
7636       } // satf
7637     } // layout_b
7638   } // layout_a
7639 } // defset
7642 // Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
7643 // dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
7644 // the instruction record.
// Selection pattern for one WMMA instruction record `wi`:
//   source - wi.IntrinsicPattern;
//   result - wi.Args with `ins` replaced by wi itself, followed by ptx.version,
//            which fills the trailing $ptx operand the WMMA instruction
//            classes append to their input lists.
// The pattern carries the same predicates as the instruction.
7645 class WMMA_PAT<WMMA_INSTR wi>
7646       : Pat<wi.IntrinsicPattern,
7647             !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
7648                  (wi ptx.version))>,
7649         Requires<wi.Predicates>;
7651 // Build intrinsic->instruction patterns for all MMA instructions.
// One anonymous WMMA_PAT is emitted per record collected in the MMAs and
// MMA_LDSTs defsets.
7652 foreach mma = !listconcat(MMAs, MMA_LDSTs) in
7653   def : WMMA_PAT<mma>;