[ORC] Add std::tuple support to SimplePackedSerialization.
[llvm-project.git] / llvm / lib / Target / NVPTX / NVPTXIntrinsics.td
blob138f32bd2bd2a4827a900bc6b75b4730588f540f
1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// Pattern leaf over fpimm: the predicate converts the node's APFloat value
// to a C++ float and accepts the node when that value compares equal to 0.0f.
9 def immFloat0 : PatLeaf<(fpimm), [{
10     float f = (float)N->getValueAPF().convertToFloat();
11     return (f==0.0f);
12 }]>;
// Pattern leaf over fpimm: the predicate converts the node's APFloat value
// to a C++ float and accepts the node when that value compares equal to 1.0f.
14 def immFloat1 : PatLeaf<(fpimm), [{
15     float f = (float)N->getValueAPF().convertToFloat();
16     return (f==1.0f);
17 }]>;
// Pattern leaf over fpimm: the predicate converts the node's APFloat value
// to a C++ double and accepts the node when that value compares equal to 0.0.
19 def immDouble0 : PatLeaf<(fpimm), [{
20     double d = (double)N->getValueAPF().convertToDouble();
21     return (d==0.0);
22 }]>;
// Pattern leaf over fpimm: the predicate converts the node's APFloat value
// to a C++ double and accepts the node when that value compares equal to 1.0.
24 def immDouble1 : PatLeaf<(fpimm), [{
25     double d = (double)N->getValueAPF().convertToDouble();
26     return (d==1.0);
27 }]>;
// Collection of reusable C++ predicate fragments, one per address space.
// Each fragment returns the result of ChkMemSDNodeAddressSpace(N, <space>)
// for the generic, shared and global address-space constants respectively.
29 def AS_match {
30   code generic = [{
31    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
32   }];
33   code shared = [{
34    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
35   }];
36   code global = [{
37    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
38   }];
41 // A node that will be replaced with the current PTX version.
// PTXVerXform is an SDNodeXForm on imm that ignores the matched immediate's
// value and returns getI32Imm(Subtarget->getPTXVersion(), SDLoc(N)).
// `version` is the dag that applies this transform to the placeholder (i32 0).
42 class PTX {
43   SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
44     return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
45   }]>;
46   // (i32 0) will be XForm'ed to the currently used PTX version.
47   dag version = (PTXVerXform (i32 0));
// Record instantiating PTX, so its fields can be referenced as ptx.<field>.
49 def ptx : PTX;
51 // Generates list of n sequential register names.
52 // E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
// Implemented recursively: for non-zero n the result is RegSeq<n-1>.ret with
// prefix # (n-1) appended; for n == 0 the result is the empty list.
53 class RegSeq<int n, string prefix> {
54   list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
55                                         [prefix # !sub(n, 1)]),
56                             []);
// Helper yielding the list of values to iterate for a threadmask-is-immediate
// flag: [0, 1] when `sync` is set, otherwise only [0].
59 class THREADMASK_INFO<bit sync> {
60   list<bit> ret = !if(sync, [0, 1], [0]);
63 //-----------------------------------
64 // Synchronization and shuffle functions
65 //-----------------------------------
66 let isConvergent = true in {
// bar.sync selections for the int_nvvm_barrier0 / barrier_n / barrier
// intrinsics. None of them produces a result.
//  - INT_BARRIER0: no operands, emits `bar.sync 0`.
//  - INT_BARRIERN: one Int32Regs operand, emitted as the first asm operand.
//  - INT_BARRIER:  two Int32Regs operands, emitted as `$src1, $src2`.
67 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
68                   "bar.sync \t0;",
69       [(int_nvvm_barrier0)]>;
70 def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
71                   "bar.sync \t$src1;",
72       [(int_nvvm_barrier_n Int32Regs:$src1)]>;
73 def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
74                   "bar.sync \t$src1, $src2;",
75       [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// bar.red selections for int_nvvm_barrier0_{popc,and,or}. Each takes a 32-bit
// $pred and produces a 32-bit $dst. The asm is a multi-instruction sequence
// wrapped in "{{ ... }}" (presumably emitted as a braced PTX scope -- confirm
// against the asm printer) that:
//   1. declares scratch predicate register(s) %p1 (and %p2 for and/or),
//   2. sets %p1 = ($pred != 0) with setp.ne.u32,
//   3. issues bar.red.<op> on barrier 0 with %p1 as input,
//   4. for and/or, turns the predicate result %p2 into 1/0 in $dst via
//      selp.u32; popc writes $dst directly from bar.red.popc.u32.
76 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
77   !strconcat("{{ \n\t",
78              ".reg .pred \t%p1; \n\t",
79              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
80              "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
81              "}}"),
82       [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
83 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
84   !strconcat("{{ \n\t",
85              ".reg .pred \t%p1; \n\t",
86              ".reg .pred \t%p2; \n\t",
87              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
88              "bar.red.and.pred \t%p2, 0, %p1; \n\t",
89              "selp.u32 \t$dst, 1, 0, %p2; \n\t",
90              "}}"),
91       [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
92 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
93   !strconcat("{{ \n\t",
94              ".reg .pred \t%p1; \n\t",
95              ".reg .pred \t%p2; \n\t",
96              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
97              "bar.red.or.pred \t%p2, 0, %p1; \n\t",
98              "selp.u32 \t$dst, 1, 0, %p2; \n\t",
99              "}}"),
100       [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// Single-operand barrier selections.
//  - INT_BAR_SYNC: int_nvvm_bar_sync with an immediate operand -> `bar.sync`.
//    It carries no Requires<> predicate list.
//  - INT_BAR_WARP_SYNC_{I,R}: int_nvvm_bar_warp_sync -> `bar.warp.sync`.
//  - INT_BARRIER_SYNC_{I,R}:  int_nvvm_barrier_sync  -> `barrier.sync`.
// The _I variants match an immediate operand (i32imm / imm), the _R variants
// an Int32Regs operand. The warp-sync and barrier.sync forms are guarded by
// the hasPTX60 and hasSM30 predicates.
102 def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
103                              [(int_nvvm_bar_sync imm:$i)]>;
105 def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
106                              [(int_nvvm_bar_warp_sync imm:$i)]>,
107         Requires<[hasPTX60, hasSM30]>;
108 def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
109                              [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
110         Requires<[hasPTX60, hasSM30]>;
112 def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
113                                    [(int_nvvm_barrier_sync imm:$i)]>,
114         Requires<[hasPTX60, hasSM30]>;
115 def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
116                                    [(int_nvvm_barrier_sync Int32Regs:$i)]>,
117         Requires<[hasPTX60, hasSM30]>;
// Two-operand `barrier.sync $id, $cnt` selections for
// int_nvvm_barrier_sync_cnt. The two-letter suffix gives the operand kinds in
// ($id, $cnt) order: R = Int32Regs register, I = immediate. All four
// register/immediate combinations are provided, each guarded by hasPTX60 and
// hasSM30.
119 def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
120                  "barrier.sync \t$id, $cnt;",
121                  [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
122         Requires<[hasPTX60, hasSM30]>;
123 def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
124                  "barrier.sync \t$id, $cnt;",
125                  [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
126         Requires<[hasPTX60, hasSM30]>;
127 def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
128                  "barrier.sync \t$id, $cnt;",
129                  [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
130         Requires<[hasPTX60, hasSM30]>;
131 def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
132                  "barrier.sync \t$id, $cnt;",
133                  [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
134         Requires<[hasPTX60, hasSM30]>;
// Generic shfl instruction record. The NVPTXInst base is created with
// placeholder operands/asm/pattern, and every real field is then computed
// from the template arguments:
//   sync           - selects the "sync" intrinsic/asm flavour and adds a
//                    leading $threadmask input operand.
//   mode           - shuffle mode string spliced into the intrinsic name and
//                    the asm mnemonic.
//   reg            - "i32" or "f32"; selects the register class `rc`.
//   return_pred    - adds a second Int1Regs:$pred output and a "p" suffix on
//                    the intrinsic name.
//   offset_imm, mask_imm, threadmask_imm
//                  - choose i32imm vs Int32Regs for the respective operand.
136 class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
137                  bit offset_imm, bit mask_imm, bit threadmask_imm>
138       : NVPTXInst<(outs), (ins), "?", []> {
139   NVPTXRegClass rc = !cond(
140     !eq(reg, "i32"): Int32Regs,
141     !eq(reg, "f32"): Float32Regs);
  // Intrinsic record name: int_nvvm_shfl_[sync_]<mode>_<reg>[p], resolved to
  // the actual Intrinsic via !cast.
142   string IntrName = "int_nvvm_shfl_"
143                     # !if(sync, "sync_", "")
144                     # mode
145                     # "_" # reg
146                     # !if(return_pred, "p", "");
147   Intrinsic Intr = !cast<Intrinsic>(IntrName);
  // Inputs, in order: [threadmask (sync only)], src, offset, mask.
148   let InOperandList = !con(
149     !if(sync,
150         !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
151         (ins)),
152     (ins rc:$src),
153     !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
154     !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
155     );
156   let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
  // Asm: shfl.[sync.]<mode>.b32 $dst[|$pred], $src, $offset, $mask[, $threadmask];
  // Note the threadmask is printed last although it is the first input.
157   let AsmString = "shfl."
158      # !if(sync, "sync.", "")
159      # mode # ".b32\t"
160      # "$dst"
161      # !if(return_pred, "|$pred", "") # ", "
162      # "$src, $offset, $mask"
163      # !if(sync, ", $threadmask", "")
164      # ";"
165      ;
  // The selection pattern is derived from the operand lists: the output dag
  // has its `outs` operator replaced by `set`, the input dag has its `ins`
  // operator replaced by the intrinsic, and i32imm operands become `imm`
  // matchers in both. The two dags are joined with !con.
166   let Pattern = [!con(
167       !foreach(tmp, OutOperandList,
168              !subst(outs, set,
169              !subst(i32imm, imm, tmp))),
170       (set !foreach(tmp, InOperandList,
171              !subst(ins, Intr,
172              !subst(i32imm, imm, tmp))))
173   )];
// Instantiates an anonymous SHFL_INSTR record for every combination of
// sync flag, mode, register type, predicate-return flag and
// immediate/register choice for offset and mask. The threadmask
// immediate/register choice is only varied when `sync` is set
// (THREADMASK_INFO<sync>.ret). Sync variants require hasSM30; non-sync
// variants require hasSM30 and hasSHFL.
176 foreach sync = [false, true] in {
177   foreach mode = ["up", "down", "bfly", "idx"] in {
178     foreach regclass = ["i32", "f32"] in {
179       foreach return_pred = [false, true] in {
180         foreach offset_imm = [false, true] in {
181           foreach mask_imm = [false, true] in {
182             foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
183               def : SHFL_INSTR<sync, mode, regclass, return_pred,
184                                offset_imm, mask_imm, threadmask_imm>,
185                     Requires<!if(sync, [hasSM30], [hasSM30, hasSHFL])>;
186             }
187           }
188         }
189       }
190     }
191   }
194 // vote.{all,any,uni,ballot}
// Each instantiation defines one instruction that takes an Int1Regs predicate
// and writes a result of class `regclass`, printing "vote.<mode> $dest, $pred;"
// and matching the given intrinsic. Guarded by hasPTX60 and hasSM30.
// all/any/uni produce a predicate (Int1Regs); ballot produces an Int32Regs.
195 multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
196   def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
197               "vote." # mode # " \t$dest, $pred;",
198               [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
199         Requires<[hasPTX60, hasSM30]>;
202 defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
203 defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
204 defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
205 defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
207 // vote.sync.{all,any,uni,ballot}
// Each instantiation defines two instructions: `i` takes the mask as an
// immediate (i32imm / imm), `r` takes it in an Int32Regs register. The
// intrinsic's operand order is (mask, pred) while the asm prints
// "$dest, $pred, $mask". Both are guarded by hasPTX60 and hasSM30.
208 multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
209   def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
210               "vote.sync." # mode # " \t$dest, $pred, $mask;",
211               [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
212           Requires<[hasPTX60, hasSM30]>;
213   def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
214               "vote.sync." # mode #" \t$dest, $pred, $mask;",
215               [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
216           Requires<[hasPTX60, hasSM30]>;
219 defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
220 defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
221 defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
222 defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
// match.any.sync.<ptxtype>: one result of class `regclass`, inputs are a
// 32-bit mask and a value of `regclass` width. Four variants cover every
// immediate/register combination; the two-letter suffix follows the asm
// operand order ($value, $mask): first letter = value kind, second letter =
// mask kind (i = immediate, r = register). `ImmOp` is the immediate operand
// type used for the value. All variants are guarded by hasPTX60 and hasSM70.
224 multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
225                           Operand ImmOp> {
226   def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
227               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
228               [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
229            Requires<[hasPTX60, hasSM70]>;
230   def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
231               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
232               [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
233            Requires<[hasPTX60, hasSM70]>;
234   def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
235               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
236               [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
237            Requires<[hasPTX60, hasSM70]>;
238   def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
239               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
240               [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
241            Requires<[hasPTX60, hasSM70]>;
244 defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
245                                         i32imm>;
246 defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
247                                         i64imm>;
// match.all.sync.<ptxtype> with predicate output: produces two results,
// $dest of class `regclass` and an Int1Regs $pred, printed as "$dest|$pred".
// Inputs and the ii/ir/ri/rr suffix scheme follow the asm operand order
// ($value, $mask): first letter = value kind, second letter = mask kind
// (i = immediate, r = register). All variants are guarded by hasPTX60 and
// hasSM70.
249 multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
250                           Operand ImmOp> {
251   def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
252                      (ins i32imm:$mask, ImmOp:$value),
253               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
254               [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
255            Requires<[hasPTX60, hasSM70]>;
256   def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
257                      (ins Int32Regs:$mask, ImmOp:$value),
258               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
259               [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
260            Requires<[hasPTX60, hasSM70]>;
261   def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
262                      (ins i32imm:$mask, regclass:$value),
263               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
264               [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
265            Requires<[hasPTX60, hasSM70]>;
266   def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
267                      (ins Int32Regs:$mask, regclass:$value),
268               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
269               [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
270            Requires<[hasPTX60, hasSM70]>;
272 defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
273                                          i32imm>;
274 defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
275                                          i64imm>;
// redux.sync.<BinOp>.<PTXType>: 32-bit register source and mask, 32-bit
// register result. Only register operands are supported (no immediate
// variants). Guarded by hasPTX70 and hasSM80. The instantiations below pair
// each int_nvvm_redux_sync_* intrinsic with its operation and PTX type suffix.
277 multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
278   def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
279           "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
280           [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
281         Requires<[hasPTX70, hasSM80]>;
284 defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
285 defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
286 defm REDUX_SYNC_ADD : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
287 defm REDUX_SYNC_MIN : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
288 defm REDUX_SYNC_MAX : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
289 defm REDUX_SYNC_AND : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
290 defm REDUX_SYNC_XOR : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
291 defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;
293 } // isConvergent = true
295 //-----------------------------------
296 // Explicit Memory Fence Functions
297 //-----------------------------------
// Operand-less, result-less instruction that prints the literal `StrOp` and
// is selected for the operand-less intrinsic `IntOP`. Instantiated below for
// the membar.cta, membar.gl and membar.sys fences.
298 class MEMBAR<string StrOp, Intrinsic IntOP> :
299               NVPTXInst<(outs), (ins),
300             StrOp, [(IntOP)]>;
302 def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
303 def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
304 def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
307 //-----------------------------------
308 // Async Copy Functions
309 //-----------------------------------
// cp.async.mbarrier.arrive[.noinc][.shared].b64 [$addr]. `NoInc` and
// `AddrSpace` are literal mnemonic fragments (empty or with a leading dot)
// spliced between the base mnemonic and ".b64". The _32 / _64 defs differ
// only in the register class of the address operand (Int32Regs vs Int64Regs).
// Guarded by hasPTX70 and hasSM80.
311 multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
312   def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
313             !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
314             [(Intrin Int32Regs:$addr)]>,
315     Requires<[hasPTX70, hasSM80]>;
316   def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
317             !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
318             [(Intrin Int64Regs:$addr)]>,
319     Requires<[hasPTX70, hasSM80]>;
322 defm CP_ASYNC_MBARRIER_ARRIVE :
323   CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
324 defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
325   CP_ASYNC_MBARRIER_ARRIVE<"", ".shared", int_nvvm_cp_async_mbarrier_arrive_shared>;
326 defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
327   CP_ASYNC_MBARRIER_ARRIVE<".noinc", "", int_nvvm_cp_async_mbarrier_arrive_noinc>;
328 defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
329   CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
// cp.async.ca.shared.global [$dst], [$src], <cpsize>. `cpsize` is printed
// verbatim as the third asm operand; it is not an instruction operand. The
// _32 / _64 defs differ only in the register class used for both addresses.
// Guarded by hasPTX70 and hasSM80. Instantiated for sizes 4, 8 and 16.
331 multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
332   def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
333             !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
334             [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
335     Requires<[hasPTX70, hasSM80]>;
336   def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
337             !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
338             [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
339     Requires<[hasPTX70, hasSM80]>;
342 defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
343   CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;
345 defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
346   CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;
348 defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
349   CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;
// cp.async.cg.shared.global [$dst], [$src], <cpsize>. Same shape as the .ca
// multiclass: `cpsize` is printed verbatim, and the _32 / _64 defs differ
// only in the address register class. Guarded by hasPTX70 and hasSM80.
// Only the 16-byte size is instantiated here.
351 multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
352   def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
353             !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
354             [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
355     Requires<[hasPTX70, hasSM80]>;
356   def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
357             !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
358             [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
359     Requires<[hasPTX70, hasSM80]>;
362 defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
363   CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;
// cp.async group management, all guarded by hasPTX70 and hasSM80:
//  - CP_ASYNC_COMMIT_GROUP: no operands, prints `cp.async.commit_group`.
//  - CP_ASYNC_WAIT_GROUP:   one i32imm operand $n, matched in the pattern as
//                           a target immediate (timm), prints
//                           `cp.async.wait_group $n`.
//  - CP_ASYNC_WAIT_ALL:     no operands, prints `cp.async.wait_all`.
365 def CP_ASYNC_COMMIT_GROUP :
366   NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>,
367   Requires<[hasPTX70, hasSM80]>;
369 def CP_ASYNC_WAIT_GROUP :
370   NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
371   [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
372   Requires<[hasPTX70, hasSM80]>;
374 def CP_ASYNC_WAIT_ALL :
375   NVPTXInst<(outs), (ins), "cp.async.wait_all;",
376   [(int_nvvm_cp_async_wait_all)]>,
377   Requires<[hasPTX70, hasSM80]>;
379 //-----------------------------------
380 // MBarrier Functions
381 //-----------------------------------
// mbarrier.init[.shared].b64 [$addr], $count. No result. `AddrSpace` is a
// literal mnemonic fragment. The _32 / _64 defs differ only in the register
// class of $addr; $count is always Int32Regs. Guarded by hasPTX70 and hasSM80.
383 multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
384   def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
385            !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
386     [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
387     Requires<[hasPTX70, hasSM80]>;
388   def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
389            !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
390     [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
391     Requires<[hasPTX70, hasSM80]>;
394 defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
395 defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared",
396                                           int_nvvm_mbarrier_init_shared>;
// mbarrier.inval[.shared].b64 [$addr]. No result, single address operand.
// The _32 / _64 defs differ only in the register class of $addr.
// Guarded by hasPTX70 and hasSM80.
398 multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
399   def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
400            !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
401     [(Intrin Int32Regs:$addr)]>,
402     Requires<[hasPTX70, hasSM80]>;
403   def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
404            !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
405     [(Intrin Int64Regs:$addr)]>,
406     Requires<[hasPTX70, hasSM80]>;
409 defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
410 defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
411                                             int_nvvm_mbarrier_inval_shared>;
// mbarrier.arrive[.shared].b64 $state, [$addr]. Produces a 64-bit $state
// result (Int64Regs) from a single address operand. The _32 / _64 defs
// differ only in the register class of $addr. Guarded by hasPTX70 and hasSM80.
413 multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
414   def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
415            !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
416     [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
417     Requires<[hasPTX70, hasSM80]>;
418   def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
419            !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
420     [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
421     Requires<[hasPTX70, hasSM80]>;
424 defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
425 defm MBARRIER_ARRIVE_SHARED :
426   MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
// mbarrier.arrive.noComplete[.shared].b64 $state, [$addr], $count. Produces
// a 64-bit $state result from an address and a 32-bit $count. The _32 / _64
// defs differ only in the register class of $addr.
// Guarded by hasPTX70 and hasSM80.
428 multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
429   def _32 : NVPTXInst<(outs Int64Regs:$state),
430            (ins Int32Regs:$addr, Int32Regs:$count),
431            !strconcat("mbarrier.arrive.noComplete", AddrSpace,
432                       ".b64 $state, [$addr], $count;"),
433     [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
434     Requires<[hasPTX70, hasSM80]>;
435   def _64 : NVPTXInst<(outs Int64Regs:$state),
436            (ins Int64Regs:$addr, Int32Regs:$count),
437            !strconcat("mbarrier.arrive.noComplete", AddrSpace,
438                       ".b64 $state, [$addr], $count;"),
439     [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
440     Requires<[hasPTX70, hasSM80]>;
443 defm MBARRIER_ARRIVE_NOCOMPLETE :
444   MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
445 defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
446   MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;
// mbarrier.arrive_drop[.shared].b64 $state, [$addr]. Produces a 64-bit
// $state result from a single address operand. The _32 / _64 defs differ
// only in the register class of $addr. Guarded by hasPTX70 and hasSM80.
448 multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
449   def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
450            !strconcat("mbarrier.arrive_drop", AddrSpace,
451                       ".b64 $state, [$addr];"),
452            [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
453     Requires<[hasPTX70, hasSM80]>;
454   def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
455            !strconcat("mbarrier.arrive_drop", AddrSpace,
456                       ".b64 $state, [$addr];"),
457            [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
458     Requires<[hasPTX70, hasSM80]>;
461 defm MBARRIER_ARRIVE_DROP :
462   MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
463 defm MBARRIER_ARRIVE_DROP_SHARED :
464   MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
// mbarrier.arrive_drop.noComplete[.shared].b64 $state, [$addr], $count.
// Produces a 64-bit $state result from an address and a 32-bit $count. The
// _32 / _64 defs differ only in the register class of $addr.
// Guarded by hasPTX70 and hasSM80.
466 multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
467   def _32 : NVPTXInst<(outs Int64Regs:$state),
468            (ins Int32Regs:$addr, Int32Regs:$count),
469            !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
470                       ".b64 $state, [$addr], $count;"),
471            [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
472     Requires<[hasPTX70, hasSM80]>;
473   def _64 : NVPTXInst<(outs Int64Regs:$state),
474            (ins Int64Regs:$addr, Int32Regs:$count),
475            !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
476                       ".b64 $state, [$addr], $count;"),
477            [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
478     Requires<[hasPTX70, hasSM80]>;
481 defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
482   MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
483 defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
484   MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
485                        int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
// mbarrier.test_wait[.shared].b64 $res, [$addr], $state. Produces a
// predicate result (Int1Regs) from an address and a 64-bit $state. The
// _32 / _64 defs differ only in the register class of $addr.
// Guarded by hasPTX70 and hasSM80.
487 multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
488   def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
489            !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
490            [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
491     Requires<[hasPTX70, hasSM80]>;
492   def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
493            !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
494            [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
495     Requires<[hasPTX70, hasSM80]>;
498 defm MBARRIER_TEST_WAIT :
499   MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
500 defm MBARRIER_TEST_WAIT_SHARED :
501   MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
// mbarrier.pending_count.b64 $res, $state. Takes a 64-bit $state register
// and produces a 32-bit result; there is no address operand and hence no
// _32 / _64 split. Guarded by hasPTX70 and hasSM80.
503 class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
504            NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
505            "mbarrier.pending_count.b64 $res, $state;",
506            [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
507     Requires<[hasPTX70, hasSM80]>;
509 def MBARRIER_PENDING_COUNT :
510   MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
512 //-----------------------------------
513 // Math Functions
514 //-----------------------------------
516 // Map min(1.0, max(0.0, x)) to sat(x)
517 // Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
518 // NaN
519 // max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
520 // Same story for fmax, fmin.
// The four f32 patterns below cover both operand orders of the outer fmin
// (constant 1.0 first or second) combined with both operand orders of the
// inner fmax (constant 0.0 first or second). Each rewrites to CVT_f32_f32
// with the CvtSAT mode.
522 def : Pat<(int_nvvm_fmin_f immFloat1,
523             (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
524           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
525 def : Pat<(int_nvvm_fmin_f immFloat1,
526             (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
527           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
528 def : Pat<(int_nvvm_fmin_f
529             (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
530           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
531 def : Pat<(int_nvvm_fmin_f
532             (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
533           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// Same four operand-order combinations for f64, rewriting to CVT_f64_f64
// with the CvtSAT mode.
535 def : Pat<(int_nvvm_fmin_d immDouble1,
536             (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
537           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
538 def : Pat<(int_nvvm_fmin_d immDouble1,
539             (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
540           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
541 def : Pat<(int_nvvm_fmin_d
542             (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
543           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
544 def : Pat<(int_nvvm_fmin_d
545             (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
546           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
549 // We need a full string for OpcStr here because we need to deal with case like
550 // INT_PTX_RECIP.
// One-source instruction: result $dst of class target_regclass, input $src0
// of class src_regclass. `OpcStr` is the complete asm string (mnemonic and
// operands); the pattern sets $dst to IntOP applied to $src0.
551 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
552   NVPTXRegClass src_regclass, Intrinsic IntOP>
553             : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
554             OpcStr,
555         [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
557 // We need a full string for OpcStr here because we need to deal with the case
558 // like INT_PTX_NATIVE_POWR_F.
// Two-source instruction: result $dst of class t_regclass, inputs $src0 and
// $src1 with independently chosen register classes. `OpcStr` is the complete
// asm string; the pattern sets $dst to IntOP applied to ($src0, $src1).
559 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
560   NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
561             : NVPTXInst<(outs t_regclass:$dst),
562               (ins s0_regclass:$src0, s1_regclass:$src1),
563             OpcStr,
564         [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
// Three-source instruction: result $dst of class t_regclass, inputs $src0,
// $src1 and $src2 with independently chosen register classes. `OpcStr` is
// the complete asm string; the pattern sets $dst to IntOP applied to
// ($src0, $src1, $src2).
566 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
567   NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
568   NVPTXRegClass s2_regclass, Intrinsic IntOP>
569             : NVPTXInst<(outs t_regclass:$dst),
570               (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
571             OpcStr,
572         [(set t_regclass:$dst,
573           (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
576 // MISC
// int_nvvm_prmt -> `prmt.b32 $dst, $src0, $src1, $src2`; the result and all
// three sources are Int32Regs.
579 def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
580   Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
583 // Min Max
// Two-operand min/max intrinsics bound via F_MATH_2 to the PTX instruction
// named in each asm string. f32 has plain and .ftz forms; f64 has only the
// plain form. Result and both sources share one register class per def.
586 def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
587   Float32Regs, Float32Regs, int_nvvm_fmin_f>;
588 def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
589   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
591 def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
592   Float32Regs, Float32Regs, int_nvvm_fmax_f>;
593 def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
594   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
596 def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
597   Float64Regs, Float64Regs, int_nvvm_fmin_d>;
598 def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
599   Float64Regs, Float64Regs, int_nvvm_fmax_d>;
603 // Multiplication
// Each def binds one int_nvvm_mul* intrinsic, via F_MATH_2, to the PTX
// instruction spelled out in its asm string. Result and both sources share
// one register class per def.
// mul.hi: signed/unsigned, 32- and 64-bit integer forms.
606 def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
607   Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
608 def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
609   Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
611 def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
612   Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
613 def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
614   Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
// f32 multiply with an explicit rounding-mode suffix (rn/rz/rm/rp), each in
// .ftz and plain forms.
616 def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
617   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
618 def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
619   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
620 def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
621   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
622 def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
623   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
624 def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
625   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
626 def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
627   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
628 def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
629   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
630 def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
631   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
// f64 multiply with an explicit rounding-mode suffix; no .ftz forms.
633 def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
634   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
635 def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
636   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
637 def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
638   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
639 def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
640   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
// mul24.lo: signed and unsigned 32-bit forms.
642 def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
643   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
644 def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
645   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
648 // Div
// Each def binds one int_nvvm_div_* intrinsic, via F_MATH_2, to the PTX
// instruction spelled out in its asm string. Result and both sources share
// one register class per def.
// f32 approximate divide, .ftz and plain forms.
651 def INT_NVVM_DIV_APPROX_FTZ_F
652   : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
653     Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
654 def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
655   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
// f32 divide with an explicit rounding-mode suffix (rn/rz/rm/rp), each in
// .ftz and plain forms.
657 def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
658   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
659 def INT_NVVM_DIV_RN_F     : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
660   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
661 def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
662   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
663 def INT_NVVM_DIV_RZ_F     : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
664   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
665 def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
666   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
667 def INT_NVVM_DIV_RM_F     : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
668   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
669 def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
670   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
671 def INT_NVVM_DIV_RP_F     : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
672   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
// f64 divide with an explicit rounding-mode suffix; no .ftz forms.
674 def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
675   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
676 def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
677   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
678 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
679   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
680 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
681   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
684 // Sad
// Three-source integer "sad" intrinsics: the signed form emits sad.s32 and
// the unsigned form sad.u32. All operands and the result are Int32Regs.
// F_MATH_3 is declared outside this chunk.
687 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
688   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
689 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
690   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
693 // Floor  Ceil
// No dedicated instruction records here: the intrinsics are rewritten into a
// same-width CVT instruction. Floor passes the CvtRMI mode operand and ceil
// passes CvtRPI; the _ftz intrinsics use the matching *_FTZ mode.
696 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
697           (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
698 def : Pat<(int_nvvm_floor_f Float32Regs:$a),
699           (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
700 def : Pat<(int_nvvm_floor_d Float64Regs:$a),
701           (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
// Ceil: same shape as floor, with the CvtRPI mode.
703 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
704           (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
705 def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
706           (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
707 def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
708           (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
711 // Abs
// Floating-point absolute value: f32 with and without .ftz, then f64.
// F_MATH_1 is declared outside this chunk; its uses here pass the asm string,
// two register classes and the intrinsic (presumably result class first,
// then source class -- TODO confirm).
714 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
715   Float32Regs, int_nvvm_fabs_ftz_f>;
716 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
717   Float32Regs, int_nvvm_fabs_f>;
719 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
720   Float64Regs, int_nvvm_fabs_d>;
723 // Round
// The round intrinsics are selected as a same-width CVT with the CvtRNI mode
// operand (CvtRNI_FTZ for the _ftz intrinsic).
726 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
727           (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
728 def : Pat<(int_nvvm_round_f Float32Regs:$a),
729           (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
730 def : Pat<(int_nvvm_round_d Float64Regs:$a),
731           (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
734 // Trunc
// The trunc intrinsics are selected as a same-width CVT with the CvtRZI mode
// operand (CvtRZI_FTZ for the _ftz intrinsic).
737 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
738           (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
739 def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
740           (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
741 def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
742           (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
745 // Saturate
// The saturate intrinsics are selected as a same-width CVT with the CvtSAT
// mode operand (CvtSAT_FTZ for the _ftz intrinsic).
748 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
749           (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
750 def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
751           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
752 def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
753           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
756 // Exp2  Log2
// Approximate ex2 (first group) and lg2 (second group). Each group has an
// f32 .ftz form, a plain f32 form and an f64 form; only .approx strings are
// emitted in this section.
759 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
760   Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
761 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
762   Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
763 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
764   Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
766 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
767   Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
768 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
769   Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
770 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
771   Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
774 // Sin  Cos
// Approximate sine and cosine. Only f32 forms are defined in this section,
// each with and without .ftz.
777 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
778   Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
779 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
780   Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
782 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
783   Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
784 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
785   Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
788 // Fma
// Three-source fma intrinsics. The f32 group has every rounding suffix with
// and without .ftz; the f64 group has the four rounding suffixes only.
791 def INT_NVVM_FMA_RN_FTZ_F
792   : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
793     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
794 def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
795   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
796 def INT_NVVM_FMA_RZ_FTZ_F
797   : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
798     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
799 def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
800   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
801 def INT_NVVM_FMA_RM_FTZ_F
802   : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
803     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
804 def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
805   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
806 def INT_NVVM_FMA_RP_FTZ_F
807   : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
808     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
809 def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
810   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
// f64 fma, one def per rounding suffix.
812 def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
813   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
814 def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
815   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
816 def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
817   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
818 def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
819   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
822 // Rcp
// Reciprocal intrinsics: f32 with every rounding suffix (with and without
// .ftz), f64 with every rounding suffix, and finally one approximate f64 form.
825 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
826   Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
827 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
828   Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
829 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
830   Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
831 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
832   Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
833 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
834   Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
835 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
836   Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
837 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
838   Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
839 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
840   Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
// f64 reciprocal, one def per rounding suffix.
842 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
843   Float64Regs, int_nvvm_rcp_rn_d>;
844 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
845   Float64Regs, int_nvvm_rcp_rz_d>;
846 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
847   Float64Regs, int_nvvm_rcp_rm_d>;
848 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
849   Float64Regs, int_nvvm_rcp_rp_d>;
// The only approximate reciprocal in this section: f64 with .ftz.
851 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
852   Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
855 // Sqrt
// Square-root intrinsics with an explicit variant in the intrinsic name.
// f32: every rounding suffix plus .approx, each with and without .ftz.
// f64: the four rounding suffixes only.
858 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
859   Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
860 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
861   Float32Regs, int_nvvm_sqrt_rn_f>;
862 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
863   Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
864 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
865   Float32Regs, int_nvvm_sqrt_rz_f>;
866 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
867   Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
868 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
869   Float32Regs, int_nvvm_sqrt_rm_f>;
870 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
871   Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
872 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
873   Float32Regs, int_nvvm_sqrt_rp_f>;
874 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
875   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
876 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
877   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
// f64 square root, one def per rounding suffix.
879 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
880   Float64Regs, int_nvvm_sqrt_rn_d>;
881 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
882   Float64Regs, int_nvvm_sqrt_rz_d>;
883 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
884   Float64Regs, int_nvvm_sqrt_rm_d>;
885 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
886   Float64Regs, int_nvvm_sqrt_rp_d>;
888 // nvvm_sqrt intrinsic
// The variant-less int_nvvm_sqrt_f is mapped onto one of the f32 sqrt records
// defined above, chosen by predicates:
//   doF32FTZ + do_SQRTF32_RN -> INT_NVVM_SQRT_RN_FTZ_F
//   do_SQRTF32_RN            -> INT_NVVM_SQRT_RN_F
//   doF32FTZ                 -> INT_NVVM_SQRT_APPROX_FTZ_F
//   (no predicate)           -> INT_NVVM_SQRT_APPROX_F
// The predicates are declared outside this chunk. NOTE(review): the listing
// order looks most-specific-first, but how overlapping patterns are ranked is
// decided by the pattern matcher generator -- confirm rather than rely on
// textual order.
889 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
890           (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
891 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
892           (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
893 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
894           (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
895 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
896           (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
899 // Rsqrt
// Approximate reciprocal square root: f32 with .ftz, plain f32, and f64.
902 def INT_NVVM_RSQRT_APPROX_FTZ_F
903   : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
904     int_nvvm_rsqrt_approx_ftz_f>;
905 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
906   Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
907 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
908   Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
911 // Add
// Floating-point add intrinsics with an explicit rounding suffix. The f32
// group has each suffix with and without .ftz; the f64 group has the four
// suffixes only.
914 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
915   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
916 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
917   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
918 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
919   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
920 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
921   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
922 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
923   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
924 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
925   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
926 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
927   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
928 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
929   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
// f64 add, one def per rounding suffix.
931 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
932   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
933 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
934   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
935 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
936   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
937 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
938   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
941 // Convert
// Conversion intrinsics are expressed as selection patterns onto the CVT_*
// instructions (declared outside this chunk) rather than as new records. The
// second CVT operand is the mode whose name matches the intrinsic's suffix.
//
// double -> float: CVT_f32_f64 with CvtRN/RZ/RM/RP, *_FTZ for _ftz intrinsics.
944 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
945           (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
946 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
947           (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
948 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
949           (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
950 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
951           (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
952 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
953           (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
954 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
955           (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
956 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
957           (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
958 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
959           (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
// double -> signed 32-bit integer: CVT_s32_f64 with the Cvt*I modes.
961 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
962           (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
963 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
964           (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
965 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
966           (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
967 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
968           (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
// double -> unsigned 32-bit integer: CVT_u32_f64 with the Cvt*I modes.
970 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
971           (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
972 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
973           (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
974 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
975           (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
976 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
977           (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
// signed 32-bit integer -> double: CVT_f64_s32 with CvtRN/RZ/RM/RP.
979 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
980           (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
981 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
982           (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
983 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
984           (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
985 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
986           (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
// unsigned 32-bit integer -> double: CVT_f64_u32 with CvtRN/RZ/RM/RP.
988 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
989           (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
990 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
991           (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
992 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
993           (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
994 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
995           (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
// float -> signed 32-bit integer: CVT_s32_f32 with the Cvt*I modes
// (Cvt*I_FTZ for the _ftz intrinsics).
997 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
998           (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
999 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
1000           (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
1001 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
1002           (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1003 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
1004           (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
1005 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
1006           (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1007 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
1008           (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
1009 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
1010           (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1011 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
1012           (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
// float -> unsigned 32-bit integer: CVT_u32_f32, same mode scheme.
1014 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
1015           (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1016 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
1017           (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
1018 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
1019           (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1020 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
1021           (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
1022 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
1023           (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1024 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
1025           (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
1026 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
1027           (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1028 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
1029           (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
// signed 32-bit integer -> float: CVT_f32_s32 with CvtRN/RZ/RM/RP.
1031 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
1032           (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
1033 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
1034           (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
1035 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
1036           (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
1037 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
1038           (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
// unsigned 32-bit integer -> float: CVT_f32_u32 with CvtRN/RZ/RM/RP.
1040 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
1041           (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
1042 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
1043           (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
1044 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
1045           (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
1046 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
1047           (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
// Packs two 32-bit integer registers into one 64-bit float register with a
// single mov.b64 whose source is the vector operand {$src0, $src1}.
1049 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
1050   Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
// Extracts one 32-bit half of a 64-bit float register. Both records emit a
// braced block that declares a scratch register %temp and unpacks $src0 with
// mov.b64 into a two-element destination; the half that is not wanted lands
// in %temp. D2I_LO takes the first element, D2I_HI the second.
1052 def INT_NVVM_D2I_LO : F_MATH_1<
1053   !strconcat("{{\n\t",
1054              ".reg .b32 %temp; \n\t",
1055              "mov.b64 \t{$dst, %temp}, $src0;\n\t",
1056              "}}"),
1057   Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
1058 def INT_NVVM_D2I_HI : F_MATH_1<
1059   !strconcat("{{\n\t",
1060              ".reg .b32 %temp; \n\t",
1061              "mov.b64 \t{%temp, $dst}, $src0;\n\t",
1062              "}}"),
1063   Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
// float -> signed 64-bit integer: CVT_s64_f32 with the Cvt*I modes
// (Cvt*I_FTZ for the _ftz intrinsics).
1065 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
1066           (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1067 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
1068           (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
1069 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
1070           (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1071 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
1072           (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
1073 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
1074           (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1075 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
1076           (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
1077 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
1078           (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1079 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
1080           (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
// float -> unsigned 64-bit integer: CVT_u64_f32, same mode scheme.
1082 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
1083           (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1084 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
1085           (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
1086 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
1087           (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1088 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
1089           (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
1090 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
1091           (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1092 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
1093           (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
1094 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
1095           (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1096 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
1097           (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
// double -> signed 64-bit integer: CVT_s64_f64 with the Cvt*I modes.
1099 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
1100           (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
1101 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
1102           (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
1103 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
1104           (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
1105 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
1106           (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
// double -> unsigned 64-bit integer: CVT_u64_f64 with the Cvt*I modes.
1108 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
1109           (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
1110 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
1111           (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
1112 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
1113           (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
1114 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
1115           (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
// signed 64-bit integer -> float: CVT_f32_s64 with CvtRN/RZ/RM/RP.
1117 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
1118           (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
1119 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
1120           (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
1121 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
1122           (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
1123 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
1124           (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
// unsigned 64-bit integer -> float: CVT_f32_u64 with CvtRN/RZ/RM/RP.
1126 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
1127           (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
1128 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
1129           (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
1130 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
1131           (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
1132 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
1133           (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
// signed 64-bit integer -> double: CVT_f64_s64 with CvtRN/RZ/RM/RP.
1135 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
1136           (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
1137 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
1138           (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
1139 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
1140           (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
1141 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
1142           (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
// unsigned 64-bit integer -> double: CVT_f64_u64 with CvtRN/RZ/RM/RP.
1144 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
1145           (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
1146 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
1147           (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
1148 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
1149           (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
1150 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
1151           (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
// float -> half. Unlike the patterns above this is a two-step selection: the
// value is converted with CVT_f16_f32 (CvtRN, or CvtRN_FTZ for the _ftz
// intrinsic) and the result is then passed through BITCONVERT_16_F2I.
// BITCONVERT_16_F2I is declared outside this chunk; presumably it moves the
// 16-bit float bits into an integer register -- TODO confirm.
1154 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
1155           (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
1156 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
1157           (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
1160 // Bitcast
// Bitcast intrinsics between integer and floating-point register classes of
// the same width. Each emits a plain mov.b32 / mov.b64; only the register
// classes differ between the two directions.
1163 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
1164   Float32Regs, int_nvvm_bitcast_f2i>;
1165 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
1166   Int32Regs, int_nvvm_bitcast_i2f>;
1168 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
1169   Int64Regs, int_nvvm_bitcast_ll2d>;
1170 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
1171   Float64Regs, int_nvvm_bitcast_d2ll>;
1174 // FNS
// Shared shape for every fns.b32 record.
//   ins      - the input operand list; it must name $mask, $base and $offset
//              because the asm string refers to them.
//   Operands - the DAG whose result is assigned to $dst in the pattern.
// The result is always an Int32Regs register and every record built from this
// class is guarded by the hasPTX60 and hasSM30 predicates.
1177 class INT_FNS_MBO<dag ins, dag Operands>
1178   : NVPTXInst<(outs Int32Regs:$dst), ins,
1179                "fns.b32 \t$dst, $mask, $base, $offset;",
1180                [(set Int32Regs:$dst, Operands )]>,
1181     Requires<[hasPTX60, hasSM30]>;
// One record per register/immediate combination of (mask, base, offset),
// eight in total. The three-letter suffix encodes the operand kinds in that
// order: 'r' = Int32Regs register, 'i' = i32imm immediate. In each record the
// pattern DAG uses `imm` exactly where the operand list uses `i32imm`.
1183 def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
1184                      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1185 def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base,    i32imm:$offset),
1186                      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,       imm:$offset)>;
1187 def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base, Int32Regs:$offset),
1188                      (int_nvvm_fns Int32Regs:$mask,       imm:$base, Int32Regs:$offset)>;
1189 def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base,    i32imm:$offset),
1190                      (int_nvvm_fns Int32Regs:$mask,       imm:$base,       imm:$offset)>;
1191 def INT_FNS_irr : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
1192                      (int_nvvm_fns       imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1193 def INT_FNS_iri : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base,    i32imm:$offset),
1194                      (int_nvvm_fns       imm:$mask, Int32Regs:$base,       imm:$offset)>;
1195 def INT_FNS_iir : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base, Int32Regs:$offset),
1196                      (int_nvvm_fns       imm:$mask,       imm:$base, Int32Regs:$offset)>;
1197 def INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$offset),
1198                      (int_nvvm_fns       imm:$mask,       imm:$base,       imm:$offset)>;
1200 //-----------------------------------
1201 // Atomic Functions
1202 //-----------------------------------
// PatFrag wrappers that attach an address-space predicate to an atomic
// fragment. The predicate code comes from AS_match, defined at the top of
// this file, whose fields call ChkMemSDNodeAddressSpace(N, ...) with the
// global, shared or generic address space respectively.
//   ops  - the fragment's operand list.
//   frag - the DAG fragment being wrapped.
1204 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
1205  : PatFrag<ops, frag, AS_match.global>;
1206 class ATOMIC_SHARED_CHK <dag ops, dag frag>
1207  : PatFrag<ops, frag, AS_match.shared>;
1208 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
1209  : PatFrag<ops, frag, AS_match.generic>;
// Two-operand atomic (address + one value) for a single pointer width.
// Produces two records, `reg` and `imm`, that differ only in whether $b is a
// register or an immediate.
//   ptrclass - register class of the address operand $addr.
//   regclass - register class of the result $dst and of $b in `reg`.
//   SpaceStr, OpcStr, TypeStr - concatenated after "atom", in that order, to
//              form the mnemonic (e.g. ".global", ".add", ".u32").
//   IntOp    - the PatFrag matched by both records.
//   IMMType  - operand type of $b in `imm`.
//   IMM      - SDNode used for $b in the `imm` pattern.
//   Pred     - predicate list passed to Requires on both records.
1211 multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1212   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1213   Operand IMMType, SDNode IMM, list<Predicate> Pred> {
1214   def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1215     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
1216     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
1217   Requires<Pred>;
// The trailing "" passed to !strconcat below contributes nothing to the
// resulting string; the asm text is the same as for `reg`.
1218   def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
1219     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
1220     [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
1221   Requires<Pred>;
// Instantiates F_ATOMIC_2_imp twice, once with Int32Regs and once with
// Int64Regs as the address register class (sub-names p32 and p64). All other
// arguments are forwarded unchanged. Pred defaults to the empty list.
1223 multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1224   string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
1225   list<Predicate> Pred = []> {
1226   defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1227     IntOp, IMMType, IMM, Pred>;
1228   defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1229     IntOp, IMMType, IMM, Pred>;
1232 // has 2 operands, neg the second one
// Emits a braced block that declares a scratch register `temp`, stores the
// negation of $b in it with neg.s<TypeStr>, and then issues the atomic with
// `temp` as its value operand.
//   TypeStr  - the bare bit width (e.g. "32"); the body prepends ".s" for the
//              register declaration and the neg, and ".u" for the atomic.
//   SpaceStr, OpcStr - concatenated after "atom" as in F_ATOMIC_2_imp.
//   IMMType  - accepted but not referenced by the one record defined here;
//              only a register form (`reg`) is produced.
//   Pred     - predicate list passed to Requires.
1233 multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1234   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1235   Operand IMMType, list<Predicate> Pred> {
1236   def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1237     !strconcat(
1238       "{{ \n\t",
1239       ".reg \t.s", TypeStr, " temp; \n\t",
1240       "neg.s", TypeStr, " \ttemp, $b; \n\t",
1241       "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
1242       "}}"),
1243     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
1244   Requires<Pred>;
// Instantiates F_ATOMIC_2_NEG_imp for 32-bit (p32) and 64-bit (p64) address
// registers, forwarding every other argument. Pred defaults to the empty list.
1246 multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
1247   string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
1248   list<Predicate> Pred = []> {
1249  defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1250    IntOp, IMMType, Pred> ;
1251  defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1252    IntOp, IMMType, Pred> ;
1255 // has 3 operands
// Three-operand atomic (address + two values $b and $c) for a single pointer
// width. Four records cover the register/immediate combinations of the two
// value operands; all share one asm string and one predicate list.
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Unlike F_ATOMIC_2_imp there is no SDNode parameter: the patterns use `imm`
// directly for immediate operands.
1256 multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1257   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1258   Operand IMMType, list<Predicate> Pred> {
1259   def reg : NVPTXInst<(outs regclass:$dst),
1260     (ins ptrclass:$addr, regclass:$b, regclass:$c),
1261     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1262     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
1263   Requires<Pred>;
1265   def imm1 : NVPTXInst<(outs regclass:$dst),
1266     (ins ptrclass:$addr, IMMType:$b, regclass:$c),
1267     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1268     [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
1269   Requires<Pred>;
// The trailing "" passed to !strconcat below contributes nothing to the
// resulting string.
1271   def imm2 : NVPTXInst<(outs regclass:$dst),
1272     (ins ptrclass:$addr, regclass:$b, IMMType:$c),
1273     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
1274     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
1275   Requires<Pred>;
1277   def imm3 : NVPTXInst<(outs regclass:$dst),
1278     (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
1279     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1280     [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
1281   Requires<Pred>;
// Instantiates F_ATOMIC_3_imp for 32-bit (p32) and 64-bit (p64) address
// registers, forwarding every other argument. Pred defaults to the empty list.
1283 multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1284   string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
1285   defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1286     IntOp, IMMType, Pred>;
1287   defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1288     IntOp, IMMType, Pred>;
1291 // atom_add
// Address-space-qualified fragments for atomic add. Suffix _g / _s / _gen
// selects the global / shared / generic check class. The 32 and 64 variants
// wrap atomic_load_add_32 and atomic_load_add_64.
1293 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1294   (atomic_load_add_32 node:$a, node:$b)>;
1295 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1296   (atomic_load_add_32 node:$a, node:$b)>;
1297 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1298   (atomic_load_add_32 node:$a, node:$b)>;
1299 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1300   (atomic_load_add_64 node:$a, node:$b)>;
1301 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1302   (atomic_load_add_64 node:$a, node:$b)>;
1303 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1304   (atomic_load_add_64 node:$a, node:$b)>;
// The fragments without a width in their name wrap atomic_load_fadd, i.e.
// they are the floating-point add fragments despite the integer-looking name.
1305 def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1306   (atomic_load_fadd node:$a, node:$b)>;
1307 def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1308   (atomic_load_fadd node:$a, node:$b)>;
1309 def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1310   (atomic_load_fadd node:$a, node:$b)>;
// atom.add records, built with F_ATOMIC_2 (so each defm yields p32/p64
// address variants, each with a reg and an imm form).
// 32-bit integer add, emitted with type ".u32".
1312 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
1313   atomic_load_add_32_g, i32imm, imm>;
1314 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
1315   atomic_load_add_32_s, i32imm, imm>;
1316 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
1317   atomic_load_add_32_gen, i32imm, imm>;
// The *_USE_G variant matches the generic-address-space fragment but emits
// the ".global" form. NOTE(review): it has the same fragment and (empty)
// predicate list as INT_PTX_ATOM_ADD_GEN_32 above -- confirm how selection
// between the two is intended to be decided.
1318 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1319   ".add", atomic_load_add_32_gen, i32imm, imm>;
// 64-bit integer add, emitted with type ".u64"; same four variants.
1321 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
1322   atomic_load_add_64_g, i64imm, imm>;
1323 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
1324   atomic_load_add_64_s, i64imm, imm>;
1325 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
1326   atomic_load_add_64_gen, i64imm, imm>;
1327 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1328   ".add", atomic_load_add_64_gen, i64imm, imm>;
// f32 add: uses the atomic_load_fadd-based fragments with f32imm / fpimm for
// the immediate form. No predicate list is given.
1330 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
1331   atomic_load_add_g, f32imm, fpimm>;
1332 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
1333   atomic_load_add_s, f32imm, fpimm>;
1334 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
1335   atomic_load_add_gen, f32imm, fpimm>;
// f64 add: same fragments as f32 but with Float64Regs, and additionally
// guarded by the hasAtomAddF64 predicate.
1337 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
1338   atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
1339 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
1340   atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
1341 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
1342   atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
1344 // atom_sub
// Fragments for atomic subtract: one per {32, 64}-bit width and per
// {global, shared, generic} check class, each wrapping the matching
// atomic_load_sub_{32,64} node with operands ($a, $b).
1346 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1347   (atomic_load_sub_32 node:$a, node:$b)>;
1348 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1349   (atomic_load_sub_32 node:$a, node:$b)>;
1350 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1351   (atomic_load_sub_32 node:$a, node:$b)>;
1352 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1353   (atomic_load_sub_64 node:$a, node:$b)>;
1354 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1355   (atomic_load_sub_64 node:$a, node:$b)>;
1356 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1357   (atomic_load_sub_64 node:$a, node:$b)>;
// Atomic subtract instructions. These use F_ATOMIC_2_NEG (defined outside this
// excerpt) and pass the ".add" op string, not a subtract op.
// NOTE(review): presumably the helper negates $b and emits an atomic add --
// confirm against the F_ATOMIC_2_NEG definition.
// Unlike the F_ATOMIC_2 users, the type argument here is a bare width
// ("32"/"64") and there is no trailing pattern-leaf argument.
1359 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
1360   atomic_load_sub_32_g, i32imm>;
1361 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
1362   atomic_load_sub_64_g, i64imm>;
1363 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
1364   atomic_load_sub_32_gen, i32imm>;
1365 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
1366   ".add", atomic_load_sub_32_gen, i32imm>;
1367 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
1368   atomic_load_sub_32_s, i32imm>;
1369 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
1370   atomic_load_sub_64_s, i64imm>;
1371 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
1372   atomic_load_sub_64_gen, i64imm>;
1373 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
1374   ".add", atomic_load_sub_64_gen, i64imm>;
1376 // atom_swap
// Fragments for atomic exchange: {32, 64}-bit x {global, shared, generic},
// each wrapping atomic_swap_{32,64} with operands ($a, $b).
1378 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1379   (atomic_swap_32 node:$a, node:$b)>;
1380 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1381   (atomic_swap_32 node:$a, node:$b)>;
1382 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1383   (atomic_swap_32 node:$a, node:$b)>;
1384 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1385   (atomic_swap_64 node:$a, node:$b)>;
1386 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1387   (atomic_swap_64 node:$a, node:$b)>;
1388 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1389   (atomic_swap_64 node:$a, node:$b)>;
// Atomic exchange instructions: F_ATOMIC_2 instantiated with the ".exch" op
// string and untyped-bit type strings (".b32"/".b64"). The *_USE_G variants
// match the generic fragment while passing ".global" as the space string.
1391 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
1392   atomic_swap_32_g, i32imm, imm>;
1393 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
1394   atomic_swap_32_s, i32imm, imm>;
1395 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
1396   atomic_swap_32_gen, i32imm, imm>;
1397 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1398   ".exch", atomic_swap_32_gen, i32imm, imm>;
1399 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
1400   atomic_swap_64_g, i64imm, imm>;
1401 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
1402   atomic_swap_64_s, i64imm, imm>;
1403 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
1404   atomic_swap_64_gen, i64imm, imm>;
1405 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1406   ".exch", atomic_swap_64_gen, i64imm, imm>;
1408 // atom_max
// Fragments for atomic maximum. The atomic_load_max_* fragments wrap the
// atomic_load_max_{32,64} nodes and the atomic_load_umax_* fragments wrap
// atomic_load_umax_{32,64}; the instruction definitions that follow pair them
// with signed (.s32/.s64) and unsigned (.u32/.u64) type strings respectively.
1410 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1411   , (atomic_load_max_32 node:$a, node:$b)>;
1412 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1413   (atomic_load_max_32 node:$a, node:$b)>;
1414 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1415   (atomic_load_max_32 node:$a, node:$b)>;
1416 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1417   , (atomic_load_max_64 node:$a, node:$b)>;
1418 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1419   (atomic_load_max_64 node:$a, node:$b)>;
1420 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1421   (atomic_load_max_64 node:$a, node:$b)>;
1422 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1423   (atomic_load_umax_32 node:$a, node:$b)>;
1424 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1425   (atomic_load_umax_32 node:$a, node:$b)>;
1426 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1427   (atomic_load_umax_32 node:$a, node:$b)>;
1428 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1429   (atomic_load_umax_64 node:$a, node:$b)>;
1430 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1431   (atomic_load_umax_64 node:$a, node:$b)>;
1432 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1433   (atomic_load_umax_64 node:$a, node:$b)>;
// Atomic maximum instructions: F_ATOMIC_2 with the ".max" op string.
// The LOAD_MAX_* group pairs the max fragments with ".s32"/".s64"; the
// LOAD_UMAX_* group pairs the umax fragments with ".u32"/".u64".
// The *_USE_G variants match the generic fragment while passing ".global".

// max paired with .s32 / .s64.
1435 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1436   ".max", atomic_load_max_32_g, i32imm, imm>;
1437 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1438   ".max", atomic_load_max_32_s, i32imm, imm>;
1439 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
1440   atomic_load_max_32_gen, i32imm, imm>;
1441 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1442   ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
1443 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1444   ".max", atomic_load_max_64_g, i64imm, imm>;
1445 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1446   ".max", atomic_load_max_64_s, i64imm, imm>;
1447 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
1448   atomic_load_max_64_gen, i64imm, imm>;
1449 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1450   ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
// umax paired with .u32 / .u64.
1451 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1452   ".max", atomic_load_umax_32_g, i32imm, imm>;
1453 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1454   ".max", atomic_load_umax_32_s, i32imm, imm>;
1455 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
1456   atomic_load_umax_32_gen, i32imm, imm>;
1457 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1458   ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
1459 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1460   ".max", atomic_load_umax_64_g, i64imm, imm>;
1461 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1462   ".max", atomic_load_umax_64_s, i64imm, imm>;
1463 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
1464   atomic_load_umax_64_gen, i64imm, imm>;
1465 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1466   ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
1468 // atom_min
// Fragments for atomic minimum, mirroring the max fragments: the
// atomic_load_min_* fragments wrap atomic_load_min_{32,64} and the
// atomic_load_umin_* fragments wrap atomic_load_umin_{32,64}, each in
// global / shared / generic check classes.
1470 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1471   (atomic_load_min_32 node:$a, node:$b)>;
1472 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1473   (atomic_load_min_32 node:$a, node:$b)>;
1474 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1475   (atomic_load_min_32 node:$a, node:$b)>;
1476 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1477   (atomic_load_min_64 node:$a, node:$b)>;
1478 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1479   (atomic_load_min_64 node:$a, node:$b)>;
1480 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1481   (atomic_load_min_64 node:$a, node:$b)>;
1482 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1483   (atomic_load_umin_32 node:$a, node:$b)>;
1484 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1485   (atomic_load_umin_32 node:$a, node:$b)>;
1486 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1487   (atomic_load_umin_32 node:$a, node:$b)>;
1488 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1489   (atomic_load_umin_64 node:$a, node:$b)>;
1490 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1491   (atomic_load_umin_64 node:$a, node:$b)>;
1492 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1493   (atomic_load_umin_64 node:$a, node:$b)>;
// Atomic minimum instructions: F_ATOMIC_2 with the ".min" op string.
// The LOAD_MIN_* group pairs the min fragments with ".s32"/".s64"; the
// LOAD_UMIN_* group pairs the umin fragments with ".u32"/".u64".
// The *_USE_G variants match the generic fragment while passing ".global".

// min paired with .s32 / .s64.
1495 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1496   ".min", atomic_load_min_32_g, i32imm, imm>;
1497 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1498   ".min", atomic_load_min_32_s, i32imm, imm>;
1499 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
1500   atomic_load_min_32_gen, i32imm, imm>;
1501 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1502   ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
1503 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1504   ".min", atomic_load_min_64_g, i64imm, imm>;
1505 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1506   ".min", atomic_load_min_64_s, i64imm, imm>;
1507 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
1508   atomic_load_min_64_gen, i64imm, imm>;
1509 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1510   ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
// umin paired with .u32 / .u64.
1511 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1512   ".min", atomic_load_umin_32_g, i32imm, imm>;
1513 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1514   ".min", atomic_load_umin_32_s, i32imm, imm>;
1515 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
1516   atomic_load_umin_32_gen, i32imm, imm>;
1517 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1518   ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
1519 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1520   ".min", atomic_load_umin_64_g, i64imm, imm>;
1521 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1522   ".min", atomic_load_umin_64_s, i64imm, imm>;
1523 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
1524   atomic_load_umin_64_gen, i64imm, imm>;
1525 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1526   ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
1528 // atom_inc, atom_dec
// Fragments for atomic inc/dec. Unlike the other groups, these wrap the
// int_nvvm_atomic_load_{inc,dec}_32 intrinsics rather than atomic_* nodes,
// and only 32-bit forms are defined.
1530 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1531   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1532 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1533   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1534 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1535   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1536 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1537   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1538 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1539   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1540 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1541   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
// Atomic inc/dec instructions: F_ATOMIC_2 with the ".inc" / ".dec" op strings
// and the ".u32" type string. Only 32-bit forms are defined. The *_USE_G
// variants match the generic fragment while passing ".global".
1543 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
1544   atomic_load_inc_32_g, i32imm, imm>;
1545 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
1546   atomic_load_inc_32_s, i32imm, imm>;
1547 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
1548   atomic_load_inc_32_gen, i32imm, imm>;
1549 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1550   ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1551 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
1552   atomic_load_dec_32_g, i32imm, imm>;
1553 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
1554   atomic_load_dec_32_s, i32imm, imm>;
1555 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
1556   atomic_load_dec_32_gen, i32imm, imm>;
1557 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1558   ".dec", atomic_load_dec_32_gen, i32imm, imm>;
1560 // atom_and
// Fragments for atomic bitwise AND: {32, 64}-bit x {global, shared, generic},
// each wrapping atomic_load_and_{32,64} with operands ($a, $b).
1562 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1563   (atomic_load_and_32 node:$a, node:$b)>;
1564 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1565   (atomic_load_and_32 node:$a, node:$b)>;
1566 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1567   (atomic_load_and_32 node:$a, node:$b)>;
1568 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1569   (atomic_load_and_64 node:$a, node:$b)>;
1570 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1571   (atomic_load_and_64 node:$a, node:$b)>;
1572 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1573   (atomic_load_and_64 node:$a, node:$b)>;
// Atomic AND instructions: F_ATOMIC_2 with the ".and" op string and
// ".b32"/".b64" type strings. The *_USE_G variants match the generic fragment
// while passing ".global" as the space string.
1575 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
1576   atomic_load_and_32_g, i32imm, imm>;
1577 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
1578   atomic_load_and_32_s, i32imm, imm>;
1579 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
1580   atomic_load_and_32_gen, i32imm, imm>;
1581 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1582   ".and", atomic_load_and_32_gen, i32imm, imm>;
1583 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
1584   atomic_load_and_64_g, i64imm, imm>;
1585 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
1586   atomic_load_and_64_s, i64imm, imm>;
1587 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
1588   atomic_load_and_64_gen, i64imm, imm>;
1589 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1590   ".and", atomic_load_and_64_gen, i64imm, imm>;
1592 // atom_or
// Fragments for atomic bitwise OR: {32, 64}-bit x {global, shared, generic},
// each wrapping atomic_load_or_{32,64} with operands ($a, $b).
1594 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1595   (atomic_load_or_32 node:$a, node:$b)>;
1596 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1597   (atomic_load_or_32 node:$a, node:$b)>;
1598 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1599   (atomic_load_or_32 node:$a, node:$b)>;
1600 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1601   (atomic_load_or_64 node:$a, node:$b)>;
1602 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1603   (atomic_load_or_64 node:$a, node:$b)>;
1604 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1605   (atomic_load_or_64 node:$a, node:$b)>;
// Atomic OR instructions: F_ATOMIC_2 with the ".or" op string and
// ".b32"/".b64" type strings. Note the definition order in this group is
// G, GEN, GEN_USE_G, S (the AND/XOR groups list S second). The *_USE_G
// variants match the generic fragment while passing ".global".
1607 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
1608   atomic_load_or_32_g, i32imm, imm>;
1609 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
1610   atomic_load_or_32_gen, i32imm, imm>;
1611 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1612   ".or", atomic_load_or_32_gen, i32imm, imm>;
1613 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
1614   atomic_load_or_32_s, i32imm, imm>;
1615 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
1616   atomic_load_or_64_g, i64imm, imm>;
1617 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
1618   atomic_load_or_64_gen, i64imm, imm>;
1619 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1620   ".or", atomic_load_or_64_gen, i64imm, imm>;
1621 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
1622   atomic_load_or_64_s, i64imm, imm>;
1624 // atom_xor
// Fragments for atomic bitwise XOR: {32, 64}-bit x {global, shared, generic},
// each wrapping atomic_load_xor_{32,64} with operands ($a, $b).
1626 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1627   (atomic_load_xor_32 node:$a, node:$b)>;
1628 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1629   (atomic_load_xor_32 node:$a, node:$b)>;
1630 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1631   (atomic_load_xor_32 node:$a, node:$b)>;
1632 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1633   (atomic_load_xor_64 node:$a, node:$b)>;
1634 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1635   (atomic_load_xor_64 node:$a, node:$b)>;
1636 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1637   (atomic_load_xor_64 node:$a, node:$b)>;
// Atomic XOR instructions: F_ATOMIC_2 with the ".xor" op string and
// ".b32"/".b64" type strings. The *_USE_G variants match the generic fragment
// while passing ".global" as the space string.
1639 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
1640   atomic_load_xor_32_g, i32imm, imm>;
1641 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
1642   atomic_load_xor_32_s, i32imm, imm>;
1643 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
1644   atomic_load_xor_32_gen, i32imm, imm>;
1645 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1646   ".xor", atomic_load_xor_32_gen, i32imm, imm>;
1647 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
1648   atomic_load_xor_64_g, i64imm, imm>;
1649 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
1650   atomic_load_xor_64_s, i64imm, imm>;
1651 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
1652   atomic_load_xor_64_gen, i64imm, imm>;
1653 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1654   ".xor", atomic_load_xor_64_gen, i64imm, imm>;
1656 // atom_cas
// Fragments for atomic compare-and-swap. These take three operands
// ($a, $b, $c) and wrap atomic_cmp_swap_{32,64} in the global / shared /
// generic check classes.
1658 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1659   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1660 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1661   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1662 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1663   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1664 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1665   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1666 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1667   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1668 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1669   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
// Atomic compare-and-swap instructions. These instantiate F_ATOMIC_3 (defined
// outside this excerpt) with the ".cas" op string and ".b32"/".b64" type
// strings; unlike the F_ATOMIC_2 users, no pattern-leaf argument follows the
// immediate operand type. The *_USE_G variants match the generic fragment
// while passing ".global" as the space string.
1671 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
1672   atomic_cmp_swap_32_g, i32imm>;
1673 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
1674   atomic_cmp_swap_32_s, i32imm>;
1675 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
1676   atomic_cmp_swap_32_gen, i32imm>;
1677 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
1678   ".cas", atomic_cmp_swap_32_gen, i32imm>;
1679 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
1680   atomic_cmp_swap_64_g, i64imm>;
1681 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
1682   atomic_cmp_swap_64_s, i64imm>;
1683 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
1684   atomic_cmp_swap_64_gen, i64imm>;
1685 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
1686   ".cas", atomic_cmp_swap_64_gen, i64imm>;
1688 // Support for scoped atomic operations.  Matches
1689 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1690 // and converts it into the appropriate instruction.
1691 // NOTE: not all possible combinations are implemented
1692 //  'space' is limited to generic as it's the only one needed to support CUDA.
1693 //  'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Common instruction record shared by the two- and three-operand scoped
// atomics defined below.
//   AsmStr   - assembly string of the instruction.
//   regclass - register class of the single output, $result.
//   Preds    - predicate list attached through Requires<>.
//   ins      - input operand dag.
//   Operands - dag whose value the selection pattern assigns to $result.
1694 class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
1695                   dag ins, dag Operands>
1696       : NVPTXInst<(outs regclass:$result), ins,
1697                   AsmStr,
1698                   [(set regclass:$result, Operands)]>,
1699         Requires<Preds>;
1701 // Define instruction variants for all addressing modes.
// Emits four anonymous ATOM23_impl records for an atomic that takes an
// address ($src) and one value operand ($b):
//   {Int32Regs, Int64Regs} $src  x  {register, immediate} $b.
//   AsmStr   - assembly string forwarded to every variant.
//   Intr     - intrinsic matched by the selection pattern.
//   regclass - register class of the result and of a register $b.
//   ImmType  - operand type used when $b is an immediate.
//   Imm      - pattern leaf used to match the immediate.
//   ImmTy    - value type the immediate is cast to inside the pattern.
//   Preds    - predicates forwarded to ATOM23_impl.
1702 multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
1703                        NVPTXRegClass regclass, Operand ImmType,
1704                        SDNode Imm, ValueType ImmTy,
1705                        list<Predicate> Preds> {
  // Register-operand variants are defined with AddedComplexity = 1; the
  // immediate-operand variants further down use the default complexity.
1706   let AddedComplexity = 1 in {
1707     def : ATOM23_impl<AsmStr, regclass, Preds,
1708                       (ins Int32Regs:$src, regclass:$b),
1709                       (Intr Int32Regs:$src, regclass:$b)>;
1710     def : ATOM23_impl<AsmStr, regclass, Preds,
1711                       (ins Int64Regs:$src, regclass:$b),
1712                       (Intr Int64Regs:$src, regclass:$b)>;
1713   }
1714   // tablegen can't infer argument types from Intrinsic (though it can
1715   // from Instruction) so we have to enforce specific type on
1716   // immediates via explicit cast to ImmTy.
1717   def : ATOM23_impl<AsmStr, regclass, Preds,
1718                     (ins Int32Regs:$src, ImmType:$b),
1719                     (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
1720   def : ATOM23_impl<AsmStr, regclass, Preds,
1721                     (ins Int64Regs:$src, ImmType:$b),
1722                     (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
// Three-operand counterpart of ATOM2P_impl: emits eight anonymous
// ATOM23_impl records covering
//   {Int32Regs, Int64Regs} $src  x  {register, immediate} $b
//                                x  {register, immediate} $c.
// Template parameters have the same roles as in ATOM2P_impl; immediates are
// cast to ImmTy inside the pattern.
1725 multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
1726                        NVPTXRegClass regclass, Operand ImmType,
1727                        SDNode Imm, ValueType ImmTy,
1728                        list<Predicate> Preds> {
1729   // Variants for register/immediate permutations of $b and $c
  // Both value operands in registers: AddedComplexity = 2.
1730   let AddedComplexity = 2 in {
1731     def : ATOM23_impl<AsmStr, regclass, Preds,
1732                       (ins Int32Regs:$src, regclass:$b, regclass:$c),
1733                       (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
1734     def : ATOM23_impl<AsmStr, regclass, Preds,
1735                       (ins Int64Regs:$src, regclass:$b, regclass:$c),
1736                       (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
1737   }
  // Exactly one of $b / $c is an immediate: AddedComplexity = 1.
1738   let AddedComplexity = 1 in {
1739     def : ATOM23_impl<AsmStr, regclass, Preds,
1740                       (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1741                       (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1742     def : ATOM23_impl<AsmStr, regclass, Preds,
1743                       (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1744                       (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1745     def : ATOM23_impl<AsmStr, regclass, Preds,
1746                       (ins Int32Regs:$src, regclass:$b, ImmType:$c),
1747                       (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1748     def : ATOM23_impl<AsmStr, regclass, Preds,
1749                       (ins Int64Regs:$src, regclass:$b, ImmType:$c),
1750                       (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1751   }
  // Both $b and $c are immediates: default complexity.
1752   def : ATOM23_impl<AsmStr, regclass, Preds,
1753                     (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
1754                     (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1755   def : ATOM23_impl<AsmStr, regclass, Preds,
1756                     (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
1757                     (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1760 // Constructs intrinsic name and instruction asm strings.
// Builds the two strings a two-operand scoped atomic needs and forwards them
// to ATOM2P_impl:
//   asm string:  "atom" [.SpaceStr unless "gen"] [.ScopeStr unless "gpu"]
//                "." OpStr "." TypeStr " \t$result, [$src], $b;"
//   intrinsic:   int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]
//                looked up by name with !cast<Intrinsic>.
// IntTypeStr is the type tag used in the intrinsic name ("i"/"f" at the call
// sites below); TypeStr is the type suffix used in the asm string.
// NOTE(review): the asm string omits the scope when ScopeStr is "gpu", but the
// intrinsic name omits it only when ScopeStr is empty. The two conditions
// agree for the "cta"/"sys" values used in this file; an empty or "gpu"
// ScopeStr would produce mismatched strings -- confirm before adding scopes.
1761 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
1762                        string ScopeStr, string SpaceStr,
1763                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1764                        ValueType ImmTy, list<Predicate> Preds> {
1765   defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1766                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1767                             # "." # OpStr # "." # TypeStr
1768                             # " \t$result, [$src], $b;",
1769                      !cast<Intrinsic>(
1770                             "int_nvvm_atomic_" # OpStr
1771                             # "_" # SpaceStr # "_" # IntTypeStr
1772                             # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
1773                      regclass, ImmType, Imm, ImmTy, Preds>;
// Three-operand counterpart of ATOM2N_impl: builds the asm string (ending in
// " \t$result, [$src], $b, $c;") and the intrinsic name
// int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>], then forwards
// both to ATOM3P_impl.
// NOTE(review): as in ATOM2N_impl, the asm string tests ScopeStr against
// "gpu" while the intrinsic name tests for an empty ScopeStr; the two agree
// only for the "cta"/"sys" values used in this file.
1775 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
1776                        string ScopeStr, string SpaceStr,
1777                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1778                        ValueType ImmTy, list<Predicate> Preds> {
1779   defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1780                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1781                             # "." # OpStr # "." # TypeStr
1782                             # " \t$result, [$src], $b, $c;",
1783                      !cast<Intrinsic>(
1784                             "int_nvvm_atomic_" # OpStr
1785                             # "_" # SpaceStr # "_" # IntTypeStr
1786                             # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
1787                      regclass, ImmType, Imm, ImmTy, Preds>;
1790 // Constructs variants for different address spaces.
1791 // For now we only need variants for generic space pointers.
// Address-space layer for two-operand scoped atomics. Only the "gen" space is
// instantiated (as the _gen_ sub-definition); all other parameters are
// forwarded unchanged to ATOM2N_impl.
1792 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
1793                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1794                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1795    defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1796                             regclass, ImmType, Imm, ImmTy, Preds>;
// Address-space layer for three-operand scoped atomics. Only the "gen" space
// is instantiated (as the _gen_ sub-definition); all other parameters are
// forwarded unchanged to ATOM3N_impl.
1798 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
1799                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1800                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1801    defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1802                             regclass, ImmType, Imm, ImmTy, Preds>;
1805 // Constructs variants for different scopes of atomic op.
// Scope layer for two-operand atomics: instantiates ATOM2A_impl once with
// scope "cta" (_cta) and once with scope "sys" (_sys). In both cases
// hasAtomScope is appended to the caller-supplied predicate list.
1806 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
1807                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1808                        ValueType ImmTy, list<Predicate> Preds> {
1809    // .gpu scope is default and is currently covered by existing
1810    // atomics w/o explicitly specified scope.
1811    defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1812                            regclass, ImmType, Imm, ImmTy,
1813                            !listconcat(Preds,[hasAtomScope])>;
1814    defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1815                            regclass, ImmType, Imm, ImmTy,
1816                            !listconcat(Preds,[hasAtomScope])>;
// Scope layer for three-operand atomics: instantiates ATOM3A_impl once with
// scope "cta" (_cta) and once with scope "sys" (_sys). In both cases
// hasAtomScope is appended to the caller-supplied predicate list.
1818 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
1819            NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
1820            list<Predicate> Preds> {
1821    // No need to define ".gpu"-scoped atomics.  They do the same thing
1822    // as the regular, non-scoped atomics defined elsewhere.
1823    defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1824                            regclass, ImmType, Imm, ImmTy,
1825                            !listconcat(Preds,[hasAtomScope])>;
1826    defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1827                            regclass, ImmType, Imm, ImmTy,
1828                            !listconcat(Preds,[hasAtomScope])>;
1831 // atom.add
// Type variants for scoped atomic add: s32, u32, u64, f32 and f64. Integer
// variants use the "i" intrinsic type tag with the imm leaf; float variants
// use "f" with fpimm. No s64 variant is defined. Only the f64 variant
// supplies an extra predicate (hasAtomAddF64).
1832 multiclass ATOM2_add_impl<string OpStr> {
1833    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1834    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1835    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
1836    defm _f32  : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
1837                             []>;
1838    defm _f64  : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
1839                             [hasAtomAddF64]>;
1842 // atom.{and,or,xor}
// Type variants for scoped bitwise atomics: b32 and b64. The b64 variant is
// additionally predicated on hasAtomBitwise64.
1843 multiclass ATOM2_bitwise_impl<string OpStr> {
1844    defm _b32  : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1845    defm _b64  : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
1846                             [hasAtomBitwise64]>;
1849 // atom.exch
// Type variants for scoped atomic exchange: b32 and b64, neither with an
// extra predicate.
1850 multiclass ATOM2_exch_impl<string OpStr> {
1851    defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1852    defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1855 // atom.{min,max}
// Type variants for scoped atomic min/max: s32, u32, s64 and u64. Both 64-bit
// variants are additionally predicated on hasAtomMinMax64.
1856 multiclass ATOM2_minmax_impl<string OpStr> {
1857    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1858    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1859    defm _s64  : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
1860                             [hasAtomMinMax64]>;
1861    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
1862                             [hasAtomMinMax64]>;
1865 // atom.{inc,dec}
// Scoped atomic inc/dec: only a u32 variant is defined.
1866 multiclass ATOM2_incdec_impl<string OpStr> {
1867    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1870 // atom.cas
// Type variants for scoped atomic compare-and-swap: b32 and b64, built on the
// three-operand ATOM3S_impl, neither with an extra predicate.
1871 multiclass ATOM3_cas_impl<string OpStr> {
1872    defm _b32  : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1873    defm _b64  : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
// Top-level instantiations of the scoped atomics, one per operation. The
// string argument is the OpStr that ends up in both the asm mnemonic and the
// intrinsic name built by ATOM2N_impl / ATOM3N_impl.
1876 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
1877 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
1878 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
1879 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
1880 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
1881 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
1882 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
1883 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
1884 defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
1885 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
1887 //-----------------------------------
1888 // Support for ldu on sm_20 or later
1889 //-----------------------------------
1891 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
1892 // read-only in a kernel.
1894 // Scalar
// Scalar ldu.global instruction in five addressing forms, distinguished by
// the type of $src:
//   areg   - Int32Regs      areg64 - Int64Regs
//   avar   - imemAny
//   ari    - MEMri          ari64  - MEMri64
//   TyStr    - appended to "ldu.global." to form the asm string; the
//              instantiations below pass both the type suffix and the
//              operand text in it.
//   regclass - register class of the single output, $result.
// Every form has an empty pattern list and requires hasLDU.
// NOTE(review): with empty patterns, selection is presumably done outside
// this file's patterns -- confirm.
1896 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1897   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1898                !strconcat("ldu.global.", TyStr),
1899                       []>, Requires<[hasLDU]>;
1900   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1901                !strconcat("ldu.global.", TyStr),
1902                         []>, Requires<[hasLDU]>;
1903  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1904                !strconcat("ldu.global.", TyStr),
1905                       []>, Requires<[hasLDU]>;
1906  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1907                !strconcat("ldu.global.", TyStr),
1908                       []>, Requires<[hasLDU]>;
1909  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1910                !strconcat("ldu.global.", TyStr),
1911                         []>, Requires<[hasLDU]>;
// Scalar ldu instantiations, one per value type. Points worth noting:
//  - i8 is loaded as u8 but its result lives in Int16Regs.
//  - f16 and f16x2 use the untyped b16 / b32 suffixes.
//  - the pointer variants p32 / p64 use the same strings and register classes
//    as i32 / i64.
1914 defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1915 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1916 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1917 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1918 defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
1919 defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
1920 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1921 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1922 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1923 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1925 // vector
1927 // Elementized vector ldu
// Two-result "ldu.global." instructions: each record writes $dst1 and $dst2,
// both of class `regclass`, from a single $src operand.
//   TyStr    - text appended after "ldu.global.".
//   regclass - register class of every destination.
// One record per $src operand kind: Int32Regs (_areg32), Int64Regs (_areg64),
// MEMri (_ari32), MEMri64 (_ari64) and imemAny (_avar). The pattern list is
// empty and, unlike the scalar LDU_G, no Requires<> predicate is attached.
1928 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1929  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1930                      (ins Int32Regs:$src),
1931                      !strconcat("ldu.global.", TyStr), []>;
1932  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1933                      (ins Int64Regs:$src),
1934                      !strconcat("ldu.global.", TyStr), []>;
1935  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1936                      (ins MEMri:$src),
1937                      !strconcat("ldu.global.", TyStr), []>;
1938  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1939                      (ins MEMri64:$src),
1940                      !strconcat("ldu.global.", TyStr), []>;
1941  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1942                      (ins imemAny:$src),
1943                      !strconcat("ldu.global.", TyStr), []>;
// Four-result counterpart of VLDU_G_ELE_V2: each record writes $dst1..$dst4,
// all of class `regclass`, from a single $src operand.
//   TyStr    - text appended after "ldu.global.".
//   regclass - register class of every destination.
// The same five $src operand kinds are covered (Int32Regs, Int64Regs, MEMri,
// MEMri64, imemAny); pattern lists are empty and no Requires<> is attached.
1946 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1947  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1948                             regclass:$dst4), (ins Int32Regs:$src),
1949                !strconcat("ldu.global.", TyStr), []>;
1950  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1951                             regclass:$dst4), (ins Int64Regs:$src),
1952                !strconcat("ldu.global.", TyStr), []>;
1953  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1954                             regclass:$dst4), (ins MEMri:$src),
1955                !strconcat("ldu.global.", TyStr), []>;
1956  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1957                             regclass:$dst4), (ins MEMri64:$src),
1958                !strconcat("ldu.global.", TyStr), []>;
1959  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1960                             regclass:$dst4), (ins imemAny:$src),
1961                !strconcat("ldu.global.", TyStr), []>;
// Elementized vector ldu instantiations. v2 covers i8, i16, i32, f16, f16x2,
// f32, i64 and f64; v4 covers only the element types up to 32 bits (i8, i16,
// i32, f16, f16x2, f32). As with the scalar forms, i8 elements are placed in
// Int16Regs and f16 / f16x2 use the untyped b16 / b32 suffixes.
1964 defm INT_PTX_LDU_G_v2i8_ELE
1965   : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
1966 defm INT_PTX_LDU_G_v2i16_ELE
1967   : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1968 defm INT_PTX_LDU_G_v2i32_ELE
1969   : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1970 defm INT_PTX_LDU_G_v2f16_ELE
1971   : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1972 defm INT_PTX_LDU_G_v2f16x2_ELE
1973   : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1974 defm INT_PTX_LDU_G_v2f32_ELE
1975   : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1976 defm INT_PTX_LDU_G_v2i64_ELE
1977   : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1978 defm INT_PTX_LDU_G_v2f64_ELE
1979   : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1980 defm INT_PTX_LDU_G_v4i8_ELE
1981   : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1982 defm INT_PTX_LDU_G_v4i16_ELE
1983   : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1984     Int16Regs>;
1985 defm INT_PTX_LDU_G_v4i32_ELE
1986   : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1987     Int32Regs>;
1988 defm INT_PTX_LDU_G_v4f16_ELE
1989   : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1990     Float16Regs>;
1991 defm INT_PTX_LDU_G_v4f16x2_ELE
1992   : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1993     Float16x2Regs>;
1994 defm INT_PTX_LDU_G_v4f32_ELE
1995   : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1996     Float32Regs>;
1999 //-----------------------------------
2000 // Support for ldg on sm_35 or later
2001 //-----------------------------------
2003 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
2004 // non-coherent texture cache, and therefore the values read must be read-only
2005 // during the lifetime of the kernel.
// Scalar "ld.global.nc." instructions producing one result in `regclass`.
//   TyStr    - text appended after "ld.global.nc." (type suffix plus operands).
//   regclass - register class of $result.
// Mirrors LDU_G: five records that differ only in the $src operand kind
// (Int32Regs, Int64Regs, imemAny, MEMri, MEMri64). All have an empty pattern
// list and are gated by Requires<[hasLDG]>.
2007 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
2008   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2009                !strconcat("ld.global.nc.", TyStr),
2010                       []>, Requires<[hasLDG]>;
2011   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2012                !strconcat("ld.global.nc.", TyStr),
2013                         []>, Requires<[hasLDG]>;
2014  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2015                !strconcat("ld.global.nc.", TyStr),
2016                       []>, Requires<[hasLDG]>;
2017  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2018                !strconcat("ld.global.nc.", TyStr),
2019                       []>, Requires<[hasLDG]>;
2020  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2021                !strconcat("ld.global.nc.", TyStr),
2022                         []>, Requires<[hasLDG]>;
// Scalar ldg instantiations, one per value type, using the same type-suffix
// and register-class pairing as the ldu forms: i8 -> u8 in Int16Regs,
// f16 / f16x2 -> b16 / b32, and p32 / p64 identical to i32 / i64.
2025 defm INT_PTX_LDG_GLOBAL_i8
2026   : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
2027 defm INT_PTX_LDG_GLOBAL_i16
2028   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
2029 defm INT_PTX_LDG_GLOBAL_i32
2030   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
2031 defm INT_PTX_LDG_GLOBAL_i64
2032   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2033 defm INT_PTX_LDG_GLOBAL_f16
2034   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
2035 defm INT_PTX_LDG_GLOBAL_f16x2
2036   : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
2037 defm INT_PTX_LDG_GLOBAL_f32
2038   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
2039 defm INT_PTX_LDG_GLOBAL_f64
2040   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2041 defm INT_PTX_LDG_GLOBAL_p32
2042   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
2043 defm INT_PTX_LDG_GLOBAL_p64
2044   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2046 // vector
2048 // Elementized vector ldg
// Two-result "ld.global.nc." instructions: each record writes $dst1 and
// $dst2, both of class `regclass`, from a single $src operand.
//   TyStr    - text appended after "ld.global.nc.".
//   regclass - register class of every destination.
// One record per $src operand kind: Int32Regs, Int64Regs, MEMri, MEMri64 and
// imemAny. Pattern lists are empty and no Requires<> predicate is attached.
2049 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2050  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2051                      (ins Int32Regs:$src),
2052                      !strconcat("ld.global.nc.", TyStr), []>;
2053  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2054                      (ins Int64Regs:$src),
2055                      !strconcat("ld.global.nc.", TyStr), []>;
2056  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2057                      (ins MEMri:$src),
2058                      !strconcat("ld.global.nc.", TyStr), []>;
2059  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2060                      (ins MEMri64:$src),
2061                      !strconcat("ld.global.nc.", TyStr), []>;
2062  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2063                      (ins imemAny:$src),
2064                      !strconcat("ld.global.nc.", TyStr), []>;
// Four-result counterpart of VLDG_G_ELE_V2: each record writes $dst1..$dst4,
// all of class `regclass`, from a single $src operand.
//   TyStr    - text appended after "ld.global.nc.".
//   regclass - register class of every destination.
// The same five $src operand kinds are covered; pattern lists are empty and
// no Requires<> predicate is attached.
2067 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2068   def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2069                               regclass:$dst4), (ins Int32Regs:$src),
2070                !strconcat("ld.global.nc.", TyStr), []>;
2071   def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2072                                regclass:$dst4), (ins Int64Regs:$src),
2073                !strconcat("ld.global.nc.", TyStr), []>;
2074   def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2075                               regclass:$dst4), (ins MEMri:$src),
2076                !strconcat("ld.global.nc.", TyStr), []>;
2077   def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2078                               regclass:$dst4), (ins MEMri64:$src),
2079                !strconcat("ld.global.nc.", TyStr), []>;
2080   def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2081                              regclass:$dst4), (ins imemAny:$src),
2082                !strconcat("ld.global.nc.", TyStr), []>;
2085 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Elementized vector ldg instantiations. v2 covers i8, i16, i32, f16, f16x2,
// f32, i64 and f64; v4 covers only i8, i16, i32, f16, f16x2 and f32. i8
// elements are placed in Int16Regs; f16 / f16x2 use the b16 / b32 suffixes.
2086 defm INT_PTX_LDG_G_v2i8_ELE
2087   : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
2088 defm INT_PTX_LDG_G_v2i16_ELE
2089   : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2090 defm INT_PTX_LDG_G_v2i32_ELE
2091   : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2092 defm INT_PTX_LDG_G_v2f16_ELE
2093   : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
2094 defm INT_PTX_LDG_G_v2f16x2_ELE
2095   : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
2096 defm INT_PTX_LDG_G_v2f32_ELE
2097   : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2098 defm INT_PTX_LDG_G_v2i64_ELE
2099   : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2100 defm INT_PTX_LDG_G_v2f64_ELE
2101   : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2102 defm INT_PTX_LDG_G_v4i8_ELE
2103   : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2104 defm INT_PTX_LDG_G_v4i16_ELE
2105   : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2106 defm INT_PTX_LDG_G_v4i32_ELE
2107   : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
2108 defm INT_PTX_LDG_G_v4f16_ELE
2109   : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
2110 defm INT_PTX_LDG_G_v4f16x2_ELE
2111   : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
2112 defm INT_PTX_LDG_G_v4f32_ELE
2113   : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
// "cvta.<Str>" instructions selected for the intrinsic `Intrin`.
//   Str    - state-space name spliced into the mnemonic.
//   Intrin - intrinsic matched by each record's pattern.
// Records:
//   _yes      - Int32Regs source and result, "cvta.<Str>.u32".
//   _yes_64   - Int64Regs source and result, "cvta.<Str>.u64".
//   _yes_6432 - Int32Regs source, Int64Regs result. The asm block first widens
//               $src into a scratch .b64 register %tmp with cvt.u64.u32, then
//               applies "cvta.<Str>.u64". Only available under useShortPtr.
2116 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
2117    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2118           !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
2119       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2120    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2121           !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
2122       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2123    def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
2124           "{{ .reg .b64 %tmp;\n\t"
2125           #"  cvt.u64.u32 \t%tmp, $src;\n\t"
2126           #"  cvta." # Str # ".u64 \t$result, %tmp; }}",
2127       [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
2128       Requires<[useShortPtr]>;
// "cvta.to.<Str>" instructions selected for the intrinsic `Intrin`.
//   Str    - state-space name spliced into the mnemonic.
//   Intrin - intrinsic matched by each record's pattern.
// Records:
//   _yes      - Int32Regs source and result, "cvta.to.<Str>.u32".
//   _yes_64   - Int64Regs source and result, "cvta.to.<Str>.u64".
//   _yes_3264 - Int64Regs source, Int32Regs result. The asm block converts
//               into a scratch .b64 register %tmp with "cvta.to.<Str>.u64",
//               then narrows to $result with cvt.u32.u64. Only available
//               under useShortPtr.
2131 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
2132    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2133           !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
2134       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2135    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2136           !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
2137       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2138    def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
2139           "{{ .reg .b64 %tmp;\n\t"
2140           #"  cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
2141           #"  cvt.u32.u64 \t$result, %tmp; }}",
2142       [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
2143       Requires<[useShortPtr]>;
// Address-space conversion instantiations for the local, shared, global and
// const spaces. cvta_<space> pairs NG_TO_G with the int_nvvm_ptr_*_to_gen
// intrinsic; cvta_to_<space> pairs G_TO_NG with int_nvvm_ptr_gen_to_*.
// Note the const space uses the mnemonic string "const" but the intrinsic
// names spell it "constant".
2146 defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
2147 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
2148 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
2149 defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
2151 defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
2152 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
2153 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
2154 defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
2157 // nvvm.ptr.gen.to.param
// int_nvvm_ptr_gen_to_param is selected to a plain register-to-register move:
// mov.u32 for an Int32Regs operand and mov.u64 for an Int64Regs operand.
2158 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
2159   (ins Int32Regs:$src),
2160                         "mov.u32 \t$result, $src;",
2161                               [(set Int32Regs:$result,
2162                                 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
2163 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
2164   (ins Int64Regs:$src),
2165                         "mov.u64 \t$result, $src;",
2166                               [(set Int64Regs:$result,
2167                                 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2170 // nvvm.move intrinsics
// Each int_nvvm_move_* intrinsic is selected to a single mov whose type
// suffix matches the register class: b16 / b32 / b64 for the integer forms,
// f32 / f64 for float / double. int_nvvm_move_ptr gets two instructions, one
// per register width (mov.u32 over Int32Regs, mov.u64 over Int64Regs).
2171 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
2172                              "mov.b16 \t$r, $s;",
2173                              [(set Int16Regs:$r,
2174                                (int_nvvm_move_i16 Int16Regs:$s))]>;
2175 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2176                              "mov.b32 \t$r, $s;",
2177                              [(set Int32Regs:$r,
2178                                (int_nvvm_move_i32 Int32Regs:$s))]>;
2179 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2180                              "mov.b64 \t$r, $s;",
2181                              [(set Int64Regs:$r,
2182                                (int_nvvm_move_i64 Int64Regs:$s))]>;
2183 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
2184                              "mov.f32 \t$r, $s;",
2185                              [(set Float32Regs:$r,
2186                                (int_nvvm_move_float Float32Regs:$s))]>;
2187 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
2188                              "mov.f64 \t$r, $s;",
2189                              [(set Float64Regs:$r,
2190                                (int_nvvm_move_double Float64Regs:$s))]>;
2191 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2192                              "mov.u32 \t$r, $s;",
2193                              [(set Int32Regs:$r,
2194                                (int_nvvm_move_ptr Int32Regs:$s))]>;
2195 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2196                              "mov.u64 \t$r, $s;",
2197                              [(set Int64Regs:$r,
2198                                (int_nvvm_move_ptr Int64Regs:$s))]>;
2200 // @TODO: Are these actually needed, or will we always just see symbols
2201 // copied to registers first?
2202 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2203                              "mov.u32 \t$r, $s;",
2204                              [(set Int32Regs:$r,
2205                              (int_nvvm_move_ptr texternalsym:$s))]>;
2206 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2207                              "mov.u64 \t$r, $s;",
2208                              [(set Int64Regs:$r,
2209                              (int_nvvm_move_ptr texternalsym:$s))]>;*/
2212 // MoveParam        %r1, param
2213 // ptr_local_to_gen %r2, %r1
2214 // ptr_gen_to_local %r3, %r2
2215 // ->
2216 // mov %r1, param
2218 // @TODO: Revisit this.  There is a type
2219 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2220 // instructions are not currently defined. However, we can use the ptr
2221 // variants and the asm printer will do the right thing.
// Folds gen_to_local(local_to_gen(MoveParam sym)) into one pointer move of
// the symbol, dropping both conversions. The i64 result type selects
// nvvm_move_ptr64 and the i32 result type selects nvvm_move_ptr32.
2222 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2223                 (MoveParam texternalsym:$src)))),
2224                (nvvm_move_ptr64  texternalsym:$src)>;
2225 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2226                 (MoveParam texternalsym:$src)))),
2227                (nvvm_move_ptr32  texternalsym:$src)>;
// Moves an imem operand into an Int64Regs result with mov.u64. No selection
// pattern is attached, so nothing in this definition selects it automatically.
2229 def texsurf_handles
2230   : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
2231               "mov.u64 \t$result, $src;", []>;
2233 //-----------------------------------
2234 // Compiler Error Warn
2235 // - Just ignore them in codegen
2236 //-----------------------------------
// int_nvvm_compiler_warn and int_nvvm_compiler_error are each matched with a
// 32-bit and a 64-bit register operand. The instructions have no outputs and
// their asm text is only a "//" line naming the intrinsic; $a is not
// referenced in the asm string.
2238 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2239                 "// llvm.nvvm.compiler.warn()",
2240                 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
2241 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2242                 "// llvm.nvvm.compiler.warn()",
2243                 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
2244 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2245                 "// llvm.nvvm.compiler.error()",
2246                 [(int_nvvm_compiler_error Int32Regs:$a)]>;
2247 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2248                 "// llvm.nvvm.compiler.error()",
2249                 [(int_nvvm_compiler_error Int64Regs:$a)]>;
2252 // isspacep
// One "isspacep.<space>" instruction per (state space, address width) pair,
// for the const, global, local and shared spaces with Int32Regs or Int64Regs
// addresses. Each writes an Int1Regs result and is selected for the matching
// int_nvvm_isspacep_<space> intrinsic. Only the two const variants carry
// Requires<[hasPTX31]>; the others are unconditional.
2254 def ISSPACEP_CONST_32
2255   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2256               "isspacep.const \t$d, $a;",
2257               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
2258     Requires<[hasPTX31]>;
2259 def ISSPACEP_CONST_64
2260   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2261               "isspacep.const \t$d, $a;",
2262               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
2263     Requires<[hasPTX31]>;
2264 def ISSPACEP_GLOBAL_32
2265   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2266               "isspacep.global \t$d, $a;",
2267               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
2268 def ISSPACEP_GLOBAL_64
2269   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2270               "isspacep.global \t$d, $a;",
2271               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
2272 def ISSPACEP_LOCAL_32
2273   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2274               "isspacep.local \t$d, $a;",
2275               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
2276 def ISSPACEP_LOCAL_64
2277   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2278               "isspacep.local \t$d, $a;",
2279               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
2280 def ISSPACEP_SHARED_32
2281   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2282               "isspacep.shared \t$d, $a;",
2283               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
2284 def ISSPACEP_SHARED_64
2285   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2286               "isspacep.shared \t$d, $a;",
2287               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2290 // Special register reads
// Copies a SpecialRegs operand into an Int32Regs result with mov.b32. It has
// no pattern of its own; the envreg Pat records in this file name it
// explicitly as their output instruction.
2291 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
2292                             (ins SpecialRegs:$r),
2293                             "mov.b32 \t$d, $r;", []>;
// int_nvvm_read_ptx_sreg_envregN is selected to (MOV_SPECIAL ENVREGN) for
// every N from 0 through 31; the intrinsic index and register index always
// match.
2295 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
2296 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
2297 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
2298 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
2299 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
2300 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
2301 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
2302 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
2303 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
2304 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
2305 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
2306 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2307 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2308 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2309 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2310 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2311 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2312 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2313 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2314 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2315 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2316 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2317 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2318 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2319 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2320 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2321 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2322 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2323 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2324 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2325 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2326 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2329 // rotate builtin support
// int_nvvm_rotate_b32 when hasHWROT32 holds: a single shf.l.wrap.b32 with
// $src supplied as both data operands. Two forms are defined, one taking the
// amount as an i32 immediate and one taking it in an Int32Regs register.
2331 def ROTATE_B32_HW_IMM
2332   : NVPTXInst<(outs Int32Regs:$dst),
2333               (ins  Int32Regs:$src, i32imm:$amt),
2334               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2335               [(set Int32Regs:$dst,
2336                  (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2337               Requires<[hasHWROT32]> ;
2339 def ROTATE_B32_HW_REG
2340   : NVPTXInst<(outs Int32Regs:$dst),
2341               (ins  Int32Regs:$src, Int32Regs:$amt),
2342               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2343               [(set Int32Regs:$dst,
2344                  (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2345               Requires<[hasHWROT32]> ;
// int_nvvm_rotate_b32 when noHWROT32 holds. The immediate form goes to
// ROT32imm_sw, which receives both the amount and SUB_FRM_32 applied to it
// (by its name, 32 - amt; the transform is defined outside this chunk). The
// register form goes to ROTL32reg_sw.
2347 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2348           (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2349       Requires<[noHWROT32]> ;
2351 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2352           (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2353       Requires<[noHWROT32]> ;
// Extract one 32-bit half of an Int64Regs value. Each asm block declares a
// scratch .b32 register %dummy and unpacks $src with mov.b64 into a
// two-element list: GET_LO_INT64 binds $dst to the first element and
// GET_HI_INT64 binds $dst to the second, with %dummy taking the other one.
// Neither has a selection pattern; both are marked hasSideEffects = false.
2355 let hasSideEffects = false in {
2356   def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2357     !strconcat("{{\n\t",
2358                ".reg .b32 %dummy;\n\t",
2359                "mov.b64 \t{$dst,%dummy}, $src;\n\t",
2360                "}}"),
2361           []> ;
2363   def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2364     !strconcat("{{\n\t",
2365                ".reg .b32 %dummy;\n\t",
2366                "mov.b64 \t{%dummy,$dst}, $src;\n\t",
2367                "}}"),
2368           []> ;
// Builds an Int64Regs value from two Int32Regs operands with a single mov.b64
// whose source is the list {$lo, $hi}, in that order. No selection pattern;
// marked hasSideEffects = false.
2371 let hasSideEffects = false in {
2372   def PACK_TWO_INT32
2373     : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2374                 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
// int_nvvm_swap_lo_hi_b64: repack the two halves in exchanged positions. The
// GET_HI_INT64 result fills PACK_TWO_INT32's $lo operand and the GET_LO_INT64
// result fills its $hi operand.
2377 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2378           (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2379                           (GET_LO_INT64 Int64Regs:$src))> ;
2381 // Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
2382 // no side effects.
// Four funnel-shift instructions: left and right wrap variants
// (shf.l.wrap.b32 / shf.r.wrap.b32), each with an i32 immediate amount and an
// Int32Regs amount. Operands are always ($lo, $hi, $amt). None has a selection
// pattern; the rotate Pat records in this file name them explicitly. All
// require hasHWROT32.
2383 let hasSideEffects = false in {
2384   def SHF_L_WRAP_B32_IMM
2385     : NVPTXInst<(outs Int32Regs:$dst),
2386                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2387                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2388       Requires<[hasHWROT32]>;
2390   def SHF_L_WRAP_B32_REG
2391     : NVPTXInst<(outs Int32Regs:$dst),
2392                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2393                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2394       Requires<[hasHWROT32]>;
2396   def SHF_R_WRAP_B32_IMM
2397     : NVPTXInst<(outs Int32Regs:$dst),
2398                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2399                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2400       Requires<[hasHWROT32]>;
2402   def SHF_R_WRAP_B32_REG
2403     : NVPTXInst<(outs Int32Regs:$dst),
2404                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2405                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2406       Requires<[hasHWROT32]>;
2409 // HW version of rotate 64
// 64-bit rotates under hasHWROT32, built from two 32-bit funnel shifts over
// the halves of $src and repacked with PACK_TWO_INT32 (operands: $lo, $hi).
//   rotate_b64:       $lo = SHF_L(lo:=hi-half, hi:=lo-half, amt)
//                     $hi = SHF_L(lo:=lo-half, hi:=hi-half, amt)
//   rotate_right_b64: $lo = SHF_R(lo:=lo-half, hi:=hi-half, amt)
//                     $hi = SHF_R(lo:=hi-half, hi:=lo-half, amt)
// Each rotate has an immediate-amount and a register-amount pattern. The
// operand order of every funnel shift is significant; do not reorder.
2410 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2411           (PACK_TWO_INT32
2412             (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2413                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2414             (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2415                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2416       Requires<[hasHWROT32]>;
2418 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2419           (PACK_TWO_INT32
2420             (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2421                                 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2422             (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2423                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2424       Requires<[hasHWROT32]>;
2427 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2428           (PACK_TWO_INT32
2429             (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2430                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2431             (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2432                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2433       Requires<[hasHWROT32]>;
2435 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2436           (PACK_TWO_INT32
2437             (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2438                                 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2439             (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2440                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2441       Requires<[hasHWROT32]>;
2443 // SW version of rotate 64
// int_nvvm_rotate_b64 with an immediate amount when noHWROT32 holds.
// ROT64imm_sw takes the shift amount and its complement; for a 64-bit rotate
// the complement must come from SUB_FRM_64 (by its name, 64 - amt; defined
// outside this chunk), not SUB_FRM_32, which belongs to the 32-bit
// ROT32imm_sw pattern. The rotate-right immediate pattern passes the same
// pair with the two amounts swapped.
2444 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2445           (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
2446       Requires<[noHWROT32]>;
// Remaining 64-bit rotate patterns for noHWROT32:
//  - rotate_b64 with a register amount       -> ROTL64reg_sw
//  - rotate_right_b64 with an immediate      -> ROT64imm_sw, passing
//    SUB_FRM_64 of the amount first and the amount itself second
//  - rotate_right_b64 with a register amount -> ROTR64reg_sw
2447 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2448           (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2449       Requires<[noHWROT32]>;
2450 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2451           (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2452       Requires<[noHWROT32]>;
2453 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2454           (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2455       Requires<[noHWROT32]>;
2458 //-----------------------------------
2459 // Texture Intrinsics
2460 //-----------------------------------
2462 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2463 // also defined in NVPTXReplaceImageHandles.cpp
2465 // texmode_independent
2466 let IsTex = true, IsTexModeUnified = false in {
2467 // Texture fetch instructions using handles
// 1D texture fetches returning four Float32Regs results ($r, $g, $b, $a).
// Inputs are two Int64Regs operands $t and $s (presumably the texture and
// sampler handles - confirm against the callers) followed by the coordinate:
//   _S32       - Int32Regs $x
//   _F32       - Float32Regs $x
//   _F32_LEVEL - Float32Regs $x plus $lod        (tex.level)
//   _F32_GRAD  - Float32Regs $x plus $gradx/$grady (tex.grad)
// Braces in the asm strings are written as \\{ and \\}. No selection patterns
// are attached.
2468 def TEX_1D_F32_S32
2469   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2470                     Float32Regs:$b, Float32Regs:$a),
2471               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2472               "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2473               []>;
2474 def TEX_1D_F32_F32
2475   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2476                     Float32Regs:$b, Float32Regs:$a),
2477               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2478               "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2479               []>;
2480 def TEX_1D_F32_F32_LEVEL
2481   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2482                     Float32Regs:$b, Float32Regs:$a),
2483               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
2484               "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2485               "[$t, $s, \\{$x\\}], $lod;",
2486               []>;
2487 def TEX_1D_F32_F32_GRAD
2488   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2489                     Float32Regs:$b, Float32Regs:$a),
2490               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2491                    Float32Regs:$gradx, Float32Regs:$grady),
2492               "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2493               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2494               []>;
// 1D texture fetches with the .s32 result type: four Int32Regs results
// ($r, $g, $b, $a). Inputs are two Int64Regs operands $t and $s followed by
// the coordinate, in the same four shapes as the f32-result group: Int32Regs
// $x (_S32), Float32Regs $x (_F32), plus $lod (_F32_LEVEL), or plus
// $gradx/$grady (_F32_GRAD). No selection patterns are attached.
2495 def TEX_1D_S32_S32
2496   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2497                     Int32Regs:$b, Int32Regs:$a),
2498               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2499               "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2500               []>;
2501 def TEX_1D_S32_F32
2502   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2503                     Int32Regs:$b, Int32Regs:$a),
2504               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2505               "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2506               []>;
2507 def TEX_1D_S32_F32_LEVEL
2508   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2509                     Int32Regs:$b, Int32Regs:$a),
2510               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2511                    Float32Regs:$lod),
2512               "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2513               "[$t, $s, \\{$x\\}], $lod;",
2514               []>;
2515 def TEX_1D_S32_F32_GRAD
2516   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2517                     Int32Regs:$b, Int32Regs:$a),
2518               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2519                    Float32Regs:$gradx, Float32Regs:$grady),
2520               "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2521               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2522               []>;
2523 def TEX_1D_U32_S32
2524   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2525                     Int32Regs:$b, Int32Regs:$a),
2526               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2527               "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2528               []>;
2529 def TEX_1D_U32_F32
2530   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2531                     Int32Regs:$b, Int32Regs:$a),
2532               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2533               "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2534               []>;
// TEX_1D_U32_F32_LEVEL: asm "tex.level.1d.v4.u32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Float32Regs coordinate $x, and Float32Regs $lod printed
// after the bracketed address. Pattern list is empty.
2535 def TEX_1D_U32_F32_LEVEL
2536   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2537                     Int32Regs:$b, Int32Regs:$a),
2538               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2539                    Float32Regs:$lod),
2540               "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2541               "[$t, $s, \\{$x\\}], $lod;",
2542               []>;
// TEX_1D_U32_F32_GRAD: asm "tex.grad.1d.v4.u32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Float32Regs coordinate $x, plus $gradx and $grady, each
// printed as its own one-element brace list. Pattern list is empty.
2543 def TEX_1D_U32_F32_GRAD
2544   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2545                     Int32Regs:$b, Int32Regs:$a),
2546               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2547                    Float32Regs:$gradx, Float32Regs:$grady),
2548               "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2549               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2550               []>;
// TEX_1D_ARRAY_F32_S32: asm "tex.a1d.v4.f32.s32"; four Float32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s; Int32Regs $l is printed ahead of Int32Regs coordinate $x in the
// brace list (presumably the array index -- confirm against PTX a1d). Pattern list is empty.
2552 def TEX_1D_ARRAY_F32_S32
2553   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2554                     Float32Regs:$b, Float32Regs:$a),
2555               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2556               "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2557               "[$t, $s, \\{$l, $x\\}];",
2558               []>;
// TEX_1D_ARRAY_F32_F32: asm "tex.a1d.v4.f32.f32"; four Float32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s; Int32Regs $l is printed ahead of Float32Regs coordinate $x in the
// brace list (presumably the array index -- confirm against PTX a1d). Pattern list is empty.
2559 def TEX_1D_ARRAY_F32_F32
2560   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2561                     Float32Regs:$b, Float32Regs:$a),
2562               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2563               "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2564               "[$t, $s, \\{$l, $x\\}];",
2565               []>;
// TEX_1D_ARRAY_F32_F32_LEVEL: asm "tex.level.a1d.v4.f32.f32"; four Float32Regs results.
// Inputs: handles $t, $s; brace list {$l, $x} with Int32Regs $l (presumably the array
// index -- confirm) and Float32Regs $x; Float32Regs $lod follows the address. Pattern list is empty.
2566 def TEX_1D_ARRAY_F32_F32_LEVEL
2567   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2568                     Float32Regs:$b, Float32Regs:$a),
2569               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2570                    Float32Regs:$lod),
2571               "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2572               "[$t, $s, \\{$l, $x\\}], $lod;",
2573               []>;
// TEX_1D_ARRAY_F32_F32_GRAD: asm "tex.grad.a1d.v4.f32.f32"; four Float32Regs results.
// Inputs: handles $t, $s; brace list {$l, $x} with Int32Regs $l (presumably the array
// index -- confirm) and Float32Regs $x; then {$gradx} and {$grady}. Pattern list is empty.
2574 def TEX_1D_ARRAY_F32_F32_GRAD
2575   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2576                     Float32Regs:$b, Float32Regs:$a),
2577               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2578                    Float32Regs:$gradx, Float32Regs:$grady),
2579               "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2580               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2581               []>;
// TEX_1D_ARRAY_S32_S32: asm "tex.a1d.v4.s32.s32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s; Int32Regs $l is printed ahead of Int32Regs coordinate $x in the
// brace list (presumably the array index -- confirm against PTX a1d). Pattern list is empty.
2582 def TEX_1D_ARRAY_S32_S32
2583   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2584                     Int32Regs:$b, Int32Regs:$a),
2585               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2586               "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2587               "[$t, $s, \\{$l, $x\\}];",
2588               []>;
// TEX_1D_ARRAY_S32_F32: asm "tex.a1d.v4.s32.f32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s; Int32Regs $l is printed ahead of Float32Regs coordinate $x in the
// brace list (presumably the array index -- confirm against PTX a1d). Pattern list is empty.
2589 def TEX_1D_ARRAY_S32_F32
2590   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2591                     Int32Regs:$b, Int32Regs:$a),
2592               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2593               "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2594               "[$t, $s, \\{$l, $x\\}];",
2595               []>;
// TEX_1D_ARRAY_S32_F32_LEVEL: asm "tex.level.a1d.v4.s32.f32"; four Int32Regs results.
// Inputs: handles $t, $s; brace list {$l, $x} with Int32Regs $l (presumably the array
// index -- confirm) and Float32Regs $x; Float32Regs $lod follows the address. Pattern list is empty.
2596 def TEX_1D_ARRAY_S32_F32_LEVEL
2597   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2598                     Int32Regs:$b, Int32Regs:$a),
2599               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2600                    Float32Regs:$lod),
2601               "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2602               "[$t, $s, \\{$l, $x\\}], $lod;",
2603               []>;
// TEX_1D_ARRAY_S32_F32_GRAD: asm "tex.grad.a1d.v4.s32.f32"; four Int32Regs results.
// Inputs: handles $t, $s; brace list {$l, $x} with Int32Regs $l (presumably the array
// index -- confirm) and Float32Regs $x; then {$gradx} and {$grady}. Pattern list is empty.
2604 def TEX_1D_ARRAY_S32_F32_GRAD
2605   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2606                     Int32Regs:$b, Int32Regs:$a),
2607               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2608                    Float32Regs:$gradx, Float32Regs:$grady),
2609               "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2610               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2611               []>;
// TEX_1D_ARRAY_U32_S32: asm "tex.a1d.v4.u32.s32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s; Int32Regs $l is printed ahead of Int32Regs coordinate $x in the
// brace list (presumably the array index -- confirm against PTX a1d). Pattern list is empty.
2612 def TEX_1D_ARRAY_U32_S32
2613   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2614                     Int32Regs:$b, Int32Regs:$a),
2615               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2616               "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2617               "[$t, $s, \\{$l, $x\\}];",
2618               []>;
// TEX_1D_ARRAY_U32_F32: asm "tex.a1d.v4.u32.f32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s; Int32Regs $l is printed ahead of Float32Regs coordinate $x in the
// brace list (presumably the array index -- confirm against PTX a1d). Pattern list is empty.
2619 def TEX_1D_ARRAY_U32_F32
2620   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2621                     Int32Regs:$b, Int32Regs:$a),
2622               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2623               "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2624               "[$t, $s, \\{$l, $x\\}];",
2625               []>;
// TEX_1D_ARRAY_U32_F32_LEVEL: asm "tex.level.a1d.v4.u32.f32"; four Int32Regs results.
// Inputs: handles $t, $s; brace list {$l, $x} with Int32Regs $l (presumably the array
// index -- confirm) and Float32Regs $x; Float32Regs $lod follows the address. Pattern list is empty.
2626 def TEX_1D_ARRAY_U32_F32_LEVEL
2627   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2628                     Int32Regs:$b, Int32Regs:$a),
2629               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2630                    Float32Regs:$lod),
2631               "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2632               "[$t, $s, \\{$l, $x\\}], $lod;",
2633               []>;
// TEX_1D_ARRAY_U32_F32_GRAD: asm "tex.grad.a1d.v4.u32.f32"; four Int32Regs results.
// Inputs: handles $t, $s; brace list {$l, $x} with Int32Regs $l (presumably the array
// index -- confirm) and Float32Regs $x; then {$gradx} and {$grady}. Pattern list is empty.
2634 def TEX_1D_ARRAY_U32_F32_GRAD
2635   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2636                     Int32Regs:$b, Int32Regs:$a),
2637               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2638                    Float32Regs:$gradx, Float32Regs:$grady),
2639               "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2640               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2641               []>;
// TEX_2D_F32_S32: asm "tex.2d.v4.f32.s32"; four Float32Regs results ($r, $g, $b, $a).
// Inputs: Int64Regs handles $t and $s, Int32Regs coordinates $x, $y. Pattern list is empty.
2643 def TEX_2D_F32_S32
2644   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2645                     Float32Regs:$b, Float32Regs:$a),
2646               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2647               "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2648               "[$t, $s, \\{$x, $y\\}];",
2649               []>;
// TEX_2D_F32_F32: asm "tex.2d.v4.f32.f32"; four Float32Regs results ($r, $g, $b, $a).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
2650 def TEX_2D_F32_F32
2651   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2652                     Float32Regs:$b, Float32Regs:$a),
2653               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2654               "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2655               "[$t, $s, \\{$x, $y\\}];",
2656               []>;
// TEX_2D_F32_F32_LEVEL: asm "tex.level.2d.v4.f32.f32"; four Float32Regs results.
// Inputs: handles $t, $s, Float32Regs coordinates $x, $y, and Float32Regs $lod printed
// after the bracketed address. Pattern list is empty.
2657 def TEX_2D_F32_F32_LEVEL
2658   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2659                     Float32Regs:$b, Float32Regs:$a),
2660               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2661                    Float32Regs:$lod),
2662               "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2663               "[$t, $s, \\{$x, $y\\}], $lod;",
2664               []>;
// TEX_2D_F32_F32_GRAD: asm "tex.grad.2d.v4.f32.f32"; four Float32Regs results.
// Inputs: handles $t, $s, Float32Regs coordinates $x, $y, then two 2-element brace
// lists {$gradx0, $gradx1} and {$grady0, $grady1}. Pattern list is empty.
2665 def TEX_2D_F32_F32_GRAD
2666   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2667                     Float32Regs:$b, Float32Regs:$a),
2668               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2669                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2670                    Float32Regs:$grady0, Float32Regs:$grady1),
2671               "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2672               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2673               "\\{$grady0, $grady1\\};",
2674               []>;
// TEX_2D_S32_S32: asm "tex.2d.v4.s32.s32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: Int64Regs handles $t and $s, Int32Regs coordinates $x, $y. Pattern list is empty.
2675 def TEX_2D_S32_S32
2676   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2677                     Int32Regs:$b, Int32Regs:$a),
2678               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2679               "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2680               "[$t, $s, \\{$x, $y\\}];",
2681               []>;
// TEX_2D_S32_F32: asm "tex.2d.v4.s32.f32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
2682 def TEX_2D_S32_F32
2683   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2684                     Int32Regs:$b, Int32Regs:$a),
2685               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2686               "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2687               "[$t, $s, \\{$x, $y\\}];",
2688               []>;
// TEX_2D_S32_F32_LEVEL: asm "tex.level.2d.v4.s32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Float32Regs coordinates $x, $y, and Float32Regs $lod printed
// after the bracketed address. Pattern list is empty.
2689 def TEX_2D_S32_F32_LEVEL
2690   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2691                     Int32Regs:$b, Int32Regs:$a),
2692               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2693                    Float32Regs:$lod),
2694               "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2695               "[$t, $s, \\{$x, $y\\}], $lod;",
2696               []>;
// TEX_2D_S32_F32_GRAD: asm "tex.grad.2d.v4.s32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Float32Regs coordinates $x, $y, then two 2-element brace
// lists {$gradx0, $gradx1} and {$grady0, $grady1}. Pattern list is empty.
2697 def TEX_2D_S32_F32_GRAD
2698   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2699                     Int32Regs:$b, Int32Regs:$a),
2700               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2701                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2702                    Float32Regs:$grady0, Float32Regs:$grady1),
2703               "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2704               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2705               "\\{$grady0, $grady1\\};",
2706               []>;
// TEX_2D_U32_S32: asm "tex.2d.v4.u32.s32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: Int64Regs handles $t and $s, Int32Regs coordinates $x, $y. Pattern list is empty.
2707 def TEX_2D_U32_S32
2708   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2709                     Int32Regs:$b, Int32Regs:$a),
2710               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2711               "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2712               "[$t, $s, \\{$x, $y\\}];",
2713               []>;
// TEX_2D_U32_F32: asm "tex.2d.v4.u32.f32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
2714 def TEX_2D_U32_F32
2715   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2716                     Int32Regs:$b, Int32Regs:$a),
2717               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2718               "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2719               "[$t, $s, \\{$x, $y\\}];",
2720               []>;
// TEX_2D_U32_F32_LEVEL: asm "tex.level.2d.v4.u32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Float32Regs coordinates $x, $y, and Float32Regs $lod printed
// after the bracketed address. Pattern list is empty.
2721 def TEX_2D_U32_F32_LEVEL
2722   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2723                     Int32Regs:$b, Int32Regs:$a),
2724               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2725                    Float32Regs:$lod),
2726               "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2727               "[$t, $s, \\{$x, $y\\}], $lod;",
2728               []>;
// TEX_2D_U32_F32_GRAD: asm "tex.grad.2d.v4.u32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Float32Regs coordinates $x, $y, then two 2-element brace
// lists {$gradx0, $gradx1} and {$grady0, $grady1}. Pattern list is empty.
2729 def TEX_2D_U32_F32_GRAD
2730   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2731                     Int32Regs:$b, Int32Regs:$a),
2732               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2733                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2734                    Float32Regs:$grady0, Float32Regs:$grady1),
2735               "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2736               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2737               "\\{$grady0, $grady1\\};",
2738               []>;
// TEX_2D_ARRAY_F32_S32: asm "tex.a2d.v4.f32.s32"; four Float32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Int32Regs $l, $x, $y. The brace list is {$l, $x, $y, $y}: $y is
// printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2740 def TEX_2D_ARRAY_F32_S32
2741   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2742                     Float32Regs:$b, Float32Regs:$a),
2743               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2744                    Int32Regs:$y),
2745               "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2746               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2747               []>;
// TEX_2D_ARRAY_F32_F32: asm "tex.a2d.v4.f32.f32"; four Float32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y. The brace list is {$l, $x, $y, $y}:
// $y is printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2748 def TEX_2D_ARRAY_F32_F32
2749   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2750                     Float32Regs:$b, Float32Regs:$a),
2751               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2752                    Float32Regs:$y),
2753               "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2754               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2755               []>;
// TEX_2D_ARRAY_F32_F32_LEVEL: asm "tex.level.a2d.v4.f32.f32"; four Float32Regs results.
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y, $lod. Brace list {$l, $x, $y, $y}
// repeats $y as the fourth element (NOTE(review): assumed PTX padding -- confirm); $lod follows. Pattern list is empty.
2756 def TEX_2D_ARRAY_F32_F32_LEVEL
2757   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2758                     Float32Regs:$b, Float32Regs:$a),
2759               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2760                    Float32Regs:$y, Float32Regs:$lod),
2761               "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2762               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2763               []>;
// TEX_2D_ARRAY_F32_F32_GRAD: asm "tex.grad.a2d.v4.f32.f32"; four Float32Regs results.
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y, and four gradient operands.
// Brace list {$l, $x, $y, $y} repeats $y (NOTE(review): assumed PTX padding -- confirm);
// gradients print as {$gradx0, $gradx1} and {$grady0, $grady1}. Pattern list is empty.
2764 def TEX_2D_ARRAY_F32_F32_GRAD
2765   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2766                     Float32Regs:$b, Float32Regs:$a),
2767               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2768                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2769                    Float32Regs:$grady0, Float32Regs:$grady1),
2770               "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2771               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2772               "\\{$grady0, $grady1\\};",
2773               []>;
// TEX_2D_ARRAY_S32_S32: asm "tex.a2d.v4.s32.s32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Int32Regs $l, $x, $y. The brace list is {$l, $x, $y, $y}: $y is
// printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2774 def TEX_2D_ARRAY_S32_S32
2775   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2776                     Int32Regs:$b, Int32Regs:$a),
2777               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2778                    Int32Regs:$y),
2779               "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2780               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2781               []>;
// TEX_2D_ARRAY_S32_F32: asm "tex.a2d.v4.s32.f32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y. The brace list is {$l, $x, $y, $y}:
// $y is printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2782 def TEX_2D_ARRAY_S32_F32
2783   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2784                     Int32Regs:$b, Int32Regs:$a),
2785               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2786                    Float32Regs:$y),
2787               "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2788               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2789               []>;
// TEX_2D_ARRAY_S32_F32_LEVEL: asm "tex.level.a2d.v4.s32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y, $lod. Brace list {$l, $x, $y, $y}
// repeats $y as the fourth element (NOTE(review): assumed PTX padding -- confirm); $lod follows. Pattern list is empty.
2790 def TEX_2D_ARRAY_S32_F32_LEVEL
2791   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2792                     Int32Regs:$b, Int32Regs:$a),
2793               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2794                    Float32Regs:$y, Float32Regs:$lod),
2795               "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2796               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2797               []>;
// TEX_2D_ARRAY_S32_F32_GRAD: asm "tex.grad.a2d.v4.s32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y, and four gradient operands.
// Brace list {$l, $x, $y, $y} repeats $y (NOTE(review): assumed PTX padding -- confirm);
// gradients print as {$gradx0, $gradx1} and {$grady0, $grady1}. Pattern list is empty.
2798 def TEX_2D_ARRAY_S32_F32_GRAD
2799   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2800                     Int32Regs:$b, Int32Regs:$a),
2801               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2802                    Float32Regs:$y,
2803                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2804                    Float32Regs:$grady0, Float32Regs:$grady1),
2805               "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2806               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2807               "\\{$grady0, $grady1\\};",
2808               []>;
// TEX_2D_ARRAY_U32_S32: asm "tex.a2d.v4.u32.s32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Int32Regs $l, $x, $y. The brace list is {$l, $x, $y, $y}: $y is
// printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2809 def TEX_2D_ARRAY_U32_S32
2810   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2811                     Int32Regs:$b, Int32Regs:$a),
2812               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2813                    Int32Regs:$y),
2814               "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2815               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2816               []>;
// TEX_2D_ARRAY_U32_F32: asm "tex.a2d.v4.u32.f32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y. The brace list is {$l, $x, $y, $y}:
// $y is printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2817 def TEX_2D_ARRAY_U32_F32
2818   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2819                     Int32Regs:$b, Int32Regs:$a),
2820               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2821                    Float32Regs:$y),
2822               "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2823               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2824               []>;
// TEX_2D_ARRAY_U32_F32_LEVEL: asm "tex.level.a2d.v4.u32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y, $lod. Brace list {$l, $x, $y, $y}
// repeats $y as the fourth element (NOTE(review): assumed PTX padding -- confirm); $lod follows. Pattern list is empty.
2825 def TEX_2D_ARRAY_U32_F32_LEVEL
2826   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2827                     Int32Regs:$b, Int32Regs:$a),
2828               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2829                    Float32Regs:$y, Float32Regs:$lod),
2830               "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2831               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2832               []>;
// TEX_2D_ARRAY_U32_F32_GRAD: asm "tex.grad.a2d.v4.u32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y, and four gradient operands.
// Brace list {$l, $x, $y, $y} repeats $y (NOTE(review): assumed PTX padding -- confirm);
// gradients print as {$gradx0, $gradx1} and {$grady0, $grady1}. Pattern list is empty.
2833 def TEX_2D_ARRAY_U32_F32_GRAD
2834   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2835                     Int32Regs:$b, Int32Regs:$a),
2836               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2837                    Float32Regs:$y,
2838                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2839                    Float32Regs:$grady0, Float32Regs:$grady1),
2840               "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2841               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2842               "\\{$grady0, $grady1\\};",
2843               []>;
// TEX_3D_F32_S32: asm "tex.3d.v4.f32.s32"; four Float32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Int32Regs $x, $y, $z. The brace list is {$x, $y, $z, $z}: $z is
// printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2845 def TEX_3D_F32_S32
2846   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2847                     Float32Regs:$b, Float32Regs:$a),
2848               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2849                    Int32Regs:$z),
2850               "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2851               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2852               []>;
// TEX_3D_F32_F32: asm "tex.3d.v4.f32.f32"; four Float32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Float32Regs $x, $y, $z. The brace list is {$x, $y, $z, $z}: $z is
// printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2853 def TEX_3D_F32_F32
2854   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2855                     Float32Regs:$b, Float32Regs:$a),
2856               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2857                    Float32Regs:$z),
2858               "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2859               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2860               []>;
// TEX_3D_F32_F32_LEVEL: asm "tex.level.3d.v4.f32.f32"; four Float32Regs results.
// Inputs: handles $t, $s, Float32Regs $x, $y, $z, $lod. Brace list {$x, $y, $z, $z} repeats
// $z as the fourth element (NOTE(review): assumed PTX padding -- confirm); $lod follows. Pattern list is empty.
2861 def TEX_3D_F32_F32_LEVEL
2862   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2863                     Float32Regs:$b, Float32Regs:$a),
2864               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2865                    Float32Regs:$z, Float32Regs:$lod),
2866               "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2867               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2868               []>;
// TEX_3D_F32_F32_GRAD: asm "tex.grad.3d.v4.f32.f32"; four Float32Regs results.
// Inputs: handles $t, $s, Float32Regs $x, $y, $z, and six gradient operands
// ($gradx0..2, $grady0..2). Each of the three brace lists is printed with four elements
// by repeating its last operand ($z, $gradx2, $grady2) -- NOTE(review): assumed PTX
// padding; confirm against the PTX ISA. Pattern list is empty.
2869 def TEX_3D_F32_F32_GRAD
2870   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2871                     Float32Regs:$b, Float32Regs:$a),
2872               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2873                    Float32Regs:$z,
2874                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2875                    Float32Regs:$gradx2, Float32Regs:$grady0,
2876                    Float32Regs:$grady1, Float32Regs:$grady2),
2877               "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2878               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2879               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2880               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2881               []>;
// TEX_3D_S32_S32: asm "tex.3d.v4.s32.s32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Int32Regs $x, $y, $z. The brace list is {$x, $y, $z, $z}: $z is
// printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2882 def TEX_3D_S32_S32
2883   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2884                     Int32Regs:$b, Int32Regs:$a),
2885               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2886                    Int32Regs:$z),
2887               "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2888               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2889               []>;
// TEX_3D_S32_F32: asm "tex.3d.v4.s32.f32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Float32Regs $x, $y, $z. The brace list is {$x, $y, $z, $z}: $z is
// printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2890 def TEX_3D_S32_F32
2891   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2892                     Int32Regs:$b, Int32Regs:$a),
2893               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2894                    Float32Regs:$z),
2895               "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2896               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2897               []>;
// TEX_3D_S32_F32_LEVEL: asm "tex.level.3d.v4.s32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Float32Regs $x, $y, $z, $lod. Brace list {$x, $y, $z, $z} repeats
// $z as the fourth element (NOTE(review): assumed PTX padding -- confirm); $lod follows. Pattern list is empty.
2898 def TEX_3D_S32_F32_LEVEL
2899   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2900                     Int32Regs:$b, Int32Regs:$a),
2901               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2902                    Float32Regs:$z, Float32Regs:$lod),
2903               "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2904               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2905               []>;
// TEX_3D_S32_F32_GRAD: asm "tex.grad.3d.v4.s32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Float32Regs $x, $y, $z, and six gradient operands
// ($gradx0..2, $grady0..2). Each of the three brace lists is printed with four elements
// by repeating its last operand ($z, $gradx2, $grady2) -- NOTE(review): assumed PTX
// padding; confirm against the PTX ISA. Pattern list is empty.
2906 def TEX_3D_S32_F32_GRAD
2907   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2908                     Int32Regs:$b, Int32Regs:$a),
2909               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2910                    Float32Regs:$z,
2911                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2912                    Float32Regs:$gradx2, Float32Regs:$grady0,
2913                    Float32Regs:$grady1, Float32Regs:$grady2),
2914               "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2915               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2916               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2917               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2918               []>;
// TEX_3D_U32_S32: asm "tex.3d.v4.u32.s32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Int32Regs $x, $y, $z. The brace list is {$x, $y, $z, $z}: $z is
// printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2919 def TEX_3D_U32_S32
2920   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2921                     Int32Regs:$b, Int32Regs:$a),
2922               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2923                    Int32Regs:$z),
2924               "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2925               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2926               []>;
// TEX_3D_U32_F32: asm "tex.3d.v4.u32.f32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Float32Regs $x, $y, $z. The brace list is {$x, $y, $z, $z}: $z is
// printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2927 def TEX_3D_U32_F32
2928   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2929                     Int32Regs:$b, Int32Regs:$a),
2930               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2931                    Float32Regs:$z),
2932               "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2933               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2934               []>;
// TEX_3D_U32_F32_LEVEL: asm "tex.level.3d.v4.u32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Float32Regs $x, $y, $z, $lod. Brace list {$x, $y, $z, $z} repeats
// $z as the fourth element (NOTE(review): assumed PTX padding -- confirm); $lod follows. Pattern list is empty.
2935 def TEX_3D_U32_F32_LEVEL
2936   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2937                     Int32Regs:$b, Int32Regs:$a),
2938               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2939                    Float32Regs:$z, Float32Regs:$lod),
2940               "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2941               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2942               []>;
// TEX_3D_U32_F32_GRAD: asm "tex.grad.3d.v4.u32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Float32Regs $x, $y, $z, and six gradient operands
// ($gradx0..2, $grady0..2). Each of the three brace lists is printed with four elements
// by repeating its last operand ($z, $gradx2, $grady2) -- NOTE(review): assumed PTX
// padding; confirm against the PTX ISA. Pattern list is empty.
2943 def TEX_3D_U32_F32_GRAD
2944   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2945                     Int32Regs:$b, Int32Regs:$a),
2946               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2947                    Float32Regs:$z,
2948                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2949                    Float32Regs:$gradx2, Float32Regs:$grady0,
2950                    Float32Regs:$grady1, Float32Regs:$grady2),
2951               "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2952               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2953               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2954               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2955               []>;
// TEX_CUBE_F32_F32: asm "tex.cube.v4.f32.f32"; four Float32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Float32Regs $x, $y, $z. The brace list is {$x, $y, $z, $z}: $z is
// printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2957 def TEX_CUBE_F32_F32
2958   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2959                     Float32Regs:$b, Float32Regs:$a),
2960               (ins Int64Regs:$t, Int64Regs:$s,
2961                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2962               "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2963               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2964               []>;
// TEX_CUBE_F32_F32_LEVEL: asm "tex.level.cube.v4.f32.f32"; four Float32Regs results.
// Inputs: handles $t, $s, Float32Regs $x, $y, $z, $lod. Brace list {$x, $y, $z, $z} repeats
// $z as the fourth element (NOTE(review): assumed PTX padding -- confirm); $lod follows. Pattern list is empty.
2965 def TEX_CUBE_F32_F32_LEVEL
2966   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2967                     Float32Regs:$b, Float32Regs:$a),
2968               (ins Int64Regs:$t, Int64Regs:$s,
2969                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2970                    Float32Regs:$lod),
2971               "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2972               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2973               []>;
// TEX_CUBE_S32_F32: asm "tex.cube.v4.s32.f32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Float32Regs $x, $y, $z. The brace list is {$x, $y, $z, $z}: $z is
// printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2974 def TEX_CUBE_S32_F32
2975   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2976                     Int32Regs:$b, Int32Regs:$a),
2977               (ins Int64Regs:$t, Int64Regs:$s,
2978                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2979               "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2980               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2981               []>;
// TEX_CUBE_S32_F32_LEVEL: asm "tex.level.cube.v4.s32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Float32Regs $x, $y, $z, $lod. Brace list {$x, $y, $z, $z} repeats
// $z as the fourth element (NOTE(review): assumed PTX padding -- confirm); $lod follows. Pattern list is empty.
2982 def TEX_CUBE_S32_F32_LEVEL
2983   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2984                     Int32Regs:$b, Int32Regs:$a),
2985               (ins Int64Regs:$t, Int64Regs:$s,
2986                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2987                    Float32Regs:$lod),
2988               "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2989               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2990               []>;
// TEX_CUBE_U32_F32: asm "tex.cube.v4.u32.f32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Float32Regs $x, $y, $z. The brace list is {$x, $y, $z, $z}: $z is
// printed twice to make four elements (NOTE(review): assumed PTX padding -- confirm). Pattern list is empty.
2991 def TEX_CUBE_U32_F32
2992   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2993                     Int32Regs:$b, Int32Regs:$a),
2994               (ins Int64Regs:$t, Int64Regs:$s,
2995                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2996               "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2997               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2998               []>;
// TEX_CUBE_U32_F32_LEVEL: asm "tex.level.cube.v4.u32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Float32Regs $x, $y, $z, $lod. Brace list {$x, $y, $z, $z} repeats
// $z as the fourth element (NOTE(review): assumed PTX padding -- confirm); $lod follows. Pattern list is empty.
2999 def TEX_CUBE_U32_F32_LEVEL
3000   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3001                     Int32Regs:$b, Int32Regs:$a),
3002               (ins Int64Regs:$t, Int64Regs:$s,
3003                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3004                    Float32Regs:$lod),
3005               "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3006               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
3007               []>;
// TEX_CUBE_ARRAY_F32_F32: asm "tex.acube.v4.f32.f32"; four Float32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y, $z. The brace list {$l, $x, $y, $z}
// already has four elements, so nothing is repeated ($l presumably the array index -- confirm).
// Pattern list is empty.
3009 def TEX_CUBE_ARRAY_F32_F32
3010   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3011                     Float32Regs:$b, Float32Regs:$a),
3012               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
3013                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3014               "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3015               "[$t, $s, \\{$l, $x, $y, $z\\}];",
3016               []>;
// TEX_CUBE_ARRAY_F32_F32_LEVEL: asm "tex.level.acube.v4.f32.f32"; four Float32Regs results.
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y, $z, $lod. Brace list is
// {$l, $x, $y, $z} ($l presumably the array index -- confirm); $lod follows the address.
// Pattern list is empty.
3017 def TEX_CUBE_ARRAY_F32_F32_LEVEL
3018   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3019                     Float32Regs:$b, Float32Regs:$a),
3020               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
3021                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3022                    Float32Regs:$lod),
3023               "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3024               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
3025               []>;
// TEX_CUBE_ARRAY_S32_F32: asm "tex.acube.v4.s32.f32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y, $z. The brace list {$l, $x, $y, $z}
// already has four elements, so nothing is repeated ($l presumably the array index -- confirm).
// Pattern list is empty.
3026 def TEX_CUBE_ARRAY_S32_F32
3027   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3028                     Int32Regs:$b, Int32Regs:$a),
3029               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
3030                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3031               "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3032               "[$t, $s, \\{$l, $x, $y, $z\\}];",
3033               []>;
// TEX_CUBE_ARRAY_S32_F32_LEVEL: asm "tex.level.acube.v4.s32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y, $z, $lod. Brace list is
// {$l, $x, $y, $z} ($l presumably the array index -- confirm); $lod follows the address.
// Pattern list is empty.
3034 def TEX_CUBE_ARRAY_S32_F32_LEVEL
3035   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3036                     Int32Regs:$b, Int32Regs:$a),
3037               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
3038                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3039                    Float32Regs:$lod),
3040               "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3041               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
3042               []>;
// TEX_CUBE_ARRAY_U32_F32: asm "tex.acube.v4.u32.f32"; four Int32Regs results ($r, $g, $b, $a).
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y, $z. The brace list {$l, $x, $y, $z}
// already has four elements, so nothing is repeated ($l presumably the array index -- confirm).
// Pattern list is empty.
3043 def TEX_CUBE_ARRAY_U32_F32
3044   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3045                     Int32Regs:$b, Int32Regs:$a),
3046               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
3047                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3048               "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3049               "[$t, $s, \\{$l, $x, $y, $z\\}];",
3050               []>;
// TEX_CUBE_ARRAY_U32_F32_LEVEL: asm "tex.level.acube.v4.u32.f32"; four Int32Regs results.
// Inputs: handles $t, $s, Int32Regs $l, Float32Regs $x, $y, $z, $lod. Brace list is
// {$l, $x, $y, $z} ($l presumably the array index -- confirm); $lod follows the address.
// Pattern list is empty.
3051 def TEX_CUBE_ARRAY_U32_F32_LEVEL
3052   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3053                     Int32Regs:$b, Int32Regs:$a),
3054               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
3055                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3056                    Float32Regs:$lod),
3057               "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3058               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
3059               []>;
// TLD4_R_2D_F32_F32: asm "tld4.r.2d.v4.f32.f32"; four Float32Regs results ($v0..$v3).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
3061 def TLD4_R_2D_F32_F32
3062   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3063                     Float32Regs:$v2, Float32Regs:$v3),
3064               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3065               "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3066               "[$t, $s, \\{$x, $y\\}];",
3067               []>;
// TLD4_G_2D_F32_F32: asm "tld4.g.2d.v4.f32.f32"; four Float32Regs results ($v0..$v3).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
3068 def TLD4_G_2D_F32_F32
3069   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3070                     Float32Regs:$v2, Float32Regs:$v3),
3071               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3072               "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3073               "[$t, $s, \\{$x, $y\\}];",
3074               []>;
// TLD4_B_2D_F32_F32: asm "tld4.b.2d.v4.f32.f32"; four Float32Regs results ($v0..$v3).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
3075 def TLD4_B_2D_F32_F32
3076   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3077                     Float32Regs:$v2, Float32Regs:$v3),
3078               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3079               "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3080               "[$t, $s, \\{$x, $y\\}];",
3081               []>;
// TLD4_A_2D_F32_F32: asm "tld4.a.2d.v4.f32.f32"; four Float32Regs results ($v0..$v3).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
3082 def TLD4_A_2D_F32_F32
3083   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3084                     Float32Regs:$v2, Float32Regs:$v3),
3085               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3086               "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3087               "[$t, $s, \\{$x, $y\\}];",
3088               []>;
// TLD4_R_2D_S32_F32: asm "tld4.r.2d.v4.s32.f32"; four Int32Regs results ($v0..$v3).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
3089 def TLD4_R_2D_S32_F32
3090   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3091                     Int32Regs:$v2, Int32Regs:$v3),
3092               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3093               "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3094               "[$t, $s, \\{$x, $y\\}];",
3095               []>;
// TLD4_G_2D_S32_F32: asm "tld4.g.2d.v4.s32.f32"; four Int32Regs results ($v0..$v3).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
3096 def TLD4_G_2D_S32_F32
3097   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3098                     Int32Regs:$v2, Int32Regs:$v3),
3099               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3100               "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3101               "[$t, $s, \\{$x, $y\\}];",
3102               []>;
// TLD4_B_2D_S32_F32: asm "tld4.b.2d.v4.s32.f32"; four Int32Regs results ($v0..$v3).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
3103 def TLD4_B_2D_S32_F32
3104   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3105                     Int32Regs:$v2, Int32Regs:$v3),
3106               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3107               "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3108               "[$t, $s, \\{$x, $y\\}];",
3109               []>;
// TLD4_A_2D_S32_F32: asm "tld4.a.2d.v4.s32.f32"; four Int32Regs results ($v0..$v3).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
3110 def TLD4_A_2D_S32_F32
3111   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3112                     Int32Regs:$v2, Int32Regs:$v3),
3113               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3114               "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3115               "[$t, $s, \\{$x, $y\\}];",
3116               []>;
// TLD4_R_2D_U32_F32: asm "tld4.r.2d.v4.u32.f32"; four Int32Regs results ($v0..$v3).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
3117 def TLD4_R_2D_U32_F32
3118   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3119                     Int32Regs:$v2, Int32Regs:$v3),
3120               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3121               "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3122               "[$t, $s, \\{$x, $y\\}];",
3123               []>;
// TLD4_G_2D_U32_F32: asm "tld4.g.2d.v4.u32.f32"; four Int32Regs results ($v0..$v3).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
3124 def TLD4_G_2D_U32_F32
3125   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3126                     Int32Regs:$v2, Int32Regs:$v3),
3127               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3128               "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3129               "[$t, $s, \\{$x, $y\\}];",
3130               []>;
// TLD4_B_2D_U32_F32: asm "tld4.b.2d.v4.u32.f32"; four Int32Regs results ($v0..$v3).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
3131 def TLD4_B_2D_U32_F32
3132   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3133                     Int32Regs:$v2, Int32Regs:$v3),
3134               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3135               "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3136               "[$t, $s, \\{$x, $y\\}];",
3137               []>;
// TLD4_A_2D_U32_F32: asm "tld4.a.2d.v4.u32.f32"; four Int32Regs results ($v0..$v3).
// Inputs: Int64Regs handles $t and $s, Float32Regs coordinates $x, $y. Pattern list is empty.
3138 def TLD4_A_2D_U32_F32
3139   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3140                     Int32Regs:$v2, Int32Regs:$v3),
3141               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
3142               "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3143               "[$t, $s, \\{$x, $y\\}];",
3144               []>;
3148 // texmode_unified
3149 let IsTex = true, IsTexModeUnified = true in {
3150 // Texture fetch instructions using handles
// TEX_UNIFIED_1D_F32_S32: asm "tex.1d.v4.f32.s32"; four Float32Regs results ($r, $g, $b, $a).
// Inputs: a single Int64Regs handle $t (no $s operand) and Int32Regs coordinate $x.
// Pattern list is empty.
3151 def TEX_UNIFIED_1D_F32_S32
3152   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3153                     Float32Regs:$b, Float32Regs:$a),
3154               (ins Int64Regs:$t, Int32Regs:$x),
3155               "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3156               []>;
// TEX_UNIFIED_1D_F32_F32: NVPTXInst printing `tex.1d.v4.f32.f32`. Results
// $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t and Float32Regs $x,
// printed as [$t, {$x}]. The pattern list is empty.
3157 def TEX_UNIFIED_1D_F32_F32
3158   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3159                     Float32Regs:$b, Float32Regs:$a),
3160               (ins Int64Regs:$t, Float32Regs:$x),
3161               "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3162               []>;
// TEX_UNIFIED_1D_F32_F32_LEVEL: NVPTXInst printing `tex.level.1d.v4.f32.f32`.
// Results $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t and
// Float32Regs $x and $lod, with $lod printed after the bracketed address.
// The pattern list is empty.
3163 def TEX_UNIFIED_1D_F32_F32_LEVEL
3164   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3165                     Float32Regs:$b, Float32Regs:$a),
3166               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
3167               "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3168               "[$t, \\{$x\\}], $lod;",
3169               []>;
// TEX_UNIFIED_1D_F32_F32_GRAD: NVPTXInst printing `tex.grad.1d.v4.f32.f32`.
// Results $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $gradx, $grady. $gradx and $grady are each printed in their
// own brace list after the address. The pattern list is empty.
3170 def TEX_UNIFIED_1D_F32_F32_GRAD
3171   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3172                     Float32Regs:$b, Float32Regs:$a),
3173               (ins Int64Regs:$t, Float32Regs:$x,
3174                    Float32Regs:$gradx, Float32Regs:$grady),
3175               "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3176               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3177               []>;
// TEX_UNIFIED_1D_S32_S32: NVPTXInst printing `tex.1d.v4.s32.s32`. Results
// $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and Int32Regs $x,
// printed as [$t, {$x}]. The pattern list is empty.
3178 def TEX_UNIFIED_1D_S32_S32
3179   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3180                     Int32Regs:$b, Int32Regs:$a),
3181               (ins Int64Regs:$t, Int32Regs:$x),
3182               "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3183               []>;
// TEX_UNIFIED_1D_S32_F32: NVPTXInst printing `tex.1d.v4.s32.f32`. Results
// $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and Float32Regs $x,
// printed as [$t, {$x}]. The pattern list is empty.
3184 def TEX_UNIFIED_1D_S32_F32
3185   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3186                     Int32Regs:$b, Int32Regs:$a),
3187               (ins Int64Regs:$t, Float32Regs:$x),
3188               "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3189               []>;
// TEX_UNIFIED_1D_S32_F32_LEVEL: NVPTXInst printing `tex.level.1d.v4.s32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Float32Regs $x and $lod, with $lod printed after the bracketed address.
// The pattern list is empty.
3190 def TEX_UNIFIED_1D_S32_F32_LEVEL
3191   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3192                     Int32Regs:$b, Int32Regs:$a),
3193               (ins Int64Regs:$t, Float32Regs:$x,
3194                    Float32Regs:$lod),
3195               "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3196               "[$t, \\{$x\\}], $lod;",
3197               []>;
// TEX_UNIFIED_1D_S32_F32_GRAD: NVPTXInst printing `tex.grad.1d.v4.s32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $gradx, $grady. $gradx and $grady are each printed in their
// own brace list after the address. The pattern list is empty.
3198 def TEX_UNIFIED_1D_S32_F32_GRAD
3199   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3200                     Int32Regs:$b, Int32Regs:$a),
3201               (ins Int64Regs:$t, Float32Regs:$x,
3202                    Float32Regs:$gradx, Float32Regs:$grady),
3203               "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3204               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3205               []>;
// TEX_UNIFIED_1D_U32_S32: NVPTXInst printing `tex.1d.v4.u32.s32`. Results
// $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and Int32Regs $x,
// printed as [$t, {$x}]. The pattern list is empty.
3206 def TEX_UNIFIED_1D_U32_S32
3207   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3208                     Int32Regs:$b, Int32Regs:$a),
3209               (ins Int64Regs:$t, Int32Regs:$x),
3210               "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3211               []>;
// TEX_UNIFIED_1D_U32_F32: NVPTXInst printing `tex.1d.v4.u32.f32`. Results
// $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and Float32Regs $x,
// printed as [$t, {$x}]. The pattern list is empty.
3212 def TEX_UNIFIED_1D_U32_F32
3213   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3214                     Int32Regs:$b, Int32Regs:$a),
3215               (ins Int64Regs:$t, Float32Regs:$x),
3216               "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3217               []>;
// TEX_UNIFIED_1D_U32_F32_LEVEL: NVPTXInst printing `tex.level.1d.v4.u32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Float32Regs $x and $lod, with $lod printed after the bracketed address.
// The pattern list is empty.
3218 def TEX_UNIFIED_1D_U32_F32_LEVEL
3219   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3220                     Int32Regs:$b, Int32Regs:$a),
3221               (ins Int64Regs:$t, Float32Regs:$x,
3222                    Float32Regs:$lod),
3223               "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3224               "[$t, \\{$x\\}], $lod;",
3225               []>;
// TEX_UNIFIED_1D_U32_F32_GRAD: NVPTXInst printing `tex.grad.1d.v4.u32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $gradx, $grady. $gradx and $grady are each printed in their
// own brace list after the address. The pattern list is empty.
3226 def TEX_UNIFIED_1D_U32_F32_GRAD
3227   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3228                     Int32Regs:$b, Int32Regs:$a),
3229               (ins Int64Regs:$t, Float32Regs:$x,
3230                    Float32Regs:$gradx, Float32Regs:$grady),
3231               "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3232               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3233               []>;
// TEX_UNIFIED_1D_ARRAY_F32_S32: NVPTXInst printing `tex.a1d.v4.f32.s32`.
// Results $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t and
// Int32Regs $l, $x, printed as [$t, {$l, $x}]. The pattern list is empty.
3235 def TEX_UNIFIED_1D_ARRAY_F32_S32
3236   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3237                     Float32Regs:$b, Float32Regs:$a),
3238               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3239               "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3240               "[$t, \\{$l, $x\\}];",
3241               []>;
// TEX_UNIFIED_1D_ARRAY_F32_F32: NVPTXInst printing `tex.a1d.v4.f32.f32`.
// Results $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t,
// Int32Regs $l and Float32Regs $x, printed as [$t, {$l, $x}].
// The pattern list is empty.
3242 def TEX_UNIFIED_1D_ARRAY_F32_F32
3243   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3244                     Float32Regs:$b, Float32Regs:$a),
3245               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3246               "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3247               "[$t, \\{$l, $x\\}];",
3248               []>;
// TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL: NVPTXInst printing
// `tex.level.a1d.v4.f32.f32`. Results $r, $g, $b, $a are Float32Regs; inputs
// are Int64Regs $t, Int32Regs $l and Float32Regs $x, $lod. $lod is printed
// after the bracketed address [$t, {$l, $x}]. The pattern list is empty.
3249 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3250   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3251                     Float32Regs:$b, Float32Regs:$a),
3252               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3253                    Float32Regs:$lod),
3254               "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3255               "[$t, \\{$l, $x\\}], $lod;",
3256               []>;
// TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD: NVPTXInst printing
// `tex.grad.a1d.v4.f32.f32`. Results $r, $g, $b, $a are Float32Regs; inputs
// are Int64Regs $t, Int32Regs $l and Float32Regs $x, $gradx, $grady. Each of
// $gradx and $grady is printed in its own brace list after [$t, {$l, $x}].
// The pattern list is empty.
3257 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3258   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3259                     Float32Regs:$b, Float32Regs:$a),
3260               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3261                    Float32Regs:$gradx, Float32Regs:$grady),
3262               "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3263               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3264               []>;
// TEX_UNIFIED_1D_ARRAY_S32_S32: NVPTXInst printing `tex.a1d.v4.s32.s32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Int32Regs $l, $x, printed as [$t, {$l, $x}]. The pattern list is empty.
3265 def TEX_UNIFIED_1D_ARRAY_S32_S32
3266   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3267                     Int32Regs:$b, Int32Regs:$a),
3268               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3269               "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3270               "[$t, \\{$l, $x\\}];",
3271               []>;
// TEX_UNIFIED_1D_ARRAY_S32_F32: NVPTXInst printing `tex.a1d.v4.s32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t,
// Int32Regs $l and Float32Regs $x, printed as [$t, {$l, $x}].
// The pattern list is empty.
3272 def TEX_UNIFIED_1D_ARRAY_S32_F32
3273   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3274                     Int32Regs:$b, Int32Regs:$a),
3275               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3276               "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3277               "[$t, \\{$l, $x\\}];",
3278               []>;
// TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL: NVPTXInst printing
// `tex.level.a1d.v4.s32.f32`. Results $r, $g, $b, $a are Int32Regs; inputs
// are Int64Regs $t, Int32Regs $l and Float32Regs $x, $lod. $lod is printed
// after the bracketed address [$t, {$l, $x}]. The pattern list is empty.
3279 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3280   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3281                     Int32Regs:$b, Int32Regs:$a),
3282               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3283                    Float32Regs:$lod),
3284               "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3285               "[$t, \\{$l, $x\\}], $lod;",
3286               []>;
// TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD: NVPTXInst printing
// `tex.grad.a1d.v4.s32.f32`. Results $r, $g, $b, $a are Int32Regs; inputs
// are Int64Regs $t, Int32Regs $l and Float32Regs $x, $gradx, $grady. Each of
// $gradx and $grady is printed in its own brace list after [$t, {$l, $x}].
// The pattern list is empty.
3287 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3288   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3289                     Int32Regs:$b, Int32Regs:$a),
3290               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3291                    Float32Regs:$gradx, Float32Regs:$grady),
3292               "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3293               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3294               []>;
// TEX_UNIFIED_1D_ARRAY_U32_S32: NVPTXInst printing `tex.a1d.v4.u32.s32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Int32Regs $l, $x, printed as [$t, {$l, $x}]. The pattern list is empty.
3295 def TEX_UNIFIED_1D_ARRAY_U32_S32
3296   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3297                     Int32Regs:$b, Int32Regs:$a),
3298               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3299               "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3300               "[$t, \\{$l, $x\\}];",
3301               []>;
// TEX_UNIFIED_1D_ARRAY_U32_F32: NVPTXInst printing `tex.a1d.v4.u32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t,
// Int32Regs $l and Float32Regs $x, printed as [$t, {$l, $x}].
// The pattern list is empty.
3302 def TEX_UNIFIED_1D_ARRAY_U32_F32
3303   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3304                     Int32Regs:$b, Int32Regs:$a),
3305               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3306               "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3307               "[$t, \\{$l, $x\\}];",
3308               []>;
// TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL: NVPTXInst printing
// `tex.level.a1d.v4.u32.f32`. Results $r, $g, $b, $a are Int32Regs; inputs
// are Int64Regs $t, Int32Regs $l and Float32Regs $x, $lod. $lod is printed
// after the bracketed address [$t, {$l, $x}]. The pattern list is empty.
3309 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3310   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3311                     Int32Regs:$b, Int32Regs:$a),
3312               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3313                    Float32Regs:$lod),
3314               "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3315               "[$t, \\{$l, $x\\}], $lod;",
3316               []>;
// TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD: NVPTXInst printing
// `tex.grad.a1d.v4.u32.f32`. Results $r, $g, $b, $a are Int32Regs; inputs
// are Int64Regs $t, Int32Regs $l and Float32Regs $x, $gradx, $grady. Each of
// $gradx and $grady is printed in its own brace list after [$t, {$l, $x}].
// The pattern list is empty.
3317 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3318   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3319                     Int32Regs:$b, Int32Regs:$a),
3320               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3321                    Float32Regs:$gradx, Float32Regs:$grady),
3322               "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3323               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3324               []>;
// TEX_UNIFIED_2D_F32_S32: NVPTXInst printing `tex.2d.v4.f32.s32`. Results
// $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t and Int32Regs
// $x, $y, printed as [$t, {$x, $y}]. The pattern list is empty.
3326 def TEX_UNIFIED_2D_F32_S32
3327   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3328                     Float32Regs:$b, Float32Regs:$a),
3329               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3330               "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3331               "[$t, \\{$x, $y\\}];",
3332               []>;
// TEX_UNIFIED_2D_F32_F32: NVPTXInst printing `tex.2d.v4.f32.f32`. Results
// $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t and Float32Regs
// $x, $y, printed as [$t, {$x, $y}]. The pattern list is empty.
3333 def TEX_UNIFIED_2D_F32_F32
3334   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3335                     Float32Regs:$b, Float32Regs:$a),
3336               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3337               "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3338               "[$t, \\{$x, $y\\}];",
3339               []>;
// TEX_UNIFIED_2D_F32_F32_LEVEL: NVPTXInst printing `tex.level.2d.v4.f32.f32`.
// Results $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $y, $lod. $lod is printed after the bracketed address
// [$t, {$x, $y}]. The pattern list is empty.
3340 def TEX_UNIFIED_2D_F32_F32_LEVEL
3341   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3342                     Float32Regs:$b, Float32Regs:$a),
3343               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3344                    Float32Regs:$lod),
3345               "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3346               "[$t, \\{$x, $y\\}], $lod;",
3347               []>;
// TEX_UNIFIED_2D_F32_F32_GRAD: NVPTXInst printing `tex.grad.2d.v4.f32.f32`.
// Results $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $y, $gradx0, $gradx1, $grady0, $grady1. After the address
// [$t, {$x, $y}] the asm prints {$gradx0, $gradx1} and {$grady0, $grady1}.
// The pattern list is empty.
3348 def TEX_UNIFIED_2D_F32_F32_GRAD
3349   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3350                     Float32Regs:$b, Float32Regs:$a),
3351               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3352                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3353                    Float32Regs:$grady0, Float32Regs:$grady1),
3354               "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3355               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3356               "\\{$grady0, $grady1\\};",
3357               []>;
// TEX_UNIFIED_2D_S32_S32: NVPTXInst printing `tex.2d.v4.s32.s32`. Results
// $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and Int32Regs
// $x, $y, printed as [$t, {$x, $y}]. The pattern list is empty.
3358 def TEX_UNIFIED_2D_S32_S32
3359   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3360                     Int32Regs:$b, Int32Regs:$a),
3361               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3362               "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3363               "[$t, \\{$x, $y\\}];",
3364               []>;
// TEX_UNIFIED_2D_S32_F32: NVPTXInst printing `tex.2d.v4.s32.f32`. Results
// $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and Float32Regs
// $x, $y, printed as [$t, {$x, $y}]. The pattern list is empty.
3365 def TEX_UNIFIED_2D_S32_F32
3366   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3367                     Int32Regs:$b, Int32Regs:$a),
3368               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3369               "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3370               "[$t, \\{$x, $y\\}];",
3371               []>;
// TEX_UNIFIED_2D_S32_F32_LEVEL: NVPTXInst printing `tex.level.2d.v4.s32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $y, $lod. $lod is printed after the bracketed address
// [$t, {$x, $y}]. The pattern list is empty.
3372 def TEX_UNIFIED_2D_S32_F32_LEVEL
3373   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3374                     Int32Regs:$b, Int32Regs:$a),
3375               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3376                    Float32Regs:$lod),
3377               "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3378               "[$t, \\{$x, $y\\}], $lod;",
3379               []>;
// TEX_UNIFIED_2D_S32_F32_GRAD: NVPTXInst printing `tex.grad.2d.v4.s32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $y, $gradx0, $gradx1, $grady0, $grady1. After the address
// [$t, {$x, $y}] the asm prints {$gradx0, $gradx1} and {$grady0, $grady1}.
// The pattern list is empty.
3380 def TEX_UNIFIED_2D_S32_F32_GRAD
3381   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3382                     Int32Regs:$b, Int32Regs:$a),
3383               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3384                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3385                    Float32Regs:$grady0, Float32Regs:$grady1),
3386               "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3387               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3388               "\\{$grady0, $grady1\\};",
3389               []>;
// TEX_UNIFIED_2D_U32_S32: NVPTXInst printing `tex.2d.v4.u32.s32`. Results
// $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and Int32Regs
// $x, $y, printed as [$t, {$x, $y}]. The pattern list is empty.
3390 def TEX_UNIFIED_2D_U32_S32
3391   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3392                     Int32Regs:$b, Int32Regs:$a),
3393               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3394               "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3395               "[$t, \\{$x, $y\\}];",
3396               []>;
// TEX_UNIFIED_2D_U32_F32: NVPTXInst printing `tex.2d.v4.u32.f32`. Results
// $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and Float32Regs
// $x, $y, printed as [$t, {$x, $y}]. The pattern list is empty.
3397 def TEX_UNIFIED_2D_U32_F32
3398   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3399                     Int32Regs:$b, Int32Regs:$a),
3400               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3401               "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3402               "[$t, \\{$x, $y\\}];",
3403               []>;
// TEX_UNIFIED_2D_U32_F32_LEVEL: NVPTXInst printing `tex.level.2d.v4.u32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $y, $lod. $lod is printed after the bracketed address
// [$t, {$x, $y}]. The pattern list is empty.
3404 def TEX_UNIFIED_2D_U32_F32_LEVEL
3405   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3406                     Int32Regs:$b, Int32Regs:$a),
3407               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3408                    Float32Regs:$lod),
3409               "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3410               "[$t, \\{$x, $y\\}], $lod;",
3411               []>;
// TEX_UNIFIED_2D_U32_F32_GRAD: NVPTXInst printing `tex.grad.2d.v4.u32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $y, $gradx0, $gradx1, $grady0, $grady1. After the address
// [$t, {$x, $y}] the asm prints {$gradx0, $gradx1} and {$grady0, $grady1}.
// The pattern list is empty.
3412 def TEX_UNIFIED_2D_U32_F32_GRAD
3413   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3414                     Int32Regs:$b, Int32Regs:$a),
3415               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3416                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3417                    Float32Regs:$grady0, Float32Regs:$grady1),
3418               "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3419               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3420               "\\{$grady0, $grady1\\};",
3421               []>;
// TEX_UNIFIED_2D_ARRAY_F32_S32: NVPTXInst printing `tex.a2d.v4.f32.s32`.
// Results $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t and
// Int32Regs $l, $x, $y. The pattern list is empty.
// NOTE(review): $y is printed twice, as {$l, $x, $y, $y}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3423 def TEX_UNIFIED_2D_ARRAY_F32_S32
3424   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3425                     Float32Regs:$b, Float32Regs:$a),
3426               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3427                    Int32Regs:$y),
3428               "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3429               "[$t, \\{$l, $x, $y, $y\\}];",
3430               []>;
// TEX_UNIFIED_2D_ARRAY_F32_F32: NVPTXInst printing `tex.a2d.v4.f32.f32`.
// Results $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t,
// Int32Regs $l and Float32Regs $x, $y. The pattern list is empty.
// NOTE(review): $y is printed twice, as {$l, $x, $y, $y}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3431 def TEX_UNIFIED_2D_ARRAY_F32_F32
3432   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3433                     Float32Regs:$b, Float32Regs:$a),
3434               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3435                    Float32Regs:$y),
3436               "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3437               "[$t, \\{$l, $x, $y, $y\\}];",
3438               []>;
// TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL: NVPTXInst printing
// `tex.level.a2d.v4.f32.f32`. Results $r, $g, $b, $a are Float32Regs; inputs
// are Int64Regs $t, Int32Regs $l and Float32Regs $x, $y, $lod. $lod is
// printed after the bracketed address. The pattern list is empty.
// NOTE(review): $y is printed twice, as {$l, $x, $y, $y}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3439 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3440   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3441                     Float32Regs:$b, Float32Regs:$a),
3442               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3443                    Float32Regs:$y, Float32Regs:$lod),
3444               "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3445               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3446               []>;
// TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD: NVPTXInst printing
// `tex.grad.a2d.v4.f32.f32`. Results $r, $g, $b, $a are Float32Regs; inputs
// are Int64Regs $t, Int32Regs $l and Float32Regs $x, $y, $gradx0, $gradx1,
// $grady0, $grady1. After the address the asm prints {$gradx0, $gradx1} and
// {$grady0, $grady1}. The pattern list is empty.
// NOTE(review): $y is printed twice, as {$l, $x, $y, $y}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3447 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3448   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3449                     Float32Regs:$b, Float32Regs:$a),
3450               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3451                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
3452                    Float32Regs:$grady0, Float32Regs:$grady1),
3453               "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3454               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3455               "\\{$grady0, $grady1\\};",
3456               []>;
// TEX_UNIFIED_2D_ARRAY_S32_S32: NVPTXInst printing `tex.a2d.v4.s32.s32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Int32Regs $l, $x, $y. The pattern list is empty.
// NOTE(review): $y is printed twice, as {$l, $x, $y, $y}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3457 def TEX_UNIFIED_2D_ARRAY_S32_S32
3458   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3459                     Int32Regs:$b, Int32Regs:$a),
3460               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3461                    Int32Regs:$y),
3462               "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3463               "[$t, \\{$l, $x, $y, $y\\}];",
3464               []>;
// TEX_UNIFIED_2D_ARRAY_S32_F32: NVPTXInst printing `tex.a2d.v4.s32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t,
// Int32Regs $l and Float32Regs $x, $y. The pattern list is empty.
// NOTE(review): $y is printed twice, as {$l, $x, $y, $y}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3465 def TEX_UNIFIED_2D_ARRAY_S32_F32
3466   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3467                     Int32Regs:$b, Int32Regs:$a),
3468               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3469                    Float32Regs:$y),
3470               "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3471               "[$t, \\{$l, $x, $y, $y\\}];",
3472               []>;
// TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL: NVPTXInst printing
// `tex.level.a2d.v4.s32.f32`. Results $r, $g, $b, $a are Int32Regs; inputs
// are Int64Regs $t, Int32Regs $l and Float32Regs $x, $y, $lod. $lod is
// printed after the bracketed address. The pattern list is empty.
// NOTE(review): $y is printed twice, as {$l, $x, $y, $y}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3473 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3474   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3475                     Int32Regs:$b, Int32Regs:$a),
3476               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3477                    Float32Regs:$y, Float32Regs:$lod),
3478               "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3479               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3480               []>;
// TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD: NVPTXInst printing
// `tex.grad.a2d.v4.s32.f32`. Results $r, $g, $b, $a are Int32Regs; inputs
// are Int64Regs $t, Int32Regs $l and Float32Regs $x, $y, $gradx0, $gradx1,
// $grady0, $grady1. After the address the asm prints {$gradx0, $gradx1} and
// {$grady0, $grady1}. The pattern list is empty.
// NOTE(review): $y is printed twice, as {$l, $x, $y, $y}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3481 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3482   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3483                     Int32Regs:$b, Int32Regs:$a),
3484               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3485                    Float32Regs:$y,
3486                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3487                    Float32Regs:$grady0, Float32Regs:$grady1),
3488               "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3489               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3490               "\\{$grady0, $grady1\\};",
3491               []>;
// TEX_UNIFIED_2D_ARRAY_U32_S32: NVPTXInst printing `tex.a2d.v4.u32.s32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Int32Regs $l, $x, $y. The pattern list is empty.
// NOTE(review): $y is printed twice, as {$l, $x, $y, $y}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3492 def TEX_UNIFIED_2D_ARRAY_U32_S32
3493   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3494                     Int32Regs:$b, Int32Regs:$a),
3495               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3496                    Int32Regs:$y),
3497               "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3498               "[$t, \\{$l, $x, $y, $y\\}];",
3499               []>;
// TEX_UNIFIED_2D_ARRAY_U32_F32: NVPTXInst printing `tex.a2d.v4.u32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t,
// Int32Regs $l and Float32Regs $x, $y. The pattern list is empty.
// NOTE(review): $y is printed twice, as {$l, $x, $y, $y}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3500 def TEX_UNIFIED_2D_ARRAY_U32_F32
3501   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3502                     Int32Regs:$b, Int32Regs:$a),
3503               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3504                    Float32Regs:$y),
3505               "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3506               "[$t, \\{$l, $x, $y, $y\\}];",
3507               []>;
// TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL: NVPTXInst printing
// `tex.level.a2d.v4.u32.f32`. Results $r, $g, $b, $a are Int32Regs; inputs
// are Int64Regs $t, Int32Regs $l and Float32Regs $x, $y, $lod. $lod is
// printed after the bracketed address. The pattern list is empty.
// NOTE(review): $y is printed twice, as {$l, $x, $y, $y}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3508 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3509   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3510                     Int32Regs:$b, Int32Regs:$a),
3511               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3512                    Float32Regs:$y, Float32Regs:$lod),
3513               "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3514               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3515               []>;
// TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD: NVPTXInst printing
// `tex.grad.a2d.v4.u32.f32`. Results $r, $g, $b, $a are Int32Regs; inputs
// are Int64Regs $t, Int32Regs $l and Float32Regs $x, $y, $gradx0, $gradx1,
// $grady0, $grady1. After the address the asm prints {$gradx0, $gradx1} and
// {$grady0, $grady1}. The pattern list is empty.
// NOTE(review): $y is printed twice, as {$l, $x, $y, $y}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3516 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3517   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3518                     Int32Regs:$b, Int32Regs:$a),
3519               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3520                    Float32Regs:$y,
3521                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3522                    Float32Regs:$grady0, Float32Regs:$grady1),
3523               "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3524               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3525               "\\{$grady0, $grady1\\};",
3526               []>;
// TEX_UNIFIED_3D_F32_S32: NVPTXInst printing `tex.3d.v4.f32.s32`. Results
// $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t and Int32Regs
// $x, $y, $z. The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3528 def TEX_UNIFIED_3D_F32_S32
3529   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3530                     Float32Regs:$b, Float32Regs:$a),
3531               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3532                    Int32Regs:$z),
3533               "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3534               "[$t, \\{$x, $y, $z, $z\\}];",
3535               []>;
// TEX_UNIFIED_3D_F32_F32: NVPTXInst printing `tex.3d.v4.f32.f32`. Results
// $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t and Float32Regs
// $x, $y, $z. The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3536 def TEX_UNIFIED_3D_F32_F32
3537   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3538                     Float32Regs:$b, Float32Regs:$a),
3539               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3540                    Float32Regs:$z),
3541               "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3542               "[$t, \\{$x, $y, $z, $z\\}];",
3543               []>;
// TEX_UNIFIED_3D_F32_F32_LEVEL: NVPTXInst printing `tex.level.3d.v4.f32.f32`.
// Results $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $y, $z, $lod. $lod is printed after the bracketed address.
// The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3544 def TEX_UNIFIED_3D_F32_F32_LEVEL
3545   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3546                     Float32Regs:$b, Float32Regs:$a),
3547               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3548                    Float32Regs:$z, Float32Regs:$lod),
3549               "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3550               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3551               []>;
// TEX_UNIFIED_3D_F32_F32_GRAD: NVPTXInst printing `tex.grad.3d.v4.f32.f32`.
// Results $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $y, $z, $gradx0-$gradx2, $grady0-$grady2.
// The pattern list is empty.
// NOTE(review): the last element of each brace list is printed twice ($z,
// $gradx2, $grady2); presumably this pads each list to four elements -
// confirm against the PTX ISA `tex` description.
3552 def TEX_UNIFIED_3D_F32_F32_GRAD
3553   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3554                     Float32Regs:$b, Float32Regs:$a),
3555               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3556                    Float32Regs:$z,
3557                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3558                    Float32Regs:$gradx2, Float32Regs:$grady0,
3559                    Float32Regs:$grady1, Float32Regs:$grady2),
3560               "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3561               "[$t, \\{$x, $y, $z, $z\\}], "
3562               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3563               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3564               []>;
// TEX_UNIFIED_3D_S32_S32: NVPTXInst printing `tex.3d.v4.s32.s32`. Results
// $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and Int32Regs
// $x, $y, $z. The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3565 def TEX_UNIFIED_3D_S32_S32
3566   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3567                     Int32Regs:$b, Int32Regs:$a),
3568               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3569                    Int32Regs:$z),
3570               "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3571               "[$t, \\{$x, $y, $z, $z\\}];",
3572               []>;
// TEX_UNIFIED_3D_S32_F32: NVPTXInst printing `tex.3d.v4.s32.f32`. Results
// $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and Float32Regs
// $x, $y, $z. The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3573 def TEX_UNIFIED_3D_S32_F32
3574   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3575                     Int32Regs:$b, Int32Regs:$a),
3576               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3577                    Float32Regs:$z),
3578               "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3579               "[$t, \\{$x, $y, $z, $z\\}];",
3580               []>;
// TEX_UNIFIED_3D_S32_F32_LEVEL: NVPTXInst printing `tex.level.3d.v4.s32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $y, $z, $lod. $lod is printed after the bracketed address.
// The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3581 def TEX_UNIFIED_3D_S32_F32_LEVEL
3582   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3583                     Int32Regs:$b, Int32Regs:$a),
3584               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3585                    Float32Regs:$z, Float32Regs:$lod),
3586               "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3587               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3588               []>;
// TEX_UNIFIED_3D_S32_F32_GRAD: NVPTXInst printing `tex.grad.3d.v4.s32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $y, $z, $gradx0-$gradx2, $grady0-$grady2.
// The pattern list is empty.
// NOTE(review): the last element of each brace list is printed twice ($z,
// $gradx2, $grady2); presumably this pads each list to four elements -
// confirm against the PTX ISA `tex` description.
3589 def TEX_UNIFIED_3D_S32_F32_GRAD
3590   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3591                     Int32Regs:$b, Int32Regs:$a),
3592               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3593                    Float32Regs:$z,
3594                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3595                    Float32Regs:$gradx2, Float32Regs:$grady0,
3596                    Float32Regs:$grady1, Float32Regs:$grady2),
3597               "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3598               "[$t, \\{$x, $y, $z, $z\\}], "
3599               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3600               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3601               []>;
// TEX_UNIFIED_3D_U32_S32: NVPTXInst printing `tex.3d.v4.u32.s32`. Results
// $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and Int32Regs
// $x, $y, $z. The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3602 def TEX_UNIFIED_3D_U32_S32
3603   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3604                     Int32Regs:$b, Int32Regs:$a),
3605               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3606                    Int32Regs:$z),
3607               "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3608               "[$t, \\{$x, $y, $z, $z\\}];",
3609               []>;
// TEX_UNIFIED_3D_U32_F32: NVPTXInst printing `tex.3d.v4.u32.f32`. Results
// $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and Float32Regs
// $x, $y, $z. The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3610 def TEX_UNIFIED_3D_U32_F32
3611   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3612                     Int32Regs:$b, Int32Regs:$a),
3613               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3614                    Float32Regs:$z),
3615               "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3616               "[$t, \\{$x, $y, $z, $z\\}];",
3617               []>;
// TEX_UNIFIED_3D_U32_F32_LEVEL: NVPTXInst printing `tex.level.3d.v4.u32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $y, $z, $lod. $lod is printed after the bracketed address.
// The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3618 def TEX_UNIFIED_3D_U32_F32_LEVEL
3619   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3620                     Int32Regs:$b, Int32Regs:$a),
3621               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3622                    Float32Regs:$z, Float32Regs:$lod),
3623               "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3624               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3625               []>;
// TEX_UNIFIED_3D_U32_F32_GRAD: NVPTXInst printing `tex.grad.3d.v4.u32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and
// Float32Regs $x, $y, $z, $gradx0-$gradx2, $grady0-$grady2.
// The pattern list is empty.
// NOTE(review): the last element of each brace list is printed twice ($z,
// $gradx2, $grady2); presumably this pads each list to four elements -
// confirm against the PTX ISA `tex` description.
3626 def TEX_UNIFIED_3D_U32_F32_GRAD
3627   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3628                     Int32Regs:$b, Int32Regs:$a),
3629               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3630                    Float32Regs:$z,
3631                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3632                    Float32Regs:$gradx2, Float32Regs:$grady0,
3633                    Float32Regs:$grady1, Float32Regs:$grady2),
3634               "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3635               "[$t, \\{$x, $y, $z, $z\\}], "
3636               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3637               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3638               []>;
// TEX_UNIFIED_CUBE_F32_F32: NVPTXInst printing `tex.cube.v4.f32.f32`. Results
// $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t and Float32Regs
// $x, $y, $z. The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3640 def TEX_UNIFIED_CUBE_F32_F32
3641   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3642                     Float32Regs:$b, Float32Regs:$a),
3643               (ins Int64Regs:$t,
3644                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3645               "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3646               "[$t, \\{$x, $y, $z, $z\\}];",
3647               []>;
// TEX_UNIFIED_CUBE_F32_F32_LEVEL: NVPTXInst printing
// `tex.level.cube.v4.f32.f32`. Results $r, $g, $b, $a are Float32Regs; inputs
// are Int64Regs $t and Float32Regs $x, $y, $z, $lod. $lod is printed after
// the bracketed address. The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3648 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
3649   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3650                     Float32Regs:$b, Float32Regs:$a),
3651               (ins Int64Regs:$t,
3652                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3653                    Float32Regs:$lod),
3654               "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3655               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3656               []>;
// TEX_UNIFIED_CUBE_S32_F32: NVPTXInst printing `tex.cube.v4.s32.f32`. Results
// $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and Float32Regs
// $x, $y, $z. The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3657 def TEX_UNIFIED_CUBE_S32_F32
3658   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3659                     Int32Regs:$b, Int32Regs:$a),
3660               (ins Int64Regs:$t,
3661                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3662               "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3663               "[$t, \\{$x, $y, $z, $z\\}];",
3664               []>;
// TEX_UNIFIED_CUBE_S32_F32_LEVEL: NVPTXInst printing
// `tex.level.cube.v4.s32.f32`. Results $r, $g, $b, $a are Int32Regs; inputs
// are Int64Regs $t and Float32Regs $x, $y, $z, $lod. $lod is printed after
// the bracketed address. The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3665 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
3666   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3667                     Int32Regs:$b, Int32Regs:$a),
3668               (ins Int64Regs:$t,
3669                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3670                    Float32Regs:$lod),
3671               "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3672               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3673               []>;
// TEX_UNIFIED_CUBE_U32_F32: NVPTXInst printing `tex.cube.v4.u32.f32`. Results
// $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t and Float32Regs
// $x, $y, $z. The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3674 def TEX_UNIFIED_CUBE_U32_F32
3675   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3676                     Int32Regs:$b, Int32Regs:$a),
3677               (ins Int64Regs:$t,
3678                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3679               "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3680               "[$t, \\{$x, $y, $z, $z\\}];",
3681               []>;
// TEX_UNIFIED_CUBE_U32_F32_LEVEL: NVPTXInst printing
// `tex.level.cube.v4.u32.f32`. Results $r, $g, $b, $a are Int32Regs; inputs
// are Int64Regs $t and Float32Regs $x, $y, $z, $lod. $lod is printed after
// the bracketed address. The pattern list is empty.
// NOTE(review): $z is printed twice, as {$x, $y, $z, $z}; presumably this pads
// the list to four elements - confirm against the PTX ISA `tex` description.
3682 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
3683   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3684                     Int32Regs:$b, Int32Regs:$a),
3685               (ins Int64Regs:$t,
3686                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3687                    Float32Regs:$lod),
3688               "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3689               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3690               []>;
// TEX_UNIFIED_CUBE_ARRAY_F32_F32: NVPTXInst printing `tex.acube.v4.f32.f32`.
// Results $r, $g, $b, $a are Float32Regs; inputs are Int64Regs $t,
// Int32Regs $l and Float32Regs $x, $y, $z, printed as [$t, {$l, $x, $y, $z}].
// The pattern list is empty.
3692 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
3693   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3694                     Float32Regs:$b, Float32Regs:$a),
3695               (ins Int64Regs:$t, Int32Regs:$l,
3696                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3697               "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3698               "[$t, \\{$l, $x, $y, $z\\}];",
3699               []>;
// TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL: NVPTXInst printing
// `tex.level.acube.v4.f32.f32`. Results $r, $g, $b, $a are Float32Regs;
// inputs are Int64Regs $t, Int32Regs $l and Float32Regs $x, $y, $z, $lod.
// $lod is printed after the bracketed address [$t, {$l, $x, $y, $z}].
// The pattern list is empty.
3700 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3701   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3702                     Float32Regs:$b, Float32Regs:$a),
3703               (ins Int64Regs:$t, Int32Regs:$l,
3704                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3705                    Float32Regs:$lod),
3706               "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3707               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3708               []>;
// TEX_UNIFIED_CUBE_ARRAY_S32_F32: NVPTXInst printing `tex.acube.v4.s32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t,
// Int32Regs $l and Float32Regs $x, $y, $z, printed as [$t, {$l, $x, $y, $z}].
// The pattern list is empty.
3709 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
3710   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3711                     Int32Regs:$b, Int32Regs:$a),
3712               (ins Int64Regs:$t, Int32Regs:$l,
3713                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3714               "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3715               "[$t, \\{$l, $x, $y, $z\\}];",
3716               []>;
// TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL: NVPTXInst printing
// `tex.level.acube.v4.s32.f32`. Results $r, $g, $b, $a are Int32Regs;
// inputs are Int64Regs $t, Int32Regs $l and Float32Regs $x, $y, $z, $lod.
// $lod is printed after the bracketed address [$t, {$l, $x, $y, $z}].
// The pattern list is empty.
3717 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3718   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3719                     Int32Regs:$b, Int32Regs:$a),
3720               (ins Int64Regs:$t, Int32Regs:$l,
3721                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3722                    Float32Regs:$lod),
3723               "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3724               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3725               []>;
// TEX_UNIFIED_CUBE_ARRAY_U32_F32: NVPTXInst printing `tex.acube.v4.u32.f32`.
// Results $r, $g, $b, $a are Int32Regs; inputs are Int64Regs $t,
// Int32Regs $l and Float32Regs $x, $y, $z, printed as [$t, {$l, $x, $y, $z}].
// The pattern list is empty.
3726 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
3727   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3728                     Int32Regs:$b, Int32Regs:$a),
3729               (ins Int64Regs:$t, Int32Regs:$l,
3730                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3731               "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3732               "[$t, \\{$l, $x, $y, $z\\}];",
3733               []>;
// TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL: NVPTXInst printing
// `tex.level.acube.v4.u32.f32`. Results $r, $g, $b, $a are Int32Regs;
// inputs are Int64Regs $t, Int32Regs $l and Float32Regs $x, $y, $z, $lod.
// $lod is printed after the bracketed address [$t, {$l, $x, $y, $z}].
// The pattern list is empty.
3734 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3735   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3736                     Int32Regs:$b, Int32Regs:$a),
3737               (ins Int64Regs:$t, Int32Regs:$l,
3738                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3739                    Float32Regs:$lod),
3740               "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3741               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3742               []>;
// TLD4_UNIFIED_R_2D_F32_F32: NVPTXInst printing `tld4.r.2d.v4.f32.f32`. Four
// Float32Regs results ($v0-$v3); inputs are Int64Regs $t and Float32Regs
// $x, $y, printed as [$t, {$x, $y}]. The pattern list is empty.
3744 def TLD4_UNIFIED_R_2D_F32_F32
3745   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3746                     Float32Regs:$v2, Float32Regs:$v3),
3747               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3748               "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3749               "[$t, \\{$x, $y\\}];",
3750               []>;
// TLD4_UNIFIED_G_2D_F32_F32: NVPTXInst printing `tld4.g.2d.v4.f32.f32`. Four
// Float32Regs results ($v0-$v3); inputs are Int64Regs $t and Float32Regs
// $x, $y, printed as [$t, {$x, $y}]. The pattern list is empty.
3751 def TLD4_UNIFIED_G_2D_F32_F32
3752   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3753                     Float32Regs:$v2, Float32Regs:$v3),
3754               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3755               "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3756               "[$t, \\{$x, $y\\}];",
3757               []>;
// TLD4_UNIFIED_B_2D_F32_F32: NVPTXInst printing `tld4.b.2d.v4.f32.f32`. Four
// Float32Regs results ($v0-$v3); inputs are Int64Regs $t and Float32Regs
// $x, $y, printed as [$t, {$x, $y}]. The pattern list is empty.
3758 def TLD4_UNIFIED_B_2D_F32_F32
3759   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3760                     Float32Regs:$v2, Float32Regs:$v3),
3761               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3762               "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3763               "[$t, \\{$x, $y\\}];",
3764               []>;
// TLD4_UNIFIED_A_2D_F32_F32: NVPTXInst printing `tld4.a.2d.v4.f32.f32`. Four
// Float32Regs results ($v0-$v3); inputs are Int64Regs $t and Float32Regs
// $x, $y, printed as [$t, {$x, $y}]. The pattern list is empty.
3765 def TLD4_UNIFIED_A_2D_F32_F32
3766   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3767                     Float32Regs:$v2, Float32Regs:$v3),
3768               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3769               "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3770               "[$t, \\{$x, $y\\}];",
3771               []>;
// TLD4_UNIFIED_R_2D_S32_F32: NVPTXInst printing `tld4.r.2d.v4.s32.f32`. Four
// Int32Regs results ($v0-$v3); inputs are Int64Regs $t and Float32Regs
// $x, $y, printed as [$t, {$x, $y}]. The pattern list is empty.
3772 def TLD4_UNIFIED_R_2D_S32_F32
3773   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3774                     Int32Regs:$v2, Int32Regs:$v3),
3775               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3776               "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3777               "[$t, \\{$x, $y\\}];",
3778               []>;
// TLD4_UNIFIED_G_2D_S32_F32: NVPTXInst printing `tld4.g.2d.v4.s32.f32`. Four
// Int32Regs results ($v0-$v3); inputs are Int64Regs $t and Float32Regs
// $x, $y, printed as [$t, {$x, $y}]. The pattern list is empty.
3779 def TLD4_UNIFIED_G_2D_S32_F32
3780   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3781                     Int32Regs:$v2, Int32Regs:$v3),
3782               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3783               "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3784               "[$t, \\{$x, $y\\}];",
3785               []>;
3786 def TLD4_UNIFIED_B_2D_S32_F32
3787   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3788                     Int32Regs:$v2, Int32Regs:$v3),
3789               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3790               "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3791               "[$t, \\{$x, $y\\}];",
3792               []>;
3793 def TLD4_UNIFIED_A_2D_S32_F32
3794   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3795                     Int32Regs:$v2, Int32Regs:$v3),
3796               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3797               "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3798               "[$t, \\{$x, $y\\}];",
3799               []>;
// 2D tld4 records with the .u32 result type, one per component letter
// (r, g, b, a). The register classes are the same as in the .s32 records
// (Int32Regs results, f32 coordinates); only the type suffix in the emitted
// mnemonic differs.
3800 def TLD4_UNIFIED_R_2D_U32_F32
3801   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3802                     Int32Regs:$v2, Int32Regs:$v3),
3803               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3804               "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3805               "[$t, \\{$x, $y\\}];",
3806               []>;
3807 def TLD4_UNIFIED_G_2D_U32_F32
3808   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3809                     Int32Regs:$v2, Int32Regs:$v3),
3810               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3811               "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3812               "[$t, \\{$x, $y\\}];",
3813               []>;
3814 def TLD4_UNIFIED_B_2D_U32_F32
3815   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3816                     Int32Regs:$v2, Int32Regs:$v3),
3817               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3818               "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3819               "[$t, \\{$x, $y\\}];",
3820               []>;
3821 def TLD4_UNIFIED_A_2D_U32_F32
3822   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3823                     Int32Regs:$v2, Int32Regs:$v3),
3824               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3825               "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3826               "[$t, \\{$x, $y\\}];",
3827               []>;
3832 //=== Surface load instructions
3833 // .clamp variant
// Single-result surface loads with the .clamp suffix. Every record in this
// `let` scope gets IsSuld = true; the two-result and four-result scopes in
// this file use 2 and 3 instead.
// NOTE(review): how IsSuld is consumed is not visible in this chunk - confirm
// in the instruction-format definitions.
3834 let IsSuld = true in {
// 1D geometry: $s is the 64-bit operand printed first inside [...], $x the
// 32-bit coordinate. The b8 and b16 forms both return in Int16Regs; b32 and
// b64 return in Int32Regs and Int64Regs.
3835 def SULD_1D_I8_CLAMP
3836   : NVPTXInst<(outs Int16Regs:$r),
3837               (ins Int64Regs:$s, Int32Regs:$x),
3838               "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
3839               []>;
3840 def SULD_1D_I16_CLAMP
3841   : NVPTXInst<(outs Int16Regs:$r),
3842               (ins Int64Regs:$s, Int32Regs:$x),
3843               "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
3844               []>;
3845 def SULD_1D_I32_CLAMP
3846   : NVPTXInst<(outs Int32Regs:$r),
3847               (ins Int64Regs:$s, Int32Regs:$x),
3848               "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
3849               []>;
3850 def SULD_1D_I64_CLAMP
3851   : NVPTXInst<(outs Int64Regs:$r),
3852               (ins Int64Regs:$s, Int32Regs:$x),
3853               "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
3854               []>;
// .clamp single-result loads for the a1d geometry. Besides $s and $x these
// take an extra 32-bit operand $l, which is printed first in the coordinate
// list: {$l, $x}.
// NOTE(review): $l is presumably the array layer index - confirm against the
// PTX suld description.
3856 def SULD_1D_ARRAY_I8_CLAMP
3857   : NVPTXInst<(outs Int16Regs:$r),
3858               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3859               "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3860               []>;
3861 def SULD_1D_ARRAY_I16_CLAMP
3862   : NVPTXInst<(outs Int16Regs:$r),
3863               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3864               "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3865               []>;
3866 def SULD_1D_ARRAY_I32_CLAMP
3867   : NVPTXInst<(outs Int32Regs:$r),
3868               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3869               "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3870               []>;
3871 def SULD_1D_ARRAY_I64_CLAMP
3872   : NVPTXInst<(outs Int64Regs:$r),
3873               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3874               "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3875               []>;
// .clamp single-result loads for the 2d geometry: 64-bit $s followed by two
// 32-bit coordinates printed as {$x, $y}. Result register class per element
// width: Int16Regs for b8 and b16, Int32Regs for b32, Int64Regs for b64.
3877 def SULD_2D_I8_CLAMP
3878   : NVPTXInst<(outs Int16Regs:$r),
3879               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3880               "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3881               []>;
3882 def SULD_2D_I16_CLAMP
3883   : NVPTXInst<(outs Int16Regs:$r),
3884               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3885               "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3886               []>;
3887 def SULD_2D_I32_CLAMP
3888   : NVPTXInst<(outs Int32Regs:$r),
3889               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3890               "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3891               []>;
3892 def SULD_2D_I64_CLAMP
3893   : NVPTXInst<(outs Int64Regs:$r),
3894               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3895               "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3896               []>;
// .clamp single-result loads for the a2d geometry. There are three 32-bit
// inputs ($l, $x, $y) but the coordinate list is printed with four entries,
// {$l, $x, $y, $y}: $y appears twice.
// NOTE(review): the repeated $y is presumably padding because PTX wants a
// four-element coordinate vector here - confirm against the PTX suld
// description before "fixing" it.
3898 def SULD_2D_ARRAY_I8_CLAMP
3899   : NVPTXInst<(outs Int16Regs:$r),
3900               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3901               "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3902               []>;
3903 def SULD_2D_ARRAY_I16_CLAMP
3904   : NVPTXInst<(outs Int16Regs:$r),
3905               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3906               "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3907               []>;
3908 def SULD_2D_ARRAY_I32_CLAMP
3909   : NVPTXInst<(outs Int32Regs:$r),
3910               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3911               "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3912               []>;
3913 def SULD_2D_ARRAY_I64_CLAMP
3914   : NVPTXInst<(outs Int64Regs:$r),
3915               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3916               "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3917               []>;
// .clamp single-result loads for the 3d geometry. There are three 32-bit
// coordinates ($x, $y, $z) but the list is printed with four entries,
// {$x, $y, $z, $z}: $z appears twice.
// NOTE(review): the repeated $z is presumably padding because PTX wants a
// four-element coordinate vector here - confirm against the PTX suld
// description before "fixing" it.
3919 def SULD_3D_I8_CLAMP
3920   : NVPTXInst<(outs Int16Regs:$r),
3921               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3922               "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3923               []>;
3924 def SULD_3D_I16_CLAMP
3925   : NVPTXInst<(outs Int16Regs:$r),
3926               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3927               "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3928               []>;
3929 def SULD_3D_I32_CLAMP
3930   : NVPTXInst<(outs Int32Regs:$r),
3931               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3932               "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3933               []>;
3934 def SULD_3D_I64_CLAMP
3935   : NVPTXInst<(outs Int64Regs:$r),
3936               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3937               "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3938               []>;
// Two-result (.v2) surface loads with the .clamp suffix. Every record in this
// `let` scope gets IsSuld = 2 (the single-result scope uses `true`, the
// four-result scope uses 3).
// NOTE(review): how IsSuld is consumed is not visible in this chunk.
3941 let IsSuld = 2 in {
// 1D geometry: results are written to $r and $g; inputs are the 64-bit $s and
// one 32-bit coordinate $x. b8 and b16 elements both use Int16Regs.
3942 def SULD_1D_V2I8_CLAMP
3943   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3944               (ins Int64Regs:$s, Int32Regs:$x),
3945               "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3946               []>;
3947 def SULD_1D_V2I16_CLAMP
3948   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3949               (ins Int64Regs:$s, Int32Regs:$x),
3950               "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3951               []>;
3952 def SULD_1D_V2I32_CLAMP
3953   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3954               (ins Int64Regs:$s, Int32Regs:$x),
3955               "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3956               []>;
3957 def SULD_1D_V2I64_CLAMP
3958   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3959               (ins Int64Regs:$s, Int32Regs:$x),
3960               "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3961               []>;
// .clamp two-result (.v2) loads for the a1d geometry. Results go to $r and
// $g; the extra 32-bit operand $l is printed ahead of $x in the coordinate
// list: {$l, $x}.
// NOTE(review): $l is presumably the array layer index - confirm against the
// PTX suld description.
3963 def SULD_1D_ARRAY_V2I8_CLAMP
3964   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3965               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3966               "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3967               []>;
3968 def SULD_1D_ARRAY_V2I16_CLAMP
3969   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3970               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3971               "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3972               []>;
3973 def SULD_1D_ARRAY_V2I32_CLAMP
3974   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3975               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3976               "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3977               []>;
3978 def SULD_1D_ARRAY_V2I64_CLAMP
3979   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3980               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3981               "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3982               []>;
// .clamp two-result (.v2) loads for the 2d geometry: results in $r and $g,
// inputs are the 64-bit $s and two 32-bit coordinates printed as {$x, $y}.
3984 def SULD_2D_V2I8_CLAMP
3985   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3986               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3987               "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3988               []>;
3989 def SULD_2D_V2I16_CLAMP
3990   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3991               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3992               "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3993               []>;
3994 def SULD_2D_V2I32_CLAMP
3995   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3996               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3997               "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3998               []>;
3999 def SULD_2D_V2I64_CLAMP
4000   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4001               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4002               "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4003               []>;
// .clamp two-result (.v2) loads for the a2d geometry. The assembly text is
// split over two adjacent string literals. Three 32-bit inputs ($l, $x, $y)
// are printed as a four-entry list {$l, $x, $y, $y}: $y appears twice.
// NOTE(review): the repeated $y is presumably padding to a four-element
// coordinate vector - confirm against the PTX suld description.
4005 def SULD_2D_ARRAY_V2I8_CLAMP
4006   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4007               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4008               "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
4009               "[$s, \\{$l, $x, $y, $y\\}];",
4010               []>;
4011 def SULD_2D_ARRAY_V2I16_CLAMP
4012   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4013               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4014               "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
4015               "[$s, \\{$l, $x, $y, $y\\}];",
4016               []>;
4017 def SULD_2D_ARRAY_V2I32_CLAMP
4018   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4019               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4020               "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
4021               "[$s, \\{$l, $x, $y, $y\\}];",
4022               []>;
4023 def SULD_2D_ARRAY_V2I64_CLAMP
4024   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4025               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4026               "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
4027               "[$s, \\{$l, $x, $y, $y\\}];",
4028               []>;
// .clamp two-result (.v2) loads for the 3d geometry. Three 32-bit coordinates
// ($x, $y, $z) are printed as a four-entry list {$x, $y, $z, $z}: $z appears
// twice.
// NOTE(review): the repeated $z is presumably padding to a four-element
// coordinate vector - confirm against the PTX suld description.
4030 def SULD_3D_V2I8_CLAMP
4031   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4032               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4033               "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4034               []>;
4035 def SULD_3D_V2I16_CLAMP
4036   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4037               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4038               "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4039               []>;
4040 def SULD_3D_V2I32_CLAMP
4041   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4042               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4043               "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4044               []>;
4045 def SULD_3D_V2I64_CLAMP
4046   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4047               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4048               "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4049               []>;
// Four-result (.v4) surface loads with the .clamp suffix. Every record in
// this `let` scope gets IsSuld = 3 (the single-result scope uses `true`, the
// two-result scope uses 2).
// NOTE(review): how IsSuld is consumed is not visible in this chunk.
4052 let IsSuld = 3 in {
// 1D geometry: results go to $r, $g, $b, $a. Only b8, b16 and b32 element
// widths are defined in this group - there is no b64 record here.
4053 def SULD_1D_V4I8_CLAMP
4054   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4055               (ins Int64Regs:$s, Int32Regs:$x),
4056               "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4057               []>;
4058 def SULD_1D_V4I16_CLAMP
4059   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4060               (ins Int64Regs:$s, Int32Regs:$x),
4061               "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4062               []>;
4063 def SULD_1D_V4I32_CLAMP
4064   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4065               (ins Int64Regs:$s, Int32Regs:$x),
4066               "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4067               []>;
// .clamp four-result (.v4) loads for the a1d geometry, b8/b16/b32 only.
// Results go to $r, $g, $b, $a; the coordinate list is printed as {$l, $x}
// with the extra 32-bit operand $l first.
// NOTE(review): $l is presumably the array layer index - confirm against the
// PTX suld description.
4069 def SULD_1D_ARRAY_V4I8_CLAMP
4070   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4071               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4072               "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
4073               "[$s, \\{$l, $x\\}];",
4074               []>;
4075 def SULD_1D_ARRAY_V4I16_CLAMP
4076   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4077               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4078               "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
4079               "[$s, \\{$l, $x\\}];",
4080               []>;
4081 def SULD_1D_ARRAY_V4I32_CLAMP
4082   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4083               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4084               "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
4085               "[$s, \\{$l, $x\\}];",
4086               []>;
// .clamp four-result (.v4) loads for the 2d geometry, b8/b16/b32 only.
// Results go to $r, $g, $b, $a; inputs are the 64-bit $s and two 32-bit
// coordinates printed as {$x, $y}.
4088 def SULD_2D_V4I8_CLAMP
4089   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4090               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4091               "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4092               []>;
4093 def SULD_2D_V4I16_CLAMP
4094   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4095               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4096               "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4097               []>;
4098 def SULD_2D_V4I32_CLAMP
4099   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4100               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4101               "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4102               []>;
// .clamp four-result (.v4) loads for the a2d geometry, b8/b16/b32 only.
// Three 32-bit inputs ($l, $x, $y) are printed as a four-entry list
// {$l, $x, $y, $y}: $y appears twice.
// NOTE(review): the repeated $y is presumably padding to a four-element
// coordinate vector - confirm against the PTX suld description.
4104 def SULD_2D_ARRAY_V4I8_CLAMP
4105   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4106               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4107               "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
4108               "[$s, \\{$l, $x, $y, $y\\}];",
4109               []>;
4110 def SULD_2D_ARRAY_V4I16_CLAMP
4111   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4112               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4113               "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
4114               "[$s, \\{$l, $x, $y, $y\\}];",
4115               []>;
4116 def SULD_2D_ARRAY_V4I32_CLAMP
4117   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4118               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4119               "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
4120               "[$s, \\{$l, $x, $y, $y\\}];",
4121               []>;
// .clamp four-result (.v4) loads for the 3d geometry, b8/b16/b32 only.
// Three 32-bit coordinates ($x, $y, $z) are printed as a four-entry list
// {$x, $y, $z, $z}: $z appears twice.
// NOTE(review): the repeated $z is presumably padding to a four-element
// coordinate vector - confirm against the PTX suld description.
4124 def SULD_3D_V4I8_CLAMP
4125   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4126               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4127               "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
4128               "[$s, \\{$x, $y, $z, $z\\}];",
4129               []>;
4130 def SULD_3D_V4I16_CLAMP
4131   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4132               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4133               "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
4134               "[$s, \\{$x, $y, $z, $z\\}];",
4135               []>;
4136 def SULD_3D_V4I32_CLAMP
4137   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4138               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4139               "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
4140               "[$s, \\{$x, $y, $z, $z\\}];",
4141               []>;
4145 // .trap variant
// Single-result surface loads with the .trap suffix. Operand lists mirror the
// .clamp records; only the suffix in the emitted mnemonic differs. Every
// record in this `let` scope gets IsSuld = true.
// NOTE(review): how IsSuld is consumed is not visible in this chunk.
4146 let IsSuld = true in {
// 1D geometry: 64-bit $s plus one 32-bit coordinate $x. b8 and b16 both
// return in Int16Regs; b32 and b64 return in Int32Regs and Int64Regs.
4147 def SULD_1D_I8_TRAP
4148   : NVPTXInst<(outs Int16Regs:$r),
4149               (ins Int64Regs:$s, Int32Regs:$x),
4150               "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
4151               []>;
4152 def SULD_1D_I16_TRAP
4153   : NVPTXInst<(outs Int16Regs:$r),
4154               (ins Int64Regs:$s, Int32Regs:$x),
4155               "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
4156               []>;
4157 def SULD_1D_I32_TRAP
4158   : NVPTXInst<(outs Int32Regs:$r),
4159               (ins Int64Regs:$s, Int32Regs:$x),
4160               "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
4161               []>;
4162 def SULD_1D_I64_TRAP
4163   : NVPTXInst<(outs Int64Regs:$r),
4164               (ins Int64Regs:$s, Int32Regs:$x),
4165               "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
4166               []>;
// .trap single-result loads for the a1d geometry. The extra 32-bit operand $l
// is printed first in the coordinate list: {$l, $x}.
// NOTE(review): $l is presumably the array layer index - confirm against the
// PTX suld description.
4168 def SULD_1D_ARRAY_I8_TRAP
4169   : NVPTXInst<(outs Int16Regs:$r),
4170               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4171               "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4172               []>;
4173 def SULD_1D_ARRAY_I16_TRAP
4174   : NVPTXInst<(outs Int16Regs:$r),
4175               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4176               "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4177               []>;
4178 def SULD_1D_ARRAY_I32_TRAP
4179   : NVPTXInst<(outs Int32Regs:$r),
4180               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4181               "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4182               []>;
4183 def SULD_1D_ARRAY_I64_TRAP
4184   : NVPTXInst<(outs Int64Regs:$r),
4185               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4186               "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4187               []>;
// .trap single-result loads for the 2d geometry: 64-bit $s followed by two
// 32-bit coordinates printed as {$x, $y}. Result register class per element
// width: Int16Regs for b8 and b16, Int32Regs for b32, Int64Regs for b64.
4189 def SULD_2D_I8_TRAP
4190   : NVPTXInst<(outs Int16Regs:$r),
4191               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4192               "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4193               []>;
4194 def SULD_2D_I16_TRAP
4195   : NVPTXInst<(outs Int16Regs:$r),
4196               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4197               "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4198               []>;
4199 def SULD_2D_I32_TRAP
4200   : NVPTXInst<(outs Int32Regs:$r),
4201               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4202               "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4203               []>;
4204 def SULD_2D_I64_TRAP
4205   : NVPTXInst<(outs Int64Regs:$r),
4206               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4207               "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4208               []>;
// .trap single-result loads for the a2d geometry. Three 32-bit inputs
// ($l, $x, $y) are printed as a four-entry list {$l, $x, $y, $y}: $y appears
// twice.
// NOTE(review): the repeated $y is presumably padding to a four-element
// coordinate vector - confirm against the PTX suld description.
4210 def SULD_2D_ARRAY_I8_TRAP
4211   : NVPTXInst<(outs Int16Regs:$r),
4212               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4213               "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4214               []>;
4215 def SULD_2D_ARRAY_I16_TRAP
4216   : NVPTXInst<(outs Int16Regs:$r),
4217               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4218               "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4219               []>;
4220 def SULD_2D_ARRAY_I32_TRAP
4221   : NVPTXInst<(outs Int32Regs:$r),
4222               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4223               "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4224               []>;
4225 def SULD_2D_ARRAY_I64_TRAP
4226   : NVPTXInst<(outs Int64Regs:$r),
4227               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4228               "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4229               []>;
// .trap single-result loads for the 3d geometry. Three 32-bit coordinates
// ($x, $y, $z) are printed as a four-entry list {$x, $y, $z, $z}: $z appears
// twice.
// NOTE(review): the repeated $z is presumably padding to a four-element
// coordinate vector - confirm against the PTX suld description.
4231 def SULD_3D_I8_TRAP
4232   : NVPTXInst<(outs Int16Regs:$r),
4233               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4234               "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4235               []>;
4236 def SULD_3D_I16_TRAP
4237   : NVPTXInst<(outs Int16Regs:$r),
4238               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4239               "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4240               []>;
4241 def SULD_3D_I32_TRAP
4242   : NVPTXInst<(outs Int32Regs:$r),
4243               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4244               "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4245               []>;
4246 def SULD_3D_I64_TRAP
4247   : NVPTXInst<(outs Int64Regs:$r),
4248               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4249               "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4250               []>;
// Two-result (.v2) surface loads with the .trap suffix. Every record in this
// `let` scope gets IsSuld = 2.
// NOTE(review): how IsSuld is consumed is not visible in this chunk.
4253 let IsSuld = 2 in {
// 1D geometry: results are written to $r and $g; inputs are the 64-bit $s and
// one 32-bit coordinate $x. b8 and b16 elements both use Int16Regs.
4254 def SULD_1D_V2I8_TRAP
4255   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4256               (ins Int64Regs:$s, Int32Regs:$x),
4257               "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4258               []>;
4259 def SULD_1D_V2I16_TRAP
4260   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4261               (ins Int64Regs:$s, Int32Regs:$x),
4262               "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4263               []>;
4264 def SULD_1D_V2I32_TRAP
4265   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4266               (ins Int64Regs:$s, Int32Regs:$x),
4267               "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4268               []>;
4269 def SULD_1D_V2I64_TRAP
4270   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4271               (ins Int64Regs:$s, Int32Regs:$x),
4272               "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4273               []>;
// .trap two-result (.v2) loads for the a1d geometry. Results go to $r and $g;
// the extra 32-bit operand $l is printed ahead of $x: {$l, $x}.
// NOTE(review): $l is presumably the array layer index - confirm against the
// PTX suld description.
4275 def SULD_1D_ARRAY_V2I8_TRAP
4276   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4277               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4278               "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4279               []>;
4280 def SULD_1D_ARRAY_V2I16_TRAP
4281   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4282               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4283               "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4284               []>;
4285 def SULD_1D_ARRAY_V2I32_TRAP
4286   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4287               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4288               "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4289               []>;
4290 def SULD_1D_ARRAY_V2I64_TRAP
4291   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4292               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4293               "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4294               []>;
// .trap two-result (.v2) loads for the 2d geometry: results in $r and $g,
// inputs are the 64-bit $s and two 32-bit coordinates printed as {$x, $y}.
4296 def SULD_2D_V2I8_TRAP
4297   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4298               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4299               "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4300               []>;
4301 def SULD_2D_V2I16_TRAP
4302   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4303               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4304               "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4305               []>;
4306 def SULD_2D_V2I32_TRAP
4307   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4308               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4309               "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4310               []>;
4311 def SULD_2D_V2I64_TRAP
4312   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4313               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4314               "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4315               []>;
// .trap two-result (.v2) loads for the a2d geometry. The assembly text is
// split over two adjacent string literals. Three 32-bit inputs ($l, $x, $y)
// are printed as a four-entry list {$l, $x, $y, $y}: $y appears twice.
// NOTE(review): the repeated $y is presumably padding to a four-element
// coordinate vector - confirm against the PTX suld description.
4317 def SULD_2D_ARRAY_V2I8_TRAP
4318   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4319               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4320               "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
4321               "[$s, \\{$l, $x, $y, $y\\}];",
4322               []>;
4323 def SULD_2D_ARRAY_V2I16_TRAP
4324   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4325               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4326               "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
4327               "[$s, \\{$l, $x, $y, $y\\}];",
4328               []>;
4329 def SULD_2D_ARRAY_V2I32_TRAP
4330   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4331               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4332               "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
4333               "[$s, \\{$l, $x, $y, $y\\}];",
4334               []>;
4335 def SULD_2D_ARRAY_V2I64_TRAP
4336   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4337               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4338               "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
4339               "[$s, \\{$l, $x, $y, $y\\}];",
4340               []>;
// .trap two-result (.v2) loads for the 3d geometry. Three 32-bit coordinates
// ($x, $y, $z) are printed as a four-entry list {$x, $y, $z, $z}: $z appears
// twice.
// NOTE(review): the repeated $z is presumably padding to a four-element
// coordinate vector - confirm against the PTX suld description.
4342 def SULD_3D_V2I8_TRAP
4343   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4344               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4345               "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4346               []>;
4347 def SULD_3D_V2I16_TRAP
4348   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4349               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4350               "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4351               []>;
4352 def SULD_3D_V2I32_TRAP
4353   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4354               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4355               "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4356               []>;
4357 def SULD_3D_V2I64_TRAP
4358   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4359               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4360               "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4361               []>;
// Four-result (.v4) surface loads with the .trap suffix. Every record in this
// `let` scope gets IsSuld = 3.
// NOTE(review): how IsSuld is consumed is not visible in this chunk.
4364 let IsSuld = 3 in {
// 1D geometry: results go to $r, $g, $b, $a. Only b8, b16 and b32 element
// widths are defined in this group - there is no b64 record here.
4365 def SULD_1D_V4I8_TRAP
4366   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4367               (ins Int64Regs:$s, Int32Regs:$x),
4368               "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4369               []>;
4370 def SULD_1D_V4I16_TRAP
4371   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4372               (ins Int64Regs:$s, Int32Regs:$x),
4373               "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4374               []>;
4375 def SULD_1D_V4I32_TRAP
4376   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4377               (ins Int64Regs:$s, Int32Regs:$x),
4378               "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4379               []>;
// .trap four-result (.v4) loads for the a1d geometry, b8/b16/b32 only.
// Results go to $r, $g, $b, $a; the coordinate list is printed as {$l, $x}
// with the extra 32-bit operand $l first.
// NOTE(review): $l is presumably the array layer index - confirm against the
// PTX suld description.
4381 def SULD_1D_ARRAY_V4I8_TRAP
4382   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4383               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4384               "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4385               "[$s, \\{$l, $x\\}];",
4386               []>;
4387 def SULD_1D_ARRAY_V4I16_TRAP
4388   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4389               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4390               "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4391               "[$s, \\{$l, $x\\}];",
4392               []>;
4393 def SULD_1D_ARRAY_V4I32_TRAP
4394   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4395               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4396               "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4397               "[$s, \\{$l, $x\\}];",
4398               []>;
// .trap four-result (.v4) loads for the 2d geometry, b8/b16/b32 only.
// Results go to $r, $g, $b, $a; inputs are the 64-bit $s and two 32-bit
// coordinates printed as {$x, $y}.
4400 def SULD_2D_V4I8_TRAP
4401   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4402               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4403               "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4404               []>;
4405 def SULD_2D_V4I16_TRAP
4406   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4407               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4408               "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4409               []>;
4410 def SULD_2D_V4I32_TRAP
4411   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4412               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4413               "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4414               []>;
// .trap four-result (.v4) loads for the a2d geometry, b8/b16/b32 only.
// Three 32-bit inputs ($l, $x, $y) are printed as a four-entry list
// {$l, $x, $y, $y}: $y appears twice.
// NOTE(review): the repeated $y is presumably padding to a four-element
// coordinate vector - confirm against the PTX suld description.
4416 def SULD_2D_ARRAY_V4I8_TRAP
4417   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4418               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4419               "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4420               "[$s, \\{$l, $x, $y, $y\\}];",
4421               []>;
4422 def SULD_2D_ARRAY_V4I16_TRAP
4423   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4424               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4425               "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4426               "[$s, \\{$l, $x, $y, $y\\}];",
4427               []>;
4428 def SULD_2D_ARRAY_V4I32_TRAP
4429   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4430               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4431               "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4432               "[$s, \\{$l, $x, $y, $y\\}];",
4433               []>;
// .trap four-result (.v4) loads for the 3d geometry, b8/b16/b32 only.
// Three 32-bit coordinates ($x, $y, $z) are printed as a four-entry list
// {$x, $y, $z, $z}: $z appears twice.
// NOTE(review): the repeated $z is presumably padding to a four-element
// coordinate vector - confirm against the PTX suld description.
4436 def SULD_3D_V4I8_TRAP
4437   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4438               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4439               "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4440               "[$s, \\{$x, $y, $z, $z\\}];",
4441               []>;
4442 def SULD_3D_V4I16_TRAP
4443   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4444               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4445               "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4446               "[$s, \\{$x, $y, $z, $z\\}];",
4447               []>;
4448 def SULD_3D_V4I32_TRAP
4449   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4450               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4451               "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4452               "[$s, \\{$x, $y, $z, $z\\}];",
4453               []>;
4456 // .zero variant
// Single-result surface loads with the .zero suffix. Operand lists mirror the
// .clamp and .trap records; only the suffix in the emitted mnemonic differs.
// Every record in this `let` scope gets IsSuld = true.
// NOTE(review): how IsSuld is consumed is not visible in this chunk.
4457 let IsSuld = true in {
// 1D geometry: 64-bit $s plus one 32-bit coordinate $x. b8 and b16 both
// return in Int16Regs; b32 and b64 return in Int32Regs and Int64Regs.
4458 def SULD_1D_I8_ZERO
4459   : NVPTXInst<(outs Int16Regs:$r),
4460               (ins Int64Regs:$s, Int32Regs:$x),
4461               "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
4462               []>;
4463 def SULD_1D_I16_ZERO
4464   : NVPTXInst<(outs Int16Regs:$r),
4465               (ins Int64Regs:$s, Int32Regs:$x),
4466               "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
4467               []>;
4468 def SULD_1D_I32_ZERO
4469   : NVPTXInst<(outs Int32Regs:$r),
4470               (ins Int64Regs:$s, Int32Regs:$x),
4471               "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
4472               []>;
4473 def SULD_1D_I64_ZERO
4474   : NVPTXInst<(outs Int64Regs:$r),
4475               (ins Int64Regs:$s, Int32Regs:$x),
4476               "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
4477               []>;
// suld.b.a1d.{b8,b16,b32,b64}.zero: one result $r loaded from the handle in $s;
// the coordinate vector is {$l, $x} with the layer operand $l printed first.
4479 def SULD_1D_ARRAY_I8_ZERO
4480   : NVPTXInst<(outs Int16Regs:$r),
4481               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4482               "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4483               []>;
4484 def SULD_1D_ARRAY_I16_ZERO
4485   : NVPTXInst<(outs Int16Regs:$r),
4486               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4487               "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4488               []>;
4489 def SULD_1D_ARRAY_I32_ZERO
4490   : NVPTXInst<(outs Int32Regs:$r),
4491               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4492               "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4493               []>;
4494 def SULD_1D_ARRAY_I64_ZERO
4495   : NVPTXInst<(outs Int64Regs:$r),
4496               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4497               "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4498               []>;
// suld.b.2d.{b8,b16,b32,b64}.zero: one result $r loaded from the handle in $s
// at coordinates ($x, $y).
4500 def SULD_2D_I8_ZERO
4501   : NVPTXInst<(outs Int16Regs:$r),
4502               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4503               "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4504               []>;
4505 def SULD_2D_I16_ZERO
4506   : NVPTXInst<(outs Int16Regs:$r),
4507               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4508               "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4509               []>;
4510 def SULD_2D_I32_ZERO
4511   : NVPTXInst<(outs Int32Regs:$r),
4512               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4513               "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4514               []>;
4515 def SULD_2D_I64_ZERO
4516   : NVPTXInst<(outs Int64Regs:$r),
4517               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4518               "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4519               []>;
// suld.b.a2d.{b8,b16,b32,b64}.zero: one result $r loaded from the handle in $s
// at layer $l and coordinates ($x, $y). The coordinate vector is printed with
// four elements, so $y is repeated to fill the last slot.
4521 def SULD_2D_ARRAY_I8_ZERO
4522   : NVPTXInst<(outs Int16Regs:$r),
4523               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4524               "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4525               []>;
4526 def SULD_2D_ARRAY_I16_ZERO
4527   : NVPTXInst<(outs Int16Regs:$r),
4528               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4529               "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4530               []>;
4531 def SULD_2D_ARRAY_I32_ZERO
4532   : NVPTXInst<(outs Int32Regs:$r),
4533               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4534               "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4535               []>;
4536 def SULD_2D_ARRAY_I64_ZERO
4537   : NVPTXInst<(outs Int64Regs:$r),
4538               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4539               "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4540               []>;
// suld.b.3d.{b8,b16,b32,b64}.zero: one result $r loaded from the handle in $s
// at coordinates ($x, $y, $z). The coordinate vector is printed with four
// elements, so $z is repeated to fill the last slot.
4542 def SULD_3D_I8_ZERO
4543   : NVPTXInst<(outs Int16Regs:$r),
4544               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4545               "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4546               []>;
4547 def SULD_3D_I16_ZERO
4548   : NVPTXInst<(outs Int16Regs:$r),
4549               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4550               "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4551               []>;
4552 def SULD_3D_I32_ZERO
4553   : NVPTXInst<(outs Int32Regs:$r),
4554               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4555               "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4556               []>;
4557 def SULD_3D_I64_ZERO
4558   : NVPTXInst<(outs Int64Regs:$r),
4559               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4560               "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4561               []>;
// Two-result (.v2) suld.b.*.zero instructions; every def in this group gets
// IsSuld = 2 from the enclosing let.
4564 let IsSuld = 2 in {
// 1D geometry: results $r and $g loaded from the handle in $s at coordinate $x.
4565 def SULD_1D_V2I8_ZERO
4566   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4567               (ins Int64Regs:$s, Int32Regs:$x),
4568               "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4569               []>;
4570 def SULD_1D_V2I16_ZERO
4571   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4572               (ins Int64Regs:$s, Int32Regs:$x),
4573               "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4574               []>;
4575 def SULD_1D_V2I32_ZERO
4576   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4577               (ins Int64Regs:$s, Int32Regs:$x),
4578               "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4579               []>;
4580 def SULD_1D_V2I64_ZERO
4581   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4582               (ins Int64Regs:$s, Int32Regs:$x),
4583               "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4584               []>;
// suld.b.a1d.v2.{b8,b16,b32,b64}.zero: results $r and $g loaded from the handle
// in $s; the coordinate vector is {$l, $x} with the layer operand first.
4586 def SULD_1D_ARRAY_V2I8_ZERO
4587   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4588               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4589               "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4590               []>;
4591 def SULD_1D_ARRAY_V2I16_ZERO
4592   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4593               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4594               "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4595               []>;
4596 def SULD_1D_ARRAY_V2I32_ZERO
4597   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4598               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4599               "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4600               []>;
4601 def SULD_1D_ARRAY_V2I64_ZERO
4602   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4603               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4604               "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4605               []>;
// suld.b.2d.v2.{b8,b16,b32,b64}.zero: results $r and $g loaded from the handle
// in $s at coordinates ($x, $y).
4607 def SULD_2D_V2I8_ZERO
4608   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4609               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4610               "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4611               []>;
4612 def SULD_2D_V2I16_ZERO
4613   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4614               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4615               "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4616               []>;
4617 def SULD_2D_V2I32_ZERO
4618   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4619               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4620               "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4621               []>;
4622 def SULD_2D_V2I64_ZERO
4623   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4624               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4625               "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4626               []>;
// suld.b.a2d.v2.{b8,b16,b32,b64}.zero: results $r and $g loaded from the handle
// in $s at layer $l and coordinates ($x, $y). The coordinate vector is printed
// with four elements, so $y is repeated to fill the last slot.
4628 def SULD_2D_ARRAY_V2I8_ZERO
4629   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4630               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4631               "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4632               "[$s, \\{$l, $x, $y, $y\\}];",
4633               []>;
4634 def SULD_2D_ARRAY_V2I16_ZERO
4635   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4636               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4637               "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4638               "[$s, \\{$l, $x, $y, $y\\}];",
4639               []>;
4640 def SULD_2D_ARRAY_V2I32_ZERO
4641   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4642               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4643               "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4644               "[$s, \\{$l, $x, $y, $y\\}];",
4645               []>;
4646 def SULD_2D_ARRAY_V2I64_ZERO
4647   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4648               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4649               "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4650               "[$s, \\{$l, $x, $y, $y\\}];",
4651               []>;
// suld.b.3d.v2.{b8,b16,b32,b64}.zero: results $r and $g loaded from the handle
// in $s at coordinates ($x, $y, $z). The coordinate vector is printed with four
// elements, so $z is repeated to fill the last slot.
4653 def SULD_3D_V2I8_ZERO
4654   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4655               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4656               "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4657               []>;
4658 def SULD_3D_V2I16_ZERO
4659   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4660               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4661               "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4662               []>;
4663 def SULD_3D_V2I32_ZERO
4664   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4665               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4666               "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4667               []>;
4668 def SULD_3D_V2I64_ZERO
4669   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4670               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4671               "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4672               []>;
// Four-result (.v4) suld.b.*.zero instructions; every def in this group gets
// IsSuld = 3 from the enclosing let. Only b8/b16/b32 forms are defined here.
4675 let IsSuld = 3 in {
// 1D geometry: results $r, $g, $b, $a loaded from the handle in $s at $x.
4676 def SULD_1D_V4I8_ZERO
4677   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4678               (ins Int64Regs:$s, Int32Regs:$x),
4679               "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4680               []>;
4681 def SULD_1D_V4I16_ZERO
4682   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4683               (ins Int64Regs:$s, Int32Regs:$x),
4684               "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4685               []>;
4686 def SULD_1D_V4I32_ZERO
4687   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4688               (ins Int64Regs:$s, Int32Regs:$x),
4689               "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4690               []>;
// suld.b.a1d.v4.{b8,b16,b32}.zero: results $r, $g, $b, $a loaded from the
// handle in $s; the coordinate vector is {$l, $x} with the layer operand first.
4692 def SULD_1D_ARRAY_V4I8_ZERO
4693   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4694               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4695               "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4696               "[$s, \\{$l, $x\\}];",
4697               []>;
4698 def SULD_1D_ARRAY_V4I16_ZERO
4699   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4700               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4701               "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4702               "[$s, \\{$l, $x\\}];",
4703               []>;
4704 def SULD_1D_ARRAY_V4I32_ZERO
4705   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4706               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4707               "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4708               "[$s, \\{$l, $x\\}];",
4709               []>;
// suld.b.2d.v4.{b8,b16,b32}.zero: results $r, $g, $b, $a loaded from the handle
// in $s at coordinates ($x, $y).
4711 def SULD_2D_V4I8_ZERO
4712   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4713               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4714               "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4715               []>;
4716 def SULD_2D_V4I16_ZERO
4717   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4718               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4719               "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4720               []>;
4721 def SULD_2D_V4I32_ZERO
4722   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4723               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4724               "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4725               []>;
// suld.b.a2d.v4.{b8,b16,b32}.zero: results $r, $g, $b, $a loaded from the
// handle in $s at layer $l and coordinates ($x, $y). The coordinate vector is
// printed with four elements, so $y is repeated to fill the last slot.
4727 def SULD_2D_ARRAY_V4I8_ZERO
4728   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4729               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4730               "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4731               "[$s, \\{$l, $x, $y, $y\\}];",
4732               []>;
4733 def SULD_2D_ARRAY_V4I16_ZERO
4734   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4735               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4736               "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4737               "[$s, \\{$l, $x, $y, $y\\}];",
4738               []>;
4739 def SULD_2D_ARRAY_V4I32_ZERO
4740   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4741               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4742               "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4743               "[$s, \\{$l, $x, $y, $y\\}];",
4744               []>;
// suld.b.3d.v4.{b8,b16,b32}.zero: results $r, $g, $b, $a loaded from the handle
// in $s at coordinates ($x, $y, $z). The coordinate vector is printed with four
// elements, so $z is repeated to fill the last slot.
4747 def SULD_3D_V4I8_ZERO
4748   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4749               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4750               "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4751               "[$s, \\{$x, $y, $z, $z\\}];",
4752               []>;
4753 def SULD_3D_V4I16_ZERO
4754   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4755               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4756               "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4757               "[$s, \\{$x, $y, $z, $z\\}];",
4758               []>;
4759 def SULD_3D_V4I32_ZERO
4760   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4761               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4762               "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4763               "[$s, \\{$x, $y, $z, $z\\}];",
4764               []>;
4767 //-----------------------------------
4768 // Texture Query Intrinsics
4769 //-----------------------------------
// txq.<attribute>.b32 instructions: each takes a 64-bit handle in $a and
// produces a 32-bit result in $d. All of them get IsSurfTexQuery = true from
// the enclosing let. The pattern lists are empty; the matching intrinsics are
// attached through the separate anonymous Pat records in this file.
4771 let IsSurfTexQuery = true in {
4772 def TXQ_CHANNEL_ORDER
4773   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4774               "txq.channel_order.b32 \t$d, [$a];",
4775               []>;
4776 def TXQ_CHANNEL_DATA_TYPE
4777   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4778               "txq.channel_data_type.b32 \t$d, [$a];",
4779               []>;
4780 def TXQ_WIDTH
4781   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4782               "txq.width.b32 \t$d, [$a];",
4783               []>;
4784 def TXQ_HEIGHT
4785   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4786               "txq.height.b32 \t$d, [$a];",
4787               []>;
4788 def TXQ_DEPTH
4789   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4790               "txq.depth.b32 \t$d, [$a];",
4791               []>;
4792 def TXQ_ARRAY_SIZE
4793   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4794               "txq.array_size.b32 \t$d, [$a];",
4795               []>;
4796 def TXQ_NUM_SAMPLES
4797   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4798               "txq.num_samples.b32 \t$d, [$a];",
4799               []>;
4800 def TXQ_NUM_MIPMAP_LEVELS
4801   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4802               "txq.num_mipmap_levels.b32 \t$d, [$a];",
4803               []>;
// Selection patterns: each int_nvvm_txq_* intrinsic applied to a 64-bit handle
// register is rewritten to the TXQ_* instruction of the same name.
4806 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4807           (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
4808 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4809           (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4810 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4811           (TXQ_WIDTH Int64Regs:$a)>;
4812 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4813           (TXQ_HEIGHT Int64Regs:$a)>;
4814 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4815           (TXQ_DEPTH Int64Regs:$a)>;
4816 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4817           (TXQ_ARRAY_SIZE Int64Regs:$a)>;
4818 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4819           (TXQ_NUM_SAMPLES Int64Regs:$a)>;
4820 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4821           (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4824 //-----------------------------------
4825 // Surface Query Intrinsics
4826 //-----------------------------------
// suq.<attribute>.b32 instructions: each takes a 64-bit handle in $a and
// produces a 32-bit result in $d. All of them get IsSurfTexQuery = true from
// the enclosing let. The pattern lists are empty; the matching intrinsics are
// attached through the separate anonymous Pat records in this file.
4828 let IsSurfTexQuery = true in {
4829 def SUQ_CHANNEL_ORDER
4830   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4831               "suq.channel_order.b32 \t$d, [$a];",
4832               []>;
4833 def SUQ_CHANNEL_DATA_TYPE
4834   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4835               "suq.channel_data_type.b32 \t$d, [$a];",
4836               []>;
4837 def SUQ_WIDTH
4838   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4839               "suq.width.b32 \t$d, [$a];",
4840               []>;
4841 def SUQ_HEIGHT
4842   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4843               "suq.height.b32 \t$d, [$a];",
4844               []>;
4845 def SUQ_DEPTH
4846   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4847               "suq.depth.b32 \t$d, [$a];",
4848               []>;
4849 def SUQ_ARRAY_SIZE
4850   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4851               "suq.array_size.b32 \t$d, [$a];",
4852               []>;
// Selection patterns: each int_nvvm_suq_* intrinsic applied to a 64-bit handle
// register is rewritten to the SUQ_* instruction of the same name.
4855 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4856           (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
4857 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4858           (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4859 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4860           (SUQ_WIDTH Int64Regs:$a)>;
4861 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4862           (SUQ_HEIGHT Int64Regs:$a)>;
4863 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4864           (SUQ_DEPTH Int64Regs:$a)>;
4865 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4866           (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4869 //===- Handle Query -------------------------------------------------------===//
4871 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// istypep.{samplerref,surfref,texref}: each takes a 64-bit handle in $a and
// writes a predicate (Int1Regs) result to $d. Unlike the query instructions
// above, the int_nvvm_istypep_* intrinsic is matched directly in each def's
// own pattern list.
4872 def ISTYPEP_SAMPLER
4873   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4874               "istypep.samplerref \t$d, $a;",
4875               [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4876 def ISTYPEP_SURFACE
4877   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4878               "istypep.surfref \t$d, $a;",
4879               [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4880 def ISTYPEP_TEXTURE
4881   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4882               "istypep.texref \t$d, $a;",
4883               [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4885 //===- Surface Stores -----------------------------------------------------===//
// Every def in this group gets IsSust = true from the enclosing let. The
// instructions have no outputs; the inputs are the 64-bit handle $s, the
// coordinate registers, and then the data registers to be stored.
// NOTE(review): the pattern lists are empty - presumably these are selected
// from C++ instruction-selection code or patterns elsewhere; confirm.
4887 let IsSust = true in {
4888 // Unformatted
4889 // .clamp variant
// sust.b.1d.{b8,b16,b32,b64}.clamp: stores $r to the handle in $s at $x.
// The b8 form takes its data in an Int16Regs register.
4890 def SUST_B_1D_B8_CLAMP
4891   : NVPTXInst<(outs),
4892               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4893               "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4894               []>;
4895 def SUST_B_1D_B16_CLAMP
4896   : NVPTXInst<(outs),
4897               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4898               "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4899               []>;
4900 def SUST_B_1D_B32_CLAMP
4901   : NVPTXInst<(outs),
4902               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4903               "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4904               []>;
4905 def SUST_B_1D_B64_CLAMP
4906   : NVPTXInst<(outs),
4907               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4908               "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4909               []>;
// sust.b.1d.v2.{b8,b16,b32,b64}.clamp: stores the pair ($r, $g) to the handle
// in $s at coordinate $x.
4910 def SUST_B_1D_V2B8_CLAMP
4911   : NVPTXInst<(outs),
4912               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4913               "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4914               []>;
4915 def SUST_B_1D_V2B16_CLAMP
4916   : NVPTXInst<(outs),
4917               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4918               "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4919               []>;
4920 def SUST_B_1D_V2B32_CLAMP
4921   : NVPTXInst<(outs),
4922               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4923               "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4924               []>;
4925 def SUST_B_1D_V2B64_CLAMP
4926   : NVPTXInst<(outs),
4927               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4928               "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4929               []>;
// sust.b.1d.v4.{b8,b16,b32}.clamp: stores ($r, $g, $b, $a) to the handle in $s
// at coordinate $x.
4930 def SUST_B_1D_V4B8_CLAMP
4931   : NVPTXInst<(outs),
4932               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4933                    Int16Regs:$b, Int16Regs:$a),
4934               "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4935               []>;
4936 def SUST_B_1D_V4B16_CLAMP
4937   : NVPTXInst<(outs),
4938               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4939                    Int16Regs:$b, Int16Regs:$a),
4940               "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4941               []>;
4942 def SUST_B_1D_V4B32_CLAMP
4943   : NVPTXInst<(outs),
4944               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4945                    Int32Regs:$b, Int32Regs:$a),
4946               "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4947               []>;
// sust.b.a1d.{b8,b16,b32,b64}.clamp: stores $r to the handle in $s; the
// coordinate vector is {$idx, $x} with the layer index printed first.
4950 def SUST_B_1D_ARRAY_B8_CLAMP
4951   : NVPTXInst<(outs),
4952               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4953               "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4954               []>;
4955 def SUST_B_1D_ARRAY_B16_CLAMP
4956   : NVPTXInst<(outs),
4957               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4958               "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4959               []>;
4960 def SUST_B_1D_ARRAY_B32_CLAMP
4961   : NVPTXInst<(outs),
4962               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4963               "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4964               []>;
4965 def SUST_B_1D_ARRAY_B64_CLAMP
4966   : NVPTXInst<(outs),
4967               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4968               "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4969               []>;
// sust.b.a1d.v2.{b8,b16,b32,b64}.clamp: stores the pair ($r, $g) to the handle
// in $s; the coordinate vector is {$idx, $x}.
4970 def SUST_B_1D_ARRAY_V2B8_CLAMP
4971   : NVPTXInst<(outs),
4972               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4973                    Int16Regs:$g),
4974               "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4975               []>;
4976 def SUST_B_1D_ARRAY_V2B16_CLAMP
4977   : NVPTXInst<(outs),
4978               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4979                    Int16Regs:$g),
4980               "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4981               []>;
4982 def SUST_B_1D_ARRAY_V2B32_CLAMP
4983   : NVPTXInst<(outs),
4984               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4985                    Int32Regs:$g),
4986               "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4987               []>;
4988 def SUST_B_1D_ARRAY_V2B64_CLAMP
4989   : NVPTXInst<(outs),
4990               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4991                    Int64Regs:$g),
4992               "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4993               []>;
// sust.b.a1d.v4.{b8,b16,b32}.clamp: stores ($r, $g, $b, $a) to the handle in
// $s; the coordinate vector is {$idx, $x}. The asm template is split across
// two adjacent string literals.
4994 def SUST_B_1D_ARRAY_V4B8_CLAMP
4995   : NVPTXInst<(outs),
4996               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4997                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4998               "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
4999               "\\{$r, $g, $b, $a\\};",
5000               []>;
5001 def SUST_B_1D_ARRAY_V4B16_CLAMP
5002   : NVPTXInst<(outs),
5003               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5004                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5005              "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
5006              "\\{$r, $g, $b, $a\\};",
5007               []>;
5008 def SUST_B_1D_ARRAY_V4B32_CLAMP
5009   : NVPTXInst<(outs),
5010               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5011                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5012              "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
5013              "\\{$r, $g, $b, $a\\};",
5014               []>;
// sust.b.2d.{b8,b16,b32,b64}.clamp: stores $r to the handle in $s at
// coordinates ($x, $y).
5017 def SUST_B_2D_B8_CLAMP
5018   : NVPTXInst<(outs),
5019               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5020               "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
5021               []>;
5022 def SUST_B_2D_B16_CLAMP
5023   : NVPTXInst<(outs),
5024               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5025               "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
5026               []>;
5027 def SUST_B_2D_B32_CLAMP
5028   : NVPTXInst<(outs),
5029               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5030               "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
5031               []>;
5032 def SUST_B_2D_B64_CLAMP
5033   : NVPTXInst<(outs),
5034               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5035               "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
5036               []>;
// sust.b.2d.v2.{b8,b16,b32,b64}.clamp: stores the pair ($r, $g) to the handle
// in $s at coordinates ($x, $y).
5037 def SUST_B_2D_V2B8_CLAMP
5038   : NVPTXInst<(outs),
5039               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5040                    Int16Regs:$g),
5041               "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5042               []>;
5043 def SUST_B_2D_V2B16_CLAMP
5044   : NVPTXInst<(outs),
5045               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5046                    Int16Regs:$g),
5047               "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5048               []>;
5049 def SUST_B_2D_V2B32_CLAMP
5050   : NVPTXInst<(outs),
5051               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5052                    Int32Regs:$g),
5053               "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5054               []>;
5055 def SUST_B_2D_V2B64_CLAMP
5056   : NVPTXInst<(outs),
5057               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5058                    Int64Regs:$g),
5059               "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5060               []>;
// sust.b.2d.v4.{b8,b16,b32}.clamp: stores ($r, $g, $b, $a) to the handle in $s
// at coordinates ($x, $y). The asm template is split across two adjacent
// string literals.
5061 def SUST_B_2D_V4B8_CLAMP
5062   : NVPTXInst<(outs),
5063               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5064                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5065               "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
5066               "\\{$r, $g, $b, $a\\};",
5067               []>;
5068 def SUST_B_2D_V4B16_CLAMP
5069   : NVPTXInst<(outs),
5070               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5071                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5072              "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
5073              "\\{$r, $g, $b, $a\\};",
5074               []>;
5075 def SUST_B_2D_V4B32_CLAMP
5076   : NVPTXInst<(outs),
5077               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5078                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5079              "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
5080              "\\{$r, $g, $b, $a\\};",
5081               []>;
// sust.b.a2d.{b8,b16,b32,b64}.clamp: stores $r to the handle in $s at layer
// $idx and coordinates ($x, $y). The coordinate vector is printed with four
// elements, so $y is repeated to fill the last slot.
5084 def SUST_B_2D_ARRAY_B8_CLAMP
5085   : NVPTXInst<(outs),
5086               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5087                    Int16Regs:$r),
5088               "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5089               []>;
5090 def SUST_B_2D_ARRAY_B16_CLAMP
5091   : NVPTXInst<(outs),
5092               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5093                    Int16Regs:$r),
5094               "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5095               []>;
5096 def SUST_B_2D_ARRAY_B32_CLAMP
5097   : NVPTXInst<(outs),
5098               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5099                    Int32Regs:$r),
5100               "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5101               []>;
5102 def SUST_B_2D_ARRAY_B64_CLAMP
5103   : NVPTXInst<(outs),
5104               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5105                    Int64Regs:$r),
5106               "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5107               []>;
// sust.b.a2d.v2.{b8,b16,b32,b64}.clamp: stores the pair ($r, $g) to the handle
// in $s at layer $idx and coordinates ($x, $y). The coordinate vector is
// printed with four elements, so $y is repeated to fill the last slot.
5108 def SUST_B_2D_ARRAY_V2B8_CLAMP
5109   : NVPTXInst<(outs),
5110               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5111                    Int16Regs:$r, Int16Regs:$g),
5112               "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
5113               "\\{$r, $g\\};",
5114               []>;
5115 def SUST_B_2D_ARRAY_V2B16_CLAMP
5116   : NVPTXInst<(outs),
5117               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5118                    Int16Regs:$r, Int16Regs:$g),
5119              "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
5120              "\\{$r, $g\\};",
5121               []>;
5122 def SUST_B_2D_ARRAY_V2B32_CLAMP
5123   : NVPTXInst<(outs),
5124               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5125                    Int32Regs:$r, Int32Regs:$g),
5126              "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
5127              "\\{$r, $g\\};",
5128               []>;
5129 def SUST_B_2D_ARRAY_V2B64_CLAMP
5130   : NVPTXInst<(outs),
5131               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5132                    Int64Regs:$r, Int64Regs:$g),
5133              "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
5134              "\\{$r, $g\\};",
5135               []>;
// sust.b.a2d.v4.{b8,b16,b32}.clamp: stores ($r, $g, $b, $a) to the handle in $s
// at layer $idx and coordinates ($x, $y). The coordinate vector is printed
// with four elements, so $y is repeated to fill the last slot.
5136 def SUST_B_2D_ARRAY_V4B8_CLAMP
5137   : NVPTXInst<(outs),
5138               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5139                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5140       "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
5141       "\\{$r, $g, $b, $a\\};",
5142               []>;
5143 def SUST_B_2D_ARRAY_V4B16_CLAMP
5144   : NVPTXInst<(outs),
5145               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5146                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5147      "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
5148      "\\{$r, $g, $b, $a\\};",
5149               []>;
5150 def SUST_B_2D_ARRAY_V4B32_CLAMP
5151   : NVPTXInst<(outs),
5152               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5153                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5154      "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
5155      "\\{$r, $g, $b, $a\\};",
5156               []>;
// sust.b.3d.{b8,b16,b32,b64}.clamp: stores $r to the handle in $s at
// coordinates ($x, $y, $z). The coordinate vector is printed with four
// elements, so $z is repeated to fill the last slot.
5159 def SUST_B_3D_B8_CLAMP
5160   : NVPTXInst<(outs),
5161               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5162                    Int16Regs:$r),
5163               "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5164               []>;
5165 def SUST_B_3D_B16_CLAMP
5166   : NVPTXInst<(outs),
5167               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5168                    Int16Regs:$r),
5169               "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5170               []>;
5171 def SUST_B_3D_B32_CLAMP
5172   : NVPTXInst<(outs),
5173               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5174                    Int32Regs:$r),
5175               "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5176               []>;
5177 def SUST_B_3D_B64_CLAMP
5178   : NVPTXInst<(outs),
5179               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5180                    Int64Regs:$r),
5181               "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5182               []>;
5183 def SUST_B_3D_V2B8_CLAMP
5184   : NVPTXInst<(outs),
5185               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5186                    Int16Regs:$r, Int16Regs:$g),
5187               "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5188               "\\{$r, $g\\};",
5189               []>;
5190 def SUST_B_3D_V2B16_CLAMP
5191   : NVPTXInst<(outs),
5192               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5193                    Int16Regs:$r, Int16Regs:$g),
5194               "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5195               "\\{$r, $g\\};",
5196               []>;
5197 def SUST_B_3D_V2B32_CLAMP
5198   : NVPTXInst<(outs),
5199               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5200                    Int32Regs:$r, Int32Regs:$g),
5201               "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5202               "\\{$r, $g\\};",
5203               []>;
5204 def SUST_B_3D_V2B64_CLAMP
5205   : NVPTXInst<(outs),
5206               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5207                    Int64Regs:$r, Int64Regs:$g),
5208               "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5209               "\\{$r, $g\\};",
5210               []>;
5211 def SUST_B_3D_V4B8_CLAMP
5212   : NVPTXInst<(outs),
5213               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5214                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5215          "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5216          "\\{$r, $g, $b, $a\\};",
5217               []>;
5218 def SUST_B_3D_V4B16_CLAMP
5219   : NVPTXInst<(outs),
5220               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5221                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5222         "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5223         "\\{$r, $g, $b, $a\\};",
5224               []>;
5225 def SUST_B_3D_V4B32_CLAMP
5226   : NVPTXInst<(outs),
5227               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5228                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5229         "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5230         "\\{$r, $g, $b, $a\\};",
5231               []>;
5234 // .trap variant
// sust.b.1d.*.trap records: scalar, .v2 and .v4 forms.
// Every record has an empty `(outs)` list and an empty pattern list `[]`, so
// no selection pattern is attached to these definitions here.
// Operands: $s (Int64Regs) is printed first inside the brackets, followed by
// the single Int32Regs coordinate $x; $r/$g/$b/$a are the values stored.
// The .b8 and .b16 forms both take Int16Regs data operands; .b32 uses
// Int32Regs and .b64 uses Int64Regs. There is no .v4.b64 record in this group.
// NOTE(review): `.trap` is presumably the PTX out-of-bounds handling mode --
// confirm its exact semantics against the PTX ISA `sust` description.
5235 def SUST_B_1D_B8_TRAP
5236   : NVPTXInst<(outs),
5237               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5238               "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5239               []>;
5240 def SUST_B_1D_B16_TRAP
5241   : NVPTXInst<(outs),
5242               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5243               "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5244               []>;
5245 def SUST_B_1D_B32_TRAP
5246   : NVPTXInst<(outs),
5247               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5248               "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5249               []>;
5250 def SUST_B_1D_B64_TRAP
5251   : NVPTXInst<(outs),
5252               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5253               "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
5254               []>;
5255 def SUST_B_1D_V2B8_TRAP
5256   : NVPTXInst<(outs),
5257               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5258               "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5259               []>;
5260 def SUST_B_1D_V2B16_TRAP
5261   : NVPTXInst<(outs),
5262               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5263               "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5264               []>;
5265 def SUST_B_1D_V2B32_TRAP
5266   : NVPTXInst<(outs),
5267               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5268               "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5269               []>;
5270 def SUST_B_1D_V2B64_TRAP
5271   : NVPTXInst<(outs),
5272               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5273               "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5274               []>;
5275 def SUST_B_1D_V4B8_TRAP
5276   : NVPTXInst<(outs),
5277               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5278                    Int16Regs:$b, Int16Regs:$a),
5279               "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5280               []>;
5281 def SUST_B_1D_V4B16_TRAP
5282   : NVPTXInst<(outs),
5283               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5284                    Int16Regs:$b, Int16Regs:$a),
5285               "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5286               []>;
5287 def SUST_B_1D_V4B32_TRAP
5288   : NVPTXInst<(outs),
5289               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5290                    Int32Regs:$b, Int32Regs:$a),
5291               "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5292               []>;
// sust.b.a1d.*.trap records: scalar, .v2 and .v4 forms.
// Every record has an empty `(outs)` list and an empty pattern list `[]`, so
// no selection pattern is attached to these definitions here.
// Operands: $s (Int64Regs) is printed first inside the brackets, followed by
// the Int32Regs operands $idx and $x (printed in that order); $r/$g/$b/$a are
// the values stored.
// The .b8 and .b16 forms both take Int16Regs data operands; .b32 uses
// Int32Regs and .b64 uses Int64Regs. There is no .v4.b64 record in this group.
// NOTE(review): `.trap` is presumably the PTX out-of-bounds handling mode --
// confirm its exact semantics against the PTX ISA `sust` description.
5295 def SUST_B_1D_ARRAY_B8_TRAP
5296   : NVPTXInst<(outs),
5297               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5298               "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5299               []>;
5300 def SUST_B_1D_ARRAY_B16_TRAP
5301   : NVPTXInst<(outs),
5302               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5303               "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5304               []>;
5305 def SUST_B_1D_ARRAY_B32_TRAP
5306   : NVPTXInst<(outs),
5307               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5308               "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5309               []>;
5310 def SUST_B_1D_ARRAY_B64_TRAP
5311   : NVPTXInst<(outs),
5312               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5313               "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5314               []>;
5315 def SUST_B_1D_ARRAY_V2B8_TRAP
5316   : NVPTXInst<(outs),
5317               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5318                    Int16Regs:$g),
5319               "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5320               []>;
5321 def SUST_B_1D_ARRAY_V2B16_TRAP
5322   : NVPTXInst<(outs),
5323               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5324                    Int16Regs:$g),
5325               "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5326               []>;
5327 def SUST_B_1D_ARRAY_V2B32_TRAP
5328   : NVPTXInst<(outs),
5329               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5330                    Int32Regs:$g),
5331               "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5332               []>;
5333 def SUST_B_1D_ARRAY_V2B64_TRAP
5334   : NVPTXInst<(outs),
5335               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5336                    Int64Regs:$g),
5337               "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5338               []>;
5339 def SUST_B_1D_ARRAY_V4B8_TRAP
5340   : NVPTXInst<(outs),
5341               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5342                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5343               "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5344               "\\{$r, $g, $b, $a\\};",
5345               []>;
5346 def SUST_B_1D_ARRAY_V4B16_TRAP
5347   : NVPTXInst<(outs),
5348               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5349                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5350              "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5351              "\\{$r, $g, $b, $a\\};",
5352               []>;
5353 def SUST_B_1D_ARRAY_V4B32_TRAP
5354   : NVPTXInst<(outs),
5355               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5356                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5357              "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5358              "\\{$r, $g, $b, $a\\};",
5359               []>;
// sust.b.2d.*.trap records: scalar, .v2 and .v4 forms.
// Every record has an empty `(outs)` list and an empty pattern list `[]`, so
// no selection pattern is attached to these definitions here.
// Operands: $s (Int64Regs) is printed first inside the brackets, followed by
// the Int32Regs coordinates $x, $y; $r/$g/$b/$a are the values stored.
// The .b8 and .b16 forms both take Int16Regs data operands; .b32 uses
// Int32Regs and .b64 uses Int64Regs. There is no .v4.b64 record in this group.
// NOTE(review): `.trap` is presumably the PTX out-of-bounds handling mode --
// confirm its exact semantics against the PTX ISA `sust` description.
5362 def SUST_B_2D_B8_TRAP
5363   : NVPTXInst<(outs),
5364               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5365               "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5366               []>;
5367 def SUST_B_2D_B16_TRAP
5368   : NVPTXInst<(outs),
5369               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5370               "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5371               []>;
5372 def SUST_B_2D_B32_TRAP
5373   : NVPTXInst<(outs),
5374               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5375               "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5376               []>;
5377 def SUST_B_2D_B64_TRAP
5378   : NVPTXInst<(outs),
5379               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5380               "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5381               []>;
5382 def SUST_B_2D_V2B8_TRAP
5383   : NVPTXInst<(outs),
5384               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5385                    Int16Regs:$g),
5386               "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5387               []>;
5388 def SUST_B_2D_V2B16_TRAP
5389   : NVPTXInst<(outs),
5390               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5391                    Int16Regs:$g),
5392               "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5393               []>;
5394 def SUST_B_2D_V2B32_TRAP
5395   : NVPTXInst<(outs),
5396               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5397                    Int32Regs:$g),
5398               "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5399               []>;
5400 def SUST_B_2D_V2B64_TRAP
5401   : NVPTXInst<(outs),
5402               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5403                    Int64Regs:$g),
5404               "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5405               []>;
5406 def SUST_B_2D_V4B8_TRAP
5407   : NVPTXInst<(outs),
5408               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5409                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5410               "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5411               "\\{$r, $g, $b, $a\\};",
5412               []>;
5413 def SUST_B_2D_V4B16_TRAP
5414   : NVPTXInst<(outs),
5415               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5416                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5417              "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5418              "\\{$r, $g, $b, $a\\};",
5419               []>;
5420 def SUST_B_2D_V4B32_TRAP
5421   : NVPTXInst<(outs),
5422               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5423                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5424              "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5425              "\\{$r, $g, $b, $a\\};",
5426               []>;
// sust.b.a2d.*.trap records: scalar, .v2 and .v4 forms.
// Every record has an empty `(outs)` list and an empty pattern list `[]`, so
// no selection pattern is attached to these definitions here.
// Operands: $s (Int64Regs) is printed first inside the brackets, followed by
// the Int32Regs operands $idx, $x, $y; $r/$g/$b/$a are the values stored.
// The .b8 and .b16 forms both take Int16Regs data operands; .b32 uses
// Int32Regs and .b64 uses Int64Regs. There is no .v4.b64 record in this group.
// The asm string prints $y twice, so three Int32Regs operands fill a
// four-entry brace list.
// NOTE(review): presumably the repeated $y is padding that the PTX a2d
// coordinate syntax requires and ignores, and `.trap` is the out-of-bounds
// handling mode -- confirm both against the PTX ISA `sust` description.
5429 def SUST_B_2D_ARRAY_B8_TRAP
5430   : NVPTXInst<(outs),
5431               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5432                    Int16Regs:$r),
5433               "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5434               []>;
5435 def SUST_B_2D_ARRAY_B16_TRAP
5436   : NVPTXInst<(outs),
5437               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5438                    Int16Regs:$r),
5439               "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5440               []>;
5441 def SUST_B_2D_ARRAY_B32_TRAP
5442   : NVPTXInst<(outs),
5443               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5444                    Int32Regs:$r),
5445               "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5446               []>;
5447 def SUST_B_2D_ARRAY_B64_TRAP
5448   : NVPTXInst<(outs),
5449               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5450                    Int64Regs:$r),
5451               "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5452               []>;
5453 def SUST_B_2D_ARRAY_V2B8_TRAP
5454   : NVPTXInst<(outs),
5455               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5456                    Int16Regs:$r, Int16Regs:$g),
5457               "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5458               "\\{$r, $g\\};",
5459               []>;
5460 def SUST_B_2D_ARRAY_V2B16_TRAP
5461   : NVPTXInst<(outs),
5462               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5463                    Int16Regs:$r, Int16Regs:$g),
5464              "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5465              "\\{$r, $g\\};",
5466               []>;
5467 def SUST_B_2D_ARRAY_V2B32_TRAP
5468   : NVPTXInst<(outs),
5469               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5470                    Int32Regs:$r, Int32Regs:$g),
5471              "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5472              "\\{$r, $g\\};",
5473               []>;
5474 def SUST_B_2D_ARRAY_V2B64_TRAP
5475   : NVPTXInst<(outs),
5476               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5477                    Int64Regs:$r, Int64Regs:$g),
5478              "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5479              "\\{$r, $g\\};",
5480               []>;
5481 def SUST_B_2D_ARRAY_V4B8_TRAP
5482   : NVPTXInst<(outs),
5483               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5484                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5485       "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5486       "\\{$r, $g, $b, $a\\};",
5487               []>;
5488 def SUST_B_2D_ARRAY_V4B16_TRAP
5489   : NVPTXInst<(outs),
5490               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5491                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5492      "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5493      "\\{$r, $g, $b, $a\\};",
5494               []>;
5495 def SUST_B_2D_ARRAY_V4B32_TRAP
5496   : NVPTXInst<(outs),
5497               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5498                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5499      "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5500      "\\{$r, $g, $b, $a\\};",
5501               []>;
// sust.b.3d.*.trap records: scalar, .v2 and .v4 forms.
// Every record has an empty `(outs)` list and an empty pattern list `[]`, so
// no selection pattern is attached to these definitions here.
// Operands: $s (Int64Regs) is printed first inside the brackets, followed by
// the Int32Regs coordinates $x, $y, $z; $r/$g/$b/$a are the values stored.
// The .b8 and .b16 forms both take Int16Regs data operands; .b32 uses
// Int32Regs and .b64 uses Int64Regs. There is no .v4.b64 record in this group.
// The asm string prints $z twice, so three coordinate operands fill a
// four-entry brace list.
// NOTE(review): presumably the repeated $z is padding that the PTX 3d
// coordinate syntax requires and ignores, and `.trap` is the out-of-bounds
// handling mode -- confirm both against the PTX ISA `sust` description.
5504 def SUST_B_3D_B8_TRAP
5505   : NVPTXInst<(outs),
5506               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5507                    Int16Regs:$r),
5508               "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5509               []>;
5510 def SUST_B_3D_B16_TRAP
5511   : NVPTXInst<(outs),
5512               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5513                    Int16Regs:$r),
5514               "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5515               []>;
5516 def SUST_B_3D_B32_TRAP
5517   : NVPTXInst<(outs),
5518               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5519                    Int32Regs:$r),
5520               "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5521               []>;
5522 def SUST_B_3D_B64_TRAP
5523   : NVPTXInst<(outs),
5524               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5525                    Int64Regs:$r),
5526               "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5527               []>;
5528 def SUST_B_3D_V2B8_TRAP
5529   : NVPTXInst<(outs),
5530               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5531                    Int16Regs:$r, Int16Regs:$g),
5532               "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5533               "\\{$r, $g\\};",
5534               []>;
5535 def SUST_B_3D_V2B16_TRAP
5536   : NVPTXInst<(outs),
5537               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5538                    Int16Regs:$r, Int16Regs:$g),
5539               "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5540               "\\{$r, $g\\};",
5541               []>;
5542 def SUST_B_3D_V2B32_TRAP
5543   : NVPTXInst<(outs),
5544               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5545                    Int32Regs:$r, Int32Regs:$g),
5546               "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5547               "\\{$r, $g\\};",
5548               []>;
5549 def SUST_B_3D_V2B64_TRAP
5550   : NVPTXInst<(outs),
5551               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5552                    Int64Regs:$r, Int64Regs:$g),
5553               "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5554               "\\{$r, $g\\};",
5555               []>;
5556 def SUST_B_3D_V4B8_TRAP
5557   : NVPTXInst<(outs),
5558               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5559                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5560          "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5561          "\\{$r, $g, $b, $a\\};",
5562               []>;
5563 def SUST_B_3D_V4B16_TRAP
5564   : NVPTXInst<(outs),
5565               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5566                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5567         "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5568         "\\{$r, $g, $b, $a\\};",
5569               []>;
5570 def SUST_B_3D_V4B32_TRAP
5571   : NVPTXInst<(outs),
5572               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5573                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5574         "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5575         "\\{$r, $g, $b, $a\\};",
5576               []>;
5579 // .zero variant
// sust.b.1d.*.zero records: scalar, .v2 and .v4 forms.
// Every record has an empty `(outs)` list and an empty pattern list `[]`, so
// no selection pattern is attached to these definitions here.
// Operands: $s (Int64Regs) is printed first inside the brackets, followed by
// the single Int32Regs coordinate $x; $r/$g/$b/$a are the values stored.
// The .b8 and .b16 forms both take Int16Regs data operands; .b32 uses
// Int32Regs and .b64 uses Int64Regs. There is no .v4.b64 record in this group.
// NOTE(review): `.zero` is presumably the PTX out-of-bounds handling mode --
// confirm its exact semantics against the PTX ISA `sust` description.
5580 def SUST_B_1D_B8_ZERO
5581   : NVPTXInst<(outs),
5582               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5583               "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
5584               []>;
5585 def SUST_B_1D_B16_ZERO
5586   : NVPTXInst<(outs),
5587               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5588               "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
5589               []>;
5590 def SUST_B_1D_B32_ZERO
5591   : NVPTXInst<(outs),
5592               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5593               "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
5594               []>;
5595 def SUST_B_1D_B64_ZERO
5596   : NVPTXInst<(outs),
5597               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5598               "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
5599               []>;
5600 def SUST_B_1D_V2B8_ZERO
5601   : NVPTXInst<(outs),
5602               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5603               "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5604               []>;
5605 def SUST_B_1D_V2B16_ZERO
5606   : NVPTXInst<(outs),
5607               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5608               "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5609               []>;
5610 def SUST_B_1D_V2B32_ZERO
5611   : NVPTXInst<(outs),
5612               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5613               "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5614               []>;
5615 def SUST_B_1D_V2B64_ZERO
5616   : NVPTXInst<(outs),
5617               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5618               "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5619               []>;
5620 def SUST_B_1D_V4B8_ZERO
5621   : NVPTXInst<(outs),
5622               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5623                    Int16Regs:$b, Int16Regs:$a),
5624               "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5625               []>;
5626 def SUST_B_1D_V4B16_ZERO
5627   : NVPTXInst<(outs),
5628               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5629                    Int16Regs:$b, Int16Regs:$a),
5630               "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5631               []>;
5632 def SUST_B_1D_V4B32_ZERO
5633   : NVPTXInst<(outs),
5634               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5635                    Int32Regs:$b, Int32Regs:$a),
5636               "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5637               []>;
// sust.b.a1d.*.zero records: scalar, .v2 and .v4 forms.
// Every record has an empty `(outs)` list and an empty pattern list `[]`, so
// no selection pattern is attached to these definitions here.
// Operands: $s (Int64Regs) is printed first inside the brackets, followed by
// the Int32Regs operands $idx and $x (printed in that order); $r/$g/$b/$a are
// the values stored.
// The .b8 and .b16 forms both take Int16Regs data operands; .b32 uses
// Int32Regs and .b64 uses Int64Regs. There is no .v4.b64 record in this group.
// NOTE(review): `.zero` is presumably the PTX out-of-bounds handling mode --
// confirm its exact semantics against the PTX ISA `sust` description.
5640 def SUST_B_1D_ARRAY_B8_ZERO
5641   : NVPTXInst<(outs),
5642               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5643               "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5644               []>;
5645 def SUST_B_1D_ARRAY_B16_ZERO
5646   : NVPTXInst<(outs),
5647               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5648               "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5649               []>;
5650 def SUST_B_1D_ARRAY_B32_ZERO
5651   : NVPTXInst<(outs),
5652               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5653               "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5654               []>;
5655 def SUST_B_1D_ARRAY_B64_ZERO
5656   : NVPTXInst<(outs),
5657               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5658               "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5659               []>;
5660 def SUST_B_1D_ARRAY_V2B8_ZERO
5661   : NVPTXInst<(outs),
5662               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5663                    Int16Regs:$g),
5664               "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5665               []>;
5666 def SUST_B_1D_ARRAY_V2B16_ZERO
5667   : NVPTXInst<(outs),
5668               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5669                    Int16Regs:$g),
5670               "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5671               []>;
5672 def SUST_B_1D_ARRAY_V2B32_ZERO
5673   : NVPTXInst<(outs),
5674               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5675                    Int32Regs:$g),
5676               "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5677               []>;
5678 def SUST_B_1D_ARRAY_V2B64_ZERO
5679   : NVPTXInst<(outs),
5680               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5681                    Int64Regs:$g),
5682               "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5683               []>;
5684 def SUST_B_1D_ARRAY_V4B8_ZERO
5685   : NVPTXInst<(outs),
5686               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5687                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5688               "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
5689               "\\{$r, $g, $b, $a\\};",
5690               []>;
5691 def SUST_B_1D_ARRAY_V4B16_ZERO
5692   : NVPTXInst<(outs),
5693               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5694                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5695              "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
5696              "\\{$r, $g, $b, $a\\};",
5697               []>;
5698 def SUST_B_1D_ARRAY_V4B32_ZERO
5699   : NVPTXInst<(outs),
5700               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5701                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5702              "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
5703              "\\{$r, $g, $b, $a\\};",
5704               []>;
// sust.b.2d.*.zero records: scalar, .v2 and .v4 forms.
// Every record has an empty `(outs)` list and an empty pattern list `[]`, so
// no selection pattern is attached to these definitions here.
// Operands: $s (Int64Regs) is printed first inside the brackets, followed by
// the Int32Regs coordinates $x, $y; $r/$g/$b/$a are the values stored.
// The .b8 and .b16 forms both take Int16Regs data operands; .b32 uses
// Int32Regs and .b64 uses Int64Regs. There is no .v4.b64 record in this group.
// NOTE(review): `.zero` is presumably the PTX out-of-bounds handling mode --
// confirm its exact semantics against the PTX ISA `sust` description.
5707 def SUST_B_2D_B8_ZERO
5708   : NVPTXInst<(outs),
5709               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5710               "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5711               []>;
5712 def SUST_B_2D_B16_ZERO
5713   : NVPTXInst<(outs),
5714               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5715               "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5716               []>;
5717 def SUST_B_2D_B32_ZERO
5718   : NVPTXInst<(outs),
5719               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5720               "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5721               []>;
5722 def SUST_B_2D_B64_ZERO
5723   : NVPTXInst<(outs),
5724               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5725               "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5726               []>;
5727 def SUST_B_2D_V2B8_ZERO
5728   : NVPTXInst<(outs),
5729               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5730                    Int16Regs:$g),
5731               "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5732               []>;
5733 def SUST_B_2D_V2B16_ZERO
5734   : NVPTXInst<(outs),
5735               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5736                    Int16Regs:$g),
5737               "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5738               []>;
5739 def SUST_B_2D_V2B32_ZERO
5740   : NVPTXInst<(outs),
5741               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5742                    Int32Regs:$g),
5743               "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5744               []>;
5745 def SUST_B_2D_V2B64_ZERO
5746   : NVPTXInst<(outs),
5747               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5748                    Int64Regs:$g),
5749               "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5750               []>;
5751 def SUST_B_2D_V4B8_ZERO
5752   : NVPTXInst<(outs),
5753               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5754                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5755               "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
5756               "\\{$r, $g, $b, $a\\};",
5757               []>;
5758 def SUST_B_2D_V4B16_ZERO
5759   : NVPTXInst<(outs),
5760               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5761                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5762              "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
5763              "\\{$r, $g, $b, $a\\};",
5764               []>;
5765 def SUST_B_2D_V4B32_ZERO
5766   : NVPTXInst<(outs),
5767               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5768                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5769              "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
5770              "\\{$r, $g, $b, $a\\};",
5771               []>;
// sust.b.a2d.*.zero records: scalar, .v2 and .v4 forms.
// Every record has an empty `(outs)` list and an empty pattern list `[]`, so
// no selection pattern is attached to these definitions here.
// Operands: $s (Int64Regs) is printed first inside the brackets, followed by
// the Int32Regs operands $idx, $x, $y; $r/$g/$b/$a are the values stored.
// The .b8 and .b16 forms both take Int16Regs data operands; .b32 uses
// Int32Regs and .b64 uses Int64Regs. There is no .v4.b64 record in this group.
// The asm string prints $y twice, so three Int32Regs operands fill a
// four-entry brace list.
// NOTE(review): presumably the repeated $y is padding that the PTX a2d
// coordinate syntax requires and ignores, and `.zero` is the out-of-bounds
// handling mode -- confirm both against the PTX ISA `sust` description.
5774 def SUST_B_2D_ARRAY_B8_ZERO
5775   : NVPTXInst<(outs),
5776               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5777                    Int16Regs:$r),
5778               "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5779               []>;
5780 def SUST_B_2D_ARRAY_B16_ZERO
5781   : NVPTXInst<(outs),
5782               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5783                    Int16Regs:$r),
5784               "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5785               []>;
5786 def SUST_B_2D_ARRAY_B32_ZERO
5787   : NVPTXInst<(outs),
5788               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5789                    Int32Regs:$r),
5790               "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5791               []>;
5792 def SUST_B_2D_ARRAY_B64_ZERO
5793   : NVPTXInst<(outs),
5794               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5795                    Int64Regs:$r),
5796               "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5797               []>;
5798 def SUST_B_2D_ARRAY_V2B8_ZERO
5799   : NVPTXInst<(outs),
5800               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5801                    Int16Regs:$r, Int16Regs:$g),
5802               "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5803               "\\{$r, $g\\};",
5804               []>;
5805 def SUST_B_2D_ARRAY_V2B16_ZERO
5806   : NVPTXInst<(outs),
5807               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5808                    Int16Regs:$r, Int16Regs:$g),
5809              "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5810              "\\{$r, $g\\};",
5811               []>;
5812 def SUST_B_2D_ARRAY_V2B32_ZERO
5813   : NVPTXInst<(outs),
5814               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5815                    Int32Regs:$r, Int32Regs:$g),
5816              "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5817              "\\{$r, $g\\};",
5818               []>;
5819 def SUST_B_2D_ARRAY_V2B64_ZERO
5820   : NVPTXInst<(outs),
5821               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5822                    Int64Regs:$r, Int64Regs:$g),
5823              "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5824              "\\{$r, $g\\};",
5825               []>;
5826 def SUST_B_2D_ARRAY_V4B8_ZERO
5827   : NVPTXInst<(outs),
5828               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5829                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5830       "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5831       "\\{$r, $g, $b, $a\\};",
5832               []>;
5833 def SUST_B_2D_ARRAY_V4B16_ZERO
5834   : NVPTXInst<(outs),
5835               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5836                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5837      "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5838      "\\{$r, $g, $b, $a\\};",
5839               []>;
5840 def SUST_B_2D_ARRAY_V4B32_ZERO
5841   : NVPTXInst<(outs),
5842               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5843                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5844      "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5845      "\\{$r, $g, $b, $a\\};",
5846               []>;
// PTX `sust.b.3d.b8.zero`: surface store of one component ($r) to $s at
// {$x, $y, $z}. The b8 data operand is declared as Int16Regs.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
5849 def SUST_B_3D_B8_ZERO
5850   : NVPTXInst<(outs),
5851               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5852                    Int16Regs:$r),
5853               "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5854               []>;
// PTX `sust.b.3d.b16.zero`: surface store of one 16-bit component
// ($r in Int16Regs) to $s at {$x, $y, $z}.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
5855 def SUST_B_3D_B16_ZERO
5856   : NVPTXInst<(outs),
5857               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5858                    Int16Regs:$r),
5859               "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5860               []>;
// PTX `sust.b.3d.b32.zero`: surface store of one 32-bit component
// ($r in Int32Regs) to $s at {$x, $y, $z}.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
5861 def SUST_B_3D_B32_ZERO
5862   : NVPTXInst<(outs),
5863               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5864                    Int32Regs:$r),
5865               "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5866               []>;
// PTX `sust.b.3d.b64.zero`: surface store of one 64-bit component
// ($r in Int64Regs) to $s at {$x, $y, $z}.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
5867 def SUST_B_3D_B64_ZERO
5868   : NVPTXInst<(outs),
5869               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5870                    Int64Regs:$r),
5871               "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5872               []>;
// PTX `sust.b.3d.v2.b8.zero`: surface store of two components ($r, $g) to $s
// at {$x, $y, $z}. The b8 data operands are declared as Int16Regs.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
5873 def SUST_B_3D_V2B8_ZERO
5874   : NVPTXInst<(outs),
5875               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5876                    Int16Regs:$r, Int16Regs:$g),
5877               "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5878               "\\{$r, $g\\};",
5879               []>;
// PTX `sust.b.3d.v2.b16.zero`: surface store of two 16-bit components
// ($r, $g in Int16Regs) to $s at {$x, $y, $z}.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
5880 def SUST_B_3D_V2B16_ZERO
5881   : NVPTXInst<(outs),
5882               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5883                    Int16Regs:$r, Int16Regs:$g),
5884               "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5885               "\\{$r, $g\\};",
5886               []>;
// PTX `sust.b.3d.v2.b32.zero`: surface store of two 32-bit components
// ($r, $g in Int32Regs) to $s at {$x, $y, $z}.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
5887 def SUST_B_3D_V2B32_ZERO
5888   : NVPTXInst<(outs),
5889               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5890                    Int32Regs:$r, Int32Regs:$g),
5891               "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5892               "\\{$r, $g\\};",
5893               []>;
// PTX `sust.b.3d.v2.b64.zero`: surface store of two 64-bit components
// ($r, $g in Int64Regs) to $s at {$x, $y, $z}.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
5894 def SUST_B_3D_V2B64_ZERO
5895   : NVPTXInst<(outs),
5896               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5897                    Int64Regs:$r, Int64Regs:$g),
5898               "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5899               "\\{$r, $g\\};",
5900               []>;
// PTX `sust.b.3d.v4.b8.zero`: surface store of four components
// ($r, $g, $b, $a) to $s at {$x, $y, $z}.
// The b8 data operands are declared as Int16Regs.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
5901 def SUST_B_3D_V4B8_ZERO
5902   : NVPTXInst<(outs),
5903               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5904                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5905          "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5906          "\\{$r, $g, $b, $a\\};",
5907               []>;
// PTX `sust.b.3d.v4.b16.zero`: surface store of four 16-bit components
// ($r, $g, $b, $a in Int16Regs) to $s at {$x, $y, $z}.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
5908 def SUST_B_3D_V4B16_ZERO
5909   : NVPTXInst<(outs),
5910               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5911                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5912         "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5913         "\\{$r, $g, $b, $a\\};",
5914               []>;
// PTX `sust.b.3d.v4.b32.zero`: surface store of four 32-bit components
// ($r, $g, $b, $a in Int32Regs) to $s at {$x, $y, $z}.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
5915 def SUST_B_3D_V4B32_ZERO
5916   : NVPTXInst<(outs),
5917               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5918                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5919         "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5920         "\\{$r, $g, $b, $a\\};",
5921               []>;
5925 // Formatted surface stores (sust.p.*)
// PTX `sust.p.1d.b8.trap`: formatted surface store of one component ($r) to
// $s at {$x}. The b8 data operand is declared as Int16Regs.
// Declared with an empty pattern list.
5927 def SUST_P_1D_B8_TRAP
5928   : NVPTXInst<(outs),
5929               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5930               "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5931               []>;
// PTX `sust.p.1d.b16.trap`: formatted surface store of one 16-bit component
// ($r in Int16Regs) to $s at {$x}. Declared with an empty pattern list.
5932 def SUST_P_1D_B16_TRAP
5933   : NVPTXInst<(outs),
5934               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5935               "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5936               []>;
// PTX `sust.p.1d.b32.trap`: formatted surface store of one 32-bit component
// ($r in Int32Regs) to $s at {$x}. Declared with an empty pattern list.
5937 def SUST_P_1D_B32_TRAP
5938   : NVPTXInst<(outs),
5939               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5940               "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5941               []>;
// PTX `sust.p.1d.v2.b8.trap`: formatted surface store of two components
// ($r, $g) to $s at {$x}. The b8 data operands are declared as Int16Regs.
// Declared with an empty pattern list.
5942 def SUST_P_1D_V2B8_TRAP
5943   : NVPTXInst<(outs),
5944               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5945               "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5946               []>;
// PTX `sust.p.1d.v2.b16.trap`: formatted surface store of two 16-bit
// components ($r, $g in Int16Regs) to $s at {$x}. Empty pattern list.
5947 def SUST_P_1D_V2B16_TRAP
5948   : NVPTXInst<(outs),
5949               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5950               "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5951               []>;
// PTX `sust.p.1d.v2.b32.trap`: formatted surface store of two 32-bit
// components ($r, $g in Int32Regs) to $s at {$x}. Empty pattern list.
5952 def SUST_P_1D_V2B32_TRAP
5953   : NVPTXInst<(outs),
5954               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5955               "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5956               []>;
// PTX `sust.p.1d.v4.b8.trap`: formatted surface store of four components
// ($r, $g, $b, $a) to $s at {$x}. The b8 data operands are declared as
// Int16Regs. Declared with an empty pattern list.
5957 def SUST_P_1D_V4B8_TRAP
5958   : NVPTXInst<(outs),
5959               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5960                    Int16Regs:$b, Int16Regs:$a),
5961               "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5962               []>;
// PTX `sust.p.1d.v4.b16.trap`: formatted surface store of four 16-bit
// components ($r, $g, $b, $a in Int16Regs) to $s at {$x}. Empty pattern list.
5963 def SUST_P_1D_V4B16_TRAP
5964   : NVPTXInst<(outs),
5965               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5966                    Int16Regs:$b, Int16Regs:$a),
5967               "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5968               []>;
// PTX `sust.p.1d.v4.b32.trap`: formatted surface store of four 32-bit
// components ($r, $g, $b, $a in Int32Regs) to $s at {$x}. Empty pattern list.
5969 def SUST_P_1D_V4B32_TRAP
5970   : NVPTXInst<(outs),
5971               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5972                    Int32Regs:$b, Int32Regs:$a),
5973               "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5974               []>;
// PTX `sust.p.a1d.b8.trap`: formatted surface store of one component ($r) to
// $s at {$idx, $x}. The b8 data operand is declared as Int16Regs.
// Declared with an empty pattern list.
5977 def SUST_P_1D_ARRAY_B8_TRAP
5978   : NVPTXInst<(outs),
5979               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5980               "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5981               []>;
// PTX `sust.p.a1d.b16.trap`: formatted surface store of one 16-bit component
// ($r in Int16Regs) to $s at {$idx, $x}. Empty pattern list.
5982 def SUST_P_1D_ARRAY_B16_TRAP
5983   : NVPTXInst<(outs),
5984               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5985               "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5986               []>;
// PTX `sust.p.a1d.b32.trap`: formatted surface store of one 32-bit component
// ($r in Int32Regs) to $s at {$idx, $x}. Empty pattern list.
5987 def SUST_P_1D_ARRAY_B32_TRAP
5988   : NVPTXInst<(outs),
5989               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5990               "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5991               []>;
// PTX `sust.p.a1d.v2.b8.trap`: formatted surface store of two components
// ($r, $g) to $s at {$idx, $x}. The b8 data operands are declared as
// Int16Regs. Declared with an empty pattern list.
5992 def SUST_P_1D_ARRAY_V2B8_TRAP
5993   : NVPTXInst<(outs),
5994               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5995                    Int16Regs:$g),
5996               "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5997               []>;
// PTX `sust.p.a1d.v2.b16.trap`: formatted surface store of two 16-bit
// components ($r, $g in Int16Regs) to $s at {$idx, $x}. Empty pattern list.
5998 def SUST_P_1D_ARRAY_V2B16_TRAP
5999   : NVPTXInst<(outs),
6000               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
6001                    Int16Regs:$g),
6002               "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
6003               []>;
// PTX `sust.p.a1d.v2.b32.trap`: formatted surface store of two 32-bit
// components ($r, $g in Int32Regs) to $s at {$idx, $x}. Empty pattern list.
6004 def SUST_P_1D_ARRAY_V2B32_TRAP
6005   : NVPTXInst<(outs),
6006               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
6007                    Int32Regs:$g),
6008               "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
6009               []>;
// PTX `sust.p.a1d.v4.b8.trap`: formatted surface store of four components
// ($r, $g, $b, $a) to $s at {$idx, $x}. The b8 data operands are declared as
// Int16Regs. Declared with an empty pattern list.
6010 def SUST_P_1D_ARRAY_V4B8_TRAP
6011   : NVPTXInst<(outs),
6012               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
6013                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6014               "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
6015               "\\{$r, $g, $b, $a\\};",
6016               []>;
// PTX `sust.p.a1d.v4.b16.trap`: formatted surface store of four 16-bit
// components ($r, $g, $b, $a in Int16Regs) to $s at {$idx, $x}.
// Declared with an empty pattern list.
6017 def SUST_P_1D_ARRAY_V4B16_TRAP
6018   : NVPTXInst<(outs),
6019               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
6020                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6021              "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
6022              "\\{$r, $g, $b, $a\\};",
6023               []>;
// PTX `sust.p.a1d.v4.b32.trap`: formatted surface store of four 32-bit
// components ($r, $g, $b, $a in Int32Regs) to $s at {$idx, $x}.
// Declared with an empty pattern list.
6024 def SUST_P_1D_ARRAY_V4B32_TRAP
6025   : NVPTXInst<(outs),
6026               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
6027                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6028              "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
6029              "\\{$r, $g, $b, $a\\};",
6030               []>;
// PTX `sust.p.2d.b8.trap`: formatted surface store of one component ($r) to
// $s at {$x, $y}. The b8 data operand is declared as Int16Regs.
// Declared with an empty pattern list.
6033 def SUST_P_2D_B8_TRAP
6034   : NVPTXInst<(outs),
6035               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6036               "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
6037               []>;
// PTX `sust.p.2d.b16.trap`: formatted surface store of one 16-bit component
// ($r in Int16Regs) to $s at {$x, $y}. Empty pattern list.
6038 def SUST_P_2D_B16_TRAP
6039   : NVPTXInst<(outs),
6040               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6041               "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
6042               []>;
// PTX `sust.p.2d.b32.trap`: formatted surface store of one 32-bit component
// ($r in Int32Regs) to $s at {$x, $y}. Empty pattern list.
6043 def SUST_P_2D_B32_TRAP
6044   : NVPTXInst<(outs),
6045               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6046               "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
6047               []>;
// PTX `sust.p.2d.v2.b8.trap`: formatted surface store of two components
// ($r, $g) to $s at {$x, $y}. The b8 data operands are declared as Int16Regs.
// Declared with an empty pattern list.
6048 def SUST_P_2D_V2B8_TRAP
6049   : NVPTXInst<(outs),
6050               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
6051                    Int16Regs:$g),
6052               "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
6053               []>;
// PTX `sust.p.2d.v2.b16.trap`: formatted surface store of two 16-bit
// components ($r, $g in Int16Regs) to $s at {$x, $y}. Empty pattern list.
6054 def SUST_P_2D_V2B16_TRAP
6055   : NVPTXInst<(outs),
6056               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
6057                    Int16Regs:$g),
6058               "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
6059               []>;
// PTX `sust.p.2d.v2.b32.trap`: formatted surface store of two 32-bit
// components ($r, $g in Int32Regs) to $s at {$x, $y}. Empty pattern list.
6060 def SUST_P_2D_V2B32_TRAP
6061   : NVPTXInst<(outs),
6062               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6063                    Int32Regs:$g),
6064               "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
6065               []>;
// PTX `sust.p.2d.v4.b8.trap`: formatted surface store of four components
// ($r, $g, $b, $a) to $s at {$x, $y}. The b8 data operands are declared as
// Int16Regs. Declared with an empty pattern list.
6066 def SUST_P_2D_V4B8_TRAP
6067   : NVPTXInst<(outs),
6068               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
6069                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6070               "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
6071               "\\{$r, $g, $b, $a\\};",
6072               []>;
// PTX `sust.p.2d.v4.b16.trap`: formatted surface store of four 16-bit
// components ($r, $g, $b, $a in Int16Regs) to $s at {$x, $y}.
// Declared with an empty pattern list.
6073 def SUST_P_2D_V4B16_TRAP
6074   : NVPTXInst<(outs),
6075               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
6076                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6077              "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
6078              "\\{$r, $g, $b, $a\\};",
6079               []>;
// PTX `sust.p.2d.v4.b32.trap`: formatted surface store of four 32-bit
// components ($r, $g, $b, $a in Int32Regs) to $s at {$x, $y}.
// Declared with an empty pattern list.
6080 def SUST_P_2D_V4B32_TRAP
6081   : NVPTXInst<(outs),
6082               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6083                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6084              "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
6085              "\\{$r, $g, $b, $a\\};",
6086               []>;
// PTX `sust.p.a2d.b8.trap`: formatted surface store of one component ($r) to
// $s at {$idx, $x, $y}. The b8 data operand is declared as Int16Regs.
// The fourth coordinate slot in the asm string repeats $y (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6089 def SUST_P_2D_ARRAY_B8_TRAP
6090   : NVPTXInst<(outs),
6091               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6092                    Int16Regs:$r),
6093               "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
6094               []>;
// PTX `sust.p.a2d.b16.trap`: formatted surface store of one 16-bit component
// ($r in Int16Regs) to $s at {$idx, $x, $y}.
// The fourth coordinate slot in the asm string repeats $y (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6095 def SUST_P_2D_ARRAY_B16_TRAP
6096   : NVPTXInst<(outs),
6097               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6098                    Int16Regs:$r),
6099               "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
6100               []>;
// PTX `sust.p.a2d.b32.trap`: formatted surface store of one 32-bit component
// ($r in Int32Regs) to $s at {$idx, $x, $y}.
// The fourth coordinate slot in the asm string repeats $y (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6101 def SUST_P_2D_ARRAY_B32_TRAP
6102   : NVPTXInst<(outs),
6103               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6104                    Int32Regs:$r),
6105               "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
6106               []>;
// PTX `sust.p.a2d.v2.b8.trap`: formatted surface store of two components
// ($r, $g) to $s at {$idx, $x, $y}. The b8 data operands are declared as
// Int16Regs.
// The fourth coordinate slot in the asm string repeats $y (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6107 def SUST_P_2D_ARRAY_V2B8_TRAP
6108   : NVPTXInst<(outs),
6109               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6110                    Int16Regs:$r, Int16Regs:$g),
6111               "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
6112               "\\{$r, $g\\};",
6113               []>;
// PTX `sust.p.a2d.v2.b16.trap`: formatted surface store of two 16-bit
// components ($r, $g in Int16Regs) to $s at {$idx, $x, $y}.
// The fourth coordinate slot in the asm string repeats $y (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6114 def SUST_P_2D_ARRAY_V2B16_TRAP
6115   : NVPTXInst<(outs),
6116               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6117                    Int16Regs:$r, Int16Regs:$g),
6118              "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
6119              "\\{$r, $g\\};",
6120               []>;
// PTX `sust.p.a2d.v2.b32.trap`: formatted surface store of two 32-bit
// components ($r, $g in Int32Regs) to $s at {$idx, $x, $y}.
// The fourth coordinate slot in the asm string repeats $y (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6121 def SUST_P_2D_ARRAY_V2B32_TRAP
6122   : NVPTXInst<(outs),
6123               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6124                    Int32Regs:$r, Int32Regs:$g),
6125              "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
6126              "\\{$r, $g\\};",
6127               []>;
// PTX `sust.p.a2d.v4.b8.trap`: formatted surface store of four components
// ($r, $g, $b, $a) to $s at {$idx, $x, $y}. The b8 data operands are declared
// as Int16Regs.
// The fourth coordinate slot in the asm string repeats $y (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6128 def SUST_P_2D_ARRAY_V4B8_TRAP
6129   : NVPTXInst<(outs),
6130               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6131                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6132       "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
6133       "\\{$r, $g, $b, $a\\};",
6134               []>;
// PTX `sust.p.a2d.v4.b16.trap`: formatted surface store of four 16-bit
// components ($r, $g, $b, $a in Int16Regs) to $s at {$idx, $x, $y}.
// The fourth coordinate slot in the asm string repeats $y (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6135 def SUST_P_2D_ARRAY_V4B16_TRAP
6136   : NVPTXInst<(outs),
6137               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6138                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6139      "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
6140      "\\{$r, $g, $b, $a\\};",
6141               []>;
// PTX `sust.p.a2d.v4.b32.trap`: formatted surface store of four 32-bit
// components ($r, $g, $b, $a in Int32Regs) to $s at {$idx, $x, $y}.
// The fourth coordinate slot in the asm string repeats $y (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6142 def SUST_P_2D_ARRAY_V4B32_TRAP
6143   : NVPTXInst<(outs),
6144               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
6145                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6146      "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
6147      "\\{$r, $g, $b, $a\\};",
6148               []>;
// PTX `sust.p.3d.b8.trap`: formatted surface store of one component ($r) to
// $s at {$x, $y, $z}. The b8 data operand is declared as Int16Regs.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6151 def SUST_P_3D_B8_TRAP
6152   : NVPTXInst<(outs),
6153               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6154                    Int16Regs:$r),
6155               "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
6156               []>;
// PTX `sust.p.3d.b16.trap`: formatted surface store of one 16-bit component
// ($r in Int16Regs) to $s at {$x, $y, $z}.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6157 def SUST_P_3D_B16_TRAP
6158   : NVPTXInst<(outs),
6159               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6160                    Int16Regs:$r),
6161               "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
6162               []>;
// PTX `sust.p.3d.b32.trap`: formatted surface store of one 32-bit component
// ($r in Int32Regs) to $s at {$x, $y, $z}.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6163 def SUST_P_3D_B32_TRAP
6164   : NVPTXInst<(outs),
6165               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6166                    Int32Regs:$r),
6167               "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
6168               []>;
// PTX `sust.p.3d.v2.b8.trap`: formatted surface store of two components
// ($r, $g) to $s at {$x, $y, $z}. The b8 data operands are declared as
// Int16Regs.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6169 def SUST_P_3D_V2B8_TRAP
6170   : NVPTXInst<(outs),
6171               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6172                    Int16Regs:$r, Int16Regs:$g),
6173               "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6174               "\\{$r, $g\\};",
6175               []>;
// PTX `sust.p.3d.v2.b16.trap`: formatted surface store of two 16-bit
// components ($r, $g in Int16Regs) to $s at {$x, $y, $z}.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6176 def SUST_P_3D_V2B16_TRAP
6177   : NVPTXInst<(outs),
6178               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6179                    Int16Regs:$r, Int16Regs:$g),
6180               "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6181               "\\{$r, $g\\};",
6182               []>;
// PTX `sust.p.3d.v2.b32.trap`: formatted surface store of two 32-bit
// components ($r, $g in Int32Regs) to $s at {$x, $y, $z}.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6183 def SUST_P_3D_V2B32_TRAP
6184   : NVPTXInst<(outs),
6185               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6186                    Int32Regs:$r, Int32Regs:$g),
6187               "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6188               "\\{$r, $g\\};",
6189               []>;
// PTX `sust.p.3d.v4.b8.trap`: formatted surface store of four components
// ($r, $g, $b, $a) to $s at {$x, $y, $z}. The b8 data operands are declared
// as Int16Regs.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6190 def SUST_P_3D_V4B8_TRAP
6191   : NVPTXInst<(outs),
6192               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6193                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6194          "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6195          "\\{$r, $g, $b, $a\\};",
6196               []>;
// PTX `sust.p.3d.v4.b16.trap`: formatted surface store of four 16-bit
// components ($r, $g, $b, $a in Int16Regs) to $s at {$x, $y, $z}.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6197 def SUST_P_3D_V4B16_TRAP
6198   : NVPTXInst<(outs),
6199               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6200                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6201         "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6202         "\\{$r, $g, $b, $a\\};",
6203               []>;
// PTX `sust.p.3d.v4.b32.trap`: formatted surface store of four 32-bit
// components ($r, $g, $b, $a in Int32Regs) to $s at {$x, $y, $z}.
// The fourth coordinate slot in the asm string repeats $z (presumably
// padding -- confirm against the PTX ISA). Empty pattern list.
6204 def SUST_P_3D_V4B32_TRAP
6205   : NVPTXInst<(outs),
6206               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6207                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6208         "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6209         "\\{$r, $g, $b, $a\\};",
6210               []>;
6213 // Surface store instruction patterns
6214 // I'm not sure why we can't just include these in the instruction definitions,
6215 // but TableGen complains of type errors :(
6217 // .clamp variant
// Selection patterns for the 1D `.clamp` sust.b intrinsics.
// Each int_nvvm_sust_b_1d_<ty>_clamp node is matched to the
// SUST_B_1D_<TY>_CLAMP instruction with the same operands in the same order:
// surface $s, coordinate $x, then the data component(s).
// Scalar forms: i8 and i16 data bind Int16Regs, i32 binds Int32Regs,
// i64 binds Int64Regs.
6218 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
6219            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6220           (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6222 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
6223            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6224           (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6226 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
6227            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6228           (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6230 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
6231            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6232           (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// Two-component forms ($r, $g).
6234 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
6235            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6236           (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
6237            Int16Regs:$r, Int16Regs:$g)>;
6239 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
6240            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6241           (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
6242            Int16Regs:$r, Int16Regs:$g)>;
6244 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
6245            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6246           (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
6247            Int32Regs:$r, Int32Regs:$g)>;
6249 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
6250            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6251           (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
6252            Int64Regs:$r, Int64Regs:$g)>;
// Four-component forms ($r, $g, $b, $a); this group has i8, i16 and i32 only.
6254 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
6255            Int64Regs:$s, Int32Regs:$x,
6256            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6257           (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
6258            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6260 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
6261            Int64Regs:$s, Int32Regs:$x,
6262            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6263           (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
6264            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6266 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
6267            Int64Regs:$s, Int32Regs:$x,
6268            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6269           (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
6270            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D-array `.clamp` sust.b intrinsics.
// Each int_nvvm_sust_b_1d_array_<ty>_clamp node is matched to the
// SUST_B_1D_ARRAY_<TY>_CLAMP instruction with the same operands in the same
// order: surface $s, $l, coordinate $x, then the data component(s).
// NOTE(review): $l is presumably the array layer index -- confirm against the
// instruction definitions.
// Scalar forms.
6274 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
6275            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6276           (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6277            Int16Regs:$r)>;
6279 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
6280            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6281           (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6282            Int16Regs:$r)>;
6284 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
6285            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6286           (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6287            Int32Regs:$r)>;
6289 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
6290            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6291           (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6292            Int64Regs:$r)>;
// Two-component forms ($r, $g).
6294 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
6295           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6296           (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6297            Int16Regs:$r, Int16Regs:$g)>;
6299 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
6300           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6301           (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6302            Int16Regs:$r, Int16Regs:$g)>;
6304 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
6305           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6306           (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6307            Int32Regs:$r, Int32Regs:$g)>;
6309 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
6310           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6311           (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6312            Int64Regs:$r, Int64Regs:$g)>;
// Four-component forms ($r, $g, $b, $a); this group has i8, i16 and i32 only.
6314 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
6315            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6316            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6317           (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6318            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6320 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
6321            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6322            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6323           (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6324            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6326 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
6327            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6328            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6329           (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6330            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D `.clamp` sust.b intrinsics.
// Each int_nvvm_sust_b_2d_<ty>_clamp node is matched to the
// SUST_B_2D_<TY>_CLAMP instruction with the same operands in the same order:
// surface $s, coordinates $x and $y, then the data component(s).
// Scalar forms.
6334 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
6335            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6336           (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6337            Int16Regs:$r)>;
6339 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
6340            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6341           (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6342            Int16Regs:$r)>;
6344 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
6345            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6346           (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6347            Int32Regs:$r)>;
6349 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
6350            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6351           (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6352            Int64Regs:$r)>;
// Two-component forms ($r, $g).
6354 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
6355           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6356           (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6357            Int16Regs:$r, Int16Regs:$g)>;
6359 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
6360           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6361           (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6362            Int16Regs:$r, Int16Regs:$g)>;
6364 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
6365           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6366           (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6367            Int32Regs:$r, Int32Regs:$g)>;
6369 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
6370           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6371           (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6372            Int64Regs:$r, Int64Regs:$g)>;
// Four-component forms ($r, $g, $b, $a); this group has i8, i16 and i32 only.
6374 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
6375            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6376            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6377           (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6378            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6380 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
6381            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6382            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6383           (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6384            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6386 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
6387            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6388            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6389           (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6390            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D-array `.clamp` sust.b intrinsics.
// Each int_nvvm_sust_b_2d_array_<ty>_clamp node is matched to the
// SUST_B_2D_ARRAY_<TY>_CLAMP instruction with the same operands in the same
// order: surface $s, $l, coordinates $x and $y, then the data component(s).
// NOTE(review): $l is presumably the array layer index -- confirm against the
// instruction definitions.
// Scalar forms.
6394 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
6395           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6396           (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
6397            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6398            Int16Regs:$r)>;
6400 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
6401           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6402           (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
6403            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6404            Int16Regs:$r)>;
6406 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
6407           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6408           (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
6409            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6410            Int32Regs:$r)>;
6412 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
6413           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6414           (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
6415            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6416            Int64Regs:$r)>;
// Two-component forms ($r, $g).
6418 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
6419            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6420            Int16Regs:$r, Int16Regs:$g),
6421           (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
6422            Int32Regs:$x, Int32Regs:$y,
6423            Int16Regs:$r, Int16Regs:$g)>;
6425 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
6426            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6427            Int16Regs:$r, Int16Regs:$g),
6428           (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
6429            Int32Regs:$x, Int32Regs:$y,
6430            Int16Regs:$r, Int16Regs:$g)>;
6432 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
6433            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6434            Int32Regs:$g),
6435           (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6436            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6438 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
6439            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6440            Int64Regs:$g),
6441           (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
6442            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Four-component forms ($r, $g, $b, $a); this group has i8, i16 and i32 only.
6444 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
6445            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6446            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6447           (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
6448            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6449            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6451 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
6452            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6453            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6454           (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
6455            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6456            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6458 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
6459            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6460            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6461           (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6462            Int32Regs:$x, Int32Regs:$y,
6463            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 3D `.clamp` sust.b intrinsics.
// Each int_nvvm_sust_b_3d_<ty>_clamp node is matched to the
// SUST_B_3D_<TY>_CLAMP instruction with the same operands in the same order:
// surface $s, coordinates $x, $y and $z, then the data component(s).
// Scalar forms.
6467 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
6468            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6469            Int16Regs:$r),
6470           (SUST_B_3D_B8_CLAMP Int64Regs:$s,
6471            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6472            Int16Regs:$r)>;
6474 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
6475            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6476            Int16Regs:$r),
6477           (SUST_B_3D_B16_CLAMP Int64Regs:$s,
6478            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6479            Int16Regs:$r)>;
6481 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
6482            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6483            Int32Regs:$r),
6484           (SUST_B_3D_B32_CLAMP Int64Regs:$s,
6485            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6486            Int32Regs:$r)>;
6488 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
6489            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6490            Int64Regs:$r),
6491           (SUST_B_3D_B64_CLAMP Int64Regs:$s,
6492            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6493            Int64Regs:$r)>;
// Two-component forms ($r, $g).
6495 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
6496            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6497            Int16Regs:$r, Int16Regs:$g),
6498           (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
6499            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6500            Int16Regs:$r, Int16Regs:$g)>;
6502 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
6503            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6504            Int16Regs:$r, Int16Regs:$g),
6505           (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
6506            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6507            Int16Regs:$r, Int16Regs:$g)>;
6509 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
6510            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6511            Int32Regs:$r, Int32Regs:$g),
6512           (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
6513            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6514            Int32Regs:$r, Int32Regs:$g)>;
6516 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
6517            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6518            Int64Regs:$r, Int64Regs:$g),
6519           (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
6520            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6521            Int64Regs:$r, Int64Regs:$g)>;
// Four-component forms ($r, $g, $b, $a); this group has i8, i16 and i32 only.
6523 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
6524            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6525            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6526           (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
6527            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6528            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6530 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
6531            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6532            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6533           (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
6534            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6535            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6537 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
6538            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6539            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6540           (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
6541            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6542            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6545 // .trap variant
// Selection patterns for the 1D sust.b intrinsics with the _trap suffix.
// Each int_nvvm_sust_b_1d_<ty>_trap is rewritten to the like-named
// SUST_B_1D_<TY>_TRAP node (defined outside this chunk) with every operand
// forwarded unchanged and in the same order: $s (Int64Regs), $x (Int32Regs),
// then the value operand(s) $r[, $g[, $b, $a]].
// Both the 8-bit and 16-bit element forms carry their values in Int16Regs;
// 32-bit forms use Int32Regs and 64-bit forms use Int64Regs.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the intrinsic definitions.
6546 def : Pat<(int_nvvm_sust_b_1d_i8_trap
6547            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6548           (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6550 def : Pat<(int_nvvm_sust_b_1d_i16_trap
6551            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6552           (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6554 def : Pat<(int_nvvm_sust_b_1d_i32_trap
6555            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6556           (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6558 def : Pat<(int_nvvm_sust_b_1d_i64_trap
6559            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6560           (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6562 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
6563            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6564           (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6565            Int16Regs:$r, Int16Regs:$g)>;
6567 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
6568            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6569           (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6570            Int16Regs:$r, Int16Regs:$g)>;
6572 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
6573            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6574           (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6575            Int32Regs:$r, Int32Regs:$g)>;
6577 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
6578            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6579           (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
6580            Int64Regs:$r, Int64Regs:$g)>;
6582 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
6583            Int64Regs:$s, Int32Regs:$x,
6584            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6585           (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6586            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6588 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
6589            Int64Regs:$s, Int32Regs:$x,
6590            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6591           (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6592            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6594 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
6595            Int64Regs:$s, Int32Regs:$x,
6596            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6597           (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6598            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D-array sust.b intrinsics with the _trap suffix.
// Each int_nvvm_sust_b_1d_array_<ty>_trap is rewritten to the like-named
// SUST_B_1D_ARRAY_<TY>_TRAP node (defined outside this chunk) with every
// operand forwarded unchanged and in the same order: $s (Int64Regs), $l and $x
// (both Int32Regs), then the value operand(s) $r[, $g[, $b, $a]].
// Both the 8-bit and 16-bit element forms carry their values in Int16Regs.
// NOTE(review): $l is presumably the array layer index and $x the coordinate
// within that layer -- confirm against the intrinsic definitions.
6602 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
6603            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6604           (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6605            Int16Regs:$r)>;
6607 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
6608            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6609           (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6610            Int16Regs:$r)>;
6612 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
6613            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6614           (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6615            Int32Regs:$r)>;
6617 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
6618            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6619           (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6620            Int64Regs:$r)>;
6622 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6623           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6624           (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6625            Int16Regs:$r, Int16Regs:$g)>;
6627 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
6628           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6629           (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6630            Int16Regs:$r, Int16Regs:$g)>;
6632 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
6633           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6634           (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6635            Int32Regs:$r, Int32Regs:$g)>;
6637 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
6638           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6639           (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6640            Int64Regs:$r, Int64Regs:$g)>;
6642 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
6643            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6644            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6645           (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6646            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6648 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6649            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6650            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6651           (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6652            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6654 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6655            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6656            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6657           (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6658            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D sust.b intrinsics with the _trap suffix.
// Each int_nvvm_sust_b_2d_<ty>_trap is rewritten to the like-named
// SUST_B_2D_<TY>_TRAP node (defined outside this chunk) with every operand
// forwarded unchanged and in the same order: $s (Int64Regs), $x and $y (both
// Int32Regs), then the value operand(s) $r[, $g[, $b, $a]].
// Both the 8-bit and 16-bit element forms carry their values in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the intrinsic definitions.
6662 def : Pat<(int_nvvm_sust_b_2d_i8_trap
6663            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6664           (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6665            Int16Regs:$r)>;
6667 def : Pat<(int_nvvm_sust_b_2d_i16_trap
6668            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6669           (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6670            Int16Regs:$r)>;
6672 def : Pat<(int_nvvm_sust_b_2d_i32_trap
6673            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6674           (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6675            Int32Regs:$r)>;
6677 def : Pat<(int_nvvm_sust_b_2d_i64_trap
6678            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6679           (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6680            Int64Regs:$r)>;
6682 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6683           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6684           (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6685            Int16Regs:$r, Int16Regs:$g)>;
6687 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6688           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6689           (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6690            Int16Regs:$r, Int16Regs:$g)>;
6692 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6693           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6694           (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6695            Int32Regs:$r, Int32Regs:$g)>;
6697 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6698           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6699           (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6700            Int64Regs:$r, Int64Regs:$g)>;
6702 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6703            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6704            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6705           (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6706            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6708 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6709            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6710            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6711           (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6712            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6714 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6715            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6716            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6717           (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6718            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D-array sust.b intrinsics with the _trap suffix.
// Each int_nvvm_sust_b_2d_array_<ty>_trap is rewritten to the like-named
// SUST_B_2D_ARRAY_<TY>_TRAP node (defined outside this chunk) with every
// operand forwarded unchanged and in the same order: $s (Int64Regs), then $l,
// $x and $y (all Int32Regs), then the value operand(s) $r[, $g[, $b, $a]].
// Both the 8-bit and 16-bit element forms carry their values in Int16Regs.
// NOTE(review): $l is presumably the array layer index and $x/$y the
// coordinates within that layer -- confirm against the intrinsic definitions.
6722 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6723           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6724           (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6725            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6726            Int16Regs:$r)>;
6728 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6729           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6730           (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6731            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6732            Int16Regs:$r)>;
6734 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6735           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6736           (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6737            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6738            Int32Regs:$r)>;
6740 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6741           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6742           (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6743            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6744            Int64Regs:$r)>;
6746 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6747            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6748            Int16Regs:$r, Int16Regs:$g),
6749           (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6750            Int32Regs:$x, Int32Regs:$y,
6751            Int16Regs:$r, Int16Regs:$g)>;
6753 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6754            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6755            Int16Regs:$r, Int16Regs:$g),
6756           (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6757            Int32Regs:$x, Int32Regs:$y,
6758            Int16Regs:$r, Int16Regs:$g)>;
6760 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6761            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6762            Int32Regs:$g),
6763           (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6764            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6766 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6767            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6768            Int64Regs:$g),
6769           (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6770            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6772 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6773            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6774            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6775           (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6776            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6777            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6779 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6780            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6781            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6782           (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6783            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6784            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6786 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6787            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6788            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6789           (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6790            Int32Regs:$x, Int32Regs:$y,
6791            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 3D sust.b intrinsics with the _trap suffix.
// Each int_nvvm_sust_b_3d_<ty>_trap is rewritten to the like-named
// SUST_B_3D_<TY>_TRAP node (defined outside this chunk) with every operand
// forwarded unchanged and in the same order: $s (Int64Regs), then $x, $y and
// $z (all Int32Regs), then the value operand(s) $r[, $g[, $b, $a]].
// Both the 8-bit and 16-bit element forms carry their values in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the intrinsic definitions.
6795 def : Pat<(int_nvvm_sust_b_3d_i8_trap
6796            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6797            Int16Regs:$r),
6798           (SUST_B_3D_B8_TRAP Int64Regs:$s,
6799            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6800            Int16Regs:$r)>;
6802 def : Pat<(int_nvvm_sust_b_3d_i16_trap
6803            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6804            Int16Regs:$r),
6805           (SUST_B_3D_B16_TRAP Int64Regs:$s,
6806            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6807            Int16Regs:$r)>;
6809 def : Pat<(int_nvvm_sust_b_3d_i32_trap
6810            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6811            Int32Regs:$r),
6812           (SUST_B_3D_B32_TRAP Int64Regs:$s,
6813            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6814            Int32Regs:$r)>;
6816 def : Pat<(int_nvvm_sust_b_3d_i64_trap
6817            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6818            Int64Regs:$r),
6819           (SUST_B_3D_B64_TRAP Int64Regs:$s,
6820            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6821            Int64Regs:$r)>;
6823 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6824            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6825            Int16Regs:$r, Int16Regs:$g),
6826           (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6827            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6828            Int16Regs:$r, Int16Regs:$g)>;
6830 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6831            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6832            Int16Regs:$r, Int16Regs:$g),
6833           (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6834            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6835            Int16Regs:$r, Int16Regs:$g)>;
6837 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6838            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6839            Int32Regs:$r, Int32Regs:$g),
6840           (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6841            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6842            Int32Regs:$r, Int32Regs:$g)>;
6844 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6845            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6846            Int64Regs:$r, Int64Regs:$g),
6847           (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6848            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6849            Int64Regs:$r, Int64Regs:$g)>;
6851 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6852            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6853            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6854           (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6855            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6856            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6858 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6859            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6860            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6861           (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6862            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6863            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6865 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6866            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6867            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6868           (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6869            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6870            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6873 // .zero variant
// Selection patterns for the 1D sust.b intrinsics with the _zero suffix.
// Each int_nvvm_sust_b_1d_<ty>_zero is rewritten to the like-named
// SUST_B_1D_<TY>_ZERO node (defined outside this chunk) with every operand
// forwarded unchanged and in the same order: $s (Int64Regs), $x (Int32Regs),
// then the value operand(s) $r[, $g[, $b, $a]].
// Both the 8-bit and 16-bit element forms carry their values in Int16Regs;
// 32-bit forms use Int32Regs and 64-bit forms use Int64Regs.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the intrinsic definitions.
6874 def : Pat<(int_nvvm_sust_b_1d_i8_zero
6875            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6876           (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6878 def : Pat<(int_nvvm_sust_b_1d_i16_zero
6879            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6880           (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6882 def : Pat<(int_nvvm_sust_b_1d_i32_zero
6883            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6884           (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6886 def : Pat<(int_nvvm_sust_b_1d_i64_zero
6887            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6888           (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6890 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6891            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6892           (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6893            Int16Regs:$r, Int16Regs:$g)>;
6895 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6896            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6897           (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6898            Int16Regs:$r, Int16Regs:$g)>;
6900 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6901            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6902           (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6903            Int32Regs:$r, Int32Regs:$g)>;
6905 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6906            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6907           (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6908            Int64Regs:$r, Int64Regs:$g)>;
6910 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6911            Int64Regs:$s, Int32Regs:$x,
6912            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6913           (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6914            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6916 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6917            Int64Regs:$s, Int32Regs:$x,
6918            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6919           (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6920            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6922 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6923            Int64Regs:$s, Int32Regs:$x,
6924            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6925           (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6926            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D-array sust.b intrinsics with the _zero suffix.
// Each int_nvvm_sust_b_1d_array_<ty>_zero is rewritten to the like-named
// SUST_B_1D_ARRAY_<TY>_ZERO node (defined outside this chunk) with every
// operand forwarded unchanged and in the same order: $s (Int64Regs), $l and $x
// (both Int32Regs), then the value operand(s) $r[, $g[, $b, $a]].
// Both the 8-bit and 16-bit element forms carry their values in Int16Regs.
// NOTE(review): $l is presumably the array layer index and $x the coordinate
// within that layer -- confirm against the intrinsic definitions.
6930 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6931            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6932           (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6933            Int16Regs:$r)>;
6935 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6936            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6937           (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6938            Int16Regs:$r)>;
6940 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6941            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6942           (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6943            Int32Regs:$r)>;
6945 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6946            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6947           (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6948            Int64Regs:$r)>;
6950 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6951           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6952           (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6953            Int16Regs:$r, Int16Regs:$g)>;
6955 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6956           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6957           (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6958            Int16Regs:$r, Int16Regs:$g)>;
6960 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6961           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6962           (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6963            Int32Regs:$r, Int32Regs:$g)>;
6965 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6966           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6967           (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6968            Int64Regs:$r, Int64Regs:$g)>;
6970 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6971            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6972            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6973           (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6974            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6976 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6977            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6978            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6979           (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6980            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6982 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6983            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6984            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6985           (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6986            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D sust.b intrinsics with the _zero suffix.
// Each int_nvvm_sust_b_2d_<ty>_zero is rewritten to the like-named
// SUST_B_2D_<TY>_ZERO node (defined outside this chunk) with every operand
// forwarded unchanged and in the same order: $s (Int64Regs), $x and $y (both
// Int32Regs), then the value operand(s) $r[, $g[, $b, $a]].
// Both the 8-bit and 16-bit element forms carry their values in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the intrinsic definitions.
6990 def : Pat<(int_nvvm_sust_b_2d_i8_zero
6991            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6992           (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6993            Int16Regs:$r)>;
6995 def : Pat<(int_nvvm_sust_b_2d_i16_zero
6996            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6997           (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6998            Int16Regs:$r)>;
7000 def : Pat<(int_nvvm_sust_b_2d_i32_zero
7001            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7002           (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7003            Int32Regs:$r)>;
7005 def : Pat<(int_nvvm_sust_b_2d_i64_zero
7006            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
7007           (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7008            Int64Regs:$r)>;
7010 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
7011           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7012           (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7013            Int16Regs:$r, Int16Regs:$g)>;
7015 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
7016           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7017           (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7018            Int16Regs:$r, Int16Regs:$g)>;
7020 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
7021           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
7022           (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7023            Int32Regs:$r, Int32Regs:$g)>;
7025 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
7026           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
7027           (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7028            Int64Regs:$r, Int64Regs:$g)>;
7030 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
7031            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7032            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7033           (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7034            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7036 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
7037            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7038            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7039           (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7040            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7042 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
7043            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7044            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7045           (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7046            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D-array sust.b intrinsics with the _zero suffix.
// Each int_nvvm_sust_b_2d_array_<ty>_zero is rewritten to the like-named
// SUST_B_2D_ARRAY_<TY>_ZERO node (defined outside this chunk) with every
// operand forwarded unchanged and in the same order: $s (Int64Regs), then $l,
// $x and $y (all Int32Regs), then the value operand(s) $r[, $g[, $b, $a]].
// Both the 8-bit and 16-bit element forms carry their values in Int16Regs.
// NOTE(review): $l is presumably the array layer index and $x/$y the
// coordinates within that layer -- confirm against the intrinsic definitions.
7050 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
7051           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7052           (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
7053            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7054            Int16Regs:$r)>;
7056 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
7057           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7058           (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
7059            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7060            Int16Regs:$r)>;
7062 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
7063           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7064           (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
7065            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7066            Int32Regs:$r)>;
7068 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
7069           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
7070           (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
7071            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7072            Int64Regs:$r)>;
7074 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
7075            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7076            Int16Regs:$r, Int16Regs:$g),
7077           (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
7078            Int32Regs:$x, Int32Regs:$y,
7079            Int16Regs:$r, Int16Regs:$g)>;
7081 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
7082            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7083            Int16Regs:$r, Int16Regs:$g),
7084           (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
7085            Int32Regs:$x, Int32Regs:$y,
7086            Int16Regs:$r, Int16Regs:$g)>;
7088 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
7089            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
7090            Int32Regs:$g),
7091           (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
7092            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
7094 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
7095            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
7096            Int64Regs:$g),
7097           (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
7098            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
7100 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
7101            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7102            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7103           (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
7104            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7105            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7107 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
7108            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7109            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7110           (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
7111            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7112            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7114 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
7115            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7116            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7117           (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
7118            Int32Regs:$x, Int32Regs:$y,
7119            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 3D sust.b intrinsics with the _zero suffix.
// Each int_nvvm_sust_b_3d_<ty>_zero is rewritten to the like-named
// SUST_B_3D_<TY>_ZERO node (defined outside this chunk) with every operand
// forwarded unchanged and in the same order: $s (Int64Regs), then $x, $y and
// $z (all Int32Regs), then the value operand(s) $r[, $g[, $b, $a]].
// Both the 8-bit and 16-bit element forms carry their values in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the intrinsic definitions.
7123 def : Pat<(int_nvvm_sust_b_3d_i8_zero
7124            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7125            Int16Regs:$r),
7126           (SUST_B_3D_B8_ZERO Int64Regs:$s,
7127            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7128            Int16Regs:$r)>;
7130 def : Pat<(int_nvvm_sust_b_3d_i16_zero
7131            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7132            Int16Regs:$r),
7133           (SUST_B_3D_B16_ZERO Int64Regs:$s,
7134            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7135            Int16Regs:$r)>;
7137 def : Pat<(int_nvvm_sust_b_3d_i32_zero
7138            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7139            Int32Regs:$r),
7140           (SUST_B_3D_B32_ZERO Int64Regs:$s,
7141            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7142            Int32Regs:$r)>;
7144 def : Pat<(int_nvvm_sust_b_3d_i64_zero
7145            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7146            Int64Regs:$r),
7147           (SUST_B_3D_B64_ZERO Int64Regs:$s,
7148            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7149            Int64Regs:$r)>;
7151 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
7152            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7153            Int16Regs:$r, Int16Regs:$g),
7154           (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
7155            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7156            Int16Regs:$r, Int16Regs:$g)>;
7158 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
7159            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7160            Int16Regs:$r, Int16Regs:$g),
7161           (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
7162            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7163            Int16Regs:$r, Int16Regs:$g)>;
7165 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
7166            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7167            Int32Regs:$r, Int32Regs:$g),
7168           (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
7169            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7170            Int32Regs:$r, Int32Regs:$g)>;
7172 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
7173            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7174            Int64Regs:$r, Int64Regs:$g),
7175           (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
7176            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7177            Int64Regs:$r, Int64Regs:$g)>;
7179 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
7180            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7181            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7182           (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
7183            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7184            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7186 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
7187            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7188            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7189           (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
7190            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7191            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7193 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
7194            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7195            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7196           (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
7197            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7198            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D sust.p intrinsics with the _trap suffix.
// Each int_nvvm_sust_p_1d_<ty>_trap is rewritten to the like-named
// SUST_P_1D_<TY>_TRAP node (defined outside this chunk) with every operand
// forwarded unchanged and in the same order: $s (Int64Regs), $x (Int32Regs),
// then the value operand(s) $r[, $g[, $b, $a]].
// Only 8-, 16- and 32-bit element forms appear in this group (no 64-bit
// patterns); the 8-bit and 16-bit forms both carry their values in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the intrinsic definitions.
7203 def : Pat<(int_nvvm_sust_p_1d_i8_trap
7204            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
7205           (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
7207 def : Pat<(int_nvvm_sust_p_1d_i16_trap
7208            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
7209           (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
7211 def : Pat<(int_nvvm_sust_p_1d_i32_trap
7212            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
7213           (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
7215 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
7216            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7217           (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
7218            Int16Regs:$r, Int16Regs:$g)>;
7220 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
7221            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7222           (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
7223            Int16Regs:$r, Int16Regs:$g)>;
7225 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
7226            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7227           (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
7228            Int32Regs:$r, Int32Regs:$g)>;
7230 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
7231            Int64Regs:$s, Int32Regs:$x,
7232            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7233           (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
7234            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7236 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
7237            Int64Regs:$s, Int32Regs:$x,
7238            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7239           (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
7240            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7242 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
7243            Int64Regs:$s, Int32Regs:$x,
7244            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7245           (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
7246            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D-array int_nvvm_sust_p_*_trap intrinsics.
// Same shape as the plain 1D patterns, with one extra Int32Regs operand $l
// placed between $s and $x. Each Pat maps onto the SUST_P_1D_ARRAY_*_TRAP
// instruction of the same width and forwards all operands in order.
// i8 and i16 data both travel in Int16Regs; i32 data uses Int32Regs.
// NOTE(review): $l is presumably the array layer index -- confirm against the
// intrinsic definitions.
7250 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
7251            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7252           (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7253            Int16Regs:$r)>;
7255 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
7256            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7257           (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7258            Int16Regs:$r)>;
7260 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
7261            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
7262           (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7263            Int32Regs:$r)>;
7265 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
7266           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7267           (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7268            Int16Regs:$r, Int16Regs:$g)>;
7270 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
7271           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7272           (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7273            Int16Regs:$r, Int16Regs:$g)>;
7275 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
7276           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7277           (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7278            Int32Regs:$r, Int32Regs:$g)>;
7280 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
7281            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7282            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7283           (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7284            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7286 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
7287            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7288            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7289           (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7290            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7292 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
7293            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7294            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7295           (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7296            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D int_nvvm_sust_p_*_trap intrinsics.
// Operand order is $s (Int64Regs), $x, $y (Int32Regs), then the data values
// $r[, $g[, $b, $a]]. Each Pat maps onto the SUST_P_2D_*_TRAP instruction of
// the same width and forwards the operands unchanged.
// i8 and i16 data both travel in Int16Regs; i32 data uses Int32Regs.
7300 def : Pat<(int_nvvm_sust_p_2d_i8_trap
7301            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7302           (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7303            Int16Regs:$r)>;
7305 def : Pat<(int_nvvm_sust_p_2d_i16_trap
7306            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7307           (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7308            Int16Regs:$r)>;
7310 def : Pat<(int_nvvm_sust_p_2d_i32_trap
7311            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7312           (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7313            Int32Regs:$r)>;
7315 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
7316           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7317           (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7318            Int16Regs:$r, Int16Regs:$g)>;
7320 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
7321           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7322           (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7323            Int16Regs:$r, Int16Regs:$g)>;
7325 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
7326           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
7327           (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7328            Int32Regs:$r, Int32Regs:$g)>;
7330 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
7331            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7332            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7333           (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7334            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7336 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
7337            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7338            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7339           (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7340            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7342 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
7343            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7344            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7345           (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7346            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D-array int_nvvm_sust_p_*_trap intrinsics.
// Same shape as the plain 2D patterns, with one extra Int32Regs operand $l
// placed between $s and $x. Each Pat maps onto the SUST_P_2D_ARRAY_*_TRAP
// instruction of the same width and forwards all operands in order.
// i8 and i16 data both travel in Int16Regs; i32 data uses Int32Regs.
// NOTE(review): $l is presumably the array layer index -- confirm against the
// intrinsic definitions.
7350 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
7351           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7352           (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
7353            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7354            Int16Regs:$r)>;
7356 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
7357           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7358           (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
7359            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7360            Int16Regs:$r)>;
7362 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
7363           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7364           (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
7365            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7366            Int32Regs:$r)>;
7368 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
7369            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7370            Int16Regs:$r, Int16Regs:$g),
7371           (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
7372            Int32Regs:$x, Int32Regs:$y,
7373            Int16Regs:$r, Int16Regs:$g)>;
7375 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
7376            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7377            Int16Regs:$r, Int16Regs:$g),
7378           (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
7379            Int32Regs:$x, Int32Regs:$y,
7380            Int16Regs:$r, Int16Regs:$g)>;
7382 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
7383            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
7384            Int32Regs:$g),
7385           (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
7386            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
7388 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
7389            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7390            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7391           (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
7392            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7393            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7395 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
7396            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7397            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7398           (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
7399            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7400            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7402 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
7403            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7404            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7405           (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
7406            Int32Regs:$x, Int32Regs:$y,
7407            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 3D int_nvvm_sust_p_*_trap intrinsics.
// Operand order is $s (Int64Regs), $x, $y, $z (Int32Regs), then the data
// values $r[, $g[, $b, $a]]. Each Pat maps onto the SUST_P_3D_*_TRAP
// instruction of the same width and forwards the operands unchanged.
// i8 and i16 data both travel in Int16Regs; i32 data uses Int32Regs.
7411 def : Pat<(int_nvvm_sust_p_3d_i8_trap
7412            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7413            Int16Regs:$r),
7414           (SUST_P_3D_B8_TRAP Int64Regs:$s,
7415            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7416            Int16Regs:$r)>;
7418 def : Pat<(int_nvvm_sust_p_3d_i16_trap
7419            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7420            Int16Regs:$r),
7421           (SUST_P_3D_B16_TRAP Int64Regs:$s,
7422            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7423            Int16Regs:$r)>;
7425 def : Pat<(int_nvvm_sust_p_3d_i32_trap
7426            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7427            Int32Regs:$r),
7428           (SUST_P_3D_B32_TRAP Int64Regs:$s,
7429            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7430            Int32Regs:$r)>;
7432 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
7433            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7434            Int16Regs:$r, Int16Regs:$g),
7435           (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
7436            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7437            Int16Regs:$r, Int16Regs:$g)>;
7439 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
7440            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7441            Int16Regs:$r, Int16Regs:$g),
7442           (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
7443            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7444            Int16Regs:$r, Int16Regs:$g)>;
7446 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
7447            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7448            Int32Regs:$r, Int32Regs:$g),
7449           (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
7450            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7451            Int32Regs:$r, Int32Regs:$g)>;
7453 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
7454            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7455            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7456           (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
7457            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7458            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7460 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
7461            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7462            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7463           (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
7464            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7465            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7467 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
7468            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7469            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7470           (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
7471            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7472            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7474 //-----------------------------------
7475 // Read Special Registers
7476 //-----------------------------------
// Instruction template for reading a special register into a 64-bit register.
//   regname - register name without the leading '%'; the class prepends it.
//   intop   - zero-operand intrinsic this instruction is selected for.
// Emits "mov.u64 \t$d, %<regname>;", takes no inputs and defines $d in
// Int64Regs.
7478 class PTX_READ_SREG_R64<string regname, Intrinsic intop>
7479   : NVPTXInst<(outs Int64Regs:$d), (ins),
7480               !strconcat("mov.u64 \t$d, %", regname, ";"),
7481               [(set Int64Regs:$d, (intop))]>;
// Instruction template for reading a special register into a 32-bit register.
//   regname - register name without the leading '%'; the class prepends it.
//   intop   - zero-operand intrinsic this instruction is selected for.
// Emits "mov.u32 \t$d, %<regname>;", takes no inputs and defines $d in
// Int32Regs.
7483 class PTX_READ_SREG_R32<string regname, Intrinsic intop>
7484   : NVPTXInst<(outs Int32Regs:$d), (ins),
7485               !strconcat("mov.u32 \t$d, %", regname, ";"),
7486               [(set Int32Regs:$d, (intop))]>;
7488 // TODO Add read vector-version of special registers
// Special-register read instructions. Each def instantiates PTX_READ_SREG_R32
// (PTX_READ_SREG_R64 for clock64 only) with the register name to print after
// '%' and the int_nvvm_read_ptx_sreg_* intrinsic it is selected for.

// tid.{x,y,z,w}
7490 def INT_PTX_SREG_TID_X :
7491     PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
7492 def INT_PTX_SREG_TID_Y :
7493     PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
7494 def INT_PTX_SREG_TID_Z :
7495     PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
7496 def INT_PTX_SREG_TID_W :
7497     PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
// ntid.{x,y,z,w}
7499 def INT_PTX_SREG_NTID_X :
7500     PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
7501 def INT_PTX_SREG_NTID_Y :
7502     PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
7503 def INT_PTX_SREG_NTID_Z :
7504     PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
7505 def INT_PTX_SREG_NTID_W :
7506     PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
// laneid / warpid / nwarpid
7508 def INT_PTX_SREG_LANEID :
7509     PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
7510 def INT_PTX_SREG_WARPID :
7511     PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
7512 def INT_PTX_SREG_NWARPID :
7513     PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
// ctaid.{x,y,z,w}
7515 def INT_PTX_SREG_CTAID_X :
7516     PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
7517 def INT_PTX_SREG_CTAID_Y :
7518     PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
7519 def INT_PTX_SREG_CTAID_Z :
7520     PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
7521 def INT_PTX_SREG_CTAID_W :
7522     PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
// nctaid.{x,y,z,w}
7524 def INT_PTX_SREG_NCTAID_X :
7525     PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
7526 def INT_PTX_SREG_NCTAID_Y :
7527     PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
7528 def INT_PTX_SREG_NCTAID_Z :
7529     PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
7530 def INT_PTX_SREG_NCTAID_W :
7531     PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
// smid / nsmid / gridid
7533 def INT_PTX_SREG_SMID :
7534     PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
7535 def INT_PTX_SREG_NSMID :
7536     PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
7537 def INT_PTX_SREG_GRIDID :
7538     PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
// lanemask_{eq,le,lt,ge,gt}
7540 def INT_PTX_SREG_LANEMASK_EQ :
7541     PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
7542 def INT_PTX_SREG_LANEMASK_LE :
7543     PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
7544 def INT_PTX_SREG_LANEMASK_LT :
7545     PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
7546 def INT_PTX_SREG_LANEMASK_GE :
7547     PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
7548 def INT_PTX_SREG_LANEMASK_GT :
7549     PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
// clock (32-bit) and clock64 (the only 64-bit read in this group)
7551 def INT_PTX_SREG_CLOCK :
7552     PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
7553 def INT_PTX_SREG_CLOCK64 :
7554     PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
// pm0..pm3
7556 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
7557 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
7558 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
7559 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
// Reads the warp size for int_nvvm_read_ptx_sreg_warpsize. Written out by hand
// because the operand printed here is the bare token WARP_SZ, whereas the
// PTX_READ_SREG_R32/R64 classes always prepend '%' to the name they are given.
7561 // TODO: It would be nice to use PTX_READ_SREG here, but it doesn't
7562 // handle the constant.
7563 def INT_PTX_SREG_WARPSIZE :
7564     NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
7565               [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
7567 // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
7568 // In addition to target-independent fields provided by WMMA_REGS, it adds
7569 // the fields commonly used to implement specific PTX instruction -- register
7570 // types and names, constraints, parts of assembly, etc.
//
// Parameters:
//   r  - the WMMA_REGS record to extend; its geom, frag and ptx_elt_type are
//        passed straight through to the WMMA_REGS base.
//   op - name of the operation the fragment is used with. Within this class
//        it is only compared against "mma" and "ldmatrix" when choosing
//        Predicates; callers also pass "load", "store" and "wmma.mma".
7571 class WMMA_REGINFO<WMMA_REGS r, string op>
7572       : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
7573   // NVPTX register types used to carry fragment data.
  // f16 uses Float16x2Regs, f32/f64 use the matching float class, and every
  // other listed element type uses Int32Regs. There is no catch-all arm.
7574   NVPTXRegClass regclass = !cond(
7575     !eq(ptx_elt_type, "f16") : Float16x2Regs,
7576     !eq(ptx_elt_type, "f32") : Float32Regs,
7577     !eq(ptx_elt_type, "f64") : Float64Regs,
7578     !eq(ptx_elt_type, "bf16") : Int32Regs,
7579     !eq(ptx_elt_type, "tf32") : Int32Regs,
7580     !eq(ptx_elt_type, "s32") : Int32Regs,
7581     !eq(ptx_elt_type, "b16") : Int32Regs,
7582     !eq(ptx_elt_type, "s8") : Int32Regs,
7583     !eq(ptx_elt_type, "u8") : Int32Regs,
7584     !eq(ptx_elt_type, "s4") : Int32Regs,
7585     !eq(ptx_elt_type, "u4") : Int32Regs,
7586     !eq(ptx_elt_type, "b1") : Int32Regs);
7588   // Instruction input/output arguments for the fragment.
  // One copy of regclass per element of `regs` (`regs` is not declared in this
  // class; presumably inherited from WMMA_REGS).
7589   list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));
7591   // List of register names for the fragment -- ["ra0", "ra1",...]
7592   list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
7594   // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
7595   string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";
7597   // Predicates for particular fragment variant. Technically those are
7598   // per-instruction predicates, but currently all fragments that can be used in
7599   // a given instruction are subject to the same constraints, so an instruction
7600   // can use predicates from any of its fragments. If/when this is no
7601   // longer the case, we can concat all per-fragment predicates to enforce that
7602   // all fragments of the instruction are viable.
  //
  // !cond tests its arms top to bottom and yields the value of the first one
  // that is true, so the order of the arms below is significant.
7603   list<Predicate> Predicates = !cond(
7604     // fp16 -> fp16/fp32 @ m16n16k16
7605     !and(!eq(geom, "m16n16k16"),
7606          !or(!eq(ptx_elt_type, "f16"),
7607              !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],
7609     !and(!eq(geom,"m8n8k4"),
7610          !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],
7612     // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
7613     !and(!or(!eq(geom, "m8n32k16"),
7614              !eq(geom, "m32n8k16")),
7615          !or(!eq(ptx_elt_type, "f16"),
7616              !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],
7618     // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
7619     !and(!or(!eq(geom,"m16n16k16"),
7620              !eq(geom,"m8n32k16"),
7621              !eq(geom,"m32n8k16")),
7622          !or(!eq(ptx_elt_type, "u8"),
7623              !eq(ptx_elt_type, "s8"),
7624              !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],
7626     !and(!or(!eq(geom,"m16n16k16"),
7627              !eq(geom,"m8n32k16"),
7628              !eq(geom,"m32n8k16")),
7629          !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],
7631     !and(!eq(geom,"m16n16k8"),
7632          !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70],
7634     !and(!eq(geom,"m16n16k8"),
7635          !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70],
7637     // b1 -> s32 @ m8n8k128(b1)
7638     !and(!ne(op,"mma"),
7639          !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63],
7641     // u4/s4 -> s32 @ m8n8k32 (u4/s4)
7642     !and(!ne(op,"mma"),
7643          !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],
7645     !or(!eq(geom,"m16n8k8"),
7646         !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65],
7648     !and(!ne(ptx_elt_type,"f64"),
7649          !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],
7651     // mma m8n8k32 requires higher PTX version
7652     !and(!eq(op,"mma"),
7653          !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65],
    // NOTE(review): the next arm tests the same two conditions (f64 and
    // m8n8k4) as the second arm of this !cond and yields the same list, so it
    // can never be the first match. It looks like a leftover duplicate.
7655     !and(!eq(ptx_elt_type,"f64"),
7656          !eq(geom, "m8n8k4")) : [hasSM80, hasPTX70],
    // Remaining mma-only geometries (m8n8k128 reaches here only when op is
    // "mma", because the earlier m8n8k128 arm takes every other op).
7658     !and(!eq(op,"mma"),
7659          !or(!eq(geom, "m16n8k16"),
7660              !eq(geom, "m16n8k4"),
7661              !eq(geom, "m16n8k32"),
7662              !eq(geom, "m16n8k64"),
7663              !eq(geom, "m8n8k128"),
7664              !eq(geom, "m16n8k128"),
7665              !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],
7667     !and(!eq(op,"ldmatrix"),
7668          !eq(ptx_elt_type,"b16"),
7669          !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);
7671   // template DAGs for instruction inputs/output.
  // Both pair ptx_regs with reg_names element by element; they differ only in
  // the dag operator (outs vs ins).
7672   dag Outs = !dag(outs, ptx_regs, reg_names);
7673   dag Ins = !dag(ins, ptx_regs, reg_names);
7676 // Convert dag of arguments into a dag to match given intrinsic.
//   Intr - intrinsic that becomes the operator of the resulting dag.
//   Ins  - instruction input dag, i.e. (ins Op:$name, ...).
// `ret` is Ins with four substitutions applied to each element, innermost
// first: ins -> Intr, MEMri -> ADDRri, MEMri64 -> ADDRri64, imem -> ADDRvar.
7677 class BuildPatternI<Intrinsic Intr, dag Ins> {
7678   // Build a dag pattern that matches the intrinsic call.
7679   dag ret = !foreach(tmp, Ins,
7680                           !subst(imem, ADDRvar,
7681                           !subst(MEMri64, ADDRri64,
7682                           !subst(MEMri, ADDRri,
7683                           !subst(ins, Intr, tmp)))));
7686 // Same as above, but uses PatFrag instead of an Intrinsic.
//   Intr - PatFrag that becomes the operator of the resulting dag.
//   Ins  - instruction input dag, i.e. (ins Op:$name, ...).
// `ret` is Ins with four substitutions applied to each element, innermost
// first: ins -> Intr, MEMri -> ADDRri, MEMri64 -> ADDRri64, imem -> ADDRvar.
7687 class BuildPatternPF<PatFrag Intr, dag Ins> {
7688   // Build a dag pattern that matches through the given PatFrag.
7689   dag ret = !foreach(tmp, Ins,
7690                           !subst(imem, ADDRvar,
7691                           !subst(MEMri64, ADDRri64,
7692                           !subst(MEMri, ADDRri,
7693                           !subst(ins, Intr, tmp)))));
7696 // Common WMMA-related fields used for building patterns for all MMA instructions.
//   _Intr - name of the intrinsic record, looked up with !cast.
//   _Args - list of (ins ...) dags that are concatenated into Args.
// The NVPTXInst base is given empty outs/ins, a "?" asm string and no pattern;
// the subclasses in this file override OutOperandList, InOperandList and
// AsmString with `let`.
7697 class WMMA_INSTR<string _Intr, list<dag> _Args>
7698   : NVPTXInst<(outs), (ins), "?", []> {
7699   Intrinsic Intr = !cast<Intrinsic>(_Intr);
7700   // Concatenate all arguments into a single dag.
7701   dag Args = !foldl((ins), _Args, a, b, !con(a,b));
7702   // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
  // NOTE(review): Intr is already declared as an Intrinsic, so the !cast here
  // looks redundant.
7703   dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
// Instruction template for a wmma.load of one fragment.
//   Frag       - fragment description; supplies outputs, predicates and the
//                frag/geom/ptx_elt_type parts of the asm string.
//   Layout     - layout name, printed as ".<Layout>".
//   Space      - address-space suffix printed verbatim: ".shared", ".global"
//                or "" (any other value selects the generic AS matcher).
//   WithStride - when set, adds an Int32Regs:$ldm input and prints ", $ldm".
//   SrcOp      - operand type used for the $src address.
7707 // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
7710 class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
7711                 DAGOperand SrcOp>
7712   : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
7713                               [!con((ins SrcOp:$src),
7714                                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7715     Requires<Frag.Predicates> {
7716   // Load/store intrinsics are overloaded on pointer's address space.
7717   // To match the right intrinsic, we need to build AS-constrained PatFrag.
7718   // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
7719   dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
7720   dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
7721   // Build PatFrag that only matches particular address space.
7722   PatFrag IntrFrag = PatFrag<PFOperands,
7723                              PFOperandsIntr,
7724                              !cond(!eq(Space, ".shared"): AS_match.shared,
7725                                    !eq(Space, ".global"): AS_match.global,
7726                                    true: AS_match.generic)>;
7727   // Build AS-constrained pattern.
7728   let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
  // Outputs are the fragment registers; inputs are Args plus a trailing
  // MmaCode:$ptx operand that the asm string refers to as ${ptx:aligned}.
7730   let OutOperandList = Frag.Outs;
7731   let InOperandList = !con(Args, (ins MmaCode:$ptx));
7732   let AsmString = "wmma.load."
7733                   # Frag.frag
7734                   # ".sync"
7735                   # "${ptx:aligned}"
7736                   # "." # Layout
7737                   # "." # Frag.geom
7738                   # Space
7739                   # "." # Frag.ptx_elt_type # " \t"
7740                   # Frag.regstring
7741                   # ", [$src]"
7742                   # !if(WithStride, ", $ldm", "")
7743                   # ";";
// Instruction template for a wmma.store.d of one fragment.
//   Frag       - fragment description; supplies the data inputs, predicates
//                and the geom/ptx_elt_type parts of the asm string.
//   Layout     - layout name, printed as ".<Layout>".
//   Space      - address-space suffix printed verbatim: ".shared", ".global"
//                or "" (any other value selects the generic AS matcher).
//   WithStride - when set, adds an Int32Regs:$ldm input and prints ", $ldm".
//   DstOp      - operand type used for the $dst address.
// Input order is $dst, the fragment registers, then the optional $ldm.
7747 // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
7749 class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
7750                    bit WithStride, DAGOperand DstOp>
7751   : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
7752                [!con((ins DstOp:$dst),
7753                      Frag.Ins,
7754                      !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7755     Requires<Frag.Predicates> {
7757   // Load/store intrinsics are overloaded on pointer's address space.
7758   // To match the right intrinsic, we need to build AS-constrained PatFrag.
7759   // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
7760   dag PFOperands = !con((ops node:$dst),
7761                         !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
7762                         !if(WithStride, (ops node:$ldm), (ops)));
7763   // Build PatFrag that only matches particular address space.
  // The fragment body is PFOperands with its `ops` operator replaced by Intr.
7764   PatFrag IntrFrag = PatFrag<PFOperands,
7765                              !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7766                              !cond(!eq(Space, ".shared"): AS_match.shared,
7767                                    !eq(Space, ".global"): AS_match.global,
7768                                    true: AS_match.generic)>;
7769   // Build AS-constrained pattern.
7770   let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
  // A store defines no registers; inputs are Args plus a trailing
  // MmaCode:$ptx operand that the asm string refers to as ${ptx:aligned}.
7772   let InOperandList  = !con(Args, (ins MmaCode:$ptx));
7773   let OutOperandList = (outs);
7774   let AsmString = "wmma.store.d.sync"
7775                   # "${ptx:aligned}"
7776                   # "." # Layout
7777                   # "." # Frag.geom
7778                   # Space
7779                   # "." # Frag.ptx_elt_type
7780                   # " \t[$dst],"
7781                   # Frag.regstring
7782                   # !if(WithStride, ", $ldm", "")
7783                   # ";";
7786 // Create all load/store variants
// Every anonymous def created inside the defset is collected into the list
// MMA_LDSTs. The loops enumerate layout x stride x address space x address
// operand kind; for each combination a WMMA_LOAD is emitted per fragment in
// NVVM_MMA_OPS.all_ld_ops and a WMMA_STORE_D per fragment in
// NVVM_MMA_OPS.all_st_ops, but only where NVVM_WMMA_LDST_SUPPORTED says the
// (fragment, layout) pair is valid.
7787 defset list<WMMA_INSTR> MMA_LDSTs  = {
7788   foreach layout = ["row", "col"] in {
7789     foreach stride = [false, true] in {
7790       foreach space = [".global", ".shared", ""] in {
7791         foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
7792           foreach frag = NVVM_MMA_OPS.all_ld_ops in
7793             if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
7794               def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
7795           foreach frag = NVVM_MMA_OPS.all_st_ops in
7796             if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
7797               def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
7798         } // addr
7799       } // space
7800     } // stride
7801   } // layout
7802 } // defset
7804 // B1 instruction variants need extra constraints.
//   FragA - fragment whose Predicates form the base of the result.
//   b1op  - b1 operation suffix; only ".and.popc" adds extra predicates.
// `ret` is FragA.Predicates, followed by [hasSM80, hasPTX71] when b1op is
// ".and.popc". Op and Frag simply re-expose the two parameters as fields.
7805 class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
7806   string Op = b1op;
7807   WMMA_REGINFO Frag = FragA;
7808   list<Predicate> ret = !listconcat(
7809     FragA.Predicates,
7810     !if(!eq(b1op, ".and.popc"), [hasSM80,hasPTX71],[])
7811   );
7813 // WMMA.MMA
// Instruction template for wmma.mma.
//   FragA/FragB/FragC - input fragments; their Ins dags become the inputs.
//   FragD             - result fragment; its Outs dag becomes the outputs.
//   ALayout, BLayout  - layout names, printed as ".<ALayout>.<BLayout>".
//   Satfinite         - non-zero appends ".satfinite".
//   rnd               - rounding-mode name; printed as ".<rnd>" unless empty.
//   b1op              - b1 operation suffix, printed right after "wmma.mma".
7814 class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
7815                WMMA_REGINFO FragC, WMMA_REGINFO FragD,
7816                string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
7817   : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
7818                          [FragA.Ins, FragB.Ins, FragC.Ins]>,
7819     // Requires does not seem to have effect on Instruction w/o Patterns.
7820     // We set it here anyways and propagate to the Pat<> we construct below.
7821     Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
7822   let OutOperandList = FragD.Outs;
7823   let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Type suffix: ".<D>.<C>" when A is f16, otherwise ".<D>.<A>.<B>.<C>".
  // NOTE(review): the default arm is spelled `1:` here, while the !cond
  // expressions in the load/store classes use `true:`.
7824   string TypeList = !cond(
7825     !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
7826                                      # "." # FragC.ptx_elt_type,
7827     1: "." # FragD.ptx_elt_type
7828        # "." # FragA.ptx_elt_type
7829        # "." # FragB.ptx_elt_type
7830        # "." # FragC.ptx_elt_type,
7831   );
  // Operands are printed in the order D, A, B, C, one register group per line.
7832   let AsmString = "wmma.mma"
7833                   # b1op
7834                   # ".sync"
7835                   # "${ptx:aligned}"
7836                   # "." # ALayout
7837                   # "." # BLayout
7838                   # "." # FragA.geom
7839                   # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
7840                   # TypeList
7841                   # !if(Satfinite, ".satfinite", "") # "\n\t\t"
7842                   # FragD.regstring # ",\n\t\t"
7843                   # FragA.regstring # ",\n\t\t"
7844                   # FragB.regstring # ",\n\t\t"
7845                   # FragC.regstring # ";";
// Create all wmma.mma variants and collect them in the list WMMAs.
// The loops enumerate A layout x B layout x satfinite x rounding mode x op x
// b1op (the b1op values come from NVVM_MMA_B1OPS<op>); a WMMA_MMA is emitted
// only where NVVM_WMMA_SUPPORTED accepts the combination. op[0..3] supply the
// A, B, C and D fragments, in that order.
7848 defset list<WMMA_INSTR> WMMAs  = {
7849   foreach layout_a = ["row", "col"] in {
7850     foreach layout_b = ["row", "col"] in {
7851       foreach satf = [0, 1] in {
7852         foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
7853           foreach op = NVVM_MMA_OPS.all_wmma_ops in {
7854             foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
7855               if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
7856                 def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
7857                               WMMA_REGINFO<op[1], "wmma.mma">,
7858                               WMMA_REGINFO<op[2], "wmma.mma">,
7859                               WMMA_REGINFO<op[3], "wmma.mma">,
7860                               layout_a, layout_b, satf, rnd, b1op>;
7861               }
7862             } // b1op
7863           } // op
7864         } // rnd
7865       } // satf
7866     } // layout_b
7867   } // layout_a
7868 } // defset
7870 // MMA
// Instruction template for mma.sync.aligned.
//   FragA/FragB/FragC - input fragments; their Ins dags become the inputs.
//   FragD             - result fragment; its Outs dag becomes the outputs.
//   ALayout, BLayout  - layout names, printed as ".<ALayout>.<BLayout>".
//   Satfinite         - non-zero appends ".satfinite".
//   b1op              - b1 operation suffix, printed after the type list.
// Unlike WMMA_MMA there is no rounding-mode parameter, the type list always
// names all four fragments, and the asm string spells ".aligned" literally
// instead of using ${ptx:aligned}.
7871 class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
7872                WMMA_REGINFO FragC, WMMA_REGINFO FragD,
7873                string ALayout, string BLayout, int Satfinite, string b1op>
7874   : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
7875                         [FragA.Ins, FragB.Ins, FragC.Ins]>,
7876     // Requires does not seem to have effect on Instruction w/o Patterns.
7877     // We set it here anyways and propagate to the Pat<> we construct below.
7878   Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
7879   let OutOperandList = FragD.Outs;
7880   let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Type suffix, always ".<D>.<A>.<B>.<C>".
7881   string TypeList = "." # FragD.ptx_elt_type
7882                     # "." # FragA.ptx_elt_type
7883                     # "." # FragB.ptx_elt_type
7884                     # "." # FragC.ptx_elt_type;
  // Operands are printed in the order D, A, B, C, one register group per line.
7885   let AsmString = "mma.sync.aligned."
7886                   # FragA.geom
7887                   # "." # ALayout
7888                   # "." # BLayout
7889                   # !if(Satfinite, ".satfinite", "")
7890                   # TypeList
7891                   # b1op # "\n\t\t"
7892                   # FragD.regstring # ",\n\t\t"
7893                   # FragA.regstring # ",\n\t\t"
7894                   # FragB.regstring # ",\n\t\t"
7895                   # FragC.regstring # ";";
// Create all mma variants and collect them in the list MMAs.
// The loops enumerate A layout x B layout x satfinite x op x b1op (the b1op
// values come from NVVM_MMA_B1OPS<op>); an MMA is emitted only where
// NVVM_MMA_SUPPORTED accepts the combination. op[0..3] supply the A, B, C and
// D fragments, in that order.
7898 defset list<WMMA_INSTR> MMAs  = {
7899   foreach layout_a = ["row", "col"] in {
7900     foreach layout_b = ["row", "col"] in {
7901       foreach satf = [0, 1] in {
7902         foreach op = NVVM_MMA_OPS.all_mma_ops in {
7903           foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
7904             if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
7905               def : MMA<WMMA_REGINFO<op[0], "mma">,
7906                         WMMA_REGINFO<op[1], "mma">,
7907                         WMMA_REGINFO<op[2], "mma">,
7908                         WMMA_REGINFO<op[3], "mma">,
7909                         layout_a, layout_b, satf, b1op>;
7910             }
7911           } // b1op
7912         } // op
7913       } // satf
7914     } // layout_b
7915   } // layout_a
7916 } // defset
7919 // ldmatrix.sync.aligned.m8n8.<frag>[|.trans][|.shared].b16
     //
     // Instruction class for a single ldmatrix variant.
     //   Frag       - fragment description; supplies geom, frag, ptx_elt_type,
     //                regstring, Outs and Predicates used below.
     //   Transposed - when set, ".trans" is emitted.
     //   Space      - address-space suffix inserted verbatim into the asm string;
     //                ".shared" selects AS_match.shared, any other value selects
     //                AS_match.generic.
     //   SrcOp      - operand class used for the $src address operand.
7921 class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
7922                DAGOperand SrcOp>
7923   : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
7924     Requires<Frag.Predicates> {
7925   // Build PatFrag that only matches particular address space.
       // `Intr` is not defined in this class (presumably provided by WMMA_INSTR
       // -- confirm).
7926   PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
7927                              !cond(!eq(Space, ".shared"): AS_match.shared,
7928                                    true: AS_match.generic)>;
7929   // Build AS-constrained pattern.
7930   let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
       // Results are the fragment's registers; inputs are `Args` (not defined in
       // this class) followed by an extra MmaCode:$ptx operand.
7932   let OutOperandList = Frag.Outs;
7933   let InOperandList = !con(Args, (ins MmaCode:$ptx));
       // Resulting text:
       //   ldmatrix.sync.aligned.<geom>.<frag>[.trans]<Space>.<type> <regs>, [$src];
7934   let AsmString = "ldmatrix.sync.aligned."
7935                   # Frag.geom
7936                   # "." # Frag.frag
7937                   # !if(Transposed, ".trans", "")
7938                   # Space
7939                   # "." # Frag.ptx_elt_type
7940                   # " " # Frag.regstring # ", [$src];";
7943 // Create all ldmatrix variants
     // One LDMATRIX record is emitted per (transposed, space, addr, frag)
     // combination whose frag passes NVVM_LDMATRIX_SUPPORTED; all of them are
     // collected into `LDMATRIXs`. `addr` enumerates the operand classes used
     // for the $src operand, and `space` is either ".shared" or empty.
7944 defset list<WMMA_INSTR> LDMATRIXs  = {
7945   foreach transposed = [false, true] in {
7946     foreach space = [".shared", ""] in {
7947       foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
7948         foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
7949           if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
7950             def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
7951                             addr>;
7952       } // addr
7953     } // space
7954   } // transposed
7955 } // defset
7957 // Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
7958 // dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
7959 // the instruction record.
     //
     // MMA_PAT maps wi.IntrinsicPattern to the instruction `wi`: the result dag is
     // wi.Args with its `ins` operator replaced by `wi`, and ptx.version appended
     // as the trailing operand. wi.Predicates is carried over via Requires<>.
7960 class MMA_PAT<WMMA_INSTR wi>
7961       : Pat<wi.IntrinsicPattern,
7962             !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
7963                  (wi ptx.version))>,
7964         Requires<wi.Predicates>;
7966 // Build intrinsic->instruction patterns for all MMA instructions.
     // Iterates over the concatenation of the four lists MMAs, WMMAs, MMA_LDSTs
     // and LDMATRIXs and emits one anonymous MMA_PAT per record.
7967 foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
7968   def : MMA_PAT<mma>;