Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / lib / Target / NVPTX / NVPTXIntrinsics.td
blob85eae44f349aa379e66e9fbcac359c6bff37a6e7
1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 0.0f (the C++ `==` comparison also accepts -0.0f).
def immFloat0 : PatLeaf<(fpimm), [{
    float f = N->getValueAPF().convertToFloat();
    return (f==0.0f);
}]>;
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
    float f = N->getValueAPF().convertToFloat();
    return (f==1.0f);
}]>;
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 0.0 (the C++ `==` comparison also accepts -0.0).
def immDouble0 : PatLeaf<(fpimm), [{
    double d = N->getValueAPF().convertToDouble();
    return (d==0.0);
}]>;
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
    double d = N->getValueAPF().convertToDouble();
    return (d==1.0);
}]>;
// Bundle of C++ predicate fragments, one per address space. Each fragment
// returns the result of ChkMemSDNodeAddressSpace for the node N and the
// named llvm::ADDRESS_SPACE_* constant.
def AS_match {
  code generic = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  code shared = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  code global = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}
// A node that will be replaced with the current PTX version.
class PTX {
  // Transform that ignores the incoming immediate and produces an i32
  // immediate holding Subtarget->getPTXVersion().
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;
  // (i32 0) will be XForm'ed to the currently used PTX version.
  dag version = (PTXVerXform (i32 0));
}
// Single instance through which `ptx.version` is referenced.
def ptx : PTX;
// Generates list of n sequential register names.
// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
// Built recursively: the list for n is the list for n-1 followed by
// prefix # (n-1); n == 0 yields the empty list.
class RegSeq<int n, string prefix> {
  list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                        [prefix # !sub(n, 1)]),
                            []);
}
// Values of `threadmask_imm` to iterate over when instantiating shuffle
// instructions: both register (0) and immediate (1) forms when `sync` is set,
// otherwise only a single placeholder value (0).
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync, [0, 1], [0]);
}
63 //-----------------------------------
64 // Synchronization and shuffle functions
65 //-----------------------------------
66 let isConvergent = true in {
// bar.sync with a literal barrier id of 0; selected for int_nvvm_barrier0.
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
                  "bar.sync \t0;",
      [(int_nvvm_barrier0)]>;
// bar.sync with one register operand; selected for int_nvvm_barrier_n.
def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
                  "bar.sync \t$src1;",
      [(int_nvvm_barrier_n Int32Regs:$src1)]>;
// bar.sync with two register operands; selected for int_nvvm_barrier.
def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                  "bar.sync \t$src1, $src2;",
      [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// Barrier-0 reductions. Each emits a braced PTX sequence that declares local
// predicate registers, turns the i32 $pred into a predicate with
// setp.ne.u32 ... 0, and feeds it to a bar.red.* instruction on barrier 0.

// int_nvvm_barrier0_popc: bar.red.popc.u32 writes the i32 result directly.
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
// int_nvvm_barrier0_and: the predicate result of bar.red.and.pred is widened
// to 1/0 in $dst with selp.u32.
def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             ".reg .pred \t%p2; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.and.pred \t%p2, 0, %p1; \n\t",
             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
// int_nvvm_barrier0_or: same shape as the AND form, using bar.red.or.pred.
def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             ".reg .pred \t%p2; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.or.pred \t%p2, 0, %p1; \n\t",
             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// bar.sync with an immediate operand; selected for int_nvvm_bar_sync.
def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
                             [(int_nvvm_bar_sync imm:$i)]>;

// bar.warp.sync, immediate (_I) and register (_R) operand forms.
def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
                             [(int_nvvm_bar_warp_sync imm:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
                             [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;

// barrier.sync with a single operand, immediate (_I) and register (_R) forms.
def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync imm:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync Int32Regs:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
// barrier.sync with id and count operands, selected for
// int_nvvm_barrier_sync_cnt. The two-letter suffix gives the operand kinds in
// ($id, $cnt) order: R = Int32Regs register, I = i32 immediate.
def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
// Operand-less "barrier.cluster.<variant>;" instruction selected for Intr.
// Preds defaults to PTX 7.8 / sm_90 and can be overridden per definition.
class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
                          list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
        NVPTXInst<(outs), (ins), "barrier.cluster." # variant # ";", [(Intr)]>,
        Requires<Preds>;

def barrier_cluster_arrive:
        INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;
// The relaxed form raises the PTX requirement to hasPTX<80>.
def barrier_cluster_arrive_relaxed:
        INT_BARRIER_CLUSTER<"arrive.relaxed",
        int_nvvm_barrier_cluster_arrive_relaxed, [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait:
        INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;
// One shfl / shfl.sync instruction variant.
//   sync           - use the "shfl.sync" form, with an extra $threadmask input.
//   mode           - shuffle mode text placed in the mnemonic and intrinsic name.
//   reg            - "i32" or "f32"; picks Int32Regs or Float32Regs.
//   return_pred    - also produce an Int1Regs $pred result ("$dst|$pred").
//   offset_imm, mask_imm, threadmask_imm
//                  - whether the matching operand is an i32 immediate rather
//                    than an Int32Regs register.
// The operand lists, assembly string and selection pattern are all computed
// from these parameters; the placeholder "?" / [] passed to NVPTXInst are
// overridden below.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
      : NVPTXInst<(outs), (ins), "?", []> {
  NVPTXRegClass rc = !cond(
    !eq(reg, "i32"): Int32Regs,
    !eq(reg, "f32"): Float32Regs);
  // Intrinsic record name: int_nvvm_shfl_[sync_]<mode>_<reg>[p].
  string IntrName = "int_nvvm_shfl_"
                    # !if(sync, "sync_", "")
                    # mode
                    # "_" # reg
                    # !if(return_pred, "p", "");
  Intrinsic Intr = !cast<Intrinsic>(IntrName);
  // Inputs in intrinsic order: [threadmask,] src, offset, mask.
  let InOperandList = !con(
    !if(sync,
        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
        (ins)),
    (ins rc:$src),
    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
    );
  let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
  // In the assembly the threadmask is printed last, after $mask.
  let AsmString = "shfl."
     # !if(sync, "sync.", "")
     # mode # ".b32\t"
     # "$dst"
     # !if(return_pred, "|$pred", "") # ", "
     # "$src, $offset, $mask"
     # !if(sync, ", $threadmask", "")
     # ";"
     ;
  // Pattern: (set <outs...>, (Intr <ins...>)), derived from the operand lists
  // by renaming the dag operators and replacing i32imm operands with imm.
  let Pattern = [!con(
      !foreach(tmp, OutOperandList,
             !subst(outs, set,
             !subst(i32imm, imm, tmp))),
      (set !foreach(tmp, InOperandList,
             !subst(ins, Intr,
             !subst(i32imm, imm, tmp))))
  )];
}
// Instantiate an anonymous SHFL_INSTR for every combination of sync, mode,
// register type, predicate result and immediate/register operand kinds.
// Sync forms require sm_30 + PTX 6.0; non-sync forms require sm_30 + hasSHFL.
foreach sync = [false, true] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach regclass = ["i32", "f32"] in {
      foreach return_pred = [false, true] in {
        foreach offset_imm = [false, true] in {
          foreach mask_imm = [false, true] in {
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync, [hasSM<30>, hasPTX<60>], [hasSM<30>, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}
// vote.{all,any,uni,ballot}
// One anonymous instruction per instantiation: takes an Int1Regs predicate and
// writes a result of class `regclass`; `mode` is the mnemonic suffix.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
              "vote." # mode # " \t$dest, $pred;",
              [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
// vote.sync.{all,any,uni,ballot}
// Two instructions per instantiation, differing only in the $mask operand:
// `i` takes an i32 immediate, `r` an Int32Regs register. The mask comes first
// in the intrinsic but is printed last in the assembly.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
              "vote.sync." # mode # " \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
              "vote.sync." # mode # " \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
// match.any.sync.<ptxtype>: i32 result from a ($mask, $value) pair.
// Four forms cover immediate/register choices for the two inputs; ImmOp is
// the immediate operand type used for $value and regclass its register class.
// NOTE(review): the suffix letters do not follow one consistent
// (mask, value) order across the four defs (compare `ir` and `ri`); the
// operand lists below are authoritative.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}

defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
                                        i32imm>;
defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
                                        i64imm>;
// match.all.sync.<ptxtype> with two results: an i32 $dest and an Int1Regs
// $pred, printed as "$dest|$pred". The four forms cover immediate/register
// choices for $mask and $value, as in the operand lists below.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}
defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
                                         i32imm>;
defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
                                         i64imm>;
// redux.sync.<BinOp>.<PTXType>: i32 result from an i32 source and an i32 mask,
// both in registers. Requires PTX 7.0 and sm_80.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
          "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
          [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX<70>, hasSM<80>]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;
305 } // isConvergent = true
307 //-----------------------------------
308 // Explicit Memory Fence Functions
309 //-----------------------------------
// Operand-less fence instruction: StrOp is the complete assembly text and
// IntOP the argument-less intrinsic it is selected for.
class MEMBAR<string StrOp, Intrinsic IntOP> :
              NVPTXInst<(outs), (ins),
            StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;

// The cluster fence additionally requires PTX 7.8 and sm_90.
def INT_FENCE_SC_CLUSTER:
       MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
       Requires<[hasPTX<78>, hasSM<90>]>;
322 //-----------------------------------
323 // Async Copy Functions
324 //-----------------------------------
// cp.async.mbarrier.arrive[NoInc][AddrSpace].b64 [$addr];
// NoInc and AddrSpace are spliced verbatim into the mnemonic (pass "" to omit).
// _32 / _64 take the address in an Int32Regs / Int64Regs register.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
            !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
            [(Intrin Int32Regs:$addr)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
            !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
            [(Intrin Int64Regs:$addr)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE :
  CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<"", ".shared", int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", "", int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
// cp.async.<cc>.shared.global [$dst], [$src], <cpsize>[, $src_size];
//   cc      - text placed after "cp.async." in the mnemonic.
//   cpsize  - copy size, printed as a literal in the assembly.
//   Intrin  - intrinsic for the two-operand form.
//   IntrinS - intrinsic for the form with an extra i32 src_size operand.
// _32/_64 select the register width of the two addresses; the `s` forms take
// src_size in a register and the `si` forms take it as an immediate.
multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin, Intrinsic IntrinS> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
            !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
            !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  // Variant with src_size parameter
  def _32s : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size),
             !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
             [(IntrinS Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _32si: NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size),
             !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
             [(IntrinS Int32Regs:$dst, Int32Regs:$src, imm:$src_size)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64s : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size),
             !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
             [(IntrinS Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64si: NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size),
             !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
             [(IntrinS Int64Regs:$dst, Int64Regs:$src, imm:$src_size)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "4", int_nvvm_cp_async_ca_shared_global_4,
                                      int_nvvm_cp_async_ca_shared_global_4_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "8", int_nvvm_cp_async_ca_shared_global_8,
                                      int_nvvm_cp_async_ca_shared_global_8_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "16", int_nvvm_cp_async_ca_shared_global_16,
                                       int_nvvm_cp_async_ca_shared_global_16_s>;

defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
  CP_ASYNC_SHARED_GLOBAL_I<"cg", "16", int_nvvm_cp_async_cg_shared_global_16,
                                       int_nvvm_cp_async_cg_shared_global_16_s>;
// cp.async group management. All three require PTX 7.0 and sm_80.
def CP_ASYNC_COMMIT_GROUP :
  NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

// The group count is matched as a target immediate (timm) only; there is no
// register form.
def CP_ASYNC_WAIT_GROUP :
  NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
  [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

def CP_ASYNC_WAIT_ALL :
  NVPTXInst<(outs), (ins), "cp.async.wait_all;",
  [(int_nvvm_cp_async_wait_all)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;
404 //-----------------------------------
405 // MBarrier Functions
406 //-----------------------------------
// mbarrier.init[AddrSpace].b64 [$addr], $count;
// _32 / _64 take the address in an Int32Regs / Int64Regs register; $count is
// always an Int32Regs register.
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
    [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
    [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared",
                                          int_nvvm_mbarrier_init_shared>;
// mbarrier.inval[AddrSpace].b64 [$addr];
// _32 / _64 take the address in an Int32Regs / Int64Regs register.
multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
           !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
    [(Intrin Int32Regs:$addr)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
           !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
    [(Intrin Int64Regs:$addr)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
                                            int_nvvm_mbarrier_inval_shared>;
// mbarrier.arrive[AddrSpace].b64 $state, [$addr];
// Produces a 64-bit $state result; _32 / _64 select the address register
// width.
multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
           !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
    [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
           !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
    [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED :
  MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
// mbarrier.arrive.noComplete[AddrSpace].b64 $state, [$addr], $count;
// Like MBARRIER_ARRIVE but with an additional Int32Regs $count input.
multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
    [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
    [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE :
  MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;
// mbarrier.arrive_drop[AddrSpace].b64 $state, [$addr];
// Produces a 64-bit $state result; _32 / _64 select the address register
// width.
multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
           !strconcat("mbarrier.arrive_drop", AddrSpace,
                      ".b64 $state, [$addr];"),
           [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
           !strconcat("mbarrier.arrive_drop", AddrSpace,
                      ".b64 $state, [$addr];"),
           [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP :
  MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED :
  MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
// mbarrier.arrive_drop.noComplete[AddrSpace].b64 $state, [$addr], $count;
// Like MBARRIER_ARRIVE_DROP but with an additional Int32Regs $count input.
multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
           [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
           [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
                       int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
// mbarrier.test_wait[AddrSpace].b64 $res, [$addr], $state;
// Takes an address and a 64-bit $state and produces an Int1Regs result;
// _32 / _64 select the address register width.
multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
           !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
           [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
           !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
           [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_TEST_WAIT :
  MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED :
  MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
// mbarrier.pending_count.b64 $res, $state;
// Maps a 64-bit $state register to an i32 result; no memory operand.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
           NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
           "mbarrier.pending_count.b64 $res, $state;",
           [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;

def MBARRIER_PENDING_COUNT :
  MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
537 //-----------------------------------
538 // Math Functions
539 //-----------------------------------
// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.
//
// Four patterns per type cover both operand orders of the outer fmin and of
// the inner fmax; each selects a same-type CVT with the CvtSAT modifier.

def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// One-input instruction selected for intrinsic IntOP.
// We need a full string for OpcStr here because we need to deal with case like
// INT_PTX_RECIP.
// Preds is the (optional) list of predicates the instruction requires.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
  NVPTXRegClass src_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
            : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
            OpcStr,
        [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
        Requires<Preds>;
// Two-input instruction selected for intrinsic IntOP.
// We need a full string for OpcStr here because we need to deal with the case
// like INT_PTX_NATIVE_POWR_F.
// Preds is the (optional) list of predicates the instruction requires.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
  NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP,
  list<Predicate> Preds = []>
            : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1),
            OpcStr,
        [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
        Requires<Preds>;
// Three-input instruction selected for intrinsic IntOP. OpcStr is the full
// assembly string; Preds is the (optional) list of required predicates.
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
  NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
  NVPTXRegClass s2_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
            : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
            OpcStr,
        [(set t_regclass:$dst,
          (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>,
          Requires<Preds>;
// MISC

// prmt.b32 on three i32 register inputs; selected for int_nvvm_prmt.
def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
612 // Min Max
615 def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
616   Float32Regs, Float32Regs, int_nvvm_fmin_f>;
617 def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
618   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
619 def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
620   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f,
621   [hasPTX<70>, hasSM<80>]>;
622 def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
623   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f,
624   [hasPTX<70>, hasSM<80>]>;
625 def INT_NVVM_FMIN_XORSIGN_ABS_F :
626   F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
627     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_xorsign_abs_f,
628     [hasPTX<72>, hasSM<86>]>;
629 def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F :
630   F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
631     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_xorsign_abs_f,
632     [hasPTX<72>, hasSM<86>]>;
633 def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F :
634   F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
635     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_xorsign_abs_f,
636     [hasPTX<72>, hasSM<86>]>;
637 def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F :
638   F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
639     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_xorsign_abs_f,
640     [hasPTX<72>, hasSM<86>]>;
// f32 maximum. Same layout as the f32 minimum defs: one F_MATH_2 record per
// int_nvvm_fmax_* intrinsic, Float32Regs for result and sources, a PTX "max.*"
// asm string, [hasPTX<70>, hasSM<80>] on the .NaN forms and
// [hasPTX<72>, hasSM<86>] on the .xorsign.abs forms.
642 def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
643   Float32Regs, Float32Regs, int_nvvm_fmax_f>;
644 def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
645   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
646 def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
647   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
648   [hasPTX<70>, hasSM<80>]>;
649 def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
650   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
651   [hasPTX<70>, hasSM<80>]>;
652 def INT_NVVM_FMAX_XORSIGN_ABS_F :
653   F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
654     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f,
655     [hasPTX<72>, hasSM<86>]>;
656 def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F :
657   F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
658     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f,
659     [hasPTX<72>, hasSM<86>]>;
660 def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F :
661   F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
662     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f,
663     [hasPTX<72>, hasSM<86>]>;
664 def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
665   F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
666     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
667     [hasPTX<72>, hasSM<86>]>;
// f64 minimum / maximum: int_nvvm_fmin_d and int_nvvm_fmax_d on Float64Regs,
// with no predicate list and no .ftz/.NaN/.xorsign variants defined here.
669 def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
670   Float64Regs, Float64Regs, int_nvvm_fmin_d>;
671 def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
672   Float64Regs, Float64Regs, int_nvvm_fmax_d>;
675 // Min Max f16, f16x2, bf16, bf16x2
// Descriptor for one half-precision min/max variant, consumed by the MIN_MAX
// multiclass:
//   V     - record-name suffix such as "_ftz_f16"; MIN_MAX also derives the
//           PTX modifier string from it.
//   I     - the intrinsic to select.
//   RC    - register class used for the result and both sources.
//   Preds - predicate list; defaults to [hasPTX<70>, hasSM<80>].
678 class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
679                     list<Predicate> Preds = [hasPTX<70>, hasSM<80>]> {
680   string Variant = V;
681   Intrinsic Intr = I;
682   NVPTXRegClass RegClass = RC;
683   list<Predicate> Predicates = Preds;
// Emits one F_MATH_2 record per f16 / f16x2 / bf16 / bf16x2 min-or-max variant.
// IntName is "min" or "max": each tuple picks the int_nvvm_fmin_* intrinsic
// when IntName equals "min" and the int_nvvm_fmax_* one otherwise.
// For every tuple P the record is named <defm prefix> # P.Variant and its asm
// string is IntName followed by P.Variant with each "_" replaced by ".", e.g.
// "_ftz_NaN_f16" under "min" becomes "min.ftz.NaN.f16 \t$dst, $src0, $src1;".
// Scalar f16/bf16 variants use Int16Regs and the x2 variants use Int32Regs.
// Tuples without an explicit list inherit MIN_MAX_TUPLE's default predicates;
// the xorsign_abs tuples pass [hasPTX<72>, hasSM<86>].
// No _ftz_ tuples are listed for bf16 / bf16x2.
686 multiclass MIN_MAX<string IntName> {
687   foreach P = [
688     MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16,
689       int_nvvm_fmax_f16), Int16Regs>,
690     MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16,
691       int_nvvm_fmax_ftz_f16), Int16Regs>,
692     MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16,
693       int_nvvm_fmax_nan_f16), Int16Regs>,
694     MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"),
695       int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>,
696     MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"),
697       int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
698       Int16Regs, [hasPTX<72>, hasSM<86>]>,
699     MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"),
700       int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
701       Int16Regs, [hasPTX<72>, hasSM<86>]>,
702     MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
703       int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
704       Int16Regs, [hasPTX<72>, hasSM<86>]>,
705     MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
706       int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
707       int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>,
708     MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2,
709       int_nvvm_fmax_f16x2), Int32Regs>,
710     MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"),
711       int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>,
712     MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"),
713       int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>,
714     MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"),
715       int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>,
716     MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
717       int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
718       Int32Regs, [hasPTX<72>, hasSM<86>]>,
719     MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
720       int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2),
721       Int32Regs, [hasPTX<72>, hasSM<86>]>,
722     MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
723       int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2),
724       Int32Regs, [hasPTX<72>, hasSM<86>]>,
725     MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
726       int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
727       int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
728       Int32Regs, [hasPTX<72>, hasSM<86>]>,
729     MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"),
730       int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>,
731     MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16,
732       int_nvvm_fmax_nan_bf16), Int16Regs>,
733     MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"),
734       int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
735       Int16Regs, [hasPTX<72>, hasSM<86>]>,
736     MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"),
737       int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16),
738       Int16Regs, [hasPTX<72>, hasSM<86>]>,
739     MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2,
740       int_nvvm_fmax_bf16x2), Int32Regs>,
741     MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"),
742       int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>,
743     MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
744       int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
745       Int32Regs, [hasPTX<72>, hasSM<86>]>,
746     MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
747       int_nvvm_fmin_nan_xorsign_abs_bf16x2,
748       int_nvvm_fmax_nan_xorsign_abs_bf16x2),
749       Int32Regs, [hasPTX<72>, hasSM<86>]>] in {
750         def P.Variant : F_MATH_2<!strconcat(
751           IntName, !subst("_", ".", P.Variant), " \t$dst, $src0, $src1;"),
752           P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
753   }
// Instantiate the half-precision min and max records from MIN_MAX.
// NOTE(review): the "max" instantiation uses the prefix INT_NVVM_FMAN, which
// looks like a typo for INT_NVVM_FMAX (the f32/f64 max records use
// INT_NVVM_FMAX_*). The prefix only affects generated record names, not the
// emitted PTX; renaming it would change those names, so confirm nothing refers
// to INT_NVVM_FMAN_* before correcting it.
756 defm INT_NVVM_FMIN : MIN_MAX<"min">;
757 defm INT_NVVM_FMAN : MIN_MAX<"max">;
760 // Multiplication
// Multiplication intrinsics, all two-operand F_MATH_2 records with no
// predicate list:
//   mul.hi.{s32,u32,s64,u64}          - int_nvvm_mulhi_{i,ui,ll,ull}
//   mul.{rn,rz,rm,rp}[.ftz].f32       - int_nvvm_mul_<rnd>[_ftz]_f
//   mul.{rn,rz,rm,rp}.f64             - int_nvvm_mul_<rnd>_d
//   mul24.lo.{s32,u32}                - int_nvvm_mul24_{i,ui}
763 def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
764   Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
765 def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
766   Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
768 def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
769   Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
770 def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
771   Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
773 def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
774   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
775 def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
776   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
777 def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
778   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
779 def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
780   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
781 def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
782   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
783 def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
784   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
785 def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
786   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
787 def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
788   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
790 def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
791   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
792 def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
793   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
794 def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
795   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
796 def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
797   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
799 def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
800   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
801 def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
802   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
805 // Div
// Division intrinsics as F_MATH_2 records with no predicate list:
//   div.approx[.ftz].f32              - int_nvvm_div_approx[_ftz]_f
//   div.{rn,rz,rm,rp}[.ftz].f32       - int_nvvm_div_<rnd>[_ftz]_f
//   div.{rn,rz,rm,rp}.f64             - int_nvvm_div_<rnd>_d
// No approx form is defined for f64 here.
808 def INT_NVVM_DIV_APPROX_FTZ_F
809   : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
810     Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
811 def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
812   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
814 def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
815   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
816 def INT_NVVM_DIV_RN_F     : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
817   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
818 def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
819   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
820 def INT_NVVM_DIV_RZ_F     : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
821   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
822 def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
823   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
824 def INT_NVVM_DIV_RM_F     : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
825   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
826 def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
827   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
828 def INT_NVVM_DIV_RP_F     : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
829   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
831 def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
832   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
833 def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
834   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
835 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
836   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
837 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
838   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
841 // Sad
// Three-operand sad.s32 / sad.u32 for int_nvvm_sad_i / int_nvvm_sad_ui, built
// with F_MATH_3 (declared outside this excerpt); every operand is Int32Regs.
844 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
845   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
846 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
847   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
850 // Floor  Ceil
// floor / ceil have no instruction of their own here: each intrinsic is
// rewritten to a same-type CVT (CVT_f32_f32 or CVT_f64_f64, defined elsewhere)
// with a mode operand - CvtRMI[_FTZ] for floor and CvtRPI[_FTZ] for ceil.
853 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
854           (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
855 def : Pat<(int_nvvm_floor_f Float32Regs:$a),
856           (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
857 def : Pat<(int_nvvm_floor_d Float64Regs:$a),
858           (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
860 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
861           (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
862 def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
863           (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
864 def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
865           (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
868 // Abs
// Absolute value: abs[.ftz].f32 and abs.f64 as single-operand F_MATH_1 records
// (F_MATH_1 is declared outside this excerpt) for int_nvvm_fabs_{ftz_f,f,d}.
871 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
872   Float32Regs, int_nvvm_fabs_ftz_f>;
873 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
874   Float32Regs, int_nvvm_fabs_f>;
876 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
877   Float64Regs, int_nvvm_fabs_d>;
880 // Abs, Neg bf16, bf16x2
// abs / neg on bf16 (Int16Regs) and bf16x2 (Int32Regs). All four records pass
// [hasPTX<70>, hasSM<80>] as their predicate list.
883 def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs,
884   Int16Regs, int_nvvm_abs_bf16, [hasPTX<70>, hasSM<80>]>;
885 def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs,
886   Int32Regs, int_nvvm_abs_bf16x2, [hasPTX<70>, hasSM<80>]>;
887 def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs,
888   Int16Regs, int_nvvm_neg_bf16, [hasPTX<70>, hasSM<80>]>;
889 def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs,
890   Int32Regs, int_nvvm_neg_bf16x2, [hasPTX<70>, hasSM<80>]>;
893 // Round
// int_nvvm_round_* is rewritten to a same-type CVT with the CvtRNI[_FTZ] mode
// operand.
896 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
897           (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
898 def : Pat<(int_nvvm_round_f Float32Regs:$a),
899           (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
900 def : Pat<(int_nvvm_round_d Float64Regs:$a),
901           (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
904 // Trunc
// int_nvvm_trunc_* is rewritten to a same-type CVT with the CvtRZI[_FTZ] mode
// operand.
907 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
908           (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
909 def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
910           (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
911 def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
912           (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
915 // Saturate
// int_nvvm_saturate_* is rewritten to a same-type CVT with the CvtSAT[_FTZ]
// mode operand.
918 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
919           (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
920 def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
921           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
922 def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
923           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
926 // Exp2  Log2
// ex2.approx and lg2.approx as F_MATH_1 records. The f32 (with and without
// .ftz) and f64 forms pass no predicate list. Only ex2 has f16 (Int16Regs) and
// f16x2 (Int32Regs) forms, and those pass [hasPTX<70>, hasSM<75>].
929 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
930   Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
931 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
932   Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
933 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
934   Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
935 def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
936   Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
937 def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
938   Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;
940 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
941   Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
942 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
943   Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
944 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
945   Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
948 // Sin  Cos
// sin.approx / cos.approx, f32 only, each with and without .ftz; no predicate
// list is passed.
951 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
952   Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
953 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
954   Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
956 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
957   Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
958 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
959   Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
962 // Fma
// Descriptor for one fma variant, consumed by the FMA_INST multiclass:
//   V     - record-name suffix such as "_rn_ftz_f32"; FMA_INST also derives
//           the PTX modifier string from it.
//   I     - the intrinsic to select.
//   RC    - register class used for the result and all three sources.
//   Preds - predicate list; defaults to empty (unlike MIN_MAX_TUPLE).
965 class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
966                 list<Predicate> Preds = []> {
967   string Variant = V;
968   Intrinsic Intr = I;
969   NVPTXRegClass RegClass = RC;
970   list<Predicate> Predicates = Preds;
// Emits one F_MATH_3 record per fma variant. For every tuple P the record is
// named <defm prefix> # P.Variant and its asm string is "fma" followed by
// P.Variant with each "_" replaced by ".", e.g. "_rn_ftz_f32" becomes
// "fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;".
// Predicates per group, as listed below:
//   f64 / f32                      - none (FMA_TUPLE default)
//   f16, f16x2 plain/ftz/sat       - [hasPTX<42>, hasSM<53>]
//   f16, f16x2 relu                - [hasPTX<70>, hasSM<80>]
//   bf16, bf16x2                   - [hasPTX<70>, hasSM<80>]
// NOTE(review): the PTX ISA description of fma for .bf16 appears to list only
// the .rn and .rn.relu forms; confirm that the _ftz_ / _sat_ bf16 tuples below
// produce PTX that ptxas accepts.
973 multiclass FMA_INST {
974   foreach P = [
975     FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
976     FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
977     FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
978     FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,
980     FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
981     FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
982     FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
983     FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
984     FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
985     FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
986     FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
987     FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,
989     FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
990     FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs,
991       [hasPTX<42>, hasSM<53>]>,
992     FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs,
993       [hasPTX<42>, hasSM<53>]>,
994     FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs,
995       [hasPTX<42>, hasSM<53>]>,
996     FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs,
997       [hasPTX<70>, hasSM<80>]>,
998     FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs,
999       [hasPTX<70>, hasSM<80>]>,
1001     FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
1002     FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, Int16Regs,
1003       [hasPTX<70>, hasSM<80>]>,
1004     FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, Int16Regs,
1005       [hasPTX<70>, hasSM<80>]>,
1006     FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs,
1007       [hasPTX<70>, hasSM<80>]>,
1008     FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
1009       [hasPTX<70>, hasSM<80>]>,
1010     FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs,
1011       [hasPTX<70>, hasSM<80>]>,
1013     FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs,
1014       [hasPTX<42>, hasSM<53>]>,
1015     FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs,
1016       [hasPTX<42>, hasSM<53>]>,
1017     FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs,
1018       [hasPTX<42>, hasSM<53>]>,
1019     FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
1020       Int32Regs, [hasPTX<42>, hasSM<53>]>,
1021     FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs,
1022       [hasPTX<70>, hasSM<80>]>,
1023     FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
1024       Int32Regs, [hasPTX<70>, hasSM<80>]>,
1025     FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
1026       [hasPTX<70>, hasSM<80>]>,
1027     FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
1028       [hasPTX<70>, hasSM<80>]>
1029   ] in {
1030     def P.Variant :
1031       F_MATH_3<!strconcat("fma",
1032         !subst("_", ".", P.Variant), " \t$dst, $src0, $src1, $src2;"),
1033         P.RegClass, P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
1034   }
// Instantiate every FMA_INST variant; records are named INT_NVVM_FMA<variant>,
// e.g. INT_NVVM_FMA_rn_f64.
1037 defm INT_NVVM_FMA : FMA_INST;
1040 // Rcp
// Reciprocal as F_MATH_1 records with no predicate list:
//   rcp.{rn,rz,rm,rp}[.ftz].f32    - int_nvvm_rcp_<rnd>[_ftz]_f
//   rcp.{rn,rz,rm,rp}.f64          - int_nvvm_rcp_<rnd>_d
//   rcp.approx.ftz.{f32,f64}       - int_nvvm_rcp_approx_ftz_{f,d}
// Only the .ftz flavour of rcp.approx is defined here.
1043 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
1044   Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
1045 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
1046   Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
1047 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
1048   Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
1049 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
1050   Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
1051 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
1052   Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
1053 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
1054   Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
1055 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
1056   Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
1057 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
1058   Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
1060 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
1061   Float64Regs, int_nvvm_rcp_rn_d>;
1062 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
1063   Float64Regs, int_nvvm_rcp_rz_d>;
1064 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
1065   Float64Regs, int_nvvm_rcp_rm_d>;
1066 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
1067   Float64Regs, int_nvvm_rcp_rp_d>;
1069 def INT_NVVM_RCP_APPROX_FTZ_F : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
1070   Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
1071 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
1072   Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
1075 // Sqrt
// Square root as F_MATH_1 records with no predicate list:
//   sqrt.{rn,rz,rm,rp}[.ftz].f32   - int_nvvm_sqrt_<rnd>[_ftz]_f
//   sqrt.approx[.ftz].f32          - int_nvvm_sqrt_approx[_ftz]_f
//   sqrt.{rn,rz,rm,rp}.f64         - int_nvvm_sqrt_<rnd>_d
// The RN and APPROX f32 records are also the targets of the int_nvvm_sqrt_f
// patterns that follow these definitions.
1078 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
1079   Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
1080 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
1081   Float32Regs, int_nvvm_sqrt_rn_f>;
1082 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
1083   Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
1084 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
1085   Float32Regs, int_nvvm_sqrt_rz_f>;
1086 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
1087   Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
1088 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
1089   Float32Regs, int_nvvm_sqrt_rm_f>;
1090 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
1091   Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
1092 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
1093   Float32Regs, int_nvvm_sqrt_rp_f>;
1094 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
1095   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
1096 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
1097   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
1099 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
1100   Float64Regs, int_nvvm_sqrt_rn_d>;
1101 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
1102   Float64Regs, int_nvvm_sqrt_rz_d>;
1103 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
1104   Float64Regs, int_nvvm_sqrt_rm_d>;
1105 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
1106   Float64Regs, int_nvvm_sqrt_rp_d>;
1108 // nvvm_sqrt intrinsic
// The generic int_nvvm_sqrt_f intrinsic has four candidate lowerings, told
// apart only by their Requires lists:
//   [doF32FTZ, do_SQRTF32_RN] -> INT_NVVM_SQRT_RN_FTZ_F
//   [do_SQRTF32_RN]           -> INT_NVVM_SQRT_RN_F
//   [doF32FTZ]                -> INT_NVVM_SQRT_APPROX_FTZ_F
//   (no predicates)           -> INT_NVVM_SQRT_APPROX_F
// NOTE(review): several of these can match at once (the last one always can);
// which wins presumably depends on pattern ordering/priority - confirm before
// reordering or adding entries.
1109 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1110           (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
1111 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1112           (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
1113 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1114           (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
1115 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1116           (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
1119 // Rsqrt
// Reciprocal square root: rsqrt.approx[.ftz].f32 and rsqrt.approx.f64 as
// F_MATH_1 records with no predicate list.
1122 def INT_NVVM_RSQRT_APPROX_FTZ_F
1123   : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
1124     int_nvvm_rsqrt_approx_ftz_f>;
1125 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
1126   Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
1127 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
1128   Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
1131 // Add
// Addition with an explicit rounding mode, as F_MATH_2 records with no
// predicate list:
//   add.{rn,rz,rm,rp}[.ftz].f32    - int_nvvm_add_<rnd>[_ftz]_f
//   add.{rn,rz,rm,rp}.f64          - int_nvvm_add_<rnd>_d
1134 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
1135   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
1136 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
1137   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
1138 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
1139   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
1140 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
1141   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
1142 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
1143   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
1144 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
1145   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
1146 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
1147   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
1148 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
1149   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
1151 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
1152   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
1153 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
1154   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
1155 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
1156   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
1157 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
1158   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
1161 // Convert
// f64 -> f32: int_nvvm_d2f_<rnd>[_ftz] is rewritten to CVT_f32_f64 (defined
// elsewhere) with the matching CvtRN/CvtRZ/CvtRM/CvtRP[_FTZ] mode operand.
1164 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
1165           (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
1166 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
1167           (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
1168 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
1169           (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
1170 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
1171           (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
1172 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
1173           (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
1174 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
1175           (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
1176 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
1177           (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
1178 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
1179           (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
// f64 -> 32-bit integer: int_nvvm_d2i_<rnd> uses CVT_s32_f64 and
// int_nvvm_d2ui_<rnd> uses CVT_u32_f64. Float-to-integer conversions use the
// integer-rounding mode operands (CvtRNI/CvtRZI/CvtRMI/CvtRPI).
1181 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
1182           (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
1183 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
1184           (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
1185 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
1186           (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
1187 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
1188           (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
1190 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
1191           (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
1192 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
1193           (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
1194 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
1195           (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
1196 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
1197           (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
// 32-bit integer -> f64: int_nvvm_i2d_<rnd> uses CVT_f64_s32 and
// int_nvvm_ui2d_<rnd> uses CVT_f64_u32, with the CvtRN/CvtRZ/CvtRM/CvtRP mode
// operands (no _FTZ forms are listed).
1199 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
1200           (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
1201 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
1202           (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
1203 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
1204           (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
1205 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
1206           (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
1208 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
1209           (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
1210 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
1211           (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
1212 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
1213           (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
1214 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
1215           (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
// f32 -> 32-bit integer: int_nvvm_f2i_<rnd>[_ftz] uses CVT_s32_f32 and
// int_nvvm_f2ui_<rnd>[_ftz] uses CVT_u32_f32, each with the matching
// CvtRNI/CvtRZI/CvtRMI/CvtRPI[_FTZ] mode operand.
1217 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
1218           (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1219 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
1220           (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
1221 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
1222           (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1223 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
1224           (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
1225 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
1226           (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1227 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
1228           (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
1229 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
1230           (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1231 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
1232           (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
1234 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
1235           (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1236 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
1237           (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
1238 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
1239           (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1240 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
1241           (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
1242 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
1243           (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1244 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
1245           (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
1246 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
1247           (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1248 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
1249           (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
// 32-bit integer -> f32: int_nvvm_i2f_<rnd> uses CVT_f32_s32 and
// int_nvvm_ui2f_<rnd> uses CVT_f32_u32, with the CvtRN/CvtRZ/CvtRM/CvtRP mode
// operands.
1251 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
1252           (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
1253 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
1254           (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
1255 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
1256           (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
1257 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
1258           (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
1260 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
1261           (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
1262 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
1263           (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
1264 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
1265           (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
1266 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
1267           (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
// Two f32 values -> one packed bf16x2 / f16x2 value. The intrinsics take two
// Float32Regs operands and are rewritten to CVT_bf16x2_f32 / CVT_f16x2_f32
// (defined elsewhere) with CvtRN, CvtRN_RELU, CvtRZ or CvtRZ_RELU.
// NOTE(review): no predicates are attached to these patterns here; any target
// gating presumably lives on the CVT_* instructions - confirm.
1269 def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
1270           (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
1271 def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
1272           (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
1273 def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
1274           (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
1275 def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
1276           (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
1278 def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
1279           (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
1280 def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
1281           (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
1282 def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
1283           (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
1284 def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
1285           (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
// Single f32 -> bf16: int_nvvm_f2bf16_{rn,rz}[_relu] is rewritten to
// CVT_bf16_f32 (defined elsewhere) with CvtRN, CvtRN_RELU, CvtRZ or
// CvtRZ_RELU.
1287 def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
1288           (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
1289 def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
1290           (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
1291 def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
1292           (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
1293 def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
1294           (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;
// f32 -> tf32 conversion selected for int_nvvm_f2tf32_rna. The source is a
// Float32Regs operand and the result is produced in an Int32Regs register.
// The PTX ISA documents cvt.rna.tf32.f32 as requiring PTX 7.0 and sm_80, so
// the instruction carries the same [hasPTX<70>, hasSM<80>] gate as the other
// sm_80-only entries in this file. Without it the instruction would be
// selectable on older targets and emit PTX that ptxas rejects.
1296 def CVT_tf32_f32 :
1297    NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
1298                    "cvt.rna.tf32.f32 \t$dest, $a;",
1299        [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>,
       Requires<[hasPTX<70>, hasSM<80>]>;
// Packing / unpacking between one f64 register and two 32-bit halves.
//   INT_NVVM_LOHI_I2D - mov.b64 packs {$src0, $src1} (two Int32Regs) into a
//                       Float64Regs result for int_nvvm_lohi_i2d.
//   INT_NVVM_D2I_LO   - unpacks $src0 into {$dst, %temp}: $dst receives the
//                       first element of the pair, the second is discarded.
//   INT_NVVM_D2I_HI   - unpacks $src0 into {%temp, $dst}: $dst receives the
//                       second element of the pair.
// Both D2I forms declare a scratch ".reg .b32 %temp" inside a brace-delimited
// scope in the emitted asm. The doubled "{{" / "}}" presumably escape literal
// braces in the asm string - confirm against the asm-string syntax.
1301 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
1302   Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
1304 def INT_NVVM_D2I_LO : F_MATH_1<
1305   !strconcat("{{\n\t",
1306              ".reg .b32 %temp; \n\t",
1307              "mov.b64 \t{$dst, %temp}, $src0;\n\t",
1308              "}}"),
1309   Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
1310 def INT_NVVM_D2I_HI : F_MATH_1<
1311   !strconcat("{{\n\t",
1312              ".reg .b32 %temp; \n\t",
1313              "mov.b64 \t{%temp, $dst}, $src0;\n\t",
1314              "}}"),
1315   Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
// int_nvvm_f2ll_*: Float32Regs input selected to CVT_s64_f32.
// Eight variants: four mode suffixes (rn, rz, rm, rp), each with and without
// _ftz; the Cvt*I / Cvt*I_FTZ operand mirrors the intrinsic suffix.
1317 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
1318           (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1319 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
1320           (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
1321 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
1322           (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1323 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
1324           (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
1325 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
1326           (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1327 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
1328           (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
1329 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
1330           (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1331 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
1332           (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
// int_nvvm_f2ull_*: Float32Regs input selected to CVT_u64_f32.
// Same eight mode/_ftz combinations as the f2ll group, differing only in the
// target instruction (u64 instead of s64).
1334 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
1335           (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1336 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
1337           (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
1338 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
1339           (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1340 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
1341           (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
1342 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
1343           (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1344 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
1345           (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
1346 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
1347           (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1348 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
1349           (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
// int_nvvm_d2ll_*: Float64Regs input selected to CVT_s64_f64 with the
// Cvt*I mode operand matching the suffix (rn, rz, rm, rp).
// There are no _ftz variants in this group.
1351 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
1352           (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
1353 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
1354           (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
1355 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
1356           (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
1357 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
1358           (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
// int_nvvm_d2ull_*: Float64Regs input selected to CVT_u64_f64 with the
// Cvt*I mode operand matching the suffix (rn, rz, rm, rp).
1360 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
1361           (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
1362 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
1363           (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
1364 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
1365           (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
1366 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
1367           (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
// int_nvvm_ll2f_*: Int64Regs input selected to CVT_f32_s64.
// These use the CvtRN/CvtRZ/CvtRM/CvtRP operands, not the Cvt*I forms used by
// the float-to-integer groups.
1369 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
1370           (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
1371 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
1372           (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
1373 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
1374           (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
1375 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
1376           (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
// int_nvvm_ull2f_*: Int64Regs input selected to CVT_f32_u64 with the
// CvtRN/CvtRZ/CvtRM/CvtRP operand matching the suffix.
1378 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
1379           (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
1380 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
1381           (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
1382 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
1383           (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
1384 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
1385           (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
// int_nvvm_ll2d_*: Int64Regs input selected to CVT_f64_s64 with the
// CvtRN/CvtRZ/CvtRM/CvtRP operand matching the suffix.
1387 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
1388           (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
1389 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
1390           (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
1391 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
1392           (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
1393 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
1394           (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
// int_nvvm_ull2d_*: Int64Regs input selected to CVT_f64_u64 with the
// CvtRN/CvtRZ/CvtRM/CvtRP operand matching the suffix.
1396 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
1397           (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
1398 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
1399           (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
1400 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
1401           (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
1402 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
1403           (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
// int_nvvm_f2h_rn[_ftz]: Float32Regs input selected to CVT_f16_f32 with
// CvtRN_FTZ or CvtRN respectively. Only the rn mode is covered here.
1406 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
1407           (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
1408 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
1409           (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
1412 // Bitcast
// Bitcast intrinsics, all emitted as a plain register move between register
// classes of the same width:
//   f2i / i2f   use mov.b32 between Float32Regs and Int32Regs;
//   ll2d / d2ll use mov.b64 between Int64Regs and Float64Regs.
// F_MATH_1 is defined outside this chunk; from the uses here its arguments are
// (asm string, result class, source class, intrinsic).
1415 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
1416   Float32Regs, int_nvvm_bitcast_f2i>;
1417 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
1418   Int32Regs, int_nvvm_bitcast_i2f>;
1420 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
1421   Int64Regs, int_nvvm_bitcast_ll2d>;
1422 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
1423   Float64Regs, int_nvvm_bitcast_d2ll>;
1426 // FNS
// Base class for every fns.b32 instruction variant.
//   ins      - the input operand list; it must name its operands $mask, $base
//              and $offset because the asm string refers to them by name.
//   Operands - the DAG to match; its result is assigned to the Int32Regs $dst.
// All variants are gated on hasPTX<60> and hasSM<30>.
1429 class INT_FNS_MBO<dag ins, dag Operands>
1430   : NVPTXInst<(outs Int32Regs:$dst), ins,
1431                "fns.b32 \t$dst, $mask, $base, $offset;",
1432                [(set Int32Regs:$dst, Operands )]>,
1433     Requires<[hasPTX<60>, hasSM<30>]>;
// The eight register/immediate combinations of int_nvvm_fns.
// The three-letter suffix gives the operand kind for (mask, base, offset) in
// that order: 'r' = Int32Regs register, 'i' = i32imm matched with `imm`.
// Each def passes the operand list first and the matching DAG second; the two
// must agree position by position.
1435 def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
1436                      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1437 def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base,    i32imm:$offset),
1438                      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,       imm:$offset)>;
1439 def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base, Int32Regs:$offset),
1440                      (int_nvvm_fns Int32Regs:$mask,       imm:$base, Int32Regs:$offset)>;
1441 def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base,    i32imm:$offset),
1442                      (int_nvvm_fns Int32Regs:$mask,       imm:$base,       imm:$offset)>;
1443 def INT_FNS_irr : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
1444                      (int_nvvm_fns       imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1445 def INT_FNS_iri : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base,    i32imm:$offset),
1446                      (int_nvvm_fns       imm:$mask, Int32Regs:$base,       imm:$offset)>;
1447 def INT_FNS_iir : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base, Int32Regs:$offset),
1448                      (int_nvvm_fns       imm:$mask,       imm:$base, Int32Regs:$offset)>;
1449 def INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$offset),
1450                      (int_nvvm_fns       imm:$mask,       imm:$base,       imm:$offset)>;
1452 //-----------------------------------
1453 // Atomic Functions
1454 //-----------------------------------
// PatFrag wrappers that attach an address-space predicate to an atomic node.
// Each forwards (ops, frag) unchanged and supplies one of the AS_match code
// fragments, which call ChkMemSDNodeAddressSpace(N, ...) with
// ADDRESS_SPACE_GLOBAL, ADDRESS_SPACE_SHARED or ADDRESS_SPACE_GENERIC.
1456 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
1457  : PatFrag<ops, frag, AS_match.global>;
1458 class ATOMIC_SHARED_CHK <dag ops, dag frag>
1459  : PatFrag<ops, frag, AS_match.shared>;
1460 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
1461  : PatFrag<ops, frag, AS_match.generic>;
// Two-operand atomic (address + one value) for a single pointer width.
//   ptrT / ptrclass   - value type and register class of the address operand.
//   regT / regclass   - value type and register class of the data and result.
//   SpaceStr, OpcStr, TypeStr - concatenated after "atom", in that order, to
//                       form the mnemonic.
//   IntOp             - PatFrag to match (address, value).
//   IMMType / IMM     - operand type and DAG node used by the immediate form.
//   Pred              - predicate list passed to Requires<>.
// Produces two records: `reg` takes $b in a register, `imm` takes $b as an
// immediate. The extra "" at the end of the `imm` strconcat appends nothing.
1463 multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1464   ValueType regT, NVPTXRegClass regclass,
1465   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1466   Operand IMMType, SDNode IMM, list<Predicate> Pred> {
1467   def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1468     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
1469     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
1470   Requires<Pred>;
1471   def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
1472     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
1473     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
1474   Requires<Pred>;
// Instantiates F_ATOMIC_2_imp for both pointer widths: `p32` uses an
// i32 / Int32Regs address, `p64` uses an i64 / Int64Regs address.
// All other arguments are forwarded unchanged. Pred defaults to the empty list.
1476 multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1477   string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
1478   list<Predicate> Pred = []> {
1479   defm p32 : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1480     IntOp, IMMType, IMM, Pred>;
1481   defm p64 : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1482     IntOp, IMMType, IMM, Pred>;
1485 // has 2 operands, neg the second one
// Two-operand atomic that negates its value operand before the atomic op.
// The emitted asm is a braced scope that:
//   1. declares a scratch register `temp` of type .s<TypeStr>,
//   2. writes neg.s<TypeStr> of $b into temp,
//   3. issues atom<SpaceStr><OpcStr>.u<TypeStr> on [$addr] with temp.
// TypeStr is therefore just the bit width ("32"/"64"), without a leading type
// letter. Only a register form (`reg`) exists; there is no immediate variant.
1486 multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1487   ValueType regT, NVPTXRegClass regclass,
1488   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1489   list<Predicate> Pred> {
1490   def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1491     !strconcat(
1492       "{{ \n\t",
1493       ".reg \t.s", TypeStr, " temp; \n\t",
1494       "neg.s", TypeStr, " \ttemp, $b; \n\t",
1495       "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
1496       "}}"),
1497     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
1498   Requires<Pred>;
// Instantiates F_ATOMIC_2_NEG_imp for 32-bit (`p32`) and 64-bit (`p64`)
// address operands, forwarding every other argument unchanged.
// Pred defaults to the empty list.
1500 multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass, string SpaceStr,
1501   string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
1502  defm p32: F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1503    IntOp, Pred> ;
1504  defm p64: F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1505    IntOp, Pred> ;
1508 // has 3 operands
// Three-operand atomic (address + two values $b and $c) for one pointer width.
// The mnemonic is "atom" followed by SpaceStr, OpcStr and TypeStr.
// Four records cover every register/immediate combination of the two values:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Unlike F_ATOMIC_2_imp there is no SDNode parameter: the immediate patterns
// always use `imm`. The extra "" in the imm2 strconcat appends nothing.
1509 multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1510   ValueType regT, NVPTXRegClass regclass,
1511   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1512   Operand IMMType, list<Predicate> Pred> {
1513   def reg : NVPTXInst<(outs regclass:$dst),
1514     (ins ptrclass:$addr, regclass:$b, regclass:$c),
1515     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1516     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>,
1517   Requires<Pred>;
1519   def imm1 : NVPTXInst<(outs regclass:$dst),
1520     (ins ptrclass:$addr, IMMType:$b, regclass:$c),
1521     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1522     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>,
1523   Requires<Pred>;
1525   def imm2 : NVPTXInst<(outs regclass:$dst),
1526     (ins ptrclass:$addr, regclass:$b, IMMType:$c),
1527     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
1528     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>,
1529   Requires<Pred>;
1531   def imm3 : NVPTXInst<(outs regclass:$dst),
1532     (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
1533     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1534     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
1535   Requires<Pred>;
// Instantiates F_ATOMIC_3_imp for 32-bit (`p32`) and 64-bit (`p64`)
// address operands, forwarding every other argument unchanged.
// Pred defaults to the empty list.
1537 multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1538   string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
1539   defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1540     IntOp, IMMType, Pred>;
1541   defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1542     IntOp, IMMType, Pred>;
1545 // atom_add
// Address-space-qualified PatFrags for atomic add.
// Suffix _g / _s / _gen selects the global / shared / generic check class.
// The _32 and _64 fragments wrap atomic_load_add_32 / atomic_load_add_64.
// The three fragments without a width suffix wrap atomic_load_fadd instead,
// and are the ones used by the f32/f64 add instructions.
1547 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1548   (atomic_load_add_32 node:$a, node:$b)>;
1549 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1550   (atomic_load_add_32 node:$a, node:$b)>;
1551 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1552   (atomic_load_add_32 node:$a, node:$b)>;
1553 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1554   (atomic_load_add_64 node:$a, node:$b)>;
1555 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1556   (atomic_load_add_64 node:$a, node:$b)>;
1557 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1558   (atomic_load_add_64 node:$a, node:$b)>;
1559 def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1560   (atomic_load_fadd node:$a, node:$b)>;
1561 def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1562   (atomic_load_fadd node:$a, node:$b)>;
1563 def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1564   (atomic_load_fadd node:$a, node:$b)>;
// atom.add instructions. Name suffix: G = ".global", S = ".shared",
// GEN = empty space string (no state-space qualifier in the mnemonic).
// Integer forms use .u32/.u64 with i32imm/i64imm + imm; floating-point forms
// use .f32/.f64 with f32imm/f64imm + fpimm and the atomic_load_fadd fragments.
// Only the f64 forms carry a predicate ([hasAtomAddF64]).
// NOTE(review): each *_GEN_*_USE_G record matches the same generic-space
// PatFrag as its *_GEN_* sibling but prints ".global", and both have the same
// (empty) predicate list here, so nothing visible selects between them.
// Confirm whether a distinguishing predicate is intended.
1566 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
1567   atomic_load_add_32_g, i32imm, imm>;
1568 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".add",
1569   atomic_load_add_32_s, i32imm, imm>;
1570 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".add",
1571   atomic_load_add_32_gen, i32imm, imm>;
1572 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1573   ".add", atomic_load_add_32_gen, i32imm, imm>;
1575 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
1576   atomic_load_add_64_g, i64imm, imm>;
1577 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".add",
1578   atomic_load_add_64_s, i64imm, imm>;
1579 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
1580   atomic_load_add_64_gen, i64imm, imm>;
1581 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1582   ".add", atomic_load_add_64_gen, i64imm, imm>;
1584 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
1585   atomic_load_add_g, f32imm, fpimm>;
1586 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
1587   atomic_load_add_s, f32imm, fpimm>;
1588 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<f32, Float32Regs, "", ".f32", ".add",
1589   atomic_load_add_gen, f32imm, fpimm>;
1591 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<f64, Float64Regs, ".global", ".f64", ".add",
1592   atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
1593 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<f64, Float64Regs, ".shared", ".f64", ".add",
1594   atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
1595 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<f64, Float64Regs, "", ".f64", ".add",
1596   atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
1598 // atom_sub
// Address-space-qualified PatFrags for atomic subtract: atomic_load_sub_32 and
// atomic_load_sub_64, each wrapped with the global (_g), shared (_s) and
// generic (_gen) address-space check.
1600 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1601   (atomic_load_sub_32 node:$a, node:$b)>;
1602 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1603   (atomic_load_sub_32 node:$a, node:$b)>;
1604 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1605   (atomic_load_sub_32 node:$a, node:$b)>;
1606 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1607   (atomic_load_sub_64 node:$a, node:$b)>;
1608 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1609   (atomic_load_sub_64 node:$a, node:$b)>;
1610 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1611   (atomic_load_sub_64 node:$a, node:$b)>;
// Atomic subtract, emitted as an atomic add of the negated operand: every
// record passes ".add" as the opcode and relies on F_ATOMIC_2_NEG to negate
// $b first. TypeStr is the bare width ("32"/"64") because F_ATOMIC_2_NEG
// prepends the .s / .u type letters itself.
// NOTE(review): the *_GEN_*_USE_G records match the same generic PatFrag as
// the *_GEN_* ones with the same (empty) predicate list; confirm intent.
1613 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
1614   atomic_load_sub_32_g>;
1615 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
1616   atomic_load_sub_64_g>;
1617 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<i32, Int32Regs, "", "32", ".add",
1618   atomic_load_sub_32_gen>;
1619 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32",
1620   ".add", atomic_load_sub_32_gen>;
1621 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".shared", "32", ".add",
1622   atomic_load_sub_32_s>;
1623 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".shared", "64", ".add",
1624   atomic_load_sub_64_s>;
1625 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<i64, Int64Regs, "", "64", ".add",
1626   atomic_load_sub_64_gen>;
1627 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64",
1628   ".add", atomic_load_sub_64_gen>;
1630 // atom_swap
// Address-space-qualified PatFrags for atomic exchange: atomic_swap_32 and
// atomic_swap_64, each wrapped with the global (_g), shared (_s) and generic
// (_gen) address-space check.
1632 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1633   (atomic_swap_32 node:$a, node:$b)>;
1634 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1635   (atomic_swap_32 node:$a, node:$b)>;
1636 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1637   (atomic_swap_32 node:$a, node:$b)>;
1638 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1639   (atomic_swap_64 node:$a, node:$b)>;
1640 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1641   (atomic_swap_64 node:$a, node:$b)>;
1642 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1643   (atomic_swap_64 node:$a, node:$b)>;
// atom.exch instructions for 32- and 64-bit values, using the untyped
// .b32 / .b64 type strings. No predicates are attached to any of them.
// NOTE(review): the *_GEN_*_USE_G records match the same generic PatFrag as
// the *_GEN_* ones with the same (empty) predicate list; confirm intent.
1645 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
1646   atomic_swap_32_g, i32imm, imm>;
1647 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".exch",
1648   atomic_swap_32_s, i32imm, imm>;
1649 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".exch",
1650   atomic_swap_32_gen, i32imm, imm>;
1651 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1652   ".exch", atomic_swap_32_gen, i32imm, imm>;
1653 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
1654   atomic_swap_64_g, i64imm, imm>;
1655 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".exch",
1656   atomic_swap_64_s, i64imm, imm>;
1657 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".exch",
1658   atomic_swap_64_gen, i64imm, imm>;
1659 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1660   ".exch", atomic_swap_64_gen, i64imm, imm>;
1662 // atom_max
// Address-space-qualified PatFrags for atomic max.
// atomic_load_max_* fragments feed the .s32/.s64 instructions and
// atomic_load_umax_* fragments feed the .u32/.u64 instructions; each comes in
// global (_g), shared (_s) and generic (_gen) flavours.
1664 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1665   , (atomic_load_max_32 node:$a, node:$b)>;
1666 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1667   (atomic_load_max_32 node:$a, node:$b)>;
1668 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1669   (atomic_load_max_32 node:$a, node:$b)>;
1670 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1671   , (atomic_load_max_64 node:$a, node:$b)>;
1672 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1673   (atomic_load_max_64 node:$a, node:$b)>;
1674 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1675   (atomic_load_max_64 node:$a, node:$b)>;
1676 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1677   (atomic_load_umax_32 node:$a, node:$b)>;
1678 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1679   (atomic_load_umax_32 node:$a, node:$b)>;
1680 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1681   (atomic_load_umax_32 node:$a, node:$b)>;
1682 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1683   (atomic_load_umax_64 node:$a, node:$b)>;
1684 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1685   (atomic_load_umax_64 node:$a, node:$b)>;
1686 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1687   (atomic_load_umax_64 node:$a, node:$b)>;
// atom.max instructions. LOAD_MAX records use the .s32/.s64 type strings with
// the atomic_load_max_* fragments; LOAD_UMAX records use .u32/.u64 with the
// atomic_load_umax_* fragments.
// All 64-bit records are gated on [hasSM<32>]; the 32-bit ones have no
// predicate.
// NOTE(review): the *_GEN_*_USE_G records match the same generic PatFrag as
// the *_GEN_* ones with an identical predicate list; confirm intent.
1689 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
1690   ".max", atomic_load_max_32_g, i32imm, imm>;
1691 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
1692   ".max", atomic_load_max_32_s, i32imm, imm>;
1693 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".max",
1694   atomic_load_max_32_gen, i32imm, imm>;
1695 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1696   ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
1697 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
1698   ".max", atomic_load_max_64_g, i64imm, imm, [hasSM<32>]>;
1699 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
1700   ".max", atomic_load_max_64_s, i64imm, imm, [hasSM<32>]>;
1701 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".max",
1702   atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
1703 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1704   ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
1705 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1706   ".max", atomic_load_umax_32_g, i32imm, imm>;
1707 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
1708   ".max", atomic_load_umax_32_s, i32imm, imm>;
1709 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".max",
1710   atomic_load_umax_32_gen, i32imm, imm>;
1711 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1712   ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
1713 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1714   ".max", atomic_load_umax_64_g, i64imm, imm, [hasSM<32>]>;
1715 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
1716   ".max", atomic_load_umax_64_s, i64imm, imm, [hasSM<32>]>;
1717 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".max",
1718   atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
1719 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1720   ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
1722 // atom_min
// Address-space-qualified PatFrags for atomic min.
// atomic_load_min_* fragments feed the .s32/.s64 instructions and
// atomic_load_umin_* fragments feed the .u32/.u64 instructions; each comes in
// global (_g), shared (_s) and generic (_gen) flavours.
1724 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1725   (atomic_load_min_32 node:$a, node:$b)>;
1726 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1727   (atomic_load_min_32 node:$a, node:$b)>;
1728 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1729   (atomic_load_min_32 node:$a, node:$b)>;
1730 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1731   (atomic_load_min_64 node:$a, node:$b)>;
1732 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1733   (atomic_load_min_64 node:$a, node:$b)>;
1734 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1735   (atomic_load_min_64 node:$a, node:$b)>;
1736 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1737   (atomic_load_umin_32 node:$a, node:$b)>;
1738 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1739   (atomic_load_umin_32 node:$a, node:$b)>;
1740 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1741   (atomic_load_umin_32 node:$a, node:$b)>;
1742 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1743   (atomic_load_umin_64 node:$a, node:$b)>;
1744 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1745   (atomic_load_umin_64 node:$a, node:$b)>;
1746 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1747   (atomic_load_umin_64 node:$a, node:$b)>;
// atom.min instructions. LOAD_MIN records use the .s32/.s64 type strings with
// the atomic_load_min_* fragments; LOAD_UMIN records use .u32/.u64 with the
// atomic_load_umin_* fragments.
// All 64-bit records are gated on [hasSM<32>]; the 32-bit ones have no
// predicate.
// NOTE(review): the *_GEN_*_USE_G records match the same generic PatFrag as
// the *_GEN_* ones with an identical predicate list; confirm intent.
1749 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
1750   ".min", atomic_load_min_32_g, i32imm, imm>;
1751 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
1752   ".min", atomic_load_min_32_s, i32imm, imm>;
1753 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".min",
1754   atomic_load_min_32_gen, i32imm, imm>;
1755 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1756   ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
1757 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
1758   ".min", atomic_load_min_64_g, i64imm, imm, [hasSM<32>]>;
1759 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
1760   ".min", atomic_load_min_64_s, i64imm, imm, [hasSM<32>]>;
1761 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".min",
1762   atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
1763 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1764   ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
1765 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1766   ".min", atomic_load_umin_32_g, i32imm, imm>;
1767 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
1768   ".min", atomic_load_umin_32_s, i32imm, imm>;
1769 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".min",
1770   atomic_load_umin_32_gen, i32imm, imm>;
1771 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1772   ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
1773 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1774   ".min", atomic_load_umin_64_g, i64imm, imm, [hasSM<32>]>;
1775 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
1776   ".min", atomic_load_umin_64_s, i64imm, imm, [hasSM<32>]>;
1777 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".min",
1778   atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
1779 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1780   ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
1782 // atom_inc  atom_dec
// Address-space-qualified PatFrags for atomic inc and dec.
// Unlike the other atomic groups these wrap NVVM intrinsics
// (int_nvvm_atomic_load_inc_32 / int_nvvm_atomic_load_dec_32) rather than
// atomic_load_* nodes. Only 32-bit fragments are defined.
1784 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1785   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1786 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1787   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1788 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1789   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1790 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1791   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1792 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1793   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1794 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1795   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
// atom.inc / atom.dec instructions, 32-bit only, using the .u32 type string.
// No predicates are attached to any of them.
// NOTE(review): the *_GEN_32_USE_G records match the same generic PatFrag as
// the *_GEN_32 ones with the same (empty) predicate list; confirm intent.
1797 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
1798   atomic_load_inc_32_g, i32imm, imm>;
1799 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".inc",
1800   atomic_load_inc_32_s, i32imm, imm>;
1801 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".inc",
1802   atomic_load_inc_32_gen, i32imm, imm>;
1803 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1804   ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1805 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
1806   atomic_load_dec_32_g, i32imm, imm>;
1807 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".dec",
1808   atomic_load_dec_32_s, i32imm, imm>;
1809 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".dec",
1810   atomic_load_dec_32_gen, i32imm, imm>;
1811 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1812   ".dec", atomic_load_dec_32_gen, i32imm, imm>;
1814 // atom_and
// Address-space-qualified PatFrags for atomic AND: atomic_load_and_32 and
// atomic_load_and_64, each wrapped with the global (_g), shared (_s) and
// generic (_gen) address-space check.
1816 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1817   (atomic_load_and_32 node:$a, node:$b)>;
1818 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1819   (atomic_load_and_32 node:$a, node:$b)>;
1820 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1821   (atomic_load_and_32 node:$a, node:$b)>;
1822 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1823   (atomic_load_and_64 node:$a, node:$b)>;
1824 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1825   (atomic_load_and_64 node:$a, node:$b)>;
1826 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1827   (atomic_load_and_64 node:$a, node:$b)>;
// atom.and instructions using the untyped .b32 / .b64 type strings.
// The 64-bit records are gated on [hasSM<32>]; the 32-bit ones have no
// predicate.
// NOTE(review): the *_GEN_*_USE_G records match the same generic PatFrag as
// the *_GEN_* ones with an identical predicate list; confirm intent.
1829 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
1830   atomic_load_and_32_g, i32imm, imm>;
1831 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".and",
1832   atomic_load_and_32_s, i32imm, imm>;
1833 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".and",
1834   atomic_load_and_32_gen, i32imm, imm>;
1835 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1836   ".and", atomic_load_and_32_gen, i32imm, imm>;
1837 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
1838   atomic_load_and_64_g, i64imm, imm, [hasSM<32>]>;
1839 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".and",
1840   atomic_load_and_64_s, i64imm, imm, [hasSM<32>]>;
1841 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".and",
1842   atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
1843 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1844   ".and", atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
1846 // atom_or
// Address-space-qualified PatFrags for atomic OR: atomic_load_or_32 and
// atomic_load_or_64, each wrapped with the global (_g), shared (_s) and
// generic (_gen) address-space check.
1848 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1849   (atomic_load_or_32 node:$a, node:$b)>;
1850 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1851   (atomic_load_or_32 node:$a, node:$b)>;
1852 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1853   (atomic_load_or_32 node:$a, node:$b)>;
1854 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1855   (atomic_load_or_64 node:$a, node:$b)>;
1856 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1857   (atomic_load_or_64 node:$a, node:$b)>;
1858 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1859   (atomic_load_or_64 node:$a, node:$b)>;
// atom.or instructions using the untyped .b32 / .b64 type strings.
// The 64-bit records are gated on [hasSM<32>]; the 32-bit ones have no
// predicate. The records here are listed G, GEN, GEN_USE_G, S, which differs
// from the G, S, GEN, GEN_USE_G order used by the and/xor groups.
// NOTE(review): the *_GEN_*_USE_G records match the same generic PatFrag as
// the *_GEN_* ones with an identical predicate list; confirm intent.
1861 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
1862   atomic_load_or_32_g, i32imm, imm>;
1863 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".or",
1864   atomic_load_or_32_gen, i32imm, imm>;
1865 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1866   ".or", atomic_load_or_32_gen, i32imm, imm>;
1867 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".or",
1868   atomic_load_or_32_s, i32imm, imm>;
1869 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
1870   atomic_load_or_64_g, i64imm, imm, [hasSM<32>]>;
1871 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".or",
1872   atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
1873 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1874   ".or", atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
1875 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".or",
1876   atomic_load_or_64_s, i64imm, imm, [hasSM<32>]>;
1878 // atom_xor
// Address-space-qualified PatFrags for atomic XOR: atomic_load_xor_32 and
// atomic_load_xor_64, each wrapped with the global (_g), shared (_s) and
// generic (_gen) address-space check.
1880 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1881   (atomic_load_xor_32 node:$a, node:$b)>;
1882 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1883   (atomic_load_xor_32 node:$a, node:$b)>;
1884 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1885   (atomic_load_xor_32 node:$a, node:$b)>;
1886 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1887   (atomic_load_xor_64 node:$a, node:$b)>;
1888 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1889   (atomic_load_xor_64 node:$a, node:$b)>;
1890 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1891   (atomic_load_xor_64 node:$a, node:$b)>;
// atom ".xor" instruction variants, instantiated from F_ATOMIC_2 (defined
// outside this chunk) for i32/i64 with the space strings ".global", ".shared"
// and "" (no qualifier).
// The *_USE_G variants pair the generic fragment (atomic_load_xor_*_gen) with
// the ".global" space string.
// Every 64-bit variant passes [hasSM<32>] as the trailing predicate list.
1893 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
1894   atomic_load_xor_32_g, i32imm, imm>;
1895 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor",
1896   atomic_load_xor_32_s, i32imm, imm>;
1897 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor",
1898   atomic_load_xor_32_gen, i32imm, imm>;
1899 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1900   ".xor", atomic_load_xor_32_gen, i32imm, imm>;
1901 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
1902   atomic_load_xor_64_g, i64imm, imm, [hasSM<32>]>;
1903 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor",
1904   atomic_load_xor_64_s, i64imm, imm, [hasSM<32>]>;
1905 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
1906   atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
1907 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1908   ".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
1910 // atom_cas
// Fragments wrapping the 32-bit and 64-bit atomic_cmp_swap nodes, one per
// checker class (_g, _s, _gen). Unlike the two-operand fragments in this file
// they take three operands: $a, $b and $c.
1912 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1913   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1914 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1915   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1916 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1917   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1918 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1919   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1920 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1921   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1922 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1923   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
// atom ".cas" instruction variants, instantiated from F_ATOMIC_3 (defined
// outside this chunk) for i32/i64 with the space strings ".global", ".shared"
// and "" (no qualifier).
// The *_USE_G variants pair the generic fragment (atomic_cmp_swap_*_gen) with
// the ".global" space string.
// No predicate list is passed here, including for the 64-bit variants.
1925 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
1926   atomic_cmp_swap_32_g, i32imm>;
1927 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
1928   atomic_cmp_swap_32_s, i32imm>;
1929 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
1930   atomic_cmp_swap_32_gen, i32imm>;
1931 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32",
1932   ".cas", atomic_cmp_swap_32_gen, i32imm>;
1933 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
1934   atomic_cmp_swap_64_g, i64imm>;
1935 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
1936   atomic_cmp_swap_64_s, i64imm>;
1937 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
1938   atomic_cmp_swap_64_gen, i64imm>;
1939 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64",
1940   ".cas", atomic_cmp_swap_64_gen, i64imm>;
1942 // Support for scoped atomic operations.  Matches
1943 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1944 // and converts it into the appropriate instruction.
1945 // NOTE: not all possible combinations are implemented
1946 //  'space' is limited to generic as it's the only one needed to support CUDA.
1947 //  'scope' = 'gpu' is default and is handled by regular atomic instructions.
//
// ATOM23_impl is the shared instruction class for the 2- and 3-operand
// multiclasses below:
//   AsmStr   - assembly string of the instruction.
//   regT     - value type of the result.
//   regclass - register class of the single output, $result.
//   Preds    - predicate list forwarded to Requires<>.
//   ins      - input operand dag, supplied by the caller.
//   Operands - dag whose value is assigned to $result in the pattern.
1948 class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds,
1949                   dag ins, dag Operands>
1950       : NVPTXInst<(outs regclass:$result), ins,
1951                   AsmStr,
1952                   [(set (regT regclass:$result), Operands)]>,
1953         Requires<Preds>;
1955 // Define instruction variants for all addressing modes.
// Emits four anonymous ATOM23_impl records for a two-operand atomic whose
// pattern is the intrinsic Intr applied to ($src, $b):
//   - $src is an address held in Int32Regs or in Int64Regs;
//   - $b is either a register of regclass or an immediate (ImmType operand,
//     matched through the Imm node and cast to ImmTy).
// The register forms are defined with AddedComplexity = 1; the immediate
// forms keep the default complexity.
1956 multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
1957                        ValueType regT, NVPTXRegClass regclass, Operand ImmType,
1958                        SDNode Imm, ValueType ImmTy,
1959                        list<Predicate> Preds> {
1960   let AddedComplexity = 1 in {
1961     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1962                       (ins Int32Regs:$src, regclass:$b),
1963                       (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
1964     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1965                       (ins Int64Regs:$src, regclass:$b),
1966                       (Intr (i64 Int64Regs:$src), (regT regclass:$b))>;
1967   }
1968   // tablegen can't infer argument types from Intrinsic (though it can
1969   // from Instruction) so we have to enforce specific type on
1970   // immediates via explicit cast to ImmTy.
1971   def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1972                     (ins Int32Regs:$src, ImmType:$b),
1973                     (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
1974   def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1975                     (ins Int64Regs:$src, ImmType:$b),
1976                     (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>;
// Three-operand counterpart of ATOM2P_impl: emits eight anonymous ATOM23_impl
// records for the intrinsic Intr applied to ($src, $b, $c). $src is a 32-bit
// or 64-bit register address; $b and $c are each a register or an immediate.
// AddedComplexity is 2 when both are registers, 1 when exactly one is an
// immediate, and left at the default when both are immediates.
1979 multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
1980                        ValueType regT, NVPTXRegClass regclass,
1981                        Operand ImmType, SDNode Imm, ValueType ImmTy,
1982                        list<Predicate> Preds> {
1983   // Variants for register/immediate permutations of $b and $c
1984   let AddedComplexity = 2 in {
1985     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1986                       (ins Int32Regs:$src, regclass:$b, regclass:$c),
1987                       (Intr (i32 Int32Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
1988     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1989                       (ins Int64Regs:$src, regclass:$b, regclass:$c),
1990                       (Intr (i64 Int64Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
1991   }
1992   let AddedComplexity = 1 in {
1993     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1994                       (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1995                       (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
1996     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1997                       (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1998                       (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
1999     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2000                       (ins Int32Regs:$src, regclass:$b, ImmType:$c),
2001                       (Intr (i32 Int32Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
2002     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2003                       (ins Int64Regs:$src, regclass:$b, ImmType:$c),
2004                       (Intr (i64 Int64Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
2005   }
2006   def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2007                     (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
2008                     (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
2009   def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2010                     (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
2011                     (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
2014 // Constructs intrinsic name and instruction asm strings.
// Asm string: "atom" + ("." SpaceStr unless SpaceStr is "gen")
//                    + ("." ScopeStr unless ScopeStr is "gpu")
//                    + "." OpStr "." TypeStr " \t$result, [$src], $b;"
// Intrinsic:  "int_nvvm_atomic_" OpStr "_" SpaceStr "_" IntTypeStr
//                    + ("_" ScopeStr unless ScopeStr is empty),
//             resolved to a record with !cast<Intrinsic>.
// NOTE(review): the asm string omits the scope for "gpu" while the intrinsic
// name omits it only for an empty string; the instantiations visible in this
// file pass only "cta" and "sys", for which both conditions agree.
2015 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
2016                        string ScopeStr, string SpaceStr,
2017                        ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2018                        ValueType ImmTy, list<Predicate> Preds> {
2019   defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2020                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
2021                             # "." # OpStr # "." # TypeStr
2022                             # " \t$result, [$src], $b;",
2023                      !cast<Intrinsic>(
2024                             "int_nvvm_atomic_" # OpStr
2025                             # "_" # SpaceStr # "_" # IntTypeStr
2026                             # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
2027                      regT, regclass, ImmType, Imm, ImmTy, Preds>;
// Three-operand counterpart of ATOM2N_impl: builds the same asm prefix and
// intrinsic name, but the asm operand list is "$result, [$src], $b, $c" and
// the records come from ATOM3P_impl.
// NOTE(review): as in ATOM2N_impl, the asm string omits the scope for "gpu"
// while the intrinsic name omits it only for an empty ScopeStr.
2029 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
2030                        string ScopeStr, string SpaceStr,
2031                        ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2032                        ValueType ImmTy, list<Predicate> Preds> {
2033   defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2034                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
2035                             # "." # OpStr # "." # TypeStr
2036                             # " \t$result, [$src], $b, $c;",
2037                      !cast<Intrinsic>(
2038                             "int_nvvm_atomic_" # OpStr
2039                             # "_" # SpaceStr # "_" # IntTypeStr
2040                             # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
2041                      regT, regclass, ImmType, Imm, ImmTy, Preds>;
2044 // Constructs variants for different address spaces.
2045 // For now we only need variants for generic space pointers.
// Forwards every parameter to ATOM2N_impl with SpaceStr fixed to "gen",
// under the name suffix "_gen_".
2046 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
2047                        string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
2048                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
2049    defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
2050                             regT, regclass, ImmType, Imm, ImmTy, Preds>;
// Three-operand counterpart of ATOM2A_impl: forwards every parameter to
// ATOM3N_impl with SpaceStr fixed to "gen", under the name suffix "_gen_".
2052 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
2053                        string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
2054                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
2055    defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
2056                             regT, regclass, ImmType, Imm, ImmTy, Preds>;
2059 // Constructs variants for different scopes of atomic op.
// Instantiates ATOM2A_impl twice, with ScopeStr "cta" (suffix _cta) and "sys"
// (suffix _sys). Both append hasAtomScope to the caller-supplied Preds.
2060 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
2061                        ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2062                        ValueType ImmTy, list<Predicate> Preds> {
2063    // .gpu scope is default and is currently covered by existing
2064    // atomics w/o explicitly specified scope.
2065    defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
2066                            regT, regclass, ImmType, Imm, ImmTy,
2067                            !listconcat(Preds,[hasAtomScope])>;
2068    defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
2069                            regT, regclass, ImmType, Imm, ImmTy,
2070                            !listconcat(Preds,[hasAtomScope])>;
// Three-operand counterpart of ATOM2S_impl: instantiates ATOM3A_impl with
// ScopeStr "cta" (suffix _cta) and "sys" (suffix _sys), appending hasAtomScope
// to the caller-supplied Preds in both cases.
2072 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
2073            ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
2074            list<Predicate> Preds> {
2075    // No need to define ".gpu"-scoped atomics.  They do the same thing
2076    // as the regular, non-scoped atomics defined elsewhere.
2077    defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
2078                            regT, regclass, ImmType, Imm, ImmTy,
2079                            !listconcat(Preds,[hasAtomScope])>;
2080    defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
2081                            regT, regclass, ImmType, Imm, ImmTy,
2082                            !listconcat(Preds,[hasAtomScope])>;
2085 // atom.add
// Type table for the scoped add: s32, u32, u64, f32 and f64. The second
// argument ("i" or "f") is the IntTypeStr used in the intrinsic name and the
// third is the TypeStr used in the asm string. Only the f64 variant adds a
// predicate (hasAtomAddF64).
2086 multiclass ATOM2_add_impl<string OpStr> {
2087    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
2088    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
2089    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
2090    defm _f32  : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
2091                             []>;
2092    defm _f64  : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
2093                             [hasAtomAddF64]>;
2096 // atom.{and,or,xor}
// Type table for the scoped bitwise ops: b32 and b64. The b64 variant adds
// the hasAtomBitwise64 predicate.
2097 multiclass ATOM2_bitwise_impl<string OpStr> {
2098    defm _b32  : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2099    defm _b64  : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64,
2100                             [hasAtomBitwise64]>;
2103 // atom.exch
// Type table for the scoped exchange: b32 and b64, neither with an extra
// predicate.
2104 multiclass ATOM2_exch_impl<string OpStr> {
2105    defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2106    defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
2109 // atom.{min,max}
// Type table for the scoped min/max: s32, u32, s64 and u64. Both 64-bit
// variants add the hasAtomMinMax64 predicate.
2110 multiclass ATOM2_minmax_impl<string OpStr> {
2111    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
2112    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
2113    defm _s64  : ATOM2S_impl<OpStr, "i", "s64", i64, Int64Regs, i64imm, imm, i64,
2114                             [hasAtomMinMax64]>;
2115    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64,
2116                             [hasAtomMinMax64]>;
2119 // atom.{inc,dec}
// Type table for the scoped inc/dec: only a u32 variant is defined.
2120 multiclass ATOM2_incdec_impl<string OpStr> {
2121    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
2124 // atom.cas
// Type table for the scoped compare-and-swap: b32 and b64, built on the
// three-operand ATOM3S_impl, neither with an extra predicate.
2125 multiclass ATOM3_cas_impl<string OpStr> {
2126    defm _b32  : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2127    defm _b64  : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
// Top-level instantiations of the scoped atomics. The string argument is the
// OpStr that ends up in both the asm string and the intrinsic name.
2130 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
2131 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
2132 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
2133 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
2134 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
2135 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
2136 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
2137 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
2138 defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
2139 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
2141 //-----------------------------------
2142 // Support for ldu on sm_20 or later
2143 //-----------------------------------
2145 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
2146 // read-only in a kernel.
2148 // Scalar
// LDU_G defines five scalar "ldu.global." instructions that differ only in
// the $src operand: areg (Int32Regs), areg64 (Int64Regs), avar (imemAny),
// ari (MEMri) and ari64 (MEMri64). TyStr supplies the rest of the asm string
// (type suffix and operand list). All have an empty selection pattern and
// require hasLDU.
2150 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
2151   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2152                !strconcat("ldu.global.", TyStr),
2153                       []>, Requires<[hasLDU]>;
2154   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2155                !strconcat("ldu.global.", TyStr),
2156                         []>, Requires<[hasLDU]>;
2157  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2158                !strconcat("ldu.global.", TyStr),
2159                       []>, Requires<[hasLDU]>;
2160  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2161                !strconcat("ldu.global.", TyStr),
2162                       []>, Requires<[hasLDU]>;
2163  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2164                !strconcat("ldu.global.", TyStr),
2165                         []>, Requires<[hasLDU]>;
// Scalar ldu instantiations. Note that the i8 variant uses the "u8" asm type
// with the Int16Regs register class.
2168 defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
2169 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
2170 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
2171 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
2172 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
2173 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
2175 // vector
2177 // Elementized vector ldu
// Two-result "ldu.global." instructions ($dst1, $dst2), one per $src operand
// kind: _areg32, _areg64, _ari32, _ari64 and _avar. All have an empty
// selection pattern. Unlike the scalar LDU_G defs, these carry no Requires<>
// clause.
2178 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2179  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2180                      (ins Int32Regs:$src),
2181                      !strconcat("ldu.global.", TyStr), []>;
2182  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2183                      (ins Int64Regs:$src),
2184                      !strconcat("ldu.global.", TyStr), []>;
2185  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2186                      (ins MEMri:$src),
2187                      !strconcat("ldu.global.", TyStr), []>;
2188  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2189                      (ins MEMri64:$src),
2190                      !strconcat("ldu.global.", TyStr), []>;
2191  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2192                      (ins imemAny:$src),
2193                      !strconcat("ldu.global.", TyStr), []>;
// Four-result "ldu.global." instructions ($dst1..$dst4), one per $src operand
// kind: _areg32, _areg64, _ari32, _ari64 and _avar. All have an empty
// selection pattern and no Requires<> clause.
2196 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2197  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2198                             regclass:$dst4), (ins Int32Regs:$src),
2199                !strconcat("ldu.global.", TyStr), []>;
2200  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2201                             regclass:$dst4), (ins Int64Regs:$src),
2202                !strconcat("ldu.global.", TyStr), []>;
2203  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2204                             regclass:$dst4), (ins MEMri:$src),
2205                !strconcat("ldu.global.", TyStr), []>;
2206  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2207                             regclass:$dst4), (ins MEMri64:$src),
2208                !strconcat("ldu.global.", TyStr), []>;
2209  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2210                             regclass:$dst4), (ins imemAny:$src),
2211                !strconcat("ldu.global.", TyStr), []>;
// Vector ldu instantiations. The doubled braces "{{" / "}}" in the asm
// strings surround the destination register list.
// The 8-bit element variants use Int16Regs. The v4f16 variant uses the "b16"
// asm type with Int16Regs, and v4f16x2 uses "b32" with Int32Regs.
2214 defm INT_PTX_LDU_G_v2i8_ELE
2215   : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
2216 defm INT_PTX_LDU_G_v2i16_ELE
2217   : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2218 defm INT_PTX_LDU_G_v2i32_ELE
2219   : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2220 defm INT_PTX_LDU_G_v2f32_ELE
2221   : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2222 defm INT_PTX_LDU_G_v2i64_ELE
2223   : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2224 defm INT_PTX_LDU_G_v2f64_ELE
2225   : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2226 defm INT_PTX_LDU_G_v4i8_ELE
2227   : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2228 defm INT_PTX_LDU_G_v4i16_ELE
2229   : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2230     Int16Regs>;
2231 defm INT_PTX_LDU_G_v4i32_ELE
2232   : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2233     Int32Regs>;
2234 defm INT_PTX_LDU_G_v4f16_ELE
2235   : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2236     Int16Regs>;
2237 defm INT_PTX_LDU_G_v4f16x2_ELE
2238   : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2239     Int32Regs>;
2240 defm INT_PTX_LDU_G_v4f32_ELE
2241   : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2242     Float32Regs>;
2245 //-----------------------------------
2246 // Support for ldg on sm_35 or later
2247 //-----------------------------------
2249 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
2250 // non-coherent texture cache, and therefore the values read must be read-only
2251 // during the lifetime of the kernel.
// LDG_G defines five scalar "ld.global.nc." instructions that differ only in
// the $src operand: areg (Int32Regs), areg64 (Int64Regs), avar (imemAny),
// ari (MEMri) and ari64 (MEMri64). TyStr supplies the rest of the asm string.
// All have an empty selection pattern and require hasLDG.
2253 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
2254   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2255                !strconcat("ld.global.nc.", TyStr),
2256                       []>, Requires<[hasLDG]>;
2257   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2258                !strconcat("ld.global.nc.", TyStr),
2259                         []>, Requires<[hasLDG]>;
2260  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2261                !strconcat("ld.global.nc.", TyStr),
2262                       []>, Requires<[hasLDG]>;
2263  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2264                !strconcat("ld.global.nc.", TyStr),
2265                       []>, Requires<[hasLDG]>;
2266  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2267                !strconcat("ld.global.nc.", TyStr),
2268                         []>, Requires<[hasLDG]>;
// Scalar ldg instantiations. Note that the i8 variant uses the "u8" asm type
// with the Int16Regs register class.
2271 defm INT_PTX_LDG_GLOBAL_i8
2272   : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
2273 defm INT_PTX_LDG_GLOBAL_i16
2274   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
2275 defm INT_PTX_LDG_GLOBAL_i32
2276   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
2277 defm INT_PTX_LDG_GLOBAL_i64
2278   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2279 defm INT_PTX_LDG_GLOBAL_f32
2280   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
2281 defm INT_PTX_LDG_GLOBAL_f64
2282   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2284 // vector
2286 // Elementized vector ldg
// Two-result "ld.global.nc." instructions ($dst1, $dst2), one per $src
// operand kind: _areg32, _areg64, _ari32, _ari64 and _avar. All have an empty
// selection pattern. Unlike the scalar LDG_G defs, these carry no Requires<>
// clause.
2287 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2288  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2289                      (ins Int32Regs:$src),
2290                      !strconcat("ld.global.nc.", TyStr), []>;
2291  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2292                      (ins Int64Regs:$src),
2293                      !strconcat("ld.global.nc.", TyStr), []>;
2294  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2295                      (ins MEMri:$src),
2296                      !strconcat("ld.global.nc.", TyStr), []>;
2297  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2298                      (ins MEMri64:$src),
2299                      !strconcat("ld.global.nc.", TyStr), []>;
2300  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2301                      (ins imemAny:$src),
2302                      !strconcat("ld.global.nc.", TyStr), []>;
// Four-result "ld.global.nc." instructions ($dst1..$dst4), one per $src
// operand kind: _areg32, _areg64, _ari32, _ari64 and _avar. All have an empty
// selection pattern and no Requires<> clause.
2305 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2306   def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2307                               regclass:$dst4), (ins Int32Regs:$src),
2308                !strconcat("ld.global.nc.", TyStr), []>;
2309   def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2310                                regclass:$dst4), (ins Int64Regs:$src),
2311                !strconcat("ld.global.nc.", TyStr), []>;
2312   def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2313                               regclass:$dst4), (ins MEMri:$src),
2314                !strconcat("ld.global.nc.", TyStr), []>;
2315   def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2316                               regclass:$dst4), (ins MEMri64:$src),
2317                !strconcat("ld.global.nc.", TyStr), []>;
2318   def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2319                              regclass:$dst4), (ins imemAny:$src),
2320                !strconcat("ld.global.nc.", TyStr), []>;
2323 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ldg instantiations. The 8-bit element variants (v2i8, v4i8) use the
// Int16Regs register class. No v4f16 / v4f16x2 variants are defined in this
// run, unlike the ldu list in this file.
2324 defm INT_PTX_LDG_G_v2i8_ELE
2325   : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
2326 defm INT_PTX_LDG_G_v2i16_ELE
2327   : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2328 defm INT_PTX_LDG_G_v2i32_ELE
2329   : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2330 defm INT_PTX_LDG_G_v2f32_ELE
2331   : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2332 defm INT_PTX_LDG_G_v2i64_ELE
2333   : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2334 defm INT_PTX_LDG_G_v2f64_ELE
2335   : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2336 defm INT_PTX_LDG_G_v4i8_ELE
2337   : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2338 defm INT_PTX_LDG_G_v4i16_ELE
2339   : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2340 defm INT_PTX_LDG_G_v4i32_ELE
2341   : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
2342 defm INT_PTX_LDG_G_v4f32_ELE
2343   : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
// Selects the intrinsic Intrin to a "cvta.<Str>" instruction:
//   _yes      - 32-bit source and result (".u32").
//   _yes_64   - 64-bit source and result (".u64").
//   _yes_6432 - 32-bit source, 64-bit result: declares a 64-bit temporary
//               %tmp, widens $src into it with cvt.u64.u32, then applies
//               cvta.<Str>.u64. Only available under useShortPtr.
2346 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
2347    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2348           !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
2349       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2350    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2351           !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
2352       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2353    def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
2354           "{{ .reg .b64 %tmp;\n\t"
2355           #"  cvt.u64.u32 \t%tmp, $src;\n\t"
2356           #"  cvta." # Str # ".u64 \t$result, %tmp; }}",
2357       [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
2358       Requires<[useShortPtr]>;
// Selects the intrinsic Intrin to a "cvta.to.<Str>" instruction:
//   _yes      - 32-bit source and result (".u32").
//   _yes_64   - 64-bit source and result (".u64").
//   _yes_3264 - 64-bit source, 32-bit result: applies cvta.to.<Str>.u64 into
//               a 64-bit temporary %tmp, then narrows it with cvt.u32.u64.
//               Only available under useShortPtr.
2361 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
2362    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2363           !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
2364       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2365    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2366           !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
2367       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2368    def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
2369           "{{ .reg .b64 %tmp;\n\t"
2370           #"  cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
2371           #"  cvt.u32.u64 \t$result, %tmp; }}",
2372       [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
2373       Requires<[useShortPtr]>;
// cvta instantiations for the local, shared, global and const spaces.
// cvta_*    : int_nvvm_ptr_<space>_to_gen  -> "cvta.<space>"
// cvta_to_* : int_nvvm_ptr_gen_to_<space>  -> "cvta.to.<space>"
// The asm string uses "const" while the intrinsic names use "constant".
2376 defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
2377 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
2378 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
2379 defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
2381 defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
2382 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
2383 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
2384 defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
2387 // nvvm.ptr.gen.to.param
// Selected to a plain register move (mov.u32 / mov.u64) of $src into $result,
// in a 32-bit and a 64-bit form.
2388 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
2389   (ins Int32Regs:$src),
2390                         "mov.u32 \t$result, $src;",
2391                               [(set Int32Regs:$result,
2392                                 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
2393 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
2394   (ins Int64Regs:$src),
2395                         "mov.u64 \t$result, $src;",
2396                               [(set Int64Regs:$result,
2397                                 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2400 // nvvm.move intrinsics
// Each int_nvvm_move_* intrinsic is selected to a single mov of $s into $r.
// The integer forms use mov.b16/.b32/.b64, the floating-point forms use
// mov.f32/.f64, and int_nvvm_move_ptr uses mov.u32 or mov.u64 depending on
// whether the operand is in Int32Regs or Int64Regs.
2401 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
2402                              "mov.b16 \t$r, $s;",
2403                              [(set Int16Regs:$r,
2404                                (int_nvvm_move_i16 Int16Regs:$s))]>;
2405 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2406                              "mov.b32 \t$r, $s;",
2407                              [(set Int32Regs:$r,
2408                                (int_nvvm_move_i32 Int32Regs:$s))]>;
2409 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2410                              "mov.b64 \t$r, $s;",
2411                              [(set Int64Regs:$r,
2412                                (int_nvvm_move_i64 Int64Regs:$s))]>;
2413 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
2414                              "mov.f32 \t$r, $s;",
2415                              [(set Float32Regs:$r,
2416                                (int_nvvm_move_float Float32Regs:$s))]>;
2417 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
2418                              "mov.f64 \t$r, $s;",
2419                              [(set Float64Regs:$r,
2420                                (int_nvvm_move_double Float64Regs:$s))]>;
2421 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2422                              "mov.u32 \t$r, $s;",
2423                              [(set Int32Regs:$r,
2424                                (int_nvvm_move_ptr Int32Regs:$s))]>;
2425 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2426                              "mov.u64 \t$r, $s;",
2427                              [(set Int64Regs:$r,
2428                                (int_nvvm_move_ptr Int64Regs:$s))]>;
2430 // @TODO: Are these actually needed, or will we always just see symbols
2431 // copied to registers first?
2432 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2433                              "mov.u32 \t$r, $s;",
2434                              [(set Int32Regs:$r,
2435                              (int_nvvm_move_ptr texternalsym:$s))]>;
2436 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2437                              "mov.u64 \t$r, $s;",
2438                              [(set Int64Regs:$r,
2439                              (int_nvvm_move_ptr texternalsym:$s))]>;*/
// Folds a local->generic->local address round trip applied to a parameter
// symbol into a single move:
2442 // MoveParam        %r1, param
2443 // ptr_local_to_gen %r2, %r1
2444 // ptr_gen_to_local %r3, %r2
2445 // ->
2446 // mov %r1, param
2448 // @TODO: Revisit this.  There is a type
2449 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2450 // instructions are not currently defined. However, we can use the ptr
2451 // variants and the asm printer will do the right thing.
// One pattern per result width: i64 -> nvvm_move_ptr64, i32 -> nvvm_move_ptr32.
2452 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2453                 (MoveParam texternalsym:$src)))),
2454                (nvvm_move_ptr64  texternalsym:$src)>;
2455 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2456                 (MoveParam texternalsym:$src)))),
2457                (nvvm_move_ptr32  texternalsym:$src)>;
// Moves an imem operand into a 64-bit register with mov.u64. It has an empty
// selection pattern, so it is presumably created explicitly by C++ code
// elsewhere in the backend -- confirm against its users.
2459 def texsurf_handles
2460   : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
2461               "mov.u64 \t$result, $src;", []>;
2463 //-----------------------------------
2464 // Compiler Error Warn
2465 // - Just ignore them in codegen
2466 //-----------------------------------
// Each intrinsic is matched with a 32-bit or 64-bit register operand and
// produces no outputs. The asm string is only a "//" comment line naming the
// intrinsic.
2468 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2469                 "// llvm.nvvm.compiler.warn()",
2470                 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
2471 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2472                 "// llvm.nvvm.compiler.warn()",
2473                 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
2474 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2475                 "// llvm.nvvm.compiler.error()",
2476                 [(int_nvvm_compiler_error Int32Regs:$a)]>;
2477 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2478                 "// llvm.nvvm.compiler.error()",
2479                 [(int_nvvm_compiler_error Int64Regs:$a)]>;
2482 // isspacep
// Defines "isspacep.<suffix>" for a 32-bit (_32) and a 64-bit (_64) address
// operand $a, writing an Int1Regs result $d.
//   suffix - text appended after "isspacep." in the asm string.
//   Intr   - intrinsic matched by the selection pattern.
//   Preds  - predicates forwarded to Requires<>; defaults to an empty list.
2484 multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
2485   def _32: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2486               "isspacep." # suffix # "\t$d, $a;",
2487               [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
2488     Requires<Preds>;
2489   def _64: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2490               "isspacep." # suffix # "\t$d, $a;",
2491               [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
2492     Requires<Preds>;
// ISSPACEP instantiations, one per address-space suffix. The const form is
// gated on hasPTX<31>; the shared::cluster form is gated on hasPTX<78> and
// hasSM<90>; global, local and shared pass no predicates.
2495 defm isspace_const  : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
2496 defm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
2497 defm isspace_local  : ISSPACEP<"local", int_nvvm_isspacep_local>;
2498 defm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
2499 defm isspace_shared_cluster : ISSPACEP<"shared::cluster",
2500                                        int_nvvm_isspacep_shared_cluster,
2501                                        [hasPTX<78>, hasSM<90>]>;
2503 // Special register reads
// MOV_SPECIAL: mov.b32 from a SpecialRegs operand $r into a 32-bit register
// $d. It carries no selection pattern of its own; separate Pat definitions
// select it.
2504 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
2505                             (ins SpecialRegs:$r),
2506                             "mov.b32 \t$d, $r;", []>;
// Select each int_nvvm_read_ptx_sreg_envregN intrinsic (N = 0..31) as a
// MOV_SPECIAL of the correspondingly numbered ENVREGN register.
2508 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
2509 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
2510 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
2511 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
2512 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
2513 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
2514 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
2515 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
2516 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
2517 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
2518 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
2519 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2520 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2521 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2522 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2523 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2524 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2525 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2526 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2527 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2528 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2529 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2530 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2531 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2532 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2533 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2534 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2535 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2536 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2537 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2538 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2539 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2542 // rotate builtin support
// ROTATE_B32_HW_IMM: selects int_nvvm_rotate_b32 with an immediate amount as
// shf.l.wrap.b32, passing $src as both source operands of the funnel shift.
// Only available when hasHWROT32 holds.
2544 def ROTATE_B32_HW_IMM
2545   : NVPTXInst<(outs Int32Regs:$dst),
2546               (ins  Int32Regs:$src, i32imm:$amt),
2547               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2548               [(set Int32Regs:$dst,
2549                  (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2550               Requires<[hasHWROT32]> ;
// ROTATE_B32_HW_REG: same as the immediate form, but the rotate amount $amt
// comes from a 32-bit register. Only available when hasHWROT32 holds.
2552 def ROTATE_B32_HW_REG
2553   : NVPTXInst<(outs Int32Regs:$dst),
2554               (ins  Int32Regs:$src, Int32Regs:$amt),
2555               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2556               [(set Int32Regs:$dst,
2557                  (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2558               Requires<[hasHWROT32]> ;
// Fallback selection of int_nvvm_rotate_b32 when noHWROT32 holds.
// Immediate amount: ROT32imm_sw receives the amount and the SUB_FRM_32
// transform of it.
// NOTE(review): SUB_FRM_32 is defined elsewhere; presumably it yields
// 32 - amt -- confirm.
2560 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2561           (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2562       Requires<[noHWROT32]> ;
// Register amount: delegates to ROTL32reg_sw.
2564 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2565           (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2566       Requires<[noHWROT32]> ;
// Helpers that split a 64-bit register into one 32-bit half. Both are marked
// hasSideEffects = false and have no selection pattern; they are referenced
// explicitly from output patterns. Each emits a braced PTX scope that declares
// a scratch register %dummy and unpacks $src with a vector mov.b64.
2568 let hasSideEffects = false in {
// GET_LO_INT64: $dst is the first element of the unpacked pair; the second
// goes to %dummy.
2569   def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2570     !strconcat("{{\n\t",
2571                ".reg .b32 %dummy;\n\t",
2572                "mov.b64 \t{$dst,%dummy}, $src;\n\t",
2573                "}}"),
2574           []> ;
// GET_HI_INT64: $dst is the second element of the unpacked pair; the first
// goes to %dummy.
2576   def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2577     !strconcat("{{\n\t",
2578                ".reg .b32 %dummy;\n\t",
2579                "mov.b64 \t{%dummy,$dst}, $src;\n\t",
2580                "}}"),
2581           []> ;
// PACK_TWO_INT32: builds a 64-bit register from two 32-bit registers with a
// vector mov.b64; $lo is the first element and $hi the second. Marked
// hasSideEffects = false, with no selection pattern of its own.
2584 let hasSideEffects = false in {
2585   def PACK_TWO_INT32
2586     : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2587                 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
// int_nvvm_swap_lo_hi_b64: repack $src with its halves exchanged. The high
// half (GET_HI_INT64) is passed as PACK_TWO_INT32's $lo operand and the low
// half (GET_LO_INT64) as its $hi operand.
2590 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2591           (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2592                           (GET_LO_INT64 Int64Regs:$src))> ;
2594 // Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
2595 // no side effects.
// Four pattern-less instructions: shf.l.wrap.b32 and shf.r.wrap.b32, each with
// an immediate (_IMM) or register (_REG) shift amount. All take 32-bit $lo and
// $hi inputs and are gated on hasHWROT32.
2596 let hasSideEffects = false in {
// Left funnel shift, immediate amount.
2597   def SHF_L_WRAP_B32_IMM
2598     : NVPTXInst<(outs Int32Regs:$dst),
2599                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2600                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2601       Requires<[hasHWROT32]>;
// Left funnel shift, register amount.
2603   def SHF_L_WRAP_B32_REG
2604     : NVPTXInst<(outs Int32Regs:$dst),
2605                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2606                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2607       Requires<[hasHWROT32]>;
// Right funnel shift, immediate amount.
2609   def SHF_R_WRAP_B32_IMM
2610     : NVPTXInst<(outs Int32Regs:$dst),
2611                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2612                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2613       Requires<[hasHWROT32]>;
// Right funnel shift, register amount.
2615   def SHF_R_WRAP_B32_REG
2616     : NVPTXInst<(outs Int32Regs:$dst),
2617                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2618                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2619       Requires<[hasHWROT32]>;
2622 // HW version of rotate 64
// int_nvvm_rotate_b64 when hasHWROT32 holds: the 64-bit value is split into
// halves, two shf.l.wrap.b32 funnel shifts are issued, and the results are
// repacked with PACK_TWO_INT32. The result's $lo word is
// shf.l.wrap(HI(src), LO(src), amt) and its $hi word is
// shf.l.wrap(LO(src), HI(src), amt).
// Immediate rotate amount.
2623 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2624           (PACK_TWO_INT32
2625             (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2626                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2627             (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2628                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2629       Requires<[hasHWROT32]>;
// Register rotate amount.
2631 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2632           (PACK_TWO_INT32
2633             (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2634                                 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2635             (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2636                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2637       Requires<[hasHWROT32]>;
// int_nvvm_rotate_right_b64 when hasHWROT32 holds: same split/shift/repack
// scheme using shf.r.wrap.b32. The result's $lo word is
// shf.r.wrap(LO(src), HI(src), amt) and its $hi word is
// shf.r.wrap(HI(src), LO(src), amt).
// Immediate rotate amount.
2640 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2641           (PACK_TWO_INT32
2642             (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2643                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2644             (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2645                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2646       Requires<[hasHWROT32]>;
// Register rotate amount.
2648 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2649           (PACK_TWO_INT32
2650             (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2651                                 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2652             (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2653                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2654       Requires<[hasHWROT32]>;
2656 // SW version of rotate 64
// Fallbacks used when noHWROT32 holds. The immediate forms both select
// ROT64imm_sw; rotate-left passes (amt, SUB_FRM_64 amt) and rotate-right passes
// the same two values in the opposite order. The register forms delegate to
// ROTL64reg_sw / ROTR64reg_sw.
// NOTE(review): SUB_FRM_64 is defined elsewhere; presumably it yields
// 64 - amt -- confirm.
// Rotate left, immediate amount.
2657 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2658           (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
2659       Requires<[noHWROT32]>;
// Rotate left, register amount.
2660 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2661           (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2662       Requires<[noHWROT32]>;
// Rotate right, immediate amount (operands swapped relative to rotate left).
2663 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2664           (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2665       Requires<[noHWROT32]>;
// Rotate right, register amount.
2666 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2667           (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2668       Requires<[noHWROT32]>;
2671 //-----------------------------------
2672 // Texture Intrinsics
2673 //-----------------------------------
2675 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must
2676 // also be defined in NVPTXReplaceImageHandles.cpp.
2678 // texmode_independent
2679 let IsTex = true, IsTexModeUnified = false in {
2680 // Texture fetch instructions using handles
// TEX_1D_base: pattern-less texture fetch instruction with one coordinate.
//   inst    - PTX mnemonic placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinate $x.
//   texsamp - leading input operands; expected to supply the $t and $s
//             operands that the asm string references (presumably the texture
//             and sampler handles).
2682 class TEX_1D_base<string inst, NVPTXRegClass outtype,
2683                   NVPTXRegClass intype, dag texsamp>
2684     : NVPTXInst<(outs outtype:$r, outtype:$g,
2685                       outtype:$b, outtype:$a),
2686                  !con(texsamp, (ins intype:$x)),
2687                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2688                  []>;
// TEX_1D: expands TEX_1D_base into four variants named by the kinds of $t and
// $s: R = Int64Regs register, I = i64imm immediate. The first letter is $t and
// the second is $s.
2690 multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2691   def _RR : TEX_1D_base<inst, outtype, intype,
2692                         (ins Int64Regs:$t, Int64Regs:$s)>;
2693   def _RI : TEX_1D_base<inst, outtype, intype,
2694                         (ins Int64Regs:$t, i64imm:$s)>;
2695   def _IR : TEX_1D_base<inst, outtype, intype,
2696                         (ins i64imm:$t, Int64Regs:$s)>;
2697   def _II : TEX_1D_base<inst, outtype, intype,
2698                         (ins i64imm:$t, i64imm:$s)>;
// Instantiations: f32/s32/u32 results with s32 or f32 coordinates. The s32 and
// u32 result forms both use Int32Regs.
2701 defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
2702 defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2703 defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
2704 defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2705 defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
2706 defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_1D_LEVEL_base: like the plain 1D fetch, with an extra intype operand
// $lod emitted after the coordinate vector (presumably the level of detail).
// texsamp is expected to supply $t and $s. No selection pattern is attached.
2708 class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
2709                         NVPTXRegClass intype, dag texsamp>
2710     : NVPTXInst<(outs outtype:$r, outtype:$g,
2711                       outtype:$b, outtype:$a),
2712                  !con(texsamp, (ins intype:$x, intype:$lod)),
2713                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
2714                  []>;
// TEX_1D_LEVEL: _RR/_RI/_IR/_II variants of TEX_1D_LEVEL_base. The first
// letter is the kind of $t and the second the kind of $s (R = Int64Regs,
// I = i64imm).
2716 multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
2717                         NVPTXRegClass intype> {
2718   def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
2719                               (ins Int64Regs:$t, Int64Regs:$s)>;
2720   def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
2721                               (ins Int64Regs:$t, i64imm:$s)>;
2722   def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
2723                               (ins i64imm:$t, Int64Regs:$s)>;
2724   def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
2725                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations: f32/s32/u32 results, f32 coordinates only.
2728 defm TEX_1D_F32_F32_LEVEL :
2729   TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2730 defm TEX_1D_S32_F32_LEVEL :
2731   TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2732 defm TEX_1D_U32_F32_LEVEL :
2733   TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_1D_GRAD_base: 1D fetch with two extra intype operands, $gradx and
// $grady, each emitted as a one-element brace group after the coordinate
// vector (presumably the explicit gradients). texsamp is expected to supply $t
// and $s. No selection pattern is attached.
2735 class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
2736                        NVPTXRegClass intype, dag texsamp>
2737     : NVPTXInst<(outs outtype:$r, outtype:$g,
2738                       outtype:$b, outtype:$a),
2739                  !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
2740                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
2741                         " \\{$gradx\\}, \\{$grady\\};",
2742                  []>;
// TEX_1D_GRAD: _RR/_RI/_IR/_II variants of TEX_1D_GRAD_base. The first letter
// is the kind of $t and the second the kind of $s (R = Int64Regs, I = i64imm).
2744 multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
2745                        NVPTXRegClass intype> {
2746   def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
2747                              (ins Int64Regs:$t, Int64Regs:$s)>;
2748   def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
2749                              (ins Int64Regs:$t, i64imm:$s)>;
2750   def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
2751                              (ins i64imm:$t, Int64Regs:$s)>;
2752   def _II : TEX_1D_GRAD_base<inst, outtype, intype,
2753                              (ins i64imm:$t, i64imm:$s)>;
// Instantiations: f32/s32/u32 results, f32 coordinates only.
2756 defm TEX_1D_F32_F32_GRAD
2757   : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2758 defm TEX_1D_S32_F32_GRAD
2759   : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2760 defm TEX_1D_U32_F32_GRAD
2761   : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_1D_ARRAY_base: 1D array fetch. An Int32Regs operand $l precedes the
// intype coordinate $x, and both are emitted in the coordinate vector as
// {$l, $x} ($l is presumably the array layer index). texsamp is expected to
// supply $t and $s. No selection pattern is attached.
2763 class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
2764                         NVPTXRegClass intype, dag texsamp>
2765     : NVPTXInst<(outs outtype:$r, outtype:$g,
2766                       outtype:$b, outtype:$a),
2767                  !con(texsamp, (ins Int32Regs:$l, intype:$x)),
2768                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
2769                  []>;
// TEX_1D_ARRAY: _RR/_RI/_IR/_II variants of TEX_1D_ARRAY_base. The first
// letter is the kind of $t and the second the kind of $s (R = Int64Regs,
// I = i64imm).
2771 multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
2772                         NVPTXRegClass intype> {
2773   def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
2774                               (ins Int64Regs:$t, Int64Regs:$s)>;
2775   def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
2776                               (ins Int64Regs:$t, i64imm:$s)>;
2777   def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
2778                               (ins i64imm:$t, Int64Regs:$s)>;
2779   def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
2780                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations: f32/s32/u32 results with s32 or f32 coordinates.
2783 defm TEX_1D_ARRAY_F32_F32
2784   : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2785 defm TEX_1D_ARRAY_F32_S32
2786   : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
2787 defm TEX_1D_ARRAY_S32_S32
2788   : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
2789 defm TEX_1D_ARRAY_S32_F32
2790   : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2791 defm TEX_1D_ARRAY_U32_S32
2792   : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
2793 defm TEX_1D_ARRAY_U32_F32
2794   : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_1D_ARRAY_LEVEL_base: 1D array fetch with coordinate vector {$l, $x}
// followed by an extra intype operand $lod (presumably the level of detail).
// $l is an Int32Regs operand. texsamp is expected to supply $t and $s. No
// selection pattern is attached.
2796 class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
2797                               NVPTXRegClass intype, dag texsamp>
2798     : NVPTXInst<(outs outtype:$r, outtype:$g,
2799                       outtype:$b, outtype:$a),
2800                  !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
2801                  inst # " \t\\{$r, $g, $b, $a\\},"
2802                         " [$t, $s, \\{$l, $x\\}], $lod;",
2803                  []>;
// TEX_1D_ARRAY_LEVEL: _RR/_RI/_IR/_II variants of TEX_1D_ARRAY_LEVEL_base. The
// first letter is the kind of $t and the second the kind of $s
// (R = Int64Regs, I = i64imm).
2805 multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
2806                               NVPTXRegClass intype> {
2807   def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2808                                     (ins Int64Regs:$t, Int64Regs:$s)>;
2809   def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2810                                     (ins Int64Regs:$t, i64imm:$s)>;
2811   def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2812                                     (ins i64imm:$t, Int64Regs:$s)>;
2813   def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2814                                     (ins i64imm:$t, i64imm:$s)>;
// Instantiations: f32/s32/u32 results, f32 coordinates only.
2817 defm TEX_1D_ARRAY_F32_F32_LEVEL
2818   : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2819 defm TEX_1D_ARRAY_S32_F32_LEVEL
2820   : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2821 defm TEX_1D_ARRAY_U32_F32_LEVEL
2822   : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_1D_ARRAY_GRAD_base: 1D array fetch with coordinate vector {$l, $x}
// followed by one-element brace groups {$gradx} and {$grady} (presumably the
// explicit gradients). $l is an Int32Regs operand. texsamp is expected to
// supply $t and $s. No selection pattern is attached.
2824 class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
2825                              NVPTXRegClass intype, dag texsamp>
2826     : NVPTXInst<(outs outtype:$r, outtype:$g,
2827                       outtype:$b, outtype:$a),
2828                  !con(texsamp, (ins Int32Regs:$l, intype:$x,
2829                                     intype:$gradx, intype:$grady)),
2830                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
2831                         " \\{$gradx\\}, \\{$grady\\};",
2832                  []>;
// TEX_1D_ARRAY_GRAD: _RR/_RI/_IR/_II variants of TEX_1D_ARRAY_GRAD_base. The
// first letter is the kind of $t and the second the kind of $s
// (R = Int64Regs, I = i64imm).
2834 multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
2835                              NVPTXRegClass intype> {
2836   def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2837                                    (ins Int64Regs:$t, Int64Regs:$s)>;
2838   def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2839                                    (ins Int64Regs:$t, i64imm:$s)>;
2840   def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2841                                    (ins i64imm:$t, Int64Regs:$s)>;
2842   def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2843                                    (ins i64imm:$t, i64imm:$s)>;
// Instantiations: f32/s32/u32 results, f32 coordinates only.
2846 defm TEX_1D_ARRAY_F32_F32_GRAD
2847   : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2848 defm TEX_1D_ARRAY_S32_F32_GRAD
2849   : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2850 defm TEX_1D_ARRAY_U32_F32_GRAD
2851   : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_2D_base: pattern-less texture fetch with two intype coordinates, emitted
// as the vector {$x, $y}. Produces four outtype results $r, $g, $b, $a.
// texsamp is expected to supply the $t and $s operands referenced by the asm
// string.
2853 class TEX_2D_base<string inst, NVPTXRegClass outtype,
2854                   NVPTXRegClass intype, dag texsamp>
2855     : NVPTXInst<(outs outtype:$r, outtype:$g,
2856                       outtype:$b, outtype:$a),
2857                  !con(texsamp, (ins intype:$x, intype:$y)),
2858                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
2859                  []>;
// TEX_2D: _RR/_RI/_IR/_II variants of TEX_2D_base. The first letter is the
// kind of $t and the second the kind of $s (R = Int64Regs, I = i64imm).
2861 multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2862   def _RR : TEX_2D_base<inst, outtype, intype,
2863                         (ins Int64Regs:$t, Int64Regs:$s)>;
2864   def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
2865   def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
2866   def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
// Instantiations: f32/s32/u32 results with s32 or f32 coordinates.
2869 defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2870 defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
2871 defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
2872 defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2873 defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
2874 defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_2D_LEVEL_base: 2D fetch with coordinate vector {$x, $y} followed by an
// extra intype operand $lod (presumably the level of detail). texsamp is
// expected to supply $t and $s. No selection pattern is attached.
2876 class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
2877                         NVPTXRegClass intype, dag texsamp>
2878     : NVPTXInst<(outs outtype:$r, outtype:$g,
2879                       outtype:$b, outtype:$a),
2880                  !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
2881                  inst # " \t\\{$r, $g, $b, $a\\},"
2882                         " [$t, $s, \\{$x, $y\\}], $lod;",
2883                  []>;
// TEX_2D_LEVEL: _RR/_RI/_IR/_II variants of TEX_2D_LEVEL_base. The first
// letter is the kind of $t and the second the kind of $s (R = Int64Regs,
// I = i64imm).
2885 multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
2886                         NVPTXRegClass intype> {
2887   def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
2888                               (ins Int64Regs:$t, Int64Regs:$s)>;
2889   def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
2890                               (ins Int64Regs:$t, i64imm:$s)>;
2891   def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
2892                               (ins i64imm:$t, Int64Regs:$s)>;
2893   def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
2894                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations: f32/s32/u32 results, f32 coordinates only.
2897 defm TEX_2D_F32_F32_LEVEL :
2898   TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2899 defm TEX_2D_S32_F32_LEVEL :
2900   TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2901 defm TEX_2D_U32_F32_LEVEL :
2902   TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_2D_GRAD_base: 2D fetch with coordinate vector {$x, $y} followed by two
// two-element groups, {$gradx0, $gradx1} and {$grady0, $grady1} (presumably
// the explicit gradients). All six are intype operands. texsamp is expected to
// supply $t and $s. No selection pattern is attached.
2904 class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
2905                        NVPTXRegClass intype, dag texsamp>
2906     : NVPTXInst<(outs outtype:$r, outtype:$g,
2907                       outtype:$b, outtype:$a),
2908                  !con(texsamp, (ins intype:$x, intype:$y,
2909                                     intype:$gradx0, intype:$gradx1,
2910                                     intype:$grady0, intype:$grady1)),
2911                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
2912                         " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
2913                  []>;
// TEX_2D_GRAD: _RR/_RI/_IR/_II variants of TEX_2D_GRAD_base. The first letter
// is the kind of $t and the second the kind of $s (R = Int64Regs, I = i64imm).
2915 multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
2916                        NVPTXRegClass intype> {
2917   def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
2918                               (ins Int64Regs:$t, Int64Regs:$s)>;
2919   def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
2920                               (ins Int64Regs:$t, i64imm:$s)>;
2921   def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
2922                               (ins i64imm:$t, Int64Regs:$s)>;
2923   def _II : TEX_2D_GRAD_base<inst, outtype, intype,
2924                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations: f32/s32/u32 results, f32 coordinates only.
2927 defm TEX_2D_F32_F32_GRAD :
2928   TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2929 defm TEX_2D_S32_F32_GRAD :
2930   TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2931 defm TEX_2D_U32_F32_GRAD :
2932   TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_2D_ARRAY_base: 2D array fetch. Inputs are an Int32Regs operand $l and
// intype coordinates $x, $y. The asm string emits a four-element coordinate
// vector {$l, $x, $y, $y}, i.e. $y is repeated to fill the fourth slot.
// NOTE(review): presumably PTX requires a four-element vector here and ignores
// the last element -- confirm against the PTX ISA tex documentation.
// texsamp is expected to supply $t and $s. No selection pattern is attached.
2934 class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
2935                         NVPTXRegClass intype, dag texsamp>
2936     : NVPTXInst<(outs outtype:$r, outtype:$g,
2937                       outtype:$b, outtype:$a),
2938                  !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
2939                  inst # " \t\\{$r, $g, $b, $a\\},"
2940                         " [$t, $s, \\{$l, $x, $y, $y\\}];",
2941                  []>;
// TEX_2D_ARRAY: _RR/_RI/_IR/_II variants of TEX_2D_ARRAY_base. The first
// letter is the kind of $t and the second the kind of $s (R = Int64Regs,
// I = i64imm).
2943 multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
2944                         NVPTXRegClass intype> {
2945   def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
2946                               (ins Int64Regs:$t, Int64Regs:$s)>;
2947   def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
2948                               (ins Int64Regs:$t, i64imm:$s)>;
2949   def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
2950                               (ins i64imm:$t, Int64Regs:$s)>;
2951   def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
2952                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations: f32/s32/u32 results with s32 or f32 coordinates.
2955 defm TEX_2D_ARRAY_F32_F32
2956   : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
2957 defm TEX_2D_ARRAY_F32_S32
2958   : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
2959 defm TEX_2D_ARRAY_S32_S32
2960   : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
2961 defm TEX_2D_ARRAY_S32_F32
2962   : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
2963 defm TEX_2D_ARRAY_U32_S32
2964   : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
2965 defm TEX_2D_ARRAY_U32_F32
2966   : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_2D_ARRAY_LEVEL_base: 2D array fetch with the four-element coordinate
// vector {$l, $x, $y, $y} ($y repeated in the last slot) followed by an extra
// intype operand $lod (presumably the level of detail). $l is an Int32Regs
// operand. texsamp is expected to supply $t and $s. No selection pattern is
// attached.
2968 class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
2969                               NVPTXRegClass intype, dag texsamp>
2970     : NVPTXInst<(outs outtype:$r, outtype:$g,
2971                       outtype:$b, outtype:$a),
2972                  !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
2973                                     intype:$lod)),
2974                  inst # " \t\\{$r, $g, $b, $a\\},"
2975                         " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2976                  []>;
// TEX_2D_ARRAY_LEVEL: _RR/_RI/_IR/_II variants of TEX_2D_ARRAY_LEVEL_base. The
// first letter is the kind of $t and the second the kind of $s
// (R = Int64Regs, I = i64imm).
2978 multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
2979                               NVPTXRegClass intype> {
2980   def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
2981                               (ins Int64Regs:$t, Int64Regs:$s)>;
2982   def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
2983                               (ins Int64Regs:$t, i64imm:$s)>;
2984   def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
2985                               (ins i64imm:$t, Int64Regs:$s)>;
2986   def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
2987                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations: f32/s32/u32 results, f32 coordinates only.
2990 defm TEX_2D_ARRAY_F32_F32_LEVEL
2991   : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
2992 defm TEX_2D_ARRAY_S32_F32_LEVEL
2993   : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
2994 defm TEX_2D_ARRAY_U32_F32_LEVEL
2995   : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_2D_ARRAY_GRAD_base: 2D array fetch with the four-element coordinate
// vector {$l, $x, $y, $y} ($y repeated in the last slot) followed by two
// two-element groups, {$gradx0, $gradx1} and {$grady0, $grady1} (presumably
// the explicit gradients). $l is an Int32Regs operand; the rest are intype.
// texsamp is expected to supply $t and $s. No selection pattern is attached.
2997 class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
2998                              NVPTXRegClass intype, dag texsamp>
2999     : NVPTXInst<(outs outtype:$r, outtype:$g,
3000                       outtype:$b, outtype:$a),
3001                  !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3002                                     intype:$gradx0, intype:$gradx1,
3003                                     intype:$grady0, intype:$grady1)),
3004                  inst # " \t\\{$r, $g, $b, $a\\},"
3005                         " [$t, $s, \\{$l, $x, $y, $y\\}],"
3006                         " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
3007                  []>;
// TEX_2D_ARRAY_GRAD: _RR/_RI/_IR/_II variants of TEX_2D_ARRAY_GRAD_base. The
// first letter is the kind of $t and the second the kind of $s
// (R = Int64Regs, I = i64imm).
3009 multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3010                              NVPTXRegClass intype> {
3011   def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3012                               (ins Int64Regs:$t, Int64Regs:$s)>;
3013   def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3014                               (ins Int64Regs:$t, i64imm:$s)>;
3015   def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3016                               (ins i64imm:$t, Int64Regs:$s)>;
3017   def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3018                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations: f32/s32/u32 results, f32 coordinates only.
3021 defm TEX_2D_ARRAY_F32_F32_GRAD
3022   : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3023 defm TEX_2D_ARRAY_S32_F32_GRAD
3024   : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3025 defm TEX_2D_ARRAY_U32_F32_GRAD
3026   : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_3D_base: pattern-less texture fetch with three intype coordinates. The
// asm string emits a four-element coordinate vector {$x, $y, $z, $z}, i.e. $z
// is repeated to fill the fourth slot.
// NOTE(review): presumably PTX requires a four-element vector here and ignores
// the last element -- confirm against the PTX ISA tex documentation.
// texsamp is expected to supply the $t and $s operands.
3028 class TEX_3D_base<string inst, NVPTXRegClass outtype,
3029                   NVPTXRegClass intype, dag texsamp>
3030     : NVPTXInst<(outs outtype:$r, outtype:$g,
3031                       outtype:$b, outtype:$a),
3032                  !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
3033                  inst # " \t\\{$r, $g, $b, $a\\},"
3034                         " [$t, $s, \\{$x, $y, $z, $z\\}];",
3035                  []>;
// TEX_3D: _RR/_RI/_IR/_II variants of TEX_3D_base. The first letter is the
// kind of $t and the second the kind of $s (R = Int64Regs, I = i64imm).
3037 multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3038   def _RR : TEX_3D_base<inst, outtype, intype,
3039                               (ins Int64Regs:$t, Int64Regs:$s)>;
3040   def _RI : TEX_3D_base<inst, outtype, intype,
3041                               (ins Int64Regs:$t, i64imm:$s)>;
3042   def _IR : TEX_3D_base<inst, outtype, intype,
3043                               (ins i64imm:$t, Int64Regs:$s)>;
3044   def _II : TEX_3D_base<inst, outtype, intype,
3045                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations: f32/s32/u32 results with s32 or f32 coordinates.
3048 defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3049 defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
3050 defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
3051 defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3052 defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
3053 defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_3D_LEVEL_base: 3D fetch with the four-element coordinate vector
// {$x, $y, $z, $z} ($z repeated in the last slot) followed by an extra intype
// operand $lod (presumably the level of detail). texsamp is expected to supply
// $t and $s. No selection pattern is attached.
3055 class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
3056                         NVPTXRegClass intype, dag texsamp>
3057     : NVPTXInst<(outs outtype:$r, outtype:$g,
3058                       outtype:$b, outtype:$a),
3059                  !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3060                                     intype:$lod)),
3061                  inst # " \t\\{$r, $g, $b, $a\\},"
3062                         " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
3063                  []>;
// TEX_3D_LEVEL: _RR/_RI/_IR/_II variants of TEX_3D_LEVEL_base. The first
// letter is the kind of $t and the second the kind of $s (R = Int64Regs,
// I = i64imm).
3065 multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
3066                         NVPTXRegClass intype> {
3067   def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,
3068                               (ins Int64Regs:$t, Int64Regs:$s)>;
3069   def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
3070                               (ins Int64Regs:$t, i64imm:$s)>;
3071   def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
3072                               (ins i64imm:$t, Int64Regs:$s)>;
3073   def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
3074                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations: f32/s32/u32 results, f32 coordinates only.
3077 defm TEX_3D_F32_F32_LEVEL
3078   : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3079 defm TEX_3D_S32_F32_LEVEL
3080   : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3081 defm TEX_3D_U32_F32_LEVEL
3082   : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_3D_GRAD_base: 3D fetch with explicit gradient operands. Inputs are the
// coordinates $x, $y, $z plus $gradx0..2 and $grady0..2, all intype. Each of
// the three vectors in the asm string is padded to four elements by repeating
// its last component: {$x, $y, $z, $z}, {$gradx0, $gradx1, $gradx2, $gradx2}
// and {$grady0, $grady1, $grady2, $grady2}. texsamp is expected to supply $t
// and $s. No selection pattern is attached.
3084 class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
3085                        NVPTXRegClass intype, dag texsamp>
3086     : NVPTXInst<(outs outtype:$r, outtype:$g,
3087                       outtype:$b, outtype:$a),
3088                  !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3089                                     intype :$gradx0, intype:$gradx1,
3090                                     intype:$gradx2, intype:$grady0,
3091                                     intype:$grady1, intype:$grady2)),
3092                  inst # " \t\\{$r, $g, $b, $a\\},"
3093                         " [$t, $s, \\{$x, $y, $z, $z\\}],"
3094                         " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3095                         " \\{$grady0, $grady1, $grady2, $grady2\\};",
3096                  []>;
3098 multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
3099                        NVPTXRegClass intype> {
3100   def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
3101                              (ins Int64Regs:$t, Int64Regs:$s)>;
3102   def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
3103                              (ins Int64Regs:$t, i64imm:$s)>;
3104   def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
3105                              (ins i64imm:$t, Int64Regs:$s)>;
3106   def _II : TEX_3D_GRAD_base<inst, outtype, intype,
3107                              (ins i64imm:$t, i64imm:$s)>;
// Instantiate TEX_3D_GRAD ("tex.grad.3d.v4.*") for f32, s32 and u32 results.
// s32 and u32 both use Int32Regs outputs; inputs are always Float32Regs.
3110 defm TEX_3D_F32_F32_GRAD
3111   : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3112 defm TEX_3D_S32_F32_GRAD
3113   : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3114 defm TEX_3D_U32_F32_GRAD
3115   : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the cube texture fetch.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y, $z.
//   texsamp - dag prepended to the inputs via !con; expected to define the
//             $t and $s operands referenced by the asm string.
// $z is printed twice so the coordinate list has four entries.
3117 class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
3118                     NVPTXRegClass intype, dag texsamp>
3119     : NVPTXInst<(outs outtype:$r, outtype:$g,
3120                       outtype:$b, outtype:$a),
3121                  !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
3122                  inst # " \t\\{$r, $g, $b, $a\\},"
3123                         " [$t, $s, \\{$x, $y, $z, $z\\}];",
3124                  []>;
// Expands TEX_CUBE_base into the _RR/_RI/_IR/_II records: each of $t and $s
// is either an Int64Regs register (R) or an i64imm immediate (I).
3126 multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3127   def _RR : TEX_CUBE_base<inst, outtype, intype,
3128                           (ins Int64Regs:$t, Int64Regs:$s)>;
3129   def _RI : TEX_CUBE_base<inst, outtype, intype,
3130                           (ins Int64Regs:$t, i64imm:$s)>;
3131   def _IR : TEX_CUBE_base<inst, outtype, intype,
3132                           (ins i64imm:$t, Int64Regs:$s)>;
3133   def _II : TEX_CUBE_base<inst, outtype, intype,
3134                           (ins i64imm:$t, i64imm:$s)>;
// Instantiate TEX_CUBE ("tex.cube.v4.*") for f32, s32 and u32 results.
// s32 and u32 both use Int32Regs outputs; inputs are always Float32Regs.
3137 defm TEX_CUBE_F32_F32
3138   : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3139 defm TEX_CUBE_S32_F32
3140   : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3141 defm TEX_CUBE_U32_F32
3142   : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the cube texture fetch with an explicit $lod
// operand. Parameters are as for the other *_base classes in this section:
// inst is the mnemonic text, outtype the class of $r/$g/$b/$a, intype the
// class of $x/$y/$z and $lod, and texsamp the leading dag expected to define
// $t and $s. $z is printed twice; $lod is printed after the address.
3144 class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
3145                           NVPTXRegClass intype, dag texsamp>
3146     : NVPTXInst<(outs outtype:$r, outtype:$g,
3147                       outtype:$b, outtype:$a),
3148                  !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3149                                     intype:$lod)),
3150                  inst # " \t\\{$r, $g, $b, $a\\},"
3151                         " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
3152                  []>;
// Expands TEX_CUBE_LEVEL_base into the _RR/_RI/_IR/_II records: each of $t
// and $s is either an Int64Regs register (R) or an i64imm immediate (I).
3154 multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
3155                           NVPTXRegClass intype> {
3156   def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3157                                 (ins Int64Regs:$t, Int64Regs:$s)>;
3158   def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3159                                 (ins Int64Regs:$t, i64imm:$s)>;
3160   def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3161                                 (ins i64imm:$t, Int64Regs:$s)>;
3162   def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3163                                 (ins i64imm:$t, i64imm:$s)>;
// Instantiate TEX_CUBE_LEVEL ("tex.level.cube.v4.*") for f32, s32 and u32
// results. s32 and u32 both use Int32Regs outputs; inputs are Float32Regs.
3166 defm TEX_CUBE_F32_F32_LEVEL
3167   : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3168 defm TEX_CUBE_S32_F32_LEVEL
3169   : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3170 defm TEX_CUBE_U32_F32_LEVEL
3171   : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the cube-array texture fetch.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y, $z.
//   texsamp - dag prepended to the inputs via !con; expected to define the
//             $t and $s operands referenced by the asm string.
// $l is always Int32Regs regardless of intype and is printed first in the
// coordinate list (presumably the array index -- confirm against callers).
3173 class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
3174                           NVPTXRegClass intype, dag texsamp>
3175     : NVPTXInst<(outs outtype:$r, outtype:$g,
3176                       outtype:$b, outtype:$a),
3177                  !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3178                                     intype:$z)),
3179                  inst # " \t\\{$r, $g, $b, $a\\},"
3180                         " [$t, $s, \\{$l, $x, $y, $z\\}];",
3181                  []>;
// Expands TEX_CUBE_ARRAY_base into the _RR/_RI/_IR/_II records: each of $t
// and $s is either an Int64Regs register (R) or an i64imm immediate (I).
3183 multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3184                           NVPTXRegClass intype> {
3185   def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3186                                 (ins Int64Regs:$t, Int64Regs:$s)>;
3187   def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3188                                 (ins Int64Regs:$t, i64imm:$s)>;
3189   def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3190                                 (ins i64imm:$t, Int64Regs:$s)>;
3191   def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3192                                 (ins i64imm:$t, i64imm:$s)>;
// Instantiate TEX_CUBE_ARRAY ("tex.acube.v4.*") for f32, s32 and u32 results.
// s32 and u32 both use Int32Regs outputs; inputs are always Float32Regs.
3195 defm TEX_CUBE_ARRAY_F32_F32
3196   : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3197 defm TEX_CUBE_ARRAY_S32_F32
3198   : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3199 defm TEX_CUBE_ARRAY_U32_F32
3200   : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the cube-array texture fetch with an explicit
// $lod operand. inst is the mnemonic text, outtype the class of $r/$g/$b/$a,
// intype the class of $x/$y/$z and $lod, and texsamp the leading dag expected
// to define $t and $s. $l is always Int32Regs and is printed first in the
// coordinate list; $lod is printed after the address.
3202 class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3203                                 NVPTXRegClass intype, dag texsamp>
3204     : NVPTXInst<(outs outtype:$r, outtype:$g,
3205                       outtype:$b, outtype:$a),
3206                  !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3207                                     intype:$z, intype:$lod)),
3208                  inst # " \t\\{$r, $g, $b, $a\\},"
3209                         " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
3210                  []>;
// Expands TEX_CUBE_ARRAY_LEVEL_base into the _RR/_RI/_IR/_II records: each of
// $t and $s is either an Int64Regs register (R) or an i64imm immediate (I).
3212 multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3213                                 NVPTXRegClass intype> {
3214   def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3215                                       (ins Int64Regs:$t, Int64Regs:$s)>;
3216   def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3217                                       (ins Int64Regs:$t, i64imm:$s)>;
3218   def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3219                                       (ins i64imm:$t, Int64Regs:$s)>;
3220   def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3221                                       (ins i64imm:$t, i64imm:$s)>;
// Instantiate TEX_CUBE_ARRAY_LEVEL ("tex.level.acube.v4.*") for f32, s32 and
// u32 results. s32 and u32 both use Int32Regs outputs; inputs are
// Float32Regs.
3224 defm TEX_CUBE_ARRAY_F32_F32_LEVEL
3225   : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3226                          Float32Regs, Float32Regs>;
3227 defm TEX_CUBE_ARRAY_S32_F32_LEVEL
3228   : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3229                          Int32Regs, Float32Regs>;
3230 defm TEX_CUBE_ARRAY_U32_F32_LEVEL
3231   : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3232                          Int32Regs, Float32Regs>;
// Base instruction record for the 2D tld4 instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $v0..$v3.
//   intype  - register class of the coordinates $x, $y.
//   texsamp - dag prepended to the inputs via !con; expected to define the
//             $t and $s operands referenced by the asm string.
3234 class TLD4_2D_base<string inst, NVPTXRegClass outtype,
3235                    NVPTXRegClass intype, dag texsamp>
3236     : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3237                       outtype:$v2, outtype:$v3),
3238                  !con(texsamp, (ins intype:$x, intype:$y)),
3239                  inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
3240                  []>;
// Expands TLD4_2D_base into the _RR/_RI/_IR/_II records: each of $t and $s is
// either an Int64Regs register (R) or an i64imm immediate (I).
3242 multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3243   def _RR : TLD4_2D_base<inst, outtype, intype,
3244                          (ins Int64Regs:$t, Int64Regs:$s)>;
3245   def _RI : TLD4_2D_base<inst, outtype, intype,
3246                          (ins Int64Regs:$t, i64imm:$s)>;
3247   def _IR : TLD4_2D_base<inst, outtype, intype,
3248                          (ins i64imm:$t, Int64Regs:$s)>;
3249   def _II : TLD4_2D_base<inst, outtype, intype,
3250                          (ins i64imm:$t, i64imm:$s)>;
// Instantiate TLD4_2D for every combination of mnemonic suffix (.r, .g, .b,
// .a) and result type (f32, s32, u32): twelve defms in three groups of four.
// s32 and u32 both use Int32Regs outputs; inputs are always Float32Regs.
3253 defm TLD4_R_2D_F32_F32
3254   : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3255 defm TLD4_G_2D_F32_F32
3256   : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3257 defm TLD4_B_2D_F32_F32
3258   : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3259 defm TLD4_A_2D_F32_F32
3260   : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3262 defm TLD4_R_2D_S32_F32
3263   : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3264 defm TLD4_G_2D_S32_F32
3265   : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3266 defm TLD4_B_2D_S32_F32
3267   : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3268 defm TLD4_A_2D_S32_F32
3269   : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3271 defm TLD4_R_2D_U32_F32
3272   : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3273 defm TLD4_G_2D_U32_F32
3274   : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3275 defm TLD4_B_2D_U32_F32
3276   : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3277 defm TLD4_A_2D_U32_F32
3278   : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3283 // texmode_unified
3284 let IsTex = true, IsTexModeUnified = true in {
3285 // Texture fetch instructions using handles
// Base instruction record for the unified-mode 1D texture fetch.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinate $x.
//   tex     - dag prepended to the inputs via !con; expected to define the
//             $t operand referenced by the asm string.
// Unlike the non-unified records, the asm string has no $s operand.
3287 class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
3288                           NVPTXRegClass intype, dag tex>
3289     : NVPTXInst<(outs outtype:$r, outtype:$g,
3290                       outtype:$b, outtype:$a),
3291                  !con(tex, (ins intype:$x)),
3292                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3293                  []>;
// Expands TEX_UNIFIED_1D_base into two records: _R takes $t as an Int64Regs
// register, _I takes it as an i64imm immediate.
3295 multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
3296                           NVPTXRegClass intype> {
3297   def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3298   def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_1D ("tex.1d.v4.*") for each result type (f32, s32,
// u32) paired with each input type (s32, f32). f32 maps to Float32Regs;
// s32 and u32 both map to Int32Regs.
3301 defm TEX_UNIFIED_1D_F32_S32
3302   : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
3303 defm TEX_UNIFIED_1D_F32_F32
3304   : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3305 defm TEX_UNIFIED_1D_S32_S32
3306   : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
3307 defm TEX_UNIFIED_1D_S32_F32
3308   : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3309 defm TEX_UNIFIED_1D_U32_S32
3310   : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
3311 defm TEX_UNIFIED_1D_U32_F32
3312   : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode 1D texture fetch with an
// explicit $lod operand. inst is the mnemonic text, outtype the class of
// $r/$g/$b/$a, intype the class of $x and $lod, and tex the leading dag
// expected to define $t. $lod is printed after the address.
3314 class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
3315                                 NVPTXRegClass intype, dag tex>
3316     : NVPTXInst<(outs outtype:$r, outtype:$g,
3317                       outtype:$b, outtype:$a),
3318                  !con(tex, (ins intype:$x, intype:$lod)),
3319                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
3320                  []>;
// Expands TEX_UNIFIED_1D_LEVEL_base into two records: _R takes $t as an
// Int64Regs register, _I takes it as an i64imm immediate.
3322 multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
3323                                 NVPTXRegClass intype> {
3324   def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3325   def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_1D_LEVEL ("tex.level.1d.v4.*") for f32, s32 and u32
// results. s32 and u32 both use Int32Regs outputs; inputs are Float32Regs.
3328 defm TEX_UNIFIED_1D_F32_F32_LEVEL
3329   : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3330 defm TEX_UNIFIED_1D_S32_F32_LEVEL
3331   : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3332 defm TEX_UNIFIED_1D_U32_F32_LEVEL
3333   : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode 1D texture fetch with explicit
// gradients. inst is the mnemonic text, outtype the class of $r/$g/$b/$a,
// intype the class of $x, $gradx and $grady, and tex the leading dag expected
// to define $t. Each gradient is printed as its own single-entry brace list.
3335 class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
3336                                NVPTXRegClass intype, dag tex>
3337     : NVPTXInst<(outs outtype:$r, outtype:$g,
3338                       outtype:$b, outtype:$a),
3339                  !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
3340                  inst # " \t\\{$r, $g, $b, $a\\},"
3341                         " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3342                  []>;
// Expands TEX_UNIFIED_1D_GRAD_base into two records: _R takes $t as an
// Int64Regs register, _I takes it as an i64imm immediate.
3344 multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
3345                                NVPTXRegClass intype> {
3346   def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3347   def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_1D_GRAD ("tex.grad.1d.v4.*") for f32, s32 and u32
// results. s32 and u32 both use Int32Regs outputs; inputs are Float32Regs.
3350 defm TEX_UNIFIED_1D_F32_F32_GRAD
3351   : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3352 defm TEX_UNIFIED_1D_S32_F32_GRAD
3353   : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3354 defm TEX_UNIFIED_1D_U32_F32_GRAD
3355   : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode 1D-array texture fetch.
// inst is the mnemonic text, outtype the class of $r/$g/$b/$a, intype the
// class of $x, and tex the leading dag expected to define $t. $l is always
// Int32Regs regardless of intype and is printed before $x (presumably the
// array index -- confirm against callers).
3357 class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
3358                                 NVPTXRegClass intype, dag tex>
3359     : NVPTXInst<(outs outtype:$r, outtype:$g,
3360                       outtype:$b, outtype:$a),
3361                  !con(tex, (ins Int32Regs:$l, intype:$x)),
3362                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
3363                  []>;
// Expands TEX_UNIFIED_1D_ARRAY_base into two records: _R takes $t as an
// Int64Regs register, _I takes it as an i64imm immediate.
3365 multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
3366                                 NVPTXRegClass intype> {
3367   def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3368   def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_1D_ARRAY ("tex.a1d.v4.*") for each result type
// (f32, s32, u32) paired with each input type (s32, f32). f32 maps to
// Float32Regs; s32 and u32 both map to Int32Regs.
3371 defm TEX_UNIFIED_1D_ARRAY_F32_S32
3372   : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
3373 defm TEX_UNIFIED_1D_ARRAY_F32_F32
3374   : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
3375 defm TEX_UNIFIED_1D_ARRAY_S32_S32
3376   : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
3377 defm TEX_UNIFIED_1D_ARRAY_S32_F32
3378   : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
3379 defm TEX_UNIFIED_1D_ARRAY_U32_S32
3380   : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
3381 defm TEX_UNIFIED_1D_ARRAY_U32_F32
3382   : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode 1D-array texture fetch with an
// explicit $lod operand. inst is the mnemonic text, outtype the class of
// $r/$g/$b/$a, intype the class of $x and $lod, and tex the leading dag
// expected to define $t. $l is always Int32Regs and is printed before $x.
3384 class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3385                                       NVPTXRegClass intype, dag tex>
3386     : NVPTXInst<(outs outtype:$r, outtype:$g,
3387                       outtype:$b, outtype:$a),
3388                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
3389                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
3390                  []>;
// Expands TEX_UNIFIED_1D_ARRAY_LEVEL_base into two records: _R takes $t as an
// Int64Regs register, _I takes it as an i64imm immediate.
3392 multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3393                                       NVPTXRegClass intype> {
3394   def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
3395                                            (ins Int64Regs:$t)>;
3396   def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
3397                                            (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_1D_ARRAY_LEVEL ("tex.level.a1d.v4.*") for f32, s32
// and u32 results. s32 and u32 both use Int32Regs outputs; inputs are
// Float32Regs.
3400 defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3401   : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
3402                                Float32Regs, Float32Regs>;
3403 defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3404   : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
3405                                Int32Regs, Float32Regs>;
3406 defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3407   : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
3408                                Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode 1D-array texture fetch with
// explicit gradients. inst is the mnemonic text, outtype the class of
// $r/$g/$b/$a, intype the class of $x, $gradx and $grady, and tex the leading
// dag expected to define $t. $l is always Int32Regs and is printed before $x.
// NOTE(review): the second asm fragment starts with two spaces, whereas the
// 1D (non-array) GRAD record uses one; this looks like it only affects the
// spacing of the printed text -- confirm before normalising.
3410 class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3411                                      NVPTXRegClass intype, dag tex>
3412     : NVPTXInst<(outs outtype:$r, outtype:$g,
3413                       outtype:$b, outtype:$a),
3414                  !con(tex, (ins Int32Regs:$l, intype:$x,
3415                                 intype:$gradx, intype:$grady)),
3416                  inst # " \t\\{$r, $g, $b, $a\\},"
3417                         "  [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3418                  []>;
// Expands TEX_UNIFIED_1D_ARRAY_GRAD_base into two records: _R takes $t as an
// Int64Regs register, _I takes it as an i64imm immediate.
3420 multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3421                                      NVPTXRegClass intype> {
3422   def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
3423                                           (ins Int64Regs:$t)>;
3424   def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
3425                                           (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_1D_ARRAY_GRAD ("tex.grad.a1d.v4.*") for f32, s32
// and u32 results. s32 and u32 both use Int32Regs outputs; inputs are
// Float32Regs.
3428 defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3429   : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
3430                               Float32Regs, Float32Regs>;
3431 defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3432   : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
3433                               Int32Regs, Float32Regs>;
3434 defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3435   : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
3436                               Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode 2D texture fetch.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y.
//   tex     - dag prepended to the inputs via !con; expected to define the
//             $t operand referenced by the asm string.
3438 class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3439                           NVPTXRegClass intype, dag tex>
3440     : NVPTXInst<(outs outtype:$r, outtype:$g,
3441                       outtype:$b, outtype:$a),
3442                  !con(tex, (ins intype:$x, intype:$y)),
3443                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
3444                  []>;
// Expands TEX_UNIFIED_2D_base into two records: _R takes $t as an Int64Regs
// register, _I takes it as an i64imm immediate.
3446 multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3447                           NVPTXRegClass intype> {
3448   def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3449   def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_2D ("tex.2d.v4.*") for each result type (f32, s32,
// u32) paired with each input type (s32, f32). f32 maps to Float32Regs;
// s32 and u32 both map to Int32Regs.
3452 defm TEX_UNIFIED_2D_F32_S32
3453   : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
3454 defm TEX_UNIFIED_2D_F32_F32
3455   : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3456 defm TEX_UNIFIED_2D_S32_S32
3457   : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
3458 defm TEX_UNIFIED_2D_S32_F32
3459   : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3460 defm TEX_UNIFIED_2D_U32_S32
3461   : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
3462 defm TEX_UNIFIED_2D_U32_F32
3463   : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode 2D texture fetch with an
// explicit $lod operand. inst is the mnemonic text, outtype the class of
// $r/$g/$b/$a, intype the class of $x, $y and $lod, and tex the leading dag
// expected to define $t. $lod is printed after the address.
3465 class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
3466                                 NVPTXRegClass intype, dag tex>
3467     : NVPTXInst<(outs outtype:$r, outtype:$g,
3468                       outtype:$b, outtype:$a),
3469                  !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
3470                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
3471                  []>;
// Expands TEX_UNIFIED_2D_LEVEL_base into two records: _R takes $t as an
// Int64Regs register, _I takes it as an i64imm immediate.
3473 multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
3474                                 NVPTXRegClass intype> {
3475   def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3476   def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_2D_LEVEL ("tex.level.2d.v4.*") for f32, s32 and u32
// results. s32 and u32 both use Int32Regs outputs; inputs are Float32Regs.
3479 defm TEX_UNIFIED_2D_F32_F32_LEVEL
3480   : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3481 defm TEX_UNIFIED_2D_S32_F32_LEVEL
3482   : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3483 defm TEX_UNIFIED_2D_U32_F32_LEVEL
3484   : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode 2D texture fetch with explicit
// gradients. inst is the mnemonic text, outtype the class of $r/$g/$b/$a,
// intype the class of $x, $y and the four gradient operands, and tex the
// leading dag expected to define $t. The gradients are printed as two
// two-entry brace lists: {$gradx0, $gradx1} and {$grady0, $grady1}.
3486 class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
3487                                NVPTXRegClass intype, dag tex>
3488     : NVPTXInst<(outs outtype:$r, outtype:$g,
3489                       outtype:$b, outtype:$a),
3490                  !con(tex, (ins intype:$x, intype:$y,
3491                                 intype:$gradx0, intype:$gradx1,
3492                                 intype:$grady0, intype:$grady1)),
3493                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
3494                         " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
3495                  []>;
// Expands TEX_UNIFIED_2D_GRAD_base into two records: _R takes $t as an
// Int64Regs register, _I takes it as an i64imm immediate.
3496 multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
3497                                NVPTXRegClass intype> {
3498   def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3499   def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_2D_GRAD ("tex.grad.2d.v4.*") for f32, s32 and u32
// results. s32 and u32 both use Int32Regs outputs; inputs are Float32Regs.
3502 defm TEX_UNIFIED_2D_F32_F32_GRAD
3503   : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3504 defm TEX_UNIFIED_2D_S32_F32_GRAD
3505   : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3506 defm TEX_UNIFIED_2D_U32_F32_GRAD
3507   : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode 2D-array texture fetch.
// inst is the mnemonic text, outtype the class of $r/$g/$b/$a, intype the
// class of $x and $y, and tex the leading dag expected to define $t.
// $l is always Int32Regs and is printed first; $y is printed twice so the
// coordinate list has four entries.
3509 class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
3510                                 NVPTXRegClass intype, dag tex>
3511     : NVPTXInst<(outs outtype:$r, outtype:$g,
3512                       outtype:$b, outtype:$a),
3513                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
3514                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
3515                  []>;
// Expands TEX_UNIFIED_2D_ARRAY_base into two records: _R takes $t as an
// Int64Regs register, _I takes it as an i64imm immediate.
3516 multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
3517                                 NVPTXRegClass intype> {
3518   def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3519   def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_2D_ARRAY ("tex.a2d.v4.*") for each result type
// (f32, s32, u32) paired with each input type (s32, f32). f32 maps to
// Float32Regs; s32 and u32 both map to Int32Regs.
3522 defm TEX_UNIFIED_2D_ARRAY_F32_S32
3523   : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
3524 defm TEX_UNIFIED_2D_ARRAY_F32_F32
3525   : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3526 defm TEX_UNIFIED_2D_ARRAY_S32_S32
3527   : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
3528 defm TEX_UNIFIED_2D_ARRAY_S32_F32
3529   : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3530 defm TEX_UNIFIED_2D_ARRAY_U32_S32
3531   : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
3532 defm TEX_UNIFIED_2D_ARRAY_U32_F32
3533   : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode 2D-array texture fetch with an
// explicit $lod operand. inst is the mnemonic text, outtype the class of
// $r/$g/$b/$a, intype the class of $x, $y and $lod, and tex the leading dag
// expected to define $t. $l is always Int32Regs and is printed first; $y is
// printed twice so the coordinate list has four entries.
// NOTE(review): the second asm fragment starts with two spaces, whereas the
// neighbouring 2D-array records use one; this looks like it only affects the
// spacing of the printed text -- confirm before normalising.
3535 class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3536                                       NVPTXRegClass intype, dag tex>
3537     : NVPTXInst<(outs outtype:$r, outtype:$g,
3538                       outtype:$b, outtype:$a),
3539                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3540                                 intype:$lod)),
3541                  inst # " \t\\{$r, $g, $b, $a\\},"
3542                         "  [$t, \\{$l, $x, $y, $y\\}], $lod;",
3543                  []>;
// Expands TEX_UNIFIED_2D_ARRAY_LEVEL_base into two records: _R takes $t as an
// Int64Regs register, _I takes it as an i64imm immediate.
3544 multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3545                                       NVPTXRegClass intype> {
3546   def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3547                                            (ins Int64Regs:$t)>;
3548   def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3549                                            (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_2D_ARRAY_LEVEL ("tex.level.a2d.v4.*") for f32, s32
// and u32 results. s32 and u32 both use Int32Regs outputs; inputs are
// Float32Regs.
3552 defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3553   : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
3554                                Float32Regs, Float32Regs>;
3555 defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3556   : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
3557                                Int32Regs, Float32Regs>;
3558 defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3559   : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
3560                                Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode 2D-array texture fetch with
// explicit gradients. inst is the mnemonic text, outtype the class of
// $r/$g/$b/$a, intype the class of $x, $y and the four gradient operands, and
// tex the leading dag expected to define $t. $l is always Int32Regs and is
// printed first; $y is printed twice in the coordinate list. The gradients
// are printed as two two-entry brace lists.
3562 class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3563                                      NVPTXRegClass intype, dag tex>
3564     : NVPTXInst<(outs outtype:$r, outtype:$g,
3565                       outtype:$b, outtype:$a),
3566                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3567                                 intype:$gradx0, intype:$gradx1,
3568                                 intype:$grady0, intype:$grady1)),
3569                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
3570                         " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
3571                  []>;
// Expands TEX_UNIFIED_2D_ARRAY_GRAD_base into two records: _R takes $t as an
// Int64Regs register, _I takes it as an i64imm immediate.
3572 multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3573                                      NVPTXRegClass intype> {
3574   def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
3575                                           (ins Int64Regs:$t)>;
3576   def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
3577                                           (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_2D_ARRAY_GRAD ("tex.grad.a2d.v4.*") for f32, s32
// and u32 results. s32 and u32 both use Int32Regs outputs; inputs are
// Float32Regs.
3580 defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3581   : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
3582                               Float32Regs, Float32Regs>;
3583 defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3584   : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
3585                               Int32Regs, Float32Regs>;
3586 defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3587   : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
3588                               Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode 3D texture fetch.
// inst is the mnemonic text, outtype the class of $r/$g/$b/$a, intype the
// class of $x, $y and $z, and tex the leading dag expected to define $t.
// $z is printed twice so the coordinate list has four entries.
3590 class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
3591                           NVPTXRegClass intype, dag tex>
3592     : NVPTXInst<(outs outtype:$r, outtype:$g,
3593                       outtype:$b, outtype:$a),
3594                  !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3595                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
3596                  []>;
// Expands TEX_UNIFIED_3D_base into two records: _R takes $t as an Int64Regs
// register, _I takes it as an i64imm immediate.
3597 multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
3598                           NVPTXRegClass intype> {
3599   def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3600   def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_3D ("tex.3d.v4.*") for each result type (f32, s32,
// u32) paired with each input type (s32, f32). f32 maps to Float32Regs;
// s32 and u32 both map to Int32Regs.
3603 defm TEX_UNIFIED_3D_F32_S32
3604   : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
3605 defm TEX_UNIFIED_3D_F32_F32
3606   : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3607 defm TEX_UNIFIED_3D_S32_S32
3608   : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
3609 defm TEX_UNIFIED_3D_S32_F32
3610   : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3611 defm TEX_UNIFIED_3D_U32_S32
3612   : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
3613 defm TEX_UNIFIED_3D_U32_F32
3614   : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode 3D texture fetch with an
// explicit $lod operand. inst is the mnemonic text, outtype the class of
// $r/$g/$b/$a, intype the class of $x, $y, $z and $lod, and tex the leading
// dag expected to define $t. $z is printed twice; $lod follows the address.
3616 class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
3617                                 NVPTXRegClass intype, dag tex>
3618     : NVPTXInst<(outs outtype:$r, outtype:$g,
3619                       outtype:$b, outtype:$a),
3620                  !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3621                  inst # " \t\\{$r, $g, $b, $a\\},"
3622                         " [$t, \\{$x, $y, $z, $z\\}], $lod;",
3623                  []>;
// Expands TEX_UNIFIED_3D_LEVEL_base into two records: _R takes $t as an
// Int64Regs register, _I takes it as an i64imm immediate.
3624 multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
3625                                 NVPTXRegClass intype> {
3626   def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3627   def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_3D_LEVEL ("tex.level.3d.v4.*") for f32, s32 and u32
// results. s32 and u32 both use Int32Regs outputs; inputs are Float32Regs.
3630 defm TEX_UNIFIED_3D_F32_F32_LEVEL
3631   : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3632 defm TEX_UNIFIED_3D_S32_F32_LEVEL
3633   : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3634 defm TEX_UNIFIED_3D_U32_F32_LEVEL
3635   : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode 3D texture fetch with explicit
// gradients. inst is the mnemonic text, outtype the class of $r/$g/$b/$a,
// intype the class of $x, $y, $z and the six gradient operands, and tex the
// leading dag expected to define $t. $z, $gradx2 and $grady2 are each printed
// twice, so every brace-enclosed operand list has four entries.
3637 class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
3638                                NVPTXRegClass intype, dag tex>
3639     : NVPTXInst<(outs outtype:$r, outtype:$g,
3640                       outtype:$b, outtype:$a),
3641                  !con(tex, (ins intype:$x, intype:$y, intype:$z,
3642                                 intype:$gradx0, intype:$gradx1,
3643                                 intype:$gradx2, intype:$grady0,
3644                                 intype:$grady1, intype:$grady2)),
3645                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
3646                         " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3647                         " \\{$grady0, $grady1, $grady2, $grady2\\};",
3648                  []>;
// Expands TEX_UNIFIED_3D_GRAD_base into two records: _R takes $t as an
// Int64Regs register, _I takes it as an i64imm immediate.
3649 multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
3650                                NVPTXRegClass intype> {
3651   def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3652   def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_3D_GRAD ("tex.grad.3d.v4.*") for f32, s32 and u32
// results. s32 and u32 both use Int32Regs outputs; inputs are Float32Regs.
3655 defm TEX_UNIFIED_3D_F32_F32_GRAD
3656   : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3657 defm TEX_UNIFIED_3D_S32_F32_GRAD
3658   : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3659 defm TEX_UNIFIED_3D_U32_F32_GRAD
3660   : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode cube texture fetch.
// inst is the mnemonic text, outtype the class of $r/$g/$b/$a, intype the
// class of $x, $y and $z, and tex the leading dag expected to define $t.
// $z is printed twice so the coordinate list has four entries.
3662 class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
3663                             NVPTXRegClass intype, dag tex>
3664     : NVPTXInst<(outs outtype:$r, outtype:$g,
3665                       outtype:$b, outtype:$a),
3666                  !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3667                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
3668                  []>;
// Expands TEX_UNIFIED_CUBE_base into two records: _R takes $t as an Int64Regs
// register, _I takes it as an i64imm immediate.
3669 multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
3670                             NVPTXRegClass intype> {
3671   def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3672   def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_CUBE ("tex.cube.v4.*") for f32, s32 and u32
// results. s32 and u32 both use Int32Regs outputs; inputs are Float32Regs.
3675 defm TEX_UNIFIED_CUBE_F32_F32
3676   : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3677 defm TEX_UNIFIED_CUBE_S32_F32
3678   : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3679 defm TEX_UNIFIED_CUBE_U32_F32
3680   : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode cube texture fetch with an
// explicit $lod operand. inst is the mnemonic text, outtype the class of
// $r/$g/$b/$a, intype the class of $x, $y, $z and $lod, and tex the leading
// dag expected to define $t. $z is printed twice; $lod follows the address.
3682 class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
3683                                   NVPTXRegClass intype, dag tex>
3684     : NVPTXInst<(outs outtype:$r, outtype:$g,
3685                       outtype:$b, outtype:$a),
3686                  !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3687                  inst # " \t\\{$r, $g, $b, $a\\},"
3688                         " [$t, \\{$x, $y, $z, $z\\}], $lod;",
3689                  []>;
// Expands TEX_UNIFIED_CUBE_LEVEL_base into two records: _R takes $t as an
// Int64Regs register, _I takes it as an i64imm immediate.
3690 multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
3691                                   NVPTXRegClass intype> {
3692   def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
3693                                        (ins Int64Regs:$t)>;
3694   def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
3695                                        (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_CUBE_LEVEL ("tex.level.cube.v4.*") for f32, s32 and
// u32 results. s32 and u32 both use Int32Regs outputs; inputs are
// Float32Regs.
3698 defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
3699   : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
3700                            Float32Regs, Float32Regs>;
3701 defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
3702   : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
3703                            Int32Regs, Float32Regs>;
3704 defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
3705   : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
3706                            Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode cube-array texture fetch.
// inst is the mnemonic text, outtype the class of $r/$g/$b/$a, intype the
// class of $x, $y and $z, and tex the leading dag expected to define $t.
// $l is always Int32Regs regardless of intype and is printed first in the
// four-entry coordinate list.
3708 class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
3709                                   NVPTXRegClass intype, dag tex>
3710     : NVPTXInst<(outs outtype:$r, outtype:$g,
3711                       outtype:$b, outtype:$a),
3712                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
3713                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
3714                  []>;
// Expands TEX_UNIFIED_CUBE_ARRAY_base into two records: _R takes $t as an
// Int64Regs register, _I takes it as an i64imm immediate.
3715 multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3716                                   NVPTXRegClass intype> {
3717   def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
3718                                        (ins Int64Regs:$t)>;
3719   def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
3720                                        (ins i64imm:$t)>;
// Instantiate TEX_UNIFIED_CUBE_ARRAY ("tex.acube.v4.*") for f32, s32 and u32
// results. s32 and u32 both use Int32Regs outputs; inputs are Float32Regs.
3723 defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
3724   : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3725 defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
3726   : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3727 defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
3728   : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
// Base instruction record for the unified-mode cube-array texture fetch with
// an explicit $lod operand. inst is the mnemonic text, outtype the class of
// $r/$g/$b/$a, intype the class of $x, $y, $z and $lod, and tex the leading
// dag expected to define $t. $l is always Int32Regs and is printed first in
// the coordinate list; $lod follows the address.
3730 class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3731                                         NVPTXRegClass intype, dag tex>
3732     : NVPTXInst<(outs outtype:$r, outtype:$g,
3733                       outtype:$b, outtype:$a),
3734                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
3735                                 intype:$lod)),
3736                  inst # " \t\\{$r, $g, $b, $a\\},"
3737                         " [$t, \\{$l, $x, $y, $z\\}], $lod;",
3738                  []>;
3739 multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3740                                         NVPTXRegClass intype> {
3741   def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3742                                              (ins Int64Regs:$t)>;
3743   def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3744                                              (ins i64imm:$t)>;
// Instantiations of TEX_UNIFIED_CUBE_ARRAY_LEVEL for the f32, s32 and u32
// result mnemonics. Coordinates (and $lod) are Float32Regs in all three;
// s32 and u32 results both use Int32Regs.
3747 defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3748   : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3749                                  Float32Regs, Float32Regs>;
3750 defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3751   : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3752                                  Int32Regs, Float32Regs>;
3753 defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3754   : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3755                                  Int32Regs, Float32Regs>;
// Instruction template for the unified-mode 2D tld4 instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $v0..$v3.
//   intype  - register class of the coordinates $x, $y.
//   tex     - (ins ...) dag supplying the texture handle operand $t; it is
//             prepended to the coordinate inputs with !con.
// The pattern list is empty.
3757 class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3758                            NVPTXRegClass intype, dag tex>
3759     : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3760                       outtype:$v2, outtype:$v3),
3761                  !con(tex, (ins intype:$x, intype:$y)),
3762                  inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
3763                  []>;
// Emits two records per instantiation:
//   _R - texture handle $t in a 64-bit register (Int64Regs)
//   _I - texture handle $t as a 64-bit immediate (i64imm)
3764 multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3765                            NVPTXRegClass intype> {
3766   def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3767   def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations of TLD4_UNIFIED_2D: one defm per component selector in the
// mnemonic (.r, .g, .b, .a) and per result type suffix (f32, s32, u32).
// Coordinates are Float32Regs throughout; s32 and u32 results both use
// Int32Regs.
3770 defm TLD4_UNIFIED_R_2D_F32_F32
3771   : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3772 defm TLD4_UNIFIED_G_2D_F32_F32
3773   : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3774 defm TLD4_UNIFIED_B_2D_F32_F32
3775   : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3776 defm TLD4_UNIFIED_A_2D_F32_F32
3777   : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3779 defm TLD4_UNIFIED_R_2D_S32_F32
3780   : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3781 defm TLD4_UNIFIED_G_2D_S32_F32
3782   : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3783 defm TLD4_UNIFIED_B_2D_S32_F32
3784   : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3785 defm TLD4_UNIFIED_A_2D_S32_F32
3786   : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3788 defm TLD4_UNIFIED_R_2D_U32_F32
3789   : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3790 defm TLD4_UNIFIED_G_2D_U32_F32
3791   : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3792 defm TLD4_UNIFIED_B_2D_U32_F32
3793   : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3794 defm TLD4_UNIFIED_A_2D_U32_F32
3795   : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3801 //=== Surface load instructions
// Records defined inside this `let` scope get their IsSuld field set to
// `true`. The classes that follow here each have a single result.
// NOTE(review): the later scopes use IsSuld = 2 and IsSuld = 3 for the
// two- and four-result variants; presumably the field encodes the result
// count class -- confirm against the NVPTXInst definition.
3803 let IsSuld = true in {
// Instruction template for a single-result 1D surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the result $r.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs coordinate $x with !con.
// The pattern list is empty.
3805 class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
3806     : NVPTXInst<(outs outtype:$r),
3807                 !con(surf, (ins Int32Regs:$x)),
3808                 inst # " \\{$r\\}, [$s, \\{$x\\}];",
3809                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
3810 multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
3811   def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
3812   def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_1D: one defm per element-width suffix
// (b8, b16, b32, b64) for each of the .clamp, .trap and .zero mnemonic
// suffixes. The b8 and b16 variants both use Int16Regs for the result.
3815 defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
3816 defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
3817 defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
3818 defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;
3820 defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
3821 defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
3822 defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
3823 defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;
3825 defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
3826 defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
3827 defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
3828 defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
// Instruction template for a single-result 1D-array surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the result $r.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs operands $l and $x with !con.
// $l is printed before $x in the coordinate vector. The pattern list is
// empty.
3830 class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
3831     : NVPTXInst<(outs outtype:$r),
3832                 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
3833                 inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
3834                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
3835 multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
3836   def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
3837   def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_1D_ARRAY: one defm per element-width suffix
// (b8, b16, b32, b64) for each of the .clamp, .trap and .zero mnemonic
// suffixes. The b8 and b16 variants both use Int16Regs for the result.
3840 defm SULD_1D_ARRAY_I8_CLAMP
3841   : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
3842 defm SULD_1D_ARRAY_I16_CLAMP
3843   : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
3844 defm SULD_1D_ARRAY_I32_CLAMP
3845   : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
3846 defm SULD_1D_ARRAY_I64_CLAMP
3847   : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;
3849 defm SULD_1D_ARRAY_I8_TRAP
3850   : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
3851 defm SULD_1D_ARRAY_I16_TRAP
3852   : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
3853 defm SULD_1D_ARRAY_I32_TRAP
3854   : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
3855 defm SULD_1D_ARRAY_I64_TRAP
3856   : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;
3858 defm SULD_1D_ARRAY_I8_ZERO
3859   : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
3860 defm SULD_1D_ARRAY_I16_ZERO
3861   : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
3862 defm SULD_1D_ARRAY_I32_ZERO
3863   : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
3864 defm SULD_1D_ARRAY_I64_ZERO
3865   : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
// Instruction template for a single-result 2D surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the result $r.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs coordinates $x, $y with !con.
// The pattern list is empty.
3867 class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
3868     : NVPTXInst<(outs outtype:$r),
3869                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
3870                 inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
3871                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
3872 multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
3873   def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
3874   def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_2D: one defm per element-width suffix
// (b8, b16, b32, b64) for each of the .clamp, .trap and .zero mnemonic
// suffixes. The b8 and b16 variants both use Int16Regs for the result.
3877 defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
3878 defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
3879 defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
3880 defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;
3882 defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
3883 defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
3884 defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
3885 defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;
3887 defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
3888 defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
3889 defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
3890 defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
// Instruction template for a single-result 2D-array surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the result $r.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs operands $l, $x, $y with !con.
// The asm string prints $y twice, giving a four-element coordinate vector
// from three inputs.
// NOTE(review): presumably deliberate padding to a four-element vector
// required by the PTX syntax for this geometry -- confirm against the PTX
// ISA before changing.
// The pattern list is empty.
3892 class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
3893     : NVPTXInst<(outs outtype:$r),
3894                 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
3895                 inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3896                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
3897 multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
3898   def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
3899   def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_2D_ARRAY: one defm per element-width suffix
// (b8, b16, b32, b64) for each of the .clamp, .trap and .zero mnemonic
// suffixes. The b8 and b16 variants both use Int16Regs for the result.
3902 defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
3903 defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
3904 defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
3905 defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;
3907 defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
3908 defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
3909 defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
3910 defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;
3912 defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
3913 defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
3914 defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
3915 defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
// Instruction template for a single-result 3D surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the result $r.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs coordinates $x, $y, $z with !con.
// The asm string prints $z twice, giving a four-element coordinate vector
// from three inputs.
// NOTE(review): presumably deliberate padding to a four-element vector
// required by the PTX syntax for this geometry -- confirm against the PTX
// ISA before changing.
// The pattern list is empty.
3917 class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
3918     : NVPTXInst<(outs outtype:$r),
3919                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
3920                 inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3921                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
3922 multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
3923   def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
3924   def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_3D: one defm per element-width suffix
// (b8, b16, b32, b64) for each of the .clamp, .trap and .zero mnemonic
// suffixes. The b8 and b16 variants both use Int16Regs for the result.
3927 defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
3928 defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
3929 defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
3930 defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;
3932 defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
3933 defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
3934 defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
3935 defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;
3937 defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
3938 defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
3939 defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
3940 defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
// Records defined inside this `let` scope get their IsSuld field set to 2.
// The classes that follow here each have two results ($r, $g).
// NOTE(review): presumably IsSuld encodes the result-count class
// (true / 2 / 3 across the three scopes) -- confirm against the NVPTXInst
// definition.
3943 let IsSuld = 2 in {
// Instruction template for a two-result 1D surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the results $r, $g.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs coordinate $x with !con.
// The pattern list is empty.
3945 class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
3946     : NVPTXInst<(outs outtype:$r, outtype:$g),
3947                 !con(surf, (ins Int32Regs:$x)),
3948                 inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
3949                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
3950 multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
3951   def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
3952   def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_1D_V2: one defm per element-width suffix
// (b8, b16, b32, b64) for each of the .clamp, .trap and .zero mnemonic
// suffixes. The b8 and b16 variants both use Int16Regs for the results.
3955 defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
3956 defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
3957 defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
3958 defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;
3960 defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
3961 defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
3962 defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
3963 defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;
3965 defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
3966 defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
3967 defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
3968 defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
// Instruction template for a two-result 1D-array surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the results $r, $g.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs operands $l and $x with !con.
// The pattern list is empty.
3970 class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
3971     : NVPTXInst<(outs outtype:$r, outtype:$g),
3972                 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
3973                 inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3974                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
3975 multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
3976   def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
3977   def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_1D_ARRAY_V2: one defm per element-width suffix
// (b8, b16, b32, b64) for each of the .clamp, .trap and .zero mnemonic
// suffixes. The b8 and b16 variants both use Int16Regs for the results.
3980 defm SULD_1D_ARRAY_V2I8_CLAMP
3981   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
3982 defm SULD_1D_ARRAY_V2I16_CLAMP
3983   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
3984 defm SULD_1D_ARRAY_V2I32_CLAMP
3985   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
3986 defm SULD_1D_ARRAY_V2I64_CLAMP
3987   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;
3989 defm SULD_1D_ARRAY_V2I8_TRAP
3990   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
3991 defm SULD_1D_ARRAY_V2I16_TRAP
3992   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
3993 defm SULD_1D_ARRAY_V2I32_TRAP
3994   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
3995 defm SULD_1D_ARRAY_V2I64_TRAP
3996   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;
3998 defm SULD_1D_ARRAY_V2I8_ZERO
3999   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
4000 defm SULD_1D_ARRAY_V2I16_ZERO
4001   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
4002 defm SULD_1D_ARRAY_V2I32_ZERO
4003   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
4004 defm SULD_1D_ARRAY_V2I64_ZERO
4005   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
// Instruction template for a two-result 2D surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the results $r, $g.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs coordinates $x, $y with !con.
// The pattern list is empty.
4007 class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4008     : NVPTXInst<(outs outtype:$r, outtype:$g),
4009                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4010                 inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4011                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
4012 multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
4013   def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4014   def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_2D_V2: one defm per element-width suffix
// (b8, b16, b32, b64) for each of the .clamp, .trap and .zero mnemonic
// suffixes. The b8 and b16 variants both use Int16Regs for the results.
4017 defm SULD_2D_V2I8_CLAMP
4018   : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
4019 defm SULD_2D_V2I16_CLAMP
4020   : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
4021 defm SULD_2D_V2I32_CLAMP
4022   : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
4023 defm SULD_2D_V2I64_CLAMP
4024   : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;
4026 defm SULD_2D_V2I8_TRAP
4027   : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
4028 defm SULD_2D_V2I16_TRAP
4029   : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
4030 defm SULD_2D_V2I32_TRAP
4031   : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
4032 defm SULD_2D_V2I64_TRAP
4033   : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;
4035 defm SULD_2D_V2I8_ZERO
4036   : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
4037 defm SULD_2D_V2I16_ZERO
4038   : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
4039 defm SULD_2D_V2I32_ZERO
4040   : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
4041 defm SULD_2D_V2I64_ZERO
4042   : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
// Instruction template for a two-result 2D-array surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the results $r, $g.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs operands $l, $x, $y with !con.
// The asm string prints $y twice, giving a four-element coordinate vector
// from three inputs.
// NOTE(review): presumably deliberate padding to a four-element vector
// required by the PTX syntax for this geometry -- confirm against the PTX
// ISA before changing.
// The pattern list is empty.
4044 class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4045     : NVPTXInst<(outs outtype:$r, outtype:$g),
4046                 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4047                 inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
4048                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
4049 multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
4050   def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4051   def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_2D_ARRAY_V2: one defm per element-width suffix
// (b8, b16, b32, b64) for each of the .clamp, .trap and .zero mnemonic
// suffixes. The b8 and b16 variants both use Int16Regs for the results.
4054 defm SULD_2D_ARRAY_V2I8_CLAMP
4055   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
4056 defm SULD_2D_ARRAY_V2I16_CLAMP
4057   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
4058 defm SULD_2D_ARRAY_V2I32_CLAMP
4059   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
4060 defm SULD_2D_ARRAY_V2I64_CLAMP
4061   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;
4063 defm SULD_2D_ARRAY_V2I8_TRAP
4064   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
4065 defm SULD_2D_ARRAY_V2I16_TRAP
4066   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
4067 defm SULD_2D_ARRAY_V2I32_TRAP
4068   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
4069 defm SULD_2D_ARRAY_V2I64_TRAP
4070   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;
4072 defm SULD_2D_ARRAY_V2I8_ZERO
4073   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
4074 defm SULD_2D_ARRAY_V2I16_ZERO
4075   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
4076 defm SULD_2D_ARRAY_V2I32_ZERO
4077   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
4078 defm SULD_2D_ARRAY_V2I64_ZERO
4079   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
// Instruction template for a two-result 3D surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the results $r, $g.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs coordinates $x, $y, $z with !con.
// The asm string prints $z twice, giving a four-element coordinate vector
// from three inputs.
// NOTE(review): presumably deliberate padding to a four-element vector
// required by the PTX syntax for this geometry -- confirm against the PTX
// ISA before changing.
// The pattern list is empty.
4081 class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4082     : NVPTXInst<(outs outtype:$r, outtype:$g),
4083                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4084                 inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4085                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
4086 multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
4087   def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4088   def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_3D_V2: one defm per element-width suffix
// (b8, b16, b32, b64) for each of the .clamp, .trap and .zero mnemonic
// suffixes. The b8 and b16 variants both use Int16Regs for the results.
4091 defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
4092 defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
4093 defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
4094 defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;
4096 defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
4097 defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
4098 defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
4099 defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;
4101 defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
4102 defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
4103 defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
4104 defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
// Records defined inside this `let` scope get their IsSuld field set to 3.
// The classes that follow here each have four results ($r, $g, $b, $a).
// NOTE(review): presumably IsSuld encodes the result-count class
// (true / 2 / 3 across the three scopes) -- confirm against the NVPTXInst
// definition.
4108 let IsSuld = 3 in {
// Instruction template for a four-result 1D surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the results $r, $g, $b, $a.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs coordinate $x with !con.
// The pattern list is empty.
4110 class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4111     : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4112                 !con(surf, (ins Int32Regs:$x)),
4113                 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4114                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
4115 multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
4116   def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4117   def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_1D_V4: one defm per element-width suffix
// (b8, b16, b32) for each of the .clamp, .trap and .zero mnemonic suffixes.
// No b64 variant is instantiated in this group. The b8 and b16 variants both
// use Int16Regs for the results.
4120 defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
4121 defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
4122 defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
4124 defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
4125 defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
4126 defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
4128 defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
4129 defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
4130 defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
// Instruction template for a four-result 1D-array surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the results $r, $g, $b, $a.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs operands $l and $x with !con.
// The pattern list is empty.
4132 class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4133     : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4134                 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
4135                 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
4136                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
4137 multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4138   def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4139   def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_1D_ARRAY_V4: one defm per element-width suffix
// (b8, b16, b32) for each of the .clamp, .trap and .zero mnemonic suffixes.
// No b64 variant is instantiated in this group. The b8 and b16 variants both
// use Int16Regs for the results.
4142 defm SULD_1D_ARRAY_V4I8_CLAMP
4143   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
4144 defm SULD_1D_ARRAY_V4I16_CLAMP
4145   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
4146 defm SULD_1D_ARRAY_V4I32_CLAMP
4147   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
4149 defm SULD_1D_ARRAY_V4I8_TRAP
4150   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
4151 defm SULD_1D_ARRAY_V4I16_TRAP
4152   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
4153 defm SULD_1D_ARRAY_V4I32_TRAP
4154   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
4156 defm SULD_1D_ARRAY_V4I8_ZERO
4157   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
4158 defm SULD_1D_ARRAY_V4I16_ZERO
4159   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
4160 defm SULD_1D_ARRAY_V4I32_ZERO
4161   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
// Instruction template for a four-result 2D surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the results $r, $g, $b, $a.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs coordinates $x, $y with !con.
// The pattern list is empty.
4163 class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4164     : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4165                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4166                 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4167                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
4168 multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
4169   def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4170   def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_2D_V4: one defm per element-width suffix
// (b8, b16, b32) for each of the .clamp, .trap and .zero mnemonic suffixes.
// No b64 variant is instantiated in this group. The b8 and b16 variants both
// use Int16Regs for the results.
4173 defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
4174 defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
4175 defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
4177 defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
4178 defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
4179 defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
4181 defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
4182 defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
4183 defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
// Instruction template for a four-result 2D-array surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the results $r, $g, $b, $a.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs operands $l, $x, $y with !con.
// The asm string prints $y twice, giving a four-element coordinate vector
// from three inputs.
// NOTE(review): presumably deliberate padding to a four-element vector
// required by the PTX syntax for this geometry -- confirm against the PTX
// ISA before changing.
// The pattern list is empty.
4185 class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4186     : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4187                 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4188                 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
4189                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
4190 multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4191   def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4192   def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_2D_ARRAY_V4: one defm per element-width suffix
// (b8, b16, b32) for each of the .clamp, .trap and .zero mnemonic suffixes.
// No b64 variant is instantiated in this group. The b8 and b16 variants both
// use Int16Regs for the results.
4195 defm SULD_2D_ARRAY_V4I8_CLAMP
4196   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
4197 defm SULD_2D_ARRAY_V4I16_CLAMP
4198   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
4199 defm SULD_2D_ARRAY_V4I32_CLAMP
4200   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
4202 defm SULD_2D_ARRAY_V4I8_TRAP
4203   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
4204 defm SULD_2D_ARRAY_V4I16_TRAP
4205   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
4206 defm SULD_2D_ARRAY_V4I32_TRAP
4207   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
4209 defm SULD_2D_ARRAY_V4I8_ZERO
4210   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
4211 defm SULD_2D_ARRAY_V4I16_ZERO
4212   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
4213 defm SULD_2D_ARRAY_V4I32_ZERO
4214   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
// Instruction template for a four-result 3D surface load.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the results $r, $g, $b, $a.
//   surf    - (ins ...) dag supplying the surface handle operand $s; it is
//             prepended to the Int32Regs coordinates $x, $y, $z with !con.
// The asm string prints $z twice, giving a four-element coordinate vector
// from three inputs.
// NOTE(review): presumably deliberate padding to a four-element vector
// required by the PTX syntax for this geometry -- confirm against the PTX
// ISA before changing.
// The pattern list is empty.
4216 class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4217     : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4218                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4219                 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
4220                 []>;
// Emits two records per instantiation:
//   _R - surface handle $s in a 64-bit register (Int64Regs)
//   _I - surface handle $s as a 64-bit immediate (i64imm)
4221 multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
4222   def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4223   def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations of SULD_3D_V4: one defm per element-width suffix
// (b8, b16, b32) for each of the .clamp, .trap and .zero mnemonic suffixes.
// No b64 variant is instantiated in this group. The b8 and b16 variants both
// use Int16Regs for the results.
4226 defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
4227 defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
4228 defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
4230 defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
4231 defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
4232 defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
4234 defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
4235 defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
4236 defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
4240 //-----------------------------------
4241 // Texture Query Intrinsics
4242 //-----------------------------------
// Texture query instruction records. Every record in this scope has its
// IsSurfTexQuery field set to true.
// Each query comes as a pair with identical asm strings:
//   *_R - handle $a in a 64-bit register (Int64Regs)
//   *_I - handle $a as a 64-bit immediate (i64imm)
// All of them produce a single Int32Regs result $d and carry an empty
// pattern list.
4244 let IsSurfTexQuery = true in {
4245 def TXQ_CHANNEL_ORDER_R
4246   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4247               "txq.channel_order.b32 \t$d, [$a];",
4248               []>;
4249 def TXQ_CHANNEL_ORDER_I
4250   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4251               "txq.channel_order.b32 \t$d, [$a];",
4252               []>;
4253 def TXQ_CHANNEL_DATA_TYPE_R
4254   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4255               "txq.channel_data_type.b32 \t$d, [$a];",
4256               []>;
4257 def TXQ_CHANNEL_DATA_TYPE_I
4258   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4259               "txq.channel_data_type.b32 \t$d, [$a];",
4260               []>;
4261 def TXQ_WIDTH_R
4262   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4263               "txq.width.b32 \t$d, [$a];",
4264               []>;
4265 def TXQ_WIDTH_I
4266   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4267               "txq.width.b32 \t$d, [$a];",
4268               []>;
4269 def TXQ_HEIGHT_R
4270   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4271               "txq.height.b32 \t$d, [$a];",
4272               []>;
4273 def TXQ_HEIGHT_I
4274   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4275               "txq.height.b32 \t$d, [$a];",
4276               []>;
4277 def TXQ_DEPTH_R
4278   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4279               "txq.depth.b32 \t$d, [$a];",
4280               []>;
4281 def TXQ_DEPTH_I
4282   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4283               "txq.depth.b32 \t$d, [$a];",
4284               []>;
4285 def TXQ_ARRAY_SIZE_R
4286   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4287               "txq.array_size.b32 \t$d, [$a];",
4288               []>;
4289 def TXQ_ARRAY_SIZE_I
4290   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4291               "txq.array_size.b32 \t$d, [$a];",
4292               []>;
4293 def TXQ_NUM_SAMPLES_R
4294   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4295               "txq.num_samples.b32 \t$d, [$a];",
4296               []>;
4297 def TXQ_NUM_SAMPLES_I
4298   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4299               "txq.num_samples.b32 \t$d, [$a];",
4300               []>;
4301 def TXQ_NUM_MIPMAP_LEVELS_R
4302   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4303               "txq.num_mipmap_levels.b32 \t$d, [$a];",
4304               []>;
4305 def TXQ_NUM_MIPMAP_LEVELS_I
4306   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4307               "txq.num_mipmap_levels.b32 \t$d, [$a];",
4308               []>;
// Selection patterns mapping each int_nvvm_txq_* intrinsic, applied to a
// handle in Int64Regs, onto the register-handle (_R) form of the matching
// TXQ instruction. None of the patterns in this group target the _I forms.
4311 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4312           (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
4313 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4314           (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
4315 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4316           (TXQ_WIDTH_R Int64Regs:$a)>;
4317 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4318           (TXQ_HEIGHT_R Int64Regs:$a)>;
4319 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4320           (TXQ_DEPTH_R Int64Regs:$a)>;
4321 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4322           (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
4323 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4324           (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
4325 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4326           (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
4329 //-----------------------------------
4330 // Surface Query Intrinsics
4331 //-----------------------------------
// Surface query instruction records. Every record in this scope has its
// IsSurfTexQuery field set to true.
// Each query comes as a pair with identical asm strings:
//   *_R - handle $a in a 64-bit register (Int64Regs)
//   *_I - handle $a as a 64-bit immediate (i64imm)
// All of them produce a single Int32Regs result $d and carry an empty
// pattern list.
4333 let IsSurfTexQuery = true in {
4334 def SUQ_CHANNEL_ORDER_R
4335   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4336               "suq.channel_order.b32 \t$d, [$a];",
4337               []>;
4338 def SUQ_CHANNEL_ORDER_I
4339   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4340               "suq.channel_order.b32 \t$d, [$a];",
4341               []>;
4342 def SUQ_CHANNEL_DATA_TYPE_R
4343   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4344               "suq.channel_data_type.b32 \t$d, [$a];",
4345               []>;
4346 def SUQ_CHANNEL_DATA_TYPE_I
4347   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4348               "suq.channel_data_type.b32 \t$d, [$a];",
4349               []>;
4350 def SUQ_WIDTH_R
4351   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4352               "suq.width.b32 \t$d, [$a];",
4353               []>;
4354 def SUQ_WIDTH_I
4355   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4356               "suq.width.b32 \t$d, [$a];",
4357               []>;
4358 def SUQ_HEIGHT_R
4359   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4360               "suq.height.b32 \t$d, [$a];",
4361               []>;
4362 def SUQ_HEIGHT_I
4363   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4364               "suq.height.b32 \t$d, [$a];",
4365               []>;
4366 def SUQ_DEPTH_R
4367   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4368               "suq.depth.b32 \t$d, [$a];",
4369               []>;
4370 def SUQ_DEPTH_I
4371   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4372               "suq.depth.b32 \t$d, [$a];",
4373               []>;
4374 def SUQ_ARRAY_SIZE_R
4375   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4376               "suq.array_size.b32 \t$d, [$a];",
4377               []>;
4378 def SUQ_ARRAY_SIZE_I
4379   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4380               "suq.array_size.b32 \t$d, [$a];",
4381               []>;
// Selection patterns mapping each int_nvvm_suq_* intrinsic, applied to a
// handle in Int64Regs, onto the register-handle (_R) form of the matching
// SUQ instruction. None of the patterns in this group target the _I forms.
4384 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4385           (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
4386 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4387           (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
4388 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4389           (SUQ_WIDTH_R Int64Regs:$a)>;
4390 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4391           (SUQ_HEIGHT_R Int64Regs:$a)>;
4392 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4393           (SUQ_DEPTH_R Int64Regs:$a)>;
4394 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4395           (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
4398 //===- Handle Query -------------------------------------------------------===//
4400 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Handle-type predicate instructions. Each takes a handle $a in Int64Regs
// and writes a predicate result $d in Int1Regs. Unlike the query records
// above, the selection pattern is attached inline: each one matches the
// corresponding int_nvvm_istypep_* intrinsic. The asm strings print $a
// bare, without square brackets.
4401 def ISTYPEP_SAMPLER
4402   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4403               "istypep.samplerref \t$d, $a;",
4404               [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4405 def ISTYPEP_SURFACE
4406   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4407               "istypep.surfref \t$d, $a;",
4408               [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4409 def ISTYPEP_TEXTURE
4410   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4411               "istypep.texref \t$d, $a;",
4412               [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4414 //===- Surface Stores -----------------------------------------------------===//
4416 let IsSust = true in {
// Base class for 1D surface stores of one data element.
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class of the data operand $r.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the coordinate ($x) and data ($r) operands.
// The instruction has no outputs and an empty pattern list. The braces around
// the coordinate and data lists are written escaped (\\{ ... \\}).
4418 class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
4419     : NVPTXInst<(outs),
4420                 !con(surf, (ins Int32Regs:$x, intype:$r)),
4421                 inst # " \t[$s, \\{$x\\}], \\{$r\\};",
4422                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4426-4427) - confirm against the original file.
4423 multiclass SUST_1D<string inst, NVPTXRegClass intype> {
4424   def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
4425   def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.1d for b8/b16/b32/b64 with .clamp, .trap and .zero,
// and sust.p.1d for b8/b16/b32 with .trap only. Both b8 and b16 data use
// Int16Regs.
4428 defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
4429 defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
4430 defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
4431 defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
4433 defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
4434 defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
4435 defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
4436 defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
4438 defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
4439 defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
4440 defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
4441 defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
4443 defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
4444 defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
4445 defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
// Base class for 1D surface stores of two data elements ($r, $g).
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class shared by both data operands.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the coordinate and data operands.
// The instruction has no outputs and an empty pattern list.
4447 class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4448     : NVPTXInst<(outs),
4449                 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
4450                 inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
4451                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4455-4456) - confirm against the original file.
4452 multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
4453   def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4454   def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.1d.v2 for b8/b16/b32/b64 with .clamp, .trap and
// .zero, and sust.p.1d.v2 for b8/b16/b32 with .trap only.
4457 defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
4458 defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
4459 defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
4460 defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
4462 defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
4463 defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
4464 defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
4465 defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
4467 defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
4468 defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
4469 defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
4470 defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
4472 defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
4473 defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
4474 defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
// Base class for 1D surface stores of four data elements ($r, $g, $b, $a).
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class shared by all four data operands.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the coordinate and data operands.
// The instruction has no outputs and an empty pattern list.
4476 class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4477     : NVPTXInst<(outs),
4478                 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g,
4479                                 intype:$b, intype:$a)),
4480                 inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4481                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4485-4486) - confirm against the original file.
4482 multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
4483   def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4484   def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.1d.v4 for b8/b16/b32 with .clamp, .trap and .zero,
// and sust.p.1d.v4 for b8/b16/b32 with .trap only. There is no b64 v4 entry.
4487 defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
4488 defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
4489 defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
4491 defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
4492 defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
4493 defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
4495 defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
4496 defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
4497 defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
4499 defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
4500 defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
4501 defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
// Base class for 1D-array surface stores of one data element.
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class of the data operand $r.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the index ($idx), coordinate ($x) and data ($r) operands.
// The coordinate list is printed as {$idx, $x}. The instruction has no
// outputs and an empty pattern list.
4503 class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4504     : NVPTXInst<(outs),
4505                 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
4506                 inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4507                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4511-4512) - confirm against the original file.
4508 multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
4509   def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4510   def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.a1d for b8/b16/b32/b64 with .clamp, .trap and .zero,
// and sust.p.a1d for b8/b16/b32 with .trap only.
4513 defm SUST_B_1D_ARRAY_B8_CLAMP
4514   : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
4515 defm SUST_B_1D_ARRAY_B16_CLAMP
4516   : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
4517 defm SUST_B_1D_ARRAY_B32_CLAMP
4518   : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
4519 defm SUST_B_1D_ARRAY_B64_CLAMP
4520   : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
4522 defm SUST_B_1D_ARRAY_B8_TRAP
4523   : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
4524 defm SUST_B_1D_ARRAY_B16_TRAP
4525   : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
4526 defm SUST_B_1D_ARRAY_B32_TRAP
4527   : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
4528 defm SUST_B_1D_ARRAY_B64_TRAP
4529   : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
4531 defm SUST_B_1D_ARRAY_B8_ZERO
4532   : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
4533 defm SUST_B_1D_ARRAY_B16_ZERO
4534   : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
4535 defm SUST_B_1D_ARRAY_B32_ZERO
4536   : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
4537 defm SUST_B_1D_ARRAY_B64_ZERO
4538   : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
4540 defm SUST_P_1D_ARRAY_B8_TRAP
4541   : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
4542 defm SUST_P_1D_ARRAY_B16_TRAP
4543   : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
4544 defm SUST_P_1D_ARRAY_B32_TRAP
4545   : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
// Base class for 1D-array surface stores of two data elements ($r, $g).
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class shared by both data operands.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the index, coordinate and data operands.
// The coordinate list is printed as {$idx, $x}. The instruction has no
// outputs and an empty pattern list.
4547 class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4548     : NVPTXInst<(outs),
4549                 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4550                                 intype:$r, intype:$g)),
4551                 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4552                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4556-4557) - confirm against the original file.
4553 multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4554   def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4555   def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.a1d.v2 for b8/b16/b32/b64 with .clamp, .trap and
// .zero, and sust.p.a1d.v2 for b8/b16/b32 with .trap only.
4558 defm SUST_B_1D_ARRAY_V2B8_CLAMP
4559   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
4560 defm SUST_B_1D_ARRAY_V2B16_CLAMP
4561   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
4562 defm SUST_B_1D_ARRAY_V2B32_CLAMP
4563   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
4564 defm SUST_B_1D_ARRAY_V2B64_CLAMP
4565   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
4567 defm SUST_B_1D_ARRAY_V2B8_TRAP
4568   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
4569 defm SUST_B_1D_ARRAY_V2B16_TRAP
4570   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
4571 defm SUST_B_1D_ARRAY_V2B32_TRAP
4572   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
4573 defm SUST_B_1D_ARRAY_V2B64_TRAP
4574   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
4576 defm SUST_B_1D_ARRAY_V2B8_ZERO
4577   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
4578 defm SUST_B_1D_ARRAY_V2B16_ZERO
4579   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
4580 defm SUST_B_1D_ARRAY_V2B32_ZERO
4581   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
4582 defm SUST_B_1D_ARRAY_V2B64_ZERO
4583   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
4585 defm SUST_P_1D_ARRAY_V2B8_TRAP
4586   : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
4587 defm SUST_P_1D_ARRAY_V2B16_TRAP
4588   : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
4589 defm SUST_P_1D_ARRAY_V2B32_TRAP
4590   : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
// Base class for 1D-array surface stores of four data elements
// ($r, $g, $b, $a).
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class shared by all four data operands.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the index, coordinate and data operands.
// The coordinate list is printed as {$idx, $x}. The instruction has no
// outputs and an empty pattern list.
4592 class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4593     : NVPTXInst<(outs),
4594                 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4595                                 intype:$r, intype:$g, intype:$b, intype:$a)),
4596                 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
4597                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4601-4602) - confirm against the original file.
4598 multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4599   def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4600   def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.a1d.v4 for b8/b16/b32 with .clamp, .trap and .zero,
// and sust.p.a1d.v4 for b8/b16/b32 with .trap only. There is no b64 v4 entry.
4603 defm SUST_B_1D_ARRAY_V4B8_CLAMP
4604   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
4605 defm SUST_B_1D_ARRAY_V4B16_CLAMP
4606   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
4607 defm SUST_B_1D_ARRAY_V4B32_CLAMP
4608   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
4610 defm SUST_B_1D_ARRAY_V4B8_TRAP
4611   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
4612 defm SUST_B_1D_ARRAY_V4B16_TRAP
4613   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
4614 defm SUST_B_1D_ARRAY_V4B32_TRAP
4615   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
4617 defm SUST_B_1D_ARRAY_V4B8_ZERO
4618   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
4619 defm SUST_B_1D_ARRAY_V4B16_ZERO
4620   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
4621 defm SUST_B_1D_ARRAY_V4B32_ZERO
4622   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
4624 defm SUST_P_1D_ARRAY_V4B8_TRAP
4625   : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
4626 defm SUST_P_1D_ARRAY_V4B16_TRAP
4627   : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
4628 defm SUST_P_1D_ARRAY_V4B32_TRAP
4629   : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
// Base class for 2D surface stores of one data element.
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class of the data operand $r.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the coordinate ($x, $y) and data ($r) operands.
// The instruction has no outputs and an empty pattern list.
4631 class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
4632     : NVPTXInst<(outs),
4633                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
4634                 inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
4635                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4639-4640) - confirm against the original file.
4636 multiclass SUST_2D<string inst, NVPTXRegClass intype> {
4637   def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
4638   def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.2d for b8/b16/b32/b64 with .clamp, .trap and .zero,
// and sust.p.2d for b8/b16/b32 with .trap only.
4641 defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
4642 defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
4643 defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
4644 defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
4646 defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
4647 defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
4648 defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
4649 defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
4651 defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
4652 defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
4653 defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
4654 defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
4656 defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
4657 defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
4658 defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
// Base class for 2D surface stores of two data elements ($r, $g).
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class shared by both data operands.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the coordinate ($x, $y) and data operands.
// The instruction has no outputs and an empty pattern list.
4660 class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4661     : NVPTXInst<(outs),
4662                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4663                                 intype:$r, intype:$g)),
4664                 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4665                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4669-4670) - confirm against the original file.
4666 multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
4667   def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4668   def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.2d.v2 for b8/b16/b32/b64 with .clamp, .trap and
// .zero, and sust.p.2d.v2 for b8/b16/b32 with .trap only.
4671 defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
4672 defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
4673 defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
4674 defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
4676 defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
4677 defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
4678 defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
4679 defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
4681 defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
4682 defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
4683 defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
4684 defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
4686 defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
4687 defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
4688 defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
// Base class for 2D surface stores of four data elements ($r, $g, $b, $a).
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class shared by all four data operands.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the coordinate ($x, $y) and data operands.
// The instruction has no outputs and an empty pattern list.
4690 class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4691     : NVPTXInst<(outs),
4692                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4693                                 intype:$r, intype:$g, intype:$b, intype:$a)),
4694                 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
4695                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4699-4700) - confirm against the original file.
4696 multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
4697   def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4698   def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.2d.v4 for b8/b16/b32 with .clamp, .trap and .zero,
// and sust.p.2d.v4 for b8/b16/b32 with .trap only. There is no b64 v4 entry.
4701 defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
4702 defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
4703 defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;
4705 defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
4706 defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
4707 defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;
4709 defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
4710 defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
4711 defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;
4713 defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
4714 defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
4715 defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
// Base class for 2D-array surface stores of one data element.
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class of the data operand $r.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the index ($idx), coordinate ($x, $y) and data ($r) operands.
// The coordinate list is printed as {$idx, $x, $y, $y}: $y appears twice even
// though only three coordinate operands are declared.
// NOTE(review): presumably this pads the list to the four-element vector the
// PTX .a2d form expects - confirm against the PTX ISA before changing it.
// The instruction has no outputs and an empty pattern list.
4717 class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4718     : NVPTXInst<(outs),
4719                 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4720                                 intype:$r)),
4721                 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4722                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4726-4727) - confirm against the original file.
4723 multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
4724   def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4725   def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.a2d for b8/b16/b32/b64 with .clamp, .trap and .zero,
// and sust.p.a2d for b8/b16/b32 with .trap only.
4728 defm SUST_B_2D_ARRAY_B8_CLAMP
4729   : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
4730 defm SUST_B_2D_ARRAY_B16_CLAMP
4731   : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
4732 defm SUST_B_2D_ARRAY_B32_CLAMP
4733   : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
4734 defm SUST_B_2D_ARRAY_B64_CLAMP
4735   : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;
4737 defm SUST_B_2D_ARRAY_B8_TRAP
4738   : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
4739 defm SUST_B_2D_ARRAY_B16_TRAP
4740   : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
4741 defm SUST_B_2D_ARRAY_B32_TRAP
4742   : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
4743 defm SUST_B_2D_ARRAY_B64_TRAP
4744   : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;
4746 defm SUST_B_2D_ARRAY_B8_ZERO
4747   : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
4748 defm SUST_B_2D_ARRAY_B16_ZERO
4749   : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
4750 defm SUST_B_2D_ARRAY_B32_ZERO
4751   : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
4752 defm SUST_B_2D_ARRAY_B64_ZERO
4753   : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;
4755 defm SUST_P_2D_ARRAY_B8_TRAP
4756   : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
4757 defm SUST_P_2D_ARRAY_B16_TRAP
4758   : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
4759 defm SUST_P_2D_ARRAY_B32_TRAP
4760   : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
// Base class for 2D-array surface stores of two data elements ($r, $g).
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class shared by both data operands.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the index, coordinate and data operands.
// The coordinate list is printed as {$idx, $x, $y, $y}: $y appears twice even
// though only three coordinate operands are declared.
// NOTE(review): presumably this pads the list to the four-element vector the
// PTX .a2d form expects - confirm against the PTX ISA before changing it.
// The instruction has no outputs and an empty pattern list.
4762 class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4763     : NVPTXInst<(outs),
4764                 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4765                                 intype:$r, intype:$g)),
4766                 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
4767                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4771-4772) - confirm against the original file.
4768 multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4769   def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4770   def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.a2d.v2 for b8/b16/b32/b64 with .clamp, .trap and
// .zero, and sust.p.a2d.v2 for b8/b16/b32 with .trap only.
4773 defm SUST_B_2D_ARRAY_V2B8_CLAMP
4774   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
4775 defm SUST_B_2D_ARRAY_V2B16_CLAMP
4776   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
4777 defm SUST_B_2D_ARRAY_V2B32_CLAMP
4778   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
4779 defm SUST_B_2D_ARRAY_V2B64_CLAMP
4780   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;
4782 defm SUST_B_2D_ARRAY_V2B8_TRAP
4783   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
4784 defm SUST_B_2D_ARRAY_V2B16_TRAP
4785   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
4786 defm SUST_B_2D_ARRAY_V2B32_TRAP
4787   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
4788 defm SUST_B_2D_ARRAY_V2B64_TRAP
4789   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;
4791 defm SUST_B_2D_ARRAY_V2B8_ZERO
4792   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
4793 defm SUST_B_2D_ARRAY_V2B16_ZERO
4794   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
4795 defm SUST_B_2D_ARRAY_V2B32_ZERO
4796   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
4797 defm SUST_B_2D_ARRAY_V2B64_ZERO
4798   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;
4800 defm SUST_P_2D_ARRAY_V2B8_TRAP
4801   : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
4802 defm SUST_P_2D_ARRAY_V2B16_TRAP
4803   : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
4804 defm SUST_P_2D_ARRAY_V2B32_TRAP
4805   : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
// Base class for 2D-array surface stores of four data elements
// ($r, $g, $b, $a).
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class shared by all four data operands.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the index, coordinate and data operands.
// The coordinate list is printed as {$idx, $x, $y, $y}: $y appears twice even
// though only three coordinate operands are declared.
// NOTE(review): presumably this pads the list to the four-element vector the
// PTX .a2d form expects - confirm against the PTX ISA before changing it.
// The instruction has no outputs and an empty pattern list.
4807 class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4808     : NVPTXInst<(outs),
4809                 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4810                                 intype:$r, intype:$g, intype:$b, intype:$a)),
4811                 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
4812                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4816-4817) - confirm against the original file.
4813 multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4814   def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4815   def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.a2d.v4 for b8/b16/b32 with .clamp, .trap and .zero,
// and sust.p.a2d.v4 for b8/b16/b32 with .trap only. There is no b64 v4 entry.
4818 defm SUST_B_2D_ARRAY_V4B8_CLAMP
4819   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
4820 defm SUST_B_2D_ARRAY_V4B16_CLAMP
4821   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
4822 defm SUST_B_2D_ARRAY_V4B32_CLAMP
4823   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
4825 defm SUST_B_2D_ARRAY_V4B8_TRAP
4826   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
4827 defm SUST_B_2D_ARRAY_V4B16_TRAP
4828   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
4829 defm SUST_B_2D_ARRAY_V4B32_TRAP
4830   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
4832 defm SUST_B_2D_ARRAY_V4B8_ZERO
4833   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
4834 defm SUST_B_2D_ARRAY_V4B16_ZERO
4835   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
4836 defm SUST_B_2D_ARRAY_V4B32_ZERO
4837   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
4839 defm SUST_P_2D_ARRAY_V4B8_TRAP
4840   : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
4841 defm SUST_P_2D_ARRAY_V4B16_TRAP
4842   : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
4843 defm SUST_P_2D_ARRAY_V4B32_TRAP
4844   : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
// Base class for 3D surface stores of one data element.
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class of the data operand $r.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the coordinate ($x, $y, $z) and data ($r) operands.
// The coordinate list is printed as {$x, $y, $z, $z}: $z appears twice even
// though only three coordinate operands are declared.
// NOTE(review): presumably this pads the list to the four-element vector the
// PTX .3d form expects - confirm against the PTX ISA before changing it.
// The instruction has no outputs and an empty pattern list.
4846 class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
4847     : NVPTXInst<(outs),
4848                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4849                                 intype:$r)),
4850                 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4851                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4855-4856) - confirm against the original file.
4852 multiclass SUST_3D<string inst, NVPTXRegClass intype> {
4853   def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
4854   def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.3d for b8/b16/b32/b64 with .clamp, .trap and .zero,
// and sust.p.3d for b8/b16/b32 with .trap only.
4857 defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
4858 defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
4859 defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
4860 defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;
4862 defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
4863 defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
4864 defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
4865 defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;
4867 defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
4868 defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
4869 defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
4870 defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;
4872 defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
4873 defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
4874 defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
// Base class for 3D surface stores of two data elements ($r, $g).
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class shared by both data operands.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the coordinate ($x, $y, $z) and data operands.
// The coordinate list is printed as {$x, $y, $z, $z}: $z appears twice even
// though only three coordinate operands are declared.
// NOTE(review): presumably this pads the list to the four-element vector the
// PTX .3d form expects - confirm against the PTX ISA before changing it.
// The instruction has no outputs and an empty pattern list.
4876 class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4877     : NVPTXInst<(outs),
4878                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4879                                 intype:$r, intype:$g)),
4880                 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
4881                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4885-4886) - confirm against the original file.
4882 multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
4883   def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4884   def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.3d.v2 for b8/b16/b32/b64 with .clamp, .trap and
// .zero, and sust.p.3d.v2 for b8/b16/b32 with .trap only.
4887 defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
4888 defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
4889 defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
4890 defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;
4892 defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
4893 defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
4894 defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
4895 defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;
4897 defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
4898 defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
4899 defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
4900 defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;
4902 defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
4903 defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
4904 defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
// Base class for 3D surface stores of four data elements ($r, $g, $b, $a).
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class shared by all four data operands.
//   surf   - (ins ...) dag providing the surface operand $s; !con prepends it
//            to the coordinate ($x, $y, $z) and data operands.
// The coordinate list is printed as {$x, $y, $z, $z}: $z appears twice even
// though only three coordinate operands are declared.
// NOTE(review): presumably this pads the list to the four-element vector the
// PTX .3d form expects - confirm against the PTX ISA before changing it.
// The instruction has no outputs and an empty pattern list.
4906 class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4907     : NVPTXInst<(outs),
4908                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4909                                 intype:$r, intype:$g, intype:$b, intype:$a)),
4910                 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
4911                 []>;
// Defines two records per instantiation: _R takes $s in Int64Regs, _I takes
// $s as an i64imm.
// NOTE(review): no closing '}' for this multiclass appears in this text
// (the embedded numbering skips 4915-4916) - confirm against the original file.
4912 multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
4913   def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4914   def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations: sust.b.3d.v4 for b8/b16/b32 with .clamp, .trap and .zero,
// and sust.p.3d.v4 for b8/b16/b32 with .trap only. There is no b64 v4 entry.
4917 defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
4918 defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
4919 defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
4921 defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
4922 defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
4923 defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
4925 defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
4926 defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
4927 defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
4929 defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
4930 defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
4931 defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
4935 // Surface store instruction patterns
4936 // These are kept separate from the instruction definitions because putting
4937 // them inline makes TableGen report type errors (root cause not yet known).
4939 // .clamp variant
// 1D .clamp stores: each int_nvvm_sust_b_1d_*_clamp intrinsic selects the
// matching SUST_B_1D_*_CLAMP_R instruction. Operands are forwarded in the same
// order (surface $s, coordinate $x, then the data elements). Only the
// register-surface (_R) variants are targeted; i8 and i16 data both use
// Int16Regs.
4940 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
4941            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4942           (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
4944 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
4945            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4946           (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
4948 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
4949            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4950           (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
4952 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
4953            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4954           (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
4956 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
4957            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4958           (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4959            Int16Regs:$r, Int16Regs:$g)>;
4961 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
4962            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4963           (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4964            Int16Regs:$r, Int16Regs:$g)>;
4966 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
4967            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4968           (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4969            Int32Regs:$r, Int32Regs:$g)>;
4971 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
4972            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4973           (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4974            Int64Regs:$r, Int64Regs:$g)>;
4976 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
4977            Int64Regs:$s, Int32Regs:$x,
4978            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4979           (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4980            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4982 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
4983            Int64Regs:$s, Int32Regs:$x,
4984            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4985           (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4986            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4988 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
4989            Int64Regs:$s, Int32Regs:$x,
4990            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4991           (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4992            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 1D-array .clamp stores: each int_nvvm_sust_b_1d_array_*_clamp intrinsic
// selects the matching SUST_B_1D_ARRAY_*_CLAMP_R instruction. Operands are
// forwarded in the same order (surface $s, $l, coordinate $x, then the data
// elements). The pattern names the second operand $l; it is passed
// positionally into the instruction's second input.
4996 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
4997            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
4998           (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4999            Int16Regs:$r)>;
5001 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
5002            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5003           (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5004            Int16Regs:$r)>;
5006 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
5007            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5008           (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5009            Int32Regs:$r)>;
5011 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
5012            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5013           (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5014            Int64Regs:$r)>;
5016 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
5017           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5018           (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5019            Int16Regs:$r, Int16Regs:$g)>;
5021 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
5022           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5023           (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5024            Int16Regs:$r, Int16Regs:$g)>;
5026 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
5027           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5028           (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5029            Int32Regs:$r, Int32Regs:$g)>;
5031 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
5032           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5033           (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5034            Int64Regs:$r, Int64Regs:$g)>;
5036 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
5037            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5038            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5039           (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5040            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5042 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
5043            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5044            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5045           (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5046            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5048 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
5049            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5050            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5051           (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5052            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 2D .clamp stores (scalar and v2 forms): each int_nvvm_sust_b_2d_*_clamp
// intrinsic selects the matching SUST_B_2D_*_CLAMP_R instruction. Operands are
// forwarded in the same order (surface $s, coordinates $x and $y, then the
// data elements).
5056 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
5057            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5058           (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5059            Int16Regs:$r)>;
5061 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
5062            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5063           (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5064            Int16Regs:$r)>;
5066 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
5067            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5068           (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5069            Int32Regs:$r)>;
5071 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
5072            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5073           (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5074            Int64Regs:$r)>;
5076 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
5077           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5078           (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5079            Int16Regs:$r, Int16Regs:$g)>;
5081 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
5082           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5083           (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5084            Int16Regs:$r, Int16Regs:$g)>;
5086 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
5087           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5088           (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5089            Int32Regs:$r, Int32Regs:$g)>;
5091 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
5092           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5093           (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5094            Int64Regs:$r, Int64Regs:$g)>;
5096 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
5097            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5098            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5099           (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5100            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5102 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
5103            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5104            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5105           (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5106            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5108 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
5109            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5110            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5111           (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5112            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D-array, _clamp surface-store intrinsics.
// Each Pat maps int_nvvm_sust_b_2d_array_<ty>_clamp onto
// SUST_B_2D_ARRAY_<TY>_CLAMP_R and forwards the operands unchanged and in the
// same order: $s (Int64Regs), $l, $x and $y (Int32Regs), followed by the
// value operands.
// NOTE(review): $s is presumably the surface handle and $l the array layer
// index -- confirm against the intrinsic definitions.

// Single-value stores: the i8 and i16 forms both carry $r in Int16Regs.
5116 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
5117           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5118           (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
5119            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5120            Int16Regs:$r)>;
5122 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
5123           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5124           (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
5125            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5126            Int16Regs:$r)>;
5128 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
5129           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5130           (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
5131            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5132            Int32Regs:$r)>;
5134 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
5135           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5136           (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
5137            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5138            Int64Regs:$r)>;
// Two-component (v2) stores: values in $r, $g.
5140 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
5141            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5142            Int16Regs:$r, Int16Regs:$g),
5143           (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5144            Int32Regs:$x, Int32Regs:$y,
5145            Int16Regs:$r, Int16Regs:$g)>;
5147 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
5148            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5149            Int16Regs:$r, Int16Regs:$g),
5150           (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5151            Int32Regs:$x, Int32Regs:$y,
5152            Int16Regs:$r, Int16Regs:$g)>;
5154 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
5155            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5156            Int32Regs:$g),
5157           (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5158            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5160 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
5161            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5162            Int64Regs:$g),
5163           (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5164            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Four-component (v4) stores: values in $r, $g, $b, $a.
5166 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
5167            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5168            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5169           (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
5170            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5171            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5173 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
5174            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5175            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5176           (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
5177            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5178            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5180 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
5181            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5182            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5183           (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5184            Int32Regs:$x, Int32Regs:$y,
5185            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 3D, _clamp surface-store intrinsics.
// Each Pat maps int_nvvm_sust_b_3d_<ty>_clamp onto SUST_B_3D_<TY>_CLAMP_R and
// forwards the operands unchanged and in the same order: $s (Int64Regs),
// $x, $y and $z (Int32Regs), followed by the value operands.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the intrinsic definitions.

// Single-value stores: the i8 and i16 forms both carry $r in Int16Regs.
5189 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
5190            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5191            Int16Regs:$r),
5192           (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
5193            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5194            Int16Regs:$r)>;
5196 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
5197            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5198            Int16Regs:$r),
5199           (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
5200            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5201            Int16Regs:$r)>;
5203 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
5204            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5205            Int32Regs:$r),
5206           (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
5207            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5208            Int32Regs:$r)>;
5210 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
5211            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5212            Int64Regs:$r),
5213           (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
5214            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5215            Int64Regs:$r)>;
// Two-component (v2) stores: values in $r, $g.
5217 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
5218            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5219            Int16Regs:$r, Int16Regs:$g),
5220           (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
5221            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5222            Int16Regs:$r, Int16Regs:$g)>;
5224 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
5225            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5226            Int16Regs:$r, Int16Regs:$g),
5227           (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
5228            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5229            Int16Regs:$r, Int16Regs:$g)>;
5231 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
5232            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5233            Int32Regs:$r, Int32Regs:$g),
5234           (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
5235            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5236            Int32Regs:$r, Int32Regs:$g)>;
5238 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
5239            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5240            Int64Regs:$r, Int64Regs:$g),
5241           (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
5242            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5243            Int64Regs:$r, Int64Regs:$g)>;
// Four-component (v4) stores: values in $r, $g, $b, $a.
5245 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
5246            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5247            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5248           (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
5249            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5250            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5252 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
5253            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5254            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5255           (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
5256            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5257            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5259 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
5260            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5261            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5262           (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
5263            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5264            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5267 // .trap variant
// Selection patterns for the 1D, _trap surface-store intrinsics.
// Each Pat maps int_nvvm_sust_b_1d_<ty>_trap onto SUST_B_1D_<TY>_TRAP_R and
// forwards the operands unchanged and in the same order: $s (Int64Regs),
// $x (Int32Regs), followed by the value operands.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the intrinsic definitions.

// Single-value stores: the i8 and i16 forms both carry $r in Int16Regs.
5268 def : Pat<(int_nvvm_sust_b_1d_i8_trap
5269            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5270           (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5272 def : Pat<(int_nvvm_sust_b_1d_i16_trap
5273            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5274           (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5276 def : Pat<(int_nvvm_sust_b_1d_i32_trap
5277            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5278           (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5280 def : Pat<(int_nvvm_sust_b_1d_i64_trap
5281            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5282           (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// Two-component (v2) stores: values in $r, $g.
5284 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
5285            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5286           (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5287            Int16Regs:$r, Int16Regs:$g)>;
5289 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
5290            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5291           (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5292            Int16Regs:$r, Int16Regs:$g)>;
5294 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
5295            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5296           (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5297            Int32Regs:$r, Int32Regs:$g)>;
5299 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
5300            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5301           (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
5302            Int64Regs:$r, Int64Regs:$g)>;
// Four-component (v4) stores: values in $r, $g, $b, $a.
5304 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
5305            Int64Regs:$s, Int32Regs:$x,
5306            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5307           (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5308            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5310 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
5311            Int64Regs:$s, Int32Regs:$x,
5312            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5313           (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5314            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5316 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
5317            Int64Regs:$s, Int32Regs:$x,
5318            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5319           (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5320            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D-array, _trap surface-store intrinsics.
// Each Pat maps int_nvvm_sust_b_1d_array_<ty>_trap onto
// SUST_B_1D_ARRAY_<TY>_TRAP_R and forwards the operands unchanged and in the
// same order: $s (Int64Regs), $l and $x (Int32Regs), followed by the value
// operands.
// NOTE(review): $s is presumably the surface handle and $l the array layer
// index -- confirm against the intrinsic definitions.

// Single-value stores: the i8 and i16 forms both carry $r in Int16Regs.
5324 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
5325            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5326           (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5327            Int16Regs:$r)>;
5329 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
5330            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5331           (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5332            Int16Regs:$r)>;
5334 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
5335            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5336           (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5337            Int32Regs:$r)>;
5339 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
5340            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5341           (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5342            Int64Regs:$r)>;
// Two-component (v2) stores: values in $r, $g.
5344 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
5345           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5346           (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5347            Int16Regs:$r, Int16Regs:$g)>;
5349 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
5350           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5351           (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5352            Int16Regs:$r, Int16Regs:$g)>;
5354 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
5355           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5356           (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5357            Int32Regs:$r, Int32Regs:$g)>;
5359 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
5360           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5361           (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5362            Int64Regs:$r, Int64Regs:$g)>;
// Four-component (v4) stores: values in $r, $g, $b, $a.
5364 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
5365            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5366            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5367           (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5368            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5370 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
5371            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5372            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5373           (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5374            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5376 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
5377            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5378            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5379           (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5380            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D, _trap surface-store intrinsics.
// Each Pat maps int_nvvm_sust_b_2d_<ty>_trap onto SUST_B_2D_<TY>_TRAP_R and
// forwards the operands unchanged and in the same order: $s (Int64Regs),
// $x and $y (Int32Regs), followed by the value operands.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the intrinsic definitions.

// Single-value stores: the i8 and i16 forms both carry $r in Int16Regs.
5384 def : Pat<(int_nvvm_sust_b_2d_i8_trap
5385            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5386           (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5387            Int16Regs:$r)>;
5389 def : Pat<(int_nvvm_sust_b_2d_i16_trap
5390            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5391           (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5392            Int16Regs:$r)>;
5394 def : Pat<(int_nvvm_sust_b_2d_i32_trap
5395            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5396           (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5397            Int32Regs:$r)>;
5399 def : Pat<(int_nvvm_sust_b_2d_i64_trap
5400            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5401           (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5402            Int64Regs:$r)>;
// Two-component (v2) stores: values in $r, $g.
5404 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
5405           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5406           (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5407            Int16Regs:$r, Int16Regs:$g)>;
5409 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
5410           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5411           (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5412            Int16Regs:$r, Int16Regs:$g)>;
5414 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
5415           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5416           (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5417            Int32Regs:$r, Int32Regs:$g)>;
5419 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
5420           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5421           (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5422            Int64Regs:$r, Int64Regs:$g)>;
// Four-component (v4) stores: values in $r, $g, $b, $a.
5424 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
5425            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5426            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5427           (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5428            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5430 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
5431            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5432            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5433           (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5434            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5436 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
5437            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5438            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5439           (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5440            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D-array, _trap surface-store intrinsics.
// Each Pat maps int_nvvm_sust_b_2d_array_<ty>_trap onto
// SUST_B_2D_ARRAY_<TY>_TRAP_R and forwards the operands unchanged and in the
// same order: $s (Int64Regs), $l, $x and $y (Int32Regs), followed by the
// value operands.
// NOTE(review): $s is presumably the surface handle and $l the array layer
// index -- confirm against the intrinsic definitions.

// Single-value stores: the i8 and i16 forms both carry $r in Int16Regs.
5444 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
5445           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5446           (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
5447            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5448            Int16Regs:$r)>;
5450 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
5451           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5452           (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
5453            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5454            Int16Regs:$r)>;
5456 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
5457           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5458           (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
5459            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5460            Int32Regs:$r)>;
5462 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
5463           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5464           (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
5465            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5466            Int64Regs:$r)>;
// Two-component (v2) stores: values in $r, $g.
5468 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
5469            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5470            Int16Regs:$r, Int16Regs:$g),
5471           (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
5472            Int32Regs:$x, Int32Regs:$y,
5473            Int16Regs:$r, Int16Regs:$g)>;
5475 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
5476            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5477            Int16Regs:$r, Int16Regs:$g),
5478           (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
5479            Int32Regs:$x, Int32Regs:$y,
5480            Int16Regs:$r, Int16Regs:$g)>;
5482 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
5483            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5484            Int32Regs:$g),
5485           (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5486            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5488 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
5489            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5490            Int64Regs:$g),
5491           (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l,
5492            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Four-component (v4) stores: values in $r, $g, $b, $a.
5494 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
5495            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5496            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5497           (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
5498            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5499            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5501 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
5502            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5503            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5504           (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
5505            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5506            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5508 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
5509            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5510            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5511           (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5512            Int32Regs:$x, Int32Regs:$y,
5513            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 3D, _trap surface-store intrinsics.
// Each Pat maps int_nvvm_sust_b_3d_<ty>_trap onto SUST_B_3D_<TY>_TRAP_R and
// forwards the operands unchanged and in the same order: $s (Int64Regs),
// $x, $y and $z (Int32Regs), followed by the value operands.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the intrinsic definitions.

// Single-value stores: the i8 and i16 forms both carry $r in Int16Regs.
5517 def : Pat<(int_nvvm_sust_b_3d_i8_trap
5518            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5519            Int16Regs:$r),
5520           (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
5521            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5522            Int16Regs:$r)>;
5524 def : Pat<(int_nvvm_sust_b_3d_i16_trap
5525            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5526            Int16Regs:$r),
5527           (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
5528            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5529            Int16Regs:$r)>;
5531 def : Pat<(int_nvvm_sust_b_3d_i32_trap
5532            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5533            Int32Regs:$r),
5534           (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
5535            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5536            Int32Regs:$r)>;
5538 def : Pat<(int_nvvm_sust_b_3d_i64_trap
5539            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5540            Int64Regs:$r),
5541           (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
5542            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5543            Int64Regs:$r)>;
// Two-component (v2) stores: values in $r, $g.
5545 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
5546            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5547            Int16Regs:$r, Int16Regs:$g),
5548           (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
5549            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5550            Int16Regs:$r, Int16Regs:$g)>;
5552 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
5553            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5554            Int16Regs:$r, Int16Regs:$g),
5555           (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
5556            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5557            Int16Regs:$r, Int16Regs:$g)>;
5559 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
5560            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5561            Int32Regs:$r, Int32Regs:$g),
5562           (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
5563            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5564            Int32Regs:$r, Int32Regs:$g)>;
5566 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
5567            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5568            Int64Regs:$r, Int64Regs:$g),
5569           (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
5570            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5571            Int64Regs:$r, Int64Regs:$g)>;
// Four-component (v4) stores: values in $r, $g, $b, $a.
5573 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
5574            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5575            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5576           (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
5577            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5578            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5580 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
5581            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5582            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5583           (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
5584            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5585            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5587 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
5588            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5589            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5590           (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
5591            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5592            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5595 // .zero variant
// Selection patterns for the 1D, _zero surface-store intrinsics.
// Each Pat maps int_nvvm_sust_b_1d_<ty>_zero onto SUST_B_1D_<TY>_ZERO_R and
// forwards the operands unchanged and in the same order: $s (Int64Regs),
// $x (Int32Regs), followed by the value operands.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the intrinsic definitions.

// Single-value stores: the i8 and i16 forms both carry $r in Int16Regs.
5596 def : Pat<(int_nvvm_sust_b_1d_i8_zero
5597            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5598           (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5600 def : Pat<(int_nvvm_sust_b_1d_i16_zero
5601            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5602           (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5604 def : Pat<(int_nvvm_sust_b_1d_i32_zero
5605            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5606           (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5608 def : Pat<(int_nvvm_sust_b_1d_i64_zero
5609            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5610           (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// Two-component (v2) stores: values in $r, $g.
5612 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
5613            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5614           (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
5615            Int16Regs:$r, Int16Regs:$g)>;
5617 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
5618            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5619           (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
5620            Int16Regs:$r, Int16Regs:$g)>;
5622 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
5623            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5624           (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
5625            Int32Regs:$r, Int32Regs:$g)>;
5627 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
5628            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5629           (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x,
5630            Int64Regs:$r, Int64Regs:$g)>;
// Four-component (v4) stores: values in $r, $g, $b, $a.
5632 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
5633            Int64Regs:$s, Int32Regs:$x,
5634            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5635           (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
5636            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5638 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
5639            Int64Regs:$s, Int32Regs:$x,
5640            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5641           (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
5642            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5644 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
5645            Int64Regs:$s, Int32Regs:$x,
5646            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5647           (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
5648            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D-array, _zero surface-store intrinsics.
// Each Pat maps int_nvvm_sust_b_1d_array_<ty>_zero onto
// SUST_B_1D_ARRAY_<TY>_ZERO_R and forwards the operands unchanged and in the
// same order: $s (Int64Regs), $l and $x (Int32Regs), followed by the value
// operands.
// NOTE(review): $s is presumably the surface handle and $l the array layer
// index -- confirm against the intrinsic definitions.

// Single-value stores: the i8 and i16 forms both carry $r in Int16Regs.
5652 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
5653            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5654           (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5655            Int16Regs:$r)>;
5657 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
5658            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5659           (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5660            Int16Regs:$r)>;
5662 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
5663            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5664           (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5665            Int32Regs:$r)>;
5667 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
5668            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5669           (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5670            Int64Regs:$r)>;
// Two-component (v2) stores: values in $r, $g.
5672 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
5673           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5674           (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5675            Int16Regs:$r, Int16Regs:$g)>;
5677 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
5678           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5679           (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5680            Int16Regs:$r, Int16Regs:$g)>;
5682 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
5683           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5684           (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5685            Int32Regs:$r, Int32Regs:$g)>;
5687 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
5688           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5689           (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5690            Int64Regs:$r, Int64Regs:$g)>;
// Four-component (v4) stores: values in $r, $g, $b, $a.
5692 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
5693            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5694            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5695           (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5696            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5698 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
5699            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5700            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5701           (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5702            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5704 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
5705            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5706            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5707           (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5708            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D, _zero surface-store intrinsics
// (i8 through v4i16). Each Pat maps int_nvvm_sust_b_2d_<ty>_zero onto
// SUST_B_2D_<TY>_ZERO_R and forwards the operands unchanged and in the same
// order: $s (Int64Regs), $x and $y (Int32Regs), followed by the value
// operands.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the intrinsic definitions.

// Single-value stores: the i8 and i16 forms both carry $r in Int16Regs.
5712 def : Pat<(int_nvvm_sust_b_2d_i8_zero
5713            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5714           (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5715            Int16Regs:$r)>;
5717 def : Pat<(int_nvvm_sust_b_2d_i16_zero
5718            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5719           (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5720            Int16Regs:$r)>;
5722 def : Pat<(int_nvvm_sust_b_2d_i32_zero
5723            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5724           (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5725            Int32Regs:$r)>;
5727 def : Pat<(int_nvvm_sust_b_2d_i64_zero
5728            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5729           (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5730            Int64Regs:$r)>;
// Two-component (v2) stores: values in $r, $g.
5732 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
5733           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5734           (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5735            Int16Regs:$r, Int16Regs:$g)>;
5737 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
5738           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5739           (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5740            Int16Regs:$r, Int16Regs:$g)>;
5742 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
5743           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5744           (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5745            Int32Regs:$r, Int32Regs:$g)>;
5747 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
5748           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5749           (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5750            Int64Regs:$r, Int64Regs:$g)>;
// Four-component (v4) stores: values in $r, $g, $b, $a.
5752 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
5753            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5754            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5755           (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5756            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5758 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
5759            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5760            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5761           (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5762            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5764 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
5765            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5766            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5767           (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5768            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_array_*_zero intrinsics,
// covering the i8/i16/i32/i64, v2i8/v2i16/v2i32/v2i64 and v4i8/v4i16/v4i32
// forms. Each pattern maps the intrinsic to the correspondingly named
// SUST_B_2D_ARRAY_*_ZERO_R instruction and forwards the operands unchanged,
// in the same order: $s (Int64Regs), $l, $x and $y (Int32Regs), then the
// data operands $r[, $g[, $b, $a]].
// 8-bit and 16-bit data operands are both matched in Int16Regs.
5772 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
5773           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5774           (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s,
5775            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5776            Int16Regs:$r)>;
5778 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
5779           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5780           (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s,
5781            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5782            Int16Regs:$r)>;
5784 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
5785           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5786           (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s,
5787            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5788            Int32Regs:$r)>;
5790 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
5791           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5792           (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s,
5793            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5794            Int64Regs:$r)>;
5796 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
5797            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5798            Int16Regs:$r, Int16Regs:$g),
5799           (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l,
5800            Int32Regs:$x, Int32Regs:$y,
5801            Int16Regs:$r, Int16Regs:$g)>;
5803 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
5804            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5805            Int16Regs:$r, Int16Regs:$g),
5806           (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l,
5807            Int32Regs:$x, Int32Regs:$y,
5808            Int16Regs:$r, Int16Regs:$g)>;
5810 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
5811            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5812            Int32Regs:$g),
5813           (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
5814            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5816 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
5817            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5818            Int64Regs:$g),
5819           (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l,
5820            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
5822 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
5823            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5824            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5825           (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s,
5826            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5827            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5829 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
5830            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5831            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5832           (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s,
5833            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5834            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5836 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
5837            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5838            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5839           (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
5840            Int32Regs:$x, Int32Regs:$y,
5841            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_3d_*_zero intrinsics, covering
// the i8/i16/i32/i64, v2i8/v2i16/v2i32/v2i64 and v4i8/v4i16/v4i32 forms.
// Each pattern maps the intrinsic to the correspondingly named
// SUST_B_3D_*_ZERO_R instruction and forwards the operands unchanged, in the
// same order: $s (Int64Regs), $x, $y and $z (Int32Regs), then the data
// operands $r[, $g[, $b, $a]].
// 8-bit and 16-bit data operands are both matched in Int16Regs.
5845 def : Pat<(int_nvvm_sust_b_3d_i8_zero
5846            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5847            Int16Regs:$r),
5848           (SUST_B_3D_B8_ZERO_R Int64Regs:$s,
5849            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5850            Int16Regs:$r)>;
5852 def : Pat<(int_nvvm_sust_b_3d_i16_zero
5853            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5854            Int16Regs:$r),
5855           (SUST_B_3D_B16_ZERO_R Int64Regs:$s,
5856            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5857            Int16Regs:$r)>;
5859 def : Pat<(int_nvvm_sust_b_3d_i32_zero
5860            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5861            Int32Regs:$r),
5862           (SUST_B_3D_B32_ZERO_R Int64Regs:$s,
5863            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5864            Int32Regs:$r)>;
5866 def : Pat<(int_nvvm_sust_b_3d_i64_zero
5867            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5868            Int64Regs:$r),
5869           (SUST_B_3D_B64_ZERO_R Int64Regs:$s,
5870            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5871            Int64Regs:$r)>;
5873 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
5874            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5875            Int16Regs:$r, Int16Regs:$g),
5876           (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s,
5877            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5878            Int16Regs:$r, Int16Regs:$g)>;
5880 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
5881            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5882            Int16Regs:$r, Int16Regs:$g),
5883           (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s,
5884            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5885            Int16Regs:$r, Int16Regs:$g)>;
5887 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
5888            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5889            Int32Regs:$r, Int32Regs:$g),
5890           (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s,
5891            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5892            Int32Regs:$r, Int32Regs:$g)>;
5894 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
5895            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5896            Int64Regs:$r, Int64Regs:$g),
5897           (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s,
5898            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5899            Int64Regs:$r, Int64Regs:$g)>;
5901 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
5902            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5903            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5904           (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s,
5905            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5906            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5908 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
5909            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5910            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5911           (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s,
5912            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5913            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5915 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
5916            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5917            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5918           (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s,
5919            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5920            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_1d_*_trap intrinsics, covering
// the i8/i16/i32, v2i8/v2i16/v2i32 and v4i8/v4i16/v4i32 forms (no 64-bit
// element forms appear in this group). Each pattern maps the intrinsic to the
// correspondingly named SUST_P_1D_*_TRAP_R instruction and forwards the
// operands unchanged, in the same order: $s (Int64Regs), $x (Int32Regs), then
// the data operands $r[, $g[, $b, $a]].
// 8-bit and 16-bit data operands are both matched in Int16Regs.
5925 def : Pat<(int_nvvm_sust_p_1d_i8_trap
5926            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5927           (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5929 def : Pat<(int_nvvm_sust_p_1d_i16_trap
5930            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5931           (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5933 def : Pat<(int_nvvm_sust_p_1d_i32_trap
5934            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5935           (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5937 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
5938            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5939           (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5940            Int16Regs:$r, Int16Regs:$g)>;
5942 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
5943            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5944           (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5945            Int16Regs:$r, Int16Regs:$g)>;
5947 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
5948            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5949           (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5950            Int32Regs:$r, Int32Regs:$g)>;
5952 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
5953            Int64Regs:$s, Int32Regs:$x,
5954            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5955           (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5956            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5958 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
5959            Int64Regs:$s, Int32Regs:$x,
5960            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5961           (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5962            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5964 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
5965            Int64Regs:$s, Int32Regs:$x,
5966            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5967           (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5968            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_1d_array_*_trap intrinsics,
// covering the i8/i16/i32, v2i8/v2i16/v2i32 and v4i8/v4i16/v4i32 forms (no
// 64-bit element forms appear in this group). Each pattern maps the intrinsic
// to the correspondingly named SUST_P_1D_ARRAY_*_TRAP_R instruction and
// forwards the operands unchanged, in the same order: $s (Int64Regs), $l and
// $x (Int32Regs), then the data operands $r[, $g[, $b, $a]].
// 8-bit and 16-bit data operands are both matched in Int16Regs.
5972 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
5973            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5974           (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5975            Int16Regs:$r)>;
5977 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
5978            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5979           (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5980            Int16Regs:$r)>;
5982 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
5983            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5984           (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5985            Int32Regs:$r)>;
5987 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
5988           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5989           (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5990            Int16Regs:$r, Int16Regs:$g)>;
5992 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
5993           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5994           (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5995            Int16Regs:$r, Int16Regs:$g)>;
5997 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
5998           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5999           (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6000            Int32Regs:$r, Int32Regs:$g)>;
6002 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
6003            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6004            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6005           (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6006            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6008 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
6009            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6010            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6011           (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6012            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6014 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
6015            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6016            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6017           (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6018            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_2d_*_trap intrinsics, covering
// the i8/i16/i32, v2i8/v2i16/v2i32 and v4i8/v4i16/v4i32 forms (no 64-bit
// element forms appear in this group). Each pattern maps the intrinsic to the
// correspondingly named SUST_P_2D_*_TRAP_R instruction and forwards the
// operands unchanged, in the same order: $s (Int64Regs), $x and $y
// (Int32Regs), then the data operands $r[, $g[, $b, $a]].
// 8-bit and 16-bit data operands are both matched in Int16Regs.
6022 def : Pat<(int_nvvm_sust_p_2d_i8_trap
6023            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6024           (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6025            Int16Regs:$r)>;
6027 def : Pat<(int_nvvm_sust_p_2d_i16_trap
6028            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6029           (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6030            Int16Regs:$r)>;
6032 def : Pat<(int_nvvm_sust_p_2d_i32_trap
6033            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6034           (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6035            Int32Regs:$r)>;
6037 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
6038           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6039           (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6040            Int16Regs:$r, Int16Regs:$g)>;
6042 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
6043           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6044           (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6045            Int16Regs:$r, Int16Regs:$g)>;
6047 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
6048           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6049           (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6050            Int32Regs:$r, Int32Regs:$g)>;
6052 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
6053            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6054            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6055           (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6056            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6058 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
6059            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6060            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6061           (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6062            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6064 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
6065            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6066            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6067           (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6068            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_2d_array_*_trap intrinsics,
// covering the i8/i16/i32, v2i8/v2i16/v2i32 and v4i8/v4i16/v4i32 forms (no
// 64-bit element forms appear in this group). Each pattern maps the intrinsic
// to the correspondingly named SUST_P_2D_ARRAY_*_TRAP_R instruction and
// forwards the operands unchanged, in the same order: $s (Int64Regs), $l, $x
// and $y (Int32Regs), then the data operands $r[, $g[, $b, $a]].
// 8-bit and 16-bit data operands are both matched in Int16Regs.
6072 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
6073           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6074           (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
6075            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6076            Int16Regs:$r)>;
6078 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
6079           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6080           (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
6081            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6082            Int16Regs:$r)>;
6084 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
6085           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6086           (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
6087            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6088            Int32Regs:$r)>;
6090 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
6091            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6092            Int16Regs:$r, Int16Regs:$g),
6093           (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
6094            Int32Regs:$x, Int32Regs:$y,
6095            Int16Regs:$r, Int16Regs:$g)>;
6097 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
6098            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6099            Int16Regs:$r, Int16Regs:$g),
6100           (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
6101            Int32Regs:$x, Int32Regs:$y,
6102            Int16Regs:$r, Int16Regs:$g)>;
6104 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
6105            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6106            Int32Regs:$g),
6107           (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
6108            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6110 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
6111            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6112            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6113           (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
6114            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6115            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6117 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
6118            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6119            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6120           (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
6121            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6122            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6124 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
6125            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6126            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6127           (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
6128            Int32Regs:$x, Int32Regs:$y,
6129            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_3d_*_trap intrinsics, covering
// the i8/i16/i32, v2i8/v2i16/v2i32 and v4i8/v4i16/v4i32 forms (no 64-bit
// element forms appear in this group). Each pattern maps the intrinsic to the
// correspondingly named SUST_P_3D_*_TRAP_R instruction and forwards the
// operands unchanged, in the same order: $s (Int64Regs), $x, $y and $z
// (Int32Regs), then the data operands $r[, $g[, $b, $a]].
// 8-bit and 16-bit data operands are both matched in Int16Regs.
6133 def : Pat<(int_nvvm_sust_p_3d_i8_trap
6134            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6135            Int16Regs:$r),
6136           (SUST_P_3D_B8_TRAP_R Int64Regs:$s,
6137            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6138            Int16Regs:$r)>;
6140 def : Pat<(int_nvvm_sust_p_3d_i16_trap
6141            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6142            Int16Regs:$r),
6143           (SUST_P_3D_B16_TRAP_R Int64Regs:$s,
6144            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6145            Int16Regs:$r)>;
6147 def : Pat<(int_nvvm_sust_p_3d_i32_trap
6148            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6149            Int32Regs:$r),
6150           (SUST_P_3D_B32_TRAP_R Int64Regs:$s,
6151            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6152            Int32Regs:$r)>;
6154 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
6155            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6156            Int16Regs:$r, Int16Regs:$g),
6157           (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s,
6158            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6159            Int16Regs:$r, Int16Regs:$g)>;
6161 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
6162            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6163            Int16Regs:$r, Int16Regs:$g),
6164           (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s,
6165            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6166            Int16Regs:$r, Int16Regs:$g)>;
6168 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
6169            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6170            Int32Regs:$r, Int32Regs:$g),
6171           (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s,
6172            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6173            Int32Regs:$r, Int32Regs:$g)>;
6175 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
6176            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6177            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6178           (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s,
6179            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6180            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6182 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
6183            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6184            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6185           (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s,
6186            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6187            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6189 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
6190            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6191            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6192           (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s,
6193            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6194            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6196 //-----------------------------------
6197 // Read Special Registers
6198 //-----------------------------------
// Instruction that reads a special register into a 64-bit integer register.
//   regname - register name without the leading '%'; the assembly string is
//             "mov.u64 \t$d, %<regname>;".
//   intop   - intrinsic, taking no operands, that this instruction is
//             selected for; its result is placed in $d.
//   Preds   - predicates attached through Requires<>; empty by default.
6200 class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]>
6201   : NVPTXInst<(outs Int64Regs:$d), (ins),
6202               !strconcat("mov.u64 \t$d, %", regname, ";"),
6203               [(set Int64Regs:$d, (intop))]>,
6204     Requires<Preds>;
// Instruction that reads a special register into a 32-bit integer register.
//   regname - register name without the leading '%'; the assembly string is
//             "mov.u32 \t$d, %<regname>;".
//   intop   - intrinsic, taking no operands, that this instruction is
//             selected for; its result is placed in $d.
//   Preds   - predicates attached through Requires<>; empty by default.
6206 class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
6207   : NVPTXInst<(outs Int32Regs:$d), (ins),
6208               !strconcat("mov.u32 \t$d, %", regname, ";"),
6209               [(set Int32Regs:$d, (intop))]>,
6210     Requires<Preds>;
// Defines four PTX_READ_SREG_R32 instructions, one per component suffix
// x, y, z and w. For suffix S the def is named "<defm name>_S", reads the
// register "<regname>.S", and is matched to the intrinsic looked up by name
// as int_nvvm_read_ptx_sreg_<regname>_S. Preds is passed to every def.
6212 multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
6213    foreach suffix = ["x", "y", "z", "w"] in {
         // Register name as written in the assembly, e.g. "tid.x".
6214       defvar reg = regname # "." # suffix;
         // Resolve the matching intrinsic record from its constructed name.
6215       defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix);
6216       def "_"#suffix :  PTX_READ_SREG_R32<reg, intr, Preds>;
6217    }
6220 // TODO: Add vector-version reads of special registers.
// Per-component (.x/.y/.z/.w) reads of the four-component special registers.
// These four carry no predicates.
6222 defm INT_PTX_SREG_TID   : PTX_READ_SREG_R32V4<"tid">;
6223 defm INT_PTX_SREG_NTID  : PTX_READ_SREG_R32V4<"ntid">;
6224 defm INT_PTX_SREG_CTAID : PTX_READ_SREG_R32V4<"ctaid">;
6225 defm INT_PTX_SREG_NCTAID: PTX_READ_SREG_R32V4<"nctaid">;
// Cluster-related special registers. Every definition below is guarded by
// hasSM<90> and hasPTX<78>.
6227 defm INT_PTX_SREG_CLUSTERID :
6228        PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
6229 defm INT_PTX_SREG_NCLUSTERID :
6230        PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
6231 defm INT_PTX_SREG_CLUSTER_CTAID :
6232        PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
6233 defm INT_PTX_SREG_CLUSTER_NCTAID:
6234        PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;
// The two cluster rank registers are scalar, so they use PTX_READ_SREG_R32
// directly instead of the four-component multiclass.
6236 def  INT_PTX_SREG_CLUSTER_CTARANK :
6237        PTX_READ_SREG_R32<"cluster_ctarank",
6238                          int_nvvm_read_ptx_sreg_cluster_ctarank,
6239                          [hasSM<90>, hasPTX<78>]>;
6240 def  INT_PTX_SREG_CLUSTER_NCTARANK:
6241        PTX_READ_SREG_R32<"cluster_nctarank",
6242                          int_nvvm_read_ptx_sreg_cluster_nctarank,
6243                          [hasSM<90>, hasPTX<78>]>;
// Scalar special-register reads. Each def pairs a register name with the
// int_nvvm_read_ptx_sreg_<name> intrinsic it is selected for; none of them
// carries predicates. All are 32-bit reads except clock64, which uses the
// 64-bit class.
6246 def INT_PTX_SREG_LANEID :
6247     PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
6248 def INT_PTX_SREG_WARPID :
6249     PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
6250 def INT_PTX_SREG_NWARPID :
6251     PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
6252 def INT_PTX_SREG_SMID :
6253     PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
6254 def INT_PTX_SREG_NSMID :
6255     PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
6256 def INT_PTX_SREG_GRIDID :
6257     PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
// Lane-mask registers.
6259 def INT_PTX_SREG_LANEMASK_EQ :
6260     PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
6261 def INT_PTX_SREG_LANEMASK_LE :
6262     PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
6263 def INT_PTX_SREG_LANEMASK_LT :
6264     PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
6265 def INT_PTX_SREG_LANEMASK_GE :
6266     PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
6267 def INT_PTX_SREG_LANEMASK_GT :
6268     PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
// Clock registers: 32-bit and 64-bit variants.
6270 def INT_PTX_SREG_CLOCK :
6271     PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
6272 def INT_PTX_SREG_CLOCK64 :
6273     PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
// pm0 .. pm3 registers.
6275 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
6276 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
6277 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
6278 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
6280 // TODO: It would be nice to use PTX_READ_SREG here, but it doesn't
6281 // handle the constant.
// The PTX_READ_SREG_* classes always emit a '%' before the register name,
// whereas the assembly here uses the bare name WARP_SZ, so the instruction is
// spelled out by hand. It is selected for int_nvvm_read_ptx_sreg_warpsize and
// writes its result to $dst.
6282 def INT_PTX_SREG_WARPSIZE :
6283     NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
6284               [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
6286 // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
6287 // In addition to target-independent fields provided by WMMA_REGS, it adds
6288 // the fields commonly used to implement a specific PTX instruction -- register
6289 // types and names, constraints, parts of assembly, etc.
//
// Template arguments:
//   r  - fragment description; its geom, frag and ptx_elt_type values are
//        forwarded to the WMMA_REGS base class.
//   op - operation name; the predicate table below compares it against "mma"
//        and "ldmatrix".
6290 class WMMA_REGINFO<WMMA_REGS r, string op>
6291       : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
6292   // NVPTX register types used to carry fragment data.
  // Only f32 and f64 map to floating-point register classes; every other
  // listed element type is carried in Int32Regs.
6293   NVPTXRegClass regclass = !cond(
6294     !eq(ptx_elt_type, "f16") : Int32Regs,
6295     !eq(ptx_elt_type, "f32") : Float32Regs,
6296     !eq(ptx_elt_type, "f64") : Float64Regs,
6297     !eq(ptx_elt_type, "bf16") : Int32Regs,
6298     !eq(ptx_elt_type, "tf32") : Int32Regs,
6299     !eq(ptx_elt_type, "s32") : Int32Regs,
6300     !eq(ptx_elt_type, "b16") : Int32Regs,
6301     !eq(ptx_elt_type, "s8") : Int32Regs,
6302     !eq(ptx_elt_type, "u8") : Int32Regs,
6303     !eq(ptx_elt_type, "s4") : Int32Regs,
6304     !eq(ptx_elt_type, "u4") : Int32Regs,
6305     !eq(ptx_elt_type, "b1") : Int32Regs);
6307   // Instruction input/output arguments for the fragment.
  // One regclass entry per element of `regs` (not defined in this class;
  // presumably inherited from WMMA_REGS -- confirm).
6308   list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));
6310   // List of register names for the fragment -- ["ra0", "ra1",...]
6311   list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
6313   // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
6314   string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";
6316   // Predicates for particular fragment variant. Technically those are
6317   // per-instruction predicates, but currently all fragments that can be used in
6318   // a given instruction are subject to the same constraints, so an instruction
6319   // can use predicates from any of its fragments. If/when this is no
6320   // longer the case, we can concat all per-fragment predicates to enforce that
6321   // all fragments of the instruction are viable.
  //
  // NOTE: !cond yields the value of the first case whose condition is true,
  // so the order of the cases below is significant: specific cases must stay
  // ahead of the more general ones that would also match.
6322   list<Predicate> Predicates = !cond(
6323     // fp16 -> fp16/fp32 @ m16n16k16
6324     !and(!eq(geom, "m16n16k16"),
6325          !or(!eq(ptx_elt_type, "f16"),
6326              !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>],
    // f64 @ m8n8k4. This is the only f64 case; the non-f64 m8n8k4 case
    // further down explicitly excludes f64.
6328     !and(!eq(geom,"m8n8k4"),
6329          !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>],
6331     // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
6332     !and(!or(!eq(geom, "m8n32k16"),
6333              !eq(geom, "m32n8k16")),
6334          !or(!eq(ptx_elt_type, "f16"),
6335              !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>],
6337     // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
6338     !and(!or(!eq(geom,"m16n16k16"),
6339              !eq(geom,"m8n32k16"),
6340              !eq(geom,"m32n8k16")),
6341          !or(!eq(ptx_elt_type, "u8"),
6342              !eq(ptx_elt_type, "s8"),
6343              !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>],
    // bf16 @ m16n16k16/m8n32k16/m32n8k16
6345     !and(!or(!eq(geom,"m16n16k16"),
6346              !eq(geom,"m8n32k16"),
6347              !eq(geom,"m32n8k16")),
6348          !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>],
    // tf32 @ m16n16k8
6350     !and(!eq(geom,"m16n16k8"),
6351          !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>],
    // f32 @ m16n16k8
6353     !and(!eq(geom,"m16n16k8"),
6354          !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>],
6356     // b1 -> s32 @ m8n8k128(b1)
6357     !and(!ne(op,"mma"),
6358          !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>],
6360     // u4/s4 -> s32 @ m8n8k32 (u4/s4)
6361     !and(!ne(op,"mma"),
6362          !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>],
    // any element type @ m16n8k8/m8n8k16
6364     !or(!eq(geom,"m16n8k8"),
6365         !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>],
    // non-f64 @ m8n8k4
6367     !and(!ne(ptx_elt_type,"f64"),
6368          !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>],
6370     // mma m8n8k32 requires higher PTX version
6371     !and(!eq(op,"mma"),
6372          !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>],
    // mma @ the remaining geometries listed below
6377     !and(!eq(op,"mma"),
6378          !or(!eq(geom, "m16n8k16"),
6379              !eq(geom, "m16n8k4"),
6380              !eq(geom, "m16n8k32"),
6381              !eq(geom, "m16n8k64"),
6382              !eq(geom, "m8n8k128"),
6383              !eq(geom, "m16n8k128"),
6384              !eq(geom, "m16n8k256"))) : [hasSM<80>, hasPTX<70>],
    // ldmatrix b16 @ m8n8
6386     !and(!eq(op,"ldmatrix"),
6387          !eq(ptx_elt_type,"b16"),
6388          !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]);
6390   // template DAGs for instruction inputs/output.
  // Both pair each entry of ptx_regs with the same-index name in reg_names;
  // they differ only in the dag operator (outs vs ins).
6391   dag Outs = !dag(outs, ptx_regs, reg_names);
6392   dag Ins = !dag(ins, ptx_regs, reg_names);
6395 // Convert dag of arguments into a dag to match given intrinsic.
//   Intr - intrinsic that becomes the operator of the resulting dag.
//   Ins  - argument dag whose operator is `ins`.
// The result is exposed as the `ret` field.
6396 class BuildPatternI<Intrinsic Intr, dag Ins> {
6397   // Build a dag pattern that matches the intrinsic call.
  // Each element of Ins is rewritten by four substitutions, applied from the
  // innermost outwards: ins -> Intr, MEMri -> ADDRri, MEMri64 -> ADDRri64,
  // imem -> ADDRvar.
6398   dag ret = !foreach(tmp, Ins,
6399                           !subst(imem, ADDRvar,
6400                           !subst(MEMri64, ADDRri64,
6401                           !subst(MEMri, ADDRri,
6402                           !subst(ins, Intr, tmp)))));
6405 // Same as above, but uses PatFrag instead of an Intrinsic.
//   Intr - PatFrag that becomes the operator of the resulting dag.
//   Ins  - argument dag whose operator is `ins`.
// The result is exposed as the `ret` field.
6406 class BuildPatternPF<PatFrag Intr, dag Ins> {
6407   // Build a dag pattern that matches the given PatFrag.
  // Each element of Ins is rewritten by four substitutions, applied from the
  // innermost outwards: ins -> Intr, MEMri -> ADDRri, MEMri64 -> ADDRri64,
  // imem -> ADDRvar.
6408   dag ret = !foreach(tmp, Ins,
6409                           !subst(imem, ADDRvar,
6410                           !subst(MEMri64, ADDRri64,
6411                           !subst(MEMri, ADDRri,
6412                           !subst(ins, Intr, tmp)))));
// Base class of every WMMA/MMA-family instruction. It holds the fields that
// the pattern-building code relies on (Intr, Args, IntrinsicPattern).
class WMMA_INSTR<string _Intr, list<dag> _Args>
    : NVPTXInst<(outs), (ins), "?", []> {
  // Intrinsic record looked up from its name.
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // Every dag in _Args joined into a single (ins ...) dag.
  dag Args = !foldl((ins), _Args, acc, next, !con(acc, next));
  // Default match pattern of the form (intrinsic arg0, arg1, ...).
  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
}
//
// wmma.load.<frag>.sync.<layout>.<geom>[.global|.shared].<type>
//
class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                DAGOperand SrcOp>
    : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
                 [!con((ins SrcOp:$src),
                       !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
      Requires<Frag.Predicates> {
  // The load/store intrinsics are overloaded on the pointer's address space,
  // so the match goes through a PatFrag that carries an address-space check.
  // PFOperands mirrors the shape of Args, spelled as (ops node:$name, ...).
  dag PFOperands = !con((ops node:$src),
                        !if(WithStride, (ops node:$ldm), (ops)));
  // The same operands, headed by the intrinsic instead of 'ops'.
  dag PFOperandsIntr = !foreach(elt, PFOperands, !subst(ops, Intr, elt));
  // PatFrag restricted to the address space selected by Space; anything other
  // than ".shared" or ".global" uses the generic check.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             PFOperandsIntr,
                             !if(!eq(Space, ".shared"), AS_match.shared,
                             !if(!eq(Space, ".global"), AS_match.global,
                                 AS_match.generic))>;
  // Replace the default pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  // The instruction takes the matched arguments plus a trailing $ptx operand.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = Frag.Outs;
  let AsmString = "wmma.load." # Frag.frag
                  # ".sync${ptx:aligned}." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " \t" # Frag.regstring
                  # ", [$src]"
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}
//
// wmma.store.d.sync.<layout>.<geom>[.global|.shared].<type>
//
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                   bit WithStride, DAGOperand DstOp>
    : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
                 [!con((ins DstOp:$dst),
                       Frag.Ins,
                       !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
      Requires<Frag.Predicates> {

  // The load/store intrinsics are overloaded on the pointer's address space,
  // so the match goes through a PatFrag that carries an address-space check.
  // PFOperands mirrors the shape of Args, spelled as (ops node:$name, ...):
  // the destination, one node per fragment register, then the optional stride.
  dag PFOperands = !con((ops node:$dst),
                        !dag(ops, !listsplat(node, !size(Frag.regs)),
                             Frag.reg_names),
                        !if(WithStride, (ops node:$ldm), (ops)));
  // PatFrag restricted to the address space selected by Space; anything other
  // than ".shared" or ".global" uses the generic check.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(elt, PFOperands, !subst(ops, Intr, elt)),
                             !if(!eq(Space, ".shared"), AS_match.shared,
                             !if(!eq(Space, ".global"), AS_match.global,
                                 AS_match.generic))>;
  // Replace the default pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  // A store produces no results; inputs are the matched arguments plus $ptx.
  let OutOperandList = (outs);
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "wmma.store.d.sync${ptx:aligned}." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " \t[$dst]," # Frag.regstring
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}
// Instantiate every supported wmma load/store variant and collect the
// resulting records in MMA_LDSTs.
defset list<WMMA_INSTR> MMA_LDSTs = {
  foreach layout = ["row", "col"] in
  foreach stride = [false, true] in
  foreach space = [".global", ".shared", ""] in
  foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
    // Loads.
    foreach frag = NVVM_MMA_OPS.all_ld_ops in {
      if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then {
        def : WMMA_LOAD<WMMA_REGINFO<frag, "load">,
                        layout, space, stride, addr>;
      }
    }
    // Stores.
    foreach frag = NVVM_MMA_OPS.all_st_ops in {
      if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then {
        def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">,
                           layout, space, stride, addr>;
      }
    }
  } // layout, stride, space, addr
} // defset
// Predicates for an MMA operation: the predicates of fragment A, extended
// with hasSM<80> and hasPTX<71> when the b1 operation is ".and.popc".
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret = !if(!eq(b1op, ".and.popc"),
                            !listconcat(FragA.Predicates,
                                        [hasSM<80>, hasPTX<71>]),
                            FragA.Predicates);
}
// WMMA.MMA
// Operands are the A, B and C fragments; the result is the D fragment.
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd,
               string b1op>
    : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op,
                           FragA, FragB, FragC, FragD>.record,
                 [FragA.Ins, FragB.Ins, FragC.Ins]>,
      // Requires does not seem to have any effect on an Instruction without
      // Patterns. It is set here anyway and propagated to the Pat<> that is
      // constructed from this record.
      Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = FragD.Outs;
  // Type suffix: ".<D>.<C>" when A is f16, otherwise ".<D>.<A>.<B>.<C>".
  string TypeList = !if(!eq(FragA.ptx_elt_type, "f16"),
                        "." # FragD.ptx_elt_type
                        # "." # FragC.ptx_elt_type,
                        "." # FragD.ptx_elt_type
                        # "." # FragA.ptx_elt_type
                        # "." # FragB.ptx_elt_type
                        # "." # FragC.ptx_elt_type);
  let AsmString = "wmma.mma" # b1op
                  # ".sync${ptx:aligned}." # ALayout
                  # "." # BLayout
                  # "." # FragA.geom
                  // The rounding mode is emitted only when one is given.
                  # !if(!eq(rnd, ""), "", "." # rnd)
                  # TypeList
                  # !if(Satfinite, ".satfinite", "")
                  # "\n\t\t" # FragD.regstring
                  # ",\n\t\t" # FragA.regstring
                  # ",\n\t\t" # FragB.regstring
                  # ",\n\t\t" # FragC.regstring
                  # ";";
}
// Instantiate every supported wmma.mma variant and collect the resulting
// records in WMMAs.
defset list<WMMA_INSTR> WMMAs = {
  foreach layout_a = ["row", "col"] in
  foreach layout_b = ["row", "col"] in
  foreach satf = [0, 1] in
  foreach rnd = ["", "rn", "rz", "rm", "rp"] in
  foreach op = NVVM_MMA_OPS.all_wmma_ops in
  foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
    if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
      // op holds the A, B, C and D fragments, in that order.
      def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
                     WMMA_REGINFO<op[1], "wmma.mma">,
                     WMMA_REGINFO<op[2], "wmma.mma">,
                     WMMA_REGINFO<op[3], "wmma.mma">,
                     layout_a, layout_b, satf, rnd, b1op>;
    }
  } // layout_a, layout_b, satf, rnd, op, b1op
} // defset
// MMA
// Operands are the A, B and C fragments; the result is the D fragment.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
          WMMA_REGINFO FragC, WMMA_REGINFO FragD,
          string ALayout, string BLayout, int Satfinite, string b1op>
    : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op,
                          FragA, FragB, FragC, FragD>.record,
                 [FragA.Ins, FragB.Ins, FragC.Ins]>,
      // Requires does not seem to have any effect on an Instruction without
      // Patterns. It is set here anyway and propagated to the Pat<> that is
      // constructed from this record.
      Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = FragD.Outs;
  // Type suffix, always spelled out in full: ".<D>.<A>.<B>.<C>".
  string TypeList = "." # FragD.ptx_elt_type # "." # FragA.ptx_elt_type
                    # "." # FragB.ptx_elt_type # "." # FragC.ptx_elt_type;
  let AsmString = "mma.sync.aligned." # FragA.geom
                  # "." # ALayout
                  # "." # BLayout
                  # !if(Satfinite, ".satfinite", "")
                  # TypeList
                  # b1op
                  # "\n\t\t" # FragD.regstring
                  # ",\n\t\t" # FragA.regstring
                  # ",\n\t\t" # FragB.regstring
                  # ",\n\t\t" # FragC.regstring
                  # ";";
}
// Instantiate every supported mma variant and collect the resulting records
// in MMAs.
defset list<WMMA_INSTR> MMAs = {
  foreach layout_a = ["row", "col"] in
  foreach layout_b = ["row", "col"] in
  foreach satf = [0, 1] in
  foreach op = NVVM_MMA_OPS.all_mma_ops in
  foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
    if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
      // op holds the A, B, C and D fragments, in that order.
      def : MMA<WMMA_REGINFO<op[0], "mma">,
                WMMA_REGINFO<op[1], "mma">,
                WMMA_REGINFO<op[2], "mma">,
                WMMA_REGINFO<op[3], "mma">,
                layout_a, layout_b, satf, b1op>;
    }
  } // layout_a, layout_b, satf, op, b1op
} // defset
//
// ldmatrix.sync.aligned.<geom>.<frag>[.trans][.shared].<type>
//
class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
               DAGOperand SrcOp>
    : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
      Requires<Frag.Predicates> {
  // PatFrag that only matches the address space selected by Space; anything
  // other than ".shared" uses the generic check.
  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
                             !if(!eq(Space, ".shared"), AS_match.shared,
                                 AS_match.generic)>;
  // Replace the default pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  // The instruction takes the matched arguments plus a trailing $ptx operand.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = Frag.Outs;
  let AsmString = "ldmatrix.sync.aligned." # Frag.geom
                  # "." # Frag.frag
                  # !if(Transposed, ".trans", "")
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " " # Frag.regstring
                  # ", [$src];";
}
// Instantiate every supported ldmatrix variant and collect the resulting
// records in LDMATRIXs.
defset list<WMMA_INSTR> LDMATRIXs = {
  foreach transposed = [false, true] in
  foreach space = [".shared", ""] in
  foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in
  foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in {
    if NVVM_LDMATRIX_SUPPORTED<frag>.ret then {
      def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">,
                     transposed, space, addr>;
    }
  } // transposed, space, addr, frag
} // defset
// Selection pattern mapping wi.IntrinsicPattern onto the instruction wi.
// Building non-flat dags is still awkward: a dag node cannot be !subst'ed with
// a dag, so ptx.version has to be appended *after* the foreach has replaced
// 'ins' with the instruction record.
class MMA_PAT<WMMA_INSTR wi>
    : Pat<wi.IntrinsicPattern,
          !con(!foreach(elt, wi.Args, !subst(ins, wi, elt)),
               (wi ptx.version))>,
      Requires<wi.Predicates>;
// Emit one intrinsic->instruction pattern per record collected in the MMAs,
// WMMAs, MMA_LDSTs and LDMATRIXs defsets.
foreach insn = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in {
  def : MMA_PAT<insn>;
}
// mapa<suffix>.{u32|u64} $d, $a, $b
// Defines 32- and 64-bit forms, each with $b either in a register or as an
// i32 immediate (the "i" variants).
multiclass MAPA<string suffix, Intrinsic Intr> {
  // Every variant has the same requirements.
  defvar Preds = [hasSM<90>, hasPTX<78>];

  def _32 : NVPTXInst<(outs Int32Regs:$d),
                      (ins Int32Regs:$a, Int32Regs:$b),
                      "mapa" # suffix # ".u32\t$d, $a, $b;",
                      [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
            Requires<Preds>;
  def _32i : NVPTXInst<(outs Int32Regs:$d),
                       (ins Int32Regs:$a, i32imm:$b),
                       "mapa" # suffix # ".u32\t$d, $a, $b;",
                       [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
             Requires<Preds>;
  def _64 : NVPTXInst<(outs Int64Regs:$d),
                      (ins Int64Regs:$a, Int32Regs:$b),
                      "mapa" # suffix # ".u64\t$d, $a, $b;",
                      [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
            Requires<Preds>;
  def _64i : NVPTXInst<(outs Int64Regs:$d),
                       (ins Int64Regs:$a, i32imm:$b),
                       "mapa" # suffix # ".u64\t$d, $a, $b;",
                       [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
             Requires<Preds>;
}

// Plain and .shared::cluster forms.
defm mapa : MAPA<"", int_nvvm_mapa>;
defm mapa_shared_cluster
    : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;
// getctarank<suffix>.{u32|u64} $d, $a
// The result $d is an Int32Regs register in both forms; only the operand $a
// differs in width.
multiclass GETCTARANK<string suffix, Intrinsic Intr> {
  // Both variants have the same requirements.
  defvar Preds = [hasSM<90>, hasPTX<78>];

  def _32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
                      "getctarank" # suffix # ".u32\t$d, $a;",
                      [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
            Requires<Preds>;
  def _64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                      "getctarank" # suffix # ".u64\t$d, $a;",
                      [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
            Requires<Preds>;
}

// Plain and .shared::cluster forms.
defm getctarank : GETCTARANK<"", int_nvvm_getctarank>;
defm getctarank_shared_cluster
    : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
// Copies the %is_explicit_cluster special register into a predicate register.
def is_explicit_cluster
    : NVPTXInst<(outs Int1Regs:$d), (ins),
                "mov.pred\t$d, %is_explicit_cluster;",
                [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;