[RISCV] Add support for Smepmp 1.0 (#78489)
[llvm-project.git] / llvm / lib / Target / NVPTX / NVPTXIntrinsics.td
blob33f1e4a43e072af5f4a8cf5d18faeb37ebb51ae5
1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// Pattern leaf that accepts an fpimm node whose value, read through
// getValueAPF().convertToFloat(), compares equal to 0.0f.
// The test is a plain `==`, so a negative-zero immediate also satisfies it.
9 def immFloat0 : PatLeaf<(fpimm), [{
10     float f = (float)N->getValueAPF().convertToFloat();
11     return (f==0.0f);
12 }]>;
// Pattern leaf that accepts an fpimm node whose value, read through
// getValueAPF().convertToFloat(), compares equal to 1.0f.
14 def immFloat1 : PatLeaf<(fpimm), [{
15     float f = (float)N->getValueAPF().convertToFloat();
16     return (f==1.0f);
17 }]>;
// Pattern leaf that accepts an fpimm node whose value, read through
// getValueAPF().convertToDouble(), compares equal to 0.0.
// The test is a plain `==`, so a negative-zero immediate also satisfies it.
19 def immDouble0 : PatLeaf<(fpimm), [{
20     double d = (double)N->getValueAPF().convertToDouble();
21     return (d==0.0);
22 }]>;
// Pattern leaf that accepts an fpimm node whose value, read through
// getValueAPF().convertToDouble(), compares equal to 1.0.
24 def immDouble1 : PatLeaf<(fpimm), [{
25     double d = (double)N->getValueAPF().convertToDouble();
26     return (d==1.0);
27 }]>;
// Bundle of C++ predicate bodies, one field per address space. Each snippet
// returns the result of ChkMemSDNodeAddressSpace(N, <space>) for the generic,
// shared and global address-space constants respectively.
// NOTE(review): presumably consumed as AS_match.<field> by memory-op pattern
// fragments elsewhere in the file -- confirm against the users.
29 def AS_match {
30   code generic = [{
31    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
32   }];
33   code shared = [{
34    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
35   }];
36   code global = [{
37    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
38   }];
41 // A node that will be replaced with the current PTX version.
// PTXVerXform is an SDNodeXForm on `imm` whose body ignores the incoming value
// and returns getI32Imm(Subtarget->getPTXVersion(), SDLoc(N)).
42 class PTX {
43   SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
44     return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
45   }]>;
46   // (i32 0) will be XForm'ed to the currently used PTX version.
47   dag version = (PTXVerXform (i32 0));
// Single instance of PTX, so the dag above is reachable as `ptx.version`.
49 def ptx : PTX;
51 // Generates list of n sequential register names.
52 // E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
// Defined recursively: for non-zero n the result is RegSeq<n-1>.ret with
// prefix # (n-1) appended; n == 0 yields the empty list.
53 class RegSeq<int n, string prefix> {
54   list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
55                                         [prefix # !sub(n, 1)]),
56                             []);
// Helper yielding the list of values to iterate over for a "threadmask is an
// immediate" flag: [0, 1] when `sync` is set, otherwise just [0].
59 class THREADMASK_INFO<bit sync> {
60   list<bit> ret = !if(sync, [0, 1], [0]);
63 //-----------------------------------
64 // Synchronization and shuffle functions
65 //-----------------------------------
66 let isConvergent = true in {
// bar.sync selections.
//   INT_BARRIER0: int_nvvm_barrier0, no operands, prints "bar.sync 0".
//   INT_BARRIERN: int_nvvm_barrier_n with one Int32Regs operand ($src1).
//   INT_BARRIER:  int_nvvm_barrier with two Int32Regs operands ($src1, $src2).
67 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
68                   "bar.sync \t0;",
69       [(int_nvvm_barrier0)]>;
70 def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
71                   "bar.sync \t$src1;",
72       [(int_nvvm_barrier_n Int32Regs:$src1)]>;
73 def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
74                   "bar.sync \t$src1, $src2;",
75       [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// Reducing barriers on barrier 0. Each takes an Int32Regs $pred and produces an
// Int32Regs $dst, and is printed as a braced multi-instruction PTX sequence:
//   1. declare scratch predicate register(s) %p1 (and %p2 for and/or),
//   2. setp.ne.u32 turns ($pred != 0) into predicate %p1,
//   3. bar.red.{popc.u32,and.pred,or.pred} performs the reduction on barrier 0,
//   4. for and/or only, selp.u32 converts predicate %p2 back to 1/0 in $dst
//      (popc writes $dst directly).
76 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
77   !strconcat("{{ \n\t",
78              ".reg .pred \t%p1; \n\t",
79              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
80              "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
81              "}}"),
82       [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
83 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
84   !strconcat("{{ \n\t",
85              ".reg .pred \t%p1; \n\t",
86              ".reg .pred \t%p2; \n\t",
87              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
88              "bar.red.and.pred \t%p2, 0, %p1; \n\t",
89              "selp.u32 \t$dst, 1, 0, %p2; \n\t",
90              "}}"),
91       [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
92 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
93   !strconcat("{{ \n\t",
94              ".reg .pred \t%p1; \n\t",
95              ".reg .pred \t%p2; \n\t",
96              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
97              "bar.red.or.pred \t%p2, 0, %p1; \n\t",
98              "selp.u32 \t$dst, 1, 0, %p2; \n\t",
99              "}}"),
100       [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// bar.sync with an immediate operand (int_nvvm_bar_sync). No Requires<> clause.
102 def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
103                              [(int_nvvm_bar_sync imm:$i)]>;
// bar.warp.sync for int_nvvm_bar_warp_sync: _I takes the operand as an
// immediate, _R as an Int32Regs register. Both are gated on hasPTX<60> and
// hasSM<30>.
105 def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
106                              [(int_nvvm_bar_warp_sync imm:$i)]>,
107         Requires<[hasPTX<60>, hasSM<30>]>;
108 def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
109                              [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
110         Requires<[hasPTX<60>, hasSM<30>]>;
// barrier.sync for int_nvvm_barrier_sync, again in immediate (_I) and register
// (_R) operand forms, with the same hasPTX<60>/hasSM<30> gating.
112 def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
113                                    [(int_nvvm_barrier_sync imm:$i)]>,
114         Requires<[hasPTX<60>, hasSM<30>]>;
115 def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
116                                    [(int_nvvm_barrier_sync Int32Regs:$i)]>,
117         Requires<[hasPTX<60>, hasSM<30>]>;
// Two-operand barrier.sync ($id, $cnt) for int_nvvm_barrier_sync_cnt.
// The suffix encodes the operand kinds in ($id, $cnt) order: R = Int32Regs
// register, I = i32imm immediate, giving the four RR/RI/IR/II combinations.
// All four are gated on hasPTX<60> and hasSM<30>.
119 def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
120                  "barrier.sync \t$id, $cnt;",
121                  [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
122         Requires<[hasPTX<60>, hasSM<30>]>;
123 def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
124                  "barrier.sync \t$id, $cnt;",
125                  [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
126         Requires<[hasPTX<60>, hasSM<30>]>;
127 def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
128                  "barrier.sync \t$id, $cnt;",
129                  [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
130         Requires<[hasPTX<60>, hasSM<30>]>;
131 def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
132                  "barrier.sync \t$id, $cnt;",
133                  [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
134         Requires<[hasPTX<60>, hasSM<30>]>;
// Operand-less "barrier.cluster.<variant>;" instruction selected from Intr.
//   variant - text appended after "barrier.cluster." in the asm string.
//   Intr    - intrinsic matched by the (operand-less) selection pattern.
//   Preds   - predicates passed to Requires<>; defaults to
//             [hasPTX<78>, hasSM<90>].
136 class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
137                           list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
138         NVPTXInst<(outs), (ins), "barrier.cluster."# variant #";", [(Intr)]>,
139         Requires<Preds>;
// Non-aligned variants. The arrive.relaxed form overrides the default
// predicates with [hasPTX<80>, hasSM<90>].
141 def barrier_cluster_arrive:
142         INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;
143 def barrier_cluster_arrive_relaxed:
144         INT_BARRIER_CLUSTER<"arrive.relaxed",
145         int_nvvm_barrier_cluster_arrive_relaxed, [hasPTX<80>, hasSM<90>]>;
146 def barrier_cluster_wait:
147         INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;
149 // 'aligned' versions of the cluster barrier intrinsics
150 def barrier_cluster_arrive_aligned:
151         INT_BARRIER_CLUSTER<"arrive.aligned", int_nvvm_barrier_cluster_arrive_aligned>;
152 def barrier_cluster_arrive_relaxed_aligned:
153         INT_BARRIER_CLUSTER<"arrive.relaxed.aligned",
154         int_nvvm_barrier_cluster_arrive_relaxed_aligned, [hasPTX<80>, hasSM<90>]>;
155 def barrier_cluster_wait_aligned:
156         INT_BARRIER_CLUSTER<"wait.aligned", int_nvvm_barrier_cluster_wait_aligned>;
// One shfl / shfl.sync instruction variant. The base NVPTXInst is created with
// placeholder operands/asm ("?") and every field is then overridden via `let`.
//   sync           - when set, uses the "sync_" intrinsic name, the "shfl.sync."
//                    mnemonic, and adds a leading $threadmask input that is
//                    printed last.
//   mode           - shuffle mode text, used in both the intrinsic name and the
//                    mnemonic.
//   reg            - "i32" or "f32"; selects Int32Regs or Float32Regs for
//                    $src/$dst and is part of the intrinsic name.
//   return_pred    - when set, adds an Int1Regs $pred output (printed as
//                    "$dst|$pred") and the "p" intrinsic-name suffix.
//   offset_imm,
//   mask_imm,
//   threadmask_imm - choose i32imm instead of Int32Regs for the respective
//                    input operand.
158 class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
159                  bit offset_imm, bit mask_imm, bit threadmask_imm>
160       : NVPTXInst<(outs), (ins), "?", []> {
161   NVPTXRegClass rc = !cond(
162     !eq(reg, "i32"): Int32Regs,
163     !eq(reg, "f32"): Float32Regs);
  // Intrinsic record is looked up by its assembled name, e.g.
  // int_nvvm_shfl_[sync_]<mode>_<reg>[p].
164   string IntrName = "int_nvvm_shfl_"
165                     # !if(sync, "sync_", "")
166                     # mode
167                     # "_" # reg
168                     # !if(return_pred, "p", "");
169   Intrinsic Intr = !cast<Intrinsic>(IntrName);
  // Input order: [threadmask,] src, offset, mask.
170   let InOperandList = !con(
171     !if(sync,
172         !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
173         (ins)),
174     (ins rc:$src),
175     !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
176     !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
177     );
178   let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
179   let AsmString = "shfl."
180      # !if(sync, "sync.", "")
181      # mode # ".b32\t"
182      # "$dst"
183      # !if(return_pred, "|$pred", "") # ", "
184      # "$src, $offset, $mask"
185      # !if(sync, ", $threadmask", "")
186      # ";"
187      ;
  // The selection pattern is derived from the operand lists by substitution:
  // `outs` becomes `set` in the output list, `ins` becomes the intrinsic in
  // the input list, and i32imm operands become `imm`; the two dags are then
  // joined with !con.
188   let Pattern = [!con(
189       !foreach(tmp, OutOperandList,
190              !subst(outs, set,
191              !subst(i32imm, imm, tmp))),
192       (set !foreach(tmp, InOperandList,
193              !subst(ins, Intr,
194              !subst(i32imm, imm, tmp))))
195   )];
// Instantiate an anonymous SHFL_INSTR for every combination of sync, mode,
// register type, predicate return, and immediate/register choice per operand.
// THREADMASK_INFO<sync>.ret limits threadmask_imm to [0] for the non-sync
// forms. Sync variants require hasSM<30> and hasPTX<60>; non-sync variants
// require hasSM<30> and hasSHFL.
198 foreach sync = [false, true] in {
199   foreach mode = ["up", "down", "bfly", "idx"] in {
200     foreach regclass = ["i32", "f32"] in {
201       foreach return_pred = [false, true] in {
202         foreach offset_imm = [false, true] in {
203           foreach mask_imm = [false, true] in {
204             foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
205               def : SHFL_INSTR<sync, mode, regclass, return_pred,
206                                offset_imm, mask_imm, threadmask_imm>,
207                     Requires<!if(sync, [hasSM<30>, hasPTX<60>], [hasSM<30>, hasSHFL])>;
208             }
209           }
210         }
211       }
212     }
213   }
216 // vote.{all,any,uni,ballot}
// Single anonymous instruction "vote.<mode> $dest, $pred" selected from IntOp.
//   regclass - register class of the result $dest.
//   mode     - text appended after "vote." (e.g. "all.pred", "ballot.b32").
//   IntOp    - intrinsic taking the Int1Regs predicate.
// Gated on hasPTX<60> and hasSM<30>.
217 multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
218   def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
219               "vote." # mode # " \t$dest, $pred;",
220               [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
221         Requires<[hasPTX<60>, hasSM<30>]>;
// all/any/uni produce an Int1Regs result; ballot produces an Int32Regs result.
224 defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
225 defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
226 defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
227 defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
229 // vote.sync.{all,any,uni,ballot}
// Two instructions per instantiation, differing only in how $mask is supplied:
//   i - $mask is an i32imm immediate,
//   r - $mask is an Int32Regs register.
// The intrinsic takes (mask, pred); the asm prints "$dest, $pred, $mask".
// Both forms are gated on hasPTX<60> and hasSM<30>.
230 multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
231   def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
232               "vote.sync." # mode # " \t$dest, $pred, $mask;",
233               [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
234           Requires<[hasPTX<60>, hasSM<30>]>;
235   def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
236               "vote.sync." # mode #" \t$dest, $pred, $mask;",
237               [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
238           Requires<[hasPTX<60>, hasSM<30>]>;
// all/any/uni produce an Int1Regs result; ballot produces an Int32Regs result.
241 defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
242 defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
243 defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
244 defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
// match.any.sync.<ptxtype>: Int32Regs result from (mask, value).
//   regclass - register class used for $value in the register forms.
//   ptxtype  - type suffix printed in the mnemonic ("b32"/"b64").
//   IntOp    - intrinsic taking (mask, value).
//   ImmOp    - immediate operand type used for $value in the immediate forms.
// The four defs cover the immediate/register combinations of the two inputs
// (see each def's `ins` list). The asm prints "$dest, $value, $mask".
// All are gated on hasPTX<60> and hasSM<70>.
246 multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
247                           Operand ImmOp> {
248   def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value),
249               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
250               [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
251            Requires<[hasPTX<60>, hasSM<70>]>;
252   def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value),
253               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
254               [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
255            Requires<[hasPTX<60>, hasSM<70>]>;
256   def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value),
257               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
258               [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>,
259            Requires<[hasPTX<60>, hasSM<70>]>;
260   def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value),
261               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
262               [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
263            Requires<[hasPTX<60>, hasSM<70>]>;
// 32-bit and 64-bit value instantiations.
266 defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
267                                         i32imm>;
268 defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
269                                         i64imm>;
// match.all.sync.<ptxtype> returning two results: an Int32Regs $dest and an
// Int1Regs $pred, printed as "$dest|$pred".
//   regclass - register class used for $value in the register forms.
//   ptxtype  - type suffix printed in the mnemonic ("b32"/"b64").
//   IntOp    - intrinsic taking (mask, value) and producing both results.
//   ImmOp    - immediate operand type used for $value in the immediate forms.
// The four defs cover the immediate/register combinations of the two inputs
// (see each def's `ins` list). All are gated on hasPTX<60> and hasSM<70>.
271 multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
272                           Operand ImmOp> {
273   def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
274                      (ins i32imm:$mask, ImmOp:$value),
275               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
276               [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
277            Requires<[hasPTX<60>, hasSM<70>]>;
278   def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
279                      (ins Int32Regs:$mask, ImmOp:$value),
280               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
281               [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
282            Requires<[hasPTX<60>, hasSM<70>]>;
283   def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
284                      (ins i32imm:$mask, regclass:$value),
285               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
286               [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
287            Requires<[hasPTX<60>, hasSM<70>]>;
288   def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
289                      (ins Int32Regs:$mask, regclass:$value),
290               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
291               [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
292            Requires<[hasPTX<60>, hasSM<70>]>;
// 32-bit and 64-bit value instantiations.
294 defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
295                                          i32imm>;
296 defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
297                                          i64imm>;
// redux.sync.<BinOp>.<PTXType>: Int32Regs result from Int32Regs $src and $mask,
// both supplied in registers (no immediate forms are defined here).
//   BinOp   - operation text in the mnemonic ("min", "add", "and", ...).
//   PTXType - type suffix in the mnemonic ("u32", "s32", "b32").
//   Intrin  - intrinsic taking (src, mask).
// Gated on hasPTX<70> and hasSM<80>.
299 multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
300   def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
301           "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
302           [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
303         Requires<[hasPTX<70>, hasSM<80>]>;
// umin/umax use the u32 suffix, add/min/max use s32, and/xor/or use b32.
306 defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
307 defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
308 defm REDUX_SYNC_ADD : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
309 defm REDUX_SYNC_MIN : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
310 defm REDUX_SYNC_MAX : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
311 defm REDUX_SYNC_AND : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
312 defm REDUX_SYNC_XOR : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
313 defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;
315 } // isConvergent = true
317 //-----------------------------------
318 // Explicit Memory Fence Functions
319 //-----------------------------------
// Operand-less fence instruction: StrOp is the complete asm string (including
// the trailing ';') and IntOP is the intrinsic it is selected from.
320 class MEMBAR<string StrOp, Intrinsic IntOP> :
321               NVPTXInst<(outs), (ins),
322             StrOp, [(IntOP)]>;
// membar at cta / gl / sys level; these carry no Requires<> clause.
324 def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
325 def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
326 def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
// fence.sc.cluster reuses MEMBAR and is gated on hasPTX<78> and hasSM<90>.
328 def INT_FENCE_SC_CLUSTER:
329        MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
330        Requires<[hasPTX<78>, hasSM<90>]>;
332 //-----------------------------------
333 // Async Copy Functions
334 //-----------------------------------
// cp.async.mbarrier.arrive[<NoInc>][<AddrSpace>].b64 [$addr].
//   NoInc     - "" or ".noinc", spliced in after "arrive".
//   AddrSpace - "" or ".shared", spliced in before ".b64".
//   Intrin    - intrinsic taking the address.
// _32 and _64 differ only in the register class of $addr (Int32Regs vs
// Int64Regs). Both are gated on hasPTX<70> and hasSM<80>.
336 multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
337   def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
338             !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
339             [(Intrin Int32Regs:$addr)]>,
340     Requires<[hasPTX<70>, hasSM<80>]>;
341   def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
342             !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
343             [(Intrin Int64Regs:$addr)]>,
344     Requires<[hasPTX<70>, hasSM<80>]>;
// The four {"", ".noinc"} x {"", ".shared"} instantiations.
347 defm CP_ASYNC_MBARRIER_ARRIVE :
348   CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
349 defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
350   CP_ASYNC_MBARRIER_ARRIVE<"", ".shared", int_nvvm_cp_async_mbarrier_arrive_shared>;
351 defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
352   CP_ASYNC_MBARRIER_ARRIVE<".noinc", "", int_nvvm_cp_async_mbarrier_arrive_noinc>;
353 defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
354   CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
// cp.async.<cc>.shared.global [$dst], [$src], <cpsize>[, $src_size].
//   cc      - text printed after "cp.async." ("ca"/"cg").
//   cpsize  - copy-size text printed as a literal in the asm string.
//   Intrin  - intrinsic for the two-operand (dst, src) forms.
//   IntrinS - intrinsic for the forms with an extra src_size operand.
// Suffixes: _32/_64 select Int32Regs/Int64Regs for both addresses; a trailing
// "s" adds an Int32Regs $src_size, and "si" takes $src_size as an i32imm.
// All forms are gated on hasPTX<70> and hasSM<80>.
356 multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin, Intrinsic IntrinS> {
357   def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
358             !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"),
359             [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
360     Requires<[hasPTX<70>, hasSM<80>]>;
361   def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
362             !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"),
363             [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
364     Requires<[hasPTX<70>, hasSM<80>]>;
365   // Variant with src_size parameter
366   def _32s : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size),
367              !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
368              [(IntrinS Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size)]>,
369     Requires<[hasPTX<70>, hasSM<80>]>;
370   def _32si: NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size),
371              !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
372              [(IntrinS Int32Regs:$dst, Int32Regs:$src, imm:$src_size)]>,
373     Requires<[hasPTX<70>, hasSM<80>]>;
374   def _64s : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size),
375              !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
376              [(IntrinS Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size)]>,
377     Requires<[hasPTX<70>, hasSM<80>]>;
378   def _64si: NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size),
379              !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
380              [(IntrinS Int64Regs:$dst, Int64Regs:$src, imm:$src_size)]>,
381     Requires<[hasPTX<70>, hasSM<80>]>;
// Instantiations: "ca" with sizes 4, 8 and 16; "cg" with size 16 only.
384 defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
385   CP_ASYNC_SHARED_GLOBAL_I<"ca", "4", int_nvvm_cp_async_ca_shared_global_4,
386                                       int_nvvm_cp_async_ca_shared_global_4_s>;
388 defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
389   CP_ASYNC_SHARED_GLOBAL_I<"ca", "8", int_nvvm_cp_async_ca_shared_global_8,
390                                       int_nvvm_cp_async_ca_shared_global_8_s>;
392 defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
393   CP_ASYNC_SHARED_GLOBAL_I<"ca", "16", int_nvvm_cp_async_ca_shared_global_16,
394                                        int_nvvm_cp_async_ca_shared_global_16_s>;
396 defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
397   CP_ASYNC_SHARED_GLOBAL_I<"cg", "16", int_nvvm_cp_async_cg_shared_global_16,
398                                        int_nvvm_cp_async_cg_shared_global_16_s>;
// cp.async group management; all three are gated on hasPTX<70> and hasSM<80>.
// commit_group and wait_all take no operands.
400 def CP_ASYNC_COMMIT_GROUP :
401   NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>,
402   Requires<[hasPTX<70>, hasSM<80>]>;
// wait_group takes its count $n as an i32imm, matched in the pattern as an
// i32 `timm`; no register form is defined here.
404 def CP_ASYNC_WAIT_GROUP :
405   NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
406   [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
407   Requires<[hasPTX<70>, hasSM<80>]>;
409 def CP_ASYNC_WAIT_ALL :
410   NVPTXInst<(outs), (ins), "cp.async.wait_all;",
411   [(int_nvvm_cp_async_wait_all)]>,
412   Requires<[hasPTX<70>, hasSM<80>]>;
414 // cp.async.bulk variants of the commit/wait group
// All three are gated on hasPTX<80> and hasSM<90>. The two wait forms take
// their count $n as an i32imm matched as an i32 `timm`.
415 def CP_ASYNC_BULK_COMMIT_GROUP :
416   NVPTXInst<(outs), (ins), "cp.async.bulk.commit_group;",
417   [(int_nvvm_cp_async_bulk_commit_group)]>,
418   Requires<[hasPTX<80>, hasSM<90>]>;
420 def CP_ASYNC_BULK_WAIT_GROUP :
421   NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group $n;",
422   [(int_nvvm_cp_async_bulk_wait_group (i32 timm:$n))]>,
423   Requires<[hasPTX<80>, hasSM<90>]>;
425 def CP_ASYNC_BULK_WAIT_GROUP_READ :
426   NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group.read $n;",
427   [(int_nvvm_cp_async_bulk_wait_group_read (i32 timm:$n))]>,
428   Requires<[hasPTX<80>, hasSM<90>]>;
430 //-----------------------------------
431 // MBarrier Functions
432 //-----------------------------------
// mbarrier.init[<AddrSpace>].b64 [$addr], $count -- no result.
//   AddrSpace - "" or ".shared", spliced into the mnemonic.
//   Intrin    - intrinsic taking (addr, count).
// _32/_64 select Int32Regs/Int64Regs for $addr; $count is always Int32Regs.
// Gated on hasPTX<70> and hasSM<80>.
434 multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
435   def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
436            !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
437     [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
438     Requires<[hasPTX<70>, hasSM<80>]>;
439   def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
440            !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
441     [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
442     Requires<[hasPTX<70>, hasSM<80>]>;
445 defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
446 defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared",
447                                           int_nvvm_mbarrier_init_shared>;
// mbarrier.inval[<AddrSpace>].b64 [$addr] -- no result.
//   AddrSpace - "" or ".shared", spliced into the mnemonic.
//   Intrin    - intrinsic taking the address.
// _32/_64 select Int32Regs/Int64Regs for $addr.
// Gated on hasPTX<70> and hasSM<80>.
449 multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
450   def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
451            !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
452     [(Intrin Int32Regs:$addr)]>,
453     Requires<[hasPTX<70>, hasSM<80>]>;
454   def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
455            !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
456     [(Intrin Int64Regs:$addr)]>,
457     Requires<[hasPTX<70>, hasSM<80>]>;
460 defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
461 defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
462                                             int_nvvm_mbarrier_inval_shared>;
// mbarrier.arrive[<AddrSpace>].b64 $state, [$addr] -- yields an Int64Regs
// $state result.
//   AddrSpace - "" or ".shared", spliced into the mnemonic.
//   Intrin    - intrinsic taking the address and returning the state.
// _32/_64 select Int32Regs/Int64Regs for $addr.
// Gated on hasPTX<70> and hasSM<80>.
464 multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
465   def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
466            !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
467     [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
468     Requires<[hasPTX<70>, hasSM<80>]>;
469   def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
470            !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
471     [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
472     Requires<[hasPTX<70>, hasSM<80>]>;
475 defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
476 defm MBARRIER_ARRIVE_SHARED :
477   MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
// mbarrier.arrive.noComplete[<AddrSpace>].b64 $state, [$addr], $count --
// yields an Int64Regs $state result.
//   AddrSpace - "" or ".shared", spliced into the mnemonic.
//   Intrin    - intrinsic taking (addr, count) and returning the state.
// _32/_64 select Int32Regs/Int64Regs for $addr; $count is always Int32Regs.
// Gated on hasPTX<70> and hasSM<80>.
479 multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
480   def _32 : NVPTXInst<(outs Int64Regs:$state),
481            (ins Int32Regs:$addr, Int32Regs:$count),
482            !strconcat("mbarrier.arrive.noComplete", AddrSpace,
483                       ".b64 $state, [$addr], $count;"),
484     [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
485     Requires<[hasPTX<70>, hasSM<80>]>;
486   def _64 : NVPTXInst<(outs Int64Regs:$state),
487            (ins Int64Regs:$addr, Int32Regs:$count),
488            !strconcat("mbarrier.arrive.noComplete", AddrSpace,
489                       ".b64 $state, [$addr], $count;"),
490     [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
491     Requires<[hasPTX<70>, hasSM<80>]>;
494 defm MBARRIER_ARRIVE_NOCOMPLETE :
495   MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
496 defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
497   MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;
// mbarrier.arrive_drop[<AddrSpace>].b64 $state, [$addr] -- yields an Int64Regs
// $state result.
//   AddrSpace - "" or ".shared", spliced into the mnemonic.
//   Intrin    - intrinsic taking the address and returning the state.
// _32/_64 select Int32Regs/Int64Regs for $addr.
// Gated on hasPTX<70> and hasSM<80>.
499 multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
500   def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
501            !strconcat("mbarrier.arrive_drop", AddrSpace,
502                       ".b64 $state, [$addr];"),
503            [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
504     Requires<[hasPTX<70>, hasSM<80>]>;
505   def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
506            !strconcat("mbarrier.arrive_drop", AddrSpace,
507                       ".b64 $state, [$addr];"),
508            [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
509     Requires<[hasPTX<70>, hasSM<80>]>;
512 defm MBARRIER_ARRIVE_DROP :
513   MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
514 defm MBARRIER_ARRIVE_DROP_SHARED :
515   MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
// mbarrier.arrive_drop.noComplete[<AddrSpace>].b64 $state, [$addr], $count --
// yields an Int64Regs $state result.
//   AddrSpace - "" or ".shared", spliced into the mnemonic.
//   Intrin    - intrinsic taking (addr, count) and returning the state.
// _32/_64 select Int32Regs/Int64Regs for $addr; $count is always Int32Regs.
// Gated on hasPTX<70> and hasSM<80>.
517 multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
518   def _32 : NVPTXInst<(outs Int64Regs:$state),
519            (ins Int32Regs:$addr, Int32Regs:$count),
520            !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
521                       ".b64 $state, [$addr], $count;"),
522            [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
523     Requires<[hasPTX<70>, hasSM<80>]>;
524   def _64 : NVPTXInst<(outs Int64Regs:$state),
525            (ins Int64Regs:$addr, Int32Regs:$count),
526            !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
527                       ".b64 $state, [$addr], $count;"),
528            [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
529     Requires<[hasPTX<70>, hasSM<80>]>;
532 defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
533   MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
534 defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
535   MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
536                        int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
// mbarrier.test_wait[<AddrSpace>].b64 $res, [$addr], $state -- yields an
// Int1Regs $res result.
//   AddrSpace - "" or ".shared", spliced into the mnemonic.
//   Intrin    - intrinsic taking (addr, state) and returning the predicate.
// _32/_64 select Int32Regs/Int64Regs for $addr; $state is always Int64Regs.
// Gated on hasPTX<70> and hasSM<80>.
538 multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
539   def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
540            !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
541            [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
542     Requires<[hasPTX<70>, hasSM<80>]>;
543   def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
544            !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
545            [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
546     Requires<[hasPTX<70>, hasSM<80>]>;
549 defm MBARRIER_TEST_WAIT :
550   MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
551 defm MBARRIER_TEST_WAIT_SHARED :
552   MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
// mbarrier.pending_count.b64 $res, $state: maps an Int64Regs $state to an
// Int32Regs $res via Intrin. Takes no address operand, so there are no
// _32/_64 or address-space variants. Gated on hasPTX<70> and hasSM<80>.
554 class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
555            NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
556            "mbarrier.pending_count.b64 $res, $state;",
557            [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
558     Requires<[hasPTX<70>, hasSM<80>]>;
// The def shares its name with the class it derives from.
560 def MBARRIER_PENDING_COUNT :
561   MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
563 //-----------------------------------
564 // Math Functions
565 //-----------------------------------
567 // Map min(1.0, max(0.0, x)) to sat(x).
568 // Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
569 // max(0.0, min(x, 1.0)) evaluates to 1.0,
570 // while sat(x) is 0.
571 // Same story for fmax, fmin.
// f32 saturation: int_nvvm_fmin_f of the constant 1.0 (immFloat1) with
// int_nvvm_fmax_f of the constant 0.0 (immFloat0) and $a is selected as
// CVT_f32_f32 $a with the CvtSAT modifier. The four patterns enumerate both
// operand orders of the outer fmin and of the inner fmax.
573 def : Pat<(int_nvvm_fmin_f immFloat1,
574             (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
575           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
576 def : Pat<(int_nvvm_fmin_f immFloat1,
577             (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
578           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
579 def : Pat<(int_nvvm_fmin_f
580             (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
581           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
582 def : Pat<(int_nvvm_fmin_f
583             (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
584           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// f64 saturation: int_nvvm_fmin_d of the constant 1.0 (immDouble1) with
// int_nvvm_fmax_d of the constant 0.0 (immDouble0) and $a is selected as
// CVT_f64_f64 $a with the CvtSAT modifier. The four patterns enumerate both
// operand orders of the outer fmin and of the inner fmax.
586 def : Pat<(int_nvvm_fmin_d immDouble1,
587             (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
588           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
589 def : Pat<(int_nvvm_fmin_d immDouble1,
590             (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
591           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
592 def : Pat<(int_nvvm_fmin_d
593             (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
594           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
595 def : Pat<(int_nvvm_fmin_d
596             (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
597           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
600 // We need a full string for OpcStr here because we need to deal with case like
601 // INT_PTX_RECIP.
// One-input instruction selected from an intrinsic.
//   OpcStr          - complete asm string, operand references included.
//   target_regclass - register class of the result $dst.
//   src_regclass    - register class of the input $src0.
//   IntOP           - intrinsic matched by the pattern.
//   Preds           - predicates passed to Requires<>; empty by default.
602 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
603   NVPTXRegClass src_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
604             : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
605             OpcStr,
606         [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
607         Requires<Preds>;
609 // We need a full string for OpcStr here because we need to deal with the case
610 // like INT_PTX_NATIVE_POWR_F.
// Two-input instruction selected from an intrinsic.
//   OpcStr      - complete asm string, operand references included.
//   t_regclass  - register class of the result $dst.
//   s0_regclass - register class of the first input $src0.
//   s1_regclass - register class of the second input $src1.
//   IntOP       - intrinsic matched by the pattern.
//   Preds       - predicates passed to Requires<>; empty by default.
611 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
612   NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP,
613   list<Predicate> Preds = []>
614             : NVPTXInst<(outs t_regclass:$dst),
615               (ins s0_regclass:$src0, s1_regclass:$src1),
616             OpcStr,
617         [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
618         Requires<Preds>;
// Three-input instruction selected from an intrinsic.
//   OpcStr         - complete asm string, operand references included.
//   t_regclass     - register class of the result $dst.
//   s0..s2_regclass - register classes of the inputs $src0, $src1, $src2.
//   IntOP          - intrinsic matched by the pattern.
//   Preds          - predicates passed to Requires<>; empty by default.
620 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
621   NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
622   NVPTXRegClass s2_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
623             : NVPTXInst<(outs t_regclass:$dst),
624               (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
625             OpcStr,
626         [(set t_regclass:$dst,
627           (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>,
628           Requires<Preds>;
631 // MISC
// prmt.b32 selected from int_nvvm_prmt: result and all three inputs are
// Int32Regs; no predicates are required.
634 def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
635   Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
638 // Min Max
// f32 min variants, all Float32Regs x Float32Regs -> Float32Regs via F_MATH_2.
// The mnemonic modifiers (.ftz, .NaN, .xorsign.abs) mirror the intrinsic name.
// Predicates: the plain and .ftz forms have none; the .NaN forms require
// hasPTX<70>/hasSM<80>; every .xorsign.abs form requires hasPTX<72>/hasSM<86>.
641 def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
642   Float32Regs, Float32Regs, int_nvvm_fmin_f>;
643 def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
644   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
645 def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
646   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f,
647   [hasPTX<70>, hasSM<80>]>;
648 def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
649   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f,
650   [hasPTX<70>, hasSM<80>]>;
651 def INT_NVVM_FMIN_XORSIGN_ABS_F :
652   F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
653     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_xorsign_abs_f,
654     [hasPTX<72>, hasSM<86>]>;
655 def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F :
656   F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
657     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_xorsign_abs_f,
658     [hasPTX<72>, hasSM<86>]>;
659 def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F :
660   F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
661     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_xorsign_abs_f,
662     [hasPTX<72>, hasSM<86>]>;
663 def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F :
664   F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
665     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_xorsign_abs_f,
666     [hasPTX<72>, hasSM<86>]>;
// f32 maximum: one F_MATH_2 record per int_nvvm_fmax_* intrinsic, with the
// same modifier-to-mnemonic mapping as the f32 minimum records (".ftz",
// ".NaN", ".xorsign.abs"). All operands are Float32Regs. The plain and .ftz
// forms carry no predicates.
668 def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
669   Float32Regs, Float32Regs, int_nvvm_fmax_f>;
670 def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
671   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
// The .NaN forms are gated on [hasPTX<70>, hasSM<80>].
672 def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
673   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
674   [hasPTX<70>, hasSM<80>]>;
675 def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
676   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
677   [hasPTX<70>, hasSM<80>]>;
// Every .xorsign.abs form is gated on [hasPTX<72>, hasSM<86>].
678 def INT_NVVM_FMAX_XORSIGN_ABS_F :
679   F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
680     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f,
681     [hasPTX<72>, hasSM<86>]>;
682 def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F :
683   F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
684     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f,
685     [hasPTX<72>, hasSM<86>]>;
686 def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F :
687   F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
688     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f,
689     [hasPTX<72>, hasSM<86>]>;
690 def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
691   F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
692     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
693     [hasPTX<72>, hasSM<86>]>;
// f64 minimum / maximum: only the plain forms are defined here, on
// Float64Regs, with no predicates.
695 def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
696   Float64Regs, Float64Regs, int_nvvm_fmin_d>;
697 def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
698   Float64Regs, Float64Regs, int_nvvm_fmax_d>;
701 // Min Max f16, f16x2, bf16, bf16x2
// MIN_MAX_TUPLE: plain record bundling the data for one min/max variant.
//   V     - variant suffix (e.g. "_ftz_NaN_f16"); stored in Variant.
//   I     - intrinsic to match; stored in Intr.
//   RC    - register class used for the result and both inputs; stored in
//           RegClass.
//   Preds - predicates for the variant; defaults to [hasPTX<70>, hasSM<80>].
704 class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
705                     list<Predicate> Preds = [hasPTX<70>, hasSM<80>]> {
706   string Variant = V;
707   Intrinsic Intr = I;
708   NVPTXRegClass RegClass = RC;
709   list<Predicate> Predicates = Preds;
// MIN_MAX: emits one F_MATH_2 instruction per f16/f16x2/bf16/bf16x2 variant.
//   IntName - "min" or "max". It is the mnemonic prefix, and each !if below
//             picks the fmin intrinsic when it equals "min" and the fmax
//             intrinsic otherwise.
// Scalar f16/bf16 variants use Int16Regs; packed x2 variants use Int32Regs.
// Variants without an explicit predicate list take the MIN_MAX_TUPLE default
// ([hasPTX<70>, hasSM<80>]); every xorsign_abs variant passes
// [hasPTX<72>, hasSM<86>].
712 multiclass MIN_MAX<string IntName> {
713   foreach P = [
// f16
714     MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16,
715       int_nvvm_fmax_f16), Int16Regs>,
716     MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16,
717       int_nvvm_fmax_ftz_f16), Int16Regs>,
718     MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16,
719       int_nvvm_fmax_nan_f16), Int16Regs>,
720     MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"),
721       int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>,
722     MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"),
723       int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
724       Int16Regs, [hasPTX<72>, hasSM<86>]>,
725     MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"),
726       int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
727       Int16Regs, [hasPTX<72>, hasSM<86>]>,
728     MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
729       int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
730       Int16Regs, [hasPTX<72>, hasSM<86>]>,
731     MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
732       int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
733       int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>,
// f16x2
734     MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2,
735       int_nvvm_fmax_f16x2), Int32Regs>,
736     MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"),
737       int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>,
738     MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"),
739       int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>,
740     MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"),
741       int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>,
742     MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
743       int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
744       Int32Regs, [hasPTX<72>, hasSM<86>]>,
745     MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
746       int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2),
747       Int32Regs, [hasPTX<72>, hasSM<86>]>,
748     MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
749       int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2),
750       Int32Regs, [hasPTX<72>, hasSM<86>]>,
751     MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
752       int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
753       int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
754       Int32Regs, [hasPTX<72>, hasSM<86>]>,
// bf16 (no ftz variants are listed for bf16 / bf16x2)
755     MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"),
756       int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>,
757     MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16,
758       int_nvvm_fmax_nan_bf16), Int16Regs>,
759     MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"),
760       int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
761       Int16Regs, [hasPTX<72>, hasSM<86>]>,
762     MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"),
763       int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16),
764       Int16Regs, [hasPTX<72>, hasSM<86>]>,
// bf16x2
765     MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2,
766       int_nvvm_fmax_bf16x2), Int32Regs>,
767     MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"),
768       int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>,
769     MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
770       int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
771       Int32Regs, [hasPTX<72>, hasSM<86>]>,
772     MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
773       int_nvvm_fmin_nan_xorsign_abs_bf16x2,
774       int_nvvm_fmax_nan_xorsign_abs_bf16x2),
775       Int32Regs, [hasPTX<72>, hasSM<86>]>] in {
// The record is named after the variant suffix. The assembly string is
// IntName followed by the suffix with every "_" turned into "." and then the
// operand list, e.g. "min" + ".ftz.NaN.f16" + " \t$dst, $src0, $src1;".
776         def P.Variant : F_MATH_2<!strconcat(
777           IntName, !subst("_", ".", P.Variant), " \t$dst, $src0, $src1;"),
778           P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
779   }
// Instantiate MIN_MAX once for "min" and once for "max".
782 defm INT_NVVM_FMIN : MIN_MAX<"min">;
// NOTE(review): "FMAN" looks like a typo for "FMAX" (this instantiation passes
// "max"). The defm name presumably prefixes every generated record name, so
// renaming would change those names; confirm nothing refers to
// INT_NVVM_FMAN_* before fixing.
783 defm INT_NVVM_FMAN : MIN_MAX<"max">;
786 // Multiplication
// int_nvvm_mulhi_*: emitted as "mul.hi" with an s16/u16/s32/u32/s64/u64 type
// suffix on Int16Regs, Int32Regs or Int64Regs respectively. No predicates.
789 def INT_NVVM_MULHI_S : F_MATH_2<"mul.hi.s16 \t$dst, $src0, $src1;", Int16Regs,
790   Int16Regs, Int16Regs, int_nvvm_mulhi_s>;
791 def INT_NVVM_MULHI_US : F_MATH_2<"mul.hi.u16 \t$dst, $src0, $src1;", Int16Regs,
792   Int16Regs, Int16Regs, int_nvvm_mulhi_us>;
793 def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
794   Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
795 def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
796   Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
797 def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
798   Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
799 def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
800   Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
// f32 multiply with an explicit rounding modifier (.rn/.rz/.rm/.rp), each in
// an .ftz and a non-.ftz form. All operands are Float32Regs; no predicates.
802 def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
803   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
804 def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
805   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
806 def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
807   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
808 def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
809   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
810 def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
811   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
812 def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
813   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
814 def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
815   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
816 def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
817   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
// f64 multiply with an explicit rounding modifier (.rn/.rz/.rm/.rp) on
// Float64Regs. No .ftz forms are defined for f64; no predicates.
819 def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
820   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
821 def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
822   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
823 def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
824   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
825 def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
826   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
// int_nvvm_mul24_i / int_nvvm_mul24_ui: emitted as "mul24.lo" with an s32 or
// u32 suffix on Int32Regs. No predicates.
828 def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
829   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
830 def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
831   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
834 // Div
// f32 divide: ".approx" forms plus the four rounding modifiers
// (.rn/.rz/.rm/.rp), each with and without .ftz, on Float32Regs. f64 divide:
// the four rounding modifiers only, on Float64Regs. No predicates are attached.
837 def INT_NVVM_DIV_APPROX_FTZ_F
838   : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
839     Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
840 def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
841   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
843 def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
844   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
845 def INT_NVVM_DIV_RN_F     : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
846   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
847 def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
848   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
849 def INT_NVVM_DIV_RZ_F     : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
850   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
851 def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
852   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
853 def INT_NVVM_DIV_RM_F     : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
854   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
855 def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
856   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
857 def INT_NVVM_DIV_RP_F     : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
858   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
860 def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
861   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
862 def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
863   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
864 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
865   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
866 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
867   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
870 // Sad
// int_nvvm_sad_*: three-operand "sad" instructions for s16/u16/s32/u32/s64/u64.
// The result and all three inputs share one integer register class per width.
// No predicates.
873 def INT_NVVM_SAD_S : F_MATH_3<"sad.s16 \t$dst, $src0, $src1, $src2;",
874   Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_s>;
875 def INT_NVVM_SAD_US : F_MATH_3<"sad.u16 \t$dst, $src0, $src1, $src2;",
876   Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_us>;
877 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
878   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
879 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
880   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
881 def INT_NVVM_SAD_LL : F_MATH_3<"sad.s64 \t$dst, $src0, $src1, $src2;",
882   Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ll>;
883 def INT_NVVM_SAD_ULL : F_MATH_3<"sad.u64 \t$dst, $src0, $src1, $src2;",
884   Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ull>;
887 // Floor  Ceil
// No dedicated instructions: each intrinsic is rewritten to a same-type CVT
// (CVT_f32_f32 / CVT_f64_f64) with a conversion-mode operand. Floor uses
// CvtRMI and ceil uses CvtRPI; the ftz intrinsics use the *_FTZ mode.
// (CvtRMI / CvtRPI presumably mean round-to-integer toward -inf / +inf;
// they are defined outside this section.)
890 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
891           (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
892 def : Pat<(int_nvvm_floor_f Float32Regs:$a),
893           (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
894 def : Pat<(int_nvvm_floor_d Float64Regs:$a),
895           (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
897 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
898           (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
899 def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
900           (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
901 def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
902           (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
905 // Abs
// int_nvvm_fabs_*: one-operand "abs" instructions built from F_MATH_1 (the
// class is defined outside this section). f32 has an .ftz and a plain form;
// f64 only a plain form.
908 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
909   Float32Regs, int_nvvm_fabs_ftz_f>;
910 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
911   Float32Regs, int_nvvm_fabs_f>;
913 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
914   Float64Regs, int_nvvm_fabs_d>;
917 // Abs, Neg bf16, bf16x2
// Scalar bf16 values are held in Int16Regs and packed bf16x2 values in
// Int32Regs. All four records are gated on [hasPTX<70>, hasSM<80>].
920 def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs,
921   Int16Regs, int_nvvm_abs_bf16, [hasPTX<70>, hasSM<80>]>;
922 def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs,
923   Int32Regs, int_nvvm_abs_bf16x2, [hasPTX<70>, hasSM<80>]>;
924 def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs,
925   Int16Regs, int_nvvm_neg_bf16, [hasPTX<70>, hasSM<80>]>;
926 def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs,
927   Int32Regs, int_nvvm_neg_bf16x2, [hasPTX<70>, hasSM<80>]>;
930 // Round
// int_nvvm_round_* is rewritten to a same-type CVT with mode CvtRNI
// (CvtRNI_FTZ for the ftz intrinsic).
// NOTE(review): CvtRNI is presumably round-to-nearest-even; confirm this is
// the rounding that int_nvvm_round_* is expected to provide.
933 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
934           (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
935 def : Pat<(int_nvvm_round_f Float32Regs:$a),
936           (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
937 def : Pat<(int_nvvm_round_d Float64Regs:$a),
938           (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
941 // Trunc
// int_nvvm_trunc_* is rewritten to a same-type CVT with mode CvtRZI
// (CvtRZI_FTZ for the ftz intrinsic).
944 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
945           (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
946 def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
947           (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
948 def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
949           (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
952 // Saturate
// int_nvvm_saturate_* is rewritten to a same-type CVT with mode CvtSAT
// (CvtSAT_FTZ for the ftz intrinsic).
955 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
956           (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
957 def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
958           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
959 def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
960           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
963 // Exp2  Log2
// ".approx" ex2 and lg2 instructions. The f32 and f64 records carry no
// predicates. The f16 / f16x2 ex2 records use Int16Regs / Int32Regs and are
// gated on [hasPTX<70>, hasSM<75>].
966 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
967   Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
968 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
969   Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
970 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
971   Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
972 def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
973   Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
974 def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
975   Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;
977 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
978   Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
979 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
980   Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
981 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
982   Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
985 // Sin  Cos
// ".approx" sin and cos, f32 only, each with an .ftz and a plain form.
// No predicates.
988 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
989   Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
990 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
991   Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
993 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
994   Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
995 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
996   Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
999 // Fma
// FMA_TUPLE: plain record bundling the data for one fma variant.
//   V     - variant suffix (e.g. "_rn_ftz_f32"); stored in Variant.
//   I     - intrinsic to match; stored in Intr.
//   RC    - register class used for the result and all three inputs; stored
//           in RegClass.
//   Preds - predicates for the variant; empty by default.
1002 class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
1003                 list<Predicate> Preds = []> {
1004   string Variant = V;
1005   Intrinsic Intr = I;
1006   NVPTXRegClass RegClass = RC;
1007   list<Predicate> Predicates = Preds;
// FMA_INST: emits one F_MATH_3 instruction per fma variant listed below.
// f64 and f32 variants carry no predicates. f16 / f16x2 variants use
// [hasPTX<42>, hasSM<53>], except the relu forms which use
// [hasPTX<70>, hasSM<80>]. Every bf16 / bf16x2 variant uses
// [hasPTX<70>, hasSM<80>].
1010 multiclass FMA_INST {
1011   foreach P = [
// f64
1012     FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
1013     FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
1014     FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
1015     FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,
// f32
1017     FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
1018     FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
1019     FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
1020     FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
1021     FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
1022     FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
1023     FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
1024     FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,
// f16 (held in Int16Regs)
1026     FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
1027     FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs,
1028       [hasPTX<42>, hasSM<53>]>,
1029     FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs,
1030       [hasPTX<42>, hasSM<53>]>,
1031     FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs,
1032       [hasPTX<42>, hasSM<53>]>,
1033     FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs,
1034       [hasPTX<70>, hasSM<80>]>,
1035     FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs,
1036       [hasPTX<70>, hasSM<80>]>,
// bf16 (held in Int16Regs)
// NOTE(review): verify that the .ftz / .sat bf16 mnemonics generated below
// are accepted by the targeted PTX ISA versions.
1038     FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
1039     FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, Int16Regs,
1040       [hasPTX<70>, hasSM<80>]>,
1041     FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, Int16Regs,
1042       [hasPTX<70>, hasSM<80>]>,
1043     FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs,
1044       [hasPTX<70>, hasSM<80>]>,
1045     FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
1046       [hasPTX<70>, hasSM<80>]>,
1047     FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs,
1048       [hasPTX<70>, hasSM<80>]>,
// f16x2 and bf16x2 (held in Int32Regs)
1050     FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs,
1051       [hasPTX<42>, hasSM<53>]>,
1052     FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs,
1053       [hasPTX<42>, hasSM<53>]>,
1054     FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs,
1055       [hasPTX<42>, hasSM<53>]>,
1056     FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
1057       Int32Regs, [hasPTX<42>, hasSM<53>]>,
1058     FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs,
1059       [hasPTX<70>, hasSM<80>]>,
1060     FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
1061       Int32Regs, [hasPTX<70>, hasSM<80>]>,
1062     FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
1063       [hasPTX<70>, hasSM<80>]>,
1064     FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
1065       [hasPTX<70>, hasSM<80>]>
1066   ] in {
// The record is named after the variant suffix. The assembly string is "fma"
// followed by the suffix with every "_" turned into "." and then the operand
// list, e.g. "fma" + ".rn.ftz.f32" + " \t$dst, $src0, $src1, $src2;".
1067     def P.Variant :
1068       F_MATH_3<!strconcat("fma",
1069         !subst("_", ".", P.Variant), " \t$dst, $src0, $src1, $src2;"),
1070         P.RegClass, P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
1071   }
// Instantiate every fma variant of FMA_INST under the INT_NVVM_FMA name.
1074 defm INT_NVVM_FMA : FMA_INST;
1077 // Rcp
// Reciprocal. f32: the four rounding modifiers (.rn/.rz/.rm/.rp), each with
// and without .ftz. f64: the four rounding modifiers. The only ".approx"
// records defined here are the .ftz forms for f32 and f64. No predicates.
1080 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
1081   Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
1082 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
1083   Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
1084 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
1085   Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
1086 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
1087   Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
1088 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
1089   Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
1090 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
1091   Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
1092 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
1093   Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
1094 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
1095   Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
1097 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
1098   Float64Regs, int_nvvm_rcp_rn_d>;
1099 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
1100   Float64Regs, int_nvvm_rcp_rz_d>;
1101 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
1102   Float64Regs, int_nvvm_rcp_rm_d>;
1103 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
1104   Float64Regs, int_nvvm_rcp_rp_d>;
1106 def INT_NVVM_RCP_APPROX_FTZ_F : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
1107   Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
1108 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
1109   Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
1112 // Sqrt
// Square root. f32: the four rounding modifiers (.rn/.rz/.rm/.rp) and
// ".approx", each with and without .ftz. f64: the four rounding modifiers
// only. No predicates.
1115 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
1116   Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
1117 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
1118   Float32Regs, int_nvvm_sqrt_rn_f>;
1119 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
1120   Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
1121 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
1122   Float32Regs, int_nvvm_sqrt_rz_f>;
1123 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
1124   Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
1125 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
1126   Float32Regs, int_nvvm_sqrt_rm_f>;
1127 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
1128   Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
1129 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
1130   Float32Regs, int_nvvm_sqrt_rp_f>;
1131 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
1132   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
1133 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
1134   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
1136 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
1137   Float64Regs, int_nvvm_sqrt_rn_d>;
1138 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
1139   Float64Regs, int_nvvm_sqrt_rz_d>;
1140 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
1141   Float64Regs, int_nvvm_sqrt_rm_d>;
1142 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
1143   Float64Regs, int_nvvm_sqrt_rp_d>;
1145 // nvvm_sqrt intrinsic
// The generic int_nvvm_sqrt_f intrinsic is mapped to one of four sqrt
// instructions according to the predicates that hold:
//   doF32FTZ and do_SQRTF32_RN -> sqrt.rn.ftz.f32
//   do_SQRTF32_RN              -> sqrt.rn.f32
//   doF32FTZ                   -> sqrt.approx.ftz.f32
//   (no predicate)             -> sqrt.approx.f32
// NOTE(review): more than one pattern can be eligible at once (the last is
// unconditional); presumably the earlier, more constrained pattern wins.
// Confirm the selection priority before reordering these.
1146 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1147           (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
1148 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1149           (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
1150 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1151           (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
1152 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1153           (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
1156 // Rsqrt
// Only ".approx" reciprocal square roots are defined: f32 with and without
// .ftz, and f64. No predicates.
1159 def INT_NVVM_RSQRT_APPROX_FTZ_F
1160   : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
1161     int_nvvm_rsqrt_approx_ftz_f>;
1162 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
1163   Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
1164 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
1165   Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
1168 // Add
// Floating-point add with an explicit rounding modifier (.rn/.rz/.rm/.rp).
// f32 has an .ftz and a plain form per modifier; f64 only the plain form.
// No predicates.
1171 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
1172   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
1173 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
1174   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
1175 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
1176   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
1177 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
1178   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
1179 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
1180   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
1181 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
1182   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
1183 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
1184   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
1185 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
1186   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
1188 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
1189   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
1190 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
1191   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
1192 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
1193   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
1194 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
1195   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
1198 // Convert
// f64 -> f32: each int_nvvm_d2f_<rnd>[_ftz] intrinsic is rewritten to
// CVT_f32_f64 with the matching conversion-mode operand (CvtRN, CvtRZ, CvtRM,
// CvtRP, or the *_FTZ counterpart).
1201 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
1202           (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
1203 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
1204           (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
1205 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
1206           (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
1207 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
1208           (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
1209 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
1210           (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
1211 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
1212           (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
1213 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
1214           (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
1215 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
1216           (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
// f64 -> 32-bit integer: int_nvvm_d2i_* maps to CVT_s32_f64 and
// int_nvvm_d2ui_* to CVT_u32_f64. Float-to-integer conversions use the
// integer-rounding mode operands (CvtRNI, CvtRZI, CvtRMI, CvtRPI).
1218 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
1219           (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
1220 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
1221           (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
1222 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
1223           (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
1224 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
1225           (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
1227 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
1228           (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
1229 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
1230           (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
1231 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
1232           (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
1233 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
1234           (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
// 32-bit integer -> f64: int_nvvm_i2d_* maps to CVT_f64_s32 and
// int_nvvm_ui2d_* to CVT_f64_u32, with mode operands CvtRN, CvtRZ, CvtRM and
// CvtRP.
1236 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
1237           (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
1238 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
1239           (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
1240 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
1241           (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
1242 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
1243           (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
1245 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
1246           (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
1247 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
1248           (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
1249 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
1250           (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
1251 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
1252           (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
// f32 -> signed 32-bit integer: int_nvvm_f2i_<rnd>[_ftz] maps to CVT_s32_f32
// with the integer-rounding mode operand (CvtRNI, CvtRZI, CvtRMI, CvtRPI, or
// the *_FTZ counterpart).
1254 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
1255           (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1256 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
1257           (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
1258 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
1259           (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1260 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
1261           (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
1262 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
1263           (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1264 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
1265           (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
1266 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
1267           (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1268 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
1269           (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
// f32 -> unsigned 32-bit integer: int_nvvm_f2ui_<rnd>[_ftz] maps to
// CVT_u32_f32 with the integer-rounding mode operand (CvtRNI, CvtRZI, CvtRMI,
// CvtRPI, or the *_FTZ counterpart).
1271 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
1272           (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1273 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
1274           (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
1275 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
1276           (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1277 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
1278           (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
1279 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
1280           (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1281 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
1282           (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
1283 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
1284           (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1285 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
1286           (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
// 32-bit integer -> f32: int_nvvm_i2f_* maps to CVT_f32_s32 and
// int_nvvm_ui2f_* to CVT_f32_u32, with mode operands CvtRN, CvtRZ, CvtRM and
// CvtRP.
1288 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
1289           (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
1290 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
1291           (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
1292 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
1293           (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
1294 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
1295           (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
1297 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
1298           (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
1299 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
1300           (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
1301 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
1302           (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
1303 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
1304           (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
// Two f32 inputs -> bf16x2 conversion intrinsics. Both source registers are
// forwarded unchanged to CVT_bf16x2_f32; the rn/rz suffix and the optional
// _relu suffix select the CvtRN / CvtRZ / CvtRN_RELU / CvtRZ_RELU mode.
1306 def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
1307           (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
1308 def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
1309           (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
1310 def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
1311           (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
1312 def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
1313           (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
// Two f32 inputs -> f16x2 conversion intrinsics. Same shape as the bf16x2
// patterns: both sources go to CVT_f16x2_f32 and the rn/rz[_relu] suffix picks
// the CvtRN / CvtRZ / CvtRN_RELU / CvtRZ_RELU mode.
1315 def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
1316           (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
1317 def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
1318           (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
1319 def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
1320           (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
1321 def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
1322           (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
// Single f32 -> bf16 conversion intrinsics: int_nvvm_f2bf16_{rn,rz}[_relu] are
// selected to CVT_bf16_f32 with the correspondingly named Cvt* mode.
1324 def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
1325           (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
1326 def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
1327           (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
1328 def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
1329           (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
1330 def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
1331           (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;
// f32 -> tf32 conversion. Unlike the Pat<> mappings around it, this is a
// standalone instruction whose asm string hard-codes "cvt.rna.tf32.f32" and
// whose pattern matches int_nvvm_f2tf32_rna directly. The result is produced
// in an Int32Regs register.
1333 def CVT_tf32_f32 :
1334    NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
1335                    "cvt.rna.tf32.f32 \t$dest, $a;",
1336        [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;
// int_nvvm_lohi_i2d: packs two Int32Regs sources into one Float64Regs result
// with a vector-form "mov.b64 $dst, {$src0, $src1}".
// NOTE(review): the doubled braces presumably escape literal '{' / '}' in the
// asm string -- confirm against the asm-string syntax used by this backend.
1338 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
1339   Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
// int_nvvm_d2i_lo: extracts one 32-bit word of a Float64Regs value into an
// Int32Regs result. The asm is a brace-scoped snippet that declares a scratch
// .b32 register %temp and unpacks $src0 with a vector mov.b64; $dst is the
// FIRST element of the destination pair and %temp absorbs the other word.
1341 def INT_NVVM_D2I_LO : F_MATH_1<
1342   !strconcat("{{\n\t",
1343              ".reg .b32 %temp; \n\t",
1344              "mov.b64 \t{$dst, %temp}, $src0;\n\t",
1345              "}}"),
1346   Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
// int_nvvm_d2i_hi: counterpart of INT_NVVM_D2I_LO. Same scoped snippet and
// scratch register, but here $dst is the SECOND element of the destination
// pair of the unpacking mov.b64 and %temp absorbs the first.
1347 def INT_NVVM_D2I_HI : F_MATH_1<
1348   !strconcat("{{\n\t",
1349              ".reg .b32 %temp; \n\t",
1350              "mov.b64 \t{%temp, $dst}, $src0;\n\t",
1351              "}}"),
1352   Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
// f32 -> s64 conversion intrinsics. int_nvvm_f2ll_{rn,rz,rm,rp}[_ftz] are all
// selected to CVT_s64_f32; the mode operand mirrors the intrinsic suffix
// (CvtRNI, CvtRZI, CvtRMI, CvtRPI, each with an _FTZ twin).
1354 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
1355           (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1356 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
1357           (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
1358 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
1359           (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1360 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
1361           (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
1362 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
1363           (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1364 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
1365           (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
1366 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
1367           (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1368 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
1369           (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
// f32 -> u64 conversion intrinsics. int_nvvm_f2ull_{rn,rz,rm,rp}[_ftz] are all
// selected to CVT_u64_f32; the mode operand mirrors the intrinsic suffix.
1371 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
1372           (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1373 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
1374           (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
1375 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
1376           (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1377 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
1378           (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
1379 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
1380           (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1381 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
1382           (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
1383 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
1384           (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1385 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
1386           (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
// f64 -> s64 conversion intrinsics: int_nvvm_d2ll_{rn,rz,rm,rp} are selected to
// CVT_s64_f64 with CvtRNI / CvtRZI / CvtRMI / CvtRPI. No _ftz variants are
// defined for the f64 source in this group.
1388 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
1389           (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
1390 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
1391           (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
1392 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
1393           (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
1394 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
1395           (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
// f64 -> u64 conversion intrinsics: int_nvvm_d2ull_{rn,rz,rm,rp} are selected
// to CVT_u64_f64 with CvtRNI / CvtRZI / CvtRMI / CvtRPI.
1397 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
1398           (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
1399 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
1400           (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
1401 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
1402           (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
1403 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
1404           (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
// s64 -> f32 conversion intrinsics: int_nvvm_ll2f_{rn,rz,rm,rp} are selected to
// CVT_f32_s64 with CvtRN / CvtRZ / CvtRM / CvtRP.
1406 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
1407           (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
1408 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
1409           (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
1410 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
1411           (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
1412 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
1413           (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
// u64 -> f32 conversion intrinsics: int_nvvm_ull2f_{rn,rz,rm,rp} are selected
// to CVT_f32_u64 with CvtRN / CvtRZ / CvtRM / CvtRP.
1415 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
1416           (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
1417 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
1418           (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
1419 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
1420           (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
1421 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
1422           (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
// s64 -> f64 conversion intrinsics: int_nvvm_ll2d_{rn,rz,rm,rp} are selected to
// CVT_f64_s64 with CvtRN / CvtRZ / CvtRM / CvtRP.
1424 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
1425           (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
1426 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
1427           (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
1428 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
1429           (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
1430 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
1431           (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
// u64 -> f64 conversion intrinsics: int_nvvm_ull2d_{rn,rz,rm,rp} are selected
// to CVT_f64_u64 with CvtRN / CvtRZ / CvtRM / CvtRP.
1433 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
1434           (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
1435 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
1436           (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
1437 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
1438           (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
1439 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
1440           (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
// f32 -> f16 conversion intrinsics: int_nvvm_f2h_rn[_ftz] are selected to
// CVT_f16_f32 with CvtRN / CvtRN_FTZ. Only the rn variants are mapped here.
1443 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
1444           (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
1445 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
1446           (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
1449 // Bitcast intrinsics. Each one is a plain register move ("mov.b32" for the
// 32-bit pair, "mov.b64" for the 64-bit pair) between an integer and a
// floating-point register class of the same width; the F_MATH_1 arguments are
// the asm string, the destination class, the source class and the intrinsic.
1452 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
1453   Float32Regs, int_nvvm_bitcast_f2i>;
1454 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
1455   Int32Regs, int_nvvm_bitcast_i2f>;
1457 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
1458   Int64Regs, int_nvvm_bitcast_ll2d>;
1459 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
1460   Float64Regs, int_nvvm_bitcast_d2ll>;
1463 // FNS: base class for the "fns.b32" instruction variants.
//   ins      - input operand dag; must name its operands $mask, $base and
//              $offset because the asm string refers to them by name.
//   Operands - the int_nvvm_fns dag to match; its result is set into $dst.
// Every variant produces an Int32Regs result and is gated on
// hasPTX<60> and hasSM<30>.
1466 class INT_FNS_MBO<dag ins, dag Operands>
1467   : NVPTXInst<(outs Int32Regs:$dst), ins,
1468                "fns.b32 \t$dst, $mask, $base, $offset;",
1469                [(set Int32Regs:$dst, Operands )]>,
1470     Requires<[hasPTX<60>, hasSM<30>]>;
// The eight register/immediate combinations of int_nvvm_fns. The three-letter
// suffix gives the operand kind for (mask, base, offset) in that order:
// 'r' = Int32Regs register, 'i' = i32imm operand matched with `imm`.
1472 def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
1473                      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1474 def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base,    i32imm:$offset),
1475                      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,       imm:$offset)>;
1476 def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base, Int32Regs:$offset),
1477                      (int_nvvm_fns Int32Regs:$mask,       imm:$base, Int32Regs:$offset)>;
1478 def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base,    i32imm:$offset),
1479                      (int_nvvm_fns Int32Regs:$mask,       imm:$base,       imm:$offset)>;
1480 def INT_FNS_irr : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
1481                      (int_nvvm_fns       imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1482 def INT_FNS_iri : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base,    i32imm:$offset),
1483                      (int_nvvm_fns       imm:$mask, Int32Regs:$base,       imm:$offset)>;
1484 def INT_FNS_iir : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base, Int32Regs:$offset),
1485                      (int_nvvm_fns       imm:$mask,       imm:$base, Int32Regs:$offset)>;
1486 def INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$offset),
1487                      (int_nvvm_fns       imm:$mask,       imm:$base,       imm:$offset)>;
1489 //-----------------------------------
1490 // Atomic Functions
1491 //-----------------------------------
// PatFrag wrappers that attach an address-space predicate to an atomic
// operation. The predicate code comes from AS_match (defined near the top of
// this file), which calls ChkMemSDNodeAddressSpace with ADDRESS_SPACE_GLOBAL,
// ADDRESS_SPACE_SHARED or ADDRESS_SPACE_GENERIC respectively.
//   ops  - operand list of the fragment.
//   frag - the dag being wrapped.
1493 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
1494  : PatFrag<ops, frag, AS_match.global>;
1495 class ATOMIC_SHARED_CHK <dag ops, dag frag>
1496  : PatFrag<ops, frag, AS_match.shared>;
1497 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
1498  : PatFrag<ops, frag, AS_match.generic>;
// Two-operand atomic (address + one value) for ONE pointer width.
//   ptrT / ptrclass - value type and register class of the $addr operand.
//   regT / regclass - value type and register class of $b and of the result.
//   SpaceStr, OpcStr, TypeStr - appended to "atom" (in that order) to build
//                     the mnemonic.
//   IntOp           - pattern fragment to match.
//   IMMType / IMM   - operand type and SDNode used by the immediate form.
//   Pred            - predicate list passed to Requires<>.
// Defines two records: `reg` ($b in a register) and `imm` ($b an immediate).
1500 multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1501   ValueType regT, NVPTXRegClass regclass,
1502   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1503   Operand IMMType, SDNode IMM, list<Predicate> Pred> {
1504   def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1505     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
1506     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
1507   Requires<Pred>;
  // The trailing "" argument to !strconcat below appends nothing; the asm
  // string is the same as for `reg`.
1508   def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
1509     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
1510     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
1511   Requires<Pred>;
// Two-operand atomic for BOTH pointer widths: instantiates F_ATOMIC_2_imp once
// with an i32 / Int32Regs address (`p32`) and once with an i64 / Int64Regs
// address (`p64`), forwarding every other argument unchanged. Pred defaults to
// the empty list, i.e. no extra predicate.
1513 multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1514   string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
1515   list<Predicate> Pred = []> {
1516   defm p32 : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1517     IntOp, IMMType, IMM, Pred>;
1518   defm p64 : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1519     IntOp, IMMType, IMM, Pred>;
1522 // Two-operand atomic that negates its value operand before applying OpcStr.
// Here TypeStr is only the bit width ("32" / "64"): it is spliced into
// ".s<width>" for the scratch register and the neg, and into ".u<width>" for
// the atom instruction. The emitted asm is a brace-scoped snippet that
// declares a scratch register `temp`, writes neg($b) into it and then issues
// the atomic with `temp` as the value. Only a register form (`reg`) is
// defined; there is no immediate variant.
1523 multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1524   ValueType regT, NVPTXRegClass regclass,
1525   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1526   list<Predicate> Pred> {
1527   def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1528     !strconcat(
1529       "{{ \n\t",
1530       ".reg \t.s", TypeStr, " temp; \n\t",
1531       "neg.s", TypeStr, " \ttemp, $b; \n\t",
1532       "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
1533       "}}"),
1534     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
1535   Requires<Pred>;
// Negating two-operand atomic for both pointer widths: instantiates
// F_ATOMIC_2_NEG_imp as `p32` (i32 / Int32Regs address) and `p64`
// (i64 / Int64Regs address). Pred defaults to the empty list.
1537 multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass, string SpaceStr,
1538   string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
1539  defm p32: F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1540    IntOp, Pred> ;
1541  defm p64: F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1542    IntOp, Pred> ;
1545 // Three-operand atomic (address + two values, $b and $c) for ONE pointer width.
// Parameters have the same meaning as in F_ATOMIC_2_imp, except that there is
// no IMM parameter: the immediate forms match with the `imm` node directly.
// Defines four records covering every register/immediate combination:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
1546 multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
1547   ValueType regT, NVPTXRegClass regclass,
1548   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1549   Operand IMMType, list<Predicate> Pred> {
1550   def reg : NVPTXInst<(outs regclass:$dst),
1551     (ins ptrclass:$addr, regclass:$b, regclass:$c),
1552     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1553     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>,
1554   Requires<Pred>;
1556   def imm1 : NVPTXInst<(outs regclass:$dst),
1557     (ins ptrclass:$addr, IMMType:$b, regclass:$c),
1558     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1559     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>,
1560   Requires<Pred>;
  // The trailing "" argument to !strconcat below appends nothing.
1562   def imm2 : NVPTXInst<(outs regclass:$dst),
1563     (ins ptrclass:$addr, regclass:$b, IMMType:$c),
1564     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
1565     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>,
1566   Requires<Pred>;
1568   def imm3 : NVPTXInst<(outs regclass:$dst),
1569     (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
1570     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1571     [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
1572   Requires<Pred>;
// Three-operand atomic for both pointer widths: instantiates F_ATOMIC_3_imp as
// `p32` (i32 / Int32Regs address) and `p64` (i64 / Int64Regs address). Pred
// defaults to the empty list.
1574 multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1575   string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
1576   defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1577     IntOp, IMMType, Pred>;
1578   defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
1579     IntOp, IMMType, Pred>;
1582 // atom_add: address-space-qualified fragments for atomic add.
// Suffix convention used by all fragments in this file: _g = global,
// _s = shared, _gen = generic address space.
// The _32 / _64 fragments wrap atomic_load_add_32 / atomic_load_add_64. The
// three fragments WITHOUT a size suffix wrap atomic_load_fadd and are the ones
// used by the f32 / f64 add instructions.
1584 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1585   (atomic_load_add_32 node:$a, node:$b)>;
1586 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1587   (atomic_load_add_32 node:$a, node:$b)>;
1588 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1589   (atomic_load_add_32 node:$a, node:$b)>;
1590 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1591   (atomic_load_add_64 node:$a, node:$b)>;
1592 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1593   (atomic_load_add_64 node:$a, node:$b)>;
1594 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1595   (atomic_load_add_64 node:$a, node:$b)>;
1596 def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1597   (atomic_load_fadd node:$a, node:$b)>;
1598 def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1599   (atomic_load_fadd node:$a, node:$b)>;
1600 def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1601   (atomic_load_fadd node:$a, node:$b)>;
// Atomic add instructions ("atom[.space].add.<type>").
// Naming: _G / _S / _GEN select the ".global" / ".shared" / "" (no space
// qualifier) mnemonic together with the matching address-space fragment.
// The *_GEN_*_USE_G variants match the GENERIC fragment but print ".global".
// NOTE(review): the _GEN and _GEN_USE_G variants match the same fragment with
// the same (empty) predicate list, so which of the two gets selected is not
// decided by anything visible here -- confirm whether _USE_G is still needed.
// Integer adds use .u32 / .u64 with i32imm / i64imm + `imm` for the immediate
// form; float adds use .f32 / .f64 with f32imm / f64imm + `fpimm`.
1603 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
1604   atomic_load_add_32_g, i32imm, imm>;
1605 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".add",
1606   atomic_load_add_32_s, i32imm, imm>;
1607 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".add",
1608   atomic_load_add_32_gen, i32imm, imm>;
1609 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1610   ".add", atomic_load_add_32_gen, i32imm, imm>;
1612 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
1613   atomic_load_add_64_g, i64imm, imm>;
1614 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".add",
1615   atomic_load_add_64_s, i64imm, imm>;
1616 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
1617   atomic_load_add_64_gen, i64imm, imm>;
1618 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1619   ".add", atomic_load_add_64_gen, i64imm, imm>;
// f32 add: no extra predicate.
1621 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
1622   atomic_load_add_g, f32imm, fpimm>;
1623 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
1624   atomic_load_add_s, f32imm, fpimm>;
1625 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<f32, Float32Regs, "", ".f32", ".add",
1626   atomic_load_add_gen, f32imm, fpimm>;
// f64 add: gated on the hasAtomAddF64 predicate.
1628 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<f64, Float64Regs, ".global", ".f64", ".add",
1629   atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
1630 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<f64, Float64Regs, ".shared", ".f64", ".add",
1631   atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
1632 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<f64, Float64Regs, "", ".f64", ".add",
1633   atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
1635 // atom_sub: address-space-qualified fragments wrapping atomic_load_sub_32 and
// atomic_load_sub_64 (_g = global, _s = shared, _gen = generic).
1637 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1638   (atomic_load_sub_32 node:$a, node:$b)>;
1639 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1640   (atomic_load_sub_32 node:$a, node:$b)>;
1641 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1642   (atomic_load_sub_32 node:$a, node:$b)>;
1643 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1644   (atomic_load_sub_64 node:$a, node:$b)>;
1645 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1646   (atomic_load_sub_64 node:$a, node:$b)>;
1647 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1648   (atomic_load_sub_64 node:$a, node:$b)>;
// Atomic subtract instructions. These are built on F_ATOMIC_2_NEG with the
// ".add" opcode string, i.e. a subtract is emitted as an atomic add of the
// negated value operand. TypeStr is the bare width ("32" / "64") because
// F_ATOMIC_2_NEG supplies the ".s" / ".u" prefixes itself.
// The *_GEN_*_USE_G variants match the generic fragment but print ".global".
1650 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
1651   atomic_load_sub_32_g>;
1652 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
1653   atomic_load_sub_64_g>;
1654 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<i32, Int32Regs, "", "32", ".add",
1655   atomic_load_sub_32_gen>;
1656 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32",
1657   ".add", atomic_load_sub_32_gen>;
1658 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".shared", "32", ".add",
1659   atomic_load_sub_32_s>;
1660 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".shared", "64", ".add",
1661   atomic_load_sub_64_s>;
1662 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<i64, Int64Regs, "", "64", ".add",
1663   atomic_load_sub_64_gen>;
1664 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64",
1665   ".add", atomic_load_sub_64_gen>;
1667 // atom_swap: address-space-qualified fragments wrapping atomic_swap_32 and
// atomic_swap_64 (_g = global, _s = shared, _gen = generic).
1669 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1670   (atomic_swap_32 node:$a, node:$b)>;
1671 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1672   (atomic_swap_32 node:$a, node:$b)>;
1673 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1674   (atomic_swap_32 node:$a, node:$b)>;
1675 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1676   (atomic_swap_64 node:$a, node:$b)>;
1677 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1678   (atomic_swap_64 node:$a, node:$b)>;
1679 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1680   (atomic_swap_64 node:$a, node:$b)>;
// Atomic exchange instructions: the swap fragments are emitted with the
// ".exch" opcode and the untyped ".b32" / ".b64" type strings. No extra
// predicate is attached to either width. The *_GEN_*_USE_G variants match the
// generic fragment but print ".global".
1682 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
1683   atomic_swap_32_g, i32imm, imm>;
1684 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".exch",
1685   atomic_swap_32_s, i32imm, imm>;
1686 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".exch",
1687   atomic_swap_32_gen, i32imm, imm>;
1688 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1689   ".exch", atomic_swap_32_gen, i32imm, imm>;
1690 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
1691   atomic_swap_64_g, i64imm, imm>;
1692 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".exch",
1693   atomic_swap_64_s, i64imm, imm>;
1694 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".exch",
1695   atomic_swap_64_gen, i64imm, imm>;
1696 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1697   ".exch", atomic_swap_64_gen, i64imm, imm>;
1699 // atom_max: address-space-qualified fragments for atomic max.
// atomic_load_max_* wrap atomic_load_max_32 / _64 and atomic_load_umax_* wrap
// atomic_load_umax_32 / _64 (_g = global, _s = shared, _gen = generic).
1701 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1702   , (atomic_load_max_32 node:$a, node:$b)>;
1703 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1704   (atomic_load_max_32 node:$a, node:$b)>;
1705 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1706   (atomic_load_max_32 node:$a, node:$b)>;
1707 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1708   , (atomic_load_max_64 node:$a, node:$b)>;
1709 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1710   (atomic_load_max_64 node:$a, node:$b)>;
1711 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1712   (atomic_load_max_64 node:$a, node:$b)>;
1713 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1714   (atomic_load_umax_32 node:$a, node:$b)>;
1715 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1716   (atomic_load_umax_32 node:$a, node:$b)>;
1717 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1718   (atomic_load_umax_32 node:$a, node:$b)>;
1719 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1720   (atomic_load_umax_64 node:$a, node:$b)>;
1721 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1722   (atomic_load_umax_64 node:$a, node:$b)>;
1723 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1724   (atomic_load_umax_64 node:$a, node:$b)>;
// Atomic max instructions (".max" opcode).
// The atomic_load_max_* fragments are emitted with the ".s32" / ".s64" type
// strings and the atomic_load_umax_* fragments with ".u32" / ".u64".
// All 64-bit variants carry the [hasSM<32>] predicate; the 32-bit variants
// carry none. The *_GEN_*_USE_G variants match the generic fragment but print
// ".global".
1726 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
1727   ".max", atomic_load_max_32_g, i32imm, imm>;
1728 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
1729   ".max", atomic_load_max_32_s, i32imm, imm>;
1730 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".max",
1731   atomic_load_max_32_gen, i32imm, imm>;
1732 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1733   ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
1734 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
1735   ".max", atomic_load_max_64_g, i64imm, imm, [hasSM<32>]>;
1736 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
1737   ".max", atomic_load_max_64_s, i64imm, imm, [hasSM<32>]>;
1738 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".max",
1739   atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
1740 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1741   ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
1742 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1743   ".max", atomic_load_umax_32_g, i32imm, imm>;
1744 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
1745   ".max", atomic_load_umax_32_s, i32imm, imm>;
1746 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".max",
1747   atomic_load_umax_32_gen, i32imm, imm>;
1748 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1749   ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
1750 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1751   ".max", atomic_load_umax_64_g, i64imm, imm, [hasSM<32>]>;
1752 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
1753   ".max", atomic_load_umax_64_s, i64imm, imm, [hasSM<32>]>;
1754 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".max",
1755   atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
1756 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1757   ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
1759 // atom_min: address-space-qualified fragments for atomic min.
// atomic_load_min_* wrap atomic_load_min_32 / _64 and atomic_load_umin_* wrap
// atomic_load_umin_32 / _64 (_g = global, _s = shared, _gen = generic).
1761 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1762   (atomic_load_min_32 node:$a, node:$b)>;
1763 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1764   (atomic_load_min_32 node:$a, node:$b)>;
1765 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1766   (atomic_load_min_32 node:$a, node:$b)>;
1767 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1768   (atomic_load_min_64 node:$a, node:$b)>;
1769 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1770   (atomic_load_min_64 node:$a, node:$b)>;
1771 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1772   (atomic_load_min_64 node:$a, node:$b)>;
1773 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1774   (atomic_load_umin_32 node:$a, node:$b)>;
1775 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1776   (atomic_load_umin_32 node:$a, node:$b)>;
1777 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1778   (atomic_load_umin_32 node:$a, node:$b)>;
1779 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1780   (atomic_load_umin_64 node:$a, node:$b)>;
1781 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1782   (atomic_load_umin_64 node:$a, node:$b)>;
1783 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1784   (atomic_load_umin_64 node:$a, node:$b)>;
// Atomic min instructions (".min" opcode).
// The atomic_load_min_* fragments are emitted with the ".s32" / ".s64" type
// strings and the atomic_load_umin_* fragments with ".u32" / ".u64".
// All 64-bit variants carry the [hasSM<32>] predicate; the 32-bit variants
// carry none. The *_GEN_*_USE_G variants match the generic fragment but print
// ".global".
1786 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
1787   ".min", atomic_load_min_32_g, i32imm, imm>;
1788 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
1789   ".min", atomic_load_min_32_s, i32imm, imm>;
1790 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".min",
1791   atomic_load_min_32_gen, i32imm, imm>;
1792 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1793   ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
1794 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
1795   ".min", atomic_load_min_64_g, i64imm, imm, [hasSM<32>]>;
1796 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
1797   ".min", atomic_load_min_64_s, i64imm, imm, [hasSM<32>]>;
1798 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".min",
1799   atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
1800 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1801   ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
1802 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1803   ".min", atomic_load_umin_32_g, i32imm, imm>;
1804 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
1805   ".min", atomic_load_umin_32_s, i32imm, imm>;
1806 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".min",
1807   atomic_load_umin_32_gen, i32imm, imm>;
1808 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1809   ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
1810 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1811   ".min", atomic_load_umin_64_g, i64imm, imm, [hasSM<32>]>;
1812 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
1813   ".min", atomic_load_umin_64_s, i64imm, imm, [hasSM<32>]>;
1814 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".min",
1815   atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
1816 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1817   ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
1819 // atom_inc  atom_dec: address-space-qualified fragments for atomic inc / dec.
// Unlike the other atomic fragments in this section, these wrap the NVVM
// intrinsics int_nvvm_atomic_load_inc_32 / int_nvvm_atomic_load_dec_32 rather
// than atomic_load_* nodes. Only 32-bit fragments are defined.
1821 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1822   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1823 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1824   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1825 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1826   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1827 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1828   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1829 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1830   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1831 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1832   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
// Atomic increment / decrement instructions: ".inc" and ".dec" opcodes with
// the ".u32" type string, 32-bit data only, no extra predicate. The
// *_GEN_32_USE_G variants match the generic fragment but print ".global".
1834 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
1835   atomic_load_inc_32_g, i32imm, imm>;
1836 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".inc",
1837   atomic_load_inc_32_s, i32imm, imm>;
1838 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".inc",
1839   atomic_load_inc_32_gen, i32imm, imm>;
1840 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1841   ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1842 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
1843   atomic_load_dec_32_g, i32imm, imm>;
1844 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".dec",
1845   atomic_load_dec_32_s, i32imm, imm>;
1846 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".dec",
1847   atomic_load_dec_32_gen, i32imm, imm>;
1848 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1849   ".dec", atomic_load_dec_32_gen, i32imm, imm>;
1851 // atom_and: address-space-qualified fragments wrapping atomic_load_and_32 and
// atomic_load_and_64 (_g = global, _s = shared, _gen = generic).
1853 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1854   (atomic_load_and_32 node:$a, node:$b)>;
1855 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1856   (atomic_load_and_32 node:$a, node:$b)>;
1857 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1858   (atomic_load_and_32 node:$a, node:$b)>;
1859 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1860   (atomic_load_and_64 node:$a, node:$b)>;
1861 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1862   (atomic_load_and_64 node:$a, node:$b)>;
1863 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1864   (atomic_load_and_64 node:$a, node:$b)>;
// Atomic bitwise-and instructions: ".and" opcode with the untyped ".b32" /
// ".b64" type strings. The 64-bit variants carry the [hasSM<32>] predicate;
// the 32-bit variants carry none. The *_GEN_*_USE_G variants match the generic
// fragment but print ".global".
1866 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
1867   atomic_load_and_32_g, i32imm, imm>;
1868 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".and",
1869   atomic_load_and_32_s, i32imm, imm>;
1870 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".and",
1871   atomic_load_and_32_gen, i32imm, imm>;
1872 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1873   ".and", atomic_load_and_32_gen, i32imm, imm>;
1874 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
1875   atomic_load_and_64_g, i64imm, imm, [hasSM<32>]>;
1876 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".and",
1877   atomic_load_and_64_s, i64imm, imm, [hasSM<32>]>;
1878 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".and",
1879   atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
1880 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1881   ".and", atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
1883 // atom_or: address-space-qualified fragments wrapping atomic_load_or_32 and
// atomic_load_or_64 (_g = global, _s = shared, _gen = generic).
1885 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1886   (atomic_load_or_32 node:$a, node:$b)>;
1887 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1888   (atomic_load_or_32 node:$a, node:$b)>;
1889 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1890   (atomic_load_or_32 node:$a, node:$b)>;
1891 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1892   (atomic_load_or_64 node:$a, node:$b)>;
1893 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1894   (atomic_load_or_64 node:$a, node:$b)>;
1895 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1896   (atomic_load_or_64 node:$a, node:$b)>;
// atom.or instruction variants (F_ATOMIC_2) for 32- and 64-bit integers.
// The *_USE_G variants pair the generic pattern fragment with the ".global"
// space string. The 64-bit variants pass [hasSM<32>] as the predicate list.
// The shared-space variants are listed last within each width here.
1898 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
1899   atomic_load_or_32_g, i32imm, imm>;
1900 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".or",
1901   atomic_load_or_32_gen, i32imm, imm>;
1902 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1903   ".or", atomic_load_or_32_gen, i32imm, imm>;
1904 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".or",
1905   atomic_load_or_32_s, i32imm, imm>;
1906 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
1907   atomic_load_or_64_g, i64imm, imm, [hasSM<32>]>;
1908 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".or",
1909   atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
1910 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1911   ".or", atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
1912 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".or",
1913   atomic_load_or_64_s, i64imm, imm, [hasSM<32>]>;
1915 // atom_xor
// Pattern fragments for 32- and 64-bit atomic-xor, one per address-space
// check class (global, shared, generic). Each forwards its two operands
// unchanged to atomic_load_xor_32 / atomic_load_xor_64.
1917 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1918   (atomic_load_xor_32 node:$a, node:$b)>;
1919 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1920   (atomic_load_xor_32 node:$a, node:$b)>;
1921 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1922   (atomic_load_xor_32 node:$a, node:$b)>;
1923 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1924   (atomic_load_xor_64 node:$a, node:$b)>;
1925 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1926   (atomic_load_xor_64 node:$a, node:$b)>;
1927 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1928   (atomic_load_xor_64 node:$a, node:$b)>;
// atom.xor instruction variants (F_ATOMIC_2) for 32- and 64-bit integers.
// The *_USE_G variants pair the generic pattern fragment with the ".global"
// space string. The 64-bit variants pass [hasSM<32>] as the predicate list.
1930 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
1931   atomic_load_xor_32_g, i32imm, imm>;
1932 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor",
1933   atomic_load_xor_32_s, i32imm, imm>;
1934 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor",
1935   atomic_load_xor_32_gen, i32imm, imm>;
1936 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1937   ".xor", atomic_load_xor_32_gen, i32imm, imm>;
1938 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
1939   atomic_load_xor_64_g, i64imm, imm, [hasSM<32>]>;
1940 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor",
1941   atomic_load_xor_64_s, i64imm, imm, [hasSM<32>]>;
1942 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
1943   atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
1944 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1945   ".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
1947 // atom_cas
// Pattern fragments for 32- and 64-bit compare-and-swap, one per
// address-space check class (global, shared, generic). These take three
// operands ($a, $b, $c) and forward them unchanged to atomic_cmp_swap_32 /
// atomic_cmp_swap_64.
1949 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1950   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1951 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1952   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1953 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1954   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1955 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1956   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1957 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1958   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1959 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1960   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
// atom.cas instruction variants, built from F_ATOMIC_3 (three-operand form).
// The *_USE_G variants pair the generic pattern fragment with the ".global"
// space string.
// NOTE(review): unlike the 64-bit and/or/xor instantiations in this file, the
// 64-bit cas variants pass no trailing predicate list -- confirm whether
// F_ATOMIC_3's default is the intended requirement.
1962 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
1963   atomic_cmp_swap_32_g, i32imm>;
1964 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
1965   atomic_cmp_swap_32_s, i32imm>;
1966 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
1967   atomic_cmp_swap_32_gen, i32imm>;
1968 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32",
1969   ".cas", atomic_cmp_swap_32_gen, i32imm>;
1970 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
1971   atomic_cmp_swap_64_g, i64imm>;
1972 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
1973   atomic_cmp_swap_64_s, i64imm>;
1974 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
1975   atomic_cmp_swap_64_gen, i64imm>;
1976 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64",
1977   ".cas", atomic_cmp_swap_64_gen, i64imm>;
1979 // Support for scoped atomic operations.  Matches
1980 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1981 // and converts it into the appropriate instruction.
1982 // NOTE: not all possible combinations are implemented
1983 //  'space' is limited to generic as it's the only one needed to support CUDA.
1984 //  'scope' = 'gpu' is default and is handled by regular atomic instructions.
//
// ATOM23_impl is the common instruction record shared by the 2- and
// 3-operand scoped atomics below:
//   AsmStr   - assembly string for the instruction.
//   regT     - value type of the result.
//   regclass - register class of the single $result output.
//   Preds    - predicates attached through Requires<>.
//   ins      - input operand dag, supplied by the caller.
//   Operands - dag whose value the selection pattern assigns to $result.
1985 class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds,
1986                   dag ins, dag Operands>
1987       : NVPTXInst<(outs regclass:$result), ins,
1988                   AsmStr,
1989                   [(set (regT regclass:$result), Operands)]>,
1990         Requires<Preds>;
1992 // Define instruction variants for all addressing modes.
// Emits four anonymous ATOM23_impl records for a two-operand atomic: $src as
// a 32-bit or 64-bit register, crossed with $b as a register or an immediate.
// The register forms of $b are wrapped in AddedComplexity = 1; the immediate
// forms are left at the default.
1993 multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
1994                        ValueType regT, NVPTXRegClass regclass, Operand ImmType,
1995                        SDNode Imm, ValueType ImmTy,
1996                        list<Predicate> Preds> {
1997   let AddedComplexity = 1 in {
1998     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
1999                       (ins Int32Regs:$src, regclass:$b),
2000                       (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
2001     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2002                       (ins Int64Regs:$src, regclass:$b),
2003                       (Intr (i64 Int64Regs:$src), (regT regclass:$b))>;
2004   }
2005   // tablegen can't infer argument types from Intrinsic (though it can
2006   // from Instruction) so we have to enforce specific type on
2007   // immediates via explicit cast to ImmTy.
2008   def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2009                     (ins Int32Regs:$src, ImmType:$b),
2010                     (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
2011   def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2012                     (ins Int64Regs:$src, ImmType:$b),
2013                     (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>;
// Three-operand counterpart of ATOM2P_impl. Emits eight anonymous ATOM23_impl
// records: $src as a 32-bit or 64-bit register, crossed with every
// register/immediate combination of $b and $c. AddedComplexity is 2 when both
// $b and $c are registers, 1 when exactly one of them is an immediate, and
// left at the default when both are immediates.
2016 multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
2017                        ValueType regT, NVPTXRegClass regclass,
2018                        Operand ImmType, SDNode Imm, ValueType ImmTy,
2019                        list<Predicate> Preds> {
2020   // Variants for register/immediate permutations of $b and $c
2021   let AddedComplexity = 2 in {
2022     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2023                       (ins Int32Regs:$src, regclass:$b, regclass:$c),
2024                       (Intr (i32 Int32Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
2025     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2026                       (ins Int64Regs:$src, regclass:$b, regclass:$c),
2027                       (Intr (i64 Int64Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
2028   }
2029   let AddedComplexity = 1 in {
2030     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2031                       (ins Int32Regs:$src, ImmType:$b, regclass:$c),
2032                       (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
2033     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2034                       (ins Int64Regs:$src, ImmType:$b, regclass:$c),
2035                       (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
2036     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2037                       (ins Int32Regs:$src, regclass:$b, ImmType:$c),
2038                       (Intr (i32 Int32Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
2039     def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2040                       (ins Int64Regs:$src, regclass:$b, ImmType:$c),
2041                       (Intr (i64 Int64Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
2042   }
2043   def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2044                     (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
2045                     (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
2046   def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2047                     (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
2048                     (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
2051 // Constructs intrinsic name and instruction asm strings.
// The asm string is "atom", then "." # SpaceStr unless SpaceStr is "gen",
// then "." # ScopeStr unless ScopeStr is "gpu", then "." # OpStr # "." #
// TypeStr followed by the two-operand list. The intrinsic is looked up by
// name via !cast as int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>, with
// "_" # ScopeStr appended unless ScopeStr is empty.
// NOTE(review): the asm string omits the scope when it equals "gpu", while
// the intrinsic name omits it when it is empty; the two tests differ. The
// callers visible in this chunk only pass "cta" and "sys", so neither fires.
2052 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
2053                        string ScopeStr, string SpaceStr,
2054                        ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2055                        ValueType ImmTy, list<Predicate> Preds> {
2056   defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2057                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
2058                             # "." # OpStr # "." # TypeStr
2059                             # " \t$result, [$src], $b;",
2060                      !cast<Intrinsic>(
2061                             "int_nvvm_atomic_" # OpStr
2062                             # "_" # SpaceStr # "_" # IntTypeStr
2063                             # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
2064                      regT, regclass, ImmType, Imm, ImmTy, Preds>;
// Three-operand counterpart of ATOM2N_impl: builds the asm string (ending in
// the "$b, $c" operand list) and resolves the intrinsic by name, then
// forwards both to ATOM3P_impl.
// The asm string is "atom", then "." # SpaceStr unless SpaceStr is "gen",
// then "." # ScopeStr unless ScopeStr is "gpu", then "." # OpStr # "." #
// TypeStr. The intrinsic name is
// int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>, with "_" # ScopeStr
// appended unless ScopeStr is empty.
// NOTE(review): the scope is tested against "gpu" for the asm string but
// against empty for the intrinsic name; the callers visible in this chunk
// only pass "cta" and "sys", so neither test fires.
2066 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
2067                        string ScopeStr, string SpaceStr,
2068                        ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2069                        ValueType ImmTy, list<Predicate> Preds> {
2070   defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2071                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
2072                             # "." # OpStr # "." # TypeStr
2073                             # " \t$result, [$src], $b, $c;",
2074                      !cast<Intrinsic>(
2075                             "int_nvvm_atomic_" # OpStr
2076                             # "_" # SpaceStr # "_" # IntTypeStr
2077                             # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
2078                      regT, regclass, ImmType, Imm, ImmTy, Preds>;
2081 // Constructs variants for different address spaces.
2082 // For now we only need variants for generic space pointers.
// Instantiates ATOM2N_impl exactly once, under the "_gen_" name suffix, with
// SpaceStr fixed to "gen"; every other parameter is forwarded unchanged.
2083 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
2084                        string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
2085                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
2086    defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
2087                             regT, regclass, ImmType, Imm, ImmTy, Preds>;
// Three-operand counterpart of ATOM2A_impl: instantiates ATOM3N_impl once,
// under the "_gen_" name suffix, with SpaceStr fixed to "gen"; every other
// parameter is forwarded unchanged.
2089 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
2090                        string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
2091                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
2092    defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
2093                             regT, regclass, ImmType, Imm, ImmTy, Preds>;
2096 // Constructs variants for different scopes of atomic op.
// Produces a "_cta" and a "_sys" variant through ATOM2A_impl, passing "cta"
// and "sys" as ScopeStr. Both append hasAtomScope to the caller's Preds.
2097 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
2098                        ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2099                        ValueType ImmTy, list<Predicate> Preds> {
2100    // .gpu scope is default and is currently covered by existing
2101    // atomics w/o explicitly specified scope.
2102    defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
2103                            regT, regclass, ImmType, Imm, ImmTy,
2104                            !listconcat(Preds,[hasAtomScope])>;
2105    defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
2106                            regT, regclass, ImmType, Imm, ImmTy,
2107                            !listconcat(Preds,[hasAtomScope])>;
// Three-operand counterpart of ATOM2S_impl: produces a "_cta" and a "_sys"
// variant through ATOM3A_impl, passing "cta" and "sys" as ScopeStr. Both
// append hasAtomScope to the caller's Preds.
2109 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
2110            ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
2111            list<Predicate> Preds> {
2112    // No need to define ".gpu"-scoped atomics.  They do the same thing
2113    // as the regular, non-scoped atomics defined elsewhere.
2114    defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
2115                            regT, regclass, ImmType, Imm, ImmTy,
2116                            !listconcat(Preds,[hasAtomScope])>;
2117    defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
2118                            regT, regclass, ImmType, Imm, ImmTy,
2119                            !listconcat(Preds,[hasAtomScope])>;
2122 // atom.add
// Type variants for a scoped add: s32, u32, u64, f32 and f64. Integer
// variants use IntTypeStr "i" and the imm node; floating-point variants use
// "f" and fpimm. Only the f64 variant passes an extra predicate
// (hasAtomAddF64); the others pass an empty list.
2123 multiclass ATOM2_add_impl<string OpStr> {
2124    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
2125    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
2126    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
2127    defm _f32  : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
2128                             []>;
2129    defm _f64  : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
2130                             [hasAtomAddF64]>;
2133 // atom.{and,or,xor}
// Type variants for a scoped bitwise op: b32 and b64. The b64 variant passes
// hasAtomBitwise64 as its predicate; b32 passes an empty list.
2134 multiclass ATOM2_bitwise_impl<string OpStr> {
2135    defm _b32  : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2136    defm _b64  : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64,
2137                             [hasAtomBitwise64]>;
2140 // atom.exch
// Type variants for a scoped exchange: b32 and b64, both with an empty
// predicate list.
2141 multiclass ATOM2_exch_impl<string OpStr> {
2142    defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2143    defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
2146 // atom.{min,max}
// Type variants for a scoped min/max: s32, u32, s64 and u64. The two 64-bit
// variants pass hasAtomMinMax64 as their predicate; the 32-bit variants pass
// an empty list.
2147 multiclass ATOM2_minmax_impl<string OpStr> {
2148    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
2149    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
2150    defm _s64  : ATOM2S_impl<OpStr, "i", "s64", i64, Int64Regs, i64imm, imm, i64,
2151                             [hasAtomMinMax64]>;
2152    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64,
2153                             [hasAtomMinMax64]>;
2156 // atom.{inc,dec}
// Only a u32 variant is defined, with an empty predicate list.
2157 multiclass ATOM2_incdec_impl<string OpStr> {
2158    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
2161 // atom.cas
// Type variants for a scoped compare-and-swap (three-operand form via
// ATOM3S_impl): b32 and b64, both with an empty predicate list.
2162 multiclass ATOM3_cas_impl<string OpStr> {
2163    defm _b32  : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
2164    defm _b64  : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
// Top-level instantiations of the scoped atomics, one per operation. The
// string argument becomes OpStr, which is spliced into both the asm mnemonic
// and the intrinsic name by ATOM2N_impl / ATOM3N_impl.
2167 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
2168 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
2169 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
2170 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
2171 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
2172 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
2173 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
2174 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
2175 defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
2176 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
2178 //-----------------------------------
2179 // Support for ldu on sm_20 or later
2180 //-----------------------------------
2182 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
2183 // read-only in a kernel.
2185 // Scalar
// LDU_G defines five "ldu.global." forms that differ only in the $src
// operand: areg (Int32Regs), areg64 (Int64Regs), avar (imemAny), ari (MEMri)
// and ari64 (MEMri64).
//   TyStr    - appended to "ldu.global."; the callers in this file pass the
//              type suffix together with the operand text.
//   regclass - register class of the single $result output.
// Every form has an empty pattern list and requires hasLDU.
2187 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
2188   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2189                !strconcat("ldu.global.", TyStr),
2190                       []>, Requires<[hasLDU]>;
2191   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2192                !strconcat("ldu.global.", TyStr),
2193                         []>, Requires<[hasLDU]>;
2194  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2195                !strconcat("ldu.global.", TyStr),
2196                       []>, Requires<[hasLDU]>;
2197  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2198                !strconcat("ldu.global.", TyStr),
2199                       []>, Requires<[hasLDU]>;
2200  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2201                !strconcat("ldu.global.", TyStr),
2202                         []>, Requires<[hasLDU]>;
// Scalar ldu instantiations, one per element type. The i8 variant uses the
// "u8" type suffix with Int16Regs as its result register class.
2205 defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
2206 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
2207 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
2208 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
2209 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
2210 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
2212 // vector
2214 // Elementized vector ldu
// Two-result ldu.global forms ($dst1, $dst2), one per $src operand kind:
// _areg32 (Int32Regs), _areg64 (Int64Regs), _ari32 (MEMri), _ari64 (MEMri64)
// and _avar (imemAny). All have an empty pattern list.
// NOTE(review): unlike the scalar LDU_G forms, these carry no
// Requires<[hasLDU]> clause -- confirm whether that is intentional.
2215 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2216  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2217                      (ins Int32Regs:$src),
2218                      !strconcat("ldu.global.", TyStr), []>;
2219  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2220                      (ins Int64Regs:$src),
2221                      !strconcat("ldu.global.", TyStr), []>;
2222  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2223                      (ins MEMri:$src),
2224                      !strconcat("ldu.global.", TyStr), []>;
2225  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2226                      (ins MEMri64:$src),
2227                      !strconcat("ldu.global.", TyStr), []>;
2228  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2229                      (ins imemAny:$src),
2230                      !strconcat("ldu.global.", TyStr), []>;
// Four-result ldu.global forms ($dst1..$dst4), one per $src operand kind:
// _areg32 (Int32Regs), _areg64 (Int64Regs), _ari32 (MEMri), _ari64 (MEMri64)
// and _avar (imemAny). All have an empty pattern list.
// NOTE(review): unlike the scalar LDU_G forms, these carry no
// Requires<[hasLDU]> clause -- confirm whether that is intentional.
2233 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2234  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2235                             regclass:$dst4), (ins Int32Regs:$src),
2236                !strconcat("ldu.global.", TyStr), []>;
2237  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2238                             regclass:$dst4), (ins Int64Regs:$src),
2239                !strconcat("ldu.global.", TyStr), []>;
2240  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2241                             regclass:$dst4), (ins MEMri:$src),
2242                !strconcat("ldu.global.", TyStr), []>;
2243  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2244                             regclass:$dst4), (ins MEMri64:$src),
2245                !strconcat("ldu.global.", TyStr), []>;
2246  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2247                             regclass:$dst4), (ins imemAny:$src),
2248                !strconcat("ldu.global.", TyStr), []>;
// Vector ldu instantiations: v2 for i8/i16/i32/f32/i64/f64 and v4 for
// i8/i16/i32/f16/f16x2/f32. The 8-bit variants use Int16Regs result
// registers; v4f16 uses the "b16" suffix with Int16Regs and v4f16x2 uses
// "b32" with Int32Regs.
// The doubled braces in the asm strings are presumably the escape for the
// literal braces of the PTX vector operand -- confirm against the asm
// string syntax.
2251 defm INT_PTX_LDU_G_v2i8_ELE
2252   : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
2253 defm INT_PTX_LDU_G_v2i16_ELE
2254   : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2255 defm INT_PTX_LDU_G_v2i32_ELE
2256   : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2257 defm INT_PTX_LDU_G_v2f32_ELE
2258   : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2259 defm INT_PTX_LDU_G_v2i64_ELE
2260   : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2261 defm INT_PTX_LDU_G_v2f64_ELE
2262   : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2263 defm INT_PTX_LDU_G_v4i8_ELE
2264   : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2265 defm INT_PTX_LDU_G_v4i16_ELE
2266   : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2267     Int16Regs>;
2268 defm INT_PTX_LDU_G_v4i32_ELE
2269   : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2270     Int32Regs>;
2271 defm INT_PTX_LDU_G_v4f16_ELE
2272   : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2273     Int16Regs>;
2274 defm INT_PTX_LDU_G_v4f16x2_ELE
2275   : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2276     Int32Regs>;
2277 defm INT_PTX_LDU_G_v4f32_ELE
2278   : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2279     Float32Regs>;
2282 //-----------------------------------
2283 // Support for ldg on sm_35 or later
2284 //-----------------------------------
2286 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
2287 // non-coherent texture cache, and therefore the values read must be read-only
2288 // during the lifetime of the kernel.
// LDG_G defines five "ld.global.nc." forms that differ only in the $src
// operand: areg (Int32Regs), areg64 (Int64Regs), avar (imemAny), ari (MEMri)
// and ari64 (MEMri64).
//   TyStr    - appended to "ld.global.nc."; the callers in this file pass the
//              type suffix together with the operand text.
//   regclass - register class of the single $result output.
// Every form has an empty pattern list and requires hasLDG.
2290 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
2291   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2292                !strconcat("ld.global.nc.", TyStr),
2293                       []>, Requires<[hasLDG]>;
2294   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2295                !strconcat("ld.global.nc.", TyStr),
2296                         []>, Requires<[hasLDG]>;
2297  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2298                !strconcat("ld.global.nc.", TyStr),
2299                       []>, Requires<[hasLDG]>;
2300  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2301                !strconcat("ld.global.nc.", TyStr),
2302                       []>, Requires<[hasLDG]>;
2303  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2304                !strconcat("ld.global.nc.", TyStr),
2305                         []>, Requires<[hasLDG]>;
// Scalar ldg instantiations, one per element type. The i8 variant uses the
// "u8" type suffix with Int16Regs as its result register class.
2308 defm INT_PTX_LDG_GLOBAL_i8
2309   : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
2310 defm INT_PTX_LDG_GLOBAL_i16
2311   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
2312 defm INT_PTX_LDG_GLOBAL_i32
2313   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
2314 defm INT_PTX_LDG_GLOBAL_i64
2315   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2316 defm INT_PTX_LDG_GLOBAL_f32
2317   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
2318 defm INT_PTX_LDG_GLOBAL_f64
2319   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2321 // vector
2323 // Elementized vector ldg
// Two-result ld.global.nc forms ($dst1, $dst2), one per $src operand kind:
// _areg32 (Int32Regs), _areg64 (Int64Regs), _ari32 (MEMri), _ari64 (MEMri64)
// and _avar (imemAny). All have an empty pattern list.
// NOTE(review): unlike the scalar LDG_G forms, these carry no
// Requires<[hasLDG]> clause -- confirm whether that is intentional.
2324 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2325  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2326                      (ins Int32Regs:$src),
2327                      !strconcat("ld.global.nc.", TyStr), []>;
2328  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2329                      (ins Int64Regs:$src),
2330                      !strconcat("ld.global.nc.", TyStr), []>;
2331  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2332                      (ins MEMri:$src),
2333                      !strconcat("ld.global.nc.", TyStr), []>;
2334  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2335                      (ins MEMri64:$src),
2336                      !strconcat("ld.global.nc.", TyStr), []>;
2337  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2338                      (ins imemAny:$src),
2339                      !strconcat("ld.global.nc.", TyStr), []>;
// Four-result ld.global.nc forms ($dst1..$dst4), one per $src operand kind:
// _areg32 (Int32Regs), _areg64 (Int64Regs), _ari32 (MEMri), _ari64 (MEMri64)
// and _avar (imemAny). All have an empty pattern list.
// NOTE(review): unlike the scalar LDG_G forms, these carry no
// Requires<[hasLDG]> clause -- confirm whether that is intentional.
2342 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2343   def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2344                               regclass:$dst4), (ins Int32Regs:$src),
2345                !strconcat("ld.global.nc.", TyStr), []>;
2346   def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2347                                regclass:$dst4), (ins Int64Regs:$src),
2348                !strconcat("ld.global.nc.", TyStr), []>;
2349   def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2350                               regclass:$dst4), (ins MEMri:$src),
2351                !strconcat("ld.global.nc.", TyStr), []>;
2352   def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2353                               regclass:$dst4), (ins MEMri64:$src),
2354                !strconcat("ld.global.nc.", TyStr), []>;
2355   def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2356                              regclass:$dst4), (ins imemAny:$src),
2357                !strconcat("ld.global.nc.", TyStr), []>;
// Vector ldg instantiations: v2 for i8/i16/i32/f32/i64/f64 and v4 for
// i8/i16/i32/f32. The 8-bit variants use Int16Regs result registers.
2360 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
2361 defm INT_PTX_LDG_G_v2i8_ELE
2362   : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
2363 defm INT_PTX_LDG_G_v2i16_ELE
2364   : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2365 defm INT_PTX_LDG_G_v2i32_ELE
2366   : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2367 defm INT_PTX_LDG_G_v2f32_ELE
2368   : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2369 defm INT_PTX_LDG_G_v2i64_ELE
2370   : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2371 defm INT_PTX_LDG_G_v2f64_ELE
2372   : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2373 defm INT_PTX_LDG_G_v4i8_ELE
2374   : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2375 defm INT_PTX_LDG_G_v4i16_ELE
2376   : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2377 defm INT_PTX_LDG_G_v4i32_ELE
2378   : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
2379 defm INT_PTX_LDG_G_v4f32_ELE
2380   : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
// NG_TO_G: "cvta.<Str>" address conversions selected for the given intrinsic
// (instantiated in this file with the int_nvvm_ptr_*_to_gen intrinsics).
//   _yes      : 32-bit source and result (cvta.<Str>.u32).
//   _yes_64   : 64-bit source and result (cvta.<Str>.u64).
//   _yes_6432 : 32-bit source, 64-bit result. Widens $src into a scratch
//               %tmp register with cvt.u64.u32, then applies the 64-bit
//               cvta. Requires useShortPtr.
2383 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
2384    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2385           !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
2386       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2387    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2388           !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
2389       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2390    def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
2391           "{{ .reg .b64 %tmp;\n\t"
2392           #"  cvt.u64.u32 \t%tmp, $src;\n\t"
2393           #"  cvta." # Str # ".u64 \t$result, %tmp; }}",
2394       [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
2395       Requires<[useShortPtr]>;
// G_TO_NG: "cvta.to.<Str>" address conversions selected for the given
// intrinsic (instantiated in this file with the int_nvvm_ptr_gen_to_*
// intrinsics).
//   _yes      : 32-bit source and result (cvta.to.<Str>.u32).
//   _yes_64   : 64-bit source and result (cvta.to.<Str>.u64).
//   _yes_3264 : 64-bit source, 32-bit result. Applies the 64-bit cvta.to
//               into a scratch %tmp register, then narrows with cvt.u32.u64.
//               Requires useShortPtr.
2398 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
2399    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2400           !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
2401       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2402    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2403           !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
2404       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2405    def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
2406           "{{ .reg .b64 %tmp;\n\t"
2407           #"  cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
2408           #"  cvt.u32.u64 \t$result, %tmp; }}",
2409       [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
2410       Requires<[useShortPtr]>;
// cvta instantiations for the local, shared, global and const spaces: the
// first group binds NG_TO_G to the *_to_gen intrinsics, the second binds
// G_TO_NG to the gen_to_* intrinsics. The asm space string is "const" while
// the corresponding intrinsic names use "constant".
2413 defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
2414 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
2415 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
2416 defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
2418 defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
2419 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
2420 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
2421 defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
2424 // nvvm.ptr.gen.to.param
// Selected for int_nvvm_ptr_gen_to_param on 32-bit and 64-bit registers.
// Both forms emit a plain register move (mov.u32 / mov.u64) rather than a
// cvta conversion.
2425 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
2426   (ins Int32Regs:$src),
2427                         "mov.u32 \t$result, $src;",
2428                               [(set Int32Regs:$result,
2429                                 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
2430 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
2431   (ins Int64Regs:$src),
2432                         "mov.u64 \t$result, $src;",
2433                               [(set Int64Regs:$result,
2434                                 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2437 // nvvm.move intrinsics
// Register-to-register moves selected for the int_nvvm_move_* intrinsics.
// Integer variants emit mov.b16/b32/b64, floating-point variants emit
// mov.f32/f64, and the two pointer variants (both matching
// int_nvvm_move_ptr) emit mov.u32/mov.u64 according to register width.
2438 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
2439                              "mov.b16 \t$r, $s;",
2440                              [(set Int16Regs:$r,
2441                                (int_nvvm_move_i16 Int16Regs:$s))]>;
2442 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2443                              "mov.b32 \t$r, $s;",
2444                              [(set Int32Regs:$r,
2445                                (int_nvvm_move_i32 Int32Regs:$s))]>;
2446 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2447                              "mov.b64 \t$r, $s;",
2448                              [(set Int64Regs:$r,
2449                                (int_nvvm_move_i64 Int64Regs:$s))]>;
2450 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
2451                              "mov.f32 \t$r, $s;",
2452                              [(set Float32Regs:$r,
2453                                (int_nvvm_move_float Float32Regs:$s))]>;
2454 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
2455                              "mov.f64 \t$r, $s;",
2456                              [(set Float64Regs:$r,
2457                                (int_nvvm_move_double Float64Regs:$s))]>;
2458 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2459                              "mov.u32 \t$r, $s;",
2460                              [(set Int32Regs:$r,
2461                                (int_nvvm_move_ptr Int32Regs:$s))]>;
2462 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2463                              "mov.u64 \t$r, $s;",
2464                              [(set Int64Regs:$r,
2465                                (int_nvvm_move_ptr Int64Regs:$s))]>;
2467 // @TODO: Are these actually needed, or will we always just see symbols
2468 // copied to registers first?
2469 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2470                              "mov.u32 \t$r, $s;",
2471                              [(set Int32Regs:$r,
2472                              (int_nvvm_move_ptr texternalsym:$s))]>;
2473 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2474                              "mov.u64 \t$r, $s;",
2475                              [(set Int64Regs:$r,
2476                              (int_nvvm_move_ptr texternalsym:$s))]>;*/
2479 // MoveParam        %r1, param
2480 // ptr_local_to_gen %r2, %r1
2481 // ptr_gen_to_local %r3, %r2
2482 // ->
2483 // mov %r1, param
2485 // @TODO: Revisit this.  There is a type
2486 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2487 // instructions are not currently defined. However, we can use the ptr
2488 // variants and the asm printer will do the right thing.
// Fold int_nvvm_ptr_gen_to_local(int_nvvm_ptr_local_to_gen(MoveParam sym))
// into a single pointer move of the external symbol. The i64-typed result
// selects nvvm_move_ptr64 and the i32-typed result selects nvvm_move_ptr32.
2489 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2490                 (MoveParam texternalsym:$src)))),
2491                (nvvm_move_ptr64  texternalsym:$src)>;
2492 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2493                 (MoveParam texternalsym:$src)))),
2494                (nvvm_move_ptr32  texternalsym:$src)>;
// Copies the imem operand $src into the 64-bit register $result with
// mov.u64. The pattern list is empty, so instruction selection never picks
// this record from a pattern; it has to be created explicitly.
// NOTE(review): the name suggests it materializes texture/surface handles —
// confirm against the code that builds it.
2496 def texsurf_handles
2497   : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
2498               "mov.u64 \t$result, $src;", []>;
2500 //-----------------------------------
2501 // Compiler Error Warn
2502 // - Just ignore them in codegen
2503 //-----------------------------------
// Matches int_nvvm_compiler_warn and int_nvvm_compiler_error for both
// Int32Regs and Int64Regs operands. The records have no outputs and their
// asm string is only a `//` line naming the intrinsic, so nothing but that
// text is printed for them.
2505 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2506                 "// llvm.nvvm.compiler.warn()",
2507                 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
2508 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2509                 "// llvm.nvvm.compiler.warn()",
2510                 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
2511 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2512                 "// llvm.nvvm.compiler.error()",
2513                 [(int_nvvm_compiler_error Int32Regs:$a)]>;
2514 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2515                 "// llvm.nvvm.compiler.error()",
2516                 [(int_nvvm_compiler_error Int64Regs:$a)]>;
2519 // isspacep
// Defines the `isspacep.<suffix>` instruction pair for one intrinsic.
//   suffix - text appended to "isspacep." to form the mnemonic.
//   Intr   - intrinsic whose result is written to the predicate $d.
//   Preds  - predicates passed to Requires<>; defaults to none.
// _32 takes the address $a in Int32Regs and _64 takes it in Int64Regs; both
// produce an Int1Regs result.
2521 multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
2522   def _32: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2523               "isspacep." # suffix # "\t$d, $a;",
2524               [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
2525     Requires<Preds>;
2526   def _64: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2527               "isspacep." # suffix # "\t$d, $a;",
2528               [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
2529     Requires<Preds>;
// ISSPACEP instantiations, one per suffix. Only two carry predicates:
// "const" requires hasPTX<31>, and "shared::cluster" requires both
// hasPTX<78> and hasSM<90>. The others use the default empty list.
2532 defm isspace_const  : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
2533 defm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
2534 defm isspace_local  : ISSPACEP<"local", int_nvvm_isspacep_local>;
2535 defm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
2536 defm isspace_shared_cluster : ISSPACEP<"shared::cluster",
2537                                        int_nvvm_isspacep_shared_cluster,
2538                                        [hasPTX<78>, hasSM<90>]>;
2540 // Special register reads
// Copies a SpecialRegs operand $r into the 32-bit register $d with mov.b32.
// It has no pattern of its own; standalone Pat records select it.
2541 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
2542                             (ins SpecialRegs:$r),
2543                             "mov.b32 \t$d, $r;", []>;
// Select each int_nvvm_read_ptx_sreg_envregN intrinsic (N = 0..31) as a
// MOV_SPECIAL of the identically numbered ENVREGN register. The 32 patterns
// differ only in N.
2545 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
2546 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
2547 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
2548 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
2549 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
2550 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
2551 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
2552 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
2553 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
2554 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
2555 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
2556 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2557 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2558 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2559 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2560 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2561 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2562 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2563 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2564 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2565 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2566 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2567 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2568 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2569 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2570 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2571 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2572 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2573 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2574 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2575 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2576 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2579 // rotate builtin support
// int_nvvm_rotate_b32 with an immediate amount, available under hasHWROT32.
// Emitted as shf.l.wrap.b32 with $src supplied for both source operands, so
// the funnel shift acts on the value paired with itself.
2581 def ROTATE_B32_HW_IMM
2582   : NVPTXInst<(outs Int32Regs:$dst),
2583               (ins  Int32Regs:$src, i32imm:$amt),
2584               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2585               [(set Int32Regs:$dst,
2586                  (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2587               Requires<[hasHWROT32]> ;
// int_nvvm_rotate_b32 with the amount in a register, available under
// hasHWROT32. Same shf.l.wrap.b32 encoding as the immediate form: $src is
// used for both source operands.
2589 def ROTATE_B32_HW_REG
2590   : NVPTXInst<(outs Int32Regs:$dst),
2591               (ins  Int32Regs:$src, Int32Regs:$amt),
2592               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2593               [(set Int32Regs:$dst,
2594                  (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2595               Requires<[hasHWROT32]> ;
// Fallbacks for int_nvvm_rotate_b32 when noHWROT32 holds. The immediate form
// expands to ROT32imm_sw, passing the amount and SUB_FRM_32 applied to it;
// the register form expands to ROTL32reg_sw.
// NOTE(review): SUB_FRM_32 is defined elsewhere; presumably it yields
// 32 - amt — confirm.
2597 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2598           (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2599       Requires<[noHWROT32]> ;
2601 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2602           (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2603       Requires<[noHWROT32]> ;
// Extract one 32-bit half of a 64-bit register. Both records unpack $src
// with mov.b64 into a two-element destination and send the unwanted half to
// a scratch register %dummy declared inside the emitted block: GET_LO_INT64
// keeps the first element, GET_HI_INT64 keeps the second. Neither has a
// selection pattern, and both are marked hasSideEffects = false.
2605 let hasSideEffects = false in {
2606   def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2607     !strconcat("{{\n\t",
2608                ".reg .b32 %dummy;\n\t",
2609                "mov.b64 \t{$dst,%dummy}, $src;\n\t",
2610                "}}"),
2611           []> ;
2613   def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2614     !strconcat("{{\n\t",
2615                ".reg .b32 %dummy;\n\t",
2616                "mov.b64 \t{%dummy,$dst}, $src;\n\t",
2617                "}}"),
2618           []> ;
// Builds the 64-bit register $dst from two 32-bit registers with a single
// mov.b64 whose source is the pair ($lo, $hi), in that order. No selection
// pattern; marked hasSideEffects = false.
2621 let hasSideEffects = false in {
2622   def PACK_TWO_INT32
2623     : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2624                 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
// int_nvvm_swap_lo_hi_b64: repack the value with its halves exchanged. The
// result of GET_HI_INT64 is passed as PACK_TWO_INT32's $lo operand and the
// result of GET_LO_INT64 as its $hi operand.
2627 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2628           (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2629                           (GET_LO_INT64 Int64Regs:$src))> ;
2631 // Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
2632 // no side effects.
// Four pattern-less 32-bit funnel-shift records: shf.l.wrap.b32 and
// shf.r.wrap.b32, each with an immediate (_IMM) and a register (_REG) shift
// amount. All take $lo and $hi sources, require hasHWROT32, and are marked
// hasSideEffects = false. Other patterns in this file reference them by name.
2633 let hasSideEffects = false in {
2634   def SHF_L_WRAP_B32_IMM
2635     : NVPTXInst<(outs Int32Regs:$dst),
2636                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2637                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2638       Requires<[hasHWROT32]>;
2640   def SHF_L_WRAP_B32_REG
2641     : NVPTXInst<(outs Int32Regs:$dst),
2642                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2643                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2644       Requires<[hasHWROT32]>;
2646   def SHF_R_WRAP_B32_IMM
2647     : NVPTXInst<(outs Int32Regs:$dst),
2648                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2649                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2650       Requires<[hasHWROT32]>;
2652   def SHF_R_WRAP_B32_REG
2653     : NVPTXInst<(outs Int32Regs:$dst),
2654                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2655                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2656       Requires<[hasHWROT32]>;
2659 // HW version of rotate 64
// int_nvvm_rotate_b64 under hasHWROT32, for immediate and register amounts.
// The 64-bit value is split into halves and rebuilt with PACK_TWO_INT32:
//   packed $lo = SHF_L_WRAP_B32(lo = high half, hi = low half,  amt)
//   packed $hi = SHF_L_WRAP_B32(lo = low half,  hi = high half, amt)
// NOTE(review): if shf's wrap mode reduces amt modulo 32, an amount of 32 or
// more would rotate by amt - 32 without exchanging the halves — confirm the
// expected amount range against the PTX shf documentation.
2660 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2661           (PACK_TWO_INT32
2662             (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2663                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2664             (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2665                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2666       Requires<[hasHWROT32]>;
2668 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2669           (PACK_TWO_INT32
2670             (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2671                                 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2672             (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2673                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2674       Requires<[hasHWROT32]>;
// int_nvvm_rotate_right_b64 under hasHWROT32, for immediate and register
// amounts. Mirrors the left-rotate expansion but uses the right funnel shift:
//   packed $lo = SHF_R_WRAP_B32(lo = low half,  hi = high half, amt)
//   packed $hi = SHF_R_WRAP_B32(lo = high half, hi = low half,  amt)
// NOTE(review): as with the left rotate, amounts of 32 or more may not be
// handled if wrap mode reduces amt modulo 32 — confirm.
2677 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2678           (PACK_TWO_INT32
2679             (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2680                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2681             (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2682                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2683       Requires<[hasHWROT32]>;
2685 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2686           (PACK_TWO_INT32
2687             (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2688                                 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2689             (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2690                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2691       Requires<[hasHWROT32]>;
2693 // SW version of rotate 64
// 64-bit rotate fallbacks when noHWROT32 holds.
//  - Left, immediate:  ROT64imm_sw src, amt, SUB_FRM_64(amt)
//  - Right, immediate: the same ROT64imm_sw with the two amount operands in
//    the opposite order.
//  - Register amounts: ROTL64reg_sw for left, ROTR64reg_sw for right.
// NOTE(review): SUB_FRM_64 is defined elsewhere; presumably it yields
// 64 - amt — confirm.
2694 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2695           (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
2696       Requires<[noHWROT32]>;
2697 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2698           (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2699       Requires<[noHWROT32]>;
2700 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2701           (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2702       Requires<[noHWROT32]>;
2703 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2704           (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2705       Requires<[noHWROT32]>;
2708 //-----------------------------------
2709 // Texture Intrinsics
2710 //-----------------------------------
2712 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2713 // also defined in NVPTXReplaceImageHandles.cpp
2715 // texmode_independent
2716 let IsTex = true, IsTexModeUnified = false in {
2717 // Texture fetch instructions using handles
// Base record for a 1D texture fetch returning four components.
//   inst    - mnemonic text printed verbatim at the start of the asm string.
//   outtype - register class of the results $r, $g, $b, $a.
//   intype  - register class of the coordinate $x.
//   texsamp - (ins ...) dag providing $t and $s; !con places it ahead of the
//             coordinate operand.
// The asm form is `inst {r,g,b,a}, [$t, $s, {$x}];`. No selection pattern.
2719 class TEX_1D_base<string inst, NVPTXRegClass outtype,
2720                   NVPTXRegClass intype, dag texsamp>
2721     : NVPTXInst<(outs outtype:$r, outtype:$g,
2722                       outtype:$b, outtype:$a),
2723                  !con(texsamp, (ins intype:$x)),
2724                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2725                  []>;
// Expands TEX_1D_base into the four $t/$s operand forms: _RR (both in
// Int64Regs), _RI ($t Int64Regs, $s i64imm), _IR ($t i64imm, $s Int64Regs)
// and _II (both i64imm).
2727 multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2728   def _RR : TEX_1D_base<inst, outtype, intype,
2729                         (ins Int64Regs:$t, Int64Regs:$s)>;
2730   def _RI : TEX_1D_base<inst, outtype, intype,
2731                         (ins Int64Regs:$t, i64imm:$s)>;
2732   def _IR : TEX_1D_base<inst, outtype, intype,
2733                         (ins i64imm:$t, Int64Regs:$s)>;
2734   def _II : TEX_1D_base<inst, outtype, intype,
2735                         (ins i64imm:$t, i64imm:$s)>;
// Instantiations. The record-name suffix mirrors the <result>.<coordinate>
// type pair in the mnemonic; the s32 and u32 result variants both use
// Int32Regs and differ only in the mnemonic.
2738 defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
2739 defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2740 defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
2741 defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2742 defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
2743 defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for a 1D texture fetch with an extra $lod operand.
// Parameters are as for the other texture base classes: inst (mnemonic),
// outtype (class of $r/$g/$b/$a), intype (class of $x and $lod), texsamp
// (dag providing $t and $s). $lod is printed after the bracketed address.
// No selection pattern.
2745 class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
2746                         NVPTXRegClass intype, dag texsamp>
2747     : NVPTXInst<(outs outtype:$r, outtype:$g,
2748                       outtype:$b, outtype:$a),
2749                  !con(texsamp, (ins intype:$x, intype:$lod)),
2750                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
2751                  []>;
// Expands TEX_1D_LEVEL_base into the _RR/_RI/_IR/_II forms, where each
// letter says whether $t and $s are Int64Regs (R) or i64imm (I).
2753 multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
2754                         NVPTXRegClass intype> {
2755   def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
2756                               (ins Int64Regs:$t, Int64Regs:$s)>;
2757   def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
2758                               (ins Int64Regs:$t, i64imm:$s)>;
2759   def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
2760                               (ins i64imm:$t, Int64Regs:$s)>;
2761   def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
2762                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations; only Float32Regs coordinates are defined for this form.
2765 defm TEX_1D_F32_F32_LEVEL :
2766   TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2767 defm TEX_1D_S32_F32_LEVEL :
2768   TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2769 defm TEX_1D_U32_F32_LEVEL :
2770   TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for a 1D texture fetch with two extra operands, $gradx and
// $grady, each printed as its own one-element brace group after the address.
// inst is the mnemonic, outtype the class of $r/$g/$b/$a, intype the class
// of $x and both gradient operands, texsamp the dag providing $t and $s.
// No selection pattern.
2772 class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
2773                        NVPTXRegClass intype, dag texsamp>
2774     : NVPTXInst<(outs outtype:$r, outtype:$g,
2775                       outtype:$b, outtype:$a),
2776                  !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
2777                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
2778                         " \\{$gradx\\}, \\{$grady\\};",
2779                  []>;
// Expands TEX_1D_GRAD_base into the _RR/_RI/_IR/_II forms, where each letter
// says whether $t and $s are Int64Regs (R) or i64imm (I).
2781 multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
2782                        NVPTXRegClass intype> {
2783   def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
2784                              (ins Int64Regs:$t, Int64Regs:$s)>;
2785   def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
2786                              (ins Int64Regs:$t, i64imm:$s)>;
2787   def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
2788                              (ins i64imm:$t, Int64Regs:$s)>;
2789   def _II : TEX_1D_GRAD_base<inst, outtype, intype,
2790                              (ins i64imm:$t, i64imm:$s)>;
// Instantiations; only Float32Regs coordinates are defined for this form.
2793 defm TEX_1D_F32_F32_GRAD
2794   : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2795 defm TEX_1D_S32_F32_GRAD
2796   : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2797 defm TEX_1D_U32_F32_GRAD
2798   : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for a 1D array texture fetch. In addition to the coordinate
// $x (class intype) it takes $l, always in Int32Regs, which is printed first
// inside the coordinate braces: [$t, $s, {$l, $x}].
// NOTE(review): $l is presumably the array layer index — confirm.
// inst is the mnemonic, outtype the class of $r/$g/$b/$a, texsamp the dag
// providing $t and $s. No selection pattern.
2800 class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
2801                         NVPTXRegClass intype, dag texsamp>
2802     : NVPTXInst<(outs outtype:$r, outtype:$g,
2803                       outtype:$b, outtype:$a),
2804                  !con(texsamp, (ins Int32Regs:$l, intype:$x)),
2805                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
2806                  []>;
// Expands TEX_1D_ARRAY_base into the _RR/_RI/_IR/_II forms, where each
// letter says whether $t and $s are Int64Regs (R) or i64imm (I).
2808 multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
2809                         NVPTXRegClass intype> {
2810   def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
2811                               (ins Int64Regs:$t, Int64Regs:$s)>;
2812   def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
2813                               (ins Int64Regs:$t, i64imm:$s)>;
2814   def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
2815                               (ins i64imm:$t, Int64Regs:$s)>;
2816   def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
2817                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations for every result/coordinate type pair; the record-name
// suffix mirrors the type pair in the mnemonic.
2820 defm TEX_1D_ARRAY_F32_F32
2821   : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2822 defm TEX_1D_ARRAY_F32_S32
2823   : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
2824 defm TEX_1D_ARRAY_S32_S32
2825   : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
2826 defm TEX_1D_ARRAY_S32_F32
2827   : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2828 defm TEX_1D_ARRAY_U32_S32
2829   : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
2830 defm TEX_1D_ARRAY_U32_F32
2831   : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for a 1D array texture fetch with an extra $lod operand.
// Operands after $t/$s are $l (Int32Regs), $x and $lod (both intype); the
// asm form is [$t, $s, {$l, $x}], $lod. inst is the mnemonic, outtype the
// class of $r/$g/$b/$a, texsamp the dag providing $t and $s.
// No selection pattern.
2833 class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
2834                               NVPTXRegClass intype, dag texsamp>
2835     : NVPTXInst<(outs outtype:$r, outtype:$g,
2836                       outtype:$b, outtype:$a),
2837                  !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
2838                  inst # " \t\\{$r, $g, $b, $a\\},"
2839                         " [$t, $s, \\{$l, $x\\}], $lod;",
2840                  []>;
// Expands TEX_1D_ARRAY_LEVEL_base into the _RR/_RI/_IR/_II forms, where each
// letter says whether $t and $s are Int64Regs (R) or i64imm (I).
2842 multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
2843                               NVPTXRegClass intype> {
2844   def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2845                                     (ins Int64Regs:$t, Int64Regs:$s)>;
2846   def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2847                                     (ins Int64Regs:$t, i64imm:$s)>;
2848   def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2849                                     (ins i64imm:$t, Int64Regs:$s)>;
2850   def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2851                                     (ins i64imm:$t, i64imm:$s)>;
// Instantiations; only Float32Regs coordinates are defined for this form.
2854 defm TEX_1D_ARRAY_F32_F32_LEVEL
2855   : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2856 defm TEX_1D_ARRAY_S32_F32_LEVEL
2857   : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2858 defm TEX_1D_ARRAY_U32_F32_LEVEL
2859   : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for a 1D array texture fetch with two extra operands, $gradx
// and $grady. Operands after $t/$s are $l (Int32Regs), then $x, $gradx and
// $grady (all intype). Each gradient is printed as a one-element brace group
// after the address [$t, $s, {$l, $x}]. inst is the mnemonic, outtype the
// class of $r/$g/$b/$a, texsamp the dag providing $t and $s.
// No selection pattern.
2861 class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
2862                              NVPTXRegClass intype, dag texsamp>
2863     : NVPTXInst<(outs outtype:$r, outtype:$g,
2864                       outtype:$b, outtype:$a),
2865                  !con(texsamp, (ins Int32Regs:$l, intype:$x,
2866                                     intype:$gradx, intype:$grady)),
2867                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
2868                         " \\{$gradx\\}, \\{$grady\\};",
2869                  []>;
// Expands TEX_1D_ARRAY_GRAD_base into the _RR/_RI/_IR/_II forms, where each
// letter says whether $t and $s are Int64Regs (R) or i64imm (I).
2871 multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
2872                              NVPTXRegClass intype> {
2873   def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2874                                    (ins Int64Regs:$t, Int64Regs:$s)>;
2875   def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2876                                    (ins Int64Regs:$t, i64imm:$s)>;
2877   def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2878                                    (ins i64imm:$t, Int64Regs:$s)>;
2879   def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2880                                    (ins i64imm:$t, i64imm:$s)>;
// Instantiations; only Float32Regs coordinates are defined for this form.
2883 defm TEX_1D_ARRAY_F32_F32_GRAD
2884   : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2885 defm TEX_1D_ARRAY_S32_F32_GRAD
2886   : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2887 defm TEX_1D_ARRAY_U32_F32_GRAD
2888   : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for a 2D texture fetch returning four components. Coordinates
// $x and $y (class intype) are printed as a two-element brace group:
// [$t, $s, {$x, $y}]. inst is the mnemonic, outtype the class of
// $r/$g/$b/$a, texsamp the dag providing $t and $s. No selection pattern.
2890 class TEX_2D_base<string inst, NVPTXRegClass outtype,
2891                   NVPTXRegClass intype, dag texsamp>
2892     : NVPTXInst<(outs outtype:$r, outtype:$g,
2893                       outtype:$b, outtype:$a),
2894                  !con(texsamp, (ins intype:$x, intype:$y)),
2895                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
2896                  []>;
// Expands TEX_2D_base into the _RR/_RI/_IR/_II forms, where each letter says
// whether $t and $s are Int64Regs (R) or i64imm (I).
2898 multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2899   def _RR : TEX_2D_base<inst, outtype, intype,
2900                         (ins Int64Regs:$t, Int64Regs:$s)>;
2901   def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
2902   def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
2903   def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
// Instantiations for every result/coordinate type pair; the record-name
// suffix mirrors the type pair in the mnemonic.
2906 defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2907 defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
2908 defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
2909 defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2910 defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
2911 defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for a 2D texture fetch with an extra $lod operand printed
// after the address: [$t, $s, {$x, $y}], $lod. $x, $y and $lod all use
// intype. inst is the mnemonic, outtype the class of $r/$g/$b/$a, texsamp
// the dag providing $t and $s. No selection pattern.
2913 class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
2914                         NVPTXRegClass intype, dag texsamp>
2915     : NVPTXInst<(outs outtype:$r, outtype:$g,
2916                       outtype:$b, outtype:$a),
2917                  !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
2918                  inst # " \t\\{$r, $g, $b, $a\\},"
2919                         " [$t, $s, \\{$x, $y\\}], $lod;",
2920                  []>;
// Expands TEX_2D_LEVEL_base into the _RR/_RI/_IR/_II forms, where each
// letter says whether $t and $s are Int64Regs (R) or i64imm (I).
2922 multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
2923                         NVPTXRegClass intype> {
2924   def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
2925                               (ins Int64Regs:$t, Int64Regs:$s)>;
2926   def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
2927                               (ins Int64Regs:$t, i64imm:$s)>;
2928   def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
2929                               (ins i64imm:$t, Int64Regs:$s)>;
2930   def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
2931                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations; only Float32Regs coordinates are defined for this form.
2934 defm TEX_2D_F32_F32_LEVEL :
2935   TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2936 defm TEX_2D_S32_F32_LEVEL :
2937   TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2938 defm TEX_2D_U32_F32_LEVEL :
2939   TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for a 2D texture fetch with gradient operands. After $t/$s it
// takes $x, $y and four gradient values, all of class intype. The gradients
// are printed as two two-element brace groups after the address:
// {$gradx0, $gradx1}, {$grady0, $grady1}. inst is the mnemonic, outtype the
// class of $r/$g/$b/$a, texsamp the dag providing $t and $s.
// No selection pattern.
2941 class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
2942                        NVPTXRegClass intype, dag texsamp>
2943     : NVPTXInst<(outs outtype:$r, outtype:$g,
2944                       outtype:$b, outtype:$a),
2945                  !con(texsamp, (ins intype:$x, intype:$y,
2946                                     intype:$gradx0, intype:$gradx1,
2947                                     intype:$grady0, intype:$grady1)),
2948                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
2949                         " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
2950                  []>;
// Expands TEX_2D_GRAD_base into the _RR/_RI/_IR/_II forms, where each letter
// says whether $t and $s are Int64Regs (R) or i64imm (I).
2952 multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
2953                        NVPTXRegClass intype> {
2954   def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
2955                               (ins Int64Regs:$t, Int64Regs:$s)>;
2956   def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
2957                               (ins Int64Regs:$t, i64imm:$s)>;
2958   def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
2959                               (ins i64imm:$t, Int64Regs:$s)>;
2960   def _II : TEX_2D_GRAD_base<inst, outtype, intype,
2961                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations; only Float32Regs coordinates are defined for this form.
2964 defm TEX_2D_F32_F32_GRAD :
2965   TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2966 defm TEX_2D_S32_F32_GRAD :
2967   TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2968 defm TEX_2D_U32_F32_GRAD :
2969   TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for a 2D array texture fetch. Operands after $t/$s are $l
// (Int32Regs), $x and $y (intype). The coordinate group is printed with four
// elements, {$l, $x, $y, $y}: $y appears twice although only three operands
// exist.
// NOTE(review): the repeated $y is presumably padding to a four-element
// vector required by the PTX syntax — confirm against the PTX tex docs.
// inst is the mnemonic, outtype the class of $r/$g/$b/$a, texsamp the dag
// providing $t and $s. No selection pattern.
2971 class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
2972                         NVPTXRegClass intype, dag texsamp>
2973     : NVPTXInst<(outs outtype:$r, outtype:$g,
2974                       outtype:$b, outtype:$a),
2975                  !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
2976                  inst # " \t\\{$r, $g, $b, $a\\},"
2977                         " [$t, $s, \\{$l, $x, $y, $y\\}];",
2978                  []>;
// Expands TEX_2D_ARRAY_base into the _RR/_RI/_IR/_II forms, where each
// letter says whether $t and $s are Int64Regs (R) or i64imm (I).
2980 multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
2981                         NVPTXRegClass intype> {
2982   def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
2983                               (ins Int64Regs:$t, Int64Regs:$s)>;
2984   def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
2985                               (ins Int64Regs:$t, i64imm:$s)>;
2986   def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
2987                               (ins i64imm:$t, Int64Regs:$s)>;
2988   def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
2989                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations for every result/coordinate type pair; the record-name
// suffix mirrors the type pair in the mnemonic.
2992 defm TEX_2D_ARRAY_F32_F32
2993   : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
2994 defm TEX_2D_ARRAY_F32_S32
2995   : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
2996 defm TEX_2D_ARRAY_S32_S32
2997   : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
2998 defm TEX_2D_ARRAY_S32_F32
2999   : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3000 defm TEX_2D_ARRAY_U32_S32
3001   : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
3002 defm TEX_2D_ARRAY_U32_F32
3003   : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for a 2D array texture fetch with an extra $lod operand.
// Operands after $t/$s are $l (Int32Regs), then $x, $y and $lod (intype).
// The coordinate group is printed as {$l, $x, $y, $y}, with $y repeated, and
// $lod follows the address.
// NOTE(review): the repeated $y is presumably padding to four elements —
// confirm against the PTX tex docs.
// inst is the mnemonic, outtype the class of $r/$g/$b/$a, texsamp the dag
// providing $t and $s. No selection pattern.
3005 class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3006                               NVPTXRegClass intype, dag texsamp>
3007     : NVPTXInst<(outs outtype:$r, outtype:$g,
3008                       outtype:$b, outtype:$a),
3009                  !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3010                                     intype:$lod)),
3011                  inst # " \t\\{$r, $g, $b, $a\\},"
3012                         " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
3013                  []>;
// Expands TEX_2D_ARRAY_LEVEL_base into the _RR/_RI/_IR/_II forms, where each
// letter says whether $t and $s are Int64Regs (R) or i64imm (I).
3015 multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3016                               NVPTXRegClass intype> {
3017   def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3018                               (ins Int64Regs:$t, Int64Regs:$s)>;
3019   def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3020                               (ins Int64Regs:$t, i64imm:$s)>;
3021   def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3022                               (ins i64imm:$t, Int64Regs:$s)>;
3023   def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3024                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations; only Float32Regs coordinates are defined for this form.
3027 defm TEX_2D_ARRAY_F32_F32_LEVEL
3028   : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3029 defm TEX_2D_ARRAY_S32_F32_LEVEL
3030   : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3031 defm TEX_2D_ARRAY_U32_F32_LEVEL
3032   : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for a 2D array texture fetch with gradient operands. Operands
// after $t/$s are $l (Int32Regs), then $x, $y and four gradient values (all
// intype). The coordinate group is printed as {$l, $x, $y, $y}, with $y
// repeated, followed by {$gradx0, $gradx1} and {$grady0, $grady1}.
// NOTE(review): the repeated $y is presumably padding to four elements —
// confirm against the PTX tex docs.
// inst is the mnemonic, outtype the class of $r/$g/$b/$a, texsamp the dag
// providing $t and $s. No selection pattern.
3034 class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3035                              NVPTXRegClass intype, dag texsamp>
3036     : NVPTXInst<(outs outtype:$r, outtype:$g,
3037                       outtype:$b, outtype:$a),
3038                  !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3039                                     intype:$gradx0, intype:$gradx1,
3040                                     intype:$grady0, intype:$grady1)),
3041                  inst # " \t\\{$r, $g, $b, $a\\},"
3042                         " [$t, $s, \\{$l, $x, $y, $y\\}],"
3043                         " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
3044                  []>;
// Expands TEX_2D_ARRAY_GRAD_base into the _RR/_RI/_IR/_II forms, where each
// letter says whether $t and $s are Int64Regs (R) or i64imm (I).
3046 multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3047                              NVPTXRegClass intype> {
3048   def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3049                               (ins Int64Regs:$t, Int64Regs:$s)>;
3050   def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3051                               (ins Int64Regs:$t, i64imm:$s)>;
3052   def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3053                               (ins i64imm:$t, Int64Regs:$s)>;
3054   def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
3055                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations; only Float32Regs coordinates are defined for this form.
3058 defm TEX_2D_ARRAY_F32_F32_GRAD
3059   : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3060 defm TEX_2D_ARRAY_S32_F32_GRAD
3061   : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3062 defm TEX_2D_ARRAY_U32_F32_GRAD
3063   : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for a 3D texture fetch returning four components. Coordinates
// $x, $y, $z (class intype) are printed as a four-element group,
// {$x, $y, $z, $z}: $z appears twice although only three operands exist.
// NOTE(review): the repeated $z is presumably padding to a four-element
// vector required by the PTX syntax — confirm against the PTX tex docs.
// inst is the mnemonic, outtype the class of $r/$g/$b/$a, texsamp the dag
// providing $t and $s. No selection pattern.
3065 class TEX_3D_base<string inst, NVPTXRegClass outtype,
3066                   NVPTXRegClass intype, dag texsamp>
3067     : NVPTXInst<(outs outtype:$r, outtype:$g,
3068                       outtype:$b, outtype:$a),
3069                  !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
3070                  inst # " \t\\{$r, $g, $b, $a\\},"
3071                         " [$t, $s, \\{$x, $y, $z, $z\\}];",
3072                  []>;
// Expands TEX_3D_base into the _RR/_RI/_IR/_II forms, where each letter says
// whether $t and $s are Int64Regs (R) or i64imm (I).
3074 multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3075   def _RR : TEX_3D_base<inst, outtype, intype,
3076                               (ins Int64Regs:$t, Int64Regs:$s)>;
3077   def _RI : TEX_3D_base<inst, outtype, intype,
3078                               (ins Int64Regs:$t, i64imm:$s)>;
3079   def _IR : TEX_3D_base<inst, outtype, intype,
3080                               (ins i64imm:$t, Int64Regs:$s)>;
3081   def _II : TEX_3D_base<inst, outtype, intype,
3082                               (ins i64imm:$t, i64imm:$s)>;
// Instantiations for every result/coordinate type pair; the record-name
// suffix mirrors the type pair in the mnemonic.
3085 defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3086 defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
3087 defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
3088 defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3089 defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
3090 defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Base record for a 3D texture fetch with an extra $lod operand. $x, $y, $z
// and $lod all use intype. The coordinate group is printed as
// {$x, $y, $z, $z}, with $z repeated, and $lod follows the address.
// NOTE(review): the repeated $z is presumably padding to four elements —
// confirm against the PTX tex docs.
// inst is the mnemonic, outtype the class of $r/$g/$b/$a, texsamp the dag
// providing $t and $s. No selection pattern.
3092 class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
3093                         NVPTXRegClass intype, dag texsamp>
3094     : NVPTXInst<(outs outtype:$r, outtype:$g,
3095                       outtype:$b, outtype:$a),
3096                  !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3097                                     intype:$lod)),
3098                  inst # " \t\\{$r, $g, $b, $a\\},"
3099                         " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
3100                  []>;
// Expands TEX_3D_LEVEL_base into _RR/_RI/_IR/_II variants; each letter says
// whether $t and $s (in that order) are Int64Regs (R) or i64imm (I).
3102 multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
3103                         NVPTXRegClass intype> {
3104   def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,
3105                               (ins Int64Regs:$t, Int64Regs:$s)>;
3106   def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
3107                               (ins Int64Regs:$t, i64imm:$s)>;
3108   def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
3109                               (ins i64imm:$t, Int64Regs:$s)>;
3110   def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
3111                               (ins i64imm:$t, i64imm:$s)>;
// tex.level.3d instantiations. Only f32 coordinates are instantiated here;
// the s32 and u32 result forms share Int32Regs.
3114 defm TEX_3D_F32_F32_LEVEL
3115   : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3116 defm TEX_3D_S32_F32_LEVEL
3117   : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3118 defm TEX_3D_U32_F32_LEVEL
3119   : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a 3D fetch with explicit gradient operands.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `texsamp` dag (provides $t and $s), coordinates $x, $y, $z,
// then $gradx0..$gradx2 and $grady0..$grady2, all of class `intype`.
// Each of the three vectors in the asm string is printed with four elements
// by repeating its last operand ($z, $gradx2, $grady2).
3121 class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
3122                        NVPTXRegClass intype, dag texsamp>
3123     : NVPTXInst<(outs outtype:$r, outtype:$g,
3124                       outtype:$b, outtype:$a),
3125                  !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3126                                     intype :$gradx0, intype:$gradx1,
3127                                     intype:$gradx2, intype:$grady0,
3128                                     intype:$grady1, intype:$grady2)),
3129                  inst # " \t\\{$r, $g, $b, $a\\},"
3130                         " [$t, $s, \\{$x, $y, $z, $z\\}],"
3131                         " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3132                         " \\{$grady0, $grady1, $grady2, $grady2\\};",
3133                  []>;
// Expands TEX_3D_GRAD_base into _RR/_RI/_IR/_II variants; each letter says
// whether $t and $s (in that order) are Int64Regs (R) or i64imm (I).
3135 multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
3136                        NVPTXRegClass intype> {
3137   def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
3138                              (ins Int64Regs:$t, Int64Regs:$s)>;
3139   def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
3140                              (ins Int64Regs:$t, i64imm:$s)>;
3141   def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
3142                              (ins i64imm:$t, Int64Regs:$s)>;
3143   def _II : TEX_3D_GRAD_base<inst, outtype, intype,
3144                              (ins i64imm:$t, i64imm:$s)>;
// tex.grad.3d instantiations (f32 coordinates only); s32 and u32 results
// share Int32Regs.
3147 defm TEX_3D_F32_F32_GRAD
3148   : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3149 defm TEX_3D_S32_F32_GRAD
3150   : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3151 defm TEX_3D_U32_F32_GRAD
3152   : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a cube fetch.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `texsamp` dag (provides $t and $s), then $x, $y, $z of class
// `intype`. The coordinate vector is printed with four elements, repeating
// $z in the last slot.
3154 class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
3155                     NVPTXRegClass intype, dag texsamp>
3156     : NVPTXInst<(outs outtype:$r, outtype:$g,
3157                       outtype:$b, outtype:$a),
3158                  !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
3159                  inst # " \t\\{$r, $g, $b, $a\\},"
3160                         " [$t, $s, \\{$x, $y, $z, $z\\}];",
3161                  []>;
// Expands TEX_CUBE_base into _RR/_RI/_IR/_II variants; each letter says
// whether $t and $s (in that order) are Int64Regs (R) or i64imm (I).
3163 multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3164   def _RR : TEX_CUBE_base<inst, outtype, intype,
3165                           (ins Int64Regs:$t, Int64Regs:$s)>;
3166   def _RI : TEX_CUBE_base<inst, outtype, intype,
3167                           (ins Int64Regs:$t, i64imm:$s)>;
3168   def _IR : TEX_CUBE_base<inst, outtype, intype,
3169                           (ins i64imm:$t, Int64Regs:$s)>;
3170   def _II : TEX_CUBE_base<inst, outtype, intype,
3171                           (ins i64imm:$t, i64imm:$s)>;
// tex.cube instantiations (f32 coordinates only); s32 and u32 results share
// Int32Regs.
3174 defm TEX_CUBE_F32_F32
3175   : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3176 defm TEX_CUBE_S32_F32
3177   : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3178 defm TEX_CUBE_U32_F32
3179   : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a cube fetch with an explicit $lod operand.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `texsamp` dag (provides $t and $s), then $x, $y, $z and $lod
// of class `intype`. $z is repeated to pad the coordinate vector to four
// elements.
3181 class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
3182                           NVPTXRegClass intype, dag texsamp>
3183     : NVPTXInst<(outs outtype:$r, outtype:$g,
3184                       outtype:$b, outtype:$a),
3185                  !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
3186                                     intype:$lod)),
3187                  inst # " \t\\{$r, $g, $b, $a\\},"
3188                         " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
3189                  []>;
// Expands TEX_CUBE_LEVEL_base into _RR/_RI/_IR/_II variants; each letter
// says whether $t and $s (in that order) are Int64Regs (R) or i64imm (I).
3191 multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
3192                           NVPTXRegClass intype> {
3193   def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3194                                 (ins Int64Regs:$t, Int64Regs:$s)>;
3195   def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3196                                 (ins Int64Regs:$t, i64imm:$s)>;
3197   def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3198                                 (ins i64imm:$t, Int64Regs:$s)>;
3199   def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
3200                                 (ins i64imm:$t, i64imm:$s)>;
// tex.level.cube instantiations (f32 coordinates only); s32 and u32 results
// share Int32Regs.
3203 defm TEX_CUBE_F32_F32_LEVEL
3204   : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3205 defm TEX_CUBE_S32_F32_LEVEL
3206   : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3207 defm TEX_CUBE_U32_F32_LEVEL
3208   : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a cube-array fetch.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `texsamp` dag (provides $t and $s), then $l, which is always
// Int32Regs regardless of `intype` (presumably the array layer index --
// confirm), followed by $x, $y, $z of class `intype`.
// The coordinate vector is {$l, $x, $y, $z}, so no padding element is needed.
3210 class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
3211                           NVPTXRegClass intype, dag texsamp>
3212     : NVPTXInst<(outs outtype:$r, outtype:$g,
3213                       outtype:$b, outtype:$a),
3214                  !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3215                                     intype:$z)),
3216                  inst # " \t\\{$r, $g, $b, $a\\},"
3217                         " [$t, $s, \\{$l, $x, $y, $z\\}];",
3218                  []>;
// Expands TEX_CUBE_ARRAY_base into _RR/_RI/_IR/_II variants; each letter
// says whether $t and $s (in that order) are Int64Regs (R) or i64imm (I).
3220 multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3221                           NVPTXRegClass intype> {
3222   def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3223                                 (ins Int64Regs:$t, Int64Regs:$s)>;
3224   def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3225                                 (ins Int64Regs:$t, i64imm:$s)>;
3226   def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3227                                 (ins i64imm:$t, Int64Regs:$s)>;
3228   def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3229                                 (ins i64imm:$t, i64imm:$s)>;
// tex.acube instantiations (f32 coordinates only); s32 and u32 results share
// Int32Regs.
3232 defm TEX_CUBE_ARRAY_F32_F32
3233   : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3234 defm TEX_CUBE_ARRAY_S32_F32
3235   : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3236 defm TEX_CUBE_ARRAY_U32_F32
3237   : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a cube-array fetch with an explicit $lod operand.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `texsamp` dag (provides $t and $s), $l as Int32Regs, then
// $x, $y, $z and $lod of class `intype`.
// The coordinate vector is {$l, $x, $y, $z}; $lod is printed after it.
3239 class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3240                                 NVPTXRegClass intype, dag texsamp>
3241     : NVPTXInst<(outs outtype:$r, outtype:$g,
3242                       outtype:$b, outtype:$a),
3243                  !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3244                                     intype:$z, intype:$lod)),
3245                  inst # " \t\\{$r, $g, $b, $a\\},"
3246                         " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
3247                  []>;
// Expands TEX_CUBE_ARRAY_LEVEL_base into _RR/_RI/_IR/_II variants; each
// letter says whether $t and $s (in that order) are Int64Regs (R) or
// i64imm (I).
3249 multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3250                                 NVPTXRegClass intype> {
3251   def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3252                                       (ins Int64Regs:$t, Int64Regs:$s)>;
3253   def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3254                                       (ins Int64Regs:$t, i64imm:$s)>;
3255   def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3256                                       (ins i64imm:$t, Int64Regs:$s)>;
3257   def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3258                                       (ins i64imm:$t, i64imm:$s)>;
// tex.level.acube instantiations (f32 coordinates only); s32 and u32 results
// share Int32Regs.
3261 defm TEX_CUBE_ARRAY_F32_F32_LEVEL
3262   : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3263                          Float32Regs, Float32Regs>;
3264 defm TEX_CUBE_ARRAY_S32_F32_LEVEL
3265   : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3266                          Int32Regs, Float32Regs>;
3267 defm TEX_CUBE_ARRAY_U32_F32_LEVEL
3268   : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3269                          Int32Regs, Float32Regs>;
// Instruction class for the 2D tld4 forms.
// Outputs: four `outtype` registers ($v0..$v3).
// Inputs: the `texsamp` dag (provides $t and $s), then $x and $y of class
// `intype`. The coordinate vector has exactly two elements (no padding).
3271 class TLD4_2D_base<string inst, NVPTXRegClass outtype,
3272                    NVPTXRegClass intype, dag texsamp>
3273     : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3274                       outtype:$v2, outtype:$v3),
3275                  !con(texsamp, (ins intype:$x, intype:$y)),
3276                  inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
3277                  []>;
// Expands TLD4_2D_base into _RR/_RI/_IR/_II variants; each letter says
// whether $t and $s (in that order) are Int64Regs (R) or i64imm (I).
3279 multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3280   def _RR : TLD4_2D_base<inst, outtype, intype,
3281                          (ins Int64Regs:$t, Int64Regs:$s)>;
3282   def _RI : TLD4_2D_base<inst, outtype, intype,
3283                          (ins Int64Regs:$t, i64imm:$s)>;
3284   def _IR : TLD4_2D_base<inst, outtype, intype,
3285                          (ins i64imm:$t, Int64Regs:$s)>;
3286   def _II : TLD4_2D_base<inst, outtype, intype,
3287                          (ins i64imm:$t, i64imm:$s)>;
// tld4.{r,g,b,a}.2d instantiations: one per component selector (R/G/B/A in
// the record name) and per result type (f32, s32, u32). Coordinates are
// always Float32Regs; s32 and u32 results share Int32Regs.
3290 defm TLD4_R_2D_F32_F32
3291   : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3292 defm TLD4_G_2D_F32_F32
3293   : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3294 defm TLD4_B_2D_F32_F32
3295   : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3296 defm TLD4_A_2D_F32_F32
3297   : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3299 defm TLD4_R_2D_S32_F32
3300   : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3301 defm TLD4_G_2D_S32_F32
3302   : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3303 defm TLD4_B_2D_S32_F32
3304   : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3305 defm TLD4_A_2D_S32_F32
3306   : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3308 defm TLD4_R_2D_U32_F32
3309   : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3310 defm TLD4_G_2D_U32_F32
3311   : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3312 defm TLD4_B_2D_U32_F32
3313   : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3314 defm TLD4_A_2D_U32_F32
3315   : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3320 // texmode_unified
3321 let IsTex = true, IsTexModeUnified = true in {
3322 // Texture fetch instructions using handles
// Instruction class for a unified-mode 1D fetch.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t only; no $s operand appears in the asm
// string), then a single coordinate $x of class `intype`.
3324 class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
3325                           NVPTXRegClass intype, dag tex>
3326     : NVPTXInst<(outs outtype:$r, outtype:$g,
3327                       outtype:$b, outtype:$a),
3328                  !con(tex, (ins intype:$x)),
3329                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3330                  []>;
// Expands TEX_UNIFIED_1D_base into two variants: _R takes $t as Int64Regs,
// _I takes $t as i64imm.
3332 multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
3333                           NVPTXRegClass intype> {
3334   def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3335   def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified tex.1d instantiations, named TEX_UNIFIED_1D_<result>_<coordinate>.
// The .s32 and .u32 forms both use Int32Regs; only the mnemonic differs.
3338 defm TEX_UNIFIED_1D_F32_S32
3339   : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
3340 defm TEX_UNIFIED_1D_F32_F32
3341   : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3342 defm TEX_UNIFIED_1D_S32_S32
3343   : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
3344 defm TEX_UNIFIED_1D_S32_F32
3345   : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3346 defm TEX_UNIFIED_1D_U32_S32
3347   : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
3348 defm TEX_UNIFIED_1D_U32_F32
3349   : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a unified-mode 1D fetch with an explicit $lod.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), then $x and $lod of class `intype`.
3351 class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
3352                                 NVPTXRegClass intype, dag tex>
3353     : NVPTXInst<(outs outtype:$r, outtype:$g,
3354                       outtype:$b, outtype:$a),
3355                  !con(tex, (ins intype:$x, intype:$lod)),
3356                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
3357                  []>;
// Expands TEX_UNIFIED_1D_LEVEL_base into _R ($t as Int64Regs) and _I ($t as
// i64imm) variants.
3359 multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
3360                                 NVPTXRegClass intype> {
3361   def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3362   def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified tex.level.1d instantiations (f32 coordinates only); s32 and u32
// results share Int32Regs.
3365 defm TEX_UNIFIED_1D_F32_F32_LEVEL
3366   : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3367 defm TEX_UNIFIED_1D_S32_F32_LEVEL
3368   : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3369 defm TEX_UNIFIED_1D_U32_F32_LEVEL
3370   : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a unified-mode 1D fetch with explicit gradients.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), then $x, $gradx and $grady of class
// `intype`. Each gradient is printed as its own one-element vector.
3372 class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
3373                                NVPTXRegClass intype, dag tex>
3374     : NVPTXInst<(outs outtype:$r, outtype:$g,
3375                       outtype:$b, outtype:$a),
3376                  !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
3377                  inst # " \t\\{$r, $g, $b, $a\\},"
3378                         " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3379                  []>;
// Expands TEX_UNIFIED_1D_GRAD_base into _R ($t as Int64Regs) and _I ($t as
// i64imm) variants.
3381 multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
3382                                NVPTXRegClass intype> {
3383   def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3384   def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified tex.grad.1d instantiations (f32 coordinates only); s32 and u32
// results share Int32Regs.
3387 defm TEX_UNIFIED_1D_F32_F32_GRAD
3388   : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3389 defm TEX_UNIFIED_1D_S32_F32_GRAD
3390   : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3391 defm TEX_UNIFIED_1D_U32_F32_GRAD
3392   : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a unified-mode 1D-array fetch.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), $l as Int32Regs regardless of
// `intype` (presumably the array layer index -- confirm), then $x of class
// `intype`. The coordinate vector is {$l, $x}.
3394 class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
3395                                 NVPTXRegClass intype, dag tex>
3396     : NVPTXInst<(outs outtype:$r, outtype:$g,
3397                       outtype:$b, outtype:$a),
3398                  !con(tex, (ins Int32Regs:$l, intype:$x)),
3399                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
3400                  []>;
// Expands TEX_UNIFIED_1D_ARRAY_base into _R ($t as Int64Regs) and _I ($t as
// i64imm) variants.
3402 multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
3403                                 NVPTXRegClass intype> {
3404   def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3405   def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified tex.a1d instantiations, named
// TEX_UNIFIED_1D_ARRAY_<result>_<coordinate>. The .s32 and .u32 forms both
// use Int32Regs; only the mnemonic differs.
3408 defm TEX_UNIFIED_1D_ARRAY_F32_S32
3409   : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
3410 defm TEX_UNIFIED_1D_ARRAY_F32_F32
3411   : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
3412 defm TEX_UNIFIED_1D_ARRAY_S32_S32
3413   : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
3414 defm TEX_UNIFIED_1D_ARRAY_S32_F32
3415   : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
3416 defm TEX_UNIFIED_1D_ARRAY_U32_S32
3417   : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
3418 defm TEX_UNIFIED_1D_ARRAY_U32_F32
3419   : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a unified-mode 1D-array fetch with an explicit $lod.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), $l as Int32Regs, then $x and $lod of
// class `intype`. The coordinate vector is {$l, $x}.
3421 class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3422                                       NVPTXRegClass intype, dag tex>
3423     : NVPTXInst<(outs outtype:$r, outtype:$g,
3424                       outtype:$b, outtype:$a),
3425                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
3426                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
3427                  []>;
// Expands TEX_UNIFIED_1D_ARRAY_LEVEL_base into _R ($t as Int64Regs) and _I
// ($t as i64imm) variants.
3429 multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3430                                       NVPTXRegClass intype> {
3431   def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
3432                                            (ins Int64Regs:$t)>;
3433   def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
3434                                            (ins i64imm:$t)>;
// Unified tex.level.a1d instantiations (f32 coordinates only); s32 and u32
// results share Int32Regs.
3437 defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3438   : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
3439                                Float32Regs, Float32Regs>;
3440 defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3441   : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
3442                                Int32Regs, Float32Regs>;
3443 defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3444   : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
3445                                Int32Regs, Float32Regs>;
// Instruction class for a unified-mode 1D-array fetch with explicit
// gradients.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), $l as Int32Regs, then $x, $gradx and
// $grady of class `intype`. Each gradient is printed as a one-element vector.
// Note: the second asm string fragment begins with two spaces, so the
// emitted text has a double space after the output vector's comma.
3447 class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3448                                      NVPTXRegClass intype, dag tex>
3449     : NVPTXInst<(outs outtype:$r, outtype:$g,
3450                       outtype:$b, outtype:$a),
3451                  !con(tex, (ins Int32Regs:$l, intype:$x,
3452                                 intype:$gradx, intype:$grady)),
3453                  inst # " \t\\{$r, $g, $b, $a\\},"
3454                         "  [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3455                  []>;
// Expands TEX_UNIFIED_1D_ARRAY_GRAD_base into _R ($t as Int64Regs) and _I
// ($t as i64imm) variants.
3457 multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3458                                      NVPTXRegClass intype> {
3459   def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
3460                                           (ins Int64Regs:$t)>;
3461   def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
3462                                           (ins i64imm:$t)>;
// Unified tex.grad.a1d instantiations (f32 coordinates only); s32 and u32
// results share Int32Regs.
3465 defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3466   : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
3467                               Float32Regs, Float32Regs>;
3468 defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3469   : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
3470                               Int32Regs, Float32Regs>;
3471 defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3472   : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
3473                               Int32Regs, Float32Regs>;
// Instruction class for a unified-mode 2D fetch.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), then $x and $y of class `intype`.
3475 class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3476                           NVPTXRegClass intype, dag tex>
3477     : NVPTXInst<(outs outtype:$r, outtype:$g,
3478                       outtype:$b, outtype:$a),
3479                  !con(tex, (ins intype:$x, intype:$y)),
3480                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
3481                  []>;
// Expands TEX_UNIFIED_2D_base into _R ($t as Int64Regs) and _I ($t as
// i64imm) variants.
3483 multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3484                           NVPTXRegClass intype> {
3485   def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3486   def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified tex.2d instantiations, named TEX_UNIFIED_2D_<result>_<coordinate>.
// The .s32 and .u32 forms both use Int32Regs; only the mnemonic differs.
3489 defm TEX_UNIFIED_2D_F32_S32
3490   : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
3491 defm TEX_UNIFIED_2D_F32_F32
3492   : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3493 defm TEX_UNIFIED_2D_S32_S32
3494   : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
3495 defm TEX_UNIFIED_2D_S32_F32
3496   : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3497 defm TEX_UNIFIED_2D_U32_S32
3498   : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
3499 defm TEX_UNIFIED_2D_U32_F32
3500   : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a unified-mode 2D fetch with an explicit $lod.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), then $x, $y and $lod of class
// `intype`.
3502 class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
3503                                 NVPTXRegClass intype, dag tex>
3504     : NVPTXInst<(outs outtype:$r, outtype:$g,
3505                       outtype:$b, outtype:$a),
3506                  !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
3507                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
3508                  []>;
// Expands TEX_UNIFIED_2D_LEVEL_base into _R ($t as Int64Regs) and _I ($t as
// i64imm) variants.
3510 multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
3511                                 NVPTXRegClass intype> {
3512   def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3513   def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified tex.level.2d instantiations (f32 coordinates only); s32 and u32
// results share Int32Regs.
3516 defm TEX_UNIFIED_2D_F32_F32_LEVEL
3517   : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3518 defm TEX_UNIFIED_2D_S32_F32_LEVEL
3519   : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3520 defm TEX_UNIFIED_2D_U32_F32_LEVEL
3521   : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a unified-mode 2D fetch with explicit gradients.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), then $x, $y, $gradx0, $gradx1,
// $grady0 and $grady1, all of class `intype`. The two gradients are printed
// as separate two-element vectors after the address operand.
3523 class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
3524                                NVPTXRegClass intype, dag tex>
3525     : NVPTXInst<(outs outtype:$r, outtype:$g,
3526                       outtype:$b, outtype:$a),
3527                  !con(tex, (ins intype:$x, intype:$y,
3528                                 intype:$gradx0, intype:$gradx1,
3529                                 intype:$grady0, intype:$grady1)),
3530                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
3531                         " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
3532                  []>;
// Expands TEX_UNIFIED_2D_GRAD_base into _R ($t as Int64Regs) and _I ($t as
// i64imm) variants.
3533 multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
3534                                NVPTXRegClass intype> {
3535   def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3536   def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified tex.grad.2d instantiations (f32 coordinates only); s32 and u32
// results share Int32Regs.
3539 defm TEX_UNIFIED_2D_F32_F32_GRAD
3540   : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3541 defm TEX_UNIFIED_2D_S32_F32_GRAD
3542   : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3543 defm TEX_UNIFIED_2D_U32_F32_GRAD
3544   : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a unified-mode 2D-array fetch.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), $l as Int32Regs regardless of
// `intype` (presumably the array layer index -- confirm), then $x and $y of
// class `intype`. The coordinate vector is printed with four elements;
// $y is repeated to fill the last slot.
3546 class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
3547                                 NVPTXRegClass intype, dag tex>
3548     : NVPTXInst<(outs outtype:$r, outtype:$g,
3549                       outtype:$b, outtype:$a),
3550                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
3551                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
3552                  []>;
// Expands TEX_UNIFIED_2D_ARRAY_base into _R ($t as Int64Regs) and _I ($t as
// i64imm) variants.
3553 multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
3554                                 NVPTXRegClass intype> {
3555   def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3556   def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified tex.a2d instantiations, named
// TEX_UNIFIED_2D_ARRAY_<result>_<coordinate>. The .s32 and .u32 forms both
// use Int32Regs; only the mnemonic differs.
3559 defm TEX_UNIFIED_2D_ARRAY_F32_S32
3560   : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
3561 defm TEX_UNIFIED_2D_ARRAY_F32_F32
3562   : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3563 defm TEX_UNIFIED_2D_ARRAY_S32_S32
3564   : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
3565 defm TEX_UNIFIED_2D_ARRAY_S32_F32
3566   : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3567 defm TEX_UNIFIED_2D_ARRAY_U32_S32
3568   : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
3569 defm TEX_UNIFIED_2D_ARRAY_U32_F32
3570   : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a unified-mode 2D-array fetch with an explicit $lod.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), $l as Int32Regs, then $x, $y and $lod
// of class `intype`. $y is repeated to pad the coordinate vector to four
// elements.
// Note: the second asm string fragment begins with two spaces, so the
// emitted text has a double space after the output vector's comma.
3572 class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3573                                       NVPTXRegClass intype, dag tex>
3574     : NVPTXInst<(outs outtype:$r, outtype:$g,
3575                       outtype:$b, outtype:$a),
3576                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3577                                 intype:$lod)),
3578                  inst # " \t\\{$r, $g, $b, $a\\},"
3579                         "  [$t, \\{$l, $x, $y, $y\\}], $lod;",
3580                  []>;
// Expands TEX_UNIFIED_2D_ARRAY_LEVEL_base into _R ($t as Int64Regs) and _I
// ($t as i64imm) variants.
3581 multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3582                                       NVPTXRegClass intype> {
3583   def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3584                                            (ins Int64Regs:$t)>;
3585   def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3586                                            (ins i64imm:$t)>;
// Unified tex.level.a2d instantiations (f32 coordinates only); s32 and u32
// results share Int32Regs.
3589 defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3590   : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
3591                                Float32Regs, Float32Regs>;
3592 defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3593   : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
3594                                Int32Regs, Float32Regs>;
3595 defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3596   : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
3597                                Int32Regs, Float32Regs>;
// Instruction class for a unified-mode 2D-array fetch with explicit
// gradients.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), $l as Int32Regs, then $x, $y,
// $gradx0, $gradx1, $grady0 and $grady1 of class `intype`.
// The coordinate vector is {$l, $x, $y, $y} ($y repeated as padding); the
// gradients are printed as two-element vectors.
3599 class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3600                                      NVPTXRegClass intype, dag tex>
3601     : NVPTXInst<(outs outtype:$r, outtype:$g,
3602                       outtype:$b, outtype:$a),
3603                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3604                                 intype:$gradx0, intype:$gradx1,
3605                                 intype:$grady0, intype:$grady1)),
3606                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
3607                         " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
3608                  []>;
// Expands TEX_UNIFIED_2D_ARRAY_GRAD_base into _R ($t as Int64Regs) and _I
// ($t as i64imm) variants.
3609 multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3610                                      NVPTXRegClass intype> {
3611   def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
3612                                           (ins Int64Regs:$t)>;
3613   def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
3614                                           (ins i64imm:$t)>;
// Unified tex.grad.a2d instantiations (f32 coordinates only); s32 and u32
// results share Int32Regs.
3617 defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3618   : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
3619                               Float32Regs, Float32Regs>;
3620 defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3621   : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
3622                               Int32Regs, Float32Regs>;
3623 defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3624   : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
3625                               Int32Regs, Float32Regs>;
// Instruction class for a unified-mode 3D fetch.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), then $x, $y, $z of class `intype`.
// The coordinate vector is printed with four elements; $z is repeated to
// fill the last slot.
3627 class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
3628                           NVPTXRegClass intype, dag tex>
3629     : NVPTXInst<(outs outtype:$r, outtype:$g,
3630                       outtype:$b, outtype:$a),
3631                  !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3632                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
3633                  []>;
// Expands TEX_UNIFIED_3D_base into _R ($t as Int64Regs) and _I ($t as
// i64imm) variants.
3634 multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
3635                           NVPTXRegClass intype> {
3636   def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3637   def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified tex.3d instantiations, named TEX_UNIFIED_3D_<result>_<coordinate>.
// The .s32 and .u32 forms both use Int32Regs; only the mnemonic differs.
3640 defm TEX_UNIFIED_3D_F32_S32
3641   : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
3642 defm TEX_UNIFIED_3D_F32_F32
3643   : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3644 defm TEX_UNIFIED_3D_S32_S32
3645   : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
3646 defm TEX_UNIFIED_3D_S32_F32
3647   : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3648 defm TEX_UNIFIED_3D_U32_S32
3649   : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
3650 defm TEX_UNIFIED_3D_U32_F32
3651   : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a unified-mode 3D fetch with an explicit $lod.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), then $x, $y, $z and $lod of class
// `intype`. $z is repeated to pad the coordinate vector to four elements.
3653 class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
3654                                 NVPTXRegClass intype, dag tex>
3655     : NVPTXInst<(outs outtype:$r, outtype:$g,
3656                       outtype:$b, outtype:$a),
3657                  !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3658                  inst # " \t\\{$r, $g, $b, $a\\},"
3659                         " [$t, \\{$x, $y, $z, $z\\}], $lod;",
3660                  []>;
// Expands TEX_UNIFIED_3D_LEVEL_base into _R ($t as Int64Regs) and _I ($t as
// i64imm) variants.
3661 multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
3662                                 NVPTXRegClass intype> {
3663   def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3664   def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified tex.level.3d instantiations (f32 coordinates only); s32 and u32
// results share Int32Regs.
3667 defm TEX_UNIFIED_3D_F32_F32_LEVEL
3668   : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3669 defm TEX_UNIFIED_3D_S32_F32_LEVEL
3670   : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3671 defm TEX_UNIFIED_3D_U32_F32_LEVEL
3672   : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a unified-mode 3D fetch with explicit gradients.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), coordinates $x, $y, $z, then
// $gradx0..$gradx2 and $grady0..$grady2, all of class `intype`.
// Each of the three vectors in the asm string is printed with four elements
// by repeating its last operand ($z, $gradx2, $grady2).
3674 class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
3675                                NVPTXRegClass intype, dag tex>
3676     : NVPTXInst<(outs outtype:$r, outtype:$g,
3677                       outtype:$b, outtype:$a),
3678                  !con(tex, (ins intype:$x, intype:$y, intype:$z,
3679                                 intype:$gradx0, intype:$gradx1,
3680                                 intype:$gradx2, intype:$grady0,
3681                                 intype:$grady1, intype:$grady2)),
3682                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
3683                         " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3684                         " \\{$grady0, $grady1, $grady2, $grady2\\};",
3685                  []>;
// Expands TEX_UNIFIED_3D_GRAD_base into _R ($t as Int64Regs) and _I ($t as
// i64imm) variants.
3686 multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
3687                                NVPTXRegClass intype> {
3688   def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3689   def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified tex.grad.3d instantiations (f32 coordinates only); s32 and u32
// results share Int32Regs.
3692 defm TEX_UNIFIED_3D_F32_F32_GRAD
3693   : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3694 defm TEX_UNIFIED_3D_S32_F32_GRAD
3695   : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3696 defm TEX_UNIFIED_3D_U32_F32_GRAD
3697   : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a unified-mode cube fetch.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), then $x, $y, $z of class `intype`.
// $z is repeated to pad the coordinate vector to four elements.
3699 class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
3700                             NVPTXRegClass intype, dag tex>
3701     : NVPTXInst<(outs outtype:$r, outtype:$g,
3702                       outtype:$b, outtype:$a),
3703                  !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3704                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
3705                  []>;
// Expands TEX_UNIFIED_CUBE_base into _R ($t as Int64Regs) and _I ($t as
// i64imm) variants.
3706 multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
3707                             NVPTXRegClass intype> {
3708   def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3709   def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
// Unified tex.cube instantiations (f32 coordinates only); s32 and u32
// results share Int32Regs.
3712 defm TEX_UNIFIED_CUBE_F32_F32
3713   : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3714 defm TEX_UNIFIED_CUBE_S32_F32
3715   : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3716 defm TEX_UNIFIED_CUBE_U32_F32
3717   : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// Instruction class for a unified-mode cube fetch with an explicit $lod.
// Outputs: four `outtype` registers ($r, $g, $b, $a).
// Inputs: the `tex` dag (provides $t), then $x, $y, $z and $lod of class
// `intype`. $z is repeated to pad the coordinate vector to four elements.
3719 class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
3720                                   NVPTXRegClass intype, dag tex>
3721     : NVPTXInst<(outs outtype:$r, outtype:$g,
3722                       outtype:$b, outtype:$a),
3723                  !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3724                  inst # " \t\\{$r, $g, $b, $a\\},"
3725                         " [$t, \\{$x, $y, $z, $z\\}], $lod;",
3726                  []>;
// Expands TEX_UNIFIED_CUBE_LEVEL_base into _R ($t as Int64Regs) and _I ($t
// as i64imm) variants.
3727 multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
3728                                   NVPTXRegClass intype> {
3729   def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
3730                                        (ins Int64Regs:$t)>;
3731   def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
3732                                        (ins i64imm:$t)>;
3735 defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
3736   : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
3737                            Float32Regs, Float32Regs>;
3738 defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
3739   : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
3740                            Int32Regs, Float32Regs>;
3741 defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
3742   : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
3743                            Int32Regs, Float32Regs>;
// TEX_UNIFIED_CUBE_ARRAY_base: instruction record for the "tex.acube" family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the coordinates ($x, $y, $z).
//   tex     - (ins ...) dag that supplies the $t operand; !con places it in
//             front of the remaining operands.
// An extra Int32Regs operand $l is printed as the first element of the
// coordinate vector, so $l, $x, $y, $z already make four elements and nothing
// is repeated. The pattern list is empty.
3745 class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
3746                                   NVPTXRegClass intype, dag tex>
3747     : NVPTXInst<(outs outtype:$r, outtype:$g,
3748                       outtype:$b, outtype:$a),
3749                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
3750                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
3751                  []>;
// Emits two records per instantiation: _R takes $t as an Int64Regs register,
// _I takes $t as an i64imm immediate.
3752 multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3753                                   NVPTXRegClass intype> {
3754   def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
3755                                        (ins Int64Regs:$t)>;
3756   def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
3757                                        (ins i64imm:$t)>;
// f32, s32 and u32 result variants; the coordinates are Float32Regs in all
// three.
3760 defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
3761   : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3762 defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
3763   : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3764 defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
3765   : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_UNIFIED_CUBE_ARRAY_LEVEL_base: instruction record for the
// "tex.level.acube" family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the coordinates ($x, $y, $z) and of $lod.
//   tex     - (ins ...) dag that supplies the $t operand; !con places it in
//             front of the remaining operands.
// The Int32Regs operand $l is printed as the first element of the coordinate
// vector, and $lod is printed as a separate trailing operand. The pattern
// list is empty.
3767 class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3768                                         NVPTXRegClass intype, dag tex>
3769     : NVPTXInst<(outs outtype:$r, outtype:$g,
3770                       outtype:$b, outtype:$a),
3771                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
3772                                 intype:$lod)),
3773                  inst # " \t\\{$r, $g, $b, $a\\},"
3774                         " [$t, \\{$l, $x, $y, $z\\}], $lod;",
3775                  []>;
// Emits two records per instantiation: _R takes $t as an Int64Regs register,
// _I takes $t as an i64imm immediate.
3776 multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3777                                         NVPTXRegClass intype> {
3778   def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3779                                              (ins Int64Regs:$t)>;
3780   def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3781                                              (ins i64imm:$t)>;
// f32, s32 and u32 result variants; the inputs are Float32Regs in all three.
3784 defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3785   : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3786                                  Float32Regs, Float32Regs>;
3787 defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3788   : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3789                                  Int32Regs, Float32Regs>;
3790 defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3791   : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3792                                  Int32Regs, Float32Regs>;
// TEX_UNIFIED_CUBE_GRAD_base: instruction record for the "tex.grad.cube"
// family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the coordinates ($x, $y, $z) and of the six
//             gradient operands ($gradx0..2, $grady0..2).
//   tex     - (ins ...) dag that supplies the $t operand; !con places it in
//             front of the remaining operands.
// Each of the three braced vectors in the asm string is padded to four
// elements by printing its last operand twice ($z, $gradx2, $grady2).
// The pattern list is empty.
3794 class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype,
3795                                  NVPTXRegClass intype, dag tex>
3796     : NVPTXInst<(outs outtype:$r, outtype:$g,
3797                       outtype:$b, outtype:$a),
3798                  !con(tex, (ins intype:$x, intype:$y, intype:$z,
3799                                 intype:$gradx0, intype:$gradx1,
3800                                 intype:$gradx2, intype:$grady0,
3801                                 intype:$grady1, intype:$grady2)),
3802                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
3803                         " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3804                         " \\{$grady0, $grady1, $grady2, $grady2\\};",
3805                  []>;
// Emits two records per instantiation: _R takes $t as an Int64Regs register,
// _I takes $t as an i64imm immediate.
3807 multiclass TEX_UNIFIED_CUBE_GRAD<string inst, NVPTXRegClass outtype,
3808                                  NVPTXRegClass intype> {
3809   def _R : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3810   def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
// f32, s32 and u32 result variants; the inputs are Float32Regs in all three.
3813 defm TEX_UNIFIED_CUBE_F32_F32_GRAD
3814   : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3815 defm TEX_UNIFIED_CUBE_S32_F32_GRAD
3816   : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3817 defm TEX_UNIFIED_CUBE_U32_F32_GRAD
3818   : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs>;
// TEX_UNIFIED_CUBE_ARRAY_GRAD_base: instruction record for the
// "tex.grad.acube" family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the coordinates ($x, $y, $z) and of the six
//             gradient operands ($gradx0..2, $grady0..2).
//   tex     - (ins ...) dag that supplies the $t operand; !con places it in
//             front of the remaining operands.
// The Int32Regs operand $l is printed as the first element of the coordinate
// vector. The two gradient vectors are padded to four elements by printing
// $gradx2 and $grady2 twice. The pattern list is empty.
3820 class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3821                                        NVPTXRegClass intype, dag tex>
3822     : NVPTXInst<(outs outtype:$r, outtype:$g,
3823                       outtype:$b, outtype:$a),
3824                  !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
3825                                 intype:$gradx0, intype:$gradx1,
3826                                 intype:$gradx2, intype:$grady0,
3827                                 intype:$grady1, intype:$grady2)),
3828                  inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}],"
3829                         " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3830                         " \\{$grady0, $grady1, $grady2, $grady2\\};",
3831                  []>;
// Emits two records per instantiation: _R takes $t as an Int64Regs register,
// _I takes $t as an i64imm immediate.
3832 multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3833                                        NVPTXRegClass intype> {
3834   def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
3835                                             (ins Int64Regs:$t)>;
3836   def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
3837                                             (ins i64imm:$t)>;
// f32, s32 and u32 result variants; the inputs are Float32Regs in all three.
3840 defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD
3841   : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.f32.f32",
3842                                 Float32Regs, Float32Regs>;
3843 defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD
3844   : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.s32.f32",
3845                                 Int32Regs, Float32Regs>;
3846 defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD
3847   : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.u32.f32",
3848                                 Int32Regs, Float32Regs>;
// TLD4_UNIFIED_2D_base: instruction record for the "tld4.*.2d" family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the four results ($v0, $v1, $v2, $v3).
//   intype  - register class of the two coordinates ($x, $y).
//   tex     - (ins ...) dag that supplies the $t operand; !con places it in
//             front of the coordinate operands.
// The coordinate vector is printed with two elements. The pattern list is
// empty.
3850 class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3851                            NVPTXRegClass intype, dag tex>
3852     : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3853                       outtype:$v2, outtype:$v3),
3854                  !con(tex, (ins intype:$x, intype:$y)),
3855                  inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
3856                  []>;
// Emits two records per instantiation: _R takes $t as an Int64Regs register,
// _I takes $t as an i64imm immediate.
3857 multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3858                            NVPTXRegClass intype> {
3859   def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3860   def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
// Instantiations: one per opcode component suffix (.r, .g, .b, .a), first
// with f32 results, then s32, then u32. Coordinates are Float32Regs
// throughout.
3863 defm TLD4_UNIFIED_R_2D_F32_F32
3864   : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3865 defm TLD4_UNIFIED_G_2D_F32_F32
3866   : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3867 defm TLD4_UNIFIED_B_2D_F32_F32
3868   : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3869 defm TLD4_UNIFIED_A_2D_F32_F32
3870   : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3872 defm TLD4_UNIFIED_R_2D_S32_F32
3873   : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3874 defm TLD4_UNIFIED_G_2D_S32_F32
3875   : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3876 defm TLD4_UNIFIED_B_2D_S32_F32
3877   : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3878 defm TLD4_UNIFIED_A_2D_S32_F32
3879   : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3881 defm TLD4_UNIFIED_R_2D_U32_F32
3882   : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3883 defm TLD4_UNIFIED_G_2D_U32_F32
3884   : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3885 defm TLD4_UNIFIED_B_2D_U32_F32
3886   : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3887 defm TLD4_UNIFIED_A_2D_U32_F32
3888   : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3894 //=== Surface load instructions
// Opens a let scope that sets the IsSuld field to true on the records defined
// inside it.
// NOTE(review): later scopes in this file assign the integers 2 and 3 to the
// same field, so `true` is presumably meant as the value 1 - confirm against
// the field's declaration.
3896 let IsSuld = true in {
// SULD_1D_base: instruction record for the single-result "suld.b.1d" family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the result $r.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs coordinate $x.
// The pattern list is empty.
3898 class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
3899     : NVPTXInst<(outs outtype:$r),
3900                 !con(surf, (ins Int32Regs:$x)),
3901                 inst # " \\{$r\\}, [$s, \\{$x\\}];",
3902                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
3903 multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
3904   def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
3905   def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32/b64 opcodes, grouped by the .clamp, .trap
// and .zero opcode suffix. The b8 opcodes use Int16Regs, the same class as
// b16.
3908 defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
3909 defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
3910 defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
3911 defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;
3913 defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
3914 defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
3915 defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
3916 defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;
3918 defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
3919 defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
3920 defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
3921 defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
// SULD_1D_ARRAY_base: instruction record for the single-result "suld.b.a1d"
// family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the result $r.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs operands $l and $x.
// $l is printed before $x in the coordinate vector. The pattern list is empty.
3923 class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
3924     : NVPTXInst<(outs outtype:$r),
3925                 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
3926                 inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
3927                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
3928 multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
3929   def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
3930   def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32/b64 opcodes, grouped by the .clamp, .trap
// and .zero opcode suffix. The b8 opcodes use Int16Regs, the same class as
// b16.
3933 defm SULD_1D_ARRAY_I8_CLAMP
3934   : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
3935 defm SULD_1D_ARRAY_I16_CLAMP
3936   : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
3937 defm SULD_1D_ARRAY_I32_CLAMP
3938   : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
3939 defm SULD_1D_ARRAY_I64_CLAMP
3940   : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;
3942 defm SULD_1D_ARRAY_I8_TRAP
3943   : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
3944 defm SULD_1D_ARRAY_I16_TRAP
3945   : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
3946 defm SULD_1D_ARRAY_I32_TRAP
3947   : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
3948 defm SULD_1D_ARRAY_I64_TRAP
3949   : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;
3951 defm SULD_1D_ARRAY_I8_ZERO
3952   : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
3953 defm SULD_1D_ARRAY_I16_ZERO
3954   : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
3955 defm SULD_1D_ARRAY_I32_ZERO
3956   : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
3957 defm SULD_1D_ARRAY_I64_ZERO
3958   : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
// SULD_2D_base: instruction record for the single-result "suld.b.2d" family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the result $r.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs coordinates $x and $y.
// The pattern list is empty.
3960 class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
3961     : NVPTXInst<(outs outtype:$r),
3962                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
3963                 inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
3964                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
3965 multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
3966   def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
3967   def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32/b64 opcodes, grouped by the .clamp, .trap
// and .zero opcode suffix. The b8 opcodes use Int16Regs, the same class as
// b16.
3970 defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
3971 defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
3972 defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
3973 defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;
3975 defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
3976 defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
3977 defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
3978 defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;
3980 defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
3981 defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
3982 defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
3983 defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
// SULD_2D_ARRAY_base: instruction record for the single-result "suld.b.a2d"
// family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the result $r.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs operands $l, $x and $y.
// $l is printed first in the coordinate vector, and $y is printed twice so
// that the vector has four elements. The pattern list is empty.
3985 class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
3986     : NVPTXInst<(outs outtype:$r),
3987                 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
3988                 inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3989                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
3990 multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
3991   def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
3992   def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32/b64 opcodes, grouped by the .clamp, .trap
// and .zero opcode suffix. The b8 opcodes use Int16Regs, the same class as
// b16.
3995 defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
3996 defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
3997 defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
3998 defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;
4000 defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
4001 defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
4002 defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
4003 defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;
4005 defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
4006 defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
4007 defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
4008 defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
// SULD_3D_base: instruction record for the single-result "suld.b.3d" family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the result $r.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs coordinates $x, $y and $z.
// $z is printed twice so that the coordinate vector has four elements.
// The pattern list is empty.
4010 class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
4011     : NVPTXInst<(outs outtype:$r),
4012                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4013                 inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4014                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
4015 multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
4016   def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
4017   def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32/b64 opcodes, grouped by the .clamp, .trap
// and .zero opcode suffix. The b8 opcodes use Int16Regs, the same class as
// b16.
4020 defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
4021 defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
4022 defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
4023 defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;
4025 defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
4026 defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
4027 defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
4028 defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;
4030 defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
4031 defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
4032 defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
4033 defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
// Opens a let scope that sets the IsSuld field to 2 on the records defined
// inside it.
// NOTE(review): the records in this scope all have two results, so 2
// presumably tags the v2 forms - confirm against the code that reads IsSuld.
4036 let IsSuld = 2 in {
// SULD_1D_V2_base: instruction record for the two-result "suld.b.1d.v2"
// family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the results $r and $g.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs coordinate $x.
// The pattern list is empty.
4038 class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4039     : NVPTXInst<(outs outtype:$r, outtype:$g),
4040                 !con(surf, (ins Int32Regs:$x)),
4041                 inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
4042                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
4043 multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
4044   def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4045   def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32/b64 opcodes, grouped by the .clamp, .trap
// and .zero opcode suffix. The b8 opcodes use Int16Regs, the same class as
// b16.
4048 defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
4049 defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
4050 defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
4051 defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;
4053 defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
4054 defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
4055 defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
4056 defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;
4058 defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
4059 defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
4060 defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
4061 defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
// SULD_1D_ARRAY_V2_base: instruction record for the two-result
// "suld.b.a1d.v2" family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the results $r and $g.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs operands $l and $x.
// $l is printed before $x in the coordinate vector. The pattern list is empty.
4063 class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4064     : NVPTXInst<(outs outtype:$r, outtype:$g),
4065                 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
4066                 inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4067                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
4068 multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
4069   def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4070   def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32/b64 opcodes, grouped by the .clamp, .trap
// and .zero opcode suffix. The b8 opcodes use Int16Regs, the same class as
// b16.
4073 defm SULD_1D_ARRAY_V2I8_CLAMP
4074   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
4075 defm SULD_1D_ARRAY_V2I16_CLAMP
4076   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
4077 defm SULD_1D_ARRAY_V2I32_CLAMP
4078   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
4079 defm SULD_1D_ARRAY_V2I64_CLAMP
4080   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;
4082 defm SULD_1D_ARRAY_V2I8_TRAP
4083   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
4084 defm SULD_1D_ARRAY_V2I16_TRAP
4085   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
4086 defm SULD_1D_ARRAY_V2I32_TRAP
4087   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
4088 defm SULD_1D_ARRAY_V2I64_TRAP
4089   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;
4091 defm SULD_1D_ARRAY_V2I8_ZERO
4092   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
4093 defm SULD_1D_ARRAY_V2I16_ZERO
4094   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
4095 defm SULD_1D_ARRAY_V2I32_ZERO
4096   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
4097 defm SULD_1D_ARRAY_V2I64_ZERO
4098   : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
// SULD_2D_V2_base: instruction record for the two-result "suld.b.2d.v2"
// family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the results $r and $g.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs coordinates $x and $y.
// The pattern list is empty.
4100 class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4101     : NVPTXInst<(outs outtype:$r, outtype:$g),
4102                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4103                 inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4104                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
4105 multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
4106   def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4107   def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32/b64 opcodes, grouped by the .clamp, .trap
// and .zero opcode suffix. The b8 opcodes use Int16Regs, the same class as
// b16.
4110 defm SULD_2D_V2I8_CLAMP
4111   : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
4112 defm SULD_2D_V2I16_CLAMP
4113   : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
4114 defm SULD_2D_V2I32_CLAMP
4115   : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
4116 defm SULD_2D_V2I64_CLAMP
4117   : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;
4119 defm SULD_2D_V2I8_TRAP
4120   : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
4121 defm SULD_2D_V2I16_TRAP
4122   : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
4123 defm SULD_2D_V2I32_TRAP
4124   : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
4125 defm SULD_2D_V2I64_TRAP
4126   : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;
4128 defm SULD_2D_V2I8_ZERO
4129   : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
4130 defm SULD_2D_V2I16_ZERO
4131   : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
4132 defm SULD_2D_V2I32_ZERO
4133   : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
4134 defm SULD_2D_V2I64_ZERO
4135   : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
// SULD_2D_ARRAY_V2_base: instruction record for the two-result
// "suld.b.a2d.v2" family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the results $r and $g.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs operands $l, $x and $y.
// $l is printed first in the coordinate vector, and $y is printed twice so
// that the vector has four elements. The pattern list is empty.
4137 class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4138     : NVPTXInst<(outs outtype:$r, outtype:$g),
4139                 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4140                 inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
4141                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
4142 multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
4143   def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4144   def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32/b64 opcodes, grouped by the .clamp, .trap
// and .zero opcode suffix. The b8 opcodes use Int16Regs, the same class as
// b16.
4147 defm SULD_2D_ARRAY_V2I8_CLAMP
4148   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
4149 defm SULD_2D_ARRAY_V2I16_CLAMP
4150   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
4151 defm SULD_2D_ARRAY_V2I32_CLAMP
4152   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
4153 defm SULD_2D_ARRAY_V2I64_CLAMP
4154   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;
4156 defm SULD_2D_ARRAY_V2I8_TRAP
4157   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
4158 defm SULD_2D_ARRAY_V2I16_TRAP
4159   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
4160 defm SULD_2D_ARRAY_V2I32_TRAP
4161   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
4162 defm SULD_2D_ARRAY_V2I64_TRAP
4163   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;
4165 defm SULD_2D_ARRAY_V2I8_ZERO
4166   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
4167 defm SULD_2D_ARRAY_V2I16_ZERO
4168   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
4169 defm SULD_2D_ARRAY_V2I32_ZERO
4170   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
4171 defm SULD_2D_ARRAY_V2I64_ZERO
4172   : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
// SULD_3D_V2_base: instruction record for the two-result "suld.b.3d.v2"
// family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the results $r and $g.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs coordinates $x, $y and $z.
// $z is printed twice so that the coordinate vector has four elements.
// The pattern list is empty.
4174 class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4175     : NVPTXInst<(outs outtype:$r, outtype:$g),
4176                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4177                 inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4178                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
4179 multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
4180   def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4181   def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32/b64 opcodes, grouped by the .clamp, .trap
// and .zero opcode suffix. The b8 opcodes use Int16Regs, the same class as
// b16.
4184 defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
4185 defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
4186 defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
4187 defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;
4189 defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
4190 defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
4191 defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
4192 defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;
4194 defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
4195 defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
4196 defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
4197 defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
// Opens a let scope that sets the IsSuld field to 3 on the records defined
// inside it.
// NOTE(review): the records in this scope all have four results, so 3
// presumably tags the v4 forms - confirm against the code that reads IsSuld.
4201 let IsSuld = 3 in {
// SULD_1D_V4_base: instruction record for the four-result "suld.b.1d.v4"
// family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the results $r, $g, $b and $a.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs coordinate $x.
// The pattern list is empty.
4203 class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4204     : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4205                 !con(surf, (ins Int32Regs:$x)),
4206                 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4207                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
4208 multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
4209   def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4210   def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32 opcodes only (no b64 form is defined
// here), grouped by the .clamp, .trap and .zero opcode suffix. The b8 opcodes
// use Int16Regs, the same class as b16.
4213 defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
4214 defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
4215 defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
4217 defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
4218 defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
4219 defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
4221 defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
4222 defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
4223 defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
// SULD_1D_ARRAY_V4_base: instruction record for the four-result
// "suld.b.a1d.v4" family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the results $r, $g, $b and $a.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs operands $l and $x.
// $l is printed before $x in the coordinate vector. The pattern list is empty.
4225 class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4226     : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4227                 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
4228                 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
4229                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
4230 multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4231   def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4232   def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32 opcodes only (no b64 form is defined
// here), grouped by the .clamp, .trap and .zero opcode suffix. The b8 opcodes
// use Int16Regs, the same class as b16.
4235 defm SULD_1D_ARRAY_V4I8_CLAMP
4236   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
4237 defm SULD_1D_ARRAY_V4I16_CLAMP
4238   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
4239 defm SULD_1D_ARRAY_V4I32_CLAMP
4240   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
4242 defm SULD_1D_ARRAY_V4I8_TRAP
4243   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
4244 defm SULD_1D_ARRAY_V4I16_TRAP
4245   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
4246 defm SULD_1D_ARRAY_V4I32_TRAP
4247   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
4249 defm SULD_1D_ARRAY_V4I8_ZERO
4250   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
4251 defm SULD_1D_ARRAY_V4I16_ZERO
4252   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
4253 defm SULD_1D_ARRAY_V4I32_ZERO
4254   : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
// SULD_2D_V4_base: instruction record for the four-result "suld.b.2d.v4"
// family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the results $r, $g, $b and $a.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs coordinates $x and $y.
// The pattern list is empty.
4256 class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4257     : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4258                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4259                 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4260                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
4261 multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
4262   def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4263   def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32 opcodes only (no b64 form is defined
// here), grouped by the .clamp, .trap and .zero opcode suffix. The b8 opcodes
// use Int16Regs, the same class as b16.
4266 defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
4267 defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
4268 defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
4270 defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
4271 defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
4272 defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
4274 defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
4275 defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
4276 defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
// SULD_2D_ARRAY_V4_base: instruction record for the four-result
// "suld.b.a2d.v4" family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the results $r, $g, $b and $a.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs operands $l, $x and $y.
// $l is printed first in the coordinate vector, and $y is printed twice so
// that the vector has four elements. The pattern list is empty.
4278 class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4279     : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4280                 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4281                 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
4282                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
4283 multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4284   def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4285   def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32 opcodes only (no b64 form is defined
// here), grouped by the .clamp, .trap and .zero opcode suffix. The b8 opcodes
// use Int16Regs, the same class as b16.
4288 defm SULD_2D_ARRAY_V4I8_CLAMP
4289   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
4290 defm SULD_2D_ARRAY_V4I16_CLAMP
4291   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
4292 defm SULD_2D_ARRAY_V4I32_CLAMP
4293   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
4295 defm SULD_2D_ARRAY_V4I8_TRAP
4296   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
4297 defm SULD_2D_ARRAY_V4I16_TRAP
4298   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
4299 defm SULD_2D_ARRAY_V4I32_TRAP
4300   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
4302 defm SULD_2D_ARRAY_V4I8_ZERO
4303   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
4304 defm SULD_2D_ARRAY_V4I16_ZERO
4305   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
4306 defm SULD_2D_ARRAY_V4I32_ZERO
4307   : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
// SULD_3D_V4_base: instruction record for the four-result "suld.b.3d.v4"
// family.
//   inst    - opcode text placed at the front of the asm string.
//   outtype - register class of the results $r, $g, $b and $a.
//   surf    - (ins ...) dag that supplies the $s operand; !con places it in
//             front of the Int32Regs coordinates $x, $y and $z.
// $z is printed twice so that the coordinate vector has four elements.
// The pattern list is empty.
4309 class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4310     : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4311                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4312                 inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
4313                 []>;
// Emits two records per instantiation: _R takes $s as an Int64Regs register,
// _I takes $s as an i64imm immediate.
4314 multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
4315   def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4316   def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
// Instantiations for the b8/b16/b32 opcodes only (no b64 form is defined
// here), grouped by the .clamp, .trap and .zero opcode suffix. The b8 opcodes
// use Int16Regs, the same class as b16.
4319 defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
4320 defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
4321 defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
4323 defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
4324 defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
4325 defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
4327 defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
4328 defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
4329 defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
4333 //-----------------------------------
4334 // Texture Query Intrinsics
4335 //-----------------------------------
// Instruction records for the "txq.*.b32" family. The enclosing let scope
// sets IsSurfTexQuery to true on every record defined inside it.
// Each opcode is defined twice with the same asm string:
//   *_R takes the operand $a as an Int64Regs register,
//   *_I takes the operand $a as an i64imm immediate.
// Every record has one Int32Regs result $d and an empty pattern list.
4337 let IsSurfTexQuery = true in {
4338 def TXQ_CHANNEL_ORDER_R
4339   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4340               "txq.channel_order.b32 \t$d, [$a];",
4341               []>;
4342 def TXQ_CHANNEL_ORDER_I
4343   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4344               "txq.channel_order.b32 \t$d, [$a];",
4345               []>;
4346 def TXQ_CHANNEL_DATA_TYPE_R
4347   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4348               "txq.channel_data_type.b32 \t$d, [$a];",
4349               []>;
4350 def TXQ_CHANNEL_DATA_TYPE_I
4351   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4352               "txq.channel_data_type.b32 \t$d, [$a];",
4353               []>;
4354 def TXQ_WIDTH_R
4355   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4356               "txq.width.b32 \t$d, [$a];",
4357               []>;
4358 def TXQ_WIDTH_I
4359   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4360               "txq.width.b32 \t$d, [$a];",
4361               []>;
4362 def TXQ_HEIGHT_R
4363   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4364               "txq.height.b32 \t$d, [$a];",
4365               []>;
4366 def TXQ_HEIGHT_I
4367   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4368               "txq.height.b32 \t$d, [$a];",
4369               []>;
4370 def TXQ_DEPTH_R
4371   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4372               "txq.depth.b32 \t$d, [$a];",
4373               []>;
4374 def TXQ_DEPTH_I
4375   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4376               "txq.depth.b32 \t$d, [$a];",
4377               []>;
4378 def TXQ_ARRAY_SIZE_R
4379   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4380               "txq.array_size.b32 \t$d, [$a];",
4381               []>;
4382 def TXQ_ARRAY_SIZE_I
4383   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4384               "txq.array_size.b32 \t$d, [$a];",
4385               []>;
4386 def TXQ_NUM_SAMPLES_R
4387   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4388               "txq.num_samples.b32 \t$d, [$a];",
4389               []>;
4390 def TXQ_NUM_SAMPLES_I
4391   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4392               "txq.num_samples.b32 \t$d, [$a];",
4393               []>;
4394 def TXQ_NUM_MIPMAP_LEVELS_R
4395   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4396               "txq.num_mipmap_levels.b32 \t$d, [$a];",
4397               []>;
4398 def TXQ_NUM_MIPMAP_LEVELS_I
4399   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4400               "txq.num_mipmap_levels.b32 \t$d, [$a];",
4401               []>;
// Selection patterns for the texture query intrinsics: each int_nvvm_txq_*
// intrinsic applied to a 64-bit register operand selects the register-handle
// (*_R) instruction for the same attribute. None of these patterns produces
// an immediate-handle (*_I) instruction.
// NOTE(review): the *_I forms are presumably substituted later, once the
// handle value is known -- confirm where that happens.
4404 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4405           (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
4406 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4407           (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
4408 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4409           (TXQ_WIDTH_R Int64Regs:$a)>;
4410 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4411           (TXQ_HEIGHT_R Int64Regs:$a)>;
4412 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4413           (TXQ_DEPTH_R Int64Regs:$a)>;
4414 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4415           (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
4416 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4417           (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
4418 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4419           (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
4422 //-----------------------------------
4423 // Surface Query Intrinsics
4424 //-----------------------------------
// Surface query instructions. Every record below is an NVPTXInst that writes
// a 32-bit result ($d, Int32Regs) and uses the assembly string
// "suq.<attribute>.b32 $d, [$a];". Each attribute is defined twice; the two
// records differ only in the class of the handle operand $a:
//   *_R : $a is a 64-bit register (Int64Regs)
//   *_I : $a is a 64-bit immediate (i64imm)
// The attributes defined here are channel_order, channel_data_type, width,
// height, depth and array_size. All records have an empty inline pattern list
// and are defined with IsSurfTexQuery set to true.
4426 let IsSurfTexQuery = true in {
4427 def SUQ_CHANNEL_ORDER_R
4428   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4429               "suq.channel_order.b32 \t$d, [$a];",
4430               []>;
4431 def SUQ_CHANNEL_ORDER_I
4432   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4433               "suq.channel_order.b32 \t$d, [$a];",
4434               []>;
4435 def SUQ_CHANNEL_DATA_TYPE_R
4436   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4437               "suq.channel_data_type.b32 \t$d, [$a];",
4438               []>;
4439 def SUQ_CHANNEL_DATA_TYPE_I
4440   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4441               "suq.channel_data_type.b32 \t$d, [$a];",
4442               []>;
4443 def SUQ_WIDTH_R
4444   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4445               "suq.width.b32 \t$d, [$a];",
4446               []>;
4447 def SUQ_WIDTH_I
4448   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4449               "suq.width.b32 \t$d, [$a];",
4450               []>;
4451 def SUQ_HEIGHT_R
4452   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4453               "suq.height.b32 \t$d, [$a];",
4454               []>;
4455 def SUQ_HEIGHT_I
4456   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4457               "suq.height.b32 \t$d, [$a];",
4458               []>;
4459 def SUQ_DEPTH_R
4460   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4461               "suq.depth.b32 \t$d, [$a];",
4462               []>;
4463 def SUQ_DEPTH_I
4464   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4465               "suq.depth.b32 \t$d, [$a];",
4466               []>;
4467 def SUQ_ARRAY_SIZE_R
4468   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4469               "suq.array_size.b32 \t$d, [$a];",
4470               []>;
4471 def SUQ_ARRAY_SIZE_I
4472   : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4473               "suq.array_size.b32 \t$d, [$a];",
4474               []>;
// Selection patterns for the surface query intrinsics: each int_nvvm_suq_*
// intrinsic applied to a 64-bit register operand selects the register-handle
// (*_R) instruction for the same attribute. None of these patterns produces
// an immediate-handle (*_I) instruction.
// NOTE(review): the *_I forms are presumably substituted later, once the
// handle value is known -- confirm where that happens.
4477 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4478           (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
4479 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4480           (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
4481 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4482           (SUQ_WIDTH_R Int64Regs:$a)>;
4483 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4484           (SUQ_HEIGHT_R Int64Regs:$a)>;
4485 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4486           (SUQ_DEPTH_R Int64Regs:$a)>;
4487 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4488           (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
4491 //===- Handle Query -------------------------------------------------------===//
4493 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Handle type predicates. Each instruction takes a 64-bit register operand
// ($a) and writes a 1-bit result ($d, Int1Regs), using the assembly string
// "istypep.<samplerref|surfref|texref> $d, $a;". The selection pattern for
// the matching int_nvvm_istypep_* intrinsic is attached inline to each
// instruction rather than written as a separate Pat record.
4494 def ISTYPEP_SAMPLER
4495   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4496               "istypep.samplerref \t$d, $a;",
4497               [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4498 def ISTYPEP_SURFACE
4499   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4500               "istypep.surfref \t$d, $a;",
4501               [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4502 def ISTYPEP_TEXTURE
4503   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4504               "istypep.texref \t$d, $a;",
4505               [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4507 //===- Surface Stores -----------------------------------------------------===//
// Records defined inside this let body have IsSust set to true.
4509 let IsSust = true in {
// Surface store of a single value at a 1D coordinate.
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of the stored value $r
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the coordinate/value operands with !con
// The instruction has no outputs and an empty inline pattern list. Its
// assembly string is "<inst> [$s, {$x}], {$r};".
4511 class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
4512     : NVPTXInst<(outs),
4513                 !con(surf, (ins Int32Regs:$x, intype:$r)),
4514                 inst # " \t[$s, \\{$x\\}], \\{$r\\};",
4515                 []>;
// Defines both handle flavours of a 1D store:
//   _R : surface handle $s in a 64-bit register (Int64Regs)
//   _I : surface handle $s as a 64-bit immediate (i64imm)
4516 multiclass SUST_1D<string inst, NVPTXRegClass intype> {
4517   def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
4518   def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. b8 and b16 values both use Int16Regs. sust.b is defined for
// b8/b16/b32/b64 with the clamp, trap and zero modes; sust.p is defined only
// for b8/b16/b32 with the trap mode.
4521 defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
4522 defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
4523 defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
4524 defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
4526 defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
4527 defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
4528 defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
4529 defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
4531 defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
4532 defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
4533 defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
4534 defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
4536 defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
4537 defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
4538 defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
// Surface store of a two-element vector ($r, $g) at a 1D coordinate.
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of each stored element
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the coordinate/value operands with !con
// No outputs, empty inline pattern list. Assembly string:
// "<inst> [$s, {$x}], {$r, $g};".
4540 class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4541     : NVPTXInst<(outs),
4542                 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
4543                 inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
4544                 []>;
// Defines the _R (handle in Int64Regs) and _I (handle as i64imm) flavours.
4545 multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
4546   def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4547   def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. b8 and b16 elements both use Int16Regs. sust.b covers
// b8/b16/b32/b64 with clamp, trap and zero; sust.p covers b8/b16/b32 with
// trap only.
4550 defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
4551 defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
4552 defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
4553 defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
4555 defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
4556 defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
4557 defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
4558 defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
4560 defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
4561 defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
4562 defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
4563 defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
4565 defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
4566 defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
4567 defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
// Surface store of a four-element vector ($r, $g, $b, $a) at a 1D coordinate.
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of each stored element
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the coordinate/value operands with !con
// No outputs, empty inline pattern list. Assembly string:
// "<inst> [$s, {$x}], {$r, $g, $b, $a};".
4569 class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4570     : NVPTXInst<(outs),
4571                 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g,
4572                                 intype:$b, intype:$a)),
4573                 inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4574                 []>;
// Defines the _R (handle in Int64Regs) and _I (handle as i64imm) flavours.
4575 multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
4576   def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4577   def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. Only b8/b16/b32 element sizes are defined for v4 (no b64);
// b8 and b16 elements both use Int16Regs. sust.b has clamp, trap and zero;
// sust.p has trap only.
4580 defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
4581 defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
4582 defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
4584 defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
4585 defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
4586 defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
4588 defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
4589 defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
4590 defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
4592 defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
4593 defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
4594 defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
// Surface store of a single value into a 1D array surface ("a1d" mnemonics).
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of the stored value $r
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the index/coordinate/value operands with !con
// The coordinate list is the 32-bit index $idx followed by $x. No outputs,
// empty inline pattern list. Assembly string:
// "<inst> [$s, {$idx, $x}], {$r};".
4596 class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4597     : NVPTXInst<(outs),
4598                 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
4599                 inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4600                 []>;
// Defines the _R (handle in Int64Regs) and _I (handle as i64imm) flavours.
4601 multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
4602   def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4603   def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. b8 and b16 values both use Int16Regs. sust.b covers
// b8/b16/b32/b64 with clamp, trap and zero; sust.p covers b8/b16/b32 with
// trap only.
4606 defm SUST_B_1D_ARRAY_B8_CLAMP
4607   : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
4608 defm SUST_B_1D_ARRAY_B16_CLAMP
4609   : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
4610 defm SUST_B_1D_ARRAY_B32_CLAMP
4611   : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
4612 defm SUST_B_1D_ARRAY_B64_CLAMP
4613   : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
4615 defm SUST_B_1D_ARRAY_B8_TRAP
4616   : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
4617 defm SUST_B_1D_ARRAY_B16_TRAP
4618   : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
4619 defm SUST_B_1D_ARRAY_B32_TRAP
4620   : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
4621 defm SUST_B_1D_ARRAY_B64_TRAP
4622   : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
4624 defm SUST_B_1D_ARRAY_B8_ZERO
4625   : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
4626 defm SUST_B_1D_ARRAY_B16_ZERO
4627   : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
4628 defm SUST_B_1D_ARRAY_B32_ZERO
4629   : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
4630 defm SUST_B_1D_ARRAY_B64_ZERO
4631   : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
4633 defm SUST_P_1D_ARRAY_B8_TRAP
4634   : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
4635 defm SUST_P_1D_ARRAY_B16_TRAP
4636   : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
4637 defm SUST_P_1D_ARRAY_B32_TRAP
4638   : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
// Surface store of a two-element vector ($r, $g) into a 1D array surface.
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of each stored element
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the index/coordinate/value operands with !con
// The coordinate list is the 32-bit index $idx followed by $x. No outputs,
// empty inline pattern list. Assembly string:
// "<inst> [$s, {$idx, $x}], {$r, $g};".
4640 class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4641     : NVPTXInst<(outs),
4642                 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4643                                 intype:$r, intype:$g)),
4644                 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4645                 []>;
// Defines the _R (handle in Int64Regs) and _I (handle as i64imm) flavours.
4646 multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4647   def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4648   def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. b8 and b16 elements both use Int16Regs. sust.b covers
// b8/b16/b32/b64 with clamp, trap and zero; sust.p covers b8/b16/b32 with
// trap only.
4651 defm SUST_B_1D_ARRAY_V2B8_CLAMP
4652   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
4653 defm SUST_B_1D_ARRAY_V2B16_CLAMP
4654   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
4655 defm SUST_B_1D_ARRAY_V2B32_CLAMP
4656   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
4657 defm SUST_B_1D_ARRAY_V2B64_CLAMP
4658   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
4660 defm SUST_B_1D_ARRAY_V2B8_TRAP
4661   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
4662 defm SUST_B_1D_ARRAY_V2B16_TRAP
4663   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
4664 defm SUST_B_1D_ARRAY_V2B32_TRAP
4665   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
4666 defm SUST_B_1D_ARRAY_V2B64_TRAP
4667   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
4669 defm SUST_B_1D_ARRAY_V2B8_ZERO
4670   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
4671 defm SUST_B_1D_ARRAY_V2B16_ZERO
4672   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
4673 defm SUST_B_1D_ARRAY_V2B32_ZERO
4674   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
4675 defm SUST_B_1D_ARRAY_V2B64_ZERO
4676   : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
4678 defm SUST_P_1D_ARRAY_V2B8_TRAP
4679   : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
4680 defm SUST_P_1D_ARRAY_V2B16_TRAP
4681   : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
4682 defm SUST_P_1D_ARRAY_V2B32_TRAP
4683   : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
// Surface store of a four-element vector ($r, $g, $b, $a) into a 1D array
// surface.
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of each stored element
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the index/coordinate/value operands with !con
// The coordinate list is the 32-bit index $idx followed by $x. No outputs,
// empty inline pattern list. Assembly string:
// "<inst> [$s, {$idx, $x}], {$r, $g, $b, $a};".
4685 class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4686     : NVPTXInst<(outs),
4687                 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4688                                 intype:$r, intype:$g, intype:$b, intype:$a)),
4689                 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
4690                 []>;
// Defines the _R (handle in Int64Regs) and _I (handle as i64imm) flavours.
4691 multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4692   def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4693   def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. Only b8/b16/b32 element sizes are defined for v4 (no b64);
// b8 and b16 elements both use Int16Regs. sust.b has clamp, trap and zero;
// sust.p has trap only.
4696 defm SUST_B_1D_ARRAY_V4B8_CLAMP
4697   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
4698 defm SUST_B_1D_ARRAY_V4B16_CLAMP
4699   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
4700 defm SUST_B_1D_ARRAY_V4B32_CLAMP
4701   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
4703 defm SUST_B_1D_ARRAY_V4B8_TRAP
4704   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
4705 defm SUST_B_1D_ARRAY_V4B16_TRAP
4706   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
4707 defm SUST_B_1D_ARRAY_V4B32_TRAP
4708   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
4710 defm SUST_B_1D_ARRAY_V4B8_ZERO
4711   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
4712 defm SUST_B_1D_ARRAY_V4B16_ZERO
4713   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
4714 defm SUST_B_1D_ARRAY_V4B32_ZERO
4715   : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
4717 defm SUST_P_1D_ARRAY_V4B8_TRAP
4718   : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
4719 defm SUST_P_1D_ARRAY_V4B16_TRAP
4720   : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
4721 defm SUST_P_1D_ARRAY_V4B32_TRAP
4722   : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
// Surface store of a single value at a 2D coordinate ($x, $y).
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of the stored value $r
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the coordinate/value operands with !con
// No outputs, empty inline pattern list. Assembly string:
// "<inst> [$s, {$x, $y}], {$r};".
4724 class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
4725     : NVPTXInst<(outs),
4726                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
4727                 inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
4728                 []>;
// Defines the _R (handle in Int64Regs) and _I (handle as i64imm) flavours.
4729 multiclass SUST_2D<string inst, NVPTXRegClass intype> {
4730   def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
4731   def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. b8 and b16 values both use Int16Regs. sust.b covers
// b8/b16/b32/b64 with clamp, trap and zero; sust.p covers b8/b16/b32 with
// trap only.
4734 defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
4735 defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
4736 defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
4737 defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
4739 defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
4740 defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
4741 defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
4742 defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
4744 defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
4745 defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
4746 defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
4747 defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
4749 defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
4750 defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
4751 defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
// Surface store of a two-element vector ($r, $g) at a 2D coordinate
// ($x, $y).
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of each stored element
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the coordinate/value operands with !con
// No outputs, empty inline pattern list. Assembly string:
// "<inst> [$s, {$x, $y}], {$r, $g};".
4753 class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4754     : NVPTXInst<(outs),
4755                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4756                                 intype:$r, intype:$g)),
4757                 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4758                 []>;
// Defines the _R (handle in Int64Regs) and _I (handle as i64imm) flavours.
4759 multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
4760   def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4761   def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. b8 and b16 elements both use Int16Regs. sust.b covers
// b8/b16/b32/b64 with clamp, trap and zero; sust.p covers b8/b16/b32 with
// trap only.
4764 defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
4765 defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
4766 defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
4767 defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
4769 defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
4770 defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
4771 defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
4772 defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
4774 defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
4775 defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
4776 defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
4777 defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
4779 defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
4780 defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
4781 defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
// Surface store of a four-element vector ($r, $g, $b, $a) at a 2D coordinate
// ($x, $y).
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of each stored element
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the coordinate/value operands with !con
// No outputs, empty inline pattern list. Assembly string:
// "<inst> [$s, {$x, $y}], {$r, $g, $b, $a};".
4783 class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4784     : NVPTXInst<(outs),
4785                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4786                                 intype:$r, intype:$g, intype:$b, intype:$a)),
4787                 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
4788                 []>;
// Defines the _R (handle in Int64Regs) and _I (handle as i64imm) flavours.
4789 multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
4790   def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4791   def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. Only b8/b16/b32 element sizes are defined for v4 (no b64);
// b8 and b16 elements both use Int16Regs. sust.b has clamp, trap and zero;
// sust.p has trap only.
4794 defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
4795 defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
4796 defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;
4798 defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
4799 defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
4800 defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;
4802 defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
4803 defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
4804 defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;
4806 defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
4807 defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
4808 defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
// Surface store of a single value into a 2D array surface ("a2d" mnemonics).
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of the stored value $r
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the index/coordinate/value operands with !con
// No outputs, empty inline pattern list. Assembly string:
// "<inst> [$s, {$idx, $x, $y, $y}], {$r};".
// The instruction has three coordinate operands ($idx, $x, $y) but the
// assembly string lists four entries: $y appears twice.
// NOTE(review): presumably the PTX syntax wants a four-element coordinate
// vector here and ignores the last element -- confirm against the PTX ISA
// before treating the repeated $y as a typo.
4810 class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4811     : NVPTXInst<(outs),
4812                 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4813                                 intype:$r)),
4814                 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4815                 []>;
// Defines the _R (handle in Int64Regs) and _I (handle as i64imm) flavours.
4816 multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
4817   def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4818   def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. b8 and b16 values both use Int16Regs. sust.b covers
// b8/b16/b32/b64 with clamp, trap and zero; sust.p covers b8/b16/b32 with
// trap only.
4821 defm SUST_B_2D_ARRAY_B8_CLAMP
4822   : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
4823 defm SUST_B_2D_ARRAY_B16_CLAMP
4824   : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
4825 defm SUST_B_2D_ARRAY_B32_CLAMP
4826   : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
4827 defm SUST_B_2D_ARRAY_B64_CLAMP
4828   : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;
4830 defm SUST_B_2D_ARRAY_B8_TRAP
4831   : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
4832 defm SUST_B_2D_ARRAY_B16_TRAP
4833   : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
4834 defm SUST_B_2D_ARRAY_B32_TRAP
4835   : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
4836 defm SUST_B_2D_ARRAY_B64_TRAP
4837   : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;
4839 defm SUST_B_2D_ARRAY_B8_ZERO
4840   : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
4841 defm SUST_B_2D_ARRAY_B16_ZERO
4842   : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
4843 defm SUST_B_2D_ARRAY_B32_ZERO
4844   : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
4845 defm SUST_B_2D_ARRAY_B64_ZERO
4846   : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;
4848 defm SUST_P_2D_ARRAY_B8_TRAP
4849   : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
4850 defm SUST_P_2D_ARRAY_B16_TRAP
4851   : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
4852 defm SUST_P_2D_ARRAY_B32_TRAP
4853   : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
// Surface store of a two-element vector ($r, $g) into a 2D array surface.
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of each stored element
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the index/coordinate/value operands with !con
// No outputs, empty inline pattern list. Assembly string:
// "<inst> [$s, {$idx, $x, $y, $y}], {$r, $g};".
// The instruction has three coordinate operands ($idx, $x, $y) but the
// assembly string lists four entries: $y appears twice.
// NOTE(review): presumably the PTX syntax wants a four-element coordinate
// vector here and ignores the last element -- confirm against the PTX ISA
// before treating the repeated $y as a typo.
4855 class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4856     : NVPTXInst<(outs),
4857                 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4858                                 intype:$r, intype:$g)),
4859                 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
4860                 []>;
// Defines the _R (handle in Int64Regs) and _I (handle as i64imm) flavours.
4861 multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4862   def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4863   def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. b8 and b16 elements both use Int16Regs. sust.b covers
// b8/b16/b32/b64 with clamp, trap and zero; sust.p covers b8/b16/b32 with
// trap only.
4866 defm SUST_B_2D_ARRAY_V2B8_CLAMP
4867   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
4868 defm SUST_B_2D_ARRAY_V2B16_CLAMP
4869   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
4870 defm SUST_B_2D_ARRAY_V2B32_CLAMP
4871   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
4872 defm SUST_B_2D_ARRAY_V2B64_CLAMP
4873   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;
4875 defm SUST_B_2D_ARRAY_V2B8_TRAP
4876   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
4877 defm SUST_B_2D_ARRAY_V2B16_TRAP
4878   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
4879 defm SUST_B_2D_ARRAY_V2B32_TRAP
4880   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
4881 defm SUST_B_2D_ARRAY_V2B64_TRAP
4882   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;
4884 defm SUST_B_2D_ARRAY_V2B8_ZERO
4885   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
4886 defm SUST_B_2D_ARRAY_V2B16_ZERO
4887   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
4888 defm SUST_B_2D_ARRAY_V2B32_ZERO
4889   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
4890 defm SUST_B_2D_ARRAY_V2B64_ZERO
4891   : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;
4893 defm SUST_P_2D_ARRAY_V2B8_TRAP
4894   : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
4895 defm SUST_P_2D_ARRAY_V2B16_TRAP
4896   : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
4897 defm SUST_P_2D_ARRAY_V2B32_TRAP
4898   : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
// Surface store of a four-element vector ($r, $g, $b, $a) into a 2D array
// surface.
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of each stored element
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the index/coordinate/value operands with !con
// No outputs, empty inline pattern list. Assembly string:
// "<inst> [$s, {$idx, $x, $y, $y}], {$r, $g, $b, $a};".
// The instruction has three coordinate operands ($idx, $x, $y) but the
// assembly string lists four entries: $y appears twice.
// NOTE(review): presumably the PTX syntax wants a four-element coordinate
// vector here and ignores the last element -- confirm against the PTX ISA
// before treating the repeated $y as a typo.
4900 class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4901     : NVPTXInst<(outs),
4902                 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4903                                 intype:$r, intype:$g, intype:$b, intype:$a)),
4904                 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
4905                 []>;
// Defines the _R (handle in Int64Regs) and _I (handle as i64imm) flavours.
4906 multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4907   def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4908   def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. Only b8/b16/b32 element sizes are defined for v4 (no b64);
// b8 and b16 elements both use Int16Regs. sust.b has clamp, trap and zero;
// sust.p has trap only.
4911 defm SUST_B_2D_ARRAY_V4B8_CLAMP
4912   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
4913 defm SUST_B_2D_ARRAY_V4B16_CLAMP
4914   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
4915 defm SUST_B_2D_ARRAY_V4B32_CLAMP
4916   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
4918 defm SUST_B_2D_ARRAY_V4B8_TRAP
4919   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
4920 defm SUST_B_2D_ARRAY_V4B16_TRAP
4921   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
4922 defm SUST_B_2D_ARRAY_V4B32_TRAP
4923   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
4925 defm SUST_B_2D_ARRAY_V4B8_ZERO
4926   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
4927 defm SUST_B_2D_ARRAY_V4B16_ZERO
4928   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
4929 defm SUST_B_2D_ARRAY_V4B32_ZERO
4930   : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
4932 defm SUST_P_2D_ARRAY_V4B8_TRAP
4933   : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
4934 defm SUST_P_2D_ARRAY_V4B16_TRAP
4935   : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
4936 defm SUST_P_2D_ARRAY_V4B32_TRAP
4937   : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
// Surface store of a single value at a 3D coordinate ($x, $y, $z).
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of the stored value $r
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the coordinate/value operands with !con
// No outputs, empty inline pattern list. Assembly string:
// "<inst> [$s, {$x, $y, $z, $z}], {$r};".
// The instruction has three coordinate operands but the assembly string
// lists four entries: $z appears twice.
// NOTE(review): presumably the PTX syntax wants a four-element coordinate
// vector here and ignores the last element -- confirm against the PTX ISA
// before treating the repeated $z as a typo.
4939 class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
4940     : NVPTXInst<(outs),
4941                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4942                                 intype:$r)),
4943                 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4944                 []>;
// Defines the _R (handle in Int64Regs) and _I (handle as i64imm) flavours.
4945 multiclass SUST_3D<string inst, NVPTXRegClass intype> {
4946   def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
4947   def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. b8 and b16 values both use Int16Regs. sust.b covers
// b8/b16/b32/b64 with clamp, trap and zero; sust.p covers b8/b16/b32 with
// trap only.
4950 defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
4951 defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
4952 defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
4953 defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;
4955 defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
4956 defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
4957 defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
4958 defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;
4960 defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
4961 defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
4962 defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
4963 defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;
4965 defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
4966 defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
4967 defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
// Surface store of a two-element vector ($r, $g) at a 3D coordinate
// ($x, $y, $z).
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of each stored element
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the coordinate/value operands with !con
// No outputs, empty inline pattern list. Assembly string:
// "<inst> [$s, {$x, $y, $z, $z}], {$r, $g};".
// The instruction has three coordinate operands but the assembly string
// lists four entries: $z appears twice.
// NOTE(review): presumably the PTX syntax wants a four-element coordinate
// vector here and ignores the last element -- confirm against the PTX ISA
// before treating the repeated $z as a typo.
4969 class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4970     : NVPTXInst<(outs),
4971                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4972                                 intype:$r, intype:$g)),
4973                 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
4974                 []>;
// Defines the _R (handle in Int64Regs) and _I (handle as i64imm) flavours.
4975 multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
4976   def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4977   def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. b8 and b16 elements both use Int16Regs. sust.b covers
// b8/b16/b32/b64 with clamp, trap and zero; sust.p covers b8/b16/b32 with
// trap only.
4980 defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
4981 defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
4982 defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
4983 defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;
4985 defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
4986 defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
4987 defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
4988 defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;
4990 defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
4991 defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
4992 defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
4993 defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;
4995 defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
4996 defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
4997 defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
// Surface store of a four-element vector ($r, $g, $b, $a) at a 3D coordinate
// ($x, $y, $z).
//   inst   - mnemonic text placed in front of the operand list
//   intype - register class of each stored element
//   surf   - (ins ...) dag providing the surface handle operand $s; it is
//            joined in front of the coordinate/value operands with !con
// No outputs, empty inline pattern list. Assembly string:
// "<inst> [$s, {$x, $y, $z, $z}], {$r, $g, $b, $a};".
// The instruction has three coordinate operands but the assembly string
// lists four entries: $z appears twice.
// NOTE(review): presumably the PTX syntax wants a four-element coordinate
// vector here and ignores the last element -- confirm against the PTX ISA
// before treating the repeated $z as a typo.
4999 class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
5000     : NVPTXInst<(outs),
5001                 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5002                                 intype:$r, intype:$g, intype:$b, intype:$a)),
5003                 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
5004                 []>;
// Defines the _R (handle in Int64Regs) and _I (handle as i64imm) flavours.
5005 multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
5006   def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
5007   def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
// Instantiations. Only b8/b16/b32 element sizes are defined for v4 (no b64);
// b8 and b16 elements both use Int16Regs. sust.b has clamp, trap and zero;
// sust.p has trap only.
5010 defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
5011 defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
5012 defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
5014 defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
5015 defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
5016 defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
5018 defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
5019 defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
5020 defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
5022 defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
5023 defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
5024 defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
5028 // Surface store instruction patterns
5029 // These are written as standalone Pat<> records because, for reasons not yet
5030 // understood, TableGen reports type errors when the patterns are embedded in
// the instruction definitions themselves.
5032 // .clamp variant
// 1D surface stores, .clamp: each pattern maps an int_nvvm_sust_b_1d_*_clamp
// intrinsic onto the matching SUST_B_1D_*_CLAMP_R instruction, forwarding the
// operands unchanged and in the same order. Operands are the surface handle $s
// (Int64Regs), the coordinate $x (Int32Regs), then the data registers.
// i8 and i16 data both use Int16Regs; i32 uses Int32Regs; i64 uses Int64Regs.
// Scalar stores ($r).
5033 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
5034            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5035           (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5037 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
5038            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5039           (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5041 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
5042            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5043           (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5045 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
5046            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5047           (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// Two-component stores ($r, $g).
5049 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
5050            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5051           (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5052            Int16Regs:$r, Int16Regs:$g)>;
5054 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
5055            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5056           (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5057            Int16Regs:$r, Int16Regs:$g)>;
5059 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
5060            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5061           (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5062            Int32Regs:$r, Int32Regs:$g)>;
5064 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
5065            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5066           (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5067            Int64Regs:$r, Int64Regs:$g)>;
// Four-component stores ($r, $g, $b, $a); only i8, i16 and i32 appear here.
5069 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
5070            Int64Regs:$s, Int32Regs:$x,
5071            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5072           (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5073            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5075 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
5076            Int64Regs:$s, Int32Regs:$x,
5077            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5078           (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5079            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5081 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
5082            Int64Regs:$s, Int32Regs:$x,
5083            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5084           (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5085            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 1D array surface stores, .clamp: each int_nvvm_sust_b_1d_array_*_clamp
// intrinsic is mapped onto the matching SUST_B_1D_ARRAY_*_CLAMP_R instruction
// with operands forwarded unchanged. Compared with the plain 1D forms there is
// an extra Int32Regs operand $l before $x (presumably the array layer index).
// Scalar stores ($r).
5089 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
5090            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5091           (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5092            Int16Regs:$r)>;
5094 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
5095            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5096           (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5097            Int16Regs:$r)>;
5099 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
5100            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5101           (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5102            Int32Regs:$r)>;
5104 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
5105            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5106           (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5107            Int64Regs:$r)>;
// Two-component stores ($r, $g).
5109 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
5110           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5111           (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5112            Int16Regs:$r, Int16Regs:$g)>;
5114 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
5115           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5116           (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5117            Int16Regs:$r, Int16Regs:$g)>;
5119 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
5120           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5121           (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5122            Int32Regs:$r, Int32Regs:$g)>;
5124 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
5125           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5126           (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5127            Int64Regs:$r, Int64Regs:$g)>;
// Four-component stores ($r, $g, $b, $a); only i8, i16 and i32 appear here.
5129 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
5130            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5131            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5132           (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5133            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5135 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
5136            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5137            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5138           (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5139            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5141 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
5142            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5143            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5144           (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5145            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 2D surface stores, .clamp: each int_nvvm_sust_b_2d_*_clamp intrinsic is
// mapped onto the matching SUST_B_2D_*_CLAMP_R instruction with operands
// forwarded unchanged: surface handle $s (Int64Regs), coordinates $x and $y
// (Int32Regs), then the data registers.
// Scalar stores ($r).
5149 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
5150            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5151           (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5152            Int16Regs:$r)>;
5154 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
5155            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5156           (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5157            Int16Regs:$r)>;
5159 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
5160            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5161           (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5162            Int32Regs:$r)>;
5164 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
5165            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5166           (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5167            Int64Regs:$r)>;
// Two-component stores ($r, $g).
5169 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
5170           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5171           (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5172            Int16Regs:$r, Int16Regs:$g)>;
5174 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
5175           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5176           (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5177            Int16Regs:$r, Int16Regs:$g)>;
5179 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
5180           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5181           (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5182            Int32Regs:$r, Int32Regs:$g)>;
5184 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
5185           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5186           (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5187            Int64Regs:$r, Int64Regs:$g)>;
// Four-component stores ($r, $g, $b, $a); only i8, i16 and i32 appear here.
5189 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
5190            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5191            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5192           (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5193            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5195 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
5196            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5197            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5198           (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5199            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5201 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
5202            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5203            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5204           (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5205            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 2D array surface stores, .clamp: each int_nvvm_sust_b_2d_array_*_clamp
// intrinsic is mapped onto the matching SUST_B_2D_ARRAY_*_CLAMP_R instruction
// with operands forwarded unchanged. Operand order is $s, $l, $x, $y, data;
// $l is an Int32Regs operand (presumably the array layer index).
// Scalar stores ($r).
5209 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
5210           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5211           (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
5212            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5213            Int16Regs:$r)>;
5215 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
5216           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5217           (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
5218            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5219            Int16Regs:$r)>;
5221 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
5222           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5223           (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
5224            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5225            Int32Regs:$r)>;
5227 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
5228           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5229           (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
5230            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5231            Int64Regs:$r)>;
// Two-component stores ($r, $g).
5233 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
5234            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5235            Int16Regs:$r, Int16Regs:$g),
5236           (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5237            Int32Regs:$x, Int32Regs:$y,
5238            Int16Regs:$r, Int16Regs:$g)>;
5240 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
5241            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5242            Int16Regs:$r, Int16Regs:$g),
5243           (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5244            Int32Regs:$x, Int32Regs:$y,
5245            Int16Regs:$r, Int16Regs:$g)>;
5247 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
5248            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5249            Int32Regs:$g),
5250           (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5251            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5253 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
5254            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5255            Int64Regs:$g),
5256           (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5257            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Four-component stores ($r, $g, $b, $a); only i8, i16 and i32 appear here.
5259 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
5260            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5261            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5262           (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
5263            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5264            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5266 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
5267            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5268            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5269           (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
5270            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5271            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5273 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
5274            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5275            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5276           (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5277            Int32Regs:$x, Int32Regs:$y,
5278            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 3D surface stores, .clamp: each int_nvvm_sust_b_3d_*_clamp intrinsic is
// mapped onto the matching SUST_B_3D_*_CLAMP_R instruction with operands
// forwarded unchanged: surface handle $s (Int64Regs), coordinates $x, $y and
// $z (Int32Regs), then the data registers.
// Scalar stores ($r).
5282 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
5283            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5284            Int16Regs:$r),
5285           (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
5286            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5287            Int16Regs:$r)>;
5289 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
5290            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5291            Int16Regs:$r),
5292           (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
5293            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5294            Int16Regs:$r)>;
5296 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
5297            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5298            Int32Regs:$r),
5299           (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
5300            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5301            Int32Regs:$r)>;
5303 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
5304            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5305            Int64Regs:$r),
5306           (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
5307            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5308            Int64Regs:$r)>;
// Two-component stores ($r, $g).
5310 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
5311            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5312            Int16Regs:$r, Int16Regs:$g),
5313           (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
5314            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5315            Int16Regs:$r, Int16Regs:$g)>;
5317 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
5318            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5319            Int16Regs:$r, Int16Regs:$g),
5320           (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
5321            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5322            Int16Regs:$r, Int16Regs:$g)>;
5324 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
5325            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5326            Int32Regs:$r, Int32Regs:$g),
5327           (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
5328            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5329            Int32Regs:$r, Int32Regs:$g)>;
5331 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
5332            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5333            Int64Regs:$r, Int64Regs:$g),
5334           (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
5335            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5336            Int64Regs:$r, Int64Regs:$g)>;
// Four-component stores ($r, $g, $b, $a); only i8, i16 and i32 appear here.
5338 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
5339            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5340            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5341           (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
5342            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5343            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5345 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
5346            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5347            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5348           (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
5349            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5350            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5352 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
5353            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5354            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5355           (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
5356            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5357            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5360 // .trap variant
// 1D surface stores, .trap: each int_nvvm_sust_b_1d_*_trap intrinsic is mapped
// onto the matching SUST_B_1D_*_TRAP_R instruction with operands forwarded
// unchanged: surface handle $s (Int64Regs), coordinate $x (Int32Regs), then
// the data registers.
// i8 and i16 data both use Int16Regs; i32 uses Int32Regs; i64 uses Int64Regs.
// Scalar stores ($r).
5361 def : Pat<(int_nvvm_sust_b_1d_i8_trap
5362            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5363           (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5365 def : Pat<(int_nvvm_sust_b_1d_i16_trap
5366            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5367           (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5369 def : Pat<(int_nvvm_sust_b_1d_i32_trap
5370            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5371           (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5373 def : Pat<(int_nvvm_sust_b_1d_i64_trap
5374            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5375           (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// Two-component stores ($r, $g).
5377 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
5378            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5379           (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5380            Int16Regs:$r, Int16Regs:$g)>;
5382 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
5383            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5384           (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5385            Int16Regs:$r, Int16Regs:$g)>;
5387 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
5388            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5389           (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5390            Int32Regs:$r, Int32Regs:$g)>;
5392 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
5393            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5394           (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
5395            Int64Regs:$r, Int64Regs:$g)>;
// Four-component stores ($r, $g, $b, $a); only i8, i16 and i32 appear here.
5397 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
5398            Int64Regs:$s, Int32Regs:$x,
5399            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5400           (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5401            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5403 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
5404            Int64Regs:$s, Int32Regs:$x,
5405            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5406           (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5407            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5409 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
5410            Int64Regs:$s, Int32Regs:$x,
5411            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5412           (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5413            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 1D array surface stores, .trap: each int_nvvm_sust_b_1d_array_*_trap
// intrinsic is mapped onto the matching SUST_B_1D_ARRAY_*_TRAP_R instruction
// with operands forwarded unchanged. Operand order is $s, $l, $x, data; $l is
// an Int32Regs operand (presumably the array layer index).
// Scalar stores ($r).
5417 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
5418            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5419           (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5420            Int16Regs:$r)>;
5422 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
5423            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5424           (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5425            Int16Regs:$r)>;
5427 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
5428            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5429           (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5430            Int32Regs:$r)>;
5432 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
5433            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5434           (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5435            Int64Regs:$r)>;
// Two-component stores ($r, $g).
5437 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
5438           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5439           (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5440            Int16Regs:$r, Int16Regs:$g)>;
5442 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
5443           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5444           (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5445            Int16Regs:$r, Int16Regs:$g)>;
5447 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
5448           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5449           (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5450            Int32Regs:$r, Int32Regs:$g)>;
5452 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
5453           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5454           (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5455            Int64Regs:$r, Int64Regs:$g)>;
// Four-component stores ($r, $g, $b, $a); only i8, i16 and i32 appear here.
5457 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
5458            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5459            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5460           (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5461            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5463 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
5464            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5465            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5466           (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5467            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5469 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
5470            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5471            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5472           (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5473            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 2D surface stores, .trap: each int_nvvm_sust_b_2d_*_trap intrinsic is mapped
// onto the matching SUST_B_2D_*_TRAP_R instruction with operands forwarded
// unchanged: surface handle $s (Int64Regs), coordinates $x and $y (Int32Regs),
// then the data registers.
// Scalar stores ($r).
5477 def : Pat<(int_nvvm_sust_b_2d_i8_trap
5478            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5479           (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5480            Int16Regs:$r)>;
5482 def : Pat<(int_nvvm_sust_b_2d_i16_trap
5483            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5484           (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5485            Int16Regs:$r)>;
5487 def : Pat<(int_nvvm_sust_b_2d_i32_trap
5488            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5489           (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5490            Int32Regs:$r)>;
5492 def : Pat<(int_nvvm_sust_b_2d_i64_trap
5493            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5494           (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5495            Int64Regs:$r)>;
// Two-component stores ($r, $g).
5497 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
5498           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5499           (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5500            Int16Regs:$r, Int16Regs:$g)>;
5502 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
5503           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5504           (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5505            Int16Regs:$r, Int16Regs:$g)>;
5507 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
5508           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5509           (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5510            Int32Regs:$r, Int32Regs:$g)>;
5512 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
5513           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5514           (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5515            Int64Regs:$r, Int64Regs:$g)>;
// Four-component stores ($r, $g, $b, $a); only i8, i16 and i32 appear here.
5517 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
5518            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5519            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5520           (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5521            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5523 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
5524            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5525            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5526           (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5527            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5529 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
5530            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5531            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5532           (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5533            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 2D array surface stores, .trap: each int_nvvm_sust_b_2d_array_*_trap
// intrinsic is mapped onto the matching SUST_B_2D_ARRAY_*_TRAP_R instruction
// with operands forwarded unchanged. Operand order is $s, $l, $x, $y, data;
// $l is an Int32Regs operand (presumably the array layer index).
// Scalar stores ($r).
5537 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
5538           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5539           (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
5540            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5541            Int16Regs:$r)>;
5543 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
5544           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5545           (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
5546            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5547            Int16Regs:$r)>;
5549 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
5550           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5551           (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
5552            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5553            Int32Regs:$r)>;
5555 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
5556           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5557           (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
5558            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5559            Int64Regs:$r)>;
// Two-component stores ($r, $g).
5561 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
5562            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5563            Int16Regs:$r, Int16Regs:$g),
5564           (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
5565            Int32Regs:$x, Int32Regs:$y,
5566            Int16Regs:$r, Int16Regs:$g)>;
5568 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
5569            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5570            Int16Regs:$r, Int16Regs:$g),
5571           (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
5572            Int32Regs:$x, Int32Regs:$y,
5573            Int16Regs:$r, Int16Regs:$g)>;
5575 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
5576            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5577            Int32Regs:$g),
5578           (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5579            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5581 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
5582            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5583            Int64Regs:$g),
5584           (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l,
5585            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Four-component stores ($r, $g, $b, $a); only i8, i16 and i32 appear here.
5587 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
5588            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5589            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5590           (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
5591            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5592            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5594 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
5595            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5596            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5597           (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
5598            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5599            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5601 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
5602            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5603            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5604           (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5605            Int32Regs:$x, Int32Regs:$y,
5606            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 3D surface stores, .trap: each int_nvvm_sust_b_3d_*_trap intrinsic is mapped
// onto the matching SUST_B_3D_*_TRAP_R instruction with operands forwarded
// unchanged: surface handle $s (Int64Regs), coordinates $x, $y and $z
// (Int32Regs), then the data registers.
// Scalar stores ($r).
5610 def : Pat<(int_nvvm_sust_b_3d_i8_trap
5611            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5612            Int16Regs:$r),
5613           (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
5614            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5615            Int16Regs:$r)>;
5617 def : Pat<(int_nvvm_sust_b_3d_i16_trap
5618            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5619            Int16Regs:$r),
5620           (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
5621            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5622            Int16Regs:$r)>;
5624 def : Pat<(int_nvvm_sust_b_3d_i32_trap
5625            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5626            Int32Regs:$r),
5627           (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
5628            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5629            Int32Regs:$r)>;
5631 def : Pat<(int_nvvm_sust_b_3d_i64_trap
5632            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5633            Int64Regs:$r),
5634           (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
5635            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5636            Int64Regs:$r)>;
// Two-component stores ($r, $g).
5638 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
5639            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5640            Int16Regs:$r, Int16Regs:$g),
5641           (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
5642            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5643            Int16Regs:$r, Int16Regs:$g)>;
5645 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
5646            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5647            Int16Regs:$r, Int16Regs:$g),
5648           (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
5649            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5650            Int16Regs:$r, Int16Regs:$g)>;
5652 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
5653            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5654            Int32Regs:$r, Int32Regs:$g),
5655           (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
5656            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5657            Int32Regs:$r, Int32Regs:$g)>;
5659 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
5660            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5661            Int64Regs:$r, Int64Regs:$g),
5662           (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
5663            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5664            Int64Regs:$r, Int64Regs:$g)>;
// Four-component stores ($r, $g, $b, $a); only i8, i16 and i32 appear here.
5666 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
5667            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5668            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5669           (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
5670            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5671            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5673 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
5674            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5675            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5676           (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
5677            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5678            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5680 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
5681            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5682            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5683           (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
5684            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5685            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5688 // .zero variant
// 1D surface stores, .zero: each int_nvvm_sust_b_1d_*_zero intrinsic is mapped
// onto the matching SUST_B_1D_*_ZERO_R instruction with operands forwarded
// unchanged: surface handle $s (Int64Regs), coordinate $x (Int32Regs), then
// the data registers.
// i8 and i16 data both use Int16Regs; i32 uses Int32Regs; i64 uses Int64Regs.
// Scalar stores ($r).
5689 def : Pat<(int_nvvm_sust_b_1d_i8_zero
5690            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5691           (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5693 def : Pat<(int_nvvm_sust_b_1d_i16_zero
5694            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5695           (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5697 def : Pat<(int_nvvm_sust_b_1d_i32_zero
5698            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5699           (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5701 def : Pat<(int_nvvm_sust_b_1d_i64_zero
5702            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5703           (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// Two-component stores ($r, $g).
5705 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
5706            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5707           (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
5708            Int16Regs:$r, Int16Regs:$g)>;
5710 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
5711            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5712           (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
5713            Int16Regs:$r, Int16Regs:$g)>;
5715 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
5716            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5717           (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
5718            Int32Regs:$r, Int32Regs:$g)>;
5720 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
5721            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5722           (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x,
5723            Int64Regs:$r, Int64Regs:$g)>;
// Four-component stores ($r, $g, $b, $a); only i8, i16 and i32 appear here.
5725 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
5726            Int64Regs:$s, Int32Regs:$x,
5727            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5728           (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
5729            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5731 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
5732            Int64Regs:$s, Int32Regs:$x,
5733            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5734           (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
5735            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5737 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
5738            Int64Regs:$s, Int32Regs:$x,
5739            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5740           (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
5741            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 1D-array group: int_nvvm_sust_b_1d_array_<ty>_zero selects
// SUST_B_1D_ARRAY_<TY>_ZERO_R. Compared with the plain 1D patterns there is
// one extra Int32Regs operand, $l, between $s and $x (presumably the array
// layer index -- confirm against the intrinsic definition). All operands are
// forwarded unchanged and in order; i8 and i16 values both use Int16Regs.
5745 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
5746            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5747           (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5748            Int16Regs:$r)>;
5750 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
5751            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5752           (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5753            Int16Regs:$r)>;
5755 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
5756            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5757           (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5758            Int32Regs:$r)>;
5760 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
5761            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5762           (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5763            Int64Regs:$r)>;
5765 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
5766           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5767           (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5768            Int16Regs:$r, Int16Regs:$g)>;
5770 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
5771           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5772           (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5773            Int16Regs:$r, Int16Regs:$g)>;
5775 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
5776           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5777           (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5778            Int32Regs:$r, Int32Regs:$g)>;
5780 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
5781           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5782           (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5783            Int64Regs:$r, Int64Regs:$g)>;
5785 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
5786            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5787            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5788           (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5789            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5791 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
5792            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5793            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5794           (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5795            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5797 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
5798            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5799            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5800           (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5801            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 2D group: int_nvvm_sust_b_2d_<ty>_zero selects SUST_B_2D_<TY>_ZERO_R.
// Operands are forwarded unchanged and in order: $s (Int64Regs), the
// coordinates $x and $y (Int32Regs), then one, two or four values.
// Value register classes: Int16Regs for i8 and i16, Int32Regs for i32,
// Int64Regs for i64 (scalar and v2 only).
5805 def : Pat<(int_nvvm_sust_b_2d_i8_zero
5806            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5807           (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5808            Int16Regs:$r)>;
5810 def : Pat<(int_nvvm_sust_b_2d_i16_zero
5811            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5812           (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5813            Int16Regs:$r)>;
5815 def : Pat<(int_nvvm_sust_b_2d_i32_zero
5816            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5817           (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5818            Int32Regs:$r)>;
5820 def : Pat<(int_nvvm_sust_b_2d_i64_zero
5821            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5822           (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5823            Int64Regs:$r)>;
5825 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
5826           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5827           (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5828            Int16Regs:$r, Int16Regs:$g)>;
5830 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
5831           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5832           (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5833            Int16Regs:$r, Int16Regs:$g)>;
5835 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
5836           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5837           (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5838            Int32Regs:$r, Int32Regs:$g)>;
5840 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
5841           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5842           (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5843            Int64Regs:$r, Int64Regs:$g)>;
5845 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
5846            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5847            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5848           (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5849            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5851 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
5852            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5853            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5854           (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5855            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5857 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
5858            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5859            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5860           (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5861            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 2D-array group: int_nvvm_sust_b_2d_array_<ty>_zero selects
// SUST_B_2D_ARRAY_<TY>_ZERO_R. Operand order is $s (Int64Regs), $l, $x, $y
// (all Int32Regs; $l is presumably the array layer index -- confirm against
// the intrinsic definition), then one, two or four values. Every operand is
// forwarded unchanged; i8 and i16 values both use Int16Regs.
5865 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
5866           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5867           (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s,
5868            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5869            Int16Regs:$r)>;
5871 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
5872           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5873           (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s,
5874            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5875            Int16Regs:$r)>;
5877 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
5878           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5879           (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s,
5880            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5881            Int32Regs:$r)>;
5883 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
5884           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5885           (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s,
5886            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5887            Int64Regs:$r)>;
5889 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
5890            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5891            Int16Regs:$r, Int16Regs:$g),
5892           (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l,
5893            Int32Regs:$x, Int32Regs:$y,
5894            Int16Regs:$r, Int16Regs:$g)>;
5896 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
5897            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5898            Int16Regs:$r, Int16Regs:$g),
5899           (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l,
5900            Int32Regs:$x, Int32Regs:$y,
5901            Int16Regs:$r, Int16Regs:$g)>;
5903 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
5904            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5905            Int32Regs:$g),
5906           (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
5907            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5909 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
5910            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5911            Int64Regs:$g),
5912           (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l,
5913            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
5915 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
5916            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5917            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5918           (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s,
5919            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5920            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5922 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
5923            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5924            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5925           (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s,
5926            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5927            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5929 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
5930            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5931            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5932           (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
5933            Int32Regs:$x, Int32Regs:$y,
5934            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 3D group: int_nvvm_sust_b_3d_<ty>_zero selects SUST_B_3D_<TY>_ZERO_R.
// Operands are forwarded unchanged and in order: $s (Int64Regs), the
// coordinates $x/$y/$z (Int32Regs), then one, two or four values.
// Value register classes: Int16Regs for i8 and i16, Int32Regs for i32,
// Int64Regs for i64 (scalar and v2 only).
5938 def : Pat<(int_nvvm_sust_b_3d_i8_zero
5939            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5940            Int16Regs:$r),
5941           (SUST_B_3D_B8_ZERO_R Int64Regs:$s,
5942            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5943            Int16Regs:$r)>;
5945 def : Pat<(int_nvvm_sust_b_3d_i16_zero
5946            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5947            Int16Regs:$r),
5948           (SUST_B_3D_B16_ZERO_R Int64Regs:$s,
5949            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5950            Int16Regs:$r)>;
5952 def : Pat<(int_nvvm_sust_b_3d_i32_zero
5953            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5954            Int32Regs:$r),
5955           (SUST_B_3D_B32_ZERO_R Int64Regs:$s,
5956            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5957            Int32Regs:$r)>;
5959 def : Pat<(int_nvvm_sust_b_3d_i64_zero
5960            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5961            Int64Regs:$r),
5962           (SUST_B_3D_B64_ZERO_R Int64Regs:$s,
5963            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5964            Int64Regs:$r)>;
5966 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
5967            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5968            Int16Regs:$r, Int16Regs:$g),
5969           (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s,
5970            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5971            Int16Regs:$r, Int16Regs:$g)>;
5973 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
5974            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5975            Int16Regs:$r, Int16Regs:$g),
5976           (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s,
5977            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5978            Int16Regs:$r, Int16Regs:$g)>;
5980 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
5981            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5982            Int32Regs:$r, Int32Regs:$g),
5983           (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s,
5984            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5985            Int32Regs:$r, Int32Regs:$g)>;
5987 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
5988            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5989            Int64Regs:$r, Int64Regs:$g),
5990           (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s,
5991            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5992            Int64Regs:$r, Int64Regs:$g)>;
5994 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
5995            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5996            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5997           (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s,
5998            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5999            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6001 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
6002            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6003            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6004           (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s,
6005            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6006            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6008 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
6009            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6010            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6011           (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s,
6012            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6013            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust_p 1D group: int_nvvm_sust_p_1d_<ty>_trap selects
// SUST_P_1D_<TY>_TRAP_R. Operands are forwarded unchanged and in order:
// $s (Int64Regs), the coordinate $x (Int32Regs), then one, two or four
// values. i8 and i16 values use Int16Regs, i32 values use Int32Regs.
// Only 8-, 16- and 32-bit element types are covered by this group.
6018 def : Pat<(int_nvvm_sust_p_1d_i8_trap
6019            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6020           (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6022 def : Pat<(int_nvvm_sust_p_1d_i16_trap
6023            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6024           (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6026 def : Pat<(int_nvvm_sust_p_1d_i32_trap
6027            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6028           (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6030 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
6031            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6032           (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
6033            Int16Regs:$r, Int16Regs:$g)>;
6035 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
6036            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6037           (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
6038            Int16Regs:$r, Int16Regs:$g)>;
6040 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
6041            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6042           (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
6043            Int32Regs:$r, Int32Regs:$g)>;
6045 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
6046            Int64Regs:$s, Int32Regs:$x,
6047            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6048           (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
6049            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6051 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
6052            Int64Regs:$s, Int32Regs:$x,
6053            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6054           (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
6055            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6057 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
6058            Int64Regs:$s, Int32Regs:$x,
6059            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6060           (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
6061            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust_p 1D-array group: int_nvvm_sust_p_1d_array_<ty>_trap selects
// SUST_P_1D_ARRAY_<TY>_TRAP_R. Operand order is $s (Int64Regs), $l and $x
// (Int32Regs; $l is presumably the array layer index -- confirm against the
// intrinsic definition), then one, two or four values. Every operand is
// forwarded unchanged. Only 8-, 16- and 32-bit element types are covered.
6065 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
6066            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6067           (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6068            Int16Regs:$r)>;
6070 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
6071            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6072           (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6073            Int16Regs:$r)>;
6075 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
6076            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6077           (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6078            Int32Regs:$r)>;
6080 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
6081           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6082           (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6083            Int16Regs:$r, Int16Regs:$g)>;
6085 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
6086           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6087           (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6088            Int16Regs:$r, Int16Regs:$g)>;
6090 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
6091           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6092           (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6093            Int32Regs:$r, Int32Regs:$g)>;
6095 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
6096            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6097            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6098           (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6099            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6101 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
6102            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6103            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6104           (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6105            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6107 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
6108            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6109            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6110           (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6111            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust_p 2D group: int_nvvm_sust_p_2d_<ty>_trap selects
// SUST_P_2D_<TY>_TRAP_R. Operands are forwarded unchanged and in order:
// $s (Int64Regs), the coordinates $x and $y (Int32Regs), then one, two or
// four values. i8 and i16 values use Int16Regs, i32 values use Int32Regs.
// Only 8-, 16- and 32-bit element types are covered by this group.
6115 def : Pat<(int_nvvm_sust_p_2d_i8_trap
6116            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6117           (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6118            Int16Regs:$r)>;
6120 def : Pat<(int_nvvm_sust_p_2d_i16_trap
6121            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6122           (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6123            Int16Regs:$r)>;
6125 def : Pat<(int_nvvm_sust_p_2d_i32_trap
6126            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6127           (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6128            Int32Regs:$r)>;
6130 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
6131           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6132           (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6133            Int16Regs:$r, Int16Regs:$g)>;
6135 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
6136           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6137           (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6138            Int16Regs:$r, Int16Regs:$g)>;
6140 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
6141           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6142           (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6143            Int32Regs:$r, Int32Regs:$g)>;
6145 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
6146            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6147            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6148           (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6149            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6151 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
6152            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6153            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6154           (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6155            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6157 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
6158            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6159            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6160           (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6161            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust_p 2D-array group: int_nvvm_sust_p_2d_array_<ty>_trap selects
// SUST_P_2D_ARRAY_<TY>_TRAP_R. Operand order is $s (Int64Regs), $l, $x, $y
// (all Int32Regs; $l is presumably the array layer index -- confirm against
// the intrinsic definition), then one, two or four values. Every operand is
// forwarded unchanged. Only 8-, 16- and 32-bit element types are covered.
6165 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
6166           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6167           (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
6168            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6169            Int16Regs:$r)>;
6171 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
6172           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6173           (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
6174            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6175            Int16Regs:$r)>;
6177 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
6178           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6179           (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
6180            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6181            Int32Regs:$r)>;
6183 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
6184            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6185            Int16Regs:$r, Int16Regs:$g),
6186           (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
6187            Int32Regs:$x, Int32Regs:$y,
6188            Int16Regs:$r, Int16Regs:$g)>;
6190 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
6191            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6192            Int16Regs:$r, Int16Regs:$g),
6193           (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
6194            Int32Regs:$x, Int32Regs:$y,
6195            Int16Regs:$r, Int16Regs:$g)>;
6197 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
6198            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6199            Int32Regs:$g),
6200           (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
6201            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6203 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
6204            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6205            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6206           (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
6207            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6208            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6210 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
6211            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6212            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6213           (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
6214            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6215            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6217 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
6218            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6219            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6220           (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
6221            Int32Regs:$x, Int32Regs:$y,
6222            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// sust_p 3D group: int_nvvm_sust_p_3d_<ty>_trap selects
// SUST_P_3D_<TY>_TRAP_R. Operands are forwarded unchanged and in order:
// $s (Int64Regs), the coordinates $x/$y/$z (Int32Regs), then one, two or
// four values. i8 and i16 values use Int16Regs, i32 values use Int32Regs.
// Only 8-, 16- and 32-bit element types are covered by this group.
6226 def : Pat<(int_nvvm_sust_p_3d_i8_trap
6227            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6228            Int16Regs:$r),
6229           (SUST_P_3D_B8_TRAP_R Int64Regs:$s,
6230            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6231            Int16Regs:$r)>;
6233 def : Pat<(int_nvvm_sust_p_3d_i16_trap
6234            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6235            Int16Regs:$r),
6236           (SUST_P_3D_B16_TRAP_R Int64Regs:$s,
6237            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6238            Int16Regs:$r)>;
6240 def : Pat<(int_nvvm_sust_p_3d_i32_trap
6241            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6242            Int32Regs:$r),
6243           (SUST_P_3D_B32_TRAP_R Int64Regs:$s,
6244            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6245            Int32Regs:$r)>;
6247 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
6248            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6249            Int16Regs:$r, Int16Regs:$g),
6250           (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s,
6251            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6252            Int16Regs:$r, Int16Regs:$g)>;
6254 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
6255            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6256            Int16Regs:$r, Int16Regs:$g),
6257           (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s,
6258            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6259            Int16Regs:$r, Int16Regs:$g)>;
6261 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
6262            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6263            Int32Regs:$r, Int32Regs:$g),
6264           (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s,
6265            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6266            Int32Regs:$r, Int32Regs:$g)>;
6268 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
6269            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6270            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6271           (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s,
6272            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6273            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6275 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
6276            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6277            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6278           (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s,
6279            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6280            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6282 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
6283            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6284            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6285           (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s,
6286            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6287            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6289 //-----------------------------------
6290 // Read Special Registers
6291 //-----------------------------------
// Instruction template that reads a 64-bit special register into $d.
//   regname - text placed after '%' in the assembly, giving
//             "mov.u64 \t$d, %<regname>;".
//   intop   - operand-less intrinsic whose call this instruction selects.
//   Preds   - predicate list handed to Requires<> unchanged (default: none).
6293 class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]>
6294   : NVPTXInst<(outs Int64Regs:$d), (ins),
6295               "mov.u64 \t$d, %" # regname # ";",
6296               [(set Int64Regs:$d, (intop))]>,
6297     Requires<Preds>;
// Instruction template that reads a 32-bit special register into $d.
//   regname - text placed after '%' in the assembly, giving
//             "mov.u32 \t$d, %<regname>;".
//   intop   - operand-less intrinsic whose call this instruction selects.
//   Preds   - predicate list handed to Requires<> unchanged (default: none).
6299 class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
6300   : NVPTXInst<(outs Int32Regs:$d), (ins),
6301               "mov.u32 \t$d, %" # regname # ";",
6302               [(set Int32Regs:$d, (intop))]>,
6303     Requires<Preds>;
// Defines one PTX_READ_SREG_R32 record per component suffix x, y, z and w.
// For a given suffix the record is named "_<suffix>", reads the register
// "<regname>.<suffix>" and is selected from the intrinsic
// int_nvvm_read_ptx_sreg_<regname>_<suffix>, which is looked up by name with
// !cast. Preds is forwarded to every generated record unchanged.
6305 multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
6306    foreach suffix = ["x", "y", "z", "w"] in {
6307       defvar reg = regname # "." # suffix;
6308       defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix);
6309       def "_"#suffix :  PTX_READ_SREG_R32<reg, intr, Preds>;
6310    }
6311 }
6313 // TODO: Add vector-read versions of the special registers.
// Per-component (x/y/z/w) 32-bit reads of tid, ntid, ctaid and nctaid,
// generated by PTX_READ_SREG_R32V4 with no predicates.
6315 defm INT_PTX_SREG_TID   : PTX_READ_SREG_R32V4<"tid">;
6316 defm INT_PTX_SREG_NTID  : PTX_READ_SREG_R32V4<"ntid">;
6317 defm INT_PTX_SREG_CTAID : PTX_READ_SREG_R32V4<"ctaid">;
6318 defm INT_PTX_SREG_NCTAID: PTX_READ_SREG_R32V4<"nctaid">;
// Cluster-related special registers. Every definition in this group carries
// the predicate list [hasSM<90>, hasPTX<78>].
// The first four are per-component (x/y/z/w) reads via PTX_READ_SREG_R32V4.
6320 defm INT_PTX_SREG_CLUSTERID :
6321        PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
6322 defm INT_PTX_SREG_NCLUSTERID :
6323        PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
6324 defm INT_PTX_SREG_CLUSTER_CTAID :
6325        PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
6326 defm INT_PTX_SREG_CLUSTER_NCTAID:
6327        PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;
// The two rank registers are single 32-bit reads, so they use
// PTX_READ_SREG_R32 directly with an explicitly named intrinsic.
6329 def  INT_PTX_SREG_CLUSTER_CTARANK :
6330        PTX_READ_SREG_R32<"cluster_ctarank",
6331                          int_nvvm_read_ptx_sreg_cluster_ctarank,
6332                          [hasSM<90>, hasPTX<78>]>;
6333 def  INT_PTX_SREG_CLUSTER_NCTARANK:
6334        PTX_READ_SREG_R32<"cluster_nctarank",
6335                          int_nvvm_read_ptx_sreg_cluster_nctarank,
6336                          [hasSM<90>, hasPTX<78>]>;
// Single 32-bit special-register reads with no predicates: each record pairs
// a register name with the identically named int_nvvm_read_ptx_sreg_*
// intrinsic (laneid, warpid, nwarpid, smid, nsmid, gridid).
6339 def INT_PTX_SREG_LANEID :
6340     PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
6341 def INT_PTX_SREG_WARPID :
6342     PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
6343 def INT_PTX_SREG_NWARPID :
6344     PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
6345 def INT_PTX_SREG_SMID :
6346     PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
6347 def INT_PTX_SREG_NSMID :
6348     PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
6349 def INT_PTX_SREG_GRIDID :
6350     PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
// 32-bit reads of the five lanemask registers (eq, le, lt, ge, gt), each
// selected from the matching int_nvvm_read_ptx_sreg_lanemask_* intrinsic.
6352 def INT_PTX_SREG_LANEMASK_EQ :
6353     PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
6354 def INT_PTX_SREG_LANEMASK_LE :
6355     PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
6356 def INT_PTX_SREG_LANEMASK_LT :
6357     PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
6358 def INT_PTX_SREG_LANEMASK_GE :
6359     PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
6360 def INT_PTX_SREG_LANEMASK_GT :
6361     PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
// Clock reads: "clock" is read as a 32-bit value, while "clock64" is the only
// definition in this group that uses the 64-bit PTX_READ_SREG_R64 template.
6363 def INT_PTX_SREG_CLOCK :
6364     PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
6365 def INT_PTX_SREG_CLOCK64 :
6366     PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
// 32-bit reads of pm0..pm3 (presumably performance-monitor counters --
// confirm against the PTX ISA special-register list).
6368 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
6369 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
6370 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
6371 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
6373 // TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
6374 // emits a '%'-prefixed register name, whereas WARP_SZ is a bare constant.
6375 def INT_PTX_SREG_WARPSIZE :
6376     NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
6377               [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
6379 // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
6380 // In addition to target-independent fields provided by WMMA_REGS, it adds
6381 // the fields commonly used to implement specific PTX instruction -- register
6382 // types and names, constraints, parts of assembly, etc.
//
// Template arguments:
//   r  - the WMMA_REGS record describing the fragment; its geom, frag and
//        ptx_elt_type are forwarded to the WMMA_REGS base class.
//   op - name of the operation the fragment is used with ("load", "store",
//        "wmma.mma", "mma" and "ldmatrix" at the instantiation sites in this
//        section); it is only consulted when selecting Predicates.
6383 class WMMA_REGINFO<WMMA_REGS r, string op>
6384       : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
6385   // NVPTX register types used to carry fragment data.
  // Only f32 and f64 map to floating-point register classes; every other
  // listed element type is carried in Int32Regs. There is no default case
  // (NOTE(review): an unlisted element type should then be a TableGen error
  // per !cond semantics -- confirm).
6386   NVPTXRegClass regclass = !cond(
6387     !eq(ptx_elt_type, "f16") : Int32Regs,
6388     !eq(ptx_elt_type, "f32") : Float32Regs,
6389     !eq(ptx_elt_type, "f64") : Float64Regs,
6390     !eq(ptx_elt_type, "bf16") : Int32Regs,
6391     !eq(ptx_elt_type, "tf32") : Int32Regs,
6392     !eq(ptx_elt_type, "s32") : Int32Regs,
6393     !eq(ptx_elt_type, "b16") : Int32Regs,
6394     !eq(ptx_elt_type, "s8") : Int32Regs,
6395     !eq(ptx_elt_type, "u8") : Int32Regs,
6396     !eq(ptx_elt_type, "s4") : Int32Regs,
6397     !eq(ptx_elt_type, "u4") : Int32Regs,
6398     !eq(ptx_elt_type, "b1") : Int32Regs);
6400   // Instruction input/output arguments for the fragment.
  // One copy of regclass per entry of `regs`. `regs` is not declared in this
  // class; presumably it is inherited from WMMA_REGS -- confirm there.
6401   list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));
6403   // List of register names for the fragment -- ["ra0", "ra1",...]
6404   list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
6406   // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
6407   string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";
6409   // Predicates for particular fragment variant. Technically those are
6410   // per-instruction predicates, but currently all fragments that can be used in
6411   // a given instruction are subject to the same constraints, so an instruction
6412   // can use predicates from any of its fragments. If/when this is no
6413   // longer the case, we can concat all per-fragment predicates to enforce that
6414   // all fragments of the instruction are viable.
  //
  // !cond yields the value of the first case whose condition holds, so the
  // order of the cases below is significant (e.g. the non-"mma" m8n8k128 and
  // m8n8k32 cases come before the "mma" cases that mention the same
  // geometries).
6415   list<Predicate> Predicates = !cond(
6416     // fp16 -> fp16/fp32 @ m16n16k16
6417     !and(!eq(geom, "m16n16k16"),
6418          !or(!eq(ptx_elt_type, "f16"),
6419              !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>],
6421     !and(!eq(geom,"m8n8k4"),
6422          !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>],
6424     // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
6425     !and(!or(!eq(geom, "m8n32k16"),
6426              !eq(geom, "m32n8k16")),
6427          !or(!eq(ptx_elt_type, "f16"),
6428              !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>],
6430     // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
6431     !and(!or(!eq(geom,"m16n16k16"),
6432              !eq(geom,"m8n32k16"),
6433              !eq(geom,"m32n8k16")),
6434          !or(!eq(ptx_elt_type, "u8"),
6435              !eq(ptx_elt_type, "s8"),
6436              !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>],
6438     !and(!or(!eq(geom,"m16n16k16"),
6439              !eq(geom,"m8n32k16"),
6440              !eq(geom,"m32n8k16")),
6441          !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>],
6443     !and(!eq(geom,"m16n16k8"),
6444          !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>],
6446     !and(!eq(geom,"m16n16k8"),
6447          !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>],
6449     // b1 -> s32 @ m8n8k128(b1)
6450     !and(!ne(op,"mma"),
6451          !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>],
6453     // u4/s4 -> s32 @ m8n8k32 (u4/s4)
6454     !and(!ne(op,"mma"),
6455          !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>],
6457     !or(!eq(geom,"m16n8k8"),
6458         !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>],
6460     !and(!ne(ptx_elt_type,"f64"),
6461          !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>],
6463     // mma m8n8k32 requires higher PTX version
6464     !and(!eq(op,"mma"),
6465          !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>],
    // NOTE(review): this tests the same two conditions as the f64 @ m8n8k4
    // case near the top of this !cond and yields the same predicates; since
    // the earlier case matches first, this one looks unreachable.
6467     !and(!eq(ptx_elt_type,"f64"),
6468          !eq(geom, "m8n8k4")) : [hasSM<80>, hasPTX<70>],
6470     !and(!eq(op,"mma"),
6471          !or(!eq(geom, "m16n8k16"),
6472              !eq(geom, "m16n8k4"),
6473              !eq(geom, "m16n8k32"),
6474              !eq(geom, "m16n8k64"),
6475              !eq(geom, "m8n8k128"),
6476              !eq(geom, "m16n8k128"),
6477              !eq(geom, "m16n8k256"))) : [hasSM<80>, hasPTX<70>],
6479     !and(!eq(op,"ldmatrix"),
6480          !eq(ptx_elt_type,"b16"),
6481          !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]);
6483   // template DAGs for instruction inputs/output.
  // Both pair each entry of ptx_regs with the same-index name from reg_names;
  // they differ only in the dag operator (outs vs. ins).
6484   dag Outs = !dag(outs, ptx_regs, reg_names);
6485   dag Ins = !dag(ins, ptx_regs, reg_names);
6488 // Convert dag of arguments into a dag to match given intrinsic.
// The result is exposed as the `ret` field. The rewrite replaces the `ins`
// operator with Intr and maps the memory operand kinds used in instruction
// operand lists to address patterns: MEMri -> ADDRri, MEMri64 -> ADDRri64,
// imem -> ADDRvar.
6489 class BuildPatternI<Intrinsic Intr, dag Ins> {
6490   // Build a dag pattern that matches the intrinsic call.
  // The !subst calls are nested, so the innermost (ins -> Intr) is applied
  // first and imem -> ADDRvar last.
6491   dag ret = !foreach(tmp, Ins,
6492                           !subst(imem, ADDRvar,
6493                           !subst(MEMri64, ADDRri64,
6494                           !subst(MEMri, ADDRri,
6495                           !subst(ins, Intr, tmp)))));
6498 // Same as above, but uses PatFrag instead of an Intrinsic.
// That is: rewrites the argument dag Ins into a pattern whose operator is the
// PatFrag Intr, applying the same MEMri -> ADDRri, MEMri64 -> ADDRri64 and
// imem -> ADDRvar substitutions. The result is exposed as the `ret` field.
6499 class BuildPatternPF<PatFrag Intr, dag Ins> {
6500   // Build a dag pattern that matches the PatFrag (which wraps the intrinsic call).
6501   dag ret = !foreach(tmp, Ins,
6502                           !subst(imem, ADDRvar,
6503                           !subst(MEMri64, ADDRri64,
6504                           !subst(MEMri, ADDRri,
6505                           !subst(ins, Intr, tmp)))));
6508 // Common WMMA-related fields used for building patterns for all MMA instructions.
//   _Intr - name of the intrinsic record, looked up with !cast.
//   _Args - list of `ins` dags that are concatenated into the Args field.
// The NVPTXInst base is given empty operand lists, a "?" asm string and no
// patterns; the subclasses in this section override OutOperandList,
// InOperandList and AsmString with `let`.
6509 class WMMA_INSTR<string _Intr, list<dag> _Args>
6510   : NVPTXInst<(outs), (ins), "?", []> {
6511   Intrinsic Intr = !cast<Intrinsic>(_Intr);
6512   // Concatenate all arguments into a single dag.
6513   dag Args = !foldl((ins), _Args, a, b, !con(a,b));
6514   // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
  // NOTE(review): Intr is already declared as Intrinsic, so the !cast below
  // looks redundant -- confirm before simplifying.
6515   dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
6519 // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
// Template arguments:
//   Frag       - fragment being loaded; supplies the output registers, the
//                predicates and the frag/geom/type parts of the asm string.
//   Layout     - layout name inserted into the asm string after a ".".
//   Space      - address-space suffix (".shared", ".global" or ""); also
//                selects the address-space check used for pattern matching.
//   WithStride - when set, adds an Int32Regs:$ldm operand and prints ", $ldm".
//   SrcOp      - operand kind used for the $src address.
6522 class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
6523                 DAGOperand SrcOp>
6524   : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
6525                               [!con((ins SrcOp:$src),
6526                                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
6527     Requires<Frag.Predicates> {
6528   // Load/store intrinsics are overloaded on pointer's address space.
6529   // To match the right intrinsic, we need to build AS-constrained PatFrag.
6530   // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
6531   dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
6532   dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
6533   // Build PatFrag that only matches particular address space.
  // Any Space other than ".shared" / ".global" falls back to the generic
  // address-space check.
6534   PatFrag IntrFrag = PatFrag<PFOperands,
6535                              PFOperandsIntr,
6536                              !cond(!eq(Space, ".shared"): AS_match.shared,
6537                                    !eq(Space, ".global"): AS_match.global,
6538                                    true: AS_match.generic)>;
6539   // Build AS-constrained pattern.
6540   let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6542   let OutOperandList = Frag.Outs;
  // The extra MmaCode:$ptx operand is the one referenced as ${ptx:aligned} in
  // the asm string; MMA_PAT supplies ptx.version for it. Presumably the
  // operand printer decides from that version whether to emit ".aligned" --
  // confirm in the MmaCode printer.
6543   let InOperandList = !con(Args, (ins MmaCode:$ptx));
6544   let AsmString = "wmma.load."
6545                   # Frag.frag
6546                   # ".sync"
6547                   # "${ptx:aligned}"
6548                   # "." # Layout
6549                   # "." # Frag.geom
6550                   # Space
6551                   # "." # Frag.ptx_elt_type # " \t"
6552                   # Frag.regstring
6553                   # ", [$src]"
6554                   # !if(WithStride, ", $ldm", "")
6555                   # ";";
6559 // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
// Template arguments:
//   Frag       - fragment being stored; supplies the input registers, the
//                predicates and the geom/type parts of the asm string.
//   Layout     - layout name inserted into the asm string after a ".".
//   Space      - address-space suffix (".shared", ".global" or ""); also
//                selects the address-space check used for pattern matching.
//   WithStride - when set, adds an Int32Regs:$ldm operand and prints ", $ldm".
//   DstOp      - operand kind used for the $dst address.
// The instruction has no outputs; its inputs are the address, the fragment
// registers, the optional stride and the MmaCode:$ptx operand.
6561 class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
6562                    bit WithStride, DAGOperand DstOp>
6563   : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
6564                [!con((ins DstOp:$dst),
6565                      Frag.Ins,
6566                      !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
6567     Requires<Frag.Predicates> {
6569   // Load/store intrinsics are overloaded on pointer's address space.
6570   // To match the right intrinsic, we need to build AS-constrained PatFrag.
6571   // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
6572   dag PFOperands = !con((ops node:$dst),
6573                         !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
6574                         !if(WithStride, (ops node:$ldm), (ops)));
6575   // Build PatFrag that only matches particular address space.
  // The fragment body is PFOperands with its `ops` operator replaced by Intr.
6576   PatFrag IntrFrag = PatFrag<PFOperands,
6577                              !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
6578                              !cond(!eq(Space, ".shared"): AS_match.shared,
6579                                    !eq(Space, ".global"): AS_match.global,
6580                                    true: AS_match.generic)>;
6581   // Build AS-constrained pattern.
6582   let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6584   let InOperandList  = !con(Args, (ins MmaCode:$ptx));
6585   let OutOperandList = (outs);
6586   let AsmString = "wmma.store.d.sync"
6587                   # "${ptx:aligned}"
6588                   # "." # Layout
6589                   # "." # Frag.geom
6590                   # Space
6591                   # "." # Frag.ptx_elt_type
6592                   # " \t[$dst],"
6593                   # Frag.regstring
6594                   # !if(WithStride, ", $ldm", "")
6595                   # ";";
6598 // Create all load/store variants
// Instantiates anonymous WMMA_LOAD / WMMA_STORE_D records for every
// combination of layout, stride flag, address space and address operand kind,
// restricted to the fragments for which NVVM_WMMA_LDST_SUPPORTED reports
// support. The defset collects them into the MMA_LDSTs list, which the
// pattern-generating foreach further down in this file iterates over.
6599 defset list<WMMA_INSTR> MMA_LDSTs  = {
6600   foreach layout = ["row", "col"] in {
6601     foreach stride = [false, true] in {
6602       foreach space = [".global", ".shared", ""] in {
6603         foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
6604           foreach frag = NVVM_MMA_OPS.all_ld_ops in
6605             if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
6606               def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
6607           foreach frag = NVVM_MMA_OPS.all_st_ops in
6608             if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
6609               def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
6610         } // addr
6611       } // space
6612     } // stride
6613   } // layout
6614 } // defset
6616 // B1 instruction variants need extra constraints.
// `ret` is FragA's predicate list, extended with [hasSM<80>, hasPTX<71>] when
// b1op is ".and.popc"; any other b1op value adds nothing. Op and Frag simply
// record the template arguments.
6617 class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
6618   string Op = b1op;
6619   WMMA_REGINFO Frag = FragA;
6620   list<Predicate> ret = !listconcat(
6621     FragA.Predicates,
6622     !if(!eq(b1op, ".and.popc"), [hasSM<80>,hasPTX<71>],[])
6623   );
6625 // WMMA.MMA
// Template arguments:
//   FragA/FragB/FragC - input fragments; their Ins dags form the instruction
//                       inputs (plus the trailing MmaCode:$ptx operand).
//   FragD             - result fragment; its Outs dag forms the outputs.
//   ALayout/BLayout   - layout names printed after the "aligned" part.
//   Satfinite         - when non-zero, ".satfinite" is appended to the opcode.
//   rnd               - rounding-mode name; printed as ".<rnd>" unless empty.
//   b1op              - string printed right after "wmma.mma"; it also feeds
//                       MMA_OP_PREDICATES.
// Predicates are taken from FragA (via MMA_OP_PREDICATES) and the geometry
// printed in the asm string is FragA.geom.
6626 class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
6627                WMMA_REGINFO FragC, WMMA_REGINFO FragD,
6628                string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
6629   : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
6630                          [FragA.Ins, FragB.Ins, FragC.Ins]>,
6631     // Requires does not seem to have effect on Instruction w/o Patterns.
6632     // We set it here anyways and propagate to the Pat<> we construct below.
6633     Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
6634   let OutOperandList = FragD.Outs;
6635   let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Type suffix: when A is f16 only the D and C element types are printed;
  // otherwise all four are printed in D, A, B, C order. The `1:` case is the
  // catch-all default.
6636   string TypeList = !cond(
6637     !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
6638                                      # "." # FragC.ptx_elt_type,
6639     1: "." # FragD.ptx_elt_type
6640        # "." # FragA.ptx_elt_type
6641        # "." # FragB.ptx_elt_type
6642        # "." # FragC.ptx_elt_type,
6643   );
  // Register lists are printed in D, A, B, C order, one per line.
6644   let AsmString = "wmma.mma"
6645                   # b1op
6646                   # ".sync"
6647                   # "${ptx:aligned}"
6648                   # "." # ALayout
6649                   # "." # BLayout
6650                   # "." # FragA.geom
6651                   # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
6652                   # TypeList
6653                   # !if(Satfinite, ".satfinite", "") # "\n\t\t"
6654                   # FragD.regstring # ",\n\t\t"
6655                   # FragA.regstring # ",\n\t\t"
6656                   # FragB.regstring # ",\n\t\t"
6657                   # FragC.regstring # ";";
// Create all wmma.mma variants: every combination of A/B layout, satfinite
// flag, rounding mode, op (a list indexed as [A, B, C, D] fragments) and b1op
// for which NVVM_WMMA_SUPPORTED reports support. The anonymous defs are
// collected into the WMMAs list.
6660 defset list<WMMA_INSTR> WMMAs  = {
6661   foreach layout_a = ["row", "col"] in {
6662     foreach layout_b = ["row", "col"] in {
6663       foreach satf = [0, 1] in {
6664         foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
6665           foreach op = NVVM_MMA_OPS.all_wmma_ops in {
6666             foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
6667               if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
6668                 def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
6669                               WMMA_REGINFO<op[1], "wmma.mma">,
6670                               WMMA_REGINFO<op[2], "wmma.mma">,
6671                               WMMA_REGINFO<op[3], "wmma.mma">,
6672                               layout_a, layout_b, satf, rnd, b1op>;
6673               }
6674             } // b1op
6675           } // op
6676         } // rnd
6677       } // satf
6678     } // layout_b
6679   } // layout_a
6680 } // defset
6682 // MMA
// Same shape as WMMA_MMA, minus the rounding-mode argument: A/B/C fragments
// are the inputs, D is the output, and predicates come from FragA via
// MMA_OP_PREDICATES. Differences visible in the asm string: "aligned" is
// hard-coded, the geometry precedes the layouts, ".satfinite" precedes the
// type list, all four element types are always printed, and b1op comes last.
6683 class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
6684                WMMA_REGINFO FragC, WMMA_REGINFO FragD,
6685                string ALayout, string BLayout, int Satfinite, string b1op>
6686   : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
6687                         [FragA.Ins, FragB.Ins, FragC.Ins]>,
6688     // Requires does not seem to have effect on Instruction w/o Patterns.
6689     // We set it here anyways and propagate to the Pat<> we construct below.
6690   Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
6691   let OutOperandList = FragD.Outs;
  // $ptx is not referenced by the asm string below, but MMA_PAT appends
  // ptx.version as the last operand of every instruction it handles, so the
  // operand still has to be declared here.
6692   let InOperandList  = !con(Args, (ins MmaCode:$ptx));
6693   string TypeList = "." # FragD.ptx_elt_type
6694                     # "." # FragA.ptx_elt_type
6695                     # "." # FragB.ptx_elt_type
6696                     # "." # FragC.ptx_elt_type;
6697   let AsmString = "mma.sync.aligned."
6698                   # FragA.geom
6699                   # "." # ALayout
6700                   # "." # BLayout
6701                   # !if(Satfinite, ".satfinite", "")
6702                   # TypeList
6703                   # b1op # "\n\t\t"
6704                   # FragD.regstring # ",\n\t\t"
6705                   # FragA.regstring # ",\n\t\t"
6706                   # FragB.regstring # ",\n\t\t"
6707                   # FragC.regstring # ";";
// Create all mma variants: every combination of A/B layout, satfinite flag,
// op (a list indexed as [A, B, C, D] fragments) and b1op for which
// NVVM_MMA_SUPPORTED reports support. The anonymous defs are collected into
// the MMAs list.
6710 defset list<WMMA_INSTR> MMAs  = {
6711   foreach layout_a = ["row", "col"] in {
6712     foreach layout_b = ["row", "col"] in {
6713       foreach satf = [0, 1] in {
6714         foreach op = NVVM_MMA_OPS.all_mma_ops in {
6715           foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
6716             if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
6717               def : MMA<WMMA_REGINFO<op[0], "mma">,
6718                         WMMA_REGINFO<op[1], "mma">,
6719                         WMMA_REGINFO<op[2], "mma">,
6720                         WMMA_REGINFO<op[3], "mma">,
6721                         layout_a, layout_b, satf, b1op>;
6722             }
6723           } // b1op
6724         } // op
6725       } // satf
6726     } // layout_b
6727   } // layout_a
6728 } // defset
6731 // ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
// Template arguments:
//   Frag       - fragment being loaded; supplies the output registers, the
//                predicates and the geom/frag/type parts of the asm string.
//   Transposed - when set, ".trans" is added to the opcode.
//   Space      - address-space suffix; ".shared" selects the shared
//                address-space check, anything else the generic one.
//   SrcOp      - operand kind used for the $src address.
6733 class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
6734                DAGOperand SrcOp>
6735   : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
6736     Requires<Frag.Predicates> {
6737   // Build PatFrag that only matches particular address space.
6738   PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
6739                              !cond(!eq(Space, ".shared"): AS_match.shared,
6740                                    true: AS_match.generic)>;
6741   // Build AS-constrained pattern.
6742   let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6744   let OutOperandList = Frag.Outs;
  // "aligned" is hard-coded in the asm string; $ptx is declared because
  // MMA_PAT appends ptx.version as the last operand of every instruction it
  // handles.
6745   let InOperandList = !con(Args, (ins MmaCode:$ptx));
6746   let AsmString = "ldmatrix.sync.aligned."
6747                   # Frag.geom
6748                   # "." # Frag.frag
6749                   # !if(Transposed, ".trans", "")
6750                   # Space
6751                   # "." # Frag.ptx_elt_type
6752                   # " " # Frag.regstring # ", [$src];";
6755 // Create all ldmatrix variants
// One anonymous LDMATRIX record per combination of transposed flag, address
// space (".shared" or generic) and address operand kind, for each fragment
// that NVVM_LDMATRIX_SUPPORTED accepts. The defs are collected into the
// LDMATRIXs list.
6756 defset list<WMMA_INSTR> LDMATRIXs  = {
6757   foreach transposed = [false, true] in {
6758     foreach space = [".shared", ""] in {
6759       foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
6760         foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
6761           if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
6762             def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
6763                             addr>;
6764       } // addr
6765     } // space
6766   } // transposed
6767 } // defset
6769 // Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
6770 // dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
6771 // the instruction record.
// Selection pattern for one WMMA_INSTR `wi`: the source side is
// wi.IntrinsicPattern; the result side is wi applied to its Args followed by
// ptx.version, which fills the trailing MmaCode:$ptx operand. The pattern is
// guarded by wi.Predicates (presumably the list the instruction received
// through its own Requires<> -- confirm in the Requires definition).
6772 class MMA_PAT<WMMA_INSTR wi>
6773       : Pat<wi.IntrinsicPattern,
6774             !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
6775                  (wi ptx.version))>,
6776         Requires<wi.Predicates>;
6778 // Build intrinsic->instruction patterns for all MMA instructions.
// Covers the four defsets built above: MMAs, WMMAs, MMA_LDSTs and LDMATRIXs.
6779 foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
6780   def : MMA_PAT<mma>;
// mapa instructions for intrinsic Intr; `suffix` is spliced into the opcode
// right after "mapa". Four variants are generated:
//   _32  / _64  - 32-bit / 64-bit $a and result, $b in an Int32Regs register.
//   _32i / _64i - same, but $b is an i32 immediate.
// All variants require hasSM<90> and hasPTX<78>.
6782 multiclass MAPA<string suffix, Intrinsic Intr> {
6783   def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
6784               "mapa" # suffix # ".u32\t$d, $a, $b;",
6785               [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
6786     Requires<[hasSM<90>, hasPTX<78>]>;
6787   def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
6788               "mapa" # suffix # ".u32\t$d, $a, $b;",
6789               [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
6790     Requires<[hasSM<90>, hasPTX<78>]>;
6791   def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
6792               "mapa" # suffix # ".u64\t$d, $a, $b;",
6793               [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
6794     Requires<[hasSM<90>, hasPTX<78>]>;
6795   def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
6796               "mapa" # suffix # ".u64\t$d, $a, $b;",
6797               [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
6798     Requires<[hasSM<90>, hasPTX<78>]>;
// Instantiate MAPA without a suffix and with the ".shared::cluster" suffix,
// each bound to its own intrinsic.
6801 defm mapa  : MAPA<"", int_nvvm_mapa>;
6802 defm mapa_shared_cluster  : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;
// getctarank instructions for intrinsic Intr; `suffix` is spliced into the
// opcode right after "getctarank". _32 takes a 32-bit $a and _64 a 64-bit $a;
// both write a 32-bit result register. Both require hasSM<90> and hasPTX<78>.
6805 multiclass GETCTARANK<string suffix, Intrinsic Intr> {
6806   def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
6807               "getctarank" # suffix # ".u32\t$d, $a;",
6808               [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
6809     Requires<[hasSM<90>, hasPTX<78>]>;
6810   def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
6811               "getctarank" # suffix # ".u64\t$d, $a;",
6812               [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
6813     Requires<[hasSM<90>, hasPTX<78>]>;
// Instantiate GETCTARANK without a suffix and with the ".shared::cluster"
// suffix, each bound to its own intrinsic.
6816 defm getctarank  : GETCTARANK<"", int_nvvm_getctarank>;
6817 defm getctarank_shared_cluster  : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
// Selects int_nvvm_is_explicit_cluster to a mov.pred from
// %is_explicit_cluster into a predicate (Int1Regs) register. Takes no inputs;
// requires hasSM<90> and hasPTX<78>.
6819 def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
6820               "mov.pred\t$d, %is_explicit_cluster;",
6821               [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
6822     Requires<[hasSM<90>, hasPTX<78>]>;
6824 // setmaxnreg inc/dec intrinsics
// The enclosing `let` sets isConvergent for everything declared in this scope
// (presumably including the instructions produced by the two defm lines
// below -- confirm with the generated records).
6825 let isConvergent = true in {
// Action ("inc" / "dec") is spliced into the opcode. The register count is an
// i32 immediate operand and the pattern matches it with timm. The instruction
// has no outputs and requires hasSM90a and hasPTX<80>.
6826 multiclass SET_MAXNREG<string Action, Intrinsic Intr> {
6827   def : NVPTXInst<(outs), (ins i32imm:$reg_count),
6828           "setmaxnreg." # Action # ".sync.aligned.u32 $reg_count;",
6829           [(Intr timm:$reg_count)]>,
6830     Requires<[hasSM90a, hasPTX<80>]>;
6833 defm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
6834 defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>;
6835 } // isConvergent