1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
3 ; RUN: llc -global-isel=1 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s
; Intrinsics called by the kernels in this file. Both permlane variants take
; three i32 operands followed by two i1 operands and return an i32.
; (review) workitem.id.y is declared but not called in the part of the file
; reviewed here - confirm whether a later test still needs it.
5 declare i32 @llvm.amdgcn.permlane16.var(i32, i32, i32, i1, i1)
6 declare i32 @llvm.amdgcn.permlanex16.var(i32, i32, i32, i1, i1)
7 declare i32 @llvm.amdgcn.workitem.id.x()
8 declare i32 @llvm.amdgcn.workitem.id.y()
; permlane16.var with kernel argument %src0 as the first two operands, kernel
; argument %src1 as the third operand, and both i1 operands false. The expected
; code copies s2/s3 into v0/v1, issues v_permlane16_var_b32 with no op_sel
; modifier, and stores the result through %out.
10 define amdgpu_kernel void @v_permlane16var_b32_vv(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
11 ; GFX12-SDAG-LABEL: v_permlane16var_b32_vv:
12 ; GFX12-SDAG: ; %bb.0:
13 ; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
14 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
15 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
16 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
17 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
18 ; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1
19 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
20 ; GFX12-SDAG-NEXT: s_nop 0
21 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
22 ; GFX12-SDAG-NEXT: s_endpgm
24 ; GFX12-GISEL-LABEL: v_permlane16var_b32_vv:
25 ; GFX12-GISEL: ; %bb.0:
26 ; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
27 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
28 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
29 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
30 ; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1
31 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
32 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
33 ; GFX12-GISEL-NEXT: s_nop 0
34 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
35 ; GFX12-GISEL-NEXT: s_endpgm
36 %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 false)
37 store i32 %v, ptr addrspace(1) %out
; permlane16.var whose third operand is the constant 1. The expected code does
; not encode the constant in the permlane instruction itself: it first moves 1
; into v0 and then passes v0 as the last source operand.
41 define amdgpu_kernel void @v_permlane16var_b32_vi(ptr addrspace(1) %out, i32 %src0) {
42 ; GFX12-SDAG-LABEL: v_permlane16var_b32_vi:
43 ; GFX12-SDAG: ; %bb.0:
44 ; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
45 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1
46 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
47 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
48 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
49 ; GFX12-SDAG-NEXT: v_permlane16_var_b32 v1, v1, v0
50 ; GFX12-SDAG-NEXT: global_store_b32 v2, v1, s[0:1]
51 ; GFX12-SDAG-NEXT: s_nop 0
52 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
53 ; GFX12-SDAG-NEXT: s_endpgm
55 ; GFX12-GISEL-LABEL: v_permlane16var_b32_vi:
56 ; GFX12-GISEL: ; %bb.0:
57 ; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
58 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
59 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, s2
60 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
61 ; GFX12-GISEL-NEXT: v_permlane16_var_b32 v1, v1, v0
62 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
63 ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
64 ; GFX12-GISEL-NEXT: s_nop 0
65 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
66 ; GFX12-GISEL-NEXT: s_endpgm
67 %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 1, i1 false, i1 false)
68 store i32 %v, ptr addrspace(1) %out
; permlane16.var whose third operand is the constant 49617 (0xc1d1). As in the
; constant-1 test, the expected code moves the value into v0 first and uses v0
; as the last source operand of the permlane instruction.
72 define amdgpu_kernel void @v_permlane16var_b32_vl(ptr addrspace(1) %out, i32 %src0) {
73 ; GFX12-SDAG-LABEL: v_permlane16var_b32_vl:
74 ; GFX12-SDAG: ; %bb.0:
75 ; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
76 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0xc1d1
77 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
78 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
79 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
80 ; GFX12-SDAG-NEXT: v_permlane16_var_b32 v1, v1, v0
81 ; GFX12-SDAG-NEXT: global_store_b32 v2, v1, s[0:1]
82 ; GFX12-SDAG-NEXT: s_nop 0
83 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
84 ; GFX12-SDAG-NEXT: s_endpgm
86 ; GFX12-GISEL-LABEL: v_permlane16var_b32_vl:
87 ; GFX12-GISEL: ; %bb.0:
88 ; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
89 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
90 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, 0xc1d1 :: v_dual_mov_b32 v1, s2
91 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
92 ; GFX12-GISEL-NEXT: v_permlane16_var_b32 v1, v1, v0
93 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
94 ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
95 ; GFX12-GISEL-NEXT: s_nop 0
96 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
97 ; GFX12-GISEL-NEXT: s_endpgm
98 %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 49617, i1 false, i1 false)
99 store i32 %v, ptr addrspace(1) %out
; permlane16.var whose third operand is the result of workitem.id.x. In the
; expected code v0 is used directly as the last source operand of the permlane
; instruction, with no earlier instruction in the checked sequence writing v0.
103 define amdgpu_kernel void @v_permlane16var_b32_vvv(ptr addrspace(1) %out, i32 %src0) {
104 ; GFX12-SDAG-LABEL: v_permlane16var_b32_vvv:
105 ; GFX12-SDAG: ; %bb.0:
106 ; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
107 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
108 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
109 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
110 ; GFX12-SDAG-NEXT: v_permlane16_var_b32 v1, v1, v0
111 ; GFX12-SDAG-NEXT: global_store_b32 v2, v1, s[0:1]
112 ; GFX12-SDAG-NEXT: s_nop 0
113 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
114 ; GFX12-SDAG-NEXT: s_endpgm
116 ; GFX12-GISEL-LABEL: v_permlane16var_b32_vvv:
117 ; GFX12-GISEL: ; %bb.0:
118 ; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
119 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
120 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
121 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
122 ; GFX12-GISEL-NEXT: v_permlane16_var_b32 v1, v1, v0
123 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
124 ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
125 ; GFX12-GISEL-NEXT: s_nop 0
126 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
127 ; GFX12-GISEL-NEXT: s_endpgm
128 %tidx = call i32 @llvm.amdgcn.workitem.id.x()
129 %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %tidx, i1 false, i1 false)
130 store i32 %v, ptr addrspace(1) %out
; Same operands as the plain two-argument permlane16.var test, but with the
; fourth (first i1) operand set to true. The expected permlane instruction
; carries the modifier op_sel:[1,0].
134 define amdgpu_kernel void @v_permlane16var_b32_vv_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
135 ; GFX12-SDAG-LABEL: v_permlane16var_b32_vv_fi:
136 ; GFX12-SDAG: ; %bb.0:
137 ; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
138 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
139 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
140 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
141 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
142 ; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
143 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
144 ; GFX12-SDAG-NEXT: s_nop 0
145 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
146 ; GFX12-SDAG-NEXT: s_endpgm
148 ; GFX12-GISEL-LABEL: v_permlane16var_b32_vv_fi:
149 ; GFX12-GISEL: ; %bb.0:
150 ; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
151 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
152 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
153 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
154 ; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
155 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
156 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
157 ; GFX12-GISEL-NEXT: s_nop 0
158 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
159 ; GFX12-GISEL-NEXT: s_endpgm
160 %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 false)
161 store i32 %v, ptr addrspace(1) %out
; Same operands as the plain two-argument permlane16.var test, but with the
; fifth (second i1) operand set to true. The expected permlane instruction
; carries the modifier op_sel:[0,1].
165 define amdgpu_kernel void @v_permlane16var_b32_vv_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
166 ; GFX12-SDAG-LABEL: v_permlane16var_b32_vv_bc:
167 ; GFX12-SDAG: ; %bb.0:
168 ; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
169 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
170 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
171 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
172 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
173 ; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
174 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
175 ; GFX12-SDAG-NEXT: s_nop 0
176 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
177 ; GFX12-SDAG-NEXT: s_endpgm
179 ; GFX12-GISEL-LABEL: v_permlane16var_b32_vv_bc:
180 ; GFX12-GISEL: ; %bb.0:
181 ; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
182 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
183 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
184 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
185 ; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
186 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
187 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
188 ; GFX12-GISEL-NEXT: s_nop 0
189 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
190 ; GFX12-GISEL-NEXT: s_endpgm
191 %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 true)
192 store i32 %v, ptr addrspace(1) %out
; Same operands as the plain two-argument permlane16.var test, but with both
; trailing i1 operands set to true. The expected permlane instruction carries
; the modifier op_sel:[1,1].
196 define amdgpu_kernel void @v_permlane16var_b32_vv_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
197 ; GFX12-SDAG-LABEL: v_permlane16var_b32_vv_fi_bc:
198 ; GFX12-SDAG: ; %bb.0:
199 ; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
200 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
201 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
202 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
203 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
204 ; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
205 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
206 ; GFX12-SDAG-NEXT: s_nop 0
207 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
208 ; GFX12-SDAG-NEXT: s_endpgm
210 ; GFX12-GISEL-LABEL: v_permlane16var_b32_vv_fi_bc:
211 ; GFX12-GISEL: ; %bb.0:
212 ; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
213 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
214 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
215 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
216 ; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
217 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
218 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
219 ; GFX12-GISEL-NEXT: s_nop 0
220 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
221 ; GFX12-GISEL-NEXT: s_endpgm
222 %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 true)
223 store i32 %v, ptr addrspace(1) %out
; permlanex16.var with kernel argument %src0 as the first two operands, kernel
; argument %src1 as the third operand, and both i1 operands false. The expected
; code copies s2/s3 into v0/v1, issues v_permlanex16_var_b32 with no op_sel
; modifier, and stores the result through %out.
227 define amdgpu_kernel void @v_permlanex16var_b32_vv(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
228 ; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv:
229 ; GFX12-SDAG: ; %bb.0:
230 ; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
231 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
232 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
233 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
234 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
235 ; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1
236 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
237 ; GFX12-SDAG-NEXT: s_nop 0
238 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
239 ; GFX12-SDAG-NEXT: s_endpgm
241 ; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv:
242 ; GFX12-GISEL: ; %bb.0:
243 ; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
244 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
245 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
246 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
247 ; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1
248 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
249 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
250 ; GFX12-GISEL-NEXT: s_nop 0
251 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
252 ; GFX12-GISEL-NEXT: s_endpgm
253 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 false)
254 store i32 %v, ptr addrspace(1) %out
; permlanex16.var whose third operand is the constant 1. The expected code
; moves 1 into v0 first and passes v0 as the last source operand of the
; permlane instruction rather than encoding the constant in it.
258 define amdgpu_kernel void @v_permlanex16var_b32_vi(ptr addrspace(1) %out, i32 %src0) {
259 ; GFX12-SDAG-LABEL: v_permlanex16var_b32_vi:
260 ; GFX12-SDAG: ; %bb.0:
261 ; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
262 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1
263 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
264 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
265 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
266 ; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v1, v1, v0
267 ; GFX12-SDAG-NEXT: global_store_b32 v2, v1, s[0:1]
268 ; GFX12-SDAG-NEXT: s_nop 0
269 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
270 ; GFX12-SDAG-NEXT: s_endpgm
272 ; GFX12-GISEL-LABEL: v_permlanex16var_b32_vi:
273 ; GFX12-GISEL: ; %bb.0:
274 ; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
275 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
276 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, s2
277 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
278 ; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v1, v1, v0
279 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
280 ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
281 ; GFX12-GISEL-NEXT: s_nop 0
282 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
283 ; GFX12-GISEL-NEXT: s_endpgm
284 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 1, i1 false, i1 false)
285 store i32 %v, ptr addrspace(1) %out
; permlanex16.var whose third operand is the constant 49617 (0xc1d1). The
; expected code moves the value into v0 first and uses v0 as the last source
; operand of the permlane instruction.
289 define amdgpu_kernel void @v_permlanex16var_b32_vl(ptr addrspace(1) %out, i32 %src0) {
290 ; GFX12-SDAG-LABEL: v_permlanex16var_b32_vl:
291 ; GFX12-SDAG: ; %bb.0:
292 ; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
293 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0xc1d1
294 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
295 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
296 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
297 ; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v1, v1, v0
298 ; GFX12-SDAG-NEXT: global_store_b32 v2, v1, s[0:1]
299 ; GFX12-SDAG-NEXT: s_nop 0
300 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
301 ; GFX12-SDAG-NEXT: s_endpgm
303 ; GFX12-GISEL-LABEL: v_permlanex16var_b32_vl:
304 ; GFX12-GISEL: ; %bb.0:
305 ; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
306 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
307 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, 0xc1d1 :: v_dual_mov_b32 v1, s2
308 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
309 ; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v1, v1, v0
310 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
311 ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
312 ; GFX12-GISEL-NEXT: s_nop 0
313 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
314 ; GFX12-GISEL-NEXT: s_endpgm
315 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 49617, i1 false, i1 false)
316 store i32 %v, ptr addrspace(1) %out
; permlanex16.var whose third operand is the result of workitem.id.x. In the
; expected code v0 is used directly as the last source operand of the permlane
; instruction, with no earlier instruction in the checked sequence writing v0.
320 define amdgpu_kernel void @v_permlanex16var_b32_vvv(ptr addrspace(1) %out, i32 %src0) {
321 ; GFX12-SDAG-LABEL: v_permlanex16var_b32_vvv:
322 ; GFX12-SDAG: ; %bb.0:
323 ; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
324 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
325 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
326 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
327 ; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v1, v1, v0
328 ; GFX12-SDAG-NEXT: global_store_b32 v2, v1, s[0:1]
329 ; GFX12-SDAG-NEXT: s_nop 0
330 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
331 ; GFX12-SDAG-NEXT: s_endpgm
333 ; GFX12-GISEL-LABEL: v_permlanex16var_b32_vvv:
334 ; GFX12-GISEL: ; %bb.0:
335 ; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
336 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
337 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
338 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
339 ; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v1, v1, v0
340 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
341 ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
342 ; GFX12-GISEL-NEXT: s_nop 0
343 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
344 ; GFX12-GISEL-NEXT: s_endpgm
345 %tidx = call i32 @llvm.amdgcn.workitem.id.x()
346 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %tidx, i1 false, i1 false)
347 store i32 %v, ptr addrspace(1) %out
; Same operands as the plain two-argument permlanex16.var test, but with the
; fourth (first i1) operand set to true. The expected permlane instruction
; carries the modifier op_sel:[1,0].
351 define amdgpu_kernel void @v_permlanex16var_b32_vv_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
352 ; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv_fi:
353 ; GFX12-SDAG: ; %bb.0:
354 ; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
355 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
356 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
357 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
358 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
359 ; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
360 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
361 ; GFX12-SDAG-NEXT: s_nop 0
362 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
363 ; GFX12-SDAG-NEXT: s_endpgm
365 ; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv_fi:
366 ; GFX12-GISEL: ; %bb.0:
367 ; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
368 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
369 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
370 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
371 ; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
372 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
373 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
374 ; GFX12-GISEL-NEXT: s_nop 0
375 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
376 ; GFX12-GISEL-NEXT: s_endpgm
377 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 false)
378 store i32 %v, ptr addrspace(1) %out
; Same operands as the plain two-argument permlanex16.var test, but with the
; fifth (second i1) operand set to true. The expected permlane instruction
; carries the modifier op_sel:[0,1].
382 define amdgpu_kernel void @v_permlanex16var_b32_vv_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
383 ; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv_bc:
384 ; GFX12-SDAG: ; %bb.0:
385 ; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
386 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
387 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
388 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
389 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
390 ; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
391 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
392 ; GFX12-SDAG-NEXT: s_nop 0
393 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
394 ; GFX12-SDAG-NEXT: s_endpgm
396 ; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv_bc:
397 ; GFX12-GISEL: ; %bb.0:
398 ; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
399 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
400 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
401 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
402 ; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
403 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
404 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
405 ; GFX12-GISEL-NEXT: s_nop 0
406 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
407 ; GFX12-GISEL-NEXT: s_endpgm
408 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 true)
409 store i32 %v, ptr addrspace(1) %out
; Same operands as the plain two-argument permlanex16.var test, but with both
; trailing i1 operands set to true. The expected permlane instruction carries
; the modifier op_sel:[1,1].
413 define amdgpu_kernel void @v_permlanex16var_b32_vv_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
414 ; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv_fi_bc:
415 ; GFX12-SDAG: ; %bb.0:
416 ; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
417 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
418 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
419 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
420 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
421 ; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1]
422 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
423 ; GFX12-SDAG-NEXT: s_nop 0
424 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
425 ; GFX12-SDAG-NEXT: s_endpgm
427 ; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv_fi_bc:
428 ; GFX12-GISEL: ; %bb.0:
429 ; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
430 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
431 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
432 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
433 ; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1]
434 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
435 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
436 ; GFX12-GISEL-NEXT: s_nop 0
437 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
438 ; GFX12-GISEL-NEXT: s_endpgm
439 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 true)
440 store i32 %v, ptr addrspace(1) %out
; permlane16.var with the result of workitem.id.x as both of the first two
; operands and kernel argument %src1 as the third. The expected permlane
; instruction uses v0 as both destination and first source, and takes its last
; source from v1, which is loaded from s2 just before.
444 define amdgpu_kernel void @v_permlane16var_b32_tid_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
445 ; GFX12-SDAG-LABEL: v_permlane16var_b32_tid_tid:
446 ; GFX12-SDAG: ; %bb.0:
447 ; GFX12-SDAG-NEXT: s_clause 0x1
448 ; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
449 ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
450 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
451 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
452 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
453 ; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1
454 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
455 ; GFX12-SDAG-NEXT: s_nop 0
456 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
457 ; GFX12-SDAG-NEXT: s_endpgm
459 ; GFX12-GISEL-LABEL: v_permlane16var_b32_tid_tid:
460 ; GFX12-GISEL: ; %bb.0:
461 ; GFX12-GISEL-NEXT: s_clause 0x1
462 ; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
463 ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
464 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
465 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
466 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
467 ; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1
468 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
469 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
470 ; GFX12-GISEL-NEXT: s_nop 0
471 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
472 ; GFX12-GISEL-NEXT: s_endpgm
473 %tidx = call i32 @llvm.amdgcn.workitem.id.x()
474 %v = call i32 @llvm.amdgcn.permlane16.var(i32 %tidx, i32 %tidx, i32 %src1, i1 false, i1 false)
475 store i32 %v, ptr addrspace(1) %out
; permlane16.var whose first operand is a frozen poison value and whose second
; operand is the result of workitem.id.x. The expected instruction sequence is
; identical to the test that passes the workitem id for both operands: v0 is
; destination and first source, and no register is initialised for the first
; operand.
479 define amdgpu_kernel void @v_permlane16var_b32_undef_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
480 ; GFX12-SDAG-LABEL: v_permlane16var_b32_undef_tid:
481 ; GFX12-SDAG: ; %bb.0:
482 ; GFX12-SDAG-NEXT: s_clause 0x1
483 ; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
484 ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
485 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
486 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
487 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
488 ; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1
489 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
490 ; GFX12-SDAG-NEXT: s_nop 0
491 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
492 ; GFX12-SDAG-NEXT: s_endpgm
494 ; GFX12-GISEL-LABEL: v_permlane16var_b32_undef_tid:
495 ; GFX12-GISEL: ; %bb.0:
496 ; GFX12-GISEL-NEXT: s_clause 0x1
497 ; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
498 ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
499 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
500 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
501 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
502 ; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1
503 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
504 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
505 ; GFX12-GISEL-NEXT: s_nop 0
506 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
507 ; GFX12-GISEL-NEXT: s_endpgm
508 %tidx = call i32 @llvm.amdgcn.workitem.id.x()
509 %undef = freeze i32 poison
510 %v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 false)
511 store i32 %v, ptr addrspace(1) %out
; permlane16.var whose first operand is the constant 12345 (0x3039) and whose
; second operand is the result of workitem.id.x. The expected code initialises
; the destination register v1 with 0x3039 before the permlane instruction,
; which then reads v0 and v2 as its sources.
515 define amdgpu_kernel void @v_permlane16var_b32_i_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
516 ; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid:
517 ; GFX12-SDAG: ; %bb.0:
518 ; GFX12-SDAG-NEXT: s_clause 0x1
519 ; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
520 ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
521 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, 0x3039
522 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
523 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v2, s2
524 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
525 ; GFX12-SDAG-NEXT: v_permlane16_var_b32 v1, v0, v2
526 ; GFX12-SDAG-NEXT: global_store_b32 v3, v1, s[0:1]
527 ; GFX12-SDAG-NEXT: s_nop 0
528 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
529 ; GFX12-SDAG-NEXT: s_endpgm
531 ; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid:
532 ; GFX12-GISEL: ; %bb.0:
533 ; GFX12-GISEL-NEXT: s_clause 0x1
534 ; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
535 ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
536 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
537 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0x3039 :: v_dual_mov_b32 v2, s2
538 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
539 ; GFX12-GISEL-NEXT: v_permlane16_var_b32 v1, v0, v2
540 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
541 ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
542 ; GFX12-GISEL-NEXT: s_nop 0
543 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
544 ; GFX12-GISEL-NEXT: s_endpgm
545 %tidx = call i32 @llvm.amdgcn.workitem.id.x()
546 %v = call i32 @llvm.amdgcn.permlane16.var(i32 12345, i32 %tidx, i32 %src1, i1 false, i1 false)
547 store i32 %v, ptr addrspace(1) %out
; permlane16.var with a frozen poison first operand, the result of
; workitem.id.x as the second operand, and the fourth (first i1) operand set to
; true. The expected permlane instruction carries op_sel:[1,0] and uses v0 as
; destination and first source.
; (review) Despite the "i_" in the test name, the first operand here is a
; frozen poison value, not an immediate - confirm this is intended.
551 define amdgpu_kernel void @v_permlane16var_b32_i_tid_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
552 ; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid_fi:
553 ; GFX12-SDAG: ; %bb.0:
554 ; GFX12-SDAG-NEXT: s_clause 0x1
555 ; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
556 ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
557 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
558 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
559 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
560 ; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
561 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
562 ; GFX12-SDAG-NEXT: s_nop 0
563 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
564 ; GFX12-SDAG-NEXT: s_endpgm
566 ; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid_fi:
567 ; GFX12-GISEL: ; %bb.0:
568 ; GFX12-GISEL-NEXT: s_clause 0x1
569 ; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
570 ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
571 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
572 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
573 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
574 ; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
575 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
576 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
577 ; GFX12-GISEL-NEXT: s_nop 0
578 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
579 ; GFX12-GISEL-NEXT: s_endpgm
580 %tidx = call i32 @llvm.amdgcn.workitem.id.x()
581 %undef = freeze i32 poison
582 %v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 false)
583 store i32 %v, ptr addrspace(1) %out
; permlane16.var with a frozen poison first operand, the result of
; workitem.id.x as the second operand, and the fifth (second i1) operand set to
; true. The expected permlane instruction carries op_sel:[0,1] and uses v0 as
; destination and first source.
; (review) Despite the "i_" in the test name, the first operand here is a
; frozen poison value, not an immediate - confirm this is intended.
587 define amdgpu_kernel void @v_permlane16var_b32_i_tid_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
588 ; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid_bc:
589 ; GFX12-SDAG: ; %bb.0:
590 ; GFX12-SDAG-NEXT: s_clause 0x1
591 ; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
592 ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
593 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
594 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
595 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
596 ; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
597 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
598 ; GFX12-SDAG-NEXT: s_nop 0
599 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
600 ; GFX12-SDAG-NEXT: s_endpgm
602 ; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid_bc:
603 ; GFX12-GISEL: ; %bb.0:
604 ; GFX12-GISEL-NEXT: s_clause 0x1
605 ; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
606 ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
607 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
608 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
609 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
610 ; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
611 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
612 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
613 ; GFX12-GISEL-NEXT: s_nop 0
614 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
615 ; GFX12-GISEL-NEXT: s_endpgm
616 %tidx = call i32 @llvm.amdgcn.workitem.id.x()
617 %undef = freeze i32 poison
618 %v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 true)
619 store i32 %v, ptr addrspace(1) %out
; permlane16.var with a frozen poison first operand, the result of
; workitem.id.x as the second operand, and both trailing i1 operands set to
; true. The expected permlane instruction carries op_sel:[1,1] and uses v0 as
; destination and first source.
; (review) Despite the "i_" in the test name, the first operand here is a
; frozen poison value, not an immediate - confirm this is intended.
623 define amdgpu_kernel void @v_permlane16var_b32_i_tid_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
624 ; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid_fi_bc:
625 ; GFX12-SDAG: ; %bb.0:
626 ; GFX12-SDAG-NEXT: s_clause 0x1
627 ; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
628 ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
629 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
630 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
631 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
632 ; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
633 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
634 ; GFX12-SDAG-NEXT: s_nop 0
635 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
636 ; GFX12-SDAG-NEXT: s_endpgm
638 ; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid_fi_bc:
639 ; GFX12-GISEL: ; %bb.0:
640 ; GFX12-GISEL-NEXT: s_clause 0x1
641 ; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
642 ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
643 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
644 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
645 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
646 ; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
647 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
648 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
649 ; GFX12-GISEL-NEXT: s_nop 0
650 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
651 ; GFX12-GISEL-NEXT: s_endpgm
652 %tidx = call i32 @llvm.amdgcn.workitem.id.x()
653 %undef = freeze i32 poison
654 %v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 true)
655 store i32 %v, ptr addrspace(1) %out
; permlanex16.var with the result of workitem.id.x as both of the first two
; operands and kernel argument %src1 as the third. The expected permlane
; instruction uses v0 as both destination and first source, and takes its last
; source from v1, which is loaded from s2 just before.
659 define amdgpu_kernel void @v_permlanex16var_b32_tid_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
660 ; GFX12-SDAG-LABEL: v_permlanex16var_b32_tid_tid:
661 ; GFX12-SDAG: ; %bb.0:
662 ; GFX12-SDAG-NEXT: s_clause 0x1
663 ; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
664 ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
665 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
666 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
667 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
668 ; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1
669 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
670 ; GFX12-SDAG-NEXT: s_nop 0
671 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
672 ; GFX12-SDAG-NEXT: s_endpgm
674 ; GFX12-GISEL-LABEL: v_permlanex16var_b32_tid_tid:
675 ; GFX12-GISEL: ; %bb.0:
676 ; GFX12-GISEL-NEXT: s_clause 0x1
677 ; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
678 ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
679 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
680 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
681 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
682 ; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1
683 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
684 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
685 ; GFX12-GISEL-NEXT: s_nop 0
686 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
687 ; GFX12-GISEL-NEXT: s_endpgm
688 %tidx = call i32 @llvm.amdgcn.workitem.id.x()
689 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %tidx, i32 %tidx, i32 %src1, i1 false, i1 false)
690 store i32 %v, ptr addrspace(1) %out
; permlanex16.var whose first operand is a frozen poison value and whose second
; operand is the result of workitem.id.x. The expected instruction sequence is
; identical to the test that passes the workitem id for both operands: v0 is
; destination and first source, and no register is initialised for the first
; operand.
694 define amdgpu_kernel void @v_permlanex16var_b32_undef_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
695 ; GFX12-SDAG-LABEL: v_permlanex16var_b32_undef_tid:
696 ; GFX12-SDAG: ; %bb.0:
697 ; GFX12-SDAG-NEXT: s_clause 0x1
698 ; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
699 ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
700 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
701 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
702 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
703 ; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1
704 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
705 ; GFX12-SDAG-NEXT: s_nop 0
706 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
707 ; GFX12-SDAG-NEXT: s_endpgm
709 ; GFX12-GISEL-LABEL: v_permlanex16var_b32_undef_tid:
710 ; GFX12-GISEL: ; %bb.0:
711 ; GFX12-GISEL-NEXT: s_clause 0x1
712 ; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
713 ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
714 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
715 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
716 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
717 ; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1
718 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
719 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
720 ; GFX12-GISEL-NEXT: s_nop 0
721 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
722 ; GFX12-GISEL-NEXT: s_endpgm
723 %tidx = call i32 @llvm.amdgcn.workitem.id.x()
724 %undef = freeze i32 poison
725 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 false)
726 store i32 %v, ptr addrspace(1) %out
; Test: llvm.amdgcn.permlanex16.var whose first operand is the constant 12345.
; The second operand is workitem.id.x, the third is the %src1 kernel argument,
; and both trailing i1 flags are false. The result is stored to %out.
; %src0 is declared in the signature but is not referenced by the body.
; Both check blocks expect 0x3039 (12345) to be moved into v1 before the
; permlane, and the permlane to write v1: "v_permlanex16_var_b32 v1, v0, v2".
; The SelectionDAG block expects a standalone v_mov_b32 for the constant,
; while the GlobalISel block expects it paired into a v_dual_mov_b32.
730 define amdgpu_kernel void @v_permlanex16var_b32_i_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
731 ; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid:
732 ; GFX12-SDAG: ; %bb.0:
733 ; GFX12-SDAG-NEXT: s_clause 0x1
734 ; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
735 ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
736 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, 0x3039
737 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
738 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v2, s2
739 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
740 ; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v1, v0, v2
741 ; GFX12-SDAG-NEXT: global_store_b32 v3, v1, s[0:1]
742 ; GFX12-SDAG-NEXT: s_nop 0
743 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
744 ; GFX12-SDAG-NEXT: s_endpgm
746 ; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid:
747 ; GFX12-GISEL: ; %bb.0:
748 ; GFX12-GISEL-NEXT: s_clause 0x1
749 ; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
750 ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
751 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
752 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0x3039 :: v_dual_mov_b32 v2, s2
753 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
754 ; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v1, v0, v2
755 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
756 ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
757 ; GFX12-GISEL-NEXT: s_nop 0
758 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
759 ; GFX12-GISEL-NEXT: s_endpgm
760 %tidx = call i32 @llvm.amdgcn.workitem.id.x()
761 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 12345, i32 %tidx, i32 %src1, i1 false, i1 false)
762 store i32 %v, ptr addrspace(1) %out
; Test: llvm.amdgcn.permlanex16.var with the fourth operand (first i1 flag)
; set to true and the fifth set to false.
; The first operand is a frozen poison value, the second is workitem.id.x and
; the third is the %src1 kernel argument. The result is stored to %out.
; %src0 is declared in the signature but is not referenced by the body.
; Both check blocks expect the flag to appear as op_sel:[1,0] on
; "v_permlanex16_var_b32 v0, v0, v1".
; NOTE(review): the "_fi" suffix presumably names this flag (fetch-inactive?) -
; confirm against the intrinsic documentation.
; NOTE(review): the name says "i_tid" but the first operand is %undef, not an
; immediate - confirm whether that is intended.
766 define amdgpu_kernel void @v_permlanex16var_b32_i_tid_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
767 ; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid_fi:
768 ; GFX12-SDAG: ; %bb.0:
769 ; GFX12-SDAG-NEXT: s_clause 0x1
770 ; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
771 ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
772 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
773 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
774 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
775 ; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
776 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
777 ; GFX12-SDAG-NEXT: s_nop 0
778 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
779 ; GFX12-SDAG-NEXT: s_endpgm
781 ; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid_fi:
782 ; GFX12-GISEL: ; %bb.0:
783 ; GFX12-GISEL-NEXT: s_clause 0x1
784 ; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
785 ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
786 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
787 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
788 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
789 ; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
790 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
791 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
792 ; GFX12-GISEL-NEXT: s_nop 0
793 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
794 ; GFX12-GISEL-NEXT: s_endpgm
795 %tidx = call i32 @llvm.amdgcn.workitem.id.x()
796 %undef = freeze i32 poison
797 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 false)
798 store i32 %v, ptr addrspace(1) %out
; Test: llvm.amdgcn.permlanex16.var with the fourth operand (first i1 flag)
; set to false and the fifth (second i1 flag) set to true.
; The first operand is a frozen poison value, the second is workitem.id.x and
; the third is the %src1 kernel argument. The result is stored to %out.
; %src0 is declared in the signature but is not referenced by the body.
; Both check blocks expect the flag to appear as op_sel:[0,1] on
; "v_permlanex16_var_b32 v0, v0, v1".
; NOTE(review): the "_bc" suffix presumably names this flag (bound-control?) -
; confirm against the intrinsic documentation.
; NOTE(review): the name says "i_tid" but the first operand is %undef, not an
; immediate - confirm whether that is intended.
802 define amdgpu_kernel void @v_permlanex16var_b32_i_tid_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
803 ; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid_bc:
804 ; GFX12-SDAG: ; %bb.0:
805 ; GFX12-SDAG-NEXT: s_clause 0x1
806 ; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
807 ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
808 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
809 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
810 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
811 ; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
812 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
813 ; GFX12-SDAG-NEXT: s_nop 0
814 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
815 ; GFX12-SDAG-NEXT: s_endpgm
817 ; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid_bc:
818 ; GFX12-GISEL: ; %bb.0:
819 ; GFX12-GISEL-NEXT: s_clause 0x1
820 ; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
821 ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
822 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
823 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
824 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
825 ; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
826 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
827 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
828 ; GFX12-GISEL-NEXT: s_nop 0
829 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
830 ; GFX12-GISEL-NEXT: s_endpgm
831 %tidx = call i32 @llvm.amdgcn.workitem.id.x()
832 %undef = freeze i32 poison
833 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 true)
834 store i32 %v, ptr addrspace(1) %out
; Test: llvm.amdgcn.permlanex16.var with both trailing i1 flags set to true.
; The first operand is a frozen poison value, the second is workitem.id.x and
; the third is the %src1 kernel argument. The result is stored to %out.
; %src0 is declared in the signature but is not referenced by the body.
; Both check blocks expect the two flags to appear together as op_sel:[1,1] on
; "v_permlanex16_var_b32 v0, v0, v1".
; NOTE(review): the "_fi_bc" suffix presumably names the two flags in operand
; order - confirm against the intrinsic documentation.
; NOTE(review): the name says "i_tid" but the first operand is %undef, not an
; immediate - confirm whether that is intended.
838 define amdgpu_kernel void @v_permlanex16var_b32_i_tid_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
839 ; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid_fi_bc:
840 ; GFX12-SDAG: ; %bb.0:
841 ; GFX12-SDAG-NEXT: s_clause 0x1
842 ; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
843 ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
844 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
845 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
846 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
847 ; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1]
848 ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
849 ; GFX12-SDAG-NEXT: s_nop 0
850 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
851 ; GFX12-SDAG-NEXT: s_endpgm
853 ; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid_fi_bc:
854 ; GFX12-GISEL: ; %bb.0:
855 ; GFX12-GISEL-NEXT: s_clause 0x1
856 ; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
857 ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
858 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
859 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
860 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
861 ; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1]
862 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
863 ; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
864 ; GFX12-GISEL-NEXT: s_nop 0
865 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
866 ; GFX12-GISEL-NEXT: s_endpgm
867 %tidx = call i32 @llvm.amdgcn.workitem.id.x()
868 %undef = freeze i32 poison
869 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 true)
870 store i32 %v, ptr addrspace(1) %out