1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
; Shuffle mask is entirely poison, so no lane of the stored value is defined.
; The shared checks expect only the entry wait and the return: neither the
; inline-asm definition nor the store appears in the expected output.
7 define void @v_shuffle_v2bf16_v2bf16__u_u(ptr addrspace(1) inreg %ptr) {
8 ; GFX9-LABEL: v_shuffle_v2bf16_v2bf16__u_u:
10 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
13 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> poison
14 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <0, poison>: lane 0 keeps element 0 of %vec0, lane 1 is poison.
; The checks expect the asm-defined register to be stored unchanged, with no
; shuffle instruction in between. The gfx940 checks differ only in the store
; (pointer in s[0:1] and sc0 sc1 modifiers instead of s[16:17]).
18 define void @v_shuffle_v2bf16_v2bf16__0_u(ptr addrspace(1) inreg %ptr) {
19 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__0_u:
21 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
23 ; GFX900-NEXT: ;;#ASMSTART
24 ; GFX900-NEXT: ; def v1
25 ; GFX900-NEXT: ;;#ASMEND
26 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
27 ; GFX900-NEXT: s_waitcnt vmcnt(0)
28 ; GFX900-NEXT: s_setpc_b64 s[30:31]
30 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__0_u:
32 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
34 ; GFX90A-NEXT: ;;#ASMSTART
35 ; GFX90A-NEXT: ; def v1
36 ; GFX90A-NEXT: ;;#ASMEND
37 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
38 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
39 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
41 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__0_u:
43 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
45 ; GFX940-NEXT: ;;#ASMSTART
46 ; GFX940-NEXT: ; def v1
47 ; GFX940-NEXT: ;;#ASMEND
48 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
49 ; GFX940-NEXT: s_waitcnt vmcnt(0)
50 ; GFX940-NEXT: s_setpc_b64 s[30:31]
51 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
52 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 poison>
53 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <1, poison>: lane 0 takes element 1 of %vec0, lane 1 is poison.
; The checks expect one v_alignbit_b32 by 16 that moves the upper half of the
; asm-defined register into the lower half. Its first source (s4, or s0 on
; gfx940) is never written in the expected output; presumably it only feeds
; the don't-care upper half.
57 define void @v_shuffle_v2bf16_v2bf16__1_u(ptr addrspace(1) inreg %ptr) {
58 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__1_u:
60 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61 ; GFX900-NEXT: ;;#ASMSTART
62 ; GFX900-NEXT: ; def v1
63 ; GFX900-NEXT: ;;#ASMEND
64 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
65 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16
66 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
67 ; GFX900-NEXT: s_waitcnt vmcnt(0)
68 ; GFX900-NEXT: s_setpc_b64 s[30:31]
70 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__1_u:
72 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73 ; GFX90A-NEXT: ;;#ASMSTART
74 ; GFX90A-NEXT: ; def v1
75 ; GFX90A-NEXT: ;;#ASMEND
76 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
77 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16
78 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
79 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
80 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
82 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__1_u:
84 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GFX940-NEXT: ;;#ASMSTART
86 ; GFX940-NEXT: ; def v1
87 ; GFX940-NEXT: ;;#ASMEND
88 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
89 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16
90 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
91 ; GFX940-NEXT: s_waitcnt vmcnt(0)
92 ; GFX940-NEXT: s_setpc_b64 s[30:31]
93 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
94 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 poison>
95 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <2, poison>: index 2 names element 0 of the second operand, which is
; poison here, so neither lane of the result is defined.
; The shared checks expect only the entry wait and the return (no asm
; definition, no store).
99 define void @v_shuffle_v2bf16_v2bf16__2_u(ptr addrspace(1) inreg %ptr) {
100 ; GFX9-LABEL: v_shuffle_v2bf16_v2bf16__2_u:
102 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103 ; GFX9-NEXT: s_setpc_b64 s[30:31]
104 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
105 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 2, i32 poison>
106 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <3, poison>: lane 0 takes element 1 of %vec1, lane 1 is poison; %vec0
; is not referenced by the mask.
; The checks expect a single asm definition followed by one v_alignbit_b32 by
; 16, the same instruction sequence as the <1, poison> case.
110 define void @v_shuffle_v2bf16_v2bf16__3_u(ptr addrspace(1) inreg %ptr) {
111 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__3_u:
113 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; GFX900-NEXT: ;;#ASMSTART
115 ; GFX900-NEXT: ; def v1
116 ; GFX900-NEXT: ;;#ASMEND
117 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
118 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16
119 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
120 ; GFX900-NEXT: s_waitcnt vmcnt(0)
121 ; GFX900-NEXT: s_setpc_b64 s[30:31]
123 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__3_u:
125 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; GFX90A-NEXT: ;;#ASMSTART
127 ; GFX90A-NEXT: ; def v1
128 ; GFX90A-NEXT: ;;#ASMEND
129 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
130 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16
131 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
132 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
133 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
135 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__3_u:
137 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; GFX940-NEXT: ;;#ASMSTART
139 ; GFX940-NEXT: ; def v1
140 ; GFX940-NEXT: ;;#ASMEND
141 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
142 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16
143 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
144 ; GFX940-NEXT: s_waitcnt vmcnt(0)
145 ; GFX940-NEXT: s_setpc_b64 s[30:31]
146 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
147 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
148 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 3, i32 poison>
149 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <3, 0>: lane 0 takes element 1 of %vec1, lane 1 takes element 0 of
; %vec0, so both inputs are live.
; The checks expect two asm definitions combined by one v_alignbit_b32 by 16.
; The gfx940 checks additionally expect an s_nop 0 before that instruction.
153 define void @v_shuffle_v2bf16_v2bf16__3_0(ptr addrspace(1) inreg %ptr) {
154 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__3_0:
156 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX900-NEXT: ;;#ASMSTART
158 ; GFX900-NEXT: ; def v1
159 ; GFX900-NEXT: ;;#ASMEND
160 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
161 ; GFX900-NEXT: ;;#ASMSTART
162 ; GFX900-NEXT: ; def v2
163 ; GFX900-NEXT: ;;#ASMEND
164 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v2, 16
165 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
166 ; GFX900-NEXT: s_waitcnt vmcnt(0)
167 ; GFX900-NEXT: s_setpc_b64 s[30:31]
169 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__3_0:
171 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172 ; GFX90A-NEXT: ;;#ASMSTART
173 ; GFX90A-NEXT: ; def v1
174 ; GFX90A-NEXT: ;;#ASMEND
175 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
176 ; GFX90A-NEXT: ;;#ASMSTART
177 ; GFX90A-NEXT: ; def v2
178 ; GFX90A-NEXT: ;;#ASMEND
179 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v2, 16
180 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
181 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
182 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
184 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__3_0:
186 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187 ; GFX940-NEXT: ;;#ASMSTART
188 ; GFX940-NEXT: ; def v1
189 ; GFX940-NEXT: ;;#ASMEND
190 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
191 ; GFX940-NEXT: ;;#ASMSTART
192 ; GFX940-NEXT: ; def v2
193 ; GFX940-NEXT: ;;#ASMEND
194 ; GFX940-NEXT: s_nop 0
195 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16
196 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
197 ; GFX940-NEXT: s_waitcnt vmcnt(0)
198 ; GFX940-NEXT: s_setpc_b64 s[30:31]
199 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
200 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
201 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 3, i32 0>
202 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <3, 1>: lane 0 takes element 1 of %vec1, lane 1 takes element 1 of
; %vec0 (the upper halves of both inputs).
; The checks expect one v_perm_b32 whose selector 0x7060302 is first loaded
; into an SGPR (s4, or s2 on gfx940). The gfx940 checks also expect an
; s_nop 0 before the permute.
206 define void @v_shuffle_v2bf16_v2bf16__3_1(ptr addrspace(1) inreg %ptr) {
207 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__3_1:
209 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210 ; GFX900-NEXT: ;;#ASMSTART
211 ; GFX900-NEXT: ; def v1
212 ; GFX900-NEXT: ;;#ASMEND
213 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
214 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
215 ; GFX900-NEXT: ;;#ASMSTART
216 ; GFX900-NEXT: ; def v2
217 ; GFX900-NEXT: ;;#ASMEND
218 ; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4
219 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
220 ; GFX900-NEXT: s_waitcnt vmcnt(0)
221 ; GFX900-NEXT: s_setpc_b64 s[30:31]
223 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__3_1:
225 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
226 ; GFX90A-NEXT: ;;#ASMSTART
227 ; GFX90A-NEXT: ; def v1
228 ; GFX90A-NEXT: ;;#ASMEND
229 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
230 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
231 ; GFX90A-NEXT: ;;#ASMSTART
232 ; GFX90A-NEXT: ; def v2
233 ; GFX90A-NEXT: ;;#ASMEND
234 ; GFX90A-NEXT: v_perm_b32 v1, v1, v2, s4
235 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
236 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
237 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
239 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__3_1:
241 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242 ; GFX940-NEXT: ;;#ASMSTART
243 ; GFX940-NEXT: ; def v1
244 ; GFX940-NEXT: ;;#ASMEND
245 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
246 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
247 ; GFX940-NEXT: ;;#ASMSTART
248 ; GFX940-NEXT: ; def v2
249 ; GFX940-NEXT: ;;#ASMEND
250 ; GFX940-NEXT: s_nop 0
251 ; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2
252 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
253 ; GFX940-NEXT: s_waitcnt vmcnt(0)
254 ; GFX940-NEXT: s_setpc_b64 s[30:31]
255 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
256 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
257 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 3, i32 1>
258 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <3, 2>: both lanes come from %vec1 with its two elements swapped;
; %vec0 is not referenced by the mask.
; The checks expect a single asm definition and one v_alignbit_b32 with the
; same register as both sources and a shift of 16.
262 define void @v_shuffle_v2bf16_v2bf16__3_2(ptr addrspace(1) inreg %ptr) {
263 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__3_2:
265 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266 ; GFX900-NEXT: ;;#ASMSTART
267 ; GFX900-NEXT: ; def v1
268 ; GFX900-NEXT: ;;#ASMEND
269 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
270 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16
271 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
272 ; GFX900-NEXT: s_waitcnt vmcnt(0)
273 ; GFX900-NEXT: s_setpc_b64 s[30:31]
275 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__3_2:
277 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
278 ; GFX90A-NEXT: ;;#ASMSTART
279 ; GFX90A-NEXT: ; def v1
280 ; GFX90A-NEXT: ;;#ASMEND
281 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
282 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16
283 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
284 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
285 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
287 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__3_2:
289 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290 ; GFX940-NEXT: ;;#ASMSTART
291 ; GFX940-NEXT: ; def v1
292 ; GFX940-NEXT: ;;#ASMEND
293 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
294 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16
295 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
296 ; GFX940-NEXT: s_waitcnt vmcnt(0)
297 ; GFX940-NEXT: s_setpc_b64 s[30:31]
298 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
299 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
300 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 3, i32 2>
301 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <3, 3>: element 1 of %vec1 is replicated into both lanes; %vec0 is not
; referenced by the mask.
; The checks expect a single asm definition and one v_perm_b32 of that
; register with itself, using selector 0x7060302 held in an SGPR.
305 define void @v_shuffle_v2bf16_v2bf16__3_3(ptr addrspace(1) inreg %ptr) {
306 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__3_3:
308 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309 ; GFX900-NEXT: ;;#ASMSTART
310 ; GFX900-NEXT: ; def v1
311 ; GFX900-NEXT: ;;#ASMEND
312 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
313 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
314 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
315 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
316 ; GFX900-NEXT: s_waitcnt vmcnt(0)
317 ; GFX900-NEXT: s_setpc_b64 s[30:31]
319 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__3_3:
321 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GFX90A-NEXT: ;;#ASMSTART
323 ; GFX90A-NEXT: ; def v1
324 ; GFX90A-NEXT: ;;#ASMEND
325 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
326 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
327 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
328 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
329 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
330 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
332 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__3_3:
334 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335 ; GFX940-NEXT: ;;#ASMSTART
336 ; GFX940-NEXT: ; def v1
337 ; GFX940-NEXT: ;;#ASMEND
338 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
339 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
340 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
341 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
342 ; GFX940-NEXT: s_waitcnt vmcnt(0)
343 ; GFX940-NEXT: s_setpc_b64 s[30:31]
344 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
345 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
346 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 3, i32 3>
347 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <poison, 0>: lane 1 takes element 0 of %vec0, lane 0 is poison.
; The checks expect one v_lshlrev_b32 by 16, which moves the lower half of the
; asm-defined register into the upper half before the store.
351 define void @v_shuffle_v2bf16_v2bf16__u_0(ptr addrspace(1) inreg %ptr) {
352 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__u_0:
354 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355 ; GFX900-NEXT: ;;#ASMSTART
356 ; GFX900-NEXT: ; def v1
357 ; GFX900-NEXT: ;;#ASMEND
358 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
359 ; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1
360 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
361 ; GFX900-NEXT: s_waitcnt vmcnt(0)
362 ; GFX900-NEXT: s_setpc_b64 s[30:31]
364 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__u_0:
366 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
367 ; GFX90A-NEXT: ;;#ASMSTART
368 ; GFX90A-NEXT: ; def v1
369 ; GFX90A-NEXT: ;;#ASMEND
370 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
371 ; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v1
372 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
373 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
374 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
376 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__u_0:
378 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
379 ; GFX940-NEXT: ;;#ASMSTART
380 ; GFX940-NEXT: ; def v1
381 ; GFX940-NEXT: ;;#ASMEND
382 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
383 ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1
384 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
385 ; GFX940-NEXT: s_waitcnt vmcnt(0)
386 ; GFX940-NEXT: s_setpc_b64 s[30:31]
387 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
388 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 poison, i32 0>
389 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask zeroinitializer (i.e. <0, 0>): element 0 of %vec0 is replicated into
; both lanes.
; The checks expect one v_perm_b32 of the asm-defined register with itself,
; using selector 0x5040100 held in an SGPR (s4, or s2 on gfx940).
393 define void @v_shuffle_v2bf16_v2bf16__0_0(ptr addrspace(1) inreg %ptr) {
394 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__0_0:
396 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
397 ; GFX900-NEXT: ;;#ASMSTART
398 ; GFX900-NEXT: ; def v1
399 ; GFX900-NEXT: ;;#ASMEND
400 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
401 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
402 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
403 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
404 ; GFX900-NEXT: s_waitcnt vmcnt(0)
405 ; GFX900-NEXT: s_setpc_b64 s[30:31]
407 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__0_0:
409 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410 ; GFX90A-NEXT: ;;#ASMSTART
411 ; GFX90A-NEXT: ; def v1
412 ; GFX90A-NEXT: ;;#ASMEND
413 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
414 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
415 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
416 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
417 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
418 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
420 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__0_0:
422 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
423 ; GFX940-NEXT: ;;#ASMSTART
424 ; GFX940-NEXT: ; def v1
425 ; GFX940-NEXT: ;;#ASMEND
426 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
427 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
428 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
429 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
430 ; GFX940-NEXT: s_waitcnt vmcnt(0)
431 ; GFX940-NEXT: s_setpc_b64 s[30:31]
432 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
433 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> zeroinitializer
434 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <1, 0>: the two elements of %vec0 are swapped.
; The checks expect one v_alignbit_b32 with the asm-defined register as both
; sources and a shift of 16.
438 define void @v_shuffle_v2bf16_v2bf16__1_0(ptr addrspace(1) inreg %ptr) {
439 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__1_0:
441 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
442 ; GFX900-NEXT: ;;#ASMSTART
443 ; GFX900-NEXT: ; def v1
444 ; GFX900-NEXT: ;;#ASMEND
445 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
446 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16
447 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
448 ; GFX900-NEXT: s_waitcnt vmcnt(0)
449 ; GFX900-NEXT: s_setpc_b64 s[30:31]
451 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__1_0:
453 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
454 ; GFX90A-NEXT: ;;#ASMSTART
455 ; GFX90A-NEXT: ; def v1
456 ; GFX90A-NEXT: ;;#ASMEND
457 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
458 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16
459 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
460 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
461 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
463 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__1_0:
465 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
466 ; GFX940-NEXT: ;;#ASMSTART
467 ; GFX940-NEXT: ; def v1
468 ; GFX940-NEXT: ;;#ASMEND
469 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
470 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16
471 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
472 ; GFX940-NEXT: s_waitcnt vmcnt(0)
473 ; GFX940-NEXT: s_setpc_b64 s[30:31]
474 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
475 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 0>
476 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <2, 0>: lane 0 names element 0 of the poison second operand, lane 1
; takes element 0 of %vec0.
; The checks expect one v_lshlrev_b32 by 16, the same instruction sequence as
; the <poison, 0> case.
480 define void @v_shuffle_v2bf16_v2bf16__2_0(ptr addrspace(1) inreg %ptr) {
481 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__2_0:
483 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
484 ; GFX900-NEXT: ;;#ASMSTART
485 ; GFX900-NEXT: ; def v1
486 ; GFX900-NEXT: ;;#ASMEND
487 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
488 ; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1
489 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
490 ; GFX900-NEXT: s_waitcnt vmcnt(0)
491 ; GFX900-NEXT: s_setpc_b64 s[30:31]
493 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__2_0:
495 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
496 ; GFX90A-NEXT: ;;#ASMSTART
497 ; GFX90A-NEXT: ; def v1
498 ; GFX90A-NEXT: ;;#ASMEND
499 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
500 ; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v1
501 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
502 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
503 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
505 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__2_0:
507 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508 ; GFX940-NEXT: ;;#ASMSTART
509 ; GFX940-NEXT: ; def v1
510 ; GFX940-NEXT: ;;#ASMEND
511 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
512 ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1
513 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
514 ; GFX940-NEXT: s_waitcnt vmcnt(0)
515 ; GFX940-NEXT: s_setpc_b64 s[30:31]
516 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
517 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 2, i32 0>
518 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <poison, 1>: lane 1 keeps element 1 of %vec0, lane 0 is poison.
; The checks expect the asm-defined register to be stored unchanged, with no
; shuffle instruction in between.
522 define void @v_shuffle_v2bf16_v2bf16__u_1(ptr addrspace(1) inreg %ptr) {
523 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__u_1:
525 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
526 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
527 ; GFX900-NEXT: ;;#ASMSTART
528 ; GFX900-NEXT: ; def v1
529 ; GFX900-NEXT: ;;#ASMEND
530 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
531 ; GFX900-NEXT: s_waitcnt vmcnt(0)
532 ; GFX900-NEXT: s_setpc_b64 s[30:31]
534 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__u_1:
536 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
538 ; GFX90A-NEXT: ;;#ASMSTART
539 ; GFX90A-NEXT: ; def v1
540 ; GFX90A-NEXT: ;;#ASMEND
541 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
542 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
543 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
545 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__u_1:
547 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
548 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
549 ; GFX940-NEXT: ;;#ASMSTART
550 ; GFX940-NEXT: ; def v1
551 ; GFX940-NEXT: ;;#ASMEND
552 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
553 ; GFX940-NEXT: s_waitcnt vmcnt(0)
554 ; GFX940-NEXT: s_setpc_b64 s[30:31]
555 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
556 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 poison, i32 1>
557 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <0, 1>: identity shuffle of %vec0.
; The checks expect the asm-defined register to be stored unchanged, with no
; shuffle instruction in between.
561 define void @v_shuffle_v2bf16_v2bf16__0_1(ptr addrspace(1) inreg %ptr) {
562 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__0_1:
564 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
565 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
566 ; GFX900-NEXT: ;;#ASMSTART
567 ; GFX900-NEXT: ; def v1
568 ; GFX900-NEXT: ;;#ASMEND
569 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
570 ; GFX900-NEXT: s_waitcnt vmcnt(0)
571 ; GFX900-NEXT: s_setpc_b64 s[30:31]
573 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__0_1:
575 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
576 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
577 ; GFX90A-NEXT: ;;#ASMSTART
578 ; GFX90A-NEXT: ; def v1
579 ; GFX90A-NEXT: ;;#ASMEND
580 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
581 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
582 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
584 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__0_1:
586 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
587 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
588 ; GFX940-NEXT: ;;#ASMSTART
589 ; GFX940-NEXT: ; def v1
590 ; GFX940-NEXT: ;;#ASMEND
591 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
592 ; GFX940-NEXT: s_waitcnt vmcnt(0)
593 ; GFX940-NEXT: s_setpc_b64 s[30:31]
594 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
595 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 1>
596 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <1, 1>: element 1 of %vec0 is replicated into both lanes.
; The checks expect one v_perm_b32 of the asm-defined register with itself,
; using selector 0x7060302 held in an SGPR (s4, or s2 on gfx940).
600 define void @v_shuffle_v2bf16_v2bf16__1_1(ptr addrspace(1) inreg %ptr) {
601 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__1_1:
603 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
604 ; GFX900-NEXT: ;;#ASMSTART
605 ; GFX900-NEXT: ; def v1
606 ; GFX900-NEXT: ;;#ASMEND
607 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
608 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
609 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
610 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
611 ; GFX900-NEXT: s_waitcnt vmcnt(0)
612 ; GFX900-NEXT: s_setpc_b64 s[30:31]
614 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__1_1:
616 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
617 ; GFX90A-NEXT: ;;#ASMSTART
618 ; GFX90A-NEXT: ; def v1
619 ; GFX90A-NEXT: ;;#ASMEND
620 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
621 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
622 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
623 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
624 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
625 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
627 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__1_1:
629 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
630 ; GFX940-NEXT: ;;#ASMSTART
631 ; GFX940-NEXT: ; def v1
632 ; GFX940-NEXT: ;;#ASMEND
633 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
634 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
635 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
636 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
637 ; GFX940-NEXT: s_waitcnt vmcnt(0)
638 ; GFX940-NEXT: s_setpc_b64 s[30:31]
639 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
640 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 1>
641 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <2, 1>: lane 0 names element 0 of the poison second operand, lane 1
; keeps element 1 of %vec0.
; The checks expect the asm-defined register to be stored unchanged, the same
; sequence as the <poison, 1> case.
645 define void @v_shuffle_v2bf16_v2bf16__2_1(ptr addrspace(1) inreg %ptr) {
646 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__2_1:
648 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
649 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
650 ; GFX900-NEXT: ;;#ASMSTART
651 ; GFX900-NEXT: ; def v1
652 ; GFX900-NEXT: ;;#ASMEND
653 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
654 ; GFX900-NEXT: s_waitcnt vmcnt(0)
655 ; GFX900-NEXT: s_setpc_b64 s[30:31]
657 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__2_1:
659 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
660 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
661 ; GFX90A-NEXT: ;;#ASMSTART
662 ; GFX90A-NEXT: ; def v1
663 ; GFX90A-NEXT: ;;#ASMEND
664 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
665 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
666 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
668 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__2_1:
670 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
671 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
672 ; GFX940-NEXT: ;;#ASMSTART
673 ; GFX940-NEXT: ; def v1
674 ; GFX940-NEXT: ;;#ASMEND
675 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
676 ; GFX940-NEXT: s_waitcnt vmcnt(0)
677 ; GFX940-NEXT: s_setpc_b64 s[30:31]
678 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
679 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 2, i32 1>
680 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <poison, 2>: lane 0 is poison and lane 1 names element 0 of the poison
; second operand, so neither lane of the result is defined.
; The shared checks expect only the entry wait and the return (no asm
; definition, no store).
684 define void @v_shuffle_v2bf16_v2bf16__u_2(ptr addrspace(1) inreg %ptr) {
685 ; GFX9-LABEL: v_shuffle_v2bf16_v2bf16__u_2:
687 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688 ; GFX9-NEXT: s_setpc_b64 s[30:31]
689 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
690 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 poison, i32 2>
691 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <0, 2>: lane 0 keeps element 0 of %vec0, lane 1 names element 0 of the
; poison second operand.
; The checks expect the asm-defined register to be stored unchanged, the same
; sequence as the <0, poison> case.
695 define void @v_shuffle_v2bf16_v2bf16__0_2(ptr addrspace(1) inreg %ptr) {
696 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__0_2:
698 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
699 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
700 ; GFX900-NEXT: ;;#ASMSTART
701 ; GFX900-NEXT: ; def v1
702 ; GFX900-NEXT: ;;#ASMEND
703 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
704 ; GFX900-NEXT: s_waitcnt vmcnt(0)
705 ; GFX900-NEXT: s_setpc_b64 s[30:31]
707 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__0_2:
709 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
710 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
711 ; GFX90A-NEXT: ;;#ASMSTART
712 ; GFX90A-NEXT: ; def v1
713 ; GFX90A-NEXT: ;;#ASMEND
714 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
715 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
716 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
718 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__0_2:
720 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
721 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
722 ; GFX940-NEXT: ;;#ASMSTART
723 ; GFX940-NEXT: ; def v1
724 ; GFX940-NEXT: ;;#ASMEND
725 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
726 ; GFX940-NEXT: s_waitcnt vmcnt(0)
727 ; GFX940-NEXT: s_setpc_b64 s[30:31]
728 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
729 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 2>
730 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <1, 2>: lane 0 takes element 1 of %vec0, lane 1 names element 0 of the
; poison second operand.
; The checks expect one v_alignbit_b32 by 16 whose first source (s4, or s0 on
; gfx940) is never written in the expected output, the same sequence as the
; <1, poison> case.
734 define void @v_shuffle_v2bf16_v2bf16__1_2(ptr addrspace(1) inreg %ptr) {
735 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__1_2:
737 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
738 ; GFX900-NEXT: ;;#ASMSTART
739 ; GFX900-NEXT: ; def v1
740 ; GFX900-NEXT: ;;#ASMEND
741 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
742 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16
743 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
744 ; GFX900-NEXT: s_waitcnt vmcnt(0)
745 ; GFX900-NEXT: s_setpc_b64 s[30:31]
747 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__1_2:
749 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
750 ; GFX90A-NEXT: ;;#ASMSTART
751 ; GFX90A-NEXT: ; def v1
752 ; GFX90A-NEXT: ;;#ASMEND
753 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
754 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16
755 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
756 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
757 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
759 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__1_2:
761 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
762 ; GFX940-NEXT: ;;#ASMSTART
763 ; GFX940-NEXT: ; def v1
764 ; GFX940-NEXT: ;;#ASMEND
765 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
766 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16
767 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
768 ; GFX940-NEXT: s_waitcnt vmcnt(0)
769 ; GFX940-NEXT: s_setpc_b64 s[30:31]
770 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
771 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 2>
772 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <2, 2>: both lanes name element 0 of the poison second operand, so
; neither lane of the result is defined.
; The shared checks expect only the entry wait and the return (no asm
; definition, no store).
776 define void @v_shuffle_v2bf16_v2bf16__2_2(ptr addrspace(1) inreg %ptr) {
777 ; GFX9-LABEL: v_shuffle_v2bf16_v2bf16__2_2:
779 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
780 ; GFX9-NEXT: s_setpc_b64 s[30:31]
781 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
782 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 2, i32 2>
783 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <poison, 3>: lane 1 keeps element 1 of %vec1, lane 0 is poison; %vec0
; is not referenced by the mask.
; The checks expect a single asm definition whose register is stored
; unchanged, with no shuffle instruction in between.
787 define void @v_shuffle_v2bf16_v2bf16__u_3(ptr addrspace(1) inreg %ptr) {
788 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__u_3:
790 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
791 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
792 ; GFX900-NEXT: ;;#ASMSTART
793 ; GFX900-NEXT: ; def v1
794 ; GFX900-NEXT: ;;#ASMEND
795 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
796 ; GFX900-NEXT: s_waitcnt vmcnt(0)
797 ; GFX900-NEXT: s_setpc_b64 s[30:31]
799 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__u_3:
801 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
802 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
803 ; GFX90A-NEXT: ;;#ASMSTART
804 ; GFX90A-NEXT: ; def v1
805 ; GFX90A-NEXT: ;;#ASMEND
806 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
807 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
808 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
810 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__u_3:
812 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
813 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
814 ; GFX940-NEXT: ;;#ASMSTART
815 ; GFX940-NEXT: ; def v1
816 ; GFX940-NEXT: ;;#ASMEND
817 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
818 ; GFX940-NEXT: s_waitcnt vmcnt(0)
819 ; GFX940-NEXT: s_setpc_b64 s[30:31]
820 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
821 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
822 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 poison, i32 3>
823 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <0, 3>: lane 0 takes element 0 of %vec0, lane 1 takes element 1 of
; %vec1, so both inputs are live.
; The checks expect one v_bfi_b32 whose mask 0xffff is first loaded into an
; SGPR (s4, or s2 on gfx940). The gfx940 checks also expect an s_nop 0 before
; the bitfield insert.
827 define void @v_shuffle_v2bf16_v2bf16__0_3(ptr addrspace(1) inreg %ptr) {
828 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__0_3:
830 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
831 ; GFX900-NEXT: ;;#ASMSTART
832 ; GFX900-NEXT: ; def v1
833 ; GFX900-NEXT: ;;#ASMEND
834 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
835 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
836 ; GFX900-NEXT: ;;#ASMSTART
837 ; GFX900-NEXT: ; def v2
838 ; GFX900-NEXT: ;;#ASMEND
839 ; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v2
840 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
841 ; GFX900-NEXT: s_waitcnt vmcnt(0)
842 ; GFX900-NEXT: s_setpc_b64 s[30:31]
844 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__0_3:
846 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
847 ; GFX90A-NEXT: ;;#ASMSTART
848 ; GFX90A-NEXT: ; def v1
849 ; GFX90A-NEXT: ;;#ASMEND
850 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
851 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
852 ; GFX90A-NEXT: ;;#ASMSTART
853 ; GFX90A-NEXT: ; def v2
854 ; GFX90A-NEXT: ;;#ASMEND
855 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v2
856 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
857 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
858 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
860 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__0_3:
862 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
863 ; GFX940-NEXT: ;;#ASMSTART
864 ; GFX940-NEXT: ; def v1
865 ; GFX940-NEXT: ;;#ASMEND
866 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
867 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
868 ; GFX940-NEXT: ;;#ASMSTART
869 ; GFX940-NEXT: ; def v2
870 ; GFX940-NEXT: ;;#ASMEND
871 ; GFX940-NEXT: s_nop 0
872 ; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2
873 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
874 ; GFX940-NEXT: s_waitcnt vmcnt(0)
875 ; GFX940-NEXT: s_setpc_b64 s[30:31]
876 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
877 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
878 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 0, i32 3>
879 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
883 define void @v_shuffle_v2bf16_v2bf16__1_3(ptr addrspace(1) inreg %ptr) {
884 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__1_3:
886 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
887 ; GFX900-NEXT: ;;#ASMSTART
888 ; GFX900-NEXT: ; def v1
889 ; GFX900-NEXT: ;;#ASMEND
890 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
891 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
892 ; GFX900-NEXT: ;;#ASMSTART
893 ; GFX900-NEXT: ; def v2
894 ; GFX900-NEXT: ;;#ASMEND
895 ; GFX900-NEXT: v_perm_b32 v1, v2, v1, s4
896 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
897 ; GFX900-NEXT: s_waitcnt vmcnt(0)
898 ; GFX900-NEXT: s_setpc_b64 s[30:31]
900 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__1_3:
902 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
903 ; GFX90A-NEXT: ;;#ASMSTART
904 ; GFX90A-NEXT: ; def v1
905 ; GFX90A-NEXT: ;;#ASMEND
906 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
907 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
908 ; GFX90A-NEXT: ;;#ASMSTART
909 ; GFX90A-NEXT: ; def v2
910 ; GFX90A-NEXT: ;;#ASMEND
911 ; GFX90A-NEXT: v_perm_b32 v1, v2, v1, s4
912 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
913 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
914 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
916 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__1_3:
918 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
919 ; GFX940-NEXT: ;;#ASMSTART
920 ; GFX940-NEXT: ; def v1
921 ; GFX940-NEXT: ;;#ASMEND
922 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
923 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
924 ; GFX940-NEXT: ;;#ASMSTART
925 ; GFX940-NEXT: ; def v2
926 ; GFX940-NEXT: ;;#ASMEND
927 ; GFX940-NEXT: s_nop 0
928 ; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2
929 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
930 ; GFX940-NEXT: s_waitcnt vmcnt(0)
931 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; VGPR test, mask <1, 3>. Result lane 0 is element 1 of %vec0 and result lane 1
; is element 1 of %vec1, i.e. element 1 of each input is gathered. Both inputs
; come from opaque inline asm with a "=v" constraint and the result is stored
; through %ptr. The assertions above expect a single v_perm_b32 driven by the
; selector constant 0x7060302.
932 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
933 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
934 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 1, i32 3>
935 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
939 define void @v_shuffle_v2bf16_v2bf16__2_3(ptr addrspace(1) inreg %ptr) {
940 ; GFX900-LABEL: v_shuffle_v2bf16_v2bf16__2_3:
942 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
943 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
944 ; GFX900-NEXT: ;;#ASMSTART
945 ; GFX900-NEXT: ; def v1
946 ; GFX900-NEXT: ;;#ASMEND
947 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
948 ; GFX900-NEXT: s_waitcnt vmcnt(0)
949 ; GFX900-NEXT: s_setpc_b64 s[30:31]
951 ; GFX90A-LABEL: v_shuffle_v2bf16_v2bf16__2_3:
953 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
954 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
955 ; GFX90A-NEXT: ;;#ASMSTART
956 ; GFX90A-NEXT: ; def v1
957 ; GFX90A-NEXT: ;;#ASMEND
958 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
959 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
960 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
962 ; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__2_3:
964 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
965 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
966 ; GFX940-NEXT: ;;#ASMSTART
967 ; GFX940-NEXT: ; def v1
968 ; GFX940-NEXT: ;;#ASMEND
969 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
970 ; GFX940-NEXT: s_waitcnt vmcnt(0)
971 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; VGPR test, mask <2, 3>. Both result lanes come from %vec1 in their original
; order, so the shuffle is a plain copy of the second operand and %vec0 is not
; selected at all. The assertions above accordingly expect only one asm def
; and a direct store of that register, with no permute or bitfield instruction.
972 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
973 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
974 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 2, i32 3>
975 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
979 define void @s_shuffle_v2bf16_v2bf16__u_u() {
980 ; GFX9-LABEL: s_shuffle_v2bf16_v2bf16__u_u:
982 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
983 ; GFX9-NEXT: ;;#ASMSTART
984 ; GFX9-NEXT: ; use s8
985 ; GFX9-NEXT: ;;#ASMEND
986 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; SGPR test with a fully poison mask. No lane of %vec0 is selected, and the
; asm that defines it is not marked sideeffect, so the assertions above expect
; no def at all: only the sideeffect "use" of s8 (forced by the "{s8}"
; constraint) remains, identically on all three targets.
987 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
988 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> poison
989 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
993 define void @s_shuffle_v2bf16_v2bf16__0_u() {
994 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__0_u:
996 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
997 ; GFX900-NEXT: ;;#ASMSTART
998 ; GFX900-NEXT: ; def s8
999 ; GFX900-NEXT: ;;#ASMEND
1000 ; GFX900-NEXT: ;;#ASMSTART
1001 ; GFX900-NEXT: ; use s8
1002 ; GFX900-NEXT: ;;#ASMEND
1003 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1005 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__0_u:
1007 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1008 ; GFX90A-NEXT: ;;#ASMSTART
1009 ; GFX90A-NEXT: ; def s8
1010 ; GFX90A-NEXT: ;;#ASMEND
1011 ; GFX90A-NEXT: ;;#ASMSTART
1012 ; GFX90A-NEXT: ; use s8
1013 ; GFX90A-NEXT: ;;#ASMEND
1014 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1016 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__0_u:
1018 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1019 ; GFX940-NEXT: ;;#ASMSTART
1020 ; GFX940-NEXT: ; def s8
1021 ; GFX940-NEXT: ;;#ASMEND
1022 ; GFX940-NEXT: s_nop 0
1023 ; GFX940-NEXT: ;;#ASMSTART
1024 ; GFX940-NEXT: ; use s8
1025 ; GFX940-NEXT: ;;#ASMEND
1026 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <0, poison>. Result lane 0 keeps element 0 of %vec0 and
; lane 1 is unconstrained. The assertions above expect the value to be defined
; directly in s8 and consumed from s8 with no shuffle instruction in between;
; the gfx940 output expects an s_nop 0 between the two asm blocks.
1027 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1028 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 poison>
1029 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1033 define void @s_shuffle_v2bf16_v2bf16__1_u() {
1034 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__1_u:
1036 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1037 ; GFX900-NEXT: ;;#ASMSTART
1038 ; GFX900-NEXT: ; def s4
1039 ; GFX900-NEXT: ;;#ASMEND
1040 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
1041 ; GFX900-NEXT: ;;#ASMSTART
1042 ; GFX900-NEXT: ; use s8
1043 ; GFX900-NEXT: ;;#ASMEND
1044 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1046 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__1_u:
1048 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1049 ; GFX90A-NEXT: ;;#ASMSTART
1050 ; GFX90A-NEXT: ; def s4
1051 ; GFX90A-NEXT: ;;#ASMEND
1052 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
1053 ; GFX90A-NEXT: ;;#ASMSTART
1054 ; GFX90A-NEXT: ; use s8
1055 ; GFX90A-NEXT: ;;#ASMEND
1056 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1058 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__1_u:
1060 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1061 ; GFX940-NEXT: ;;#ASMSTART
1062 ; GFX940-NEXT: ; def s0
1063 ; GFX940-NEXT: ;;#ASMEND
1064 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
1065 ; GFX940-NEXT: ;;#ASMSTART
1066 ; GFX940-NEXT: ; use s8
1067 ; GFX940-NEXT: ;;#ASMEND
1068 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <1, poison>. Element 1 of %vec0 has to move into result
; lane 0 while lane 1 is unconstrained. The assertions above expect this to be
; done with a single s_lshr_b32 by 16 writing s8, the register required by the
; "{s8}" constraint on the consuming asm.
1069 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1070 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 poison>
1071 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1075 define void @s_shuffle_v2bf16_v2bf16__2_u() {
1076 ; GFX9-LABEL: s_shuffle_v2bf16_v2bf16__2_u:
1078 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1079 ; GFX9-NEXT: ;;#ASMSTART
1080 ; GFX9-NEXT: ; use s8
1081 ; GFX9-NEXT: ;;#ASMEND
1082 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <2, poison>. Mask index 2 addresses element 0 of the second
; operand, which is poison in this function, so no lane of %vec0 is selected.
; The assertions above therefore expect no asm def and no shuffle code, only
; the "use" of s8.
; NOTE(review): with a poison second operand this covers the same codegen as
; the all-poison mask; confirm whether a distinct %vec1 input was intended.
1083 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1084 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 2, i32 poison>
1085 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1089 define void @s_shuffle_v2bf16_v2bf16__3_u() {
1090 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__3_u:
1092 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1093 ; GFX900-NEXT: ;;#ASMSTART
1094 ; GFX900-NEXT: ; def s4
1095 ; GFX900-NEXT: ;;#ASMEND
1096 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
1097 ; GFX900-NEXT: ;;#ASMSTART
1098 ; GFX900-NEXT: ; use s8
1099 ; GFX900-NEXT: ;;#ASMEND
1100 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1102 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__3_u:
1104 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1105 ; GFX90A-NEXT: ;;#ASMSTART
1106 ; GFX90A-NEXT: ; def s4
1107 ; GFX90A-NEXT: ;;#ASMEND
1108 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
1109 ; GFX90A-NEXT: ;;#ASMSTART
1110 ; GFX90A-NEXT: ; use s8
1111 ; GFX90A-NEXT: ;;#ASMEND
1112 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1114 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__3_u:
1116 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1117 ; GFX940-NEXT: ;;#ASMSTART
1118 ; GFX940-NEXT: ; def s0
1119 ; GFX940-NEXT: ;;#ASMEND
1120 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
1121 ; GFX940-NEXT: ;;#ASMSTART
1122 ; GFX940-NEXT: ; use s8
1123 ; GFX940-NEXT: ;;#ASMEND
1124 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <3, poison>. Mask index 3 addresses element 1 of %vec1,
; which has to move into result lane 0; %vec0 is not selected. The assertions
; above expect a single asm def followed by one s_lshr_b32 by 16 into s8.
1125 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1126 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
1127 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 3, i32 poison>
1128 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1132 define void @s_shuffle_v2bf16_v2bf16__3_0() {
1133 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__3_0:
1135 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1136 ; GFX900-NEXT: ;;#ASMSTART
1137 ; GFX900-NEXT: ; def s5
1138 ; GFX900-NEXT: ;;#ASMEND
1139 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
1140 ; GFX900-NEXT: ;;#ASMSTART
1141 ; GFX900-NEXT: ; def s4
1142 ; GFX900-NEXT: ;;#ASMEND
1143 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
1144 ; GFX900-NEXT: ;;#ASMSTART
1145 ; GFX900-NEXT: ; use s8
1146 ; GFX900-NEXT: ;;#ASMEND
1147 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1149 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__3_0:
1151 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1152 ; GFX90A-NEXT: ;;#ASMSTART
1153 ; GFX90A-NEXT: ; def s5
1154 ; GFX90A-NEXT: ;;#ASMEND
1155 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
1156 ; GFX90A-NEXT: ;;#ASMSTART
1157 ; GFX90A-NEXT: ; def s4
1158 ; GFX90A-NEXT: ;;#ASMEND
1159 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
1160 ; GFX90A-NEXT: ;;#ASMSTART
1161 ; GFX90A-NEXT: ; use s8
1162 ; GFX90A-NEXT: ;;#ASMEND
1163 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1165 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__3_0:
1167 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1168 ; GFX940-NEXT: ;;#ASMSTART
1169 ; GFX940-NEXT: ; def s1
1170 ; GFX940-NEXT: ;;#ASMEND
1171 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
1172 ; GFX940-NEXT: ;;#ASMSTART
1173 ; GFX940-NEXT: ; def s0
1174 ; GFX940-NEXT: ;;#ASMEND
1175 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
1176 ; GFX940-NEXT: ;;#ASMSTART
1177 ; GFX940-NEXT: ; use s8
1178 ; GFX940-NEXT: ;;#ASMEND
1179 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <3, 0>. Result lane 0 is element 1 of %vec1 and result
; lane 1 is element 0 of %vec0. The assertions above expect one s_lshr_b32 by
; 16 on one of the two defs, then an s_pack_ll_b32_b16 that combines the
; shifted value with the other def into s8.
1180 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1181 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
1182 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 3, i32 0>
1183 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1187 define void @s_shuffle_v2bf16_v2bf16__3_1() {
1188 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__3_1:
1190 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1191 ; GFX900-NEXT: ;;#ASMSTART
1192 ; GFX900-NEXT: ; def s4
1193 ; GFX900-NEXT: ;;#ASMEND
1194 ; GFX900-NEXT: ;;#ASMSTART
1195 ; GFX900-NEXT: ; def s5
1196 ; GFX900-NEXT: ;;#ASMEND
1197 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
1198 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
1199 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
1200 ; GFX900-NEXT: ;;#ASMSTART
1201 ; GFX900-NEXT: ; use s8
1202 ; GFX900-NEXT: ;;#ASMEND
1203 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1205 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__3_1:
1207 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1208 ; GFX90A-NEXT: ;;#ASMSTART
1209 ; GFX90A-NEXT: ; def s4
1210 ; GFX90A-NEXT: ;;#ASMEND
1211 ; GFX90A-NEXT: ;;#ASMSTART
1212 ; GFX90A-NEXT: ; def s5
1213 ; GFX90A-NEXT: ;;#ASMEND
1214 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
1215 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
1216 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
1217 ; GFX90A-NEXT: ;;#ASMSTART
1218 ; GFX90A-NEXT: ; use s8
1219 ; GFX90A-NEXT: ;;#ASMEND
1220 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1222 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__3_1:
1224 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1225 ; GFX940-NEXT: ;;#ASMSTART
1226 ; GFX940-NEXT: ; def s0
1227 ; GFX940-NEXT: ;;#ASMEND
1228 ; GFX940-NEXT: ;;#ASMSTART
1229 ; GFX940-NEXT: ; def s1
1230 ; GFX940-NEXT: ;;#ASMEND
1231 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
1232 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
1233 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
1234 ; GFX940-NEXT: ;;#ASMSTART
1235 ; GFX940-NEXT: ; use s8
1236 ; GFX940-NEXT: ;;#ASMEND
1237 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <3, 1>. Result lane 0 is element 1 of %vec1 and result
; lane 1 is element 1 of %vec0. The assertions above expect both defs to be
; shifted right by 16 with s_lshr_b32 and then combined into s8 with
; s_pack_ll_b32_b16.
1238 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1239 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
1240 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 3, i32 1>
1241 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1245 define void @s_shuffle_v2bf16_v2bf16__3_2() {
1246 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__3_2:
1248 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1249 ; GFX900-NEXT: ;;#ASMSTART
1250 ; GFX900-NEXT: ; def s4
1251 ; GFX900-NEXT: ;;#ASMEND
1252 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
1253 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
1254 ; GFX900-NEXT: ;;#ASMSTART
1255 ; GFX900-NEXT: ; use s8
1256 ; GFX900-NEXT: ;;#ASMEND
1257 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1259 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__3_2:
1261 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1262 ; GFX90A-NEXT: ;;#ASMSTART
1263 ; GFX90A-NEXT: ; def s4
1264 ; GFX90A-NEXT: ;;#ASMEND
1265 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
1266 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
1267 ; GFX90A-NEXT: ;;#ASMSTART
1268 ; GFX90A-NEXT: ; use s8
1269 ; GFX90A-NEXT: ;;#ASMEND
1270 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1272 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__3_2:
1274 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1275 ; GFX940-NEXT: ;;#ASMSTART
1276 ; GFX940-NEXT: ; def s0
1277 ; GFX940-NEXT: ;;#ASMEND
1278 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
1279 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
1280 ; GFX940-NEXT: ;;#ASMSTART
1281 ; GFX940-NEXT: ; use s8
1282 ; GFX940-NEXT: ;;#ASMEND
1283 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <3, 2>. Both result lanes come from %vec1 with its two
; elements swapped; %vec0 is not selected. The assertions above expect a
; single asm def, an s_lshr_b32 by 16 into a scratch register, and an
; s_pack_ll_b32_b16 of the shifted and original values into s8.
1284 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1285 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
1286 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 3, i32 2>
1287 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1291 define void @s_shuffle_v2bf16_v2bf16__3_3() {
1292 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__3_3:
1294 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1295 ; GFX900-NEXT: ;;#ASMSTART
1296 ; GFX900-NEXT: ; def s4
1297 ; GFX900-NEXT: ;;#ASMEND
1298 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
1299 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
1300 ; GFX900-NEXT: ;;#ASMSTART
1301 ; GFX900-NEXT: ; use s8
1302 ; GFX900-NEXT: ;;#ASMEND
1303 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1305 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__3_3:
1307 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1308 ; GFX90A-NEXT: ;;#ASMSTART
1309 ; GFX90A-NEXT: ; def s4
1310 ; GFX90A-NEXT: ;;#ASMEND
1311 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
1312 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
1313 ; GFX90A-NEXT: ;;#ASMSTART
1314 ; GFX90A-NEXT: ; use s8
1315 ; GFX90A-NEXT: ;;#ASMEND
1316 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1318 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__3_3:
1320 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1321 ; GFX940-NEXT: ;;#ASMSTART
1322 ; GFX940-NEXT: ; def s0
1323 ; GFX940-NEXT: ;;#ASMEND
1324 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
1325 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
1326 ; GFX940-NEXT: ;;#ASMSTART
1327 ; GFX940-NEXT: ; use s8
1328 ; GFX940-NEXT: ;;#ASMEND
1329 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <3, 3>. Element 1 of %vec1 is broadcast to both result
; lanes; %vec0 is not selected. The assertions above expect a single asm def,
; an s_lshr_b32 by 16, and an s_pack_ll_b32_b16 that uses the shifted value
; for both source operands.
1330 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1331 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
1332 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 3, i32 3>
1333 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1337 define void @s_shuffle_v2bf16_v2bf16__u_0() {
1338 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__u_0:
1340 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1341 ; GFX900-NEXT: ;;#ASMSTART
1342 ; GFX900-NEXT: ; def s4
1343 ; GFX900-NEXT: ;;#ASMEND
1344 ; GFX900-NEXT: s_lshl_b32 s8, s4, 16
1345 ; GFX900-NEXT: ;;#ASMSTART
1346 ; GFX900-NEXT: ; use s8
1347 ; GFX900-NEXT: ;;#ASMEND
1348 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1350 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__u_0:
1352 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1353 ; GFX90A-NEXT: ;;#ASMSTART
1354 ; GFX90A-NEXT: ; def s4
1355 ; GFX90A-NEXT: ;;#ASMEND
1356 ; GFX90A-NEXT: s_lshl_b32 s8, s4, 16
1357 ; GFX90A-NEXT: ;;#ASMSTART
1358 ; GFX90A-NEXT: ; use s8
1359 ; GFX90A-NEXT: ;;#ASMEND
1360 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1362 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__u_0:
1364 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1365 ; GFX940-NEXT: ;;#ASMSTART
1366 ; GFX940-NEXT: ; def s0
1367 ; GFX940-NEXT: ;;#ASMEND
1368 ; GFX940-NEXT: s_lshl_b32 s8, s0, 16
1369 ; GFX940-NEXT: ;;#ASMSTART
1370 ; GFX940-NEXT: ; use s8
1371 ; GFX940-NEXT: ;;#ASMEND
1372 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <poison, 0>. Element 0 of %vec0 has to move into result
; lane 1 while lane 0 is unconstrained. The assertions above expect a single
; s_lshl_b32 by 16 writing s8.
1373 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1374 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 poison, i32 0>
1375 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1379 define void @s_shuffle_v2bf16_v2bf16__0_0() {
1380 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__0_0:
1382 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1383 ; GFX900-NEXT: ;;#ASMSTART
1384 ; GFX900-NEXT: ; def s4
1385 ; GFX900-NEXT: ;;#ASMEND
1386 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
1387 ; GFX900-NEXT: ;;#ASMSTART
1388 ; GFX900-NEXT: ; use s8
1389 ; GFX900-NEXT: ;;#ASMEND
1390 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1392 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__0_0:
1394 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1395 ; GFX90A-NEXT: ;;#ASMSTART
1396 ; GFX90A-NEXT: ; def s4
1397 ; GFX90A-NEXT: ;;#ASMEND
1398 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
1399 ; GFX90A-NEXT: ;;#ASMSTART
1400 ; GFX90A-NEXT: ; use s8
1401 ; GFX90A-NEXT: ;;#ASMEND
1402 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1404 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__0_0:
1406 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1407 ; GFX940-NEXT: ;;#ASMSTART
1408 ; GFX940-NEXT: ; def s0
1409 ; GFX940-NEXT: ;;#ASMEND
1410 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
1411 ; GFX940-NEXT: ;;#ASMSTART
1412 ; GFX940-NEXT: ; use s8
1413 ; GFX940-NEXT: ;;#ASMEND
1414 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <0, 0> (written as zeroinitializer). Element 0 of %vec0 is
; broadcast to both result lanes. The assertions above expect a single
; s_pack_ll_b32_b16 that uses the defined register for both source operands.
1415 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1416 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> zeroinitializer
1417 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1421 define void @s_shuffle_v2bf16_v2bf16__1_0() {
1422 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__1_0:
1424 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1425 ; GFX900-NEXT: ;;#ASMSTART
1426 ; GFX900-NEXT: ; def s4
1427 ; GFX900-NEXT: ;;#ASMEND
1428 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
1429 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
1430 ; GFX900-NEXT: ;;#ASMSTART
1431 ; GFX900-NEXT: ; use s8
1432 ; GFX900-NEXT: ;;#ASMEND
1433 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1435 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__1_0:
1437 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1438 ; GFX90A-NEXT: ;;#ASMSTART
1439 ; GFX90A-NEXT: ; def s4
1440 ; GFX90A-NEXT: ;;#ASMEND
1441 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
1442 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
1443 ; GFX90A-NEXT: ;;#ASMSTART
1444 ; GFX90A-NEXT: ; use s8
1445 ; GFX90A-NEXT: ;;#ASMEND
1446 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1448 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__1_0:
1450 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1451 ; GFX940-NEXT: ;;#ASMSTART
1452 ; GFX940-NEXT: ; def s0
1453 ; GFX940-NEXT: ;;#ASMEND
1454 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
1455 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
1456 ; GFX940-NEXT: ;;#ASMSTART
1457 ; GFX940-NEXT: ; use s8
1458 ; GFX940-NEXT: ;;#ASMEND
1459 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <1, 0>. The two elements of %vec0 are swapped. The
; assertions above expect an s_lshr_b32 by 16 into a scratch register followed
; by an s_pack_ll_b32_b16 of the shifted and original values into s8.
1460 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1461 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 0>
1462 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1466 define void @s_shuffle_v2bf16_v2bf16__2_0() {
1467 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__2_0:
1469 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1470 ; GFX900-NEXT: ;;#ASMSTART
1471 ; GFX900-NEXT: ; def s4
1472 ; GFX900-NEXT: ;;#ASMEND
1473 ; GFX900-NEXT: s_lshl_b32 s8, s4, 16
1474 ; GFX900-NEXT: ;;#ASMSTART
1475 ; GFX900-NEXT: ; use s8
1476 ; GFX900-NEXT: ;;#ASMEND
1477 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1479 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__2_0:
1481 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1482 ; GFX90A-NEXT: ;;#ASMSTART
1483 ; GFX90A-NEXT: ; def s4
1484 ; GFX90A-NEXT: ;;#ASMEND
1485 ; GFX90A-NEXT: s_lshl_b32 s8, s4, 16
1486 ; GFX90A-NEXT: ;;#ASMSTART
1487 ; GFX90A-NEXT: ; use s8
1488 ; GFX90A-NEXT: ;;#ASMEND
1489 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1491 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__2_0:
1493 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1494 ; GFX940-NEXT: ;;#ASMSTART
1495 ; GFX940-NEXT: ; def s0
1496 ; GFX940-NEXT: ;;#ASMEND
1497 ; GFX940-NEXT: s_lshl_b32 s8, s0, 16
1498 ; GFX940-NEXT: ;;#ASMSTART
1499 ; GFX940-NEXT: ; use s8
1500 ; GFX940-NEXT: ;;#ASMEND
1501 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <2, 0>. Result lane 1 is element 0 of %vec0; result lane 0
; uses mask index 2, which addresses the second operand, and that operand is
; poison here. The assertions above expect a single s_lshl_b32 by 16 into s8.
; NOTE(review): with a poison second operand the expected code matches the
; <poison, 0> case; confirm whether a distinct %vec1 input was intended.
1502 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1503 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 2, i32 0>
1504 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1508 define void @s_shuffle_v2bf16_v2bf16__u_1() {
1509 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__u_1:
1511 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1512 ; GFX900-NEXT: ;;#ASMSTART
1513 ; GFX900-NEXT: ; def s8
1514 ; GFX900-NEXT: ;;#ASMEND
1515 ; GFX900-NEXT: ;;#ASMSTART
1516 ; GFX900-NEXT: ; use s8
1517 ; GFX900-NEXT: ;;#ASMEND
1518 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1520 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__u_1:
1522 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1523 ; GFX90A-NEXT: ;;#ASMSTART
1524 ; GFX90A-NEXT: ; def s8
1525 ; GFX90A-NEXT: ;;#ASMEND
1526 ; GFX90A-NEXT: ;;#ASMSTART
1527 ; GFX90A-NEXT: ; use s8
1528 ; GFX90A-NEXT: ;;#ASMEND
1529 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1531 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__u_1:
1533 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1534 ; GFX940-NEXT: ;;#ASMSTART
1535 ; GFX940-NEXT: ; def s8
1536 ; GFX940-NEXT: ;;#ASMEND
1537 ; GFX940-NEXT: s_nop 0
1538 ; GFX940-NEXT: ;;#ASMSTART
1539 ; GFX940-NEXT: ; use s8
1540 ; GFX940-NEXT: ;;#ASMEND
1541 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <poison, 1>. Result lane 1 keeps element 1 of %vec0 in
; place and lane 0 is unconstrained. The assertions above expect the value to
; be defined directly in s8 and consumed with no shuffle instruction; the
; gfx940 output expects an s_nop 0 between the two asm blocks.
1542 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1543 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 poison, i32 1>
1544 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1548 define void @s_shuffle_v2bf16_v2bf16__0_1() {
1549 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__0_1:
1551 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1552 ; GFX900-NEXT: ;;#ASMSTART
1553 ; GFX900-NEXT: ; def s8
1554 ; GFX900-NEXT: ;;#ASMEND
1555 ; GFX900-NEXT: ;;#ASMSTART
1556 ; GFX900-NEXT: ; use s8
1557 ; GFX900-NEXT: ;;#ASMEND
1558 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1560 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__0_1:
1562 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1563 ; GFX90A-NEXT: ;;#ASMSTART
1564 ; GFX90A-NEXT: ; def s8
1565 ; GFX90A-NEXT: ;;#ASMEND
1566 ; GFX90A-NEXT: ;;#ASMSTART
1567 ; GFX90A-NEXT: ; use s8
1568 ; GFX90A-NEXT: ;;#ASMEND
1569 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1571 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__0_1:
1573 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1574 ; GFX940-NEXT: ;;#ASMSTART
1575 ; GFX940-NEXT: ; def s8
1576 ; GFX940-NEXT: ;;#ASMEND
1577 ; GFX940-NEXT: s_nop 0
1578 ; GFX940-NEXT: ;;#ASMSTART
1579 ; GFX940-NEXT: ; use s8
1580 ; GFX940-NEXT: ;;#ASMEND
1581 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <0, 1>. This is the identity shuffle of %vec0. The
; assertions above expect the value to be defined directly in s8 and consumed
; with no shuffle instruction; the gfx940 output expects an s_nop 0 between
; the two asm blocks.
1582 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1583 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 1>
1584 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1588 define void @s_shuffle_v2bf16_v2bf16__1_1() {
1589 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__1_1:
1591 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1592 ; GFX900-NEXT: ;;#ASMSTART
1593 ; GFX900-NEXT: ; def s4
1594 ; GFX900-NEXT: ;;#ASMEND
1595 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
1596 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
1597 ; GFX900-NEXT: ;;#ASMSTART
1598 ; GFX900-NEXT: ; use s8
1599 ; GFX900-NEXT: ;;#ASMEND
1600 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1602 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__1_1:
1604 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1605 ; GFX90A-NEXT: ;;#ASMSTART
1606 ; GFX90A-NEXT: ; def s4
1607 ; GFX90A-NEXT: ;;#ASMEND
1608 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
1609 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
1610 ; GFX90A-NEXT: ;;#ASMSTART
1611 ; GFX90A-NEXT: ; use s8
1612 ; GFX90A-NEXT: ;;#ASMEND
1613 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1615 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__1_1:
1617 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1618 ; GFX940-NEXT: ;;#ASMSTART
1619 ; GFX940-NEXT: ; def s0
1620 ; GFX940-NEXT: ;;#ASMEND
1621 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
1622 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
1623 ; GFX940-NEXT: ;;#ASMSTART
1624 ; GFX940-NEXT: ; use s8
1625 ; GFX940-NEXT: ;;#ASMEND
1626 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <1, 1>. Element 1 of %vec0 is broadcast to both result
; lanes. The assertions above expect an s_lshr_b32 by 16 followed by an
; s_pack_ll_b32_b16 that uses the shifted value for both source operands.
1627 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1628 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 1>
1629 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1633 define void @s_shuffle_v2bf16_v2bf16__2_1() {
1634 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__2_1:
1636 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1637 ; GFX900-NEXT: ;;#ASMSTART
1638 ; GFX900-NEXT: ; def s8
1639 ; GFX900-NEXT: ;;#ASMEND
1640 ; GFX900-NEXT: ;;#ASMSTART
1641 ; GFX900-NEXT: ; use s8
1642 ; GFX900-NEXT: ;;#ASMEND
1643 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1645 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__2_1:
1647 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1648 ; GFX90A-NEXT: ;;#ASMSTART
1649 ; GFX90A-NEXT: ; def s8
1650 ; GFX90A-NEXT: ;;#ASMEND
1651 ; GFX90A-NEXT: ;;#ASMSTART
1652 ; GFX90A-NEXT: ; use s8
1653 ; GFX90A-NEXT: ;;#ASMEND
1654 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1656 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__2_1:
1658 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1659 ; GFX940-NEXT: ;;#ASMSTART
1660 ; GFX940-NEXT: ; def s8
1661 ; GFX940-NEXT: ;;#ASMEND
1662 ; GFX940-NEXT: s_nop 0
1663 ; GFX940-NEXT: ;;#ASMSTART
1664 ; GFX940-NEXT: ; use s8
1665 ; GFX940-NEXT: ;;#ASMEND
1666 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <2, 1>. Result lane 1 keeps element 1 of %vec0 in place;
; result lane 0 uses mask index 2, which addresses the second operand, and
; that operand is poison here. The assertions above expect the value to be
; defined directly in s8 and consumed with no shuffle instruction.
; NOTE(review): with a poison second operand the expected code matches the
; <poison, 1> case; confirm whether a distinct %vec1 input was intended.
1667 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1668 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 2, i32 1>
1669 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1673 define void @s_shuffle_v2bf16_v2bf16__u_2() {
1674 ; GFX9-LABEL: s_shuffle_v2bf16_v2bf16__u_2:
1676 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1677 ; GFX9-NEXT: ;;#ASMSTART
1678 ; GFX9-NEXT: ; use s8
1679 ; GFX9-NEXT: ;;#ASMEND
1680 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <poison, 2>. Mask index 2 addresses element 0 of the second
; operand, which is poison in this function, so no lane of %vec0 is selected.
; The assertions above expect no asm def and no shuffle code, only the "use"
; of s8.
; NOTE(review): with a poison second operand this covers the same codegen as
; the all-poison mask; confirm whether a distinct %vec1 input was intended.
1681 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1682 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 poison, i32 2>
1683 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1687 define void @s_shuffle_v2bf16_v2bf16__0_2() {
1688 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__0_2:
1690 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1691 ; GFX900-NEXT: ;;#ASMSTART
1692 ; GFX900-NEXT: ; def s8
1693 ; GFX900-NEXT: ;;#ASMEND
1694 ; GFX900-NEXT: ;;#ASMSTART
1695 ; GFX900-NEXT: ; use s8
1696 ; GFX900-NEXT: ;;#ASMEND
1697 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1699 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__0_2:
1701 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1702 ; GFX90A-NEXT: ;;#ASMSTART
1703 ; GFX90A-NEXT: ; def s8
1704 ; GFX90A-NEXT: ;;#ASMEND
1705 ; GFX90A-NEXT: ;;#ASMSTART
1706 ; GFX90A-NEXT: ; use s8
1707 ; GFX90A-NEXT: ;;#ASMEND
1708 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1710 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__0_2:
1712 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1713 ; GFX940-NEXT: ;;#ASMSTART
1714 ; GFX940-NEXT: ; def s8
1715 ; GFX940-NEXT: ;;#ASMEND
1716 ; GFX940-NEXT: s_nop 0
1717 ; GFX940-NEXT: ;;#ASMSTART
1718 ; GFX940-NEXT: ; use s8
1719 ; GFX940-NEXT: ;;#ASMEND
1720 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <0, 2>. Result lane 0 keeps element 0 of %vec0 in place;
; result lane 1 uses mask index 2, which addresses the second operand, and
; that operand is poison here. The assertions above expect the value to be
; defined directly in s8 and consumed with no shuffle instruction.
; NOTE(review): with a poison second operand the expected code matches the
; <0, poison> case; confirm whether a distinct %vec1 input was intended.
1721 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1722 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 2>
1723 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1727 define void @s_shuffle_v2bf16_v2bf16__1_2() {
1728 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__1_2:
1730 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1731 ; GFX900-NEXT: ;;#ASMSTART
1732 ; GFX900-NEXT: ; def s4
1733 ; GFX900-NEXT: ;;#ASMEND
1734 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
1735 ; GFX900-NEXT: ;;#ASMSTART
1736 ; GFX900-NEXT: ; use s8
1737 ; GFX900-NEXT: ;;#ASMEND
1738 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1740 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__1_2:
1742 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1743 ; GFX90A-NEXT: ;;#ASMSTART
1744 ; GFX90A-NEXT: ; def s4
1745 ; GFX90A-NEXT: ;;#ASMEND
1746 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
1747 ; GFX90A-NEXT: ;;#ASMSTART
1748 ; GFX90A-NEXT: ; use s8
1749 ; GFX90A-NEXT: ;;#ASMEND
1750 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1752 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__1_2:
1754 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1755 ; GFX940-NEXT: ;;#ASMSTART
1756 ; GFX940-NEXT: ; def s0
1757 ; GFX940-NEXT: ;;#ASMEND
1758 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
1759 ; GFX940-NEXT: ;;#ASMSTART
1760 ; GFX940-NEXT: ; use s8
1761 ; GFX940-NEXT: ;;#ASMEND
1762 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <1, 2>. Result lane 0 is element 1 of %vec0; result lane 1
; uses mask index 2, which addresses the second operand, and that operand is
; poison here. The assertions above expect a single s_lshr_b32 by 16 into s8.
; NOTE(review): with a poison second operand the expected code matches the
; <1, poison> case; confirm whether a distinct %vec1 input was intended.
1763 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1764 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 2>
1765 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1769 define void @s_shuffle_v2bf16_v2bf16__2_2() {
1770 ; GFX9-LABEL: s_shuffle_v2bf16_v2bf16__2_2:
1772 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1773 ; GFX9-NEXT: ;;#ASMSTART
1774 ; GFX9-NEXT: ; use s8
1775 ; GFX9-NEXT: ;;#ASMEND
1776 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <2, 2>. Both result lanes use mask index 2, which addresses
; element 0 of the second operand, and that operand is poison here, so no
; lane of %vec0 is selected. The assertions above expect no asm def and no
; shuffle code, only the "use" of s8.
; NOTE(review): with a poison second operand this covers the same codegen as
; the all-poison mask; confirm whether a distinct %vec1 input was intended.
1777 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1778 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> <i32 2, i32 2>
1779 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1783 define void @s_shuffle_v2bf16_v2bf16__u_3() {
1784 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__u_3:
1786 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1787 ; GFX900-NEXT: ;;#ASMSTART
1788 ; GFX900-NEXT: ; def s8
1789 ; GFX900-NEXT: ;;#ASMEND
1790 ; GFX900-NEXT: ;;#ASMSTART
1791 ; GFX900-NEXT: ; use s8
1792 ; GFX900-NEXT: ;;#ASMEND
1793 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1795 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__u_3:
1797 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1798 ; GFX90A-NEXT: ;;#ASMSTART
1799 ; GFX90A-NEXT: ; def s8
1800 ; GFX90A-NEXT: ;;#ASMEND
1801 ; GFX90A-NEXT: ;;#ASMSTART
1802 ; GFX90A-NEXT: ; use s8
1803 ; GFX90A-NEXT: ;;#ASMEND
1804 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1806 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__u_3:
1808 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1809 ; GFX940-NEXT: ;;#ASMSTART
1810 ; GFX940-NEXT: ; def s8
1811 ; GFX940-NEXT: ;;#ASMEND
1812 ; GFX940-NEXT: s_nop 0
1813 ; GFX940-NEXT: ;;#ASMSTART
1814 ; GFX940-NEXT: ; use s8
1815 ; GFX940-NEXT: ;;#ASMEND
1816 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; SGPR test, mask <poison, 3>. Mask index 3 addresses element 1 of %vec1,
; which stays in result lane 1; lane 0 is unconstrained and %vec0 is not
; selected. The assertions above expect a single asm def directly in s8 and
; no shuffle instruction; the gfx940 output expects an s_nop 0 between the
; two asm blocks.
1817 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1818 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
1819 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 poison, i32 3>
1820 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
; Two-input shuffle with mask <0, 3>: lane 0 takes element 0 of %vec0 and
; lane 1 takes element 1 of %vec1. Both inputs come from "=s" inline-asm defs;
; the result is passed to an inline-asm use pinned to s8.
; The checks expect one input to be shifted right by 16 (s_lshr_b32) and then
; combined with the other input by s_pack_ll_b32_b16 into s8. The gfx940
; checks use s0/s1 where the gfx900 and gfx90a checks use s4/s5.
1824 define void @s_shuffle_v2bf16_v2bf16__0_3() {
1825 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__0_3:
1827 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1828 ; GFX900-NEXT: ;;#ASMSTART
1829 ; GFX900-NEXT: ; def s5
1830 ; GFX900-NEXT: ;;#ASMEND
1831 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
1832 ; GFX900-NEXT: ;;#ASMSTART
1833 ; GFX900-NEXT: ; def s4
1834 ; GFX900-NEXT: ;;#ASMEND
1835 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
1836 ; GFX900-NEXT: ;;#ASMSTART
1837 ; GFX900-NEXT: ; use s8
1838 ; GFX900-NEXT: ;;#ASMEND
1839 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1841 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__0_3:
1843 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1844 ; GFX90A-NEXT: ;;#ASMSTART
1845 ; GFX90A-NEXT: ; def s5
1846 ; GFX90A-NEXT: ;;#ASMEND
1847 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
1848 ; GFX90A-NEXT: ;;#ASMSTART
1849 ; GFX90A-NEXT: ; def s4
1850 ; GFX90A-NEXT: ;;#ASMEND
1851 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
1852 ; GFX90A-NEXT: ;;#ASMSTART
1853 ; GFX90A-NEXT: ; use s8
1854 ; GFX90A-NEXT: ;;#ASMEND
1855 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1857 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__0_3:
1859 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1860 ; GFX940-NEXT: ;;#ASMSTART
1861 ; GFX940-NEXT: ; def s1
1862 ; GFX940-NEXT: ;;#ASMEND
1863 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
1864 ; GFX940-NEXT: ;;#ASMSTART
1865 ; GFX940-NEXT: ; def s0
1866 ; GFX940-NEXT: ;;#ASMEND
1867 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
1868 ; GFX940-NEXT: ;;#ASMSTART
1869 ; GFX940-NEXT: ; use s8
1870 ; GFX940-NEXT: ;;#ASMEND
1871 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1872 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1873 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
1874 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 0, i32 3>
1875 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
; Two-input shuffle with mask <1, 3>: lane 0 takes element 1 of %vec0 and
; lane 1 takes element 1 of %vec1. Both inputs come from "=s" inline-asm defs;
; the result is passed to an inline-asm use pinned to s8.
; The checks expect both inputs to be shifted right by 16 (two s_lshr_b32)
; and then combined by s_pack_ll_b32_b16 into s8. The gfx940 checks use
; s0/s1 where the gfx900 and gfx90a checks use s4/s5.
1879 define void @s_shuffle_v2bf16_v2bf16__1_3() {
1880 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__1_3:
1882 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1883 ; GFX900-NEXT: ;;#ASMSTART
1884 ; GFX900-NEXT: ; def s4
1885 ; GFX900-NEXT: ;;#ASMEND
1886 ; GFX900-NEXT: ;;#ASMSTART
1887 ; GFX900-NEXT: ; def s5
1888 ; GFX900-NEXT: ;;#ASMEND
1889 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
1890 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
1891 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
1892 ; GFX900-NEXT: ;;#ASMSTART
1893 ; GFX900-NEXT: ; use s8
1894 ; GFX900-NEXT: ;;#ASMEND
1895 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1897 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__1_3:
1899 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1900 ; GFX90A-NEXT: ;;#ASMSTART
1901 ; GFX90A-NEXT: ; def s4
1902 ; GFX90A-NEXT: ;;#ASMEND
1903 ; GFX90A-NEXT: ;;#ASMSTART
1904 ; GFX90A-NEXT: ; def s5
1905 ; GFX90A-NEXT: ;;#ASMEND
1906 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
1907 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
1908 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
1909 ; GFX90A-NEXT: ;;#ASMSTART
1910 ; GFX90A-NEXT: ; use s8
1911 ; GFX90A-NEXT: ;;#ASMEND
1912 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1914 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__1_3:
1916 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1917 ; GFX940-NEXT: ;;#ASMSTART
1918 ; GFX940-NEXT: ; def s0
1919 ; GFX940-NEXT: ;;#ASMEND
1920 ; GFX940-NEXT: ;;#ASMSTART
1921 ; GFX940-NEXT: ; def s1
1922 ; GFX940-NEXT: ;;#ASMEND
1923 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
1924 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
1925 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
1926 ; GFX940-NEXT: ;;#ASMSTART
1927 ; GFX940-NEXT: ; use s8
1928 ; GFX940-NEXT: ;;#ASMEND
1929 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1930 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1931 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
1932 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 1, i32 3>
1933 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
; Two-input shuffle with mask <2, 3>: lane 0 takes element 0 of %vec1 and
; lane 1 takes element 1 of %vec1, so the result is %vec1 unchanged and
; %vec0 is not read. Both inputs come from "=s" inline-asm defs; the result
; is passed to an inline-asm use pinned to s8.
; The checks expect a single "; def s8" block feeding the "; use s8" block
; with no shift or pack instruction in between. The gfx940 checks also
; expect an s_nop 0 between the def and the use.
1937 define void @s_shuffle_v2bf16_v2bf16__2_3() {
1938 ; GFX900-LABEL: s_shuffle_v2bf16_v2bf16__2_3:
1940 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1941 ; GFX900-NEXT: ;;#ASMSTART
1942 ; GFX900-NEXT: ; def s8
1943 ; GFX900-NEXT: ;;#ASMEND
1944 ; GFX900-NEXT: ;;#ASMSTART
1945 ; GFX900-NEXT: ; use s8
1946 ; GFX900-NEXT: ;;#ASMEND
1947 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1949 ; GFX90A-LABEL: s_shuffle_v2bf16_v2bf16__2_3:
1951 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1952 ; GFX90A-NEXT: ;;#ASMSTART
1953 ; GFX90A-NEXT: ; def s8
1954 ; GFX90A-NEXT: ;;#ASMEND
1955 ; GFX90A-NEXT: ;;#ASMSTART
1956 ; GFX90A-NEXT: ; use s8
1957 ; GFX90A-NEXT: ;;#ASMEND
1958 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1960 ; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__2_3:
1962 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1963 ; GFX940-NEXT: ;;#ASMSTART
1964 ; GFX940-NEXT: ; def s8
1965 ; GFX940-NEXT: ;;#ASMEND
1966 ; GFX940-NEXT: s_nop 0
1967 ; GFX940-NEXT: ;;#ASMSTART
1968 ; GFX940-NEXT: ; use s8
1969 ; GFX940-NEXT: ;;#ASMEND
1970 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1971 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
1972 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
1973 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> <i32 2, i32 3>
1974 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
1977 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1978 ; GFX90APLUS: {{.*}}