1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
; Shuffle <2 x bfloat> -> <3 x bfloat> with an all-poison mask: no result lane
; names a source element. The visible checks list only the entry wait and the
; return for every tested target.
7 define void @v_shuffle_v3bf16_v2bf16__u_u_u(ptr addrspace(1) inreg %ptr) {
8 ; GFX9-LABEL: v_shuffle_v3bf16_v2bf16__u_u_u:
10 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
13 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> poison
14 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <0, poison, poison>: result lane 0 is lane 0 of %vec0, lanes 1 and 2 are
; poison. The checks expect the asm-defined register to be written unchanged
; with a single dword store.
18 define void @v_shuffle_v3bf16_v2bf16__0_u_u(ptr addrspace(1) inreg %ptr) {
19 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__0_u_u:
21 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
23 ; GFX900-NEXT: ;;#ASMSTART
24 ; GFX900-NEXT: ; def v1
25 ; GFX900-NEXT: ;;#ASMEND
26 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
27 ; GFX900-NEXT: s_waitcnt vmcnt(0)
28 ; GFX900-NEXT: s_setpc_b64 s[30:31]
30 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__0_u_u:
32 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
34 ; GFX90A-NEXT: ;;#ASMSTART
35 ; GFX90A-NEXT: ; def v1
36 ; GFX90A-NEXT: ;;#ASMEND
37 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
38 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
39 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
41 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__0_u_u:
43 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
45 ; GFX940-NEXT: ;;#ASMSTART
46 ; GFX940-NEXT: ; def v1
47 ; GFX940-NEXT: ;;#ASMEND
48 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
49 ; GFX940-NEXT: s_waitcnt vmcnt(0)
50 ; GFX940-NEXT: s_setpc_b64 s[30:31]
51 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
52 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
53 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <1, poison, poison>: result lane 0 is lane 1 of %vec0, lanes 1 and 2 are
; poison. The checks expect a v_alignbit_b32 by 16 on the asm-defined register
; followed by a single dword store.
57 define void @v_shuffle_v3bf16_v2bf16__1_u_u(ptr addrspace(1) inreg %ptr) {
58 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__1_u_u:
60 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61 ; GFX900-NEXT: ;;#ASMSTART
62 ; GFX900-NEXT: ; def v1
63 ; GFX900-NEXT: ;;#ASMEND
64 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
65 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16
66 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
67 ; GFX900-NEXT: s_waitcnt vmcnt(0)
68 ; GFX900-NEXT: s_setpc_b64 s[30:31]
70 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__1_u_u:
72 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73 ; GFX90A-NEXT: ;;#ASMSTART
74 ; GFX90A-NEXT: ; def v1
75 ; GFX90A-NEXT: ;;#ASMEND
76 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
77 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16
78 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
79 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
80 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
82 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__1_u_u:
84 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GFX940-NEXT: ;;#ASMSTART
86 ; GFX940-NEXT: ; def v1
87 ; GFX940-NEXT: ;;#ASMEND
88 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
89 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16
90 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
91 ; GFX940-NEXT: s_waitcnt vmcnt(0)
92 ; GFX940-NEXT: s_setpc_b64 s[30:31]
93 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
94 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
95 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <2, poison, poison>: index 2 names lane 0 of the second shuffle operand,
; which is poison here, so no result lane carries a defined value. The visible
; checks list only the entry wait and the return for every tested target.
99 define void @v_shuffle_v3bf16_v2bf16__2_u_u(ptr addrspace(1) inreg %ptr) {
100 ; GFX9-LABEL: v_shuffle_v3bf16_v2bf16__2_u_u:
102 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103 ; GFX9-NEXT: s_setpc_b64 s[30:31]
104 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
105 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 2, i32 poison, i32 poison>
106 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, poison, poison>: result lane 0 is lane 1 of %vec1, lanes 1 and 2 are
; poison. %vec0 feeds no result lane, and the checks show a single asm def
; followed by a v_alignbit_b32 by 16 and one dword store.
110 define void @v_shuffle_v3bf16_v2bf16__3_u_u(ptr addrspace(1) inreg %ptr) {
111 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_u_u:
113 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; GFX900-NEXT: ;;#ASMSTART
115 ; GFX900-NEXT: ; def v1
116 ; GFX900-NEXT: ;;#ASMEND
117 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
118 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16
119 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
120 ; GFX900-NEXT: s_waitcnt vmcnt(0)
121 ; GFX900-NEXT: s_setpc_b64 s[30:31]
123 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_u_u:
125 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; GFX90A-NEXT: ;;#ASMSTART
127 ; GFX90A-NEXT: ; def v1
128 ; GFX90A-NEXT: ;;#ASMEND
129 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
130 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16
131 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
132 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
133 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
135 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_u_u:
137 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; GFX940-NEXT: ;;#ASMSTART
139 ; GFX940-NEXT: ; def v1
140 ; GFX940-NEXT: ;;#ASMEND
141 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
142 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16
143 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
144 ; GFX940-NEXT: s_waitcnt vmcnt(0)
145 ; GFX940-NEXT: s_setpc_b64 s[30:31]
146 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
147 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
148 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 poison, i32 poison>
149 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 0, poison>: result lane 0 is lane 1 of %vec1, lane 1 is lane 0 of
; %vec0, lane 2 is poison. The checks expect one v_alignbit_b32 by 16 that
; combines both asm-defined registers, then a single dword store.
153 define void @v_shuffle_v3bf16_v2bf16__3_0_u(ptr addrspace(1) inreg %ptr) {
154 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_0_u:
156 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX900-NEXT: ;;#ASMSTART
158 ; GFX900-NEXT: ; def v1
159 ; GFX900-NEXT: ;;#ASMEND
160 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
161 ; GFX900-NEXT: ;;#ASMSTART
162 ; GFX900-NEXT: ; def v2
163 ; GFX900-NEXT: ;;#ASMEND
164 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v2, 16
165 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
166 ; GFX900-NEXT: s_waitcnt vmcnt(0)
167 ; GFX900-NEXT: s_setpc_b64 s[30:31]
169 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_0_u:
171 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172 ; GFX90A-NEXT: ;;#ASMSTART
173 ; GFX90A-NEXT: ; def v1
174 ; GFX90A-NEXT: ;;#ASMEND
175 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
176 ; GFX90A-NEXT: ;;#ASMSTART
177 ; GFX90A-NEXT: ; def v2
178 ; GFX90A-NEXT: ;;#ASMEND
179 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v2, 16
180 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
181 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
182 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
184 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_0_u:
186 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187 ; GFX940-NEXT: ;;#ASMSTART
188 ; GFX940-NEXT: ; def v1
189 ; GFX940-NEXT: ;;#ASMEND
190 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
191 ; GFX940-NEXT: ;;#ASMSTART
192 ; GFX940-NEXT: ; def v2
193 ; GFX940-NEXT: ;;#ASMEND
194 ; GFX940-NEXT: s_nop 0
195 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16
196 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
197 ; GFX940-NEXT: s_waitcnt vmcnt(0)
198 ; GFX940-NEXT: s_setpc_b64 s[30:31]
199 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
200 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
201 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 0, i32 poison>
202 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 1, poison>: result lane 0 is lane 1 of %vec1, lane 1 is lane 1 of
; %vec0, lane 2 is poison. The checks expect a v_perm_b32 with selector
; 0x7060302 over both asm-defined registers, then a single dword store.
206 define void @v_shuffle_v3bf16_v2bf16__3_1_u(ptr addrspace(1) inreg %ptr) {
207 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_1_u:
209 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210 ; GFX900-NEXT: ;;#ASMSTART
211 ; GFX900-NEXT: ; def v1
212 ; GFX900-NEXT: ;;#ASMEND
213 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
214 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
215 ; GFX900-NEXT: ;;#ASMSTART
216 ; GFX900-NEXT: ; def v2
217 ; GFX900-NEXT: ;;#ASMEND
218 ; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4
219 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
220 ; GFX900-NEXT: s_waitcnt vmcnt(0)
221 ; GFX900-NEXT: s_setpc_b64 s[30:31]
223 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_1_u:
225 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
226 ; GFX90A-NEXT: ;;#ASMSTART
227 ; GFX90A-NEXT: ; def v1
228 ; GFX90A-NEXT: ;;#ASMEND
229 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
230 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
231 ; GFX90A-NEXT: ;;#ASMSTART
232 ; GFX90A-NEXT: ; def v2
233 ; GFX90A-NEXT: ;;#ASMEND
234 ; GFX90A-NEXT: v_perm_b32 v1, v1, v2, s4
235 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
236 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
237 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
239 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_1_u:
241 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242 ; GFX940-NEXT: ;;#ASMSTART
243 ; GFX940-NEXT: ; def v1
244 ; GFX940-NEXT: ;;#ASMEND
245 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
246 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
247 ; GFX940-NEXT: ;;#ASMSTART
248 ; GFX940-NEXT: ; def v2
249 ; GFX940-NEXT: ;;#ASMEND
250 ; GFX940-NEXT: s_nop 0
251 ; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2
252 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
253 ; GFX940-NEXT: s_waitcnt vmcnt(0)
254 ; GFX940-NEXT: s_setpc_b64 s[30:31]
255 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
256 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
257 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 1, i32 poison>
258 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 2, poison>: result lanes 0 and 1 are lanes 1 and 0 of %vec1 (its two
; elements swapped), lane 2 is poison. %vec0 feeds no result lane; the checks
; show one asm def rotated by v_alignbit_b32 with itself, then a dword store.
262 define void @v_shuffle_v3bf16_v2bf16__3_2_u(ptr addrspace(1) inreg %ptr) {
263 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_2_u:
265 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266 ; GFX900-NEXT: ;;#ASMSTART
267 ; GFX900-NEXT: ; def v1
268 ; GFX900-NEXT: ;;#ASMEND
269 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
270 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16
271 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
272 ; GFX900-NEXT: s_waitcnt vmcnt(0)
273 ; GFX900-NEXT: s_setpc_b64 s[30:31]
275 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_2_u:
277 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
278 ; GFX90A-NEXT: ;;#ASMSTART
279 ; GFX90A-NEXT: ; def v1
280 ; GFX90A-NEXT: ;;#ASMEND
281 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
282 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16
283 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
284 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
285 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
287 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_2_u:
289 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290 ; GFX940-NEXT: ;;#ASMSTART
291 ; GFX940-NEXT: ; def v1
292 ; GFX940-NEXT: ;;#ASMEND
293 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
294 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16
295 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
296 ; GFX940-NEXT: s_waitcnt vmcnt(0)
297 ; GFX940-NEXT: s_setpc_b64 s[30:31]
298 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
299 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
300 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 2, i32 poison>
301 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 3, poison>: result lanes 0 and 1 are both lane 1 of %vec1, lane 2 is
; poison. %vec0 feeds no result lane; the checks show one asm def duplicated
; via v_perm_b32 (selector 0x7060302), then a single dword store.
305 define void @v_shuffle_v3bf16_v2bf16__3_3_u(ptr addrspace(1) inreg %ptr) {
306 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_3_u:
308 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309 ; GFX900-NEXT: ;;#ASMSTART
310 ; GFX900-NEXT: ; def v1
311 ; GFX900-NEXT: ;;#ASMEND
312 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
313 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
314 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
315 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
316 ; GFX900-NEXT: s_waitcnt vmcnt(0)
317 ; GFX900-NEXT: s_setpc_b64 s[30:31]
319 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_3_u:
321 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GFX90A-NEXT: ;;#ASMSTART
323 ; GFX90A-NEXT: ; def v1
324 ; GFX90A-NEXT: ;;#ASMEND
325 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
326 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
327 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
328 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
329 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
330 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
332 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_3_u:
334 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335 ; GFX940-NEXT: ;;#ASMSTART
336 ; GFX940-NEXT: ; def v1
337 ; GFX940-NEXT: ;;#ASMEND
338 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
339 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
340 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
341 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
342 ; GFX940-NEXT: s_waitcnt vmcnt(0)
343 ; GFX940-NEXT: s_setpc_b64 s[30:31]
344 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
345 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
346 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 3, i32 poison>
347 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 3, 0>: result lanes 0 and 1 are both lane 1 of %vec1, lane 2 is
; lane 0 of %vec0. The checks expect a v_perm_b32 plus dword store for the
; first two lanes and a global_store_short at offset 4 for the third.
351 define void @v_shuffle_v3bf16_v2bf16__3_3_0(ptr addrspace(1) inreg %ptr) {
352 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_3_0:
354 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
356 ; GFX900-NEXT: ;;#ASMSTART
357 ; GFX900-NEXT: ; def v1
358 ; GFX900-NEXT: ;;#ASMEND
359 ; GFX900-NEXT: ;;#ASMSTART
360 ; GFX900-NEXT: ; def v2
361 ; GFX900-NEXT: ;;#ASMEND
362 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
363 ; GFX900-NEXT: v_perm_b32 v2, v2, v2, s4
364 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
365 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
366 ; GFX900-NEXT: s_waitcnt vmcnt(0)
367 ; GFX900-NEXT: s_setpc_b64 s[30:31]
369 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_3_0:
371 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
373 ; GFX90A-NEXT: ;;#ASMSTART
374 ; GFX90A-NEXT: ; def v1
375 ; GFX90A-NEXT: ;;#ASMEND
376 ; GFX90A-NEXT: ;;#ASMSTART
377 ; GFX90A-NEXT: ; def v2
378 ; GFX90A-NEXT: ;;#ASMEND
379 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
380 ; GFX90A-NEXT: v_perm_b32 v2, v2, v2, s4
381 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
382 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
383 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
384 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
386 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_3_0:
388 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
389 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
390 ; GFX940-NEXT: ;;#ASMSTART
391 ; GFX940-NEXT: ; def v1
392 ; GFX940-NEXT: ;;#ASMEND
393 ; GFX940-NEXT: ;;#ASMSTART
394 ; GFX940-NEXT: ; def v2
395 ; GFX940-NEXT: ;;#ASMEND
396 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
397 ; GFX940-NEXT: v_perm_b32 v2, v2, v2, s2
398 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
399 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
400 ; GFX940-NEXT: s_waitcnt vmcnt(0)
401 ; GFX940-NEXT: s_setpc_b64 s[30:31]
402 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
403 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
404 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 3, i32 0>
405 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 3, 1>: result lanes 0 and 1 are both lane 1 of %vec1, lane 2 is
; lane 1 of %vec0. The checks expect a v_perm_b32 plus dword store for the
; first two lanes and a global_store_short_d16_hi at offset 4 for the third.
409 define void @v_shuffle_v3bf16_v2bf16__3_3_1(ptr addrspace(1) inreg %ptr) {
410 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_3_1:
412 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
413 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
414 ; GFX900-NEXT: ;;#ASMSTART
415 ; GFX900-NEXT: ; def v1
416 ; GFX900-NEXT: ;;#ASMEND
417 ; GFX900-NEXT: ;;#ASMSTART
418 ; GFX900-NEXT: ; def v2
419 ; GFX900-NEXT: ;;#ASMEND
420 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
421 ; GFX900-NEXT: v_perm_b32 v2, v2, v2, s4
422 ; GFX900-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
423 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
424 ; GFX900-NEXT: s_waitcnt vmcnt(0)
425 ; GFX900-NEXT: s_setpc_b64 s[30:31]
427 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_3_1:
429 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
431 ; GFX90A-NEXT: ;;#ASMSTART
432 ; GFX90A-NEXT: ; def v1
433 ; GFX90A-NEXT: ;;#ASMEND
434 ; GFX90A-NEXT: ;;#ASMSTART
435 ; GFX90A-NEXT: ; def v2
436 ; GFX90A-NEXT: ;;#ASMEND
437 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
438 ; GFX90A-NEXT: v_perm_b32 v2, v2, v2, s4
439 ; GFX90A-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
440 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
441 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
442 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
444 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_3_1:
446 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
447 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
448 ; GFX940-NEXT: ;;#ASMSTART
449 ; GFX940-NEXT: ; def v1
450 ; GFX940-NEXT: ;;#ASMEND
451 ; GFX940-NEXT: ;;#ASMSTART
452 ; GFX940-NEXT: ; def v2
453 ; GFX940-NEXT: ;;#ASMEND
454 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
455 ; GFX940-NEXT: v_perm_b32 v2, v2, v2, s2
456 ; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1
457 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
458 ; GFX940-NEXT: s_waitcnt vmcnt(0)
459 ; GFX940-NEXT: s_setpc_b64 s[30:31]
460 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
461 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
462 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 3, i32 1>
463 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 3, 2>: result lanes 0 and 1 are both lane 1 of %vec1, lane 2 is
; lane 0 of %vec1. %vec0 feeds no result lane; the checks show one asm def,
; a v_perm_b32 plus dword store, and a global_store_short at offset 4.
467 define void @v_shuffle_v3bf16_v2bf16__3_3_2(ptr addrspace(1) inreg %ptr) {
468 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_3_2:
470 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
472 ; GFX900-NEXT: ;;#ASMSTART
473 ; GFX900-NEXT: ; def v1
474 ; GFX900-NEXT: ;;#ASMEND
475 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
476 ; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4
477 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
478 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
479 ; GFX900-NEXT: s_waitcnt vmcnt(0)
480 ; GFX900-NEXT: s_setpc_b64 s[30:31]
482 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_3_2:
484 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
485 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
486 ; GFX90A-NEXT: ;;#ASMSTART
487 ; GFX90A-NEXT: ; def v1
488 ; GFX90A-NEXT: ;;#ASMEND
489 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
490 ; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4
491 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
492 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
493 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
494 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
496 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_3_2:
498 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
500 ; GFX940-NEXT: ;;#ASMSTART
501 ; GFX940-NEXT: ; def v1
502 ; GFX940-NEXT: ;;#ASMEND
503 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
504 ; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2
505 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
506 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
507 ; GFX940-NEXT: s_waitcnt vmcnt(0)
508 ; GFX940-NEXT: s_setpc_b64 s[30:31]
509 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
510 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
511 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 3, i32 2>
512 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 3, 3>: every result lane is lane 1 of %vec1 (a splat). %vec0 feeds
; no result lane; the checks show one asm def, a v_perm_b32 plus dword store
; for lanes 0-1, and a v_lshrrev_b32 by 16 plus short store for lane 2.
516 define void @v_shuffle_v3bf16_v2bf16__3_3_3(ptr addrspace(1) inreg %ptr) {
517 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_3_3:
519 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
520 ; GFX900-NEXT: ;;#ASMSTART
521 ; GFX900-NEXT: ; def v1
522 ; GFX900-NEXT: ;;#ASMEND
523 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
524 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
525 ; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4
526 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
527 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
528 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
529 ; GFX900-NEXT: s_waitcnt vmcnt(0)
530 ; GFX900-NEXT: s_setpc_b64 s[30:31]
532 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_3_3:
534 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535 ; GFX90A-NEXT: ;;#ASMSTART
536 ; GFX90A-NEXT: ; def v1
537 ; GFX90A-NEXT: ;;#ASMEND
538 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
539 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
540 ; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4
541 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
542 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
543 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
544 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
545 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
547 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_3_3:
549 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
550 ; GFX940-NEXT: ;;#ASMSTART
551 ; GFX940-NEXT: ; def v1
552 ; GFX940-NEXT: ;;#ASMEND
553 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
554 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
555 ; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2
556 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
557 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
558 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
559 ; GFX940-NEXT: s_waitcnt vmcnt(0)
560 ; GFX940-NEXT: s_setpc_b64 s[30:31]
561 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
562 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
563 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 3, i32 3>
564 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <poison, 0, 0>: result lane 0 is poison, lanes 1 and 2 are both lane 0
; of %vec0. The checks expect a v_lshlrev_b32 by 16 plus dword store for
; lanes 0-1 and a global_store_short at offset 4 for lane 2.
568 define void @v_shuffle_v3bf16_v2bf16__u_0_0(ptr addrspace(1) inreg %ptr) {
569 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__u_0_0:
571 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
572 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
573 ; GFX900-NEXT: ;;#ASMSTART
574 ; GFX900-NEXT: ; def v1
575 ; GFX900-NEXT: ;;#ASMEND
576 ; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v1
577 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
578 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
579 ; GFX900-NEXT: s_waitcnt vmcnt(0)
580 ; GFX900-NEXT: s_setpc_b64 s[30:31]
582 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__u_0_0:
584 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
586 ; GFX90A-NEXT: ;;#ASMSTART
587 ; GFX90A-NEXT: ; def v1
588 ; GFX90A-NEXT: ;;#ASMEND
589 ; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1
590 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
591 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
592 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
593 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
595 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__u_0_0:
597 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
599 ; GFX940-NEXT: ;;#ASMSTART
600 ; GFX940-NEXT: ; def v1
601 ; GFX940-NEXT: ;;#ASMEND
602 ; GFX940-NEXT: s_nop 0
603 ; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1
604 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
605 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
606 ; GFX940-NEXT: s_waitcnt vmcnt(0)
607 ; GFX940-NEXT: s_setpc_b64 s[30:31]
608 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
609 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 poison, i32 0, i32 0>
610 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; zeroinitializer mask, i.e. <0, 0, 0>: every result lane is lane 0 of %vec0
; (a splat). The checks expect a v_perm_b32 with selector 0x5040100 plus dword
; store for lanes 0-1 and a global_store_short at offset 4 for lane 2.
614 define void @v_shuffle_v3bf16_v2bf16__0_0_0(ptr addrspace(1) inreg %ptr) {
615 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__0_0_0:
617 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
618 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
619 ; GFX900-NEXT: ;;#ASMSTART
620 ; GFX900-NEXT: ; def v1
621 ; GFX900-NEXT: ;;#ASMEND
622 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
623 ; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4
624 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
625 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
626 ; GFX900-NEXT: s_waitcnt vmcnt(0)
627 ; GFX900-NEXT: s_setpc_b64 s[30:31]
629 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__0_0_0:
631 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
632 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
633 ; GFX90A-NEXT: ;;#ASMSTART
634 ; GFX90A-NEXT: ; def v1
635 ; GFX90A-NEXT: ;;#ASMEND
636 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
637 ; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4
638 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
639 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
640 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
641 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
643 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__0_0_0:
645 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
647 ; GFX940-NEXT: ;;#ASMSTART
648 ; GFX940-NEXT: ; def v1
649 ; GFX940-NEXT: ;;#ASMEND
650 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
651 ; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2
652 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
653 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
654 ; GFX940-NEXT: s_waitcnt vmcnt(0)
655 ; GFX940-NEXT: s_setpc_b64 s[30:31]
656 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
657 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> zeroinitializer
658 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <1, 0, 0>: result lane 0 is lane 1 of %vec0, lanes 1 and 2 are both
; lane 0 of %vec0. The checks expect a v_alignbit_b32 of the register with
; itself plus dword store, and a global_store_short at offset 4 for lane 2.
662 define void @v_shuffle_v3bf16_v2bf16__1_0_0(ptr addrspace(1) inreg %ptr) {
663 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__1_0_0:
665 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
666 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
667 ; GFX900-NEXT: ;;#ASMSTART
668 ; GFX900-NEXT: ; def v1
669 ; GFX900-NEXT: ;;#ASMEND
670 ; GFX900-NEXT: v_alignbit_b32 v2, v1, v1, 16
671 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
672 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
673 ; GFX900-NEXT: s_waitcnt vmcnt(0)
674 ; GFX900-NEXT: s_setpc_b64 s[30:31]
676 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__1_0_0:
678 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
679 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
680 ; GFX90A-NEXT: ;;#ASMSTART
681 ; GFX90A-NEXT: ; def v1
682 ; GFX90A-NEXT: ;;#ASMEND
683 ; GFX90A-NEXT: v_alignbit_b32 v2, v1, v1, 16
684 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
685 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
686 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
687 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
689 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__1_0_0:
691 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
692 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
693 ; GFX940-NEXT: ;;#ASMSTART
694 ; GFX940-NEXT: ; def v1
695 ; GFX940-NEXT: ;;#ASMEND
696 ; GFX940-NEXT: s_nop 0
697 ; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16
698 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
699 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
700 ; GFX940-NEXT: s_waitcnt vmcnt(0)
701 ; GFX940-NEXT: s_setpc_b64 s[30:31]
702 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
703 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 1, i32 0, i32 0>
704 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <2, 0, 0>: index 2 names lane 0 of the second shuffle operand, which is
; poison here; lanes 1 and 2 are both lane 0 of %vec0. The checks expect a
; v_lshlrev_b32 by 16 plus dword store and a global_store_short at offset 4.
708 define void @v_shuffle_v3bf16_v2bf16__2_0_0(ptr addrspace(1) inreg %ptr) {
709 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__2_0_0:
711 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
712 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
713 ; GFX900-NEXT: ;;#ASMSTART
714 ; GFX900-NEXT: ; def v1
715 ; GFX900-NEXT: ;;#ASMEND
716 ; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v1
717 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
718 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
719 ; GFX900-NEXT: s_waitcnt vmcnt(0)
720 ; GFX900-NEXT: s_setpc_b64 s[30:31]
722 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__2_0_0:
724 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
725 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
726 ; GFX90A-NEXT: ;;#ASMSTART
727 ; GFX90A-NEXT: ; def v1
728 ; GFX90A-NEXT: ;;#ASMEND
729 ; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1
730 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
731 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
732 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
733 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
735 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__2_0_0:
737 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
738 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
739 ; GFX940-NEXT: ;;#ASMSTART
740 ; GFX940-NEXT: ; def v1
741 ; GFX940-NEXT: ;;#ASMEND
742 ; GFX940-NEXT: s_nop 0
743 ; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1
744 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
745 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
746 ; GFX940-NEXT: s_waitcnt vmcnt(0)
747 ; GFX940-NEXT: s_setpc_b64 s[30:31]
748 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
749 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 2, i32 0, i32 0>
750 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 0, 0>: result lane 0 is lane 1 of %vec1, lanes 1 and 2 are both
; lane 0 of %vec0. The checks expect a v_alignbit_b32 by 16 over both asm defs
; plus dword store, and a global_store_short at offset 4 for lane 2.
754 define void @v_shuffle_v3bf16_v2bf16__3_0_0(ptr addrspace(1) inreg %ptr) {
755 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_0_0:
757 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
758 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
759 ; GFX900-NEXT: ;;#ASMSTART
760 ; GFX900-NEXT: ; def v1
761 ; GFX900-NEXT: ;;#ASMEND
762 ; GFX900-NEXT: ;;#ASMSTART
763 ; GFX900-NEXT: ; def v2
764 ; GFX900-NEXT: ;;#ASMEND
765 ; GFX900-NEXT: v_alignbit_b32 v2, v1, v2, 16
766 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
767 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
768 ; GFX900-NEXT: s_waitcnt vmcnt(0)
769 ; GFX900-NEXT: s_setpc_b64 s[30:31]
771 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_0_0:
773 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
774 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
775 ; GFX90A-NEXT: ;;#ASMSTART
776 ; GFX90A-NEXT: ; def v1
777 ; GFX90A-NEXT: ;;#ASMEND
778 ; GFX90A-NEXT: ;;#ASMSTART
779 ; GFX90A-NEXT: ; def v2
780 ; GFX90A-NEXT: ;;#ASMEND
781 ; GFX90A-NEXT: v_alignbit_b32 v2, v1, v2, 16
782 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
783 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
784 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
785 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
787 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_0_0:
789 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
790 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
791 ; GFX940-NEXT: ;;#ASMSTART
792 ; GFX940-NEXT: ; def v1
793 ; GFX940-NEXT: ;;#ASMEND
794 ; GFX940-NEXT: ;;#ASMSTART
795 ; GFX940-NEXT: ; def v2
796 ; GFX940-NEXT: ;;#ASMEND
797 ; GFX940-NEXT: s_nop 0
798 ; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16
799 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
800 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
801 ; GFX940-NEXT: s_waitcnt vmcnt(0)
802 ; GFX940-NEXT: s_setpc_b64 s[30:31]
803 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
804 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
805 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 0, i32 0>
806 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, poison, 0>: result lane 0 is lane 1 of %vec1, lane 1 is poison,
; lane 2 is lane 0 of %vec0. The checks expect a v_alignbit_b32 by 16 on the
; second asm def plus dword store, and a global_store_short at offset 4.
810 define void @v_shuffle_v3bf16_v2bf16__3_u_0(ptr addrspace(1) inreg %ptr) {
811 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_u_0:
813 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
814 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
815 ; GFX900-NEXT: ;;#ASMSTART
816 ; GFX900-NEXT: ; def v1
817 ; GFX900-NEXT: ;;#ASMEND
818 ; GFX900-NEXT: ;;#ASMSTART
819 ; GFX900-NEXT: ; def v2
820 ; GFX900-NEXT: ;;#ASMEND
821 ; GFX900-NEXT: v_alignbit_b32 v2, s4, v2, 16
822 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
823 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
824 ; GFX900-NEXT: s_waitcnt vmcnt(0)
825 ; GFX900-NEXT: s_setpc_b64 s[30:31]
827 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_u_0:
829 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
830 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
831 ; GFX90A-NEXT: ;;#ASMSTART
832 ; GFX90A-NEXT: ; def v1
833 ; GFX90A-NEXT: ;;#ASMEND
834 ; GFX90A-NEXT: ;;#ASMSTART
835 ; GFX90A-NEXT: ; def v2
836 ; GFX90A-NEXT: ;;#ASMEND
837 ; GFX90A-NEXT: v_alignbit_b32 v2, s4, v2, 16
838 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
839 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
840 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
841 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
843 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_u_0:
845 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
846 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
847 ; GFX940-NEXT: ;;#ASMSTART
848 ; GFX940-NEXT: ; def v1
849 ; GFX940-NEXT: ;;#ASMEND
850 ; GFX940-NEXT: ;;#ASMSTART
851 ; GFX940-NEXT: ; def v2
852 ; GFX940-NEXT: ;;#ASMEND
853 ; GFX940-NEXT: s_nop 0
854 ; GFX940-NEXT: v_alignbit_b32 v2, s0, v2, 16
855 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
856 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
857 ; GFX940-NEXT: s_waitcnt vmcnt(0)
858 ; GFX940-NEXT: s_setpc_b64 s[30:31]
859 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
860 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
861 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 poison, i32 0>
862 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 1, 0>: lane 0 = %vec1[1], lane 1 = %vec0[1],
; lane 2 = %vec0[0]. Both inputs are produced by inline asm defs and the
; <3 x bfloat> result is stored to %ptr.
866 define void @v_shuffle_v3bf16_v2bf16__3_1_0(ptr addrspace(1) inreg %ptr) {
867 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_1_0:
869 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
870 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
871 ; GFX900-NEXT: ;;#ASMSTART
872 ; GFX900-NEXT: ; def v1
873 ; GFX900-NEXT: ;;#ASMEND
874 ; GFX900-NEXT: ;;#ASMSTART
875 ; GFX900-NEXT: ; def v2
876 ; GFX900-NEXT: ;;#ASMEND
877 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
878 ; GFX900-NEXT: v_perm_b32 v2, v1, v2, s4
879 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
880 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
881 ; GFX900-NEXT: s_waitcnt vmcnt(0)
882 ; GFX900-NEXT: s_setpc_b64 s[30:31]
884 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_1_0:
886 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
887 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
888 ; GFX90A-NEXT: ;;#ASMSTART
889 ; GFX90A-NEXT: ; def v1
890 ; GFX90A-NEXT: ;;#ASMEND
891 ; GFX90A-NEXT: ;;#ASMSTART
892 ; GFX90A-NEXT: ; def v2
893 ; GFX90A-NEXT: ;;#ASMEND
894 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
895 ; GFX90A-NEXT: v_perm_b32 v2, v1, v2, s4
896 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
897 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
898 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
899 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
901 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_1_0:
903 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
904 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
905 ; GFX940-NEXT: ;;#ASMSTART
906 ; GFX940-NEXT: ; def v1
907 ; GFX940-NEXT: ;;#ASMEND
908 ; GFX940-NEXT: ;;#ASMSTART
909 ; GFX940-NEXT: ; def v2
910 ; GFX940-NEXT: ;;#ASMEND
911 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
912 ; GFX940-NEXT: v_perm_b32 v2, v1, v2, s2
913 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
914 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
915 ; GFX940-NEXT: s_waitcnt vmcnt(0)
916 ; GFX940-NEXT: s_setpc_b64 s[30:31]
917 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
918 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
919 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 1, i32 0>
920 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 2, 0>: lane 0 = %vec1[1], lane 1 = %vec1[0],
; lane 2 = %vec0[0]. Both inputs are produced by inline asm defs and the
; <3 x bfloat> result is stored to %ptr.
924 define void @v_shuffle_v3bf16_v2bf16__3_2_0(ptr addrspace(1) inreg %ptr) {
925 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_2_0:
927 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
928 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
929 ; GFX900-NEXT: ;;#ASMSTART
930 ; GFX900-NEXT: ; def v1
931 ; GFX900-NEXT: ;;#ASMEND
932 ; GFX900-NEXT: ;;#ASMSTART
933 ; GFX900-NEXT: ; def v2
934 ; GFX900-NEXT: ;;#ASMEND
935 ; GFX900-NEXT: v_alignbit_b32 v2, v2, v2, 16
936 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
937 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
938 ; GFX900-NEXT: s_waitcnt vmcnt(0)
939 ; GFX900-NEXT: s_setpc_b64 s[30:31]
941 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_2_0:
943 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
944 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
945 ; GFX90A-NEXT: ;;#ASMSTART
946 ; GFX90A-NEXT: ; def v1
947 ; GFX90A-NEXT: ;;#ASMEND
948 ; GFX90A-NEXT: ;;#ASMSTART
949 ; GFX90A-NEXT: ; def v2
950 ; GFX90A-NEXT: ;;#ASMEND
951 ; GFX90A-NEXT: v_alignbit_b32 v2, v2, v2, 16
952 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
953 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
954 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
955 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
957 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_2_0:
959 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
960 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
961 ; GFX940-NEXT: ;;#ASMSTART
962 ; GFX940-NEXT: ; def v1
963 ; GFX940-NEXT: ;;#ASMEND
964 ; GFX940-NEXT: ;;#ASMSTART
965 ; GFX940-NEXT: ; def v2
966 ; GFX940-NEXT: ;;#ASMEND
967 ; GFX940-NEXT: s_nop 0
968 ; GFX940-NEXT: v_alignbit_b32 v2, v2, v2, 16
969 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
970 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
971 ; GFX940-NEXT: s_waitcnt vmcnt(0)
972 ; GFX940-NEXT: s_setpc_b64 s[30:31]
973 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
974 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
975 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 2, i32 0>
976 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <poison, 1, 1> with a poison second operand: lane 0 is
; poison, lanes 1 and 2 are both %vec0[1]. The single input is produced by
; an inline asm def and the <3 x bfloat> result is stored to %ptr.
980 define void @v_shuffle_v3bf16_v2bf16__u_1_1(ptr addrspace(1) inreg %ptr) {
981 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__u_1_1:
983 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
985 ; GFX900-NEXT: ;;#ASMSTART
986 ; GFX900-NEXT: ; def v1
987 ; GFX900-NEXT: ;;#ASMEND
988 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
989 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
990 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
991 ; GFX900-NEXT: s_waitcnt vmcnt(0)
992 ; GFX900-NEXT: s_setpc_b64 s[30:31]
994 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__u_1_1:
996 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
997 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
998 ; GFX90A-NEXT: ;;#ASMSTART
999 ; GFX90A-NEXT: ; def v1
1000 ; GFX90A-NEXT: ;;#ASMEND
1001 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1002 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1003 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
1004 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1005 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1007 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__u_1_1:
1009 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1010 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1011 ; GFX940-NEXT: ;;#ASMSTART
1012 ; GFX940-NEXT: ; def v1
1013 ; GFX940-NEXT: ;;#ASMEND
1014 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1015 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1016 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
1017 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1018 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1019 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1020 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 poison, i32 1, i32 1>
1021 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <0, 1, 1> with a poison second operand: lane 0 = %vec0[0],
; lanes 1 and 2 are both %vec0[1]. The single input is produced by an
; inline asm def and the <3 x bfloat> result is stored to %ptr.
1025 define void @v_shuffle_v3bf16_v2bf16__0_1_1(ptr addrspace(1) inreg %ptr) {
1026 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__0_1_1:
1028 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1029 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1030 ; GFX900-NEXT: ;;#ASMSTART
1031 ; GFX900-NEXT: ; def v1
1032 ; GFX900-NEXT: ;;#ASMEND
1033 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1034 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1035 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
1036 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1037 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1039 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__0_1_1:
1041 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1042 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1043 ; GFX90A-NEXT: ;;#ASMSTART
1044 ; GFX90A-NEXT: ; def v1
1045 ; GFX90A-NEXT: ;;#ASMEND
1046 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1047 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1048 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
1049 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1050 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1052 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__0_1_1:
1054 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1055 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1056 ; GFX940-NEXT: ;;#ASMSTART
1057 ; GFX940-NEXT: ; def v1
1058 ; GFX940-NEXT: ;;#ASMEND
1059 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1060 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1061 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
1062 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1063 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1064 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1065 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 1>
1066 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <1, 1, 1> with a poison second operand: all three lanes are
; %vec0[1] (a splat of the high element). The single input is produced by
; an inline asm def and the <3 x bfloat> result is stored to %ptr.
1070 define void @v_shuffle_v3bf16_v2bf16__1_1_1(ptr addrspace(1) inreg %ptr) {
1071 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__1_1_1:
1073 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1074 ; GFX900-NEXT: ;;#ASMSTART
1075 ; GFX900-NEXT: ; def v1
1076 ; GFX900-NEXT: ;;#ASMEND
1077 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1078 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1079 ; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4
1080 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1081 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
1082 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
1083 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1084 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1086 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__1_1_1:
1088 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1089 ; GFX90A-NEXT: ;;#ASMSTART
1090 ; GFX90A-NEXT: ; def v1
1091 ; GFX90A-NEXT: ;;#ASMEND
1092 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1093 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1094 ; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4
1095 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1096 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
1097 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
1098 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1099 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1101 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__1_1_1:
1103 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1104 ; GFX940-NEXT: ;;#ASMSTART
1105 ; GFX940-NEXT: ; def v1
1106 ; GFX940-NEXT: ;;#ASMEND
1107 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1108 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1109 ; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2
1110 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1111 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
1112 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
1113 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1114 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1115 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1116 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 1, i32 1, i32 1>
1117 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <2, 1, 1> with a poison second operand: lane 0 uses index 2,
; i.e. element 0 of the poison operand, and lanes 1 and 2 are both
; %vec0[1]. The <3 x bfloat> result is stored to %ptr.
1121 define void @v_shuffle_v3bf16_v2bf16__2_1_1(ptr addrspace(1) inreg %ptr) {
1122 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__2_1_1:
1124 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1125 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1126 ; GFX900-NEXT: ;;#ASMSTART
1127 ; GFX900-NEXT: ; def v1
1128 ; GFX900-NEXT: ;;#ASMEND
1129 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1130 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1131 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
1132 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1133 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1135 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__2_1_1:
1137 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1138 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1139 ; GFX90A-NEXT: ;;#ASMSTART
1140 ; GFX90A-NEXT: ; def v1
1141 ; GFX90A-NEXT: ;;#ASMEND
1142 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1143 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1144 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
1145 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1146 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1148 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__2_1_1:
1150 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1151 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1152 ; GFX940-NEXT: ;;#ASMSTART
1153 ; GFX940-NEXT: ; def v1
1154 ; GFX940-NEXT: ;;#ASMEND
1155 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1156 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1157 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
1158 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1159 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1160 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1161 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 2, i32 1, i32 1>
1162 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 1, 1>: lane 0 = %vec1[1], lanes 1 and 2 are both
; %vec0[1]. Both inputs are produced by inline asm defs and the
; <3 x bfloat> result is stored to %ptr.
1166 define void @v_shuffle_v3bf16_v2bf16__3_1_1(ptr addrspace(1) inreg %ptr) {
1167 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_1_1:
1169 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1170 ; GFX900-NEXT: ;;#ASMSTART
1171 ; GFX900-NEXT: ; def v1
1172 ; GFX900-NEXT: ;;#ASMEND
1173 ; GFX900-NEXT: ;;#ASMSTART
1174 ; GFX900-NEXT: ; def v2
1175 ; GFX900-NEXT: ;;#ASMEND
1176 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1177 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1178 ; GFX900-NEXT: v_perm_b32 v2, v1, v2, s4
1179 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1180 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
1181 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
1182 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1183 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1185 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_1_1:
1187 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1188 ; GFX90A-NEXT: ;;#ASMSTART
1189 ; GFX90A-NEXT: ; def v1
1190 ; GFX90A-NEXT: ;;#ASMEND
1191 ; GFX90A-NEXT: ;;#ASMSTART
1192 ; GFX90A-NEXT: ; def v2
1193 ; GFX90A-NEXT: ;;#ASMEND
1194 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1195 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1196 ; GFX90A-NEXT: v_perm_b32 v2, v1, v2, s4
1197 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1198 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
1199 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
1200 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1201 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1203 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_1_1:
1205 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1206 ; GFX940-NEXT: ;;#ASMSTART
1207 ; GFX940-NEXT: ; def v1
1208 ; GFX940-NEXT: ;;#ASMEND
1209 ; GFX940-NEXT: ;;#ASMSTART
1210 ; GFX940-NEXT: ; def v2
1211 ; GFX940-NEXT: ;;#ASMEND
1212 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1213 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1214 ; GFX940-NEXT: v_perm_b32 v2, v1, v2, s2
1215 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1216 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
1217 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
1218 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1219 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1220 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1221 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
1222 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 1, i32 1>
1223 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, poison, 1>: lane 0 = %vec1[1], lane 1 = poison,
; lane 2 = %vec0[1]. Both inputs are produced by inline asm defs and the
; <3 x bfloat> result is stored to %ptr.
1227 define void @v_shuffle_v3bf16_v2bf16__3_u_1(ptr addrspace(1) inreg %ptr) {
1228 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_u_1:
1230 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1231 ; GFX900-NEXT: ;;#ASMSTART
1232 ; GFX900-NEXT: ; def v1
1233 ; GFX900-NEXT: ;;#ASMEND
1234 ; GFX900-NEXT: ;;#ASMSTART
1235 ; GFX900-NEXT: ; def v2
1236 ; GFX900-NEXT: ;;#ASMEND
1237 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1238 ; GFX900-NEXT: v_alignbit_b32 v2, s4, v2, 16
1239 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1240 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
1241 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
1242 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1243 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1245 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_u_1:
1247 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1248 ; GFX90A-NEXT: ;;#ASMSTART
1249 ; GFX90A-NEXT: ; def v1
1250 ; GFX90A-NEXT: ;;#ASMEND
1251 ; GFX90A-NEXT: ;;#ASMSTART
1252 ; GFX90A-NEXT: ; def v2
1253 ; GFX90A-NEXT: ;;#ASMEND
1254 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1255 ; GFX90A-NEXT: v_alignbit_b32 v2, s4, v2, 16
1256 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1257 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
1258 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
1259 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1260 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1262 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_u_1:
1264 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1265 ; GFX940-NEXT: ;;#ASMSTART
1266 ; GFX940-NEXT: ; def v1
1267 ; GFX940-NEXT: ;;#ASMEND
1268 ; GFX940-NEXT: ;;#ASMSTART
1269 ; GFX940-NEXT: ; def v2
1270 ; GFX940-NEXT: ;;#ASMEND
1271 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1272 ; GFX940-NEXT: v_alignbit_b32 v2, s0, v2, 16
1273 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1274 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
1275 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
1276 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1277 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1278 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1279 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
1280 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 poison, i32 1>
1281 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 0, 1>: lane 0 = %vec1[1], lane 1 = %vec0[0],
; lane 2 = %vec0[1]. Both inputs are produced by inline asm defs and the
; <3 x bfloat> result is stored to %ptr.
1285 define void @v_shuffle_v3bf16_v2bf16__3_0_1(ptr addrspace(1) inreg %ptr) {
1286 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_0_1:
1288 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1289 ; GFX900-NEXT: ;;#ASMSTART
1290 ; GFX900-NEXT: ; def v1
1291 ; GFX900-NEXT: ;;#ASMEND
1292 ; GFX900-NEXT: ;;#ASMSTART
1293 ; GFX900-NEXT: ; def v2
1294 ; GFX900-NEXT: ;;#ASMEND
1295 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1296 ; GFX900-NEXT: v_alignbit_b32 v2, v1, v2, 16
1297 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1298 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
1299 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
1300 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1301 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1303 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_0_1:
1305 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1306 ; GFX90A-NEXT: ;;#ASMSTART
1307 ; GFX90A-NEXT: ; def v1
1308 ; GFX90A-NEXT: ;;#ASMEND
1309 ; GFX90A-NEXT: ;;#ASMSTART
1310 ; GFX90A-NEXT: ; def v2
1311 ; GFX90A-NEXT: ;;#ASMEND
1312 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1313 ; GFX90A-NEXT: v_alignbit_b32 v2, v1, v2, 16
1314 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1315 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
1316 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
1317 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1318 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1320 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_0_1:
1322 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1323 ; GFX940-NEXT: ;;#ASMSTART
1324 ; GFX940-NEXT: ; def v1
1325 ; GFX940-NEXT: ;;#ASMEND
1326 ; GFX940-NEXT: ;;#ASMSTART
1327 ; GFX940-NEXT: ; def v2
1328 ; GFX940-NEXT: ;;#ASMEND
1329 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1330 ; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16
1331 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1332 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
1333 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
1334 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1335 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1336 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1337 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
1338 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 0, i32 1>
1339 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 2, 1>: lane 0 = %vec1[1], lane 1 = %vec1[0],
; lane 2 = %vec0[1]. Both inputs are produced by inline asm defs and the
; <3 x bfloat> result is stored to %ptr.
1343 define void @v_shuffle_v3bf16_v2bf16__3_2_1(ptr addrspace(1) inreg %ptr) {
1344 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_2_1:
1346 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1347 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1348 ; GFX900-NEXT: ;;#ASMSTART
1349 ; GFX900-NEXT: ; def v1
1350 ; GFX900-NEXT: ;;#ASMEND
1351 ; GFX900-NEXT: ;;#ASMSTART
1352 ; GFX900-NEXT: ; def v2
1353 ; GFX900-NEXT: ;;#ASMEND
1354 ; GFX900-NEXT: v_alignbit_b32 v2, v2, v2, 16
1355 ; GFX900-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
1356 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
1357 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1358 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1360 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_2_1:
1362 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1363 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1364 ; GFX90A-NEXT: ;;#ASMSTART
1365 ; GFX90A-NEXT: ; def v1
1366 ; GFX90A-NEXT: ;;#ASMEND
1367 ; GFX90A-NEXT: ;;#ASMSTART
1368 ; GFX90A-NEXT: ; def v2
1369 ; GFX90A-NEXT: ;;#ASMEND
1370 ; GFX90A-NEXT: v_alignbit_b32 v2, v2, v2, 16
1371 ; GFX90A-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
1372 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
1373 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1374 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1376 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_2_1:
1378 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1379 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1380 ; GFX940-NEXT: ;;#ASMSTART
1381 ; GFX940-NEXT: ; def v1
1382 ; GFX940-NEXT: ;;#ASMEND
1383 ; GFX940-NEXT: ;;#ASMSTART
1384 ; GFX940-NEXT: ; def v2
1385 ; GFX940-NEXT: ;;#ASMEND
1386 ; GFX940-NEXT: s_nop 0
1387 ; GFX940-NEXT: v_alignbit_b32 v2, v2, v2, 16
1388 ; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1
1389 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
1390 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1391 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1392 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1393 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
1394 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 2, i32 1>
1395 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <poison, 2, 2> with a poison second operand: lane 0 is
; poison and lanes 1 and 2 use index 2, i.e. element 0 of the poison
; operand, so no lane reads %vec0. The shared checks for all three targets
; expect only the entry waitcnt and the return.
1399 define void @v_shuffle_v3bf16_v2bf16__u_2_2(ptr addrspace(1) inreg %ptr) {
1400 ; GFX9-LABEL: v_shuffle_v3bf16_v2bf16__u_2_2:
1402 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1403 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1404 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1405 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 poison, i32 2, i32 2>
1406 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <0, 2, 2> with a poison second operand: lane 0 = %vec0[0],
; lanes 1 and 2 use index 2, i.e. element 0 of the poison operand. The
; checks expect a single dword store of the inline asm value.
1410 define void @v_shuffle_v3bf16_v2bf16__0_2_2(ptr addrspace(1) inreg %ptr) {
1411 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__0_2_2:
1413 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1414 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1415 ; GFX900-NEXT: ;;#ASMSTART
1416 ; GFX900-NEXT: ; def v1
1417 ; GFX900-NEXT: ;;#ASMEND
1418 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1419 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1420 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1422 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__0_2_2:
1424 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1425 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1426 ; GFX90A-NEXT: ;;#ASMSTART
1427 ; GFX90A-NEXT: ; def v1
1428 ; GFX90A-NEXT: ;;#ASMEND
1429 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1430 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1431 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1433 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__0_2_2:
1435 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1436 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1437 ; GFX940-NEXT: ;;#ASMSTART
1438 ; GFX940-NEXT: ; def v1
1439 ; GFX940-NEXT: ;;#ASMEND
1440 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1441 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1442 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1443 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1444 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 0, i32 2, i32 2>
1445 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <1, 2, 2> with a poison second operand: lane 0 = %vec0[1],
; lanes 1 and 2 use index 2, i.e. element 0 of the poison operand. The
; checks expect one v_alignbit_b32 followed by a single dword store.
1449 define void @v_shuffle_v3bf16_v2bf16__1_2_2(ptr addrspace(1) inreg %ptr) {
1450 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__1_2_2:
1452 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1453 ; GFX900-NEXT: ;;#ASMSTART
1454 ; GFX900-NEXT: ; def v1
1455 ; GFX900-NEXT: ;;#ASMEND
1456 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1457 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16
1458 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1459 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1460 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1462 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__1_2_2:
1464 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1465 ; GFX90A-NEXT: ;;#ASMSTART
1466 ; GFX90A-NEXT: ; def v1
1467 ; GFX90A-NEXT: ;;#ASMEND
1468 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1469 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16
1470 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1471 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1472 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1474 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__1_2_2:
1476 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1477 ; GFX940-NEXT: ;;#ASMSTART
1478 ; GFX940-NEXT: ; def v1
1479 ; GFX940-NEXT: ;;#ASMEND
1480 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1481 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16
1482 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1483 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1484 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1485 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1486 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 1, i32 2, i32 2>
1487 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <2, 2, 2> with a poison second operand: every lane uses
; index 2, i.e. element 0 of the poison operand, so no lane reads %vec0.
; The shared checks for all three targets expect only the entry waitcnt
; and the return.
1491 define void @v_shuffle_v3bf16_v2bf16__2_2_2(ptr addrspace(1) inreg %ptr) {
1492 ; GFX9-LABEL: v_shuffle_v3bf16_v2bf16__2_2_2:
1494 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1495 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1496 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1497 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 2, i32 2, i32 2>
1498 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 2, 2>: lane 0 = %vec1[1], lanes 1 and 2 are both
; %vec1[0]. The mask never selects from %vec0, and the checks expect only
; one inline asm def. The <3 x bfloat> result is stored to %ptr.
1502 define void @v_shuffle_v3bf16_v2bf16__3_2_2(ptr addrspace(1) inreg %ptr) {
1503 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_2_2:
1505 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1506 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1507 ; GFX900-NEXT: ;;#ASMSTART
1508 ; GFX900-NEXT: ; def v1
1509 ; GFX900-NEXT: ;;#ASMEND
1510 ; GFX900-NEXT: v_alignbit_b32 v2, v1, v1, 16
1511 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
1512 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
1513 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1514 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1516 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_2_2:
1518 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1519 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1520 ; GFX90A-NEXT: ;;#ASMSTART
1521 ; GFX90A-NEXT: ; def v1
1522 ; GFX90A-NEXT: ;;#ASMEND
1523 ; GFX90A-NEXT: v_alignbit_b32 v2, v1, v1, 16
1524 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
1525 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
1526 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1527 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1529 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_2_2:
1531 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1532 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1533 ; GFX940-NEXT: ;;#ASMSTART
1534 ; GFX940-NEXT: ; def v1
1535 ; GFX940-NEXT: ;;#ASMEND
1536 ; GFX940-NEXT: s_nop 0
1537 ; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16
1538 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
1539 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
1540 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1541 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1542 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1543 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
1544 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 2, i32 2>
1545 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, poison, 2>: lane 0 = %vec1[1], lane 1 = poison,
; lane 2 = %vec1[0]. The mask never selects from %vec0, and the checks
; expect only one inline asm def. The <3 x bfloat> result is stored to %ptr.
1549 define void @v_shuffle_v3bf16_v2bf16__3_u_2(ptr addrspace(1) inreg %ptr) {
1550 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_u_2:
1552 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1553 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1554 ; GFX900-NEXT: ;;#ASMSTART
1555 ; GFX900-NEXT: ; def v1
1556 ; GFX900-NEXT: ;;#ASMEND
1557 ; GFX900-NEXT: v_alignbit_b32 v2, s4, v1, 16
1558 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
1559 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
1560 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1561 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1563 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_u_2:
1565 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1566 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1567 ; GFX90A-NEXT: ;;#ASMSTART
1568 ; GFX90A-NEXT: ; def v1
1569 ; GFX90A-NEXT: ;;#ASMEND
1570 ; GFX90A-NEXT: v_alignbit_b32 v2, s4, v1, 16
1571 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
1572 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
1573 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1574 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1576 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_u_2:
1578 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1579 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1580 ; GFX940-NEXT: ;;#ASMSTART
1581 ; GFX940-NEXT: ; def v1
1582 ; GFX940-NEXT: ;;#ASMEND
1583 ; GFX940-NEXT: s_nop 0
1584 ; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16
1585 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
1586 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
1587 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1588 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1589 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1590 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
1591 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 poison, i32 2>
1592 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 0, 2>: lane 0 = %vec1[1], lane 1 = %vec0[0],
; lane 2 = %vec1[0]. Both inputs are produced by inline asm defs and the
; <3 x bfloat> result is stored to %ptr.
1596 define void @v_shuffle_v3bf16_v2bf16__3_0_2(ptr addrspace(1) inreg %ptr) {
1597 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_0_2:
1599 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1600 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1601 ; GFX900-NEXT: ;;#ASMSTART
1602 ; GFX900-NEXT: ; def v1
1603 ; GFX900-NEXT: ;;#ASMEND
1604 ; GFX900-NEXT: ;;#ASMSTART
1605 ; GFX900-NEXT: ; def v2
1606 ; GFX900-NEXT: ;;#ASMEND
1607 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v2, 16
1608 ; GFX900-NEXT: global_store_short v0, v2, s[16:17] offset:4
1609 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1610 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1611 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1613 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_0_2:
1615 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1616 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1617 ; GFX90A-NEXT: ;;#ASMSTART
1618 ; GFX90A-NEXT: ; def v1
1619 ; GFX90A-NEXT: ;;#ASMEND
1620 ; GFX90A-NEXT: ;;#ASMSTART
1621 ; GFX90A-NEXT: ; def v2
1622 ; GFX90A-NEXT: ;;#ASMEND
1623 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v2, 16
1624 ; GFX90A-NEXT: global_store_short v0, v2, s[16:17] offset:4
1625 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1626 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1627 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1629 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_0_2:
1631 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1632 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1633 ; GFX940-NEXT: ;;#ASMSTART
1634 ; GFX940-NEXT: ; def v1
1635 ; GFX940-NEXT: ;;#ASMEND
1636 ; GFX940-NEXT: ;;#ASMSTART
1637 ; GFX940-NEXT: ; def v2
1638 ; GFX940-NEXT: ;;#ASMEND
1639 ; GFX940-NEXT: s_nop 0
1640 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16
1641 ; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1
1642 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1643 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1644 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1645 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1646 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
1647 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 0, i32 2>
1648 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 1, 2>: lane 0 = %vec1[1], lane 1 = %vec0[1],
; lane 2 = %vec1[0]. Both inputs are produced by inline asm defs and the
; <3 x bfloat> result is stored to %ptr.
1652 define void @v_shuffle_v3bf16_v2bf16__3_1_2(ptr addrspace(1) inreg %ptr) {
1653 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_1_2:
1655 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1656 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1657 ; GFX900-NEXT: ;;#ASMSTART
1658 ; GFX900-NEXT: ; def v1
1659 ; GFX900-NEXT: ;;#ASMEND
1660 ; GFX900-NEXT: ;;#ASMSTART
1661 ; GFX900-NEXT: ; def v2
1662 ; GFX900-NEXT: ;;#ASMEND
1663 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1664 ; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4
1665 ; GFX900-NEXT: global_store_short v0, v2, s[16:17] offset:4
1666 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1667 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1668 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1670 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_1_2:
1672 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1673 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1674 ; GFX90A-NEXT: ;;#ASMSTART
1675 ; GFX90A-NEXT: ; def v1
1676 ; GFX90A-NEXT: ;;#ASMEND
1677 ; GFX90A-NEXT: ;;#ASMSTART
1678 ; GFX90A-NEXT: ; def v2
1679 ; GFX90A-NEXT: ;;#ASMEND
1680 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1681 ; GFX90A-NEXT: v_perm_b32 v1, v1, v2, s4
1682 ; GFX90A-NEXT: global_store_short v0, v2, s[16:17] offset:4
1683 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1684 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1685 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1687 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_1_2:
1689 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1690 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1691 ; GFX940-NEXT: ;;#ASMSTART
1692 ; GFX940-NEXT: ; def v1
1693 ; GFX940-NEXT: ;;#ASMEND
1694 ; GFX940-NEXT: ;;#ASMSTART
1695 ; GFX940-NEXT: ; def v2
1696 ; GFX940-NEXT: ;;#ASMEND
1697 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1698 ; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2
1699 ; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1
1700 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1701 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1702 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1703 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1704 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
1705 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 1, i32 2>
1706 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; "=v" inline-asm inputs, shuffle mask <poison, 3, 3>: element 0 is
; unconstrained and elements 1-2 both take vec1[1]. %vec0 is not referenced by
; the mask, and the expected output below contains a single asm def.
; The <3 x bfloat> result is stored to %ptr.
1710 define void @v_shuffle_v3bf16_v2bf16__u_3_3(ptr addrspace(1) inreg %ptr) {
1711 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__u_3_3:
1713 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1714 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1715 ; GFX900-NEXT: ;;#ASMSTART
1716 ; GFX900-NEXT: ; def v1
1717 ; GFX900-NEXT: ;;#ASMEND
1718 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1719 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1720 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
1721 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1722 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1724 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__u_3_3:
1726 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1727 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1728 ; GFX90A-NEXT: ;;#ASMSTART
1729 ; GFX90A-NEXT: ; def v1
1730 ; GFX90A-NEXT: ;;#ASMEND
1731 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1732 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1733 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
1734 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1735 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1737 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__u_3_3:
1739 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1740 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1741 ; GFX940-NEXT: ;;#ASMSTART
1742 ; GFX940-NEXT: ; def v1
1743 ; GFX940-NEXT: ;;#ASMEND
1744 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1745 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1746 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
1747 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1748 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1749 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1750 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
1751 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 poison, i32 3, i32 3>
1752 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; "=v" inline-asm inputs, shuffle mask <0, 3, 3>: mask indices 0-1 pick from
; %vec0 and 2-3 from %vec1, so the result is { vec0[0], vec1[1], vec1[1] }.
; The <3 x bfloat> result is stored to %ptr.
1756 define void @v_shuffle_v3bf16_v2bf16__0_3_3(ptr addrspace(1) inreg %ptr) {
1757 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__0_3_3:
1759 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1760 ; GFX900-NEXT: ;;#ASMSTART
1761 ; GFX900-NEXT: ; def v1
1762 ; GFX900-NEXT: ;;#ASMEND
1763 ; GFX900-NEXT: ;;#ASMSTART
1764 ; GFX900-NEXT: ; def v2
1765 ; GFX900-NEXT: ;;#ASMEND
1766 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
1767 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1768 ; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v2
1769 ; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
1770 ; GFX900-NEXT: global_store_short v0, v2, s[16:17] offset:4
1771 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1772 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1773 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1775 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__0_3_3:
1777 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1778 ; GFX90A-NEXT: ;;#ASMSTART
1779 ; GFX90A-NEXT: ; def v1
1780 ; GFX90A-NEXT: ;;#ASMEND
1781 ; GFX90A-NEXT: ;;#ASMSTART
1782 ; GFX90A-NEXT: ; def v2
1783 ; GFX90A-NEXT: ;;#ASMEND
1784 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
1785 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1786 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v2
1787 ; GFX90A-NEXT: v_lshrrev_b32_e32 v2, 16, v2
1788 ; GFX90A-NEXT: global_store_short v0, v2, s[16:17] offset:4
1789 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1790 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1791 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1793 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__0_3_3:
1795 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1796 ; GFX940-NEXT: ;;#ASMSTART
1797 ; GFX940-NEXT: ; def v1
1798 ; GFX940-NEXT: ;;#ASMEND
1799 ; GFX940-NEXT: ;;#ASMSTART
1800 ; GFX940-NEXT: ; def v2
1801 ; GFX940-NEXT: ;;#ASMEND
1802 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
1803 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1804 ; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2
1805 ; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2
1806 ; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1
1807 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1808 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1809 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1810 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1811 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
1812 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 0, i32 3, i32 3>
1813 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; "=v" inline-asm inputs, shuffle mask <1, 3, 3>: mask indices 0-1 pick from
; %vec0 and 2-3 from %vec1, so the result is { vec0[1], vec1[1], vec1[1] }.
; The <3 x bfloat> result is stored to %ptr.
1817 define void @v_shuffle_v3bf16_v2bf16__1_3_3(ptr addrspace(1) inreg %ptr) {
1818 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__1_3_3:
1820 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1821 ; GFX900-NEXT: ;;#ASMSTART
1822 ; GFX900-NEXT: ; def v1
1823 ; GFX900-NEXT: ;;#ASMEND
1824 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1825 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1826 ; GFX900-NEXT: ;;#ASMSTART
1827 ; GFX900-NEXT: ; def v2
1828 ; GFX900-NEXT: ;;#ASMEND
1829 ; GFX900-NEXT: v_perm_b32 v1, v2, v1, s4
1830 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1831 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1832 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
1833 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1834 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1836 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__1_3_3:
1838 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1839 ; GFX90A-NEXT: ;;#ASMSTART
1840 ; GFX90A-NEXT: ; def v1
1841 ; GFX90A-NEXT: ;;#ASMEND
1842 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1843 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1844 ; GFX90A-NEXT: ;;#ASMSTART
1845 ; GFX90A-NEXT: ; def v2
1846 ; GFX90A-NEXT: ;;#ASMEND
1847 ; GFX90A-NEXT: v_perm_b32 v1, v2, v1, s4
1848 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1849 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1850 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
1851 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1852 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1854 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__1_3_3:
1856 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1857 ; GFX940-NEXT: ;;#ASMSTART
1858 ; GFX940-NEXT: ; def v1
1859 ; GFX940-NEXT: ;;#ASMEND
1860 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1861 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1862 ; GFX940-NEXT: ;;#ASMSTART
1863 ; GFX940-NEXT: ; def v2
1864 ; GFX940-NEXT: ;;#ASMEND
1865 ; GFX940-NEXT: s_nop 0
1866 ; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2
1867 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1868 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1869 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
1870 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1871 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1872 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1873 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
1874 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 1, i32 3, i32 3>
1875 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; "=v" inline-asm inputs, shuffle mask <2, 3, 3>: every index is >= 2, so all
; elements come from %vec1: { vec1[0], vec1[1], vec1[1] }. %vec0 is not
; referenced by the mask, and the expected output below contains a single asm def.
; The <3 x bfloat> result is stored to %ptr.
1879 define void @v_shuffle_v3bf16_v2bf16__2_3_3(ptr addrspace(1) inreg %ptr) {
1880 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__2_3_3:
1882 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1883 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1884 ; GFX900-NEXT: ;;#ASMSTART
1885 ; GFX900-NEXT: ; def v1
1886 ; GFX900-NEXT: ;;#ASMEND
1887 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1888 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1889 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
1890 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1891 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1893 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__2_3_3:
1895 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1896 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1897 ; GFX90A-NEXT: ;;#ASMSTART
1898 ; GFX90A-NEXT: ; def v1
1899 ; GFX90A-NEXT: ;;#ASMEND
1900 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1901 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1902 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
1903 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1904 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1906 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__2_3_3:
1908 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1909 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1910 ; GFX940-NEXT: ;;#ASMSTART
1911 ; GFX940-NEXT: ; def v1
1912 ; GFX940-NEXT: ;;#ASMEND
1913 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
1914 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1915 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
1916 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1917 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1918 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1919 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
1920 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 2, i32 3, i32 3>
1921 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; "=v" inline-asm inputs, shuffle mask <3, poison, 3>: elements 0 and 2 take
; vec1[1]; element 1 is unconstrained. %vec0 is not referenced by the mask, and
; the expected output below contains a single asm def.
; NOTE(review): the scalar source operand of the expected v_alignbit_b32 is
; never written in the listed sequence; presumably it stands in for the poison
; lane - confirm before relying on it.
1925 define void @v_shuffle_v3bf16_v2bf16__3_u_3(ptr addrspace(1) inreg %ptr) {
1926 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_u_3:
1928 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1929 ; GFX900-NEXT: ;;#ASMSTART
1930 ; GFX900-NEXT: ; def v1
1931 ; GFX900-NEXT: ;;#ASMEND
1932 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1933 ; GFX900-NEXT: v_alignbit_b32 v2, s4, v1, 16
1934 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1935 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
1936 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
1937 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1938 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1940 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_u_3:
1942 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1943 ; GFX90A-NEXT: ;;#ASMSTART
1944 ; GFX90A-NEXT: ; def v1
1945 ; GFX90A-NEXT: ;;#ASMEND
1946 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
1947 ; GFX90A-NEXT: v_alignbit_b32 v2, s4, v1, 16
1948 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1949 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
1950 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
1951 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1952 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1954 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_u_3:
1956 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1957 ; GFX940-NEXT: ;;#ASMSTART
1958 ; GFX940-NEXT: ; def v1
1959 ; GFX940-NEXT: ;;#ASMEND
1960 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
1961 ; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16
1962 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1963 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
1964 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
1965 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1966 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1967 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
1968 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
1969 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 poison, i32 3>
1970 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; "=v" inline-asm inputs, shuffle mask <3, 0, 3>: mask indices 0-1 pick from
; %vec0 and 2-3 from %vec1, so the result is { vec1[1], vec0[0], vec1[1] }.
; The <3 x bfloat> result is stored to %ptr.
1974 define void @v_shuffle_v3bf16_v2bf16__3_0_3(ptr addrspace(1) inreg %ptr) {
1975 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_0_3:
1977 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1978 ; GFX900-NEXT: ;;#ASMSTART
1979 ; GFX900-NEXT: ; def v1
1980 ; GFX900-NEXT: ;;#ASMEND
1981 ; GFX900-NEXT: ;;#ASMSTART
1982 ; GFX900-NEXT: ; def v2
1983 ; GFX900-NEXT: ;;#ASMEND
1984 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
1985 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v2, 16
1986 ; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
1987 ; GFX900-NEXT: global_store_short v0, v2, s[16:17] offset:4
1988 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1989 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1990 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1992 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_0_3:
1994 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1995 ; GFX90A-NEXT: ;;#ASMSTART
1996 ; GFX90A-NEXT: ; def v1
1997 ; GFX90A-NEXT: ;;#ASMEND
1998 ; GFX90A-NEXT: ;;#ASMSTART
1999 ; GFX90A-NEXT: ; def v2
2000 ; GFX90A-NEXT: ;;#ASMEND
2001 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
2002 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v2, 16
2003 ; GFX90A-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2004 ; GFX90A-NEXT: global_store_short v0, v2, s[16:17] offset:4
2005 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
2006 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2007 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2009 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_0_3:
2011 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2012 ; GFX940-NEXT: ;;#ASMSTART
2013 ; GFX940-NEXT: ; def v1
2014 ; GFX940-NEXT: ;;#ASMEND
2015 ; GFX940-NEXT: ;;#ASMSTART
2016 ; GFX940-NEXT: ; def v2
2017 ; GFX940-NEXT: ;;#ASMEND
2018 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
2019 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16
2020 ; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2
2021 ; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1
2022 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
2023 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2024 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2025 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
2026 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
2027 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 0, i32 3>
2028 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; "=v" inline-asm inputs, shuffle mask <3, 1, 3>: mask indices 0-1 pick from
; %vec0 and 2-3 from %vec1, so the result is { vec1[1], vec0[1], vec1[1] }.
; The <3 x bfloat> result is stored to %ptr.
2032 define void @v_shuffle_v3bf16_v2bf16__3_1_3(ptr addrspace(1) inreg %ptr) {
2033 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_1_3:
2035 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2036 ; GFX900-NEXT: ;;#ASMSTART
2037 ; GFX900-NEXT: ; def v1
2038 ; GFX900-NEXT: ;;#ASMEND
2039 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2040 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
2041 ; GFX900-NEXT: ;;#ASMSTART
2042 ; GFX900-NEXT: ; def v2
2043 ; GFX900-NEXT: ;;#ASMEND
2044 ; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4
2045 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
2046 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2
2047 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
2048 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2049 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2051 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_1_3:
2053 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2054 ; GFX90A-NEXT: ;;#ASMSTART
2055 ; GFX90A-NEXT: ; def v1
2056 ; GFX90A-NEXT: ;;#ASMEND
2057 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2058 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
2059 ; GFX90A-NEXT: ;;#ASMSTART
2060 ; GFX90A-NEXT: ; def v2
2061 ; GFX90A-NEXT: ;;#ASMEND
2062 ; GFX90A-NEXT: v_perm_b32 v1, v1, v2, s4
2063 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
2064 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v2
2065 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
2066 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2067 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2069 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_1_3:
2071 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2072 ; GFX940-NEXT: ;;#ASMSTART
2073 ; GFX940-NEXT: ; def v1
2074 ; GFX940-NEXT: ;;#ASMEND
2075 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2076 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
2077 ; GFX940-NEXT: ;;#ASMSTART
2078 ; GFX940-NEXT: ; def v2
2079 ; GFX940-NEXT: ;;#ASMEND
2080 ; GFX940-NEXT: s_nop 0
2081 ; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2
2082 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
2083 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2
2084 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
2085 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2086 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2087 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
2088 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
2089 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 1, i32 3>
2090 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; "=v" inline-asm inputs, shuffle mask <3, 2, 3>: every index is >= 2, so all
; elements come from %vec1: { vec1[1], vec1[0], vec1[1] }. %vec0 is not
; referenced by the mask, and the expected output below contains a single asm def.
; The <3 x bfloat> result is stored to %ptr.
2094 define void @v_shuffle_v3bf16_v2bf16__3_2_3(ptr addrspace(1) inreg %ptr) {
2095 ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_2_3:
2097 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2098 ; GFX900-NEXT: ;;#ASMSTART
2099 ; GFX900-NEXT: ; def v1
2100 ; GFX900-NEXT: ;;#ASMEND
2101 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
2102 ; GFX900-NEXT: v_alignbit_b32 v2, v1, v1, 16
2103 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
2104 ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
2105 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
2106 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2107 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2109 ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_2_3:
2111 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2112 ; GFX90A-NEXT: ;;#ASMSTART
2113 ; GFX90A-NEXT: ; def v1
2114 ; GFX90A-NEXT: ;;#ASMEND
2115 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
2116 ; GFX90A-NEXT: v_alignbit_b32 v2, v1, v1, 16
2117 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
2118 ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
2119 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
2120 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2121 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2123 ; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_2_3:
2125 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2126 ; GFX940-NEXT: ;;#ASMSTART
2127 ; GFX940-NEXT: ; def v1
2128 ; GFX940-NEXT: ;;#ASMEND
2129 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
2130 ; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16
2131 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
2132 ; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
2133 ; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
2134 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2135 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2136 %vec0 = call <2 x bfloat> asm "; def $0", "=v"()
2137 %vec1 = call <2 x bfloat> asm "; def $0", "=v"()
2138 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 2, i32 3>
2139 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; "=s" inline-asm input, fully poison shuffle mask with a poison second operand:
; no element of %vec0 is selected, and the expected output below contains no asm
; def at all. The <3 x bfloat> result is widened to <4 x bfloat> (lane 3 poison)
; and passed to a side-effecting inline asm constrained to s[8:9].
2143 define void @s_shuffle_v3bf16_v2bf16__u_u_u() {
2144 ; GFX9-LABEL: s_shuffle_v3bf16_v2bf16__u_u_u:
2146 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2147 ; GFX9-NEXT: ;;#ASMSTART
2148 ; GFX9-NEXT: ; use s[8:9]
2149 ; GFX9-NEXT: ;;#ASMEND
2150 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2151 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2152 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> poison
2153 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2154 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; "=s" inline-asm input, shuffle mask <0, poison, poison> with a poison second
; operand: element 0 is vec0[0]; elements 1-2 are unconstrained.
; The <3 x bfloat> result is widened to <4 x bfloat> (lane 3 poison) and passed
; to a side-effecting inline asm constrained to s[8:9].
2158 define void @s_shuffle_v3bf16_v2bf16__0_u_u() {
2159 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__0_u_u:
2161 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2162 ; GFX900-NEXT: ;;#ASMSTART
2163 ; GFX900-NEXT: ; def s8
2164 ; GFX900-NEXT: ;;#ASMEND
2165 ; GFX900-NEXT: ;;#ASMSTART
2166 ; GFX900-NEXT: ; use s[8:9]
2167 ; GFX900-NEXT: ;;#ASMEND
2168 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2170 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__0_u_u:
2172 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2173 ; GFX90A-NEXT: ;;#ASMSTART
2174 ; GFX90A-NEXT: ; def s8
2175 ; GFX90A-NEXT: ;;#ASMEND
2176 ; GFX90A-NEXT: ;;#ASMSTART
2177 ; GFX90A-NEXT: ; use s[8:9]
2178 ; GFX90A-NEXT: ;;#ASMEND
2179 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2181 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__0_u_u:
2183 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2184 ; GFX940-NEXT: ;;#ASMSTART
2185 ; GFX940-NEXT: ; def s8
2186 ; GFX940-NEXT: ;;#ASMEND
2187 ; GFX940-NEXT: s_nop 0
2188 ; GFX940-NEXT: ;;#ASMSTART
2189 ; GFX940-NEXT: ; use s[8:9]
2190 ; GFX940-NEXT: ;;#ASMEND
2191 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2192 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2193 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
2194 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2195 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; "=s" inline-asm input, shuffle mask <1, poison, poison> with a poison second
; operand: element 0 is vec0[1]; elements 1-2 are unconstrained.
; The <3 x bfloat> result is widened to <4 x bfloat> (lane 3 poison) and passed
; to a side-effecting inline asm constrained to s[8:9].
2199 define void @s_shuffle_v3bf16_v2bf16__1_u_u() {
2200 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__1_u_u:
2202 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2203 ; GFX900-NEXT: ;;#ASMSTART
2204 ; GFX900-NEXT: ; def s4
2205 ; GFX900-NEXT: ;;#ASMEND
2206 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
2207 ; GFX900-NEXT: ;;#ASMSTART
2208 ; GFX900-NEXT: ; use s[8:9]
2209 ; GFX900-NEXT: ;;#ASMEND
2210 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2212 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__1_u_u:
2214 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2215 ; GFX90A-NEXT: ;;#ASMSTART
2216 ; GFX90A-NEXT: ; def s4
2217 ; GFX90A-NEXT: ;;#ASMEND
2218 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
2219 ; GFX90A-NEXT: ;;#ASMSTART
2220 ; GFX90A-NEXT: ; use s[8:9]
2221 ; GFX90A-NEXT: ;;#ASMEND
2222 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2224 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__1_u_u:
2226 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2227 ; GFX940-NEXT: ;;#ASMSTART
2228 ; GFX940-NEXT: ; def s0
2229 ; GFX940-NEXT: ;;#ASMEND
2230 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
2231 ; GFX940-NEXT: ;;#ASMSTART
2232 ; GFX940-NEXT: ; use s[8:9]
2233 ; GFX940-NEXT: ;;#ASMEND
2234 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2235 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2236 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
2237 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2238 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; "=s" inline-asm input, shuffle mask <2, poison, poison>: index 2 selects
; element 0 of the second operand, which is poison here, so no element of %vec0
; is used and the expected output below contains no asm def.
; The <3 x bfloat> result is widened to <4 x bfloat> (lane 3 poison) and passed
; to a side-effecting inline asm constrained to s[8:9].
2242 define void @s_shuffle_v3bf16_v2bf16__2_u_u() {
2243 ; GFX9-LABEL: s_shuffle_v3bf16_v2bf16__2_u_u:
2245 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2246 ; GFX9-NEXT: ;;#ASMSTART
2247 ; GFX9-NEXT: ; use s[8:9]
2248 ; GFX9-NEXT: ;;#ASMEND
2249 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2250 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2251 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 2, i32 poison, i32 poison>
2252 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2253 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; "=s" inline-asm inputs, shuffle mask <3, poison, poison>: element 0 is
; vec1[1]; elements 1-2 are unconstrained. %vec0 is not referenced by the mask,
; and the expected output below contains a single asm def.
; The <3 x bfloat> result is widened to <4 x bfloat> (lane 3 poison) and passed
; to a side-effecting inline asm constrained to s[8:9].
2257 define void @s_shuffle_v3bf16_v2bf16__3_u_u() {
2258 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_u_u:
2260 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2261 ; GFX900-NEXT: ;;#ASMSTART
2262 ; GFX900-NEXT: ; def s4
2263 ; GFX900-NEXT: ;;#ASMEND
2264 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
2265 ; GFX900-NEXT: ;;#ASMSTART
2266 ; GFX900-NEXT: ; use s[8:9]
2267 ; GFX900-NEXT: ;;#ASMEND
2268 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2270 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_u_u:
2272 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2273 ; GFX90A-NEXT: ;;#ASMSTART
2274 ; GFX90A-NEXT: ; def s4
2275 ; GFX90A-NEXT: ;;#ASMEND
2276 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
2277 ; GFX90A-NEXT: ;;#ASMSTART
2278 ; GFX90A-NEXT: ; use s[8:9]
2279 ; GFX90A-NEXT: ;;#ASMEND
2280 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2282 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_u_u:
2284 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2285 ; GFX940-NEXT: ;;#ASMSTART
2286 ; GFX940-NEXT: ; def s0
2287 ; GFX940-NEXT: ;;#ASMEND
2288 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
2289 ; GFX940-NEXT: ;;#ASMSTART
2290 ; GFX940-NEXT: ; use s[8:9]
2291 ; GFX940-NEXT: ;;#ASMEND
2292 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2293 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2294 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
2295 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 poison, i32 poison>
2296 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2297 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; "=s" inline-asm inputs, shuffle mask <3, 0, poison>: mask indices 0-1 pick
; from %vec0 and 2-3 from %vec1, so the result is { vec1[1], vec0[0], poison }.
; The <3 x bfloat> result is widened to <4 x bfloat> (lane 3 poison) and passed
; to a side-effecting inline asm constrained to s[8:9].
2301 define void @s_shuffle_v3bf16_v2bf16__3_0_u() {
2302 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_0_u:
2304 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2305 ; GFX900-NEXT: ;;#ASMSTART
2306 ; GFX900-NEXT: ; def s5
2307 ; GFX900-NEXT: ;;#ASMEND
2308 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
2309 ; GFX900-NEXT: ;;#ASMSTART
2310 ; GFX900-NEXT: ; def s4
2311 ; GFX900-NEXT: ;;#ASMEND
2312 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
2313 ; GFX900-NEXT: ;;#ASMSTART
2314 ; GFX900-NEXT: ; use s[8:9]
2315 ; GFX900-NEXT: ;;#ASMEND
2316 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2318 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_0_u:
2320 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2321 ; GFX90A-NEXT: ;;#ASMSTART
2322 ; GFX90A-NEXT: ; def s5
2323 ; GFX90A-NEXT: ;;#ASMEND
2324 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
2325 ; GFX90A-NEXT: ;;#ASMSTART
2326 ; GFX90A-NEXT: ; def s4
2327 ; GFX90A-NEXT: ;;#ASMEND
2328 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
2329 ; GFX90A-NEXT: ;;#ASMSTART
2330 ; GFX90A-NEXT: ; use s[8:9]
2331 ; GFX90A-NEXT: ;;#ASMEND
2332 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2334 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_0_u:
2336 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2337 ; GFX940-NEXT: ;;#ASMSTART
2338 ; GFX940-NEXT: ; def s1
2339 ; GFX940-NEXT: ;;#ASMEND
2340 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
2341 ; GFX940-NEXT: ;;#ASMSTART
2342 ; GFX940-NEXT: ; def s0
2343 ; GFX940-NEXT: ;;#ASMEND
2344 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
2345 ; GFX940-NEXT: ;;#ASMSTART
2346 ; GFX940-NEXT: ; use s[8:9]
2347 ; GFX940-NEXT: ;;#ASMEND
2348 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2349 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2350 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
2351 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 0, i32 poison>
2352 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2353 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; "=s" inline-asm inputs, shuffle mask <3, 1, poison>: mask indices 0-1 pick
; from %vec0 and 2-3 from %vec1, so the result is { vec1[1], vec0[1], poison }.
; The <3 x bfloat> result is widened to <4 x bfloat> (lane 3 poison) and passed
; to a side-effecting inline asm constrained to s[8:9].
2357 define void @s_shuffle_v3bf16_v2bf16__3_1_u() {
2358 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_1_u:
2360 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2361 ; GFX900-NEXT: ;;#ASMSTART
2362 ; GFX900-NEXT: ; def s4
2363 ; GFX900-NEXT: ;;#ASMEND
2364 ; GFX900-NEXT: ;;#ASMSTART
2365 ; GFX900-NEXT: ; def s5
2366 ; GFX900-NEXT: ;;#ASMEND
2367 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
2368 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
2369 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
2370 ; GFX900-NEXT: ;;#ASMSTART
2371 ; GFX900-NEXT: ; use s[8:9]
2372 ; GFX900-NEXT: ;;#ASMEND
2373 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2375 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_1_u:
2377 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2378 ; GFX90A-NEXT: ;;#ASMSTART
2379 ; GFX90A-NEXT: ; def s4
2380 ; GFX90A-NEXT: ;;#ASMEND
2381 ; GFX90A-NEXT: ;;#ASMSTART
2382 ; GFX90A-NEXT: ; def s5
2383 ; GFX90A-NEXT: ;;#ASMEND
2384 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
2385 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
2386 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
2387 ; GFX90A-NEXT: ;;#ASMSTART
2388 ; GFX90A-NEXT: ; use s[8:9]
2389 ; GFX90A-NEXT: ;;#ASMEND
2390 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2392 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_1_u:
2394 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2395 ; GFX940-NEXT: ;;#ASMSTART
2396 ; GFX940-NEXT: ; def s0
2397 ; GFX940-NEXT: ;;#ASMEND
2398 ; GFX940-NEXT: ;;#ASMSTART
2399 ; GFX940-NEXT: ; def s1
2400 ; GFX940-NEXT: ;;#ASMEND
2401 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
2402 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
2403 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
2404 ; GFX940-NEXT: ;;#ASMSTART
2405 ; GFX940-NEXT: ; use s[8:9]
2406 ; GFX940-NEXT: ;;#ASMEND
2407 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2408 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2409 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
2410 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 1, i32 poison>
2411 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2412 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; "=s" inline-asm inputs, shuffle mask <3, 2, poison>: both defined indices are
; >= 2, so the result is { vec1[1], vec1[0], poison }. %vec0 is not referenced
; by the mask, and the expected output below contains a single asm def.
; The <3 x bfloat> result is widened to <4 x bfloat> (lane 3 poison) and passed
; to a side-effecting inline asm constrained to s[8:9].
2416 define void @s_shuffle_v3bf16_v2bf16__3_2_u() {
2417 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_2_u:
2419 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2420 ; GFX900-NEXT: ;;#ASMSTART
2421 ; GFX900-NEXT: ; def s4
2422 ; GFX900-NEXT: ;;#ASMEND
2423 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
2424 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
2425 ; GFX900-NEXT: ;;#ASMSTART
2426 ; GFX900-NEXT: ; use s[8:9]
2427 ; GFX900-NEXT: ;;#ASMEND
2428 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2430 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_2_u:
2432 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2433 ; GFX90A-NEXT: ;;#ASMSTART
2434 ; GFX90A-NEXT: ; def s4
2435 ; GFX90A-NEXT: ;;#ASMEND
2436 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
2437 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
2438 ; GFX90A-NEXT: ;;#ASMSTART
2439 ; GFX90A-NEXT: ; use s[8:9]
2440 ; GFX90A-NEXT: ;;#ASMEND
2441 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2443 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_2_u:
2445 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2446 ; GFX940-NEXT: ;;#ASMSTART
2447 ; GFX940-NEXT: ; def s0
2448 ; GFX940-NEXT: ;;#ASMEND
2449 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
2450 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
2451 ; GFX940-NEXT: ;;#ASMSTART
2452 ; GFX940-NEXT: ; use s[8:9]
2453 ; GFX940-NEXT: ;;#ASMEND
2454 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2455 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2456 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
2457 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 2, i32 poison>
2458 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2459 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; "=s" inline-asm inputs, shuffle mask <3, 3, poison>: elements 0-1 both take
; vec1[1]; element 2 is unconstrained. %vec0 is not referenced by the mask, and
; the expected output below contains a single asm def.
; The <3 x bfloat> result is widened to <4 x bfloat> (lane 3 poison) and passed
; to a side-effecting inline asm constrained to s[8:9].
2463 define void @s_shuffle_v3bf16_v2bf16__3_3_u() {
2464 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_3_u:
2466 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2467 ; GFX900-NEXT: ;;#ASMSTART
2468 ; GFX900-NEXT: ; def s4
2469 ; GFX900-NEXT: ;;#ASMEND
2470 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
2471 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
2472 ; GFX900-NEXT: ;;#ASMSTART
2473 ; GFX900-NEXT: ; use s[8:9]
2474 ; GFX900-NEXT: ;;#ASMEND
2475 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2477 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_3_u:
2479 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2480 ; GFX90A-NEXT: ;;#ASMSTART
2481 ; GFX90A-NEXT: ; def s4
2482 ; GFX90A-NEXT: ;;#ASMEND
2483 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
2484 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
2485 ; GFX90A-NEXT: ;;#ASMSTART
2486 ; GFX90A-NEXT: ; use s[8:9]
2487 ; GFX90A-NEXT: ;;#ASMEND
2488 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2490 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_3_u:
2492 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2493 ; GFX940-NEXT: ;;#ASMSTART
2494 ; GFX940-NEXT: ; def s0
2495 ; GFX940-NEXT: ;;#ASMEND
2496 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
2497 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
2498 ; GFX940-NEXT: ;;#ASMSTART
2499 ; GFX940-NEXT: ; use s[8:9]
2500 ; GFX940-NEXT: ;;#ASMEND
2501 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2502 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2503 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
2504 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 3, i32 poison>
2505 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2506 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle with mask <3, 3, 0>: lanes 0 and 1 both read element 1 of
; %vec1, lane 2 reads element 0 of %vec0. The <3 x bfloat> result is widened
; to <4 x bfloat> so it can be bound to the 64-bit pair s[8:9].
; Expected code: one inline-asm def is shifted right by 16 and packed with
; itself into s8 via s_pack_ll_b32_b16; the other def is emitted straight into
; s9 and needs no further instructions.
2510 define void @s_shuffle_v3bf16_v2bf16__3_3_0() {
2511 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_3_0:
2513 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2514 ; GFX900-NEXT: ;;#ASMSTART
2515 ; GFX900-NEXT: ; def s4
2516 ; GFX900-NEXT: ;;#ASMEND
2517 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
2518 ; GFX900-NEXT: ;;#ASMSTART
2519 ; GFX900-NEXT: ; def s9
2520 ; GFX900-NEXT: ;;#ASMEND
2521 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
2522 ; GFX900-NEXT: ;;#ASMSTART
2523 ; GFX900-NEXT: ; use s[8:9]
2524 ; GFX900-NEXT: ;;#ASMEND
2525 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2527 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_3_0:
2529 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2530 ; GFX90A-NEXT: ;;#ASMSTART
2531 ; GFX90A-NEXT: ; def s4
2532 ; GFX90A-NEXT: ;;#ASMEND
2533 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
2534 ; GFX90A-NEXT: ;;#ASMSTART
2535 ; GFX90A-NEXT: ; def s9
2536 ; GFX90A-NEXT: ;;#ASMEND
2537 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
2538 ; GFX90A-NEXT: ;;#ASMSTART
2539 ; GFX90A-NEXT: ; use s[8:9]
2540 ; GFX90A-NEXT: ;;#ASMEND
2541 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2543 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_3_0:
2545 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2546 ; GFX940-NEXT: ;;#ASMSTART
2547 ; GFX940-NEXT: ; def s0
2548 ; GFX940-NEXT: ;;#ASMEND
2549 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
2550 ; GFX940-NEXT: ;;#ASMSTART
2551 ; GFX940-NEXT: ; def s9
2552 ; GFX940-NEXT: ;;#ASMEND
2553 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
2554 ; GFX940-NEXT: ;;#ASMSTART
2555 ; GFX940-NEXT: ; use s[8:9]
2556 ; GFX940-NEXT: ;;#ASMEND
2557 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2558 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2559 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
2560 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 3, i32 0>
2561 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2562 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle with mask <3, 3, 1>: lanes 0 and 1 both read element 1 of
; %vec1, lane 2 reads element 1 of %vec0. The <3 x bfloat> result is widened
; to <4 x bfloat> so it can be bound to the 64-bit pair s[8:9].
; Expected code: two inline-asm defs, each shifted right by 16. One shift
; writes s9 directly; the other is packed with itself into s8 via
; s_pack_ll_b32_b16.
2566 define void @s_shuffle_v3bf16_v2bf16__3_3_1() {
2567 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_3_1:
2569 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2570 ; GFX900-NEXT: ;;#ASMSTART
2571 ; GFX900-NEXT: ; def s4
2572 ; GFX900-NEXT: ;;#ASMEND
2573 ; GFX900-NEXT: ;;#ASMSTART
2574 ; GFX900-NEXT: ; def s5
2575 ; GFX900-NEXT: ;;#ASMEND
2576 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
2577 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
2578 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
2579 ; GFX900-NEXT: ;;#ASMSTART
2580 ; GFX900-NEXT: ; use s[8:9]
2581 ; GFX900-NEXT: ;;#ASMEND
2582 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2584 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_3_1:
2586 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2587 ; GFX90A-NEXT: ;;#ASMSTART
2588 ; GFX90A-NEXT: ; def s4
2589 ; GFX90A-NEXT: ;;#ASMEND
2590 ; GFX90A-NEXT: ;;#ASMSTART
2591 ; GFX90A-NEXT: ; def s5
2592 ; GFX90A-NEXT: ;;#ASMEND
2593 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
2594 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
2595 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
2596 ; GFX90A-NEXT: ;;#ASMSTART
2597 ; GFX90A-NEXT: ; use s[8:9]
2598 ; GFX90A-NEXT: ;;#ASMEND
2599 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2601 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_3_1:
2603 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2604 ; GFX940-NEXT: ;;#ASMSTART
2605 ; GFX940-NEXT: ; def s0
2606 ; GFX940-NEXT: ;;#ASMEND
2607 ; GFX940-NEXT: ;;#ASMSTART
2608 ; GFX940-NEXT: ; def s1
2609 ; GFX940-NEXT: ;;#ASMEND
2610 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
2611 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
2612 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
2613 ; GFX940-NEXT: ;;#ASMSTART
2614 ; GFX940-NEXT: ; use s[8:9]
2615 ; GFX940-NEXT: ;;#ASMEND
2616 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2617 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2618 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
2619 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 3, i32 1>
2620 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2621 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle with mask <3, 3, 2>: every lane comes from %vec1 (lanes 0 and 1
; read its element 1, lane 2 reads its element 0); %vec0 is never read. The
; <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9].
; Expected code: a single inline-asm def emitted straight into s9, a 16-bit
; right shift of it into a scratch SGPR, and s_pack_ll_b32_b16 of that scratch
; value with itself into s8.
2625 define void @s_shuffle_v3bf16_v2bf16__3_3_2() {
2626 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_3_2:
2628 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2629 ; GFX900-NEXT: ;;#ASMSTART
2630 ; GFX900-NEXT: ; def s9
2631 ; GFX900-NEXT: ;;#ASMEND
2632 ; GFX900-NEXT: s_lshr_b32 s4, s9, 16
2633 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
2634 ; GFX900-NEXT: ;;#ASMSTART
2635 ; GFX900-NEXT: ; use s[8:9]
2636 ; GFX900-NEXT: ;;#ASMEND
2637 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2639 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_3_2:
2641 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2642 ; GFX90A-NEXT: ;;#ASMSTART
2643 ; GFX90A-NEXT: ; def s9
2644 ; GFX90A-NEXT: ;;#ASMEND
2645 ; GFX90A-NEXT: s_lshr_b32 s4, s9, 16
2646 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
2647 ; GFX90A-NEXT: ;;#ASMSTART
2648 ; GFX90A-NEXT: ; use s[8:9]
2649 ; GFX90A-NEXT: ;;#ASMEND
2650 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2652 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_3_2:
2654 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2655 ; GFX940-NEXT: ;;#ASMSTART
2656 ; GFX940-NEXT: ; def s9
2657 ; GFX940-NEXT: ;;#ASMEND
2658 ; GFX940-NEXT: s_lshr_b32 s0, s9, 16
2659 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
2660 ; GFX940-NEXT: ;;#ASMSTART
2661 ; GFX940-NEXT: ; use s[8:9]
2662 ; GFX940-NEXT: ;;#ASMEND
2663 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2664 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2665 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
2666 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 3, i32 2>
2667 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2668 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle with mask <3, 3, 3>: all three lanes read element 1 of %vec1;
; %vec0 is never read. The <3 x bfloat> result is widened to <4 x bfloat> and
; bound to the 64-bit pair s[8:9].
; Expected code: a single inline-asm def, a 16-bit right shift of it written
; to s9, and s_pack_ll_b32_b16 of s9 with itself into s8.
2672 define void @s_shuffle_v3bf16_v2bf16__3_3_3() {
2673 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_3_3:
2675 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2676 ; GFX900-NEXT: ;;#ASMSTART
2677 ; GFX900-NEXT: ; def s4
2678 ; GFX900-NEXT: ;;#ASMEND
2679 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
2680 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s9
2681 ; GFX900-NEXT: ;;#ASMSTART
2682 ; GFX900-NEXT: ; use s[8:9]
2683 ; GFX900-NEXT: ;;#ASMEND
2684 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2686 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_3_3:
2688 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2689 ; GFX90A-NEXT: ;;#ASMSTART
2690 ; GFX90A-NEXT: ; def s4
2691 ; GFX90A-NEXT: ;;#ASMEND
2692 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
2693 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s9
2694 ; GFX90A-NEXT: ;;#ASMSTART
2695 ; GFX90A-NEXT: ; use s[8:9]
2696 ; GFX90A-NEXT: ;;#ASMEND
2697 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2699 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_3_3:
2701 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2702 ; GFX940-NEXT: ;;#ASMSTART
2703 ; GFX940-NEXT: ; def s0
2704 ; GFX940-NEXT: ;;#ASMEND
2705 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
2706 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9
2707 ; GFX940-NEXT: ;;#ASMSTART
2708 ; GFX940-NEXT: ; use s[8:9]
2709 ; GFX940-NEXT: ;;#ASMEND
2710 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2711 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2712 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
2713 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 3, i32 3>
2714 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2715 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input SGPR shuffle with mask <poison, 0, 0>: lane 0 is poison, lanes
; 1 and 2 both read element 0 of %vec0. The <3 x bfloat> result is widened to
; <4 x bfloat> and bound to the 64-bit pair s[8:9].
; Expected code is identical on all three subtargets (one shared check
; prefix): the inline-asm def lands in s9 and a single 16-bit left shift of
; s9 produces s8.
2719 define void @s_shuffle_v3bf16_v2bf16__u_0_0() {
2720 ; GFX9-LABEL: s_shuffle_v3bf16_v2bf16__u_0_0:
2722 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2723 ; GFX9-NEXT: ;;#ASMSTART
2724 ; GFX9-NEXT: ; def s9
2725 ; GFX9-NEXT: ;;#ASMEND
2726 ; GFX9-NEXT: s_lshl_b32 s8, s9, 16
2727 ; GFX9-NEXT: ;;#ASMSTART
2728 ; GFX9-NEXT: ; use s[8:9]
2729 ; GFX9-NEXT: ;;#ASMEND
2730 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2731 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2732 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 poison, i32 0, i32 0>
2733 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2734 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input SGPR shuffle with an all-zero mask: every lane reads element 0
; of %vec0 (a splat). The <3 x bfloat> result is widened to <4 x bfloat> and
; bound to the 64-bit pair s[8:9].
; Expected code is identical on all three subtargets (one shared check
; prefix): the inline-asm def lands in s9 and s_pack_ll_b32_b16 of s9 with
; itself produces s8.
2738 define void @s_shuffle_v3bf16_v2bf16__0_0_0() {
2739 ; GFX9-LABEL: s_shuffle_v3bf16_v2bf16__0_0_0:
2741 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2742 ; GFX9-NEXT: ;;#ASMSTART
2743 ; GFX9-NEXT: ; def s9
2744 ; GFX9-NEXT: ;;#ASMEND
2745 ; GFX9-NEXT: s_pack_ll_b32_b16 s8, s9, s9
2746 ; GFX9-NEXT: ;;#ASMSTART
2747 ; GFX9-NEXT: ; use s[8:9]
2748 ; GFX9-NEXT: ;;#ASMEND
2749 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2750 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2751 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> zeroinitializer
2752 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2753 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input SGPR shuffle with mask <1, 0, 0>: lane 0 reads element 1 of
; %vec0, lanes 1 and 2 read element 0. The <3 x bfloat> result is widened to
; <4 x bfloat> and bound to the 64-bit pair s[8:9].
; Expected code: the inline-asm def lands in s9, a 16-bit right shift of s9
; goes to a scratch SGPR, and s_pack_ll_b32_b16 combines that scratch value
; with s9 into s8.
2757 define void @s_shuffle_v3bf16_v2bf16__1_0_0() {
2758 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__1_0_0:
2760 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2761 ; GFX900-NEXT: ;;#ASMSTART
2762 ; GFX900-NEXT: ; def s9
2763 ; GFX900-NEXT: ;;#ASMEND
2764 ; GFX900-NEXT: s_lshr_b32 s4, s9, 16
2765 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
2766 ; GFX900-NEXT: ;;#ASMSTART
2767 ; GFX900-NEXT: ; use s[8:9]
2768 ; GFX900-NEXT: ;;#ASMEND
2769 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2771 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__1_0_0:
2773 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2774 ; GFX90A-NEXT: ;;#ASMSTART
2775 ; GFX90A-NEXT: ; def s9
2776 ; GFX90A-NEXT: ;;#ASMEND
2777 ; GFX90A-NEXT: s_lshr_b32 s4, s9, 16
2778 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
2779 ; GFX90A-NEXT: ;;#ASMSTART
2780 ; GFX90A-NEXT: ; use s[8:9]
2781 ; GFX90A-NEXT: ;;#ASMEND
2782 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2784 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__1_0_0:
2786 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2787 ; GFX940-NEXT: ;;#ASMSTART
2788 ; GFX940-NEXT: ; def s9
2789 ; GFX940-NEXT: ;;#ASMEND
2790 ; GFX940-NEXT: s_lshr_b32 s0, s9, 16
2791 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
2792 ; GFX940-NEXT: ;;#ASMSTART
2793 ; GFX940-NEXT: ; use s[8:9]
2794 ; GFX940-NEXT: ;;#ASMEND
2795 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2796 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2797 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 1, i32 0, i32 0>
2798 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2799 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input SGPR shuffle with mask <2, 0, 0>. The second shuffle operand is
; poison, so index 2 selects a poison element for lane 0; lanes 1 and 2 read
; element 0 of %vec0. The <3 x bfloat> result is widened to <4 x bfloat> and
; bound to the 64-bit pair s[8:9].
; Expected code is identical on all three subtargets (one shared check
; prefix): the inline-asm def lands in s9 and a single 16-bit left shift of
; s9 produces s8.
2803 define void @s_shuffle_v3bf16_v2bf16__2_0_0() {
2804 ; GFX9-LABEL: s_shuffle_v3bf16_v2bf16__2_0_0:
2806 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2807 ; GFX9-NEXT: ;;#ASMSTART
2808 ; GFX9-NEXT: ; def s9
2809 ; GFX9-NEXT: ;;#ASMEND
2810 ; GFX9-NEXT: s_lshl_b32 s8, s9, 16
2811 ; GFX9-NEXT: ;;#ASMSTART
2812 ; GFX9-NEXT: ; use s[8:9]
2813 ; GFX9-NEXT: ;;#ASMEND
2814 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2815 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2816 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 2, i32 0, i32 0>
2817 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2818 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle with mask <3, 0, 0>: lane 0 reads element 1 of %vec1, lanes 1
; and 2 both read element 0 of %vec0. The <3 x bfloat> result is widened to
; <4 x bfloat> and bound to the 64-bit pair s[8:9].
; Expected code: one inline-asm def is shifted right by 16 in place, the other
; def is emitted straight into s9, and s_pack_ll_b32_b16 combines the shifted
; value with s9 into s8.
2822 define void @s_shuffle_v3bf16_v2bf16__3_0_0() {
2823 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_0_0:
2825 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2826 ; GFX900-NEXT: ;;#ASMSTART
2827 ; GFX900-NEXT: ; def s4
2828 ; GFX900-NEXT: ;;#ASMEND
2829 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
2830 ; GFX900-NEXT: ;;#ASMSTART
2831 ; GFX900-NEXT: ; def s9
2832 ; GFX900-NEXT: ;;#ASMEND
2833 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
2834 ; GFX900-NEXT: ;;#ASMSTART
2835 ; GFX900-NEXT: ; use s[8:9]
2836 ; GFX900-NEXT: ;;#ASMEND
2837 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2839 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_0_0:
2841 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2842 ; GFX90A-NEXT: ;;#ASMSTART
2843 ; GFX90A-NEXT: ; def s4
2844 ; GFX90A-NEXT: ;;#ASMEND
2845 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
2846 ; GFX90A-NEXT: ;;#ASMSTART
2847 ; GFX90A-NEXT: ; def s9
2848 ; GFX90A-NEXT: ;;#ASMEND
2849 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
2850 ; GFX90A-NEXT: ;;#ASMSTART
2851 ; GFX90A-NEXT: ; use s[8:9]
2852 ; GFX90A-NEXT: ;;#ASMEND
2853 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2855 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_0_0:
2857 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2858 ; GFX940-NEXT: ;;#ASMSTART
2859 ; GFX940-NEXT: ; def s0
2860 ; GFX940-NEXT: ;;#ASMEND
2861 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
2862 ; GFX940-NEXT: ;;#ASMSTART
2863 ; GFX940-NEXT: ; def s9
2864 ; GFX940-NEXT: ;;#ASMEND
2865 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
2866 ; GFX940-NEXT: ;;#ASMSTART
2867 ; GFX940-NEXT: ; use s[8:9]
2868 ; GFX940-NEXT: ;;#ASMEND
2869 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2870 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2871 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
2872 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 0, i32 0>
2873 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2874 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle with mask <3, poison, 0>: lane 0 reads element 1 of %vec1,
; lane 1 is poison, lane 2 reads element 0 of %vec0. The <3 x bfloat> result
; is widened to <4 x bfloat> and bound to the 64-bit pair s[8:9].
; Expected code: one inline-asm def is emitted straight into s9, and the other
; def is shifted right by 16 directly into s8. No s_pack_ll_b32_b16 is
; expected because lane 1 is undefined.
2878 define void @s_shuffle_v3bf16_v2bf16__3_u_0() {
2879 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_u_0:
2881 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2882 ; GFX900-NEXT: ;;#ASMSTART
2883 ; GFX900-NEXT: ; def s9
2884 ; GFX900-NEXT: ;;#ASMEND
2885 ; GFX900-NEXT: ;;#ASMSTART
2886 ; GFX900-NEXT: ; def s4
2887 ; GFX900-NEXT: ;;#ASMEND
2888 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
2889 ; GFX900-NEXT: ;;#ASMSTART
2890 ; GFX900-NEXT: ; use s[8:9]
2891 ; GFX900-NEXT: ;;#ASMEND
2892 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2894 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_u_0:
2896 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2897 ; GFX90A-NEXT: ;;#ASMSTART
2898 ; GFX90A-NEXT: ; def s9
2899 ; GFX90A-NEXT: ;;#ASMEND
2900 ; GFX90A-NEXT: ;;#ASMSTART
2901 ; GFX90A-NEXT: ; def s4
2902 ; GFX90A-NEXT: ;;#ASMEND
2903 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
2904 ; GFX90A-NEXT: ;;#ASMSTART
2905 ; GFX90A-NEXT: ; use s[8:9]
2906 ; GFX90A-NEXT: ;;#ASMEND
2907 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2909 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_u_0:
2911 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2912 ; GFX940-NEXT: ;;#ASMSTART
2913 ; GFX940-NEXT: ; def s9
2914 ; GFX940-NEXT: ;;#ASMEND
2915 ; GFX940-NEXT: ;;#ASMSTART
2916 ; GFX940-NEXT: ; def s0
2917 ; GFX940-NEXT: ;;#ASMEND
2918 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
2919 ; GFX940-NEXT: ;;#ASMSTART
2920 ; GFX940-NEXT: ; use s[8:9]
2921 ; GFX940-NEXT: ;;#ASMEND
2922 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2923 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2924 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
2925 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 poison, i32 0>
2926 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2927 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle with mask <3, 1, 0>: lane 0 reads element 1 of %vec1, lane 1
; reads element 1 of %vec0, lane 2 reads element 0 of %vec0. The <3 x bfloat>
; result is widened to <4 x bfloat> and bound to the 64-bit pair s[8:9].
; Expected code: two inline-asm defs, one of them emitted straight into s9.
; Both defs are shifted right by 16 into scratch SGPRs and the two shifted
; values are combined into s8 with s_pack_ll_b32_b16.
2931 define void @s_shuffle_v3bf16_v2bf16__3_1_0() {
2932 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_1_0:
2934 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2935 ; GFX900-NEXT: ;;#ASMSTART
2936 ; GFX900-NEXT: ; def s4
2937 ; GFX900-NEXT: ;;#ASMEND
2938 ; GFX900-NEXT: ;;#ASMSTART
2939 ; GFX900-NEXT: ; def s9
2940 ; GFX900-NEXT: ;;#ASMEND
2941 ; GFX900-NEXT: s_lshr_b32 s5, s9, 16
2942 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
2943 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
2944 ; GFX900-NEXT: ;;#ASMSTART
2945 ; GFX900-NEXT: ; use s[8:9]
2946 ; GFX900-NEXT: ;;#ASMEND
2947 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2949 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_1_0:
2951 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2952 ; GFX90A-NEXT: ;;#ASMSTART
2953 ; GFX90A-NEXT: ; def s4
2954 ; GFX90A-NEXT: ;;#ASMEND
2955 ; GFX90A-NEXT: ;;#ASMSTART
2956 ; GFX90A-NEXT: ; def s9
2957 ; GFX90A-NEXT: ;;#ASMEND
2958 ; GFX90A-NEXT: s_lshr_b32 s5, s9, 16
2959 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
2960 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
2961 ; GFX90A-NEXT: ;;#ASMSTART
2962 ; GFX90A-NEXT: ; use s[8:9]
2963 ; GFX90A-NEXT: ;;#ASMEND
2964 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2966 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_1_0:
2968 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2969 ; GFX940-NEXT: ;;#ASMSTART
2970 ; GFX940-NEXT: ; def s0
2971 ; GFX940-NEXT: ;;#ASMEND
2972 ; GFX940-NEXT: ;;#ASMSTART
2973 ; GFX940-NEXT: ; def s9
2974 ; GFX940-NEXT: ;;#ASMEND
2975 ; GFX940-NEXT: s_lshr_b32 s1, s9, 16
2976 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
2977 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
2978 ; GFX940-NEXT: ;;#ASMSTART
2979 ; GFX940-NEXT: ; use s[8:9]
2980 ; GFX940-NEXT: ;;#ASMEND
2981 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2982 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
2983 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
2984 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 1, i32 0>
2985 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
2986 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle with mask <3, 2, 0>: lanes 0 and 1 are %vec1 with its two
; elements swapped (element 1, then element 0); lane 2 reads element 0 of
; %vec0. The <3 x bfloat> result is widened to <4 x bfloat> and bound to the
; 64-bit pair s[8:9].
; Expected code: one inline-asm def has its 16-bit right shift written to a
; second scratch SGPR, the other def is emitted straight into s9, and
; s_pack_ll_b32_b16 combines the shifted value with the unshifted def into s8.
2990 define void @s_shuffle_v3bf16_v2bf16__3_2_0() {
2991 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_2_0:
2993 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2994 ; GFX900-NEXT: ;;#ASMSTART
2995 ; GFX900-NEXT: ; def s4
2996 ; GFX900-NEXT: ;;#ASMEND
2997 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
2998 ; GFX900-NEXT: ;;#ASMSTART
2999 ; GFX900-NEXT: ; def s9
3000 ; GFX900-NEXT: ;;#ASMEND
3001 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3002 ; GFX900-NEXT: ;;#ASMSTART
3003 ; GFX900-NEXT: ; use s[8:9]
3004 ; GFX900-NEXT: ;;#ASMEND
3005 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3007 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_2_0:
3009 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3010 ; GFX90A-NEXT: ;;#ASMSTART
3011 ; GFX90A-NEXT: ; def s4
3012 ; GFX90A-NEXT: ;;#ASMEND
3013 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
3014 ; GFX90A-NEXT: ;;#ASMSTART
3015 ; GFX90A-NEXT: ; def s9
3016 ; GFX90A-NEXT: ;;#ASMEND
3017 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3018 ; GFX90A-NEXT: ;;#ASMSTART
3019 ; GFX90A-NEXT: ; use s[8:9]
3020 ; GFX90A-NEXT: ;;#ASMEND
3021 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3023 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_2_0:
3025 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3026 ; GFX940-NEXT: ;;#ASMSTART
3027 ; GFX940-NEXT: ; def s0
3028 ; GFX940-NEXT: ;;#ASMEND
3029 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
3030 ; GFX940-NEXT: ;;#ASMSTART
3031 ; GFX940-NEXT: ; def s9
3032 ; GFX940-NEXT: ;;#ASMEND
3033 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
3034 ; GFX940-NEXT: ;;#ASMSTART
3035 ; GFX940-NEXT: ; use s[8:9]
3036 ; GFX940-NEXT: ;;#ASMEND
3037 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3038 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3039 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3040 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 2, i32 0>
3041 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3042 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input SGPR shuffle with mask <poison, 1, 1>: lane 0 is poison, lanes
; 1 and 2 both read element 1 of %vec0. The <3 x bfloat> result is widened to
; <4 x bfloat> and bound to the 64-bit pair s[8:9].
; Expected code is identical on all three subtargets (one shared check
; prefix): the inline-asm def lands in s8 and a single 16-bit right shift of
; s8 produces s9.
3046 define void @s_shuffle_v3bf16_v2bf16__u_1_1() {
3047 ; GFX9-LABEL: s_shuffle_v3bf16_v2bf16__u_1_1:
3049 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3050 ; GFX9-NEXT: ;;#ASMSTART
3051 ; GFX9-NEXT: ; def s8
3052 ; GFX9-NEXT: ;;#ASMEND
3053 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
3054 ; GFX9-NEXT: ;;#ASMSTART
3055 ; GFX9-NEXT: ; use s[8:9]
3056 ; GFX9-NEXT: ;;#ASMEND
3057 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3058 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3059 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 poison, i32 1, i32 1>
3060 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3061 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input SGPR shuffle with mask <0, 1, 1>: lanes 0 and 1 are %vec0
; unchanged, lane 2 repeats element 1. The <3 x bfloat> result is widened to
; <4 x bfloat> and bound to the 64-bit pair s[8:9].
; Expected code is identical on all three subtargets (one shared check
; prefix): the inline-asm def lands in s8 and a single 16-bit right shift of
; s8 produces s9.
3065 define void @s_shuffle_v3bf16_v2bf16__0_1_1() {
3066 ; GFX9-LABEL: s_shuffle_v3bf16_v2bf16__0_1_1:
3068 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3069 ; GFX9-NEXT: ;;#ASMSTART
3070 ; GFX9-NEXT: ; def s8
3071 ; GFX9-NEXT: ;;#ASMEND
3072 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
3073 ; GFX9-NEXT: ;;#ASMSTART
3074 ; GFX9-NEXT: ; use s[8:9]
3075 ; GFX9-NEXT: ;;#ASMEND
3076 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3077 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3078 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 1>
3079 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3080 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input SGPR shuffle with mask <1, 1, 1>: every lane reads element 1 of
; %vec0 (a splat of the upper element). The <3 x bfloat> result is widened to
; <4 x bfloat> and bound to the 64-bit pair s[8:9].
; Expected code: a single inline-asm def into a scratch SGPR, a 16-bit right
; shift of it written to s9, and s_pack_ll_b32_b16 of s9 with itself into s8.
3084 define void @s_shuffle_v3bf16_v2bf16__1_1_1() {
3085 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__1_1_1:
3087 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3088 ; GFX900-NEXT: ;;#ASMSTART
3089 ; GFX900-NEXT: ; def s4
3090 ; GFX900-NEXT: ;;#ASMEND
3091 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
3092 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s9
3093 ; GFX900-NEXT: ;;#ASMSTART
3094 ; GFX900-NEXT: ; use s[8:9]
3095 ; GFX900-NEXT: ;;#ASMEND
3096 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3098 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__1_1_1:
3100 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3101 ; GFX90A-NEXT: ;;#ASMSTART
3102 ; GFX90A-NEXT: ; def s4
3103 ; GFX90A-NEXT: ;;#ASMEND
3104 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
3105 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s9
3106 ; GFX90A-NEXT: ;;#ASMSTART
3107 ; GFX90A-NEXT: ; use s[8:9]
3108 ; GFX90A-NEXT: ;;#ASMEND
3109 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3111 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__1_1_1:
3113 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3114 ; GFX940-NEXT: ;;#ASMSTART
3115 ; GFX940-NEXT: ; def s0
3116 ; GFX940-NEXT: ;;#ASMEND
3117 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
3118 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9
3119 ; GFX940-NEXT: ;;#ASMSTART
3120 ; GFX940-NEXT: ; use s[8:9]
3121 ; GFX940-NEXT: ;;#ASMEND
3122 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3123 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3124 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 1, i32 1, i32 1>
3125 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3126 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input SGPR shuffle with mask <2, 1, 1>. The second shuffle operand is
; poison, so index 2 selects a poison element for lane 0; lanes 1 and 2 both
; read element 1 of %vec0. The <3 x bfloat> result is widened to <4 x bfloat>
; and bound to the 64-bit pair s[8:9].
; Expected code is identical on all three subtargets (one shared check
; prefix): the inline-asm def lands in s8 and a single 16-bit right shift of
; s8 produces s9.
3130 define void @s_shuffle_v3bf16_v2bf16__2_1_1() {
3131 ; GFX9-LABEL: s_shuffle_v3bf16_v2bf16__2_1_1:
3133 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3134 ; GFX9-NEXT: ;;#ASMSTART
3135 ; GFX9-NEXT: ; def s8
3136 ; GFX9-NEXT: ;;#ASMEND
3137 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
3138 ; GFX9-NEXT: ;;#ASMSTART
3139 ; GFX9-NEXT: ; use s[8:9]
3140 ; GFX9-NEXT: ;;#ASMEND
3141 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3142 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3143 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 2, i32 1, i32 1>
3144 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3145 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle with mask <3, 1, 1>: lane 0 reads element 1 of %vec1, lanes 1
; and 2 both read element 1 of %vec0. The <3 x bfloat> result is widened to
; <4 x bfloat> and bound to the 64-bit pair s[8:9].
; Expected code: two inline-asm defs, each shifted right by 16. One shift
; writes s9 directly; the other goes to a scratch SGPR that is then combined
; with s9 into s8 by s_pack_ll_b32_b16.
3149 define void @s_shuffle_v3bf16_v2bf16__3_1_1() {
3150 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_1_1:
3152 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3153 ; GFX900-NEXT: ;;#ASMSTART
3154 ; GFX900-NEXT: ; def s4
3155 ; GFX900-NEXT: ;;#ASMEND
3156 ; GFX900-NEXT: ;;#ASMSTART
3157 ; GFX900-NEXT: ; def s5
3158 ; GFX900-NEXT: ;;#ASMEND
3159 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
3160 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
3161 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
3162 ; GFX900-NEXT: ;;#ASMSTART
3163 ; GFX900-NEXT: ; use s[8:9]
3164 ; GFX900-NEXT: ;;#ASMEND
3165 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3167 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_1_1:
3169 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3170 ; GFX90A-NEXT: ;;#ASMSTART
3171 ; GFX90A-NEXT: ; def s4
3172 ; GFX90A-NEXT: ;;#ASMEND
3173 ; GFX90A-NEXT: ;;#ASMSTART
3174 ; GFX90A-NEXT: ; def s5
3175 ; GFX90A-NEXT: ;;#ASMEND
3176 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
3177 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
3178 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
3179 ; GFX90A-NEXT: ;;#ASMSTART
3180 ; GFX90A-NEXT: ; use s[8:9]
3181 ; GFX90A-NEXT: ;;#ASMEND
3182 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3184 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_1_1:
3186 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3187 ; GFX940-NEXT: ;;#ASMSTART
3188 ; GFX940-NEXT: ; def s0
3189 ; GFX940-NEXT: ;;#ASMEND
3190 ; GFX940-NEXT: ;;#ASMSTART
3191 ; GFX940-NEXT: ; def s1
3192 ; GFX940-NEXT: ;;#ASMEND
3193 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
3194 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
3195 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
3196 ; GFX940-NEXT: ;;#ASMSTART
3197 ; GFX940-NEXT: ; use s[8:9]
3198 ; GFX940-NEXT: ;;#ASMEND
3199 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3200 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3201 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3202 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 1, i32 1>
3203 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3204 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle with mask <3, poison, 1>: lane 0 reads element 1 of %vec1,
; lane 1 is poison, lane 2 reads element 1 of %vec0. The <3 x bfloat> result
; is widened to <4 x bfloat> and bound to the 64-bit pair s[8:9].
; Expected code: two inline-asm defs, each shifted right by 16, with one shift
; writing s9 and the other writing s8. No s_pack_ll_b32_b16 is expected
; because lane 1 is undefined.
3208 define void @s_shuffle_v3bf16_v2bf16__3_u_1() {
3209 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_u_1:
3211 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3212 ; GFX900-NEXT: ;;#ASMSTART
3213 ; GFX900-NEXT: ; def s4
3214 ; GFX900-NEXT: ;;#ASMEND
3215 ; GFX900-NEXT: ;;#ASMSTART
3216 ; GFX900-NEXT: ; def s5
3217 ; GFX900-NEXT: ;;#ASMEND
3218 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
3219 ; GFX900-NEXT: s_lshr_b32 s8, s5, 16
3220 ; GFX900-NEXT: ;;#ASMSTART
3221 ; GFX900-NEXT: ; use s[8:9]
3222 ; GFX900-NEXT: ;;#ASMEND
3223 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3225 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_u_1:
3227 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3228 ; GFX90A-NEXT: ;;#ASMSTART
3229 ; GFX90A-NEXT: ; def s4
3230 ; GFX90A-NEXT: ;;#ASMEND
3231 ; GFX90A-NEXT: ;;#ASMSTART
3232 ; GFX90A-NEXT: ; def s5
3233 ; GFX90A-NEXT: ;;#ASMEND
3234 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
3235 ; GFX90A-NEXT: s_lshr_b32 s8, s5, 16
3236 ; GFX90A-NEXT: ;;#ASMSTART
3237 ; GFX90A-NEXT: ; use s[8:9]
3238 ; GFX90A-NEXT: ;;#ASMEND
3239 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3241 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_u_1:
3243 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3244 ; GFX940-NEXT: ;;#ASMSTART
3245 ; GFX940-NEXT: ; def s0
3246 ; GFX940-NEXT: ;;#ASMEND
3247 ; GFX940-NEXT: ;;#ASMSTART
3248 ; GFX940-NEXT: ; def s1
3249 ; GFX940-NEXT: ;;#ASMEND
3250 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
3251 ; GFX940-NEXT: s_lshr_b32 s8, s1, 16
3252 ; GFX940-NEXT: ;;#ASMSTART
3253 ; GFX940-NEXT: ; use s[8:9]
3254 ; GFX940-NEXT: ;;#ASMEND
3255 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3256 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3257 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3258 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 poison, i32 1>
3259 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3260 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle with mask <3, 0, 1>: lane 0 reads element 1 of %vec1, lanes 1
; and 2 read elements 0 and 1 of %vec0 in order. The <3 x bfloat> result is
; widened to <4 x bfloat> and bound to the 64-bit pair s[8:9].
; Expected code: one inline-asm def is shifted right by 16 in place and packed
; with the other (unshifted) def into s8 via s_pack_ll_b32_b16; that other def
; is also shifted right by 16 into s9.
3264 define void @s_shuffle_v3bf16_v2bf16__3_0_1() {
3265 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_0_1:
3267 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3268 ; GFX900-NEXT: ;;#ASMSTART
3269 ; GFX900-NEXT: ; def s5
3270 ; GFX900-NEXT: ;;#ASMEND
3271 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
3272 ; GFX900-NEXT: ;;#ASMSTART
3273 ; GFX900-NEXT: ; def s4
3274 ; GFX900-NEXT: ;;#ASMEND
3275 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3276 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
3277 ; GFX900-NEXT: ;;#ASMSTART
3278 ; GFX900-NEXT: ; use s[8:9]
3279 ; GFX900-NEXT: ;;#ASMEND
3280 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3282 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_0_1:
3284 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3285 ; GFX90A-NEXT: ;;#ASMSTART
3286 ; GFX90A-NEXT: ; def s5
3287 ; GFX90A-NEXT: ;;#ASMEND
3288 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
3289 ; GFX90A-NEXT: ;;#ASMSTART
3290 ; GFX90A-NEXT: ; def s4
3291 ; GFX90A-NEXT: ;;#ASMEND
3292 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3293 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
3294 ; GFX90A-NEXT: ;;#ASMSTART
3295 ; GFX90A-NEXT: ; use s[8:9]
3296 ; GFX90A-NEXT: ;;#ASMEND
3297 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3299 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_0_1:
3301 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3302 ; GFX940-NEXT: ;;#ASMSTART
3303 ; GFX940-NEXT: ; def s1
3304 ; GFX940-NEXT: ;;#ASMEND
3305 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
3306 ; GFX940-NEXT: ;;#ASMSTART
3307 ; GFX940-NEXT: ; def s0
3308 ; GFX940-NEXT: ;;#ASMEND
3309 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
3310 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
3311 ; GFX940-NEXT: ;;#ASMSTART
3312 ; GFX940-NEXT: ; use s[8:9]
3313 ; GFX940-NEXT: ;;#ASMEND
3314 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3315 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3316 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3317 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 0, i32 1>
3318 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3319 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle with mask <3, 2, 1>: lanes 0 and 1 are %vec1 with its two
; elements swapped (element 1, then element 0); lane 2 reads element 1 of
; %vec0. The <3 x bfloat> result is widened to <4 x bfloat> and bound to the
; 64-bit pair s[8:9].
; Expected code: one inline-asm def has its 16-bit right shift written to a
; separate scratch SGPR and is packed with its own unshifted value into s8 via
; s_pack_ll_b32_b16; the other def is shifted right by 16 into s9.
3323 define void @s_shuffle_v3bf16_v2bf16__3_2_1() {
3324 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_2_1:
3326 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3327 ; GFX900-NEXT: ;;#ASMSTART
3328 ; GFX900-NEXT: ; def s5
3329 ; GFX900-NEXT: ;;#ASMEND
3330 ; GFX900-NEXT: s_lshr_b32 s6, s5, 16
3331 ; GFX900-NEXT: ;;#ASMSTART
3332 ; GFX900-NEXT: ; def s4
3333 ; GFX900-NEXT: ;;#ASMEND
3334 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5
3335 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
3336 ; GFX900-NEXT: ;;#ASMSTART
3337 ; GFX900-NEXT: ; use s[8:9]
3338 ; GFX900-NEXT: ;;#ASMEND
3339 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3341 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_2_1:
3343 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3344 ; GFX90A-NEXT: ;;#ASMSTART
3345 ; GFX90A-NEXT: ; def s5
3346 ; GFX90A-NEXT: ;;#ASMEND
3347 ; GFX90A-NEXT: s_lshr_b32 s6, s5, 16
3348 ; GFX90A-NEXT: ;;#ASMSTART
3349 ; GFX90A-NEXT: ; def s4
3350 ; GFX90A-NEXT: ;;#ASMEND
3351 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5
3352 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
3353 ; GFX90A-NEXT: ;;#ASMSTART
3354 ; GFX90A-NEXT: ; use s[8:9]
3355 ; GFX90A-NEXT: ;;#ASMEND
3356 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3358 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_2_1:
3360 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3361 ; GFX940-NEXT: ;;#ASMSTART
3362 ; GFX940-NEXT: ; def s1
3363 ; GFX940-NEXT: ;;#ASMEND
3364 ; GFX940-NEXT: s_lshr_b32 s2, s1, 16
3365 ; GFX940-NEXT: ;;#ASMSTART
3366 ; GFX940-NEXT: ; def s0
3367 ; GFX940-NEXT: ;;#ASMEND
3368 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1
3369 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
3370 ; GFX940-NEXT: ;;#ASMSTART
3371 ; GFX940-NEXT: ; use s[8:9]
3372 ; GFX940-NEXT: ;;#ASMEND
3373 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3374 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3375 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3376 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 2, i32 1>
3377 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3378 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <poison, 2, 2>. The second shufflevector
; operand is itself poison, so mask index 2 selects lane 0 of that poison
; operand and no result lane reads %vec0. The expected output accordingly
; contains only the "use" asm on s[8:9] and no def.
; NOTE(review): the function name suggests lane 2 comes from a second real
; input; confirm that a poison second operand is intended here.
3382 define void @s_shuffle_v3bf16_v2bf16__u_2_2() {
3383 ; GFX9-LABEL: s_shuffle_v3bf16_v2bf16__u_2_2:
3385 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3386 ; GFX9-NEXT: ;;#ASMSTART
3387 ; GFX9-NEXT: ; use s[8:9]
3388 ; GFX9-NEXT: ;;#ASMEND
3389 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3390 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3391 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 poison, i32 2, i32 2>
3392 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3393 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <0, 2, 2> and a poison second operand:
; lane 0 is %vec0[0]; lanes 1 and 2 select lane 0 of the poison operand.
; Expected code defines the input directly in s8 and uses s[8:9] with no
; shift or pack instructions; the gfx940 run additionally expects an s_nop 0
; between the def and the use.
; NOTE(review): the function name suggests lane 2 comes from a second real
; input; confirm that a poison second operand is intended here.
3397 define void @s_shuffle_v3bf16_v2bf16__0_2_2() {
3398 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__0_2_2:
3400 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3401 ; GFX900-NEXT: ;;#ASMSTART
3402 ; GFX900-NEXT: ; def s8
3403 ; GFX900-NEXT: ;;#ASMEND
3404 ; GFX900-NEXT: ;;#ASMSTART
3405 ; GFX900-NEXT: ; use s[8:9]
3406 ; GFX900-NEXT: ;;#ASMEND
3407 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3409 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__0_2_2:
3411 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3412 ; GFX90A-NEXT: ;;#ASMSTART
3413 ; GFX90A-NEXT: ; def s8
3414 ; GFX90A-NEXT: ;;#ASMEND
3415 ; GFX90A-NEXT: ;;#ASMSTART
3416 ; GFX90A-NEXT: ; use s[8:9]
3417 ; GFX90A-NEXT: ;;#ASMEND
3418 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3420 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__0_2_2:
3422 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3423 ; GFX940-NEXT: ;;#ASMSTART
3424 ; GFX940-NEXT: ; def s8
3425 ; GFX940-NEXT: ;;#ASMEND
3426 ; GFX940-NEXT: s_nop 0
3427 ; GFX940-NEXT: ;;#ASMSTART
3428 ; GFX940-NEXT: ; use s[8:9]
3429 ; GFX940-NEXT: ;;#ASMEND
3430 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3431 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3432 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 0, i32 2, i32 2>
3433 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3434 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <1, 2, 2> and a poison second operand:
; lane 0 is %vec0[1]; lanes 1 and 2 select lane 0 of the poison operand.
; Expected code is a single s_lshr_b32 by 16 that moves the input's high half
; into the low half of s8 before the use of s[8:9].
; NOTE(review): the function name suggests lane 2 comes from a second real
; input; confirm that a poison second operand is intended here.
3438 define void @s_shuffle_v3bf16_v2bf16__1_2_2() {
3439 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__1_2_2:
3441 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3442 ; GFX900-NEXT: ;;#ASMSTART
3443 ; GFX900-NEXT: ; def s4
3444 ; GFX900-NEXT: ;;#ASMEND
3445 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
3446 ; GFX900-NEXT: ;;#ASMSTART
3447 ; GFX900-NEXT: ; use s[8:9]
3448 ; GFX900-NEXT: ;;#ASMEND
3449 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3451 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__1_2_2:
3453 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3454 ; GFX90A-NEXT: ;;#ASMSTART
3455 ; GFX90A-NEXT: ; def s4
3456 ; GFX90A-NEXT: ;;#ASMEND
3457 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
3458 ; GFX90A-NEXT: ;;#ASMSTART
3459 ; GFX90A-NEXT: ; use s[8:9]
3460 ; GFX90A-NEXT: ;;#ASMEND
3461 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3463 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__1_2_2:
3465 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3466 ; GFX940-NEXT: ;;#ASMSTART
3467 ; GFX940-NEXT: ; def s0
3468 ; GFX940-NEXT: ;;#ASMEND
3469 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
3470 ; GFX940-NEXT: ;;#ASMSTART
3471 ; GFX940-NEXT: ; use s[8:9]
3472 ; GFX940-NEXT: ;;#ASMEND
3473 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3474 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3475 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 1, i32 2, i32 2>
3476 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3477 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <2, 2, 2> and a poison second operand:
; every mask index selects lane 0 of the poison operand, so no result lane
; reads %vec0. The expected output contains only the "use" asm on s[8:9] and
; no def.
; NOTE(review): the function name suggests lane 2 comes from a second real
; input; confirm that a poison second operand is intended here.
3481 define void @s_shuffle_v3bf16_v2bf16__2_2_2() {
3482 ; GFX9-LABEL: s_shuffle_v3bf16_v2bf16__2_2_2:
3484 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3485 ; GFX9-NEXT: ;;#ASMSTART
3486 ; GFX9-NEXT: ; use s[8:9]
3487 ; GFX9-NEXT: ;;#ASMEND
3488 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3489 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3490 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> <i32 2, i32 2, i32 2>
3491 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3492 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <3, 2, 2>: the result lanes are %vec1[1],
; %vec1[0], %vec1[0]. %vec0 is not referenced by the mask, and only one def
; appears in the expected output. The result is widened to <4 x bfloat>
; (lane 3 poison) for the asm use pinned to s[8:9].
; Expected code: the input is defined in s9 (its low half already serves
; lane 2); its halves are swapped via shift + pack into s8.
3496 define void @s_shuffle_v3bf16_v2bf16__3_2_2() {
3497 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_2_2:
3499 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3500 ; GFX900-NEXT: ;;#ASMSTART
3501 ; GFX900-NEXT: ; def s9
3502 ; GFX900-NEXT: ;;#ASMEND
3503 ; GFX900-NEXT: s_lshr_b32 s4, s9, 16
3504 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
3505 ; GFX900-NEXT: ;;#ASMSTART
3506 ; GFX900-NEXT: ; use s[8:9]
3507 ; GFX900-NEXT: ;;#ASMEND
3508 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3510 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_2_2:
3512 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3513 ; GFX90A-NEXT: ;;#ASMSTART
3514 ; GFX90A-NEXT: ; def s9
3515 ; GFX90A-NEXT: ;;#ASMEND
3516 ; GFX90A-NEXT: s_lshr_b32 s4, s9, 16
3517 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
3518 ; GFX90A-NEXT: ;;#ASMSTART
3519 ; GFX90A-NEXT: ; use s[8:9]
3520 ; GFX90A-NEXT: ;;#ASMEND
3521 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3523 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_2_2:
3525 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3526 ; GFX940-NEXT: ;;#ASMSTART
3527 ; GFX940-NEXT: ; def s9
3528 ; GFX940-NEXT: ;;#ASMEND
3529 ; GFX940-NEXT: s_lshr_b32 s0, s9, 16
3530 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
3531 ; GFX940-NEXT: ;;#ASMSTART
3532 ; GFX940-NEXT: ; use s[8:9]
3533 ; GFX940-NEXT: ;;#ASMEND
3534 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3535 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3536 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3537 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 2, i32 2>
3538 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3539 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <3, poison, 2>: lane 0 is %vec1[1], lane 1
; is poison, lane 2 is %vec1[0]. %vec0 is not referenced by the mask, and only
; one def appears in the expected output. The result is widened to
; <4 x bfloat> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected code (shared by all three run configurations): the input is
; defined in s9 and its high half is shifted down into s8.
3543 define void @s_shuffle_v3bf16_v2bf16__3_u_2() {
3544 ; GFX9-LABEL: s_shuffle_v3bf16_v2bf16__3_u_2:
3546 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3547 ; GFX9-NEXT: ;;#ASMSTART
3548 ; GFX9-NEXT: ; def s9
3549 ; GFX9-NEXT: ;;#ASMEND
3550 ; GFX9-NEXT: s_lshr_b32 s8, s9, 16
3551 ; GFX9-NEXT: ;;#ASMSTART
3552 ; GFX9-NEXT: ; use s[8:9]
3553 ; GFX9-NEXT: ;;#ASMEND
3554 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3555 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3556 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3557 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 poison, i32 2>
3558 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3559 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <3, 0, 2>: the result lanes are %vec1[1],
; %vec0[0], %vec1[0]. The result is widened to <4 x bfloat> (lane 3 poison)
; for the asm use pinned to s[8:9].
; Expected code: one input is defined directly in s9; its high half is shifted
; into a temporary and packed with the low half of the other input into s8.
3563 define void @s_shuffle_v3bf16_v2bf16__3_0_2() {
3564 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_0_2:
3566 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3567 ; GFX900-NEXT: ;;#ASMSTART
3568 ; GFX900-NEXT: ; def s9
3569 ; GFX900-NEXT: ;;#ASMEND
3570 ; GFX900-NEXT: s_lshr_b32 s5, s9, 16
3571 ; GFX900-NEXT: ;;#ASMSTART
3572 ; GFX900-NEXT: ; def s4
3573 ; GFX900-NEXT: ;;#ASMEND
3574 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3575 ; GFX900-NEXT: ;;#ASMSTART
3576 ; GFX900-NEXT: ; use s[8:9]
3577 ; GFX900-NEXT: ;;#ASMEND
3578 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3580 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_0_2:
3582 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3583 ; GFX90A-NEXT: ;;#ASMSTART
3584 ; GFX90A-NEXT: ; def s9
3585 ; GFX90A-NEXT: ;;#ASMEND
3586 ; GFX90A-NEXT: s_lshr_b32 s5, s9, 16
3587 ; GFX90A-NEXT: ;;#ASMSTART
3588 ; GFX90A-NEXT: ; def s4
3589 ; GFX90A-NEXT: ;;#ASMEND
3590 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3591 ; GFX90A-NEXT: ;;#ASMSTART
3592 ; GFX90A-NEXT: ; use s[8:9]
3593 ; GFX90A-NEXT: ;;#ASMEND
3594 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3596 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_0_2:
3598 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3599 ; GFX940-NEXT: ;;#ASMSTART
3600 ; GFX940-NEXT: ; def s9
3601 ; GFX940-NEXT: ;;#ASMEND
3602 ; GFX940-NEXT: s_lshr_b32 s1, s9, 16
3603 ; GFX940-NEXT: ;;#ASMSTART
3604 ; GFX940-NEXT: ; def s0
3605 ; GFX940-NEXT: ;;#ASMEND
3606 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
3607 ; GFX940-NEXT: ;;#ASMSTART
3608 ; GFX940-NEXT: ; use s[8:9]
3609 ; GFX940-NEXT: ;;#ASMEND
3610 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3611 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3612 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3613 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 0, i32 2>
3614 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3615 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <3, 1, 2>: the result lanes are %vec1[1],
; %vec0[1], %vec1[0]. The result is widened to <4 x bfloat> (lane 3 poison)
; for the asm use pinned to s[8:9].
; Expected code: one input is defined directly in s9; the high halves of both
; inputs are shifted down by 16 and packed together into s8.
3619 define void @s_shuffle_v3bf16_v2bf16__3_1_2() {
3620 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_1_2:
3622 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3623 ; GFX900-NEXT: ;;#ASMSTART
3624 ; GFX900-NEXT: ; def s4
3625 ; GFX900-NEXT: ;;#ASMEND
3626 ; GFX900-NEXT: ;;#ASMSTART
3627 ; GFX900-NEXT: ; def s9
3628 ; GFX900-NEXT: ;;#ASMEND
3629 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
3630 ; GFX900-NEXT: s_lshr_b32 s5, s9, 16
3631 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3632 ; GFX900-NEXT: ;;#ASMSTART
3633 ; GFX900-NEXT: ; use s[8:9]
3634 ; GFX900-NEXT: ;;#ASMEND
3635 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3637 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_1_2:
3639 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3640 ; GFX90A-NEXT: ;;#ASMSTART
3641 ; GFX90A-NEXT: ; def s4
3642 ; GFX90A-NEXT: ;;#ASMEND
3643 ; GFX90A-NEXT: ;;#ASMSTART
3644 ; GFX90A-NEXT: ; def s9
3645 ; GFX90A-NEXT: ;;#ASMEND
3646 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
3647 ; GFX90A-NEXT: s_lshr_b32 s5, s9, 16
3648 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3649 ; GFX90A-NEXT: ;;#ASMSTART
3650 ; GFX90A-NEXT: ; use s[8:9]
3651 ; GFX90A-NEXT: ;;#ASMEND
3652 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3654 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_1_2:
3656 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3657 ; GFX940-NEXT: ;;#ASMSTART
3658 ; GFX940-NEXT: ; def s0
3659 ; GFX940-NEXT: ;;#ASMEND
3660 ; GFX940-NEXT: ;;#ASMSTART
3661 ; GFX940-NEXT: ; def s9
3662 ; GFX940-NEXT: ;;#ASMEND
3663 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
3664 ; GFX940-NEXT: s_lshr_b32 s1, s9, 16
3665 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
3666 ; GFX940-NEXT: ;;#ASMSTART
3667 ; GFX940-NEXT: ; use s[8:9]
3668 ; GFX940-NEXT: ;;#ASMEND
3669 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3670 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3671 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3672 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 1, i32 2>
3673 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3674 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <poison, 3, 3>: lane 0 is poison, lanes 1
; and 2 are both %vec1[1]. %vec0 is not referenced by the mask, and only one
; def appears in the expected output. The result is widened to <4 x bfloat>
; (lane 3 poison) for the asm use pinned to s[8:9].
; Expected code (shared by all three run configurations): the input is
; defined in s8, whose high half already serves lane 1, and that high half is
; shifted down into s9 for lane 2.
3678 define void @s_shuffle_v3bf16_v2bf16__u_3_3() {
3679 ; GFX9-LABEL: s_shuffle_v3bf16_v2bf16__u_3_3:
3681 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3682 ; GFX9-NEXT: ;;#ASMSTART
3683 ; GFX9-NEXT: ; def s8
3684 ; GFX9-NEXT: ;;#ASMEND
3685 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
3686 ; GFX9-NEXT: ;;#ASMSTART
3687 ; GFX9-NEXT: ; use s[8:9]
3688 ; GFX9-NEXT: ;;#ASMEND
3689 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3690 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3691 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3692 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 poison, i32 3, i32 3>
3693 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3694 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <0, 3, 3>: the result lanes are %vec0[0],
; %vec1[1], %vec1[1]. The result is widened to <4 x bfloat> (lane 3 poison)
; for the asm use pinned to s[8:9].
; Expected code: the high half of one input is shifted down into s9, then
; packed above the low half of the other input to form s8.
3698 define void @s_shuffle_v3bf16_v2bf16__0_3_3() {
3699 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__0_3_3:
3701 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3702 ; GFX900-NEXT: ;;#ASMSTART
3703 ; GFX900-NEXT: ; def s5
3704 ; GFX900-NEXT: ;;#ASMEND
3705 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
3706 ; GFX900-NEXT: ;;#ASMSTART
3707 ; GFX900-NEXT: ; def s4
3708 ; GFX900-NEXT: ;;#ASMEND
3709 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
3710 ; GFX900-NEXT: ;;#ASMSTART
3711 ; GFX900-NEXT: ; use s[8:9]
3712 ; GFX900-NEXT: ;;#ASMEND
3713 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3715 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__0_3_3:
3717 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3718 ; GFX90A-NEXT: ;;#ASMSTART
3719 ; GFX90A-NEXT: ; def s5
3720 ; GFX90A-NEXT: ;;#ASMEND
3721 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
3722 ; GFX90A-NEXT: ;;#ASMSTART
3723 ; GFX90A-NEXT: ; def s4
3724 ; GFX90A-NEXT: ;;#ASMEND
3725 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
3726 ; GFX90A-NEXT: ;;#ASMSTART
3727 ; GFX90A-NEXT: ; use s[8:9]
3728 ; GFX90A-NEXT: ;;#ASMEND
3729 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3731 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__0_3_3:
3733 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3734 ; GFX940-NEXT: ;;#ASMSTART
3735 ; GFX940-NEXT: ; def s1
3736 ; GFX940-NEXT: ;;#ASMEND
3737 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
3738 ; GFX940-NEXT: ;;#ASMSTART
3739 ; GFX940-NEXT: ; def s0
3740 ; GFX940-NEXT: ;;#ASMEND
3741 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
3742 ; GFX940-NEXT: ;;#ASMSTART
3743 ; GFX940-NEXT: ; use s[8:9]
3744 ; GFX940-NEXT: ;;#ASMEND
3745 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3746 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3747 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3748 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 0, i32 3, i32 3>
3749 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3750 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <1, 3, 3>: the result lanes are %vec0[1],
; %vec1[1], %vec1[1]. The result is widened to <4 x bfloat> (lane 3 poison)
; for the asm use pinned to s[8:9].
; Expected code: the high halves of both inputs are shifted down by 16 (one of
; them straight into s9) and then packed together into s8.
3754 define void @s_shuffle_v3bf16_v2bf16__1_3_3() {
3755 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__1_3_3:
3757 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3758 ; GFX900-NEXT: ;;#ASMSTART
3759 ; GFX900-NEXT: ; def s4
3760 ; GFX900-NEXT: ;;#ASMEND
3761 ; GFX900-NEXT: ;;#ASMSTART
3762 ; GFX900-NEXT: ; def s5
3763 ; GFX900-NEXT: ;;#ASMEND
3764 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
3765 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
3766 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
3767 ; GFX900-NEXT: ;;#ASMSTART
3768 ; GFX900-NEXT: ; use s[8:9]
3769 ; GFX900-NEXT: ;;#ASMEND
3770 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3772 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__1_3_3:
3774 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3775 ; GFX90A-NEXT: ;;#ASMSTART
3776 ; GFX90A-NEXT: ; def s4
3777 ; GFX90A-NEXT: ;;#ASMEND
3778 ; GFX90A-NEXT: ;;#ASMSTART
3779 ; GFX90A-NEXT: ; def s5
3780 ; GFX90A-NEXT: ;;#ASMEND
3781 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
3782 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
3783 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
3784 ; GFX90A-NEXT: ;;#ASMSTART
3785 ; GFX90A-NEXT: ; use s[8:9]
3786 ; GFX90A-NEXT: ;;#ASMEND
3787 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3789 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__1_3_3:
3791 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3792 ; GFX940-NEXT: ;;#ASMSTART
3793 ; GFX940-NEXT: ; def s0
3794 ; GFX940-NEXT: ;;#ASMEND
3795 ; GFX940-NEXT: ;;#ASMSTART
3796 ; GFX940-NEXT: ; def s1
3797 ; GFX940-NEXT: ;;#ASMEND
3798 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
3799 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
3800 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
3801 ; GFX940-NEXT: ;;#ASMSTART
3802 ; GFX940-NEXT: ; use s[8:9]
3803 ; GFX940-NEXT: ;;#ASMEND
3804 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3805 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3806 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3807 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 1, i32 3, i32 3>
3808 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3809 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <2, 3, 3>: the result lanes are %vec1[0],
; %vec1[1], %vec1[1]. %vec0 is not referenced by the mask, and only one def
; appears in the expected output. The result is widened to <4 x bfloat>
; (lane 3 poison) for the asm use pinned to s[8:9].
; Expected code (shared by all three run configurations): the input is
; defined in s8, which already holds lanes 0 and 1 in order, and its high half
; is shifted down into s9 for lane 2.
3813 define void @s_shuffle_v3bf16_v2bf16__2_3_3() {
3814 ; GFX9-LABEL: s_shuffle_v3bf16_v2bf16__2_3_3:
3816 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3817 ; GFX9-NEXT: ;;#ASMSTART
3818 ; GFX9-NEXT: ; def s8
3819 ; GFX9-NEXT: ;;#ASMEND
3820 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
3821 ; GFX9-NEXT: ;;#ASMSTART
3822 ; GFX9-NEXT: ; use s[8:9]
3823 ; GFX9-NEXT: ;;#ASMEND
3824 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3825 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3826 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3827 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 2, i32 3, i32 3>
3828 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3829 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <3, poison, 3>: lanes 0 and 2 are both
; %vec1[1], lane 1 is poison. %vec0 is not referenced by the mask, and only
; one def appears in the expected output. The result is widened to
; <4 x bfloat> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected code: the input's high half is shifted down into s8 and that value
; is then copied to s9 with s_mov_b32.
3833 define void @s_shuffle_v3bf16_v2bf16__3_u_3() {
3834 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_u_3:
3836 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3837 ; GFX900-NEXT: ;;#ASMSTART
3838 ; GFX900-NEXT: ; def s4
3839 ; GFX900-NEXT: ;;#ASMEND
3840 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
3841 ; GFX900-NEXT: s_mov_b32 s9, s8
3842 ; GFX900-NEXT: ;;#ASMSTART
3843 ; GFX900-NEXT: ; use s[8:9]
3844 ; GFX900-NEXT: ;;#ASMEND
3845 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3847 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_u_3:
3849 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3850 ; GFX90A-NEXT: ;;#ASMSTART
3851 ; GFX90A-NEXT: ; def s4
3852 ; GFX90A-NEXT: ;;#ASMEND
3853 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
3854 ; GFX90A-NEXT: s_mov_b32 s9, s8
3855 ; GFX90A-NEXT: ;;#ASMSTART
3856 ; GFX90A-NEXT: ; use s[8:9]
3857 ; GFX90A-NEXT: ;;#ASMEND
3858 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3860 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_u_3:
3862 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3863 ; GFX940-NEXT: ;;#ASMSTART
3864 ; GFX940-NEXT: ; def s0
3865 ; GFX940-NEXT: ;;#ASMEND
3866 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
3867 ; GFX940-NEXT: s_mov_b32 s9, s8
3868 ; GFX940-NEXT: ;;#ASMSTART
3869 ; GFX940-NEXT: ; use s[8:9]
3870 ; GFX940-NEXT: ;;#ASMEND
3871 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3872 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3873 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3874 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 poison, i32 3>
3875 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3876 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <3, 0, 3>: the result lanes are %vec1[1],
; %vec0[0], %vec1[1]. The result is widened to <4 x bfloat> (lane 3 poison)
; for the asm use pinned to s[8:9].
; Expected code: the high half of one input is shifted down into s9, and that
; same value is packed below the low half of the other input to form s8.
3880 define void @s_shuffle_v3bf16_v2bf16__3_0_3() {
3881 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_0_3:
3883 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3884 ; GFX900-NEXT: ;;#ASMSTART
3885 ; GFX900-NEXT: ; def s5
3886 ; GFX900-NEXT: ;;#ASMEND
3887 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
3888 ; GFX900-NEXT: ;;#ASMSTART
3889 ; GFX900-NEXT: ; def s4
3890 ; GFX900-NEXT: ;;#ASMEND
3891 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4
3892 ; GFX900-NEXT: ;;#ASMSTART
3893 ; GFX900-NEXT: ; use s[8:9]
3894 ; GFX900-NEXT: ;;#ASMEND
3895 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3897 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_0_3:
3899 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3900 ; GFX90A-NEXT: ;;#ASMSTART
3901 ; GFX90A-NEXT: ; def s5
3902 ; GFX90A-NEXT: ;;#ASMEND
3903 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
3904 ; GFX90A-NEXT: ;;#ASMSTART
3905 ; GFX90A-NEXT: ; def s4
3906 ; GFX90A-NEXT: ;;#ASMEND
3907 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4
3908 ; GFX90A-NEXT: ;;#ASMSTART
3909 ; GFX90A-NEXT: ; use s[8:9]
3910 ; GFX90A-NEXT: ;;#ASMEND
3911 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3913 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_0_3:
3915 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3916 ; GFX940-NEXT: ;;#ASMSTART
3917 ; GFX940-NEXT: ; def s1
3918 ; GFX940-NEXT: ;;#ASMEND
3919 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
3920 ; GFX940-NEXT: ;;#ASMSTART
3921 ; GFX940-NEXT: ; def s0
3922 ; GFX940-NEXT: ;;#ASMEND
3923 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0
3924 ; GFX940-NEXT: ;;#ASMSTART
3925 ; GFX940-NEXT: ; use s[8:9]
3926 ; GFX940-NEXT: ;;#ASMEND
3927 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3928 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3929 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3930 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 0, i32 3>
3931 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3932 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <3, 1, 3>: the result lanes are %vec1[1],
; %vec0[1], %vec1[1]. The result is widened to <4 x bfloat> (lane 3 poison)
; for the asm use pinned to s[8:9].
; Expected code: the high halves of both inputs are shifted down by 16 (one of
; them straight into s9) and then packed together into s8.
3936 define void @s_shuffle_v3bf16_v2bf16__3_1_3() {
3937 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_1_3:
3939 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3940 ; GFX900-NEXT: ;;#ASMSTART
3941 ; GFX900-NEXT: ; def s4
3942 ; GFX900-NEXT: ;;#ASMEND
3943 ; GFX900-NEXT: ;;#ASMSTART
3944 ; GFX900-NEXT: ; def s5
3945 ; GFX900-NEXT: ;;#ASMEND
3946 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
3947 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
3948 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4
3949 ; GFX900-NEXT: ;;#ASMSTART
3950 ; GFX900-NEXT: ; use s[8:9]
3951 ; GFX900-NEXT: ;;#ASMEND
3952 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3954 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_1_3:
3956 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3957 ; GFX90A-NEXT: ;;#ASMSTART
3958 ; GFX90A-NEXT: ; def s4
3959 ; GFX90A-NEXT: ;;#ASMEND
3960 ; GFX90A-NEXT: ;;#ASMSTART
3961 ; GFX90A-NEXT: ; def s5
3962 ; GFX90A-NEXT: ;;#ASMEND
3963 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
3964 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
3965 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4
3966 ; GFX90A-NEXT: ;;#ASMSTART
3967 ; GFX90A-NEXT: ; use s[8:9]
3968 ; GFX90A-NEXT: ;;#ASMEND
3969 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3971 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_1_3:
3973 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3974 ; GFX940-NEXT: ;;#ASMSTART
3975 ; GFX940-NEXT: ; def s0
3976 ; GFX940-NEXT: ;;#ASMEND
3977 ; GFX940-NEXT: ;;#ASMSTART
3978 ; GFX940-NEXT: ; def s1
3979 ; GFX940-NEXT: ;;#ASMEND
3980 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
3981 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
3982 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0
3983 ; GFX940-NEXT: ;;#ASMSTART
3984 ; GFX940-NEXT: ; use s[8:9]
3985 ; GFX940-NEXT: ;;#ASMEND
3986 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3987 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
3988 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
3989 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 1, i32 3>
3990 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
3991 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <3, 2, 3>: the result lanes are %vec1[1],
; %vec1[0], %vec1[1]. %vec0 is not referenced by the mask, and only one def
; appears in the expected output. The result is widened to <4 x bfloat>
; (lane 3 poison) for the asm use pinned to s[8:9].
; Expected code: the input's high half is shifted down into s9, and that value
; is packed below the input's low half to form s8 (a half swap).
3995 define void @s_shuffle_v3bf16_v2bf16__3_2_3() {
3996 ; GFX900-LABEL: s_shuffle_v3bf16_v2bf16__3_2_3:
3998 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3999 ; GFX900-NEXT: ;;#ASMSTART
4000 ; GFX900-NEXT: ; def s4
4001 ; GFX900-NEXT: ;;#ASMEND
4002 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
4003 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4
4004 ; GFX900-NEXT: ;;#ASMSTART
4005 ; GFX900-NEXT: ; use s[8:9]
4006 ; GFX900-NEXT: ;;#ASMEND
4007 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4009 ; GFX90A-LABEL: s_shuffle_v3bf16_v2bf16__3_2_3:
4011 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4012 ; GFX90A-NEXT: ;;#ASMSTART
4013 ; GFX90A-NEXT: ; def s4
4014 ; GFX90A-NEXT: ;;#ASMEND
4015 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
4016 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4
4017 ; GFX90A-NEXT: ;;#ASMSTART
4018 ; GFX90A-NEXT: ; use s[8:9]
4019 ; GFX90A-NEXT: ;;#ASMEND
4020 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4022 ; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_2_3:
4024 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4025 ; GFX940-NEXT: ;;#ASMSTART
4026 ; GFX940-NEXT: ; def s0
4027 ; GFX940-NEXT: ;;#ASMEND
4028 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
4029 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0
4030 ; GFX940-NEXT: ;;#ASMSTART
4031 ; GFX940-NEXT: ; use s[8:9]
4032 ; GFX940-NEXT: ;;#ASMEND
4033 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4034 %vec0 = call <2 x bfloat> asm "; def $0", "=s"()
4035 %vec1 = call <2 x bfloat> asm "; def $0", "=s"()
4036 %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> <i32 3, i32 2, i32 3>
4037 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4038 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
4041 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
4042 ; GFX90APLUS: {{.*}}