1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
; Shuffle of a <3 x bfloat> value with an all-poison mask, stored to %ptr.
; The checks visible here are shared by all three RUN lines and expect only
; s_waitcnt followed by s_setpc_b64: no inline-asm def and no store.
7 define void @v_shuffle_v3bf16_v3bf16__u_u_u(ptr addrspace(1) inreg %ptr) {
8 ; GFX9-LABEL: v_shuffle_v3bf16_v3bf16__u_u_u:
10 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow the asm-defined <4 x bfloat> to its first three elements.
13 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; The whole mask is poison, so no lane of %extract3 is selected.
14 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> poison
15 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <0, poison, poison> on a single <3 x bfloat> source, stored to %ptr.
; For each target the checks expect one asm def of a register pair, then a
; short store of the second register at offset 4 and a dword store of the first.
; The gfx940 checks use s[0:1] as the store base and add "sc0 sc1".
19 define void @v_shuffle_v3bf16_v3bf16__0_u_u(ptr addrspace(1) inreg %ptr) {
20 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__0_u_u:
22 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
24 ; GFX900-NEXT: ;;#ASMSTART
25 ; GFX900-NEXT: ; def v[0:1]
26 ; GFX900-NEXT: ;;#ASMEND
27 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
28 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
29 ; GFX900-NEXT: s_waitcnt vmcnt(0)
30 ; GFX900-NEXT: s_setpc_b64 s[30:31]
32 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__0_u_u:
34 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
36 ; GFX90A-NEXT: ;;#ASMSTART
37 ; GFX90A-NEXT: ; def v[0:1]
38 ; GFX90A-NEXT: ;;#ASMEND
39 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
40 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
41 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
42 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
44 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__0_u_u:
46 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
48 ; GFX940-NEXT: ;;#ASMSTART
49 ; GFX940-NEXT: ; def v[0:1]
50 ; GFX940-NEXT: ;;#ASMEND
51 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
52 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
53 ; GFX940-NEXT: s_waitcnt vmcnt(0)
54 ; GFX940-NEXT: s_setpc_b64 s[30:31]
55 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow the asm-defined <4 x bfloat> to its first three elements.
56 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lane 0 takes element 0 of %extract3; lanes 1 and 2 are poison.
57 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
58 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <1, poison, poison> on a single <3 x bfloat> source, stored to %ptr.
; For each target the checks expect one asm def, a v_alignbit_b32 by 16 on the
; first defined register, and a single dword store.
62 define void @v_shuffle_v3bf16_v3bf16__1_u_u(ptr addrspace(1) inreg %ptr) {
63 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__1_u_u:
65 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66 ; GFX900-NEXT: ;;#ASMSTART
67 ; GFX900-NEXT: ; def v[0:1]
68 ; GFX900-NEXT: ;;#ASMEND
69 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
70 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
71 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
72 ; GFX900-NEXT: s_waitcnt vmcnt(0)
73 ; GFX900-NEXT: s_setpc_b64 s[30:31]
75 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__1_u_u:
77 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78 ; GFX90A-NEXT: ;;#ASMSTART
79 ; GFX90A-NEXT: ; def v[0:1]
80 ; GFX90A-NEXT: ;;#ASMEND
81 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
82 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
83 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
84 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
85 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
87 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__1_u_u:
89 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90 ; GFX940-NEXT: ;;#ASMSTART
91 ; GFX940-NEXT: ; def v[0:1]
92 ; GFX940-NEXT: ;;#ASMEND
93 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
94 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
95 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
96 ; GFX940-NEXT: s_waitcnt vmcnt(0)
97 ; GFX940-NEXT: s_setpc_b64 s[30:31]
98 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow the asm-defined <4 x bfloat> to its first three elements.
99 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lane 0 takes element 1 of %extract3; lanes 1 and 2 are poison.
100 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
101 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <2, poison, poison> on a single <3 x bfloat> source, stored to %ptr.
; For each target the checks expect one asm def and a single dword store of the
; second defined register (v1), with no intermediate ALU instruction.
105 define void @v_shuffle_v3bf16_v3bf16__2_u_u(ptr addrspace(1) inreg %ptr) {
106 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__2_u_u:
108 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
110 ; GFX900-NEXT: ;;#ASMSTART
111 ; GFX900-NEXT: ; def v[0:1]
112 ; GFX900-NEXT: ;;#ASMEND
113 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
114 ; GFX900-NEXT: s_waitcnt vmcnt(0)
115 ; GFX900-NEXT: s_setpc_b64 s[30:31]
117 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__2_u_u:
119 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
121 ; GFX90A-NEXT: ;;#ASMSTART
122 ; GFX90A-NEXT: ; def v[0:1]
123 ; GFX90A-NEXT: ;;#ASMEND
124 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
125 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
126 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
128 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__2_u_u:
130 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
132 ; GFX940-NEXT: ;;#ASMSTART
133 ; GFX940-NEXT: ; def v[0:1]
134 ; GFX940-NEXT: ;;#ASMEND
135 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
136 ; GFX940-NEXT: s_waitcnt vmcnt(0)
137 ; GFX940-NEXT: s_setpc_b64 s[30:31]
138 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow the asm-defined <4 x bfloat> to its first three elements.
139 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lane 0 takes element 2 of %extract3; lanes 1 and 2 are poison.
140 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 2, i32 poison, i32 poison>
141 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, poison, poison> where the second shuffle operand is poison.
; Index 3 addresses element 0 of the second operand, so every result lane is
; poison. The checks shared by all three RUN lines expect only s_waitcnt and
; s_setpc_b64: no inline-asm def and no store.
145 define void @v_shuffle_v3bf16_v3bf16__3_u_u(ptr addrspace(1) inreg %ptr) {
146 ; GFX9-LABEL: v_shuffle_v3bf16_v3bf16__3_u_u:
148 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149 ; GFX9-NEXT: s_setpc_b64 s[30:31]
150 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow the asm-defined <4 x bfloat> to its first three elements.
151 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lane 0 selects from the poison second operand; lanes 1 and 2 are poison.
152 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 3, i32 poison, i32 poison>
153 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle with mask <4, poison, poison>, stored to %ptr.
; Index 4 addresses element 1 of the second operand (%extract31).
; Although the IR has two inline-asm defs, the checks for each target expect
; only one asm def, a v_alignbit_b32 by 16, and a single dword store.
157 define void @v_shuffle_v3bf16_v3bf16__4_u_u(ptr addrspace(1) inreg %ptr) {
158 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__4_u_u:
160 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161 ; GFX900-NEXT: ;;#ASMSTART
162 ; GFX900-NEXT: ; def v[0:1]
163 ; GFX900-NEXT: ;;#ASMEND
164 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
165 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
166 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
167 ; GFX900-NEXT: s_waitcnt vmcnt(0)
168 ; GFX900-NEXT: s_setpc_b64 s[30:31]
170 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__4_u_u:
172 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173 ; GFX90A-NEXT: ;;#ASMSTART
174 ; GFX90A-NEXT: ; def v[0:1]
175 ; GFX90A-NEXT: ;;#ASMEND
176 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
177 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
178 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
179 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
180 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
182 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__4_u_u:
184 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185 ; GFX940-NEXT: ;;#ASMSTART
186 ; GFX940-NEXT: ; def v[0:1]
187 ; GFX940-NEXT: ;;#ASMEND
188 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
189 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
190 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
191 ; GFX940-NEXT: s_waitcnt vmcnt(0)
192 ; GFX940-NEXT: s_setpc_b64 s[30:31]
193 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
194 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow each asm-defined <4 x bfloat> to its first three elements.
195 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
196 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lane 0 takes element 1 of %extract31; no lane reads %extract3.
197 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 4, i32 poison, i32 poison>
198 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle with mask <5, poison, poison>, stored to %ptr.
; Index 5 addresses element 2 of the second operand (%extract31).
; The checks for each target expect only one asm def and a single dword store
; of the second defined register (v1).
202 define void @v_shuffle_v3bf16_v3bf16__5_u_u(ptr addrspace(1) inreg %ptr) {
203 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_u_u:
205 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
207 ; GFX900-NEXT: ;;#ASMSTART
208 ; GFX900-NEXT: ; def v[0:1]
209 ; GFX900-NEXT: ;;#ASMEND
210 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
211 ; GFX900-NEXT: s_waitcnt vmcnt(0)
212 ; GFX900-NEXT: s_setpc_b64 s[30:31]
214 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_u_u:
216 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
218 ; GFX90A-NEXT: ;;#ASMSTART
219 ; GFX90A-NEXT: ; def v[0:1]
220 ; GFX90A-NEXT: ;;#ASMEND
221 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
222 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
223 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
225 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_u_u:
227 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
229 ; GFX940-NEXT: ;;#ASMSTART
230 ; GFX940-NEXT: ; def v[0:1]
231 ; GFX940-NEXT: ;;#ASMEND
232 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
233 ; GFX940-NEXT: s_waitcnt vmcnt(0)
234 ; GFX940-NEXT: s_setpc_b64 s[30:31]
235 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
236 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow each asm-defined <4 x bfloat> to its first three elements.
237 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
238 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lane 0 takes element 2 of %extract31; no lane reads %extract3.
239 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 poison, i32 poison>
240 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle with mask <5, 0, poison>, stored to %ptr.
; Index 5 addresses element 2 of the second operand (%extract31); index 0
; addresses element 0 of the first operand (%extract3).
; The checks for each target expect both asm defs, a v_perm_b32 whose selector
; 0x5040100 is held in an SGPR, and a single dword store. The gfx940 checks
; additionally expect an s_nop 0 before the v_perm_b32.
244 define void @v_shuffle_v3bf16_v3bf16__5_0_u(ptr addrspace(1) inreg %ptr) {
245 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_0_u:
247 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248 ; GFX900-NEXT: ;;#ASMSTART
249 ; GFX900-NEXT: ; def v[0:1]
250 ; GFX900-NEXT: ;;#ASMEND
251 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
252 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
253 ; GFX900-NEXT: ;;#ASMSTART
254 ; GFX900-NEXT: ; def v[1:2]
255 ; GFX900-NEXT: ;;#ASMEND
256 ; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4
257 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
258 ; GFX900-NEXT: s_waitcnt vmcnt(0)
259 ; GFX900-NEXT: s_setpc_b64 s[30:31]
261 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_0_u:
263 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264 ; GFX90A-NEXT: ;;#ASMSTART
265 ; GFX90A-NEXT: ; def v[0:1]
266 ; GFX90A-NEXT: ;;#ASMEND
267 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
268 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
269 ; GFX90A-NEXT: ;;#ASMSTART
270 ; GFX90A-NEXT: ; def v[2:3]
271 ; GFX90A-NEXT: ;;#ASMEND
272 ; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
273 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
274 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
275 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
277 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_0_u:
279 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
280 ; GFX940-NEXT: ;;#ASMSTART
281 ; GFX940-NEXT: ; def v[0:1]
282 ; GFX940-NEXT: ;;#ASMEND
283 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
284 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
285 ; GFX940-NEXT: ;;#ASMSTART
286 ; GFX940-NEXT: ; def v[2:3]
287 ; GFX940-NEXT: ;;#ASMEND
288 ; GFX940-NEXT: s_nop 0
289 ; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
290 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
291 ; GFX940-NEXT: s_waitcnt vmcnt(0)
292 ; GFX940-NEXT: s_setpc_b64 s[30:31]
293 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
294 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow each asm-defined <4 x bfloat> to its first three elements.
295 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
296 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lane 0 = %extract31[2], lane 1 = %extract3[0], lane 2 = poison.
297 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 0, i32 poison>
298 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle with mask <5, 1, poison>, stored to %ptr.
; Index 5 addresses element 2 of the second operand (%extract31); index 1
; addresses element 1 of the first operand (%extract3).
; The checks for each target expect both asm defs, a v_bfi_b32 whose 0xffff
; mask is held in an SGPR, and a single dword store. The gfx940 checks
; additionally expect an s_nop 0 before the v_bfi_b32.
302 define void @v_shuffle_v3bf16_v3bf16__5_1_u(ptr addrspace(1) inreg %ptr) {
303 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_1_u:
305 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
306 ; GFX900-NEXT: ;;#ASMSTART
307 ; GFX900-NEXT: ; def v[0:1]
308 ; GFX900-NEXT: ;;#ASMEND
309 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
310 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
311 ; GFX900-NEXT: ;;#ASMSTART
312 ; GFX900-NEXT: ; def v[1:2]
313 ; GFX900-NEXT: ;;#ASMEND
314 ; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0
315 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
316 ; GFX900-NEXT: s_waitcnt vmcnt(0)
317 ; GFX900-NEXT: s_setpc_b64 s[30:31]
319 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_1_u:
321 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GFX90A-NEXT: ;;#ASMSTART
323 ; GFX90A-NEXT: ; def v[0:1]
324 ; GFX90A-NEXT: ;;#ASMEND
325 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
326 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
327 ; GFX90A-NEXT: ;;#ASMSTART
328 ; GFX90A-NEXT: ; def v[2:3]
329 ; GFX90A-NEXT: ;;#ASMEND
330 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0
331 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
332 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
333 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
335 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_1_u:
337 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
338 ; GFX940-NEXT: ;;#ASMSTART
339 ; GFX940-NEXT: ; def v[0:1]
340 ; GFX940-NEXT: ;;#ASMEND
341 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
342 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
343 ; GFX940-NEXT: ;;#ASMSTART
344 ; GFX940-NEXT: ; def v[2:3]
345 ; GFX940-NEXT: ;;#ASMEND
346 ; GFX940-NEXT: s_nop 0
347 ; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0
348 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
349 ; GFX940-NEXT: s_waitcnt vmcnt(0)
350 ; GFX940-NEXT: s_setpc_b64 s[30:31]
351 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
352 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow each asm-defined <4 x bfloat> to its first three elements.
353 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
354 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lane 0 = %extract31[2], lane 1 = %extract3[1], lane 2 = poison.
355 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 1, i32 poison>
356 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle with mask <5, 2, poison>, stored to %ptr.
; Index 5 addresses element 2 of the second operand (%extract31); index 2
; addresses element 2 of the first operand (%extract3).
; The checks for each target expect both asm defs, a v_perm_b32 on the second
; register of each pair with selector 0x5040100, and a single dword store.
; The gfx940 checks additionally expect an s_nop 0 before the v_perm_b32.
360 define void @v_shuffle_v3bf16_v3bf16__5_2_u(ptr addrspace(1) inreg %ptr) {
361 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_2_u:
363 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364 ; GFX900-NEXT: ;;#ASMSTART
365 ; GFX900-NEXT: ; def v[0:1]
366 ; GFX900-NEXT: ;;#ASMEND
367 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
368 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
369 ; GFX900-NEXT: ;;#ASMSTART
370 ; GFX900-NEXT: ; def v[2:3]
371 ; GFX900-NEXT: ;;#ASMEND
372 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
373 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
374 ; GFX900-NEXT: s_waitcnt vmcnt(0)
375 ; GFX900-NEXT: s_setpc_b64 s[30:31]
377 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_2_u:
379 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
380 ; GFX90A-NEXT: ;;#ASMSTART
381 ; GFX90A-NEXT: ; def v[0:1]
382 ; GFX90A-NEXT: ;;#ASMEND
383 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
384 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
385 ; GFX90A-NEXT: ;;#ASMSTART
386 ; GFX90A-NEXT: ; def v[2:3]
387 ; GFX90A-NEXT: ;;#ASMEND
388 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
389 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
390 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
391 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
393 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_2_u:
395 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396 ; GFX940-NEXT: ;;#ASMSTART
397 ; GFX940-NEXT: ; def v[0:1]
398 ; GFX940-NEXT: ;;#ASMEND
399 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
400 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
401 ; GFX940-NEXT: ;;#ASMSTART
402 ; GFX940-NEXT: ; def v[2:3]
403 ; GFX940-NEXT: ;;#ASMEND
404 ; GFX940-NEXT: s_nop 0
405 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
406 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
407 ; GFX940-NEXT: s_waitcnt vmcnt(0)
408 ; GFX940-NEXT: s_setpc_b64 s[30:31]
409 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
410 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow each asm-defined <4 x bfloat> to its first three elements.
411 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
412 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lane 0 = %extract31[2], lane 1 = %extract3[2], lane 2 = poison.
413 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 2, i32 poison>
414 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle with mask <5, 3, poison>, stored to %ptr.
; Indices 5 and 3 address elements 2 and 0 of the second operand (%extract31),
; so no lane reads the first operand.
; The checks for each target expect only one asm def, a v_perm_b32 combining
; both registers of that pair with selector 0x5040100, and a single dword store.
418 define void @v_shuffle_v3bf16_v3bf16__5_3_u(ptr addrspace(1) inreg %ptr) {
419 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_3_u:
421 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
422 ; GFX900-NEXT: ;;#ASMSTART
423 ; GFX900-NEXT: ; def v[0:1]
424 ; GFX900-NEXT: ;;#ASMEND
425 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
426 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
427 ; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4
428 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
429 ; GFX900-NEXT: s_waitcnt vmcnt(0)
430 ; GFX900-NEXT: s_setpc_b64 s[30:31]
432 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_3_u:
434 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
435 ; GFX90A-NEXT: ;;#ASMSTART
436 ; GFX90A-NEXT: ; def v[0:1]
437 ; GFX90A-NEXT: ;;#ASMEND
438 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
439 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
440 ; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4
441 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
442 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
443 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
445 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_3_u:
447 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
448 ; GFX940-NEXT: ;;#ASMSTART
449 ; GFX940-NEXT: ; def v[0:1]
450 ; GFX940-NEXT: ;;#ASMEND
451 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
452 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
453 ; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2
454 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
455 ; GFX940-NEXT: s_waitcnt vmcnt(0)
456 ; GFX940-NEXT: s_setpc_b64 s[30:31]
457 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
458 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow each asm-defined <4 x bfloat> to its first three elements.
459 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
460 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lane 0 = %extract31[2], lane 1 = %extract31[0], lane 2 = poison.
461 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 3, i32 poison>
462 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle with mask <5, 4, poison>, stored to %ptr.
; Indices 5 and 4 address elements 2 and 1 of the second operand (%extract31),
; so no lane reads the first operand.
; The checks for each target expect only one asm def, a v_bfi_b32 combining
; both registers of that pair under a 0xffff mask, and a single dword store.
466 define void @v_shuffle_v3bf16_v3bf16__5_4_u(ptr addrspace(1) inreg %ptr) {
467 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_4_u:
469 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
470 ; GFX900-NEXT: ;;#ASMSTART
471 ; GFX900-NEXT: ; def v[0:1]
472 ; GFX900-NEXT: ;;#ASMEND
473 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
474 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
475 ; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v0
476 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
477 ; GFX900-NEXT: s_waitcnt vmcnt(0)
478 ; GFX900-NEXT: s_setpc_b64 s[30:31]
480 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_4_u:
482 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
483 ; GFX90A-NEXT: ;;#ASMSTART
484 ; GFX90A-NEXT: ; def v[0:1]
485 ; GFX90A-NEXT: ;;#ASMEND
486 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
487 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
488 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v0
489 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
490 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
491 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
493 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_4_u:
495 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
496 ; GFX940-NEXT: ;;#ASMSTART
497 ; GFX940-NEXT: ; def v[0:1]
498 ; GFX940-NEXT: ;;#ASMEND
499 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
500 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
501 ; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0
502 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
503 ; GFX940-NEXT: s_waitcnt vmcnt(0)
504 ; GFX940-NEXT: s_setpc_b64 s[30:31]
505 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
506 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow each asm-defined <4 x bfloat> to its first three elements.
507 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
508 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lane 0 = %extract31[2], lane 1 = %extract31[1], lane 2 = poison.
509 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 4, i32 poison>
510 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle with mask <5, 5, poison>, stored to %ptr.
; Index 5 addresses element 2 of the second operand (%extract31); it fills
; both defined lanes, so no lane reads the first operand.
; The checks for each target expect only one asm def, a v_perm_b32 using the
; second defined register for both sources with selector 0x5040100, and a
; single dword store.
514 define void @v_shuffle_v3bf16_v3bf16__5_5_u(ptr addrspace(1) inreg %ptr) {
515 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_5_u:
517 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
518 ; GFX900-NEXT: ;;#ASMSTART
519 ; GFX900-NEXT: ; def v[0:1]
520 ; GFX900-NEXT: ;;#ASMEND
521 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
522 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
523 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
524 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
525 ; GFX900-NEXT: s_waitcnt vmcnt(0)
526 ; GFX900-NEXT: s_setpc_b64 s[30:31]
528 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_5_u:
530 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
531 ; GFX90A-NEXT: ;;#ASMSTART
532 ; GFX90A-NEXT: ; def v[0:1]
533 ; GFX90A-NEXT: ;;#ASMEND
534 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
535 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
536 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
537 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
538 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
539 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
541 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_5_u:
543 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544 ; GFX940-NEXT: ;;#ASMSTART
545 ; GFX940-NEXT: ; def v[0:1]
546 ; GFX940-NEXT: ;;#ASMEND
547 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
548 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
549 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
550 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
551 ; GFX940-NEXT: s_waitcnt vmcnt(0)
552 ; GFX940-NEXT: s_setpc_b64 s[30:31]
553 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
554 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow each asm-defined <4 x bfloat> to its first three elements.
555 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
556 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lanes 0 and 1 = %extract31[2], lane 2 = poison.
557 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 5, i32 poison>
558 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle with fully defined mask <5, 5, 0>, stored to %ptr.
; Index 5 addresses element 2 of the second operand (%extract31); index 0
; addresses element 0 of the first operand (%extract3).
; The checks for each target expect both asm defs, a v_perm_b32 with selector
; 0x5040100, a short store at offset 4 and a dword store at offset 0.
; The gfx940 checks additionally expect an s_nop 0 before the v_perm_b32.
562 define void @v_shuffle_v3bf16_v3bf16__5_5_0(ptr addrspace(1) inreg %ptr) {
563 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_5_0:
565 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566 ; GFX900-NEXT: ;;#ASMSTART
567 ; GFX900-NEXT: ; def v[0:1]
568 ; GFX900-NEXT: ;;#ASMEND
569 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
570 ; GFX900-NEXT: ;;#ASMSTART
571 ; GFX900-NEXT: ; def v[1:2]
572 ; GFX900-NEXT: ;;#ASMEND
573 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
574 ; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4
575 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
576 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
577 ; GFX900-NEXT: s_waitcnt vmcnt(0)
578 ; GFX900-NEXT: s_setpc_b64 s[30:31]
580 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_5_0:
582 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
583 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
584 ; GFX90A-NEXT: ;;#ASMSTART
585 ; GFX90A-NEXT: ; def v[0:1]
586 ; GFX90A-NEXT: ;;#ASMEND
587 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
588 ; GFX90A-NEXT: ;;#ASMSTART
589 ; GFX90A-NEXT: ; def v[2:3]
590 ; GFX90A-NEXT: ;;#ASMEND
591 ; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4
592 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
593 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
594 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
595 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
597 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_5_0:
599 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
600 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
601 ; GFX940-NEXT: ;;#ASMSTART
602 ; GFX940-NEXT: ; def v[0:1]
603 ; GFX940-NEXT: ;;#ASMEND
604 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
605 ; GFX940-NEXT: ;;#ASMSTART
606 ; GFX940-NEXT: ; def v[2:3]
607 ; GFX940-NEXT: ;;#ASMEND
608 ; GFX940-NEXT: s_nop 0
609 ; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2
610 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
611 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
612 ; GFX940-NEXT: s_waitcnt vmcnt(0)
613 ; GFX940-NEXT: s_setpc_b64 s[30:31]
614 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
615 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow each asm-defined <4 x bfloat> to its first three elements.
616 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
617 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lanes 0 and 1 = %extract31[2], lane 2 = %extract3[0].
618 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 5, i32 0>
619 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle with fully defined mask <5, 5, 1>, stored to %ptr.
; Index 5 addresses element 2 of the second operand (%extract31); index 1
; addresses element 1 of the first operand (%extract3).
; The checks for each target expect both asm defs, a v_perm_b32 with selector
; 0x5040100, a global_store_short_d16_hi at offset 4 and a dword store at
; offset 0. The gfx940 checks additionally expect an s_nop 0 before the
; v_perm_b32.
623 define void @v_shuffle_v3bf16_v3bf16__5_5_1(ptr addrspace(1) inreg %ptr) {
624 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_5_1:
626 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
627 ; GFX900-NEXT: ;;#ASMSTART
628 ; GFX900-NEXT: ; def v[0:1]
629 ; GFX900-NEXT: ;;#ASMEND
630 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
631 ; GFX900-NEXT: ;;#ASMSTART
632 ; GFX900-NEXT: ; def v[1:2]
633 ; GFX900-NEXT: ;;#ASMEND
634 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
635 ; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4
636 ; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4
637 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
638 ; GFX900-NEXT: s_waitcnt vmcnt(0)
639 ; GFX900-NEXT: s_setpc_b64 s[30:31]
641 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_5_1:
643 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
644 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
645 ; GFX90A-NEXT: ;;#ASMSTART
646 ; GFX90A-NEXT: ; def v[0:1]
647 ; GFX90A-NEXT: ;;#ASMEND
648 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
649 ; GFX90A-NEXT: ;;#ASMSTART
650 ; GFX90A-NEXT: ; def v[2:3]
651 ; GFX90A-NEXT: ;;#ASMEND
652 ; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4
653 ; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4
654 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
655 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
656 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
658 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_5_1:
660 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
661 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
662 ; GFX940-NEXT: ;;#ASMSTART
663 ; GFX940-NEXT: ; def v[0:1]
664 ; GFX940-NEXT: ;;#ASMEND
665 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
666 ; GFX940-NEXT: ;;#ASMSTART
667 ; GFX940-NEXT: ; def v[2:3]
668 ; GFX940-NEXT: ;;#ASMEND
669 ; GFX940-NEXT: s_nop 0
670 ; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2
671 ; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
672 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
673 ; GFX940-NEXT: s_waitcnt vmcnt(0)
674 ; GFX940-NEXT: s_setpc_b64 s[30:31]
675 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
676 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow each asm-defined <4 x bfloat> to its first three elements.
677 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
678 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lanes 0 and 1 = %extract31[2], lane 2 = %extract3[1].
679 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 5, i32 1>
680 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle with fully defined mask <5, 5, 2>, stored to %ptr.
; Index 5 addresses element 2 of the second operand (%extract31); index 2
; addresses element 2 of the first operand (%extract3).
; The checks for each target expect both asm defs, a v_perm_b32 with selector
; 0x5040100, a short store of v1 at offset 4 and a dword store at offset 0.
; The gfx940 checks additionally expect an s_nop 0 before the v_perm_b32.
684 define void @v_shuffle_v3bf16_v3bf16__5_5_2(ptr addrspace(1) inreg %ptr) {
685 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_5_2:
687 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
689 ; GFX900-NEXT: ;;#ASMSTART
690 ; GFX900-NEXT: ; def v[0:1]
691 ; GFX900-NEXT: ;;#ASMEND
692 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
693 ; GFX900-NEXT: ;;#ASMSTART
694 ; GFX900-NEXT: ; def v[2:3]
695 ; GFX900-NEXT: ;;#ASMEND
696 ; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4
697 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
698 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
699 ; GFX900-NEXT: s_waitcnt vmcnt(0)
700 ; GFX900-NEXT: s_setpc_b64 s[30:31]
702 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_5_2:
704 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
705 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
706 ; GFX90A-NEXT: ;;#ASMSTART
707 ; GFX90A-NEXT: ; def v[0:1]
708 ; GFX90A-NEXT: ;;#ASMEND
709 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
710 ; GFX90A-NEXT: ;;#ASMSTART
711 ; GFX90A-NEXT: ; def v[2:3]
712 ; GFX90A-NEXT: ;;#ASMEND
713 ; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
714 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
715 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
716 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
717 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
719 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_5_2:
721 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
722 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
723 ; GFX940-NEXT: ;;#ASMSTART
724 ; GFX940-NEXT: ; def v[0:1]
725 ; GFX940-NEXT: ;;#ASMEND
726 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
727 ; GFX940-NEXT: ;;#ASMSTART
728 ; GFX940-NEXT: ; def v[2:3]
729 ; GFX940-NEXT: ;;#ASMEND
730 ; GFX940-NEXT: s_nop 0
731 ; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
732 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
733 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
734 ; GFX940-NEXT: s_waitcnt vmcnt(0)
735 ; GFX940-NEXT: s_setpc_b64 s[30:31]
736 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
737 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow each asm-defined <4 x bfloat> to its first three elements.
738 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
739 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lanes 0 and 1 = %extract31[2], lane 2 = %extract3[2].
740 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 5, i32 2>
741 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle with fully defined mask <5, 5, 3>, stored to %ptr.
; Indices 5 and 3 address elements 2 and 0 of the second operand (%extract31),
; so no lane reads the first operand.
; The checks for each target expect only one asm def, a v_perm_b32 using the
; second defined register for both sources with selector 0x5040100, a short
; store of the first defined register at offset 4 and a dword store at offset 0.
745 define void @v_shuffle_v3bf16_v3bf16__5_5_3(ptr addrspace(1) inreg %ptr) {
746 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_5_3:
748 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
749 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
750 ; GFX900-NEXT: ;;#ASMSTART
751 ; GFX900-NEXT: ; def v[0:1]
752 ; GFX900-NEXT: ;;#ASMEND
753 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
754 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
755 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
756 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
757 ; GFX900-NEXT: s_waitcnt vmcnt(0)
758 ; GFX900-NEXT: s_setpc_b64 s[30:31]
760 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_5_3:
762 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
763 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
764 ; GFX90A-NEXT: ;;#ASMSTART
765 ; GFX90A-NEXT: ; def v[0:1]
766 ; GFX90A-NEXT: ;;#ASMEND
767 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
768 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
769 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
770 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
771 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
772 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
774 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_5_3:
776 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
777 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
778 ; GFX940-NEXT: ;;#ASMSTART
779 ; GFX940-NEXT: ; def v[0:1]
780 ; GFX940-NEXT: ;;#ASMEND
781 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
782 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
783 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
784 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
785 ; GFX940-NEXT: s_waitcnt vmcnt(0)
786 ; GFX940-NEXT: s_setpc_b64 s[30:31]
787 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
788 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
; Narrow each asm-defined <4 x bfloat> to its first three elements.
789 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
790 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; Lanes 0 and 1 = %extract31[2], lane 2 = %extract31[0].
791 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 5, i32 3>
792 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-input shuffle with mask <5, 5, 4>: result lanes 0 and 1 read element 2
; of %extract31 and lane 2 reads element 1 of %extract31. No lane reads
; %extract3. The <3 x bfloat> result is stored through %ptr.
796 define void @v_shuffle_v3bf16_v3bf16__5_5_4(ptr addrspace(1) inreg %ptr) {
797 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_5_4:
799 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
800 ; GFX900-NEXT: ;;#ASMSTART
801 ; GFX900-NEXT: ; def v[0:1]
802 ; GFX900-NEXT: ;;#ASMEND
803 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
804 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
805 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
806 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
807 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
808 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
809 ; GFX900-NEXT: s_waitcnt vmcnt(0)
810 ; GFX900-NEXT: s_setpc_b64 s[30:31]
812 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_5_4:
814 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
815 ; GFX90A-NEXT: ;;#ASMSTART
816 ; GFX90A-NEXT: ; def v[0:1]
817 ; GFX90A-NEXT: ;;#ASMEND
818 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
819 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
820 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
821 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
822 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
823 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
824 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
825 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
827 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_5_4:
829 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
830 ; GFX940-NEXT: ;;#ASMSTART
831 ; GFX940-NEXT: ; def v[0:1]
832 ; GFX940-NEXT: ;;#ASMEND
833 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
834 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
835 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
836 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
837 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
838 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
839 ; GFX940-NEXT: s_waitcnt vmcnt(0)
840 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: asm-defined <4 x bfloat> inputs, narrowed to their first
; three elements, shuffled, then stored.
841 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
842 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
843 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
844 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
845 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 5, i32 4>
846 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-input shuffle with mask <5, 5, 5>: all three result lanes read element 2
; of %extract31. No lane reads %extract3. The <3 x bfloat> result is stored
; through %ptr.
850 define void @v_shuffle_v3bf16_v3bf16__5_5_5(ptr addrspace(1) inreg %ptr) {
851 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_5_5:
853 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
854 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
855 ; GFX900-NEXT: ;;#ASMSTART
856 ; GFX900-NEXT: ; def v[0:1]
857 ; GFX900-NEXT: ;;#ASMEND
858 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
859 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
860 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
861 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
862 ; GFX900-NEXT: s_waitcnt vmcnt(0)
863 ; GFX900-NEXT: s_setpc_b64 s[30:31]
865 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_5_5:
867 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
868 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
869 ; GFX90A-NEXT: ;;#ASMSTART
870 ; GFX90A-NEXT: ; def v[0:1]
871 ; GFX90A-NEXT: ;;#ASMEND
872 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
873 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
874 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
875 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
876 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
877 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
879 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_5_5:
881 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
882 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
883 ; GFX940-NEXT: ;;#ASMSTART
884 ; GFX940-NEXT: ; def v[0:1]
885 ; GFX940-NEXT: ;;#ASMEND
886 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
887 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
888 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
889 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
890 ; GFX940-NEXT: s_waitcnt vmcnt(0)
891 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: asm-defined <4 x bfloat> inputs, narrowed to their first
; three elements, shuffled, then stored.
892 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
893 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
894 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
895 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
896 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 5, i32 5>
897 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Single-input shuffle with mask <poison, 0, 0>: result lane 0 is left
; unspecified and lanes 1 and 2 both read element 0 of %extract3. The second
; shuffle operand is poison. The <3 x bfloat> result is stored through %ptr.
901 define void @v_shuffle_v3bf16_v3bf16__u_0_0(ptr addrspace(1) inreg %ptr) {
902 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__u_0_0:
904 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
905 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
906 ; GFX900-NEXT: ;;#ASMSTART
907 ; GFX900-NEXT: ; def v[0:1]
908 ; GFX900-NEXT: ;;#ASMEND
909 ; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v0
910 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
911 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
912 ; GFX900-NEXT: s_waitcnt vmcnt(0)
913 ; GFX900-NEXT: s_setpc_b64 s[30:31]
915 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__u_0_0:
917 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
918 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
919 ; GFX90A-NEXT: ;;#ASMSTART
920 ; GFX90A-NEXT: ; def v[0:1]
921 ; GFX90A-NEXT: ;;#ASMEND
922 ; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v0
923 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
924 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
925 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
926 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
928 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__u_0_0:
930 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
931 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
932 ; GFX940-NEXT: ;;#ASMSTART
933 ; GFX940-NEXT: ; def v[0:1]
934 ; GFX940-NEXT: ;;#ASMEND
935 ; GFX940-NEXT: s_nop 0
936 ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0
937 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
938 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
939 ; GFX940-NEXT: s_waitcnt vmcnt(0)
940 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: one asm-defined <4 x bfloat> input, narrowed to its first
; three elements, shuffled, then stored.
941 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
942 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
943 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 poison, i32 0, i32 0>
944 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Single-input shuffle with an all-zero mask (written as zeroinitializer):
; every result lane reads element 0 of %extract3. The second shuffle operand
; is poison. The <3 x bfloat> result is stored through %ptr.
948 define void @v_shuffle_v3bf16_v3bf16__0_0_0(ptr addrspace(1) inreg %ptr) {
949 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__0_0_0:
951 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
952 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
953 ; GFX900-NEXT: ;;#ASMSTART
954 ; GFX900-NEXT: ; def v[0:1]
955 ; GFX900-NEXT: ;;#ASMEND
956 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
957 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
958 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
959 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
960 ; GFX900-NEXT: s_waitcnt vmcnt(0)
961 ; GFX900-NEXT: s_setpc_b64 s[30:31]
963 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__0_0_0:
965 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
966 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
967 ; GFX90A-NEXT: ;;#ASMSTART
968 ; GFX90A-NEXT: ; def v[0:1]
969 ; GFX90A-NEXT: ;;#ASMEND
970 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
971 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
972 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
973 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
974 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
975 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
977 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__0_0_0:
979 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
980 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
981 ; GFX940-NEXT: ;;#ASMSTART
982 ; GFX940-NEXT: ; def v[0:1]
983 ; GFX940-NEXT: ;;#ASMEND
984 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
985 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
986 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
987 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
988 ; GFX940-NEXT: s_waitcnt vmcnt(0)
989 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: one asm-defined <4 x bfloat> input, narrowed to its first
; three elements, shuffled, then stored.
990 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
991 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
992 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> zeroinitializer
993 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Single-input shuffle with mask <1, 0, 0>: result lane 0 reads element 1 of
; %extract3 and lanes 1 and 2 read element 0. The second shuffle operand is
; poison. The <3 x bfloat> result is stored through %ptr.
997 define void @v_shuffle_v3bf16_v3bf16__1_0_0(ptr addrspace(1) inreg %ptr) {
998 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__1_0_0:
1000 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1001 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1002 ; GFX900-NEXT: ;;#ASMSTART
1003 ; GFX900-NEXT: ; def v[0:1]
1004 ; GFX900-NEXT: ;;#ASMEND
1005 ; GFX900-NEXT: v_alignbit_b32 v1, v0, v0, 16
1006 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1007 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1008 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1009 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1011 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__1_0_0:
1013 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1014 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1015 ; GFX90A-NEXT: ;;#ASMSTART
1016 ; GFX90A-NEXT: ; def v[0:1]
1017 ; GFX90A-NEXT: ;;#ASMEND
1018 ; GFX90A-NEXT: v_alignbit_b32 v1, v0, v0, 16
1019 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1020 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1021 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1022 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1024 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__1_0_0:
1026 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1027 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1028 ; GFX940-NEXT: ;;#ASMSTART
1029 ; GFX940-NEXT: ; def v[0:1]
1030 ; GFX940-NEXT: ;;#ASMEND
1031 ; GFX940-NEXT: s_nop 0
1032 ; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16
1033 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1034 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1035 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1036 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: one asm-defined <4 x bfloat> input, narrowed to its first
; three elements, shuffled, then stored.
1037 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1038 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1039 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 1, i32 0, i32 0>
1040 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Single-input shuffle with mask <2, 0, 0>: result lane 0 reads element 2 of
; %extract3 and lanes 1 and 2 read element 0. The second shuffle operand is
; poison. The <3 x bfloat> result is stored through %ptr.
1044 define void @v_shuffle_v3bf16_v3bf16__2_0_0(ptr addrspace(1) inreg %ptr) {
1045 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__2_0_0:
1047 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1048 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1049 ; GFX900-NEXT: ;;#ASMSTART
1050 ; GFX900-NEXT: ; def v[0:1]
1051 ; GFX900-NEXT: ;;#ASMEND
1052 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1053 ; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4
1054 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1055 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1056 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1057 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1059 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__2_0_0:
1061 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1062 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1063 ; GFX90A-NEXT: ;;#ASMSTART
1064 ; GFX90A-NEXT: ; def v[0:1]
1065 ; GFX90A-NEXT: ;;#ASMEND
1066 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1067 ; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4
1068 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1069 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1070 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1071 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1073 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__2_0_0:
1075 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1076 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1077 ; GFX940-NEXT: ;;#ASMSTART
1078 ; GFX940-NEXT: ; def v[0:1]
1079 ; GFX940-NEXT: ;;#ASMEND
1080 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1081 ; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2
1082 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1083 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1084 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1085 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: one asm-defined <4 x bfloat> input, narrowed to its first
; three elements, shuffled, then stored.
1086 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1087 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1088 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 2, i32 0, i32 0>
1089 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle with mask <3, 0, 0> whose second operand is poison: index 3 selects
; element 0 of that poison operand, so result lane 0 carries no defined value;
; lanes 1 and 2 read element 0 of %extract3. The <3 x bfloat> result is stored
; through %ptr.
1093 define void @v_shuffle_v3bf16_v3bf16__3_0_0(ptr addrspace(1) inreg %ptr) {
1094 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__3_0_0:
1096 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1097 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1098 ; GFX900-NEXT: ;;#ASMSTART
1099 ; GFX900-NEXT: ; def v[0:1]
1100 ; GFX900-NEXT: ;;#ASMEND
1101 ; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v0
1102 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1103 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1104 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1105 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1107 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__3_0_0:
1109 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1110 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1111 ; GFX90A-NEXT: ;;#ASMSTART
1112 ; GFX90A-NEXT: ; def v[0:1]
1113 ; GFX90A-NEXT: ;;#ASMEND
1114 ; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v0
1115 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1116 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1117 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1118 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1120 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__3_0_0:
1122 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1123 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1124 ; GFX940-NEXT: ;;#ASMSTART
1125 ; GFX940-NEXT: ; def v[0:1]
1126 ; GFX940-NEXT: ;;#ASMEND
1127 ; GFX940-NEXT: s_nop 0
1128 ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0
1129 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1130 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1131 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1132 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: one asm-defined <4 x bfloat> input, narrowed to its first
; three elements, shuffled, then stored.
1133 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1134 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1135 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 3, i32 0, i32 0>
1136 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-input shuffle with mask <4, 0, 0>: result lane 0 reads element 1 of
; %extract31 and lanes 1 and 2 read element 0 of %extract3. The <3 x bfloat>
; result is stored through %ptr.
1140 define void @v_shuffle_v3bf16_v3bf16__4_0_0(ptr addrspace(1) inreg %ptr) {
1141 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__4_0_0:
1143 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1144 ; GFX900-NEXT: ;;#ASMSTART
1145 ; GFX900-NEXT: ; def v[0:1]
1146 ; GFX900-NEXT: ;;#ASMEND
1147 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1148 ; GFX900-NEXT: ;;#ASMSTART
1149 ; GFX900-NEXT: ; def v[1:2]
1150 ; GFX900-NEXT: ;;#ASMEND
1151 ; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16
1152 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1153 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1154 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1155 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1157 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__4_0_0:
1159 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1160 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1161 ; GFX90A-NEXT: ;;#ASMSTART
1162 ; GFX90A-NEXT: ; def v[0:1]
1163 ; GFX90A-NEXT: ;;#ASMEND
1164 ; GFX90A-NEXT: ;;#ASMSTART
1165 ; GFX90A-NEXT: ; def v[2:3]
1166 ; GFX90A-NEXT: ;;#ASMEND
1167 ; GFX90A-NEXT: v_alignbit_b32 v1, v0, v2, 16
1168 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1169 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1170 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1171 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1173 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__4_0_0:
1175 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1176 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1177 ; GFX940-NEXT: ;;#ASMSTART
1178 ; GFX940-NEXT: ; def v[0:1]
1179 ; GFX940-NEXT: ;;#ASMEND
1180 ; GFX940-NEXT: ;;#ASMSTART
1181 ; GFX940-NEXT: ; def v[2:3]
1182 ; GFX940-NEXT: ;;#ASMEND
1183 ; GFX940-NEXT: s_nop 0
1184 ; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16
1185 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1186 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1187 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1188 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: asm-defined <4 x bfloat> inputs, narrowed to their first
; three elements, shuffled, then stored.
1189 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1190 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1191 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1192 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1193 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 4, i32 0, i32 0>
1194 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-input shuffle with mask <5, 0, 0>: result lane 0 reads element 2 of
; %extract31 and lanes 1 and 2 read element 0 of %extract3. The <3 x bfloat>
; result is stored through %ptr.
1198 define void @v_shuffle_v3bf16_v3bf16__5_0_0(ptr addrspace(1) inreg %ptr) {
1199 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_0_0:
1201 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1202 ; GFX900-NEXT: ;;#ASMSTART
1203 ; GFX900-NEXT: ; def v[0:1]
1204 ; GFX900-NEXT: ;;#ASMEND
1205 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1206 ; GFX900-NEXT: ;;#ASMSTART
1207 ; GFX900-NEXT: ; def v[1:2]
1208 ; GFX900-NEXT: ;;#ASMEND
1209 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1210 ; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4
1211 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1212 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1213 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1214 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1216 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_0_0:
1218 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1219 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1220 ; GFX90A-NEXT: ;;#ASMSTART
1221 ; GFX90A-NEXT: ; def v[0:1]
1222 ; GFX90A-NEXT: ;;#ASMEND
1223 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1224 ; GFX90A-NEXT: ;;#ASMSTART
1225 ; GFX90A-NEXT: ; def v[2:3]
1226 ; GFX90A-NEXT: ;;#ASMEND
1227 ; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4
1228 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1229 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1230 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1231 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1233 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_0_0:
1235 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1236 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1237 ; GFX940-NEXT: ;;#ASMSTART
1238 ; GFX940-NEXT: ; def v[0:1]
1239 ; GFX940-NEXT: ;;#ASMEND
1240 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1241 ; GFX940-NEXT: ;;#ASMSTART
1242 ; GFX940-NEXT: ; def v[2:3]
1243 ; GFX940-NEXT: ;;#ASMEND
1244 ; GFX940-NEXT: s_nop 0
1245 ; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2
1246 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1247 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1248 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1249 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: asm-defined <4 x bfloat> inputs, narrowed to their first
; three elements, shuffled, then stored.
1250 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1251 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1252 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1253 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1254 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 0, i32 0>
1255 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-input shuffle with mask <5, poison, 0>: result lane 0 reads element 2 of
; %extract31, lane 1 is left unspecified, and lane 2 reads element 0 of
; %extract3. The <3 x bfloat> result is stored through %ptr.
1259 define void @v_shuffle_v3bf16_v3bf16__5_u_0(ptr addrspace(1) inreg %ptr) {
1260 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_u_0:
1262 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1263 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1264 ; GFX900-NEXT: ;;#ASMSTART
1265 ; GFX900-NEXT: ; def v[0:1]
1266 ; GFX900-NEXT: ;;#ASMEND
1267 ; GFX900-NEXT: ;;#ASMSTART
1268 ; GFX900-NEXT: ; def v[1:2]
1269 ; GFX900-NEXT: ;;#ASMEND
1270 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1271 ; GFX900-NEXT: global_store_dword v3, v2, s[16:17]
1272 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1273 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1275 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_u_0:
1277 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1278 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1279 ; GFX90A-NEXT: ;;#ASMSTART
1280 ; GFX90A-NEXT: ; def v[0:1]
1281 ; GFX90A-NEXT: ;;#ASMEND
1282 ; GFX90A-NEXT: ;;#ASMSTART
1283 ; GFX90A-NEXT: ; def v[2:3]
1284 ; GFX90A-NEXT: ;;#ASMEND
1285 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1286 ; GFX90A-NEXT: global_store_dword v4, v3, s[16:17]
1287 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1288 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1290 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_u_0:
1292 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1293 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1294 ; GFX940-NEXT: ;;#ASMSTART
1295 ; GFX940-NEXT: ; def v[0:1]
1296 ; GFX940-NEXT: ;;#ASMEND
1297 ; GFX940-NEXT: ;;#ASMSTART
1298 ; GFX940-NEXT: ; def v[2:3]
1299 ; GFX940-NEXT: ;;#ASMEND
1300 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1301 ; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1
1302 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1303 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: asm-defined <4 x bfloat> inputs, narrowed to their first
; three elements, shuffled, then stored.
1304 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1305 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1306 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1307 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1308 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 poison, i32 0>
1309 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-input shuffle with mask <5, 1, 0>: result lane 0 reads element 2 of
; %extract31, lane 1 reads element 1 of %extract3, and lane 2 reads element 0
; of %extract3. The <3 x bfloat> result is stored through %ptr.
1313 define void @v_shuffle_v3bf16_v3bf16__5_1_0(ptr addrspace(1) inreg %ptr) {
1314 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_1_0:
1316 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1317 ; GFX900-NEXT: ;;#ASMSTART
1318 ; GFX900-NEXT: ; def v[0:1]
1319 ; GFX900-NEXT: ;;#ASMEND
1320 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1321 ; GFX900-NEXT: ;;#ASMSTART
1322 ; GFX900-NEXT: ; def v[1:2]
1323 ; GFX900-NEXT: ;;#ASMEND
1324 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
1325 ; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v0
1326 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1327 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1328 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1329 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1331 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_1_0:
1333 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1334 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1335 ; GFX90A-NEXT: ;;#ASMSTART
1336 ; GFX90A-NEXT: ; def v[0:1]
1337 ; GFX90A-NEXT: ;;#ASMEND
1338 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
1339 ; GFX90A-NEXT: ;;#ASMSTART
1340 ; GFX90A-NEXT: ; def v[2:3]
1341 ; GFX90A-NEXT: ;;#ASMEND
1342 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v3, v0
1343 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1344 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1345 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1346 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1348 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_1_0:
1350 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1351 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1352 ; GFX940-NEXT: ;;#ASMSTART
1353 ; GFX940-NEXT: ; def v[0:1]
1354 ; GFX940-NEXT: ;;#ASMEND
1355 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
1356 ; GFX940-NEXT: ;;#ASMSTART
1357 ; GFX940-NEXT: ; def v[2:3]
1358 ; GFX940-NEXT: ;;#ASMEND
1359 ; GFX940-NEXT: s_nop 0
1360 ; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0
1361 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1362 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1363 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1364 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: asm-defined <4 x bfloat> inputs, narrowed to their first
; three elements, shuffled, then stored.
1365 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1366 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1367 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1368 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1369 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 1, i32 0>
1370 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-input shuffle with mask <5, 2, 0>: result lane 0 reads element 2 of
; %extract31, lane 1 reads element 2 of %extract3, and lane 2 reads element 0
; of %extract3. The <3 x bfloat> result is stored through %ptr.
1374 define void @v_shuffle_v3bf16_v3bf16__5_2_0(ptr addrspace(1) inreg %ptr) {
1375 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_2_0:
1377 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1378 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
1379 ; GFX900-NEXT: ;;#ASMSTART
1380 ; GFX900-NEXT: ; def v[0:1]
1381 ; GFX900-NEXT: ;;#ASMEND
1382 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1383 ; GFX900-NEXT: ;;#ASMSTART
1384 ; GFX900-NEXT: ; def v[2:3]
1385 ; GFX900-NEXT: ;;#ASMEND
1386 ; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4
1387 ; GFX900-NEXT: global_store_short v4, v0, s[16:17] offset:4
1388 ; GFX900-NEXT: global_store_dword v4, v1, s[16:17]
1389 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1390 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1392 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_2_0:
1394 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1395 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1396 ; GFX90A-NEXT: ;;#ASMSTART
1397 ; GFX90A-NEXT: ; def v[0:1]
1398 ; GFX90A-NEXT: ;;#ASMEND
1399 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1400 ; GFX90A-NEXT: ;;#ASMSTART
1401 ; GFX90A-NEXT: ; def v[2:3]
1402 ; GFX90A-NEXT: ;;#ASMEND
1403 ; GFX90A-NEXT: v_perm_b32 v1, v1, v3, s4
1404 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1405 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1406 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1407 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1409 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_2_0:
1411 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1412 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1413 ; GFX940-NEXT: ;;#ASMSTART
1414 ; GFX940-NEXT: ; def v[0:1]
1415 ; GFX940-NEXT: ;;#ASMEND
1416 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1417 ; GFX940-NEXT: ;;#ASMSTART
1418 ; GFX940-NEXT: ; def v[2:3]
1419 ; GFX940-NEXT: ;;#ASMEND
1420 ; GFX940-NEXT: s_nop 0
1421 ; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2
1422 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1423 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1424 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1425 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: asm-defined <4 x bfloat> inputs, narrowed to their first
; three elements, shuffled, then stored.
1426 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1427 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1428 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1429 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1430 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 2, i32 0>
1431 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-input shuffle with mask <5, 3, 0>: result lane 0 reads element 2 of
; %extract31, lane 1 reads element 0 of %extract31, and lane 2 reads element 0
; of %extract3. The <3 x bfloat> result is stored through %ptr.
1435 define void @v_shuffle_v3bf16_v3bf16__5_3_0(ptr addrspace(1) inreg %ptr) {
1436 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_3_0:
1438 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1439 ; GFX900-NEXT: ;;#ASMSTART
1440 ; GFX900-NEXT: ; def v[0:1]
1441 ; GFX900-NEXT: ;;#ASMEND
1442 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1443 ; GFX900-NEXT: ;;#ASMSTART
1444 ; GFX900-NEXT: ; def v[1:2]
1445 ; GFX900-NEXT: ;;#ASMEND
1446 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1447 ; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4
1448 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1449 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1450 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1451 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1453 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_3_0:
1455 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1456 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1457 ; GFX90A-NEXT: ;;#ASMSTART
1458 ; GFX90A-NEXT: ; def v[0:1]
1459 ; GFX90A-NEXT: ;;#ASMEND
1460 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1461 ; GFX90A-NEXT: ;;#ASMSTART
1462 ; GFX90A-NEXT: ; def v[2:3]
1463 ; GFX90A-NEXT: ;;#ASMEND
1464 ; GFX90A-NEXT: v_perm_b32 v1, v2, v3, s4
1465 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1466 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1467 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1468 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1470 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_3_0:
1472 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1473 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1474 ; GFX940-NEXT: ;;#ASMSTART
1475 ; GFX940-NEXT: ; def v[0:1]
1476 ; GFX940-NEXT: ;;#ASMEND
1477 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1478 ; GFX940-NEXT: ;;#ASMSTART
1479 ; GFX940-NEXT: ; def v[2:3]
1480 ; GFX940-NEXT: ;;#ASMEND
1481 ; GFX940-NEXT: s_nop 0
1482 ; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2
1483 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1484 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1485 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1486 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: asm-defined <4 x bfloat> inputs, narrowed to their first
; three elements, shuffled, then stored.
1487 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1488 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1489 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1490 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1491 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 3, i32 0>
1492 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-input shuffle with mask <5, 4, 0>: result lane 0 reads element 2 of
; %extract31, lane 1 reads element 1 of %extract31, and lane 2 reads element 0
; of %extract3. The <3 x bfloat> result is stored through %ptr.
1496 define void @v_shuffle_v3bf16_v3bf16__5_4_0(ptr addrspace(1) inreg %ptr) {
1497 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_4_0:
1499 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1500 ; GFX900-NEXT: ;;#ASMSTART
1501 ; GFX900-NEXT: ; def v[0:1]
1502 ; GFX900-NEXT: ;;#ASMEND
1503 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1504 ; GFX900-NEXT: ;;#ASMSTART
1505 ; GFX900-NEXT: ; def v[1:2]
1506 ; GFX900-NEXT: ;;#ASMEND
1507 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
1508 ; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v1
1509 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1510 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1511 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1512 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1514 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_4_0:
1516 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1517 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1518 ; GFX90A-NEXT: ;;#ASMSTART
1519 ; GFX90A-NEXT: ; def v[0:1]
1520 ; GFX90A-NEXT: ;;#ASMEND
1521 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
1522 ; GFX90A-NEXT: ;;#ASMSTART
1523 ; GFX90A-NEXT: ; def v[2:3]
1524 ; GFX90A-NEXT: ;;#ASMEND
1525 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v3, v2
1526 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1527 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1528 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1529 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1531 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_4_0:
1533 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1534 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1535 ; GFX940-NEXT: ;;#ASMSTART
1536 ; GFX940-NEXT: ; def v[0:1]
1537 ; GFX940-NEXT: ;;#ASMEND
1538 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
1539 ; GFX940-NEXT: ;;#ASMSTART
1540 ; GFX940-NEXT: ; def v[2:3]
1541 ; GFX940-NEXT: ;;#ASMEND
1542 ; GFX940-NEXT: s_nop 0
1543 ; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v2
1544 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1545 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1546 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1547 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: asm-defined <4 x bfloat> inputs, narrowed to their first
; three elements, shuffled, then stored.
1548 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1549 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1550 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1551 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1552 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 4, i32 0>
1553 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Single-input shuffle with mask <poison, 1, 1>: result lane 0 is left
; unspecified and lanes 1 and 2 both read element 1 of %extract3. The second
; shuffle operand is poison. The <3 x bfloat> result is stored through %ptr.
1557 define void @v_shuffle_v3bf16_v3bf16__u_1_1(ptr addrspace(1) inreg %ptr) {
1558 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__u_1_1:
1560 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1561 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1562 ; GFX900-NEXT: ;;#ASMSTART
1563 ; GFX900-NEXT: ; def v[0:1]
1564 ; GFX900-NEXT: ;;#ASMEND
1565 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1566 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1567 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1568 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1569 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1571 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__u_1_1:
1573 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1574 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1575 ; GFX90A-NEXT: ;;#ASMSTART
1576 ; GFX90A-NEXT: ; def v[0:1]
1577 ; GFX90A-NEXT: ;;#ASMEND
1578 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1579 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1580 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1581 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1582 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1584 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__u_1_1:
1586 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1587 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1588 ; GFX940-NEXT: ;;#ASMSTART
1589 ; GFX940-NEXT: ; def v[0:1]
1590 ; GFX940-NEXT: ;;#ASMEND
1591 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1592 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1593 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1594 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1595 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; IR under test: one asm-defined <4 x bfloat> input, narrowed to its first
; three elements, shuffled, then stored.
1596 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1597 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1598 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 poison, i32 1, i32 1>
1599 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; One-source shuffle, mask <0, 1, 1>. %vec0 is a <4 x bfloat> produced by an
; inline-asm call and narrowed to its first three lanes (%extract3). The stored
; result is <%extract3[0], %extract3[1], %extract3[1]>.
1603 define void @v_shuffle_v3bf16_v3bf16__0_1_1(ptr addrspace(1) inreg %ptr) {
1604 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__0_1_1:
1606 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1607 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1608 ; GFX900-NEXT: ;;#ASMSTART
1609 ; GFX900-NEXT: ; def v[0:1]
1610 ; GFX900-NEXT: ;;#ASMEND
1611 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1612 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1613 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1614 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1615 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1617 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__0_1_1:
1619 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1620 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1621 ; GFX90A-NEXT: ;;#ASMSTART
1622 ; GFX90A-NEXT: ; def v[0:1]
1623 ; GFX90A-NEXT: ;;#ASMEND
1624 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1625 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1626 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1627 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1628 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1630 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__0_1_1:
1632 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1633 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1634 ; GFX940-NEXT: ;;#ASMSTART
1635 ; GFX940-NEXT: ; def v[0:1]
1636 ; GFX940-NEXT: ;;#ASMEND
1637 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1638 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1639 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1640 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1641 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1642 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1643 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1644 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 1>
1645 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; One-source shuffle, mask <1, 1, 1>. %vec0 is a <4 x bfloat> produced by an
; inline-asm call and narrowed to its first three lanes (%extract3). Every lane
; of the stored result is %extract3[1].
1649 define void @v_shuffle_v3bf16_v3bf16__1_1_1(ptr addrspace(1) inreg %ptr) {
1650 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__1_1_1:
1652 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1653 ; GFX900-NEXT: ;;#ASMSTART
1654 ; GFX900-NEXT: ; def v[0:1]
1655 ; GFX900-NEXT: ;;#ASMEND
1656 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1657 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1658 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
1659 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1660 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1661 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1662 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1663 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1665 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__1_1_1:
1667 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1668 ; GFX90A-NEXT: ;;#ASMSTART
1669 ; GFX90A-NEXT: ; def v[0:1]
1670 ; GFX90A-NEXT: ;;#ASMEND
1671 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1672 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1673 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
1674 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1675 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1676 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1677 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1678 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1680 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__1_1_1:
1682 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1683 ; GFX940-NEXT: ;;#ASMSTART
1684 ; GFX940-NEXT: ; def v[0:1]
1685 ; GFX940-NEXT: ;;#ASMEND
1686 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1687 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1688 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
1689 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1690 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1691 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1692 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1693 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1694 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1695 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1696 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 1, i32 1, i32 1>
1697 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; One-source shuffle, mask <2, 1, 1>. %vec0 is a <4 x bfloat> produced by an
; inline-asm call and narrowed to its first three lanes (%extract3). The stored
; result is <%extract3[2], %extract3[1], %extract3[1]>.
1701 define void @v_shuffle_v3bf16_v3bf16__2_1_1(ptr addrspace(1) inreg %ptr) {
1702 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__2_1_1:
1704 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1705 ; GFX900-NEXT: ;;#ASMSTART
1706 ; GFX900-NEXT: ; def v[0:1]
1707 ; GFX900-NEXT: ;;#ASMEND
1708 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
1709 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1710 ; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0
1711 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1712 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1713 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1714 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1715 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1717 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__2_1_1:
1719 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1720 ; GFX90A-NEXT: ;;#ASMSTART
1721 ; GFX90A-NEXT: ; def v[0:1]
1722 ; GFX90A-NEXT: ;;#ASMEND
1723 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
1724 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1725 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v0
1726 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1727 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1728 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1729 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1730 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1732 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__2_1_1:
1734 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1735 ; GFX940-NEXT: ;;#ASMSTART
1736 ; GFX940-NEXT: ; def v[0:1]
1737 ; GFX940-NEXT: ;;#ASMEND
1738 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
1739 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1740 ; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0
1741 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1742 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1743 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1744 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1745 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1746 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1747 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1748 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 2, i32 1, i32 1>
1749 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; One-source shuffle, mask <3, 1, 1>. %vec0 is a <4 x bfloat> produced by an
; inline-asm call and narrowed to its first three lanes (%extract3). Index 3
; addresses lane 0 of the second shufflevector operand, which is poison in this
; function, so the stored result is <poison, %extract3[1], %extract3[1]>.
1753 define void @v_shuffle_v3bf16_v3bf16__3_1_1(ptr addrspace(1) inreg %ptr) {
1754 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__3_1_1:
1756 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1757 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1758 ; GFX900-NEXT: ;;#ASMSTART
1759 ; GFX900-NEXT: ; def v[0:1]
1760 ; GFX900-NEXT: ;;#ASMEND
1761 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1762 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1763 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1764 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1765 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1767 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__3_1_1:
1769 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1770 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1771 ; GFX90A-NEXT: ;;#ASMSTART
1772 ; GFX90A-NEXT: ; def v[0:1]
1773 ; GFX90A-NEXT: ;;#ASMEND
1774 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1775 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1776 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1777 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1778 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1780 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__3_1_1:
1782 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1783 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1784 ; GFX940-NEXT: ;;#ASMSTART
1785 ; GFX940-NEXT: ; def v[0:1]
1786 ; GFX940-NEXT: ;;#ASMEND
1787 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1788 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1789 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1790 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1791 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1792 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1793 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1794 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 3, i32 1, i32 1>
1795 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle, mask <4, 1, 1>. %vec0 and %vec1 are <4 x bfloat> values
; produced by inline-asm calls, each narrowed to three lanes (%extract3 and
; %extract31). Indices 3-5 select from the second operand, so the stored result
; is <%extract31[1], %extract3[1], %extract3[1]>.
1799 define void @v_shuffle_v3bf16_v3bf16__4_1_1(ptr addrspace(1) inreg %ptr) {
1800 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__4_1_1:
1802 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1803 ; GFX900-NEXT: ;;#ASMSTART
1804 ; GFX900-NEXT: ; def v[0:1]
1805 ; GFX900-NEXT: ;;#ASMEND
1806 ; GFX900-NEXT: ;;#ASMSTART
1807 ; GFX900-NEXT: ; def v[1:2]
1808 ; GFX900-NEXT: ;;#ASMEND
1809 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1810 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1811 ; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4
1812 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1813 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1814 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1815 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1816 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1818 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__4_1_1:
1820 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1821 ; GFX90A-NEXT: ;;#ASMSTART
1822 ; GFX90A-NEXT: ; def v[0:1]
1823 ; GFX90A-NEXT: ;;#ASMEND
1824 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1825 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1826 ; GFX90A-NEXT: ;;#ASMSTART
1827 ; GFX90A-NEXT: ; def v[2:3]
1828 ; GFX90A-NEXT: ;;#ASMEND
1829 ; GFX90A-NEXT: v_perm_b32 v1, v0, v2, s4
1830 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1831 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1832 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1833 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1834 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1836 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__4_1_1:
1838 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1839 ; GFX940-NEXT: ;;#ASMSTART
1840 ; GFX940-NEXT: ; def v[0:1]
1841 ; GFX940-NEXT: ;;#ASMEND
1842 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1843 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1844 ; GFX940-NEXT: ;;#ASMSTART
1845 ; GFX940-NEXT: ; def v[2:3]
1846 ; GFX940-NEXT: ;;#ASMEND
1847 ; GFX940-NEXT: s_nop 0
1848 ; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2
1849 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1850 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1851 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1852 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1853 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1854 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1855 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1856 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1857 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1858 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 4, i32 1, i32 1>
1859 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle, mask <5, 1, 1>. %vec0 and %vec1 are <4 x bfloat> values
; produced by inline-asm calls, each narrowed to three lanes (%extract3 and
; %extract31). Indices 3-5 select from the second operand, so the stored result
; is <%extract31[2], %extract3[1], %extract3[1]>.
1863 define void @v_shuffle_v3bf16_v3bf16__5_1_1(ptr addrspace(1) inreg %ptr) {
1864 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_1_1:
1866 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1867 ; GFX900-NEXT: ;;#ASMSTART
1868 ; GFX900-NEXT: ; def v[0:1]
1869 ; GFX900-NEXT: ;;#ASMEND
1870 ; GFX900-NEXT: ;;#ASMSTART
1871 ; GFX900-NEXT: ; def v[1:2]
1872 ; GFX900-NEXT: ;;#ASMEND
1873 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
1874 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1875 ; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v0
1876 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1877 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1878 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1879 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1880 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1882 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_1_1:
1884 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1885 ; GFX90A-NEXT: ;;#ASMSTART
1886 ; GFX90A-NEXT: ; def v[0:1]
1887 ; GFX90A-NEXT: ;;#ASMEND
1888 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
1889 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1890 ; GFX90A-NEXT: ;;#ASMSTART
1891 ; GFX90A-NEXT: ; def v[2:3]
1892 ; GFX90A-NEXT: ;;#ASMEND
1893 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v3, v0
1894 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1895 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1896 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1897 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1898 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1900 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_1_1:
1902 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1903 ; GFX940-NEXT: ;;#ASMSTART
1904 ; GFX940-NEXT: ; def v[0:1]
1905 ; GFX940-NEXT: ;;#ASMEND
1906 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
1907 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1908 ; GFX940-NEXT: ;;#ASMSTART
1909 ; GFX940-NEXT: ; def v[2:3]
1910 ; GFX940-NEXT: ;;#ASMEND
1911 ; GFX940-NEXT: s_nop 0
1912 ; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0
1913 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1914 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1915 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1916 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1917 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1918 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1919 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1920 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1921 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1922 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 1, i32 1>
1923 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle, mask <5, poison, 1>. %vec0 and %vec1 are <4 x bfloat>
; values produced by inline-asm calls, each narrowed to three lanes (%extract3
; and %extract31). Indices 3-5 select from the second operand, so the stored
; result is <%extract31[2], poison, %extract3[1]>.
1927 define void @v_shuffle_v3bf16_v3bf16__5_u_1(ptr addrspace(1) inreg %ptr) {
1928 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_u_1:
1930 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1931 ; GFX900-NEXT: ;;#ASMSTART
1932 ; GFX900-NEXT: ; def v[0:1]
1933 ; GFX900-NEXT: ;;#ASMEND
1934 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1935 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1936 ; GFX900-NEXT: ;;#ASMSTART
1937 ; GFX900-NEXT: ; def v[1:2]
1938 ; GFX900-NEXT: ;;#ASMEND
1939 ; GFX900-NEXT: global_store_dword v3, v2, s[16:17]
1940 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1941 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1942 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1944 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_u_1:
1946 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1947 ; GFX90A-NEXT: ;;#ASMSTART
1948 ; GFX90A-NEXT: ; def v[0:1]
1949 ; GFX90A-NEXT: ;;#ASMEND
1950 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1951 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1952 ; GFX90A-NEXT: ;;#ASMSTART
1953 ; GFX90A-NEXT: ; def v[2:3]
1954 ; GFX90A-NEXT: ;;#ASMEND
1955 ; GFX90A-NEXT: global_store_dword v4, v3, s[16:17]
1956 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1957 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1958 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1960 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_u_1:
1962 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1963 ; GFX940-NEXT: ;;#ASMSTART
1964 ; GFX940-NEXT: ; def v[0:1]
1965 ; GFX940-NEXT: ;;#ASMEND
1966 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1967 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1968 ; GFX940-NEXT: ;;#ASMSTART
1969 ; GFX940-NEXT: ; def v[2:3]
1970 ; GFX940-NEXT: ;;#ASMEND
1971 ; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1
1972 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1973 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1974 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1975 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1976 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1977 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1978 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
1979 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 poison, i32 1>
1980 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle, mask <5, 0, 1>. %vec0 and %vec1 are <4 x bfloat> values
; produced by inline-asm calls, each narrowed to three lanes (%extract3 and
; %extract31). Indices 3-5 select from the second operand, so the stored result
; is <%extract31[2], %extract3[0], %extract3[1]>.
1984 define void @v_shuffle_v3bf16_v3bf16__5_0_1(ptr addrspace(1) inreg %ptr) {
1985 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_0_1:
1987 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1988 ; GFX900-NEXT: ;;#ASMSTART
1989 ; GFX900-NEXT: ; def v[0:1]
1990 ; GFX900-NEXT: ;;#ASMEND
1991 ; GFX900-NEXT: ;;#ASMSTART
1992 ; GFX900-NEXT: ; def v[1:2]
1993 ; GFX900-NEXT: ;;#ASMEND
1994 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1995 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1996 ; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4
1997 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1998 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1999 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
2000 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2001 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2003 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_0_1:
2005 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2006 ; GFX90A-NEXT: ;;#ASMSTART
2007 ; GFX90A-NEXT: ; def v[0:1]
2008 ; GFX90A-NEXT: ;;#ASMEND
2009 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2010 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2011 ; GFX90A-NEXT: ;;#ASMSTART
2012 ; GFX90A-NEXT: ; def v[2:3]
2013 ; GFX90A-NEXT: ;;#ASMEND
2014 ; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4
2015 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2016 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
2017 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
2018 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2019 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2021 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_0_1:
2023 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2024 ; GFX940-NEXT: ;;#ASMSTART
2025 ; GFX940-NEXT: ; def v[0:1]
2026 ; GFX940-NEXT: ;;#ASMEND
2027 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2028 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2029 ; GFX940-NEXT: ;;#ASMSTART
2030 ; GFX940-NEXT: ; def v[2:3]
2031 ; GFX940-NEXT: ;;#ASMEND
2032 ; GFX940-NEXT: s_nop 0
2033 ; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2
2034 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2035 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
2036 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
2037 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2038 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2039 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2040 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2041 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2042 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2043 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 0, i32 1>
2044 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle, mask <5, 2, 1>. %vec0 and %vec1 are <4 x bfloat> values
; produced by inline-asm calls, each narrowed to three lanes (%extract3 and
; %extract31). Indices 3-5 select from the second operand, so the stored result
; is <%extract31[2], %extract3[2], %extract3[1]>.
2048 define void @v_shuffle_v3bf16_v3bf16__5_2_1(ptr addrspace(1) inreg %ptr) {
2049 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_2_1:
2051 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2052 ; GFX900-NEXT: ;;#ASMSTART
2053 ; GFX900-NEXT: ; def v[0:1]
2054 ; GFX900-NEXT: ;;#ASMEND
2055 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
2056 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2057 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2058 ; GFX900-NEXT: ;;#ASMSTART
2059 ; GFX900-NEXT: ; def v[2:3]
2060 ; GFX900-NEXT: ;;#ASMEND
2061 ; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4
2062 ; GFX900-NEXT: global_store_short v4, v0, s[16:17] offset:4
2063 ; GFX900-NEXT: global_store_dword v4, v1, s[16:17]
2064 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2065 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2067 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_2_1:
2069 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2070 ; GFX90A-NEXT: ;;#ASMSTART
2071 ; GFX90A-NEXT: ; def v[0:1]
2072 ; GFX90A-NEXT: ;;#ASMEND
2073 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2074 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2075 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2076 ; GFX90A-NEXT: ;;#ASMSTART
2077 ; GFX90A-NEXT: ; def v[2:3]
2078 ; GFX90A-NEXT: ;;#ASMEND
2079 ; GFX90A-NEXT: v_perm_b32 v1, v1, v3, s4
2080 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
2081 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
2082 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2083 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2085 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_2_1:
2087 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2088 ; GFX940-NEXT: ;;#ASMSTART
2089 ; GFX940-NEXT: ; def v[0:1]
2090 ; GFX940-NEXT: ;;#ASMEND
2091 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2092 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2093 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2094 ; GFX940-NEXT: ;;#ASMSTART
2095 ; GFX940-NEXT: ; def v[2:3]
2096 ; GFX940-NEXT: ;;#ASMEND
2097 ; GFX940-NEXT: s_nop 0
2098 ; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2
2099 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
2100 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
2101 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2102 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2103 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2104 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2105 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2106 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2107 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 2, i32 1>
2108 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle, mask <5, 3, 1>. %vec0 and %vec1 are <4 x bfloat> values
; produced by inline-asm calls, each narrowed to three lanes (%extract3 and
; %extract31). Indices 3-5 select from the second operand, so the stored result
; is <%extract31[2], %extract31[0], %extract3[1]>.
2112 define void @v_shuffle_v3bf16_v3bf16__5_3_1(ptr addrspace(1) inreg %ptr) {
2113 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_3_1:
2115 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2116 ; GFX900-NEXT: ;;#ASMSTART
2117 ; GFX900-NEXT: ; def v[0:1]
2118 ; GFX900-NEXT: ;;#ASMEND
2119 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
2120 ; GFX900-NEXT: ;;#ASMSTART
2121 ; GFX900-NEXT: ; def v[1:2]
2122 ; GFX900-NEXT: ;;#ASMEND
2123 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2124 ; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4
2125 ; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4
2126 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
2127 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2128 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2130 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_3_1:
2132 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2133 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2134 ; GFX90A-NEXT: ;;#ASMSTART
2135 ; GFX90A-NEXT: ; def v[0:1]
2136 ; GFX90A-NEXT: ;;#ASMEND
2137 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2138 ; GFX90A-NEXT: ;;#ASMSTART
2139 ; GFX90A-NEXT: ; def v[2:3]
2140 ; GFX90A-NEXT: ;;#ASMEND
2141 ; GFX90A-NEXT: v_perm_b32 v1, v2, v3, s4
2142 ; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4
2143 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
2144 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2145 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2147 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_3_1:
2149 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2150 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2151 ; GFX940-NEXT: ;;#ASMSTART
2152 ; GFX940-NEXT: ; def v[0:1]
2153 ; GFX940-NEXT: ;;#ASMEND
2154 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2155 ; GFX940-NEXT: ;;#ASMSTART
2156 ; GFX940-NEXT: ; def v[2:3]
2157 ; GFX940-NEXT: ;;#ASMEND
2158 ; GFX940-NEXT: s_nop 0
2159 ; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2
2160 ; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2161 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
2162 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2163 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2164 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2165 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2166 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2167 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2168 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 3, i32 1>
2169 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle, mask <5, 4, 1>. %vec0 and %vec1 are <4 x bfloat> values
; produced by inline-asm calls, each narrowed to three lanes (%extract3 and
; %extract31). Indices 3-5 select from the second operand, so the stored result
; is <%extract31[2], %extract31[1], %extract3[1]>.
2173 define void @v_shuffle_v3bf16_v3bf16__5_4_1(ptr addrspace(1) inreg %ptr) {
2174 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_4_1:
2176 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2177 ; GFX900-NEXT: ;;#ASMSTART
2178 ; GFX900-NEXT: ; def v[0:1]
2179 ; GFX900-NEXT: ;;#ASMEND
2180 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
2181 ; GFX900-NEXT: ;;#ASMSTART
2182 ; GFX900-NEXT: ; def v[1:2]
2183 ; GFX900-NEXT: ;;#ASMEND
2184 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
2185 ; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v1
2186 ; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4
2187 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
2188 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2189 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2191 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_4_1:
2193 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2194 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2195 ; GFX90A-NEXT: ;;#ASMSTART
2196 ; GFX90A-NEXT: ; def v[0:1]
2197 ; GFX90A-NEXT: ;;#ASMEND
2198 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
2199 ; GFX90A-NEXT: ;;#ASMSTART
2200 ; GFX90A-NEXT: ; def v[2:3]
2201 ; GFX90A-NEXT: ;;#ASMEND
2202 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v3, v2
2203 ; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4
2204 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
2205 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2206 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2208 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_4_1:
2210 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2211 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2212 ; GFX940-NEXT: ;;#ASMSTART
2213 ; GFX940-NEXT: ; def v[0:1]
2214 ; GFX940-NEXT: ;;#ASMEND
2215 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
2216 ; GFX940-NEXT: ;;#ASMSTART
2217 ; GFX940-NEXT: ; def v[2:3]
2218 ; GFX940-NEXT: ;;#ASMEND
2219 ; GFX940-NEXT: s_nop 0
2220 ; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v2
2221 ; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2222 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
2223 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2224 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2225 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2226 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2227 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2228 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2229 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 4, i32 1>
2230 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; One-source shuffle, mask <poison, 2, 2>. %vec0 is a <4 x bfloat> produced by
; an inline-asm call and narrowed to its first three lanes (%extract3). The
; stored result is <poison, %extract3[2], %extract3[2]>.
2234 define void @v_shuffle_v3bf16_v3bf16__u_2_2(ptr addrspace(1) inreg %ptr) {
2235 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__u_2_2:
2237 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2238 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2239 ; GFX900-NEXT: ;;#ASMSTART
2240 ; GFX900-NEXT: ; def v[0:1]
2241 ; GFX900-NEXT: ;;#ASMEND
2242 ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1
2243 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
2244 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2245 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2246 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2248 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__u_2_2:
2250 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2251 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2252 ; GFX90A-NEXT: ;;#ASMSTART
2253 ; GFX90A-NEXT: ; def v[0:1]
2254 ; GFX90A-NEXT: ;;#ASMEND
2255 ; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1
2256 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
2257 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2258 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2259 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2261 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__u_2_2:
2263 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2264 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2265 ; GFX940-NEXT: ;;#ASMSTART
2266 ; GFX940-NEXT: ; def v[0:1]
2267 ; GFX940-NEXT: ;;#ASMEND
2268 ; GFX940-NEXT: s_nop 0
2269 ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1
2270 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2271 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2272 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2273 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2274 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2275 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2276 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 poison, i32 2, i32 2>
2277 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; One-source shuffle, mask <0, 2, 2>. %vec0 is a <4 x bfloat> produced by an
; inline-asm call and narrowed to its first three lanes (%extract3). The stored
; result is <%extract3[0], %extract3[2], %extract3[2]>.
2281 define void @v_shuffle_v3bf16_v3bf16__0_2_2(ptr addrspace(1) inreg %ptr) {
2282 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__0_2_2:
2284 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2285 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2286 ; GFX900-NEXT: ;;#ASMSTART
2287 ; GFX900-NEXT: ; def v[0:1]
2288 ; GFX900-NEXT: ;;#ASMEND
2289 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2290 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
2291 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
2292 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2293 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2294 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2296 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__0_2_2:
2298 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2299 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2300 ; GFX90A-NEXT: ;;#ASMSTART
2301 ; GFX90A-NEXT: ; def v[0:1]
2302 ; GFX90A-NEXT: ;;#ASMEND
2303 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2304 ; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4
2305 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
2306 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2307 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2308 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2310 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__0_2_2:
2312 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2313 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2314 ; GFX940-NEXT: ;;#ASMSTART
2315 ; GFX940-NEXT: ; def v[0:1]
2316 ; GFX940-NEXT: ;;#ASMEND
2317 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2318 ; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2
2319 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2320 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2321 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2322 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2323 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2324 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2325 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 0, i32 2, i32 2>
2326 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; One-source shuffle, mask <1, 2, 2>. %vec0 is a <4 x bfloat> produced by an
; inline-asm call and narrowed to its first three lanes (%extract3). The stored
; result is <%extract3[1], %extract3[2], %extract3[2]>.
2330 define void @v_shuffle_v3bf16_v3bf16__1_2_2(ptr addrspace(1) inreg %ptr) {
2331 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__1_2_2:
2333 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2334 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2335 ; GFX900-NEXT: ;;#ASMSTART
2336 ; GFX900-NEXT: ; def v[0:1]
2337 ; GFX900-NEXT: ;;#ASMEND
2338 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16
2339 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
2340 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2341 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2342 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2344 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__1_2_2:
2346 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2347 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2348 ; GFX90A-NEXT: ;;#ASMSTART
2349 ; GFX90A-NEXT: ; def v[0:1]
2350 ; GFX90A-NEXT: ;;#ASMEND
2351 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16
2352 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
2353 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2354 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2355 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2357 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__1_2_2:
2359 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2360 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2361 ; GFX940-NEXT: ;;#ASMSTART
2362 ; GFX940-NEXT: ; def v[0:1]
2363 ; GFX940-NEXT: ;;#ASMEND
2364 ; GFX940-NEXT: s_nop 0
2365 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16
2366 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2367 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2368 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2369 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2370 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2371 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2372 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 1, i32 2, i32 2>
2373 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Single-source shuffle of <3 x bfloat> with mask <2, 2, 2> (lane 2 copied to
; every result lane); the second shufflevector operand is poison. The checks
; expect an s_mov_b32 of the constant 0x5040100 into an SGPR, a v_perm_b32
; with v1 as both vector inputs, then a 16-bit store of v1 at offset 4 and a
; dword store of the permuted value.
2377 define void @v_shuffle_v3bf16_v3bf16__2_2_2(ptr addrspace(1) inreg %ptr) {
2378 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__2_2_2:
2380 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2381 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2382 ; GFX900-NEXT: ;;#ASMSTART
2383 ; GFX900-NEXT: ; def v[0:1]
2384 ; GFX900-NEXT: ;;#ASMEND
2385 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2386 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
2387 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
2388 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2389 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2390 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2392 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__2_2_2:
2394 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2395 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2396 ; GFX90A-NEXT: ;;#ASMSTART
2397 ; GFX90A-NEXT: ; def v[0:1]
2398 ; GFX90A-NEXT: ;;#ASMEND
2399 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2400 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
2401 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
2402 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2403 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2404 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2406 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__2_2_2:
2408 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2409 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2410 ; GFX940-NEXT: ;;#ASMSTART
2411 ; GFX940-NEXT: ; def v[0:1]
2412 ; GFX940-NEXT: ;;#ASMEND
2413 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2414 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
2415 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2416 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2417 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2418 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; The <3 x bfloat> source is the first three lanes of a <4 x bfloat> value
; defined by inline asm; the shuffled result is stored through %ptr.
2419 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2420 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2421 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 2, i32 2, i32 2>
2422 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Single-source shuffle of <3 x bfloat> with mask <3, 2, 2>. Mask index 3
; refers to lane 0 of the second shufflevector operand, which is poison here,
; so only lanes 1 and 2 of the result carry a defined value. The checks expect
; a v_lshlrev_b32_e32 by 16 of v1 for the dword store plus a 16-bit store of
; v1 at offset 4; gfx940 additionally expects an s_nop 0 before the shift.
2426 define void @v_shuffle_v3bf16_v3bf16__3_2_2(ptr addrspace(1) inreg %ptr) {
2427 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__3_2_2:
2429 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2430 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2431 ; GFX900-NEXT: ;;#ASMSTART
2432 ; GFX900-NEXT: ; def v[0:1]
2433 ; GFX900-NEXT: ;;#ASMEND
2434 ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1
2435 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
2436 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2437 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2438 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2440 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__3_2_2:
2442 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2443 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2444 ; GFX90A-NEXT: ;;#ASMSTART
2445 ; GFX90A-NEXT: ; def v[0:1]
2446 ; GFX90A-NEXT: ;;#ASMEND
2447 ; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1
2448 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
2449 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2450 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2451 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2453 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__3_2_2:
2455 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2456 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2457 ; GFX940-NEXT: ;;#ASMSTART
2458 ; GFX940-NEXT: ; def v[0:1]
2459 ; GFX940-NEXT: ;;#ASMEND
2460 ; GFX940-NEXT: s_nop 0
2461 ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1
2462 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2463 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2464 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2465 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; The <3 x bfloat> source is the first three lanes of a <4 x bfloat> value
; defined by inline asm; the shuffled result is stored through %ptr.
2466 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2467 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2468 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 3, i32 2, i32 2>
2469 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle of <3 x bfloat> with mask <4, 2, 2>: result lane 0 is
; lane 1 of the second source (%extract31), lanes 1 and 2 are lane 2 of the
; first source (%extract3). The checks expect both inline-asm defs (v[0:1] and
; v[2:3]), one v_alignbit_b32 combining v1 and v2 with shift 16, a 16-bit
; store of v1 at offset 4 and a dword store of the combined value. gfx940
; additionally expects an s_nop 0 before the v_alignbit_b32.
2473 define void @v_shuffle_v3bf16_v3bf16__4_2_2(ptr addrspace(1) inreg %ptr) {
2474 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__4_2_2:
2476 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2477 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
2478 ; GFX900-NEXT: ;;#ASMSTART
2479 ; GFX900-NEXT: ; def v[0:1]
2480 ; GFX900-NEXT: ;;#ASMEND
2481 ; GFX900-NEXT: ;;#ASMSTART
2482 ; GFX900-NEXT: ; def v[2:3]
2483 ; GFX900-NEXT: ;;#ASMEND
2484 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v2, 16
2485 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
2486 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
2487 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2488 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2490 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__4_2_2:
2492 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2493 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2494 ; GFX90A-NEXT: ;;#ASMSTART
2495 ; GFX90A-NEXT: ; def v[0:1]
2496 ; GFX90A-NEXT: ;;#ASMEND
2497 ; GFX90A-NEXT: ;;#ASMSTART
2498 ; GFX90A-NEXT: ; def v[2:3]
2499 ; GFX90A-NEXT: ;;#ASMEND
2500 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v2, 16
2501 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
2502 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
2503 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2504 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2506 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__4_2_2:
2508 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2509 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2510 ; GFX940-NEXT: ;;#ASMSTART
2511 ; GFX940-NEXT: ; def v[0:1]
2512 ; GFX940-NEXT: ;;#ASMEND
2513 ; GFX940-NEXT: ;;#ASMSTART
2514 ; GFX940-NEXT: ; def v[2:3]
2515 ; GFX940-NEXT: ;;#ASMEND
2516 ; GFX940-NEXT: s_nop 0
2517 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16
2518 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
2519 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
2520 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2521 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each <3 x bfloat> source is the first three lanes of its own inline-asm
; defined <4 x bfloat> value; the shuffled result is stored through %ptr.
2522 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2523 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2524 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2525 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2526 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 4, i32 2, i32 2>
2527 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle of <3 x bfloat> with mask <5, 2, 2>: result lane 0 is
; lane 2 of the second source (%extract31), lanes 1 and 2 are lane 2 of the
; first source (%extract3). The checks expect the constant 0x5040100 in an
; SGPR, a v_perm_b32 of v1 and v3 using that SGPR, a 16-bit store of v1 at
; offset 4 and a dword store of the permuted value. gfx940 additionally
; expects an s_nop 0 before the v_perm_b32.
2531 define void @v_shuffle_v3bf16_v3bf16__5_2_2(ptr addrspace(1) inreg %ptr) {
2532 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_2_2:
2534 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2535 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
2536 ; GFX900-NEXT: ;;#ASMSTART
2537 ; GFX900-NEXT: ; def v[0:1]
2538 ; GFX900-NEXT: ;;#ASMEND
2539 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2540 ; GFX900-NEXT: ;;#ASMSTART
2541 ; GFX900-NEXT: ; def v[2:3]
2542 ; GFX900-NEXT: ;;#ASMEND
2543 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
2544 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
2545 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
2546 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2547 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2549 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_2_2:
2551 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2552 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2553 ; GFX90A-NEXT: ;;#ASMSTART
2554 ; GFX90A-NEXT: ; def v[0:1]
2555 ; GFX90A-NEXT: ;;#ASMEND
2556 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2557 ; GFX90A-NEXT: ;;#ASMSTART
2558 ; GFX90A-NEXT: ; def v[2:3]
2559 ; GFX90A-NEXT: ;;#ASMEND
2560 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
2561 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
2562 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
2563 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2564 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2566 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_2_2:
2568 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2569 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2570 ; GFX940-NEXT: ;;#ASMSTART
2571 ; GFX940-NEXT: ; def v[0:1]
2572 ; GFX940-NEXT: ;;#ASMEND
2573 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2574 ; GFX940-NEXT: ;;#ASMSTART
2575 ; GFX940-NEXT: ; def v[2:3]
2576 ; GFX940-NEXT: ;;#ASMEND
2577 ; GFX940-NEXT: s_nop 0
2578 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
2579 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
2580 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
2581 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2582 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each <3 x bfloat> source is the first three lanes of its own inline-asm
; defined <4 x bfloat> value; the shuffled result is stored through %ptr.
2583 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2584 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2585 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2586 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2587 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 2, i32 2>
2588 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle of <3 x bfloat> with mask <5, poison, 2>: result lane 0
; is lane 2 of the second source (%extract31), lane 1 is unspecified, lane 2
; is lane 2 of the first source (%extract3). The checks expect no permute or
; shift instruction at all, only a 16-bit store of v1 at offset 4 and a dword
; store of v3 taken directly from the second inline-asm def.
2592 define void @v_shuffle_v3bf16_v3bf16__5_u_2(ptr addrspace(1) inreg %ptr) {
2593 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_u_2:
2595 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2596 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
2597 ; GFX900-NEXT: ;;#ASMSTART
2598 ; GFX900-NEXT: ; def v[0:1]
2599 ; GFX900-NEXT: ;;#ASMEND
2600 ; GFX900-NEXT: ;;#ASMSTART
2601 ; GFX900-NEXT: ; def v[2:3]
2602 ; GFX900-NEXT: ;;#ASMEND
2603 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
2604 ; GFX900-NEXT: global_store_dword v4, v3, s[16:17]
2605 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2606 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2608 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_u_2:
2610 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2611 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2612 ; GFX90A-NEXT: ;;#ASMSTART
2613 ; GFX90A-NEXT: ; def v[0:1]
2614 ; GFX90A-NEXT: ;;#ASMEND
2615 ; GFX90A-NEXT: ;;#ASMSTART
2616 ; GFX90A-NEXT: ; def v[2:3]
2617 ; GFX90A-NEXT: ;;#ASMEND
2618 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
2619 ; GFX90A-NEXT: global_store_dword v4, v3, s[16:17]
2620 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2621 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2623 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_u_2:
2625 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2626 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2627 ; GFX940-NEXT: ;;#ASMSTART
2628 ; GFX940-NEXT: ; def v[0:1]
2629 ; GFX940-NEXT: ;;#ASMEND
2630 ; GFX940-NEXT: ;;#ASMSTART
2631 ; GFX940-NEXT: ; def v[2:3]
2632 ; GFX940-NEXT: ;;#ASMEND
2633 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
2634 ; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1
2635 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2636 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each <3 x bfloat> source is the first three lanes of its own inline-asm
; defined <4 x bfloat> value; the shuffled result is stored through %ptr.
2637 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2638 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2639 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2640 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2641 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 poison, i32 2>
2642 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle of <3 x bfloat> with mask <5, 0, 2>: result lane 0 is
; lane 2 of the second source (%extract31), lanes 1 and 2 are lanes 0 and 2 of
; the first source (%extract3). The checks expect the constant 0x5040100 in an
; SGPR, a v_perm_b32 of v0 and v3 using that SGPR, a 16-bit store of v1 at
; offset 4 and a dword store of the permuted value. gfx940 additionally
; expects an s_nop 0 before the v_perm_b32.
2646 define void @v_shuffle_v3bf16_v3bf16__5_0_2(ptr addrspace(1) inreg %ptr) {
2647 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_0_2:
2649 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2650 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
2651 ; GFX900-NEXT: ;;#ASMSTART
2652 ; GFX900-NEXT: ; def v[0:1]
2653 ; GFX900-NEXT: ;;#ASMEND
2654 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2655 ; GFX900-NEXT: ;;#ASMSTART
2656 ; GFX900-NEXT: ; def v[2:3]
2657 ; GFX900-NEXT: ;;#ASMEND
2658 ; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4
2659 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
2660 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
2661 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2662 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2664 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_0_2:
2666 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2667 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2668 ; GFX90A-NEXT: ;;#ASMSTART
2669 ; GFX90A-NEXT: ; def v[0:1]
2670 ; GFX90A-NEXT: ;;#ASMEND
2671 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2672 ; GFX90A-NEXT: ;;#ASMSTART
2673 ; GFX90A-NEXT: ; def v[2:3]
2674 ; GFX90A-NEXT: ;;#ASMEND
2675 ; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
2676 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
2677 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
2678 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2679 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2681 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_0_2:
2683 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2684 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2685 ; GFX940-NEXT: ;;#ASMSTART
2686 ; GFX940-NEXT: ; def v[0:1]
2687 ; GFX940-NEXT: ;;#ASMEND
2688 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2689 ; GFX940-NEXT: ;;#ASMSTART
2690 ; GFX940-NEXT: ; def v[2:3]
2691 ; GFX940-NEXT: ;;#ASMEND
2692 ; GFX940-NEXT: s_nop 0
2693 ; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
2694 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
2695 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
2696 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2697 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each <3 x bfloat> source is the first three lanes of its own inline-asm
; defined <4 x bfloat> value; the shuffled result is stored through %ptr.
2698 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2699 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2700 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2701 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2702 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 0, i32 2>
2703 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle of <3 x bfloat> with mask <5, 1, 2>: result lane 0 is
; lane 2 of the second source (%extract31), lanes 1 and 2 stay in place from
; the first source (%extract3). The checks expect the mask constant 0xffff in
; an SGPR and a v_bfi_b32 of v3 and v0 under that mask, followed by a 16-bit
; store of v1 at offset 4 and a dword store of the merged value. gfx940
; additionally expects an s_nop 0 before the v_bfi_b32.
2707 define void @v_shuffle_v3bf16_v3bf16__5_1_2(ptr addrspace(1) inreg %ptr) {
2708 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_1_2:
2710 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2711 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
2712 ; GFX900-NEXT: ;;#ASMSTART
2713 ; GFX900-NEXT: ; def v[0:1]
2714 ; GFX900-NEXT: ;;#ASMEND
2715 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
2716 ; GFX900-NEXT: ;;#ASMSTART
2717 ; GFX900-NEXT: ; def v[2:3]
2718 ; GFX900-NEXT: ;;#ASMEND
2719 ; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v0
2720 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
2721 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
2722 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2723 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2725 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_1_2:
2727 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2728 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2729 ; GFX90A-NEXT: ;;#ASMSTART
2730 ; GFX90A-NEXT: ; def v[0:1]
2731 ; GFX90A-NEXT: ;;#ASMEND
2732 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
2733 ; GFX90A-NEXT: ;;#ASMSTART
2734 ; GFX90A-NEXT: ; def v[2:3]
2735 ; GFX90A-NEXT: ;;#ASMEND
2736 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0
2737 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
2738 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
2739 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2740 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2742 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_1_2:
2744 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2745 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2746 ; GFX940-NEXT: ;;#ASMSTART
2747 ; GFX940-NEXT: ; def v[0:1]
2748 ; GFX940-NEXT: ;;#ASMEND
2749 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
2750 ; GFX940-NEXT: ;;#ASMSTART
2751 ; GFX940-NEXT: ; def v[2:3]
2752 ; GFX940-NEXT: ;;#ASMEND
2753 ; GFX940-NEXT: s_nop 0
2754 ; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0
2755 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
2756 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
2757 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2758 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each <3 x bfloat> source is the first three lanes of its own inline-asm
; defined <4 x bfloat> value; the shuffled result is stored through %ptr.
2759 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2760 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2761 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2762 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2763 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 1, i32 2>
2764 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle of <3 x bfloat> with mask <5, 3, 2>: result lanes 0 and 1
; are lanes 2 and 0 of the second source (%extract31), lane 2 is lane 2 of the
; first source (%extract3). The checks expect the constant 0x5040100 in an
; SGPR, a v_perm_b32 of v2 and v3 (both from the second inline-asm def), a
; 16-bit store of v1 at offset 4 and a dword store of the permuted value.
; gfx940 additionally expects an s_nop 0 before the v_perm_b32.
2768 define void @v_shuffle_v3bf16_v3bf16__5_3_2(ptr addrspace(1) inreg %ptr) {
2769 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_3_2:
2771 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2772 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
2773 ; GFX900-NEXT: ;;#ASMSTART
2774 ; GFX900-NEXT: ; def v[0:1]
2775 ; GFX900-NEXT: ;;#ASMEND
2776 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2777 ; GFX900-NEXT: ;;#ASMSTART
2778 ; GFX900-NEXT: ; def v[2:3]
2779 ; GFX900-NEXT: ;;#ASMEND
2780 ; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4
2781 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
2782 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
2783 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2784 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2786 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_3_2:
2788 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2789 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2790 ; GFX90A-NEXT: ;;#ASMSTART
2791 ; GFX90A-NEXT: ; def v[0:1]
2792 ; GFX90A-NEXT: ;;#ASMEND
2793 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2794 ; GFX90A-NEXT: ;;#ASMSTART
2795 ; GFX90A-NEXT: ; def v[2:3]
2796 ; GFX90A-NEXT: ;;#ASMEND
2797 ; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4
2798 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
2799 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
2800 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2801 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2803 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_3_2:
2805 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2806 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2807 ; GFX940-NEXT: ;;#ASMSTART
2808 ; GFX940-NEXT: ; def v[0:1]
2809 ; GFX940-NEXT: ;;#ASMEND
2810 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2811 ; GFX940-NEXT: ;;#ASMSTART
2812 ; GFX940-NEXT: ; def v[2:3]
2813 ; GFX940-NEXT: ;;#ASMEND
2814 ; GFX940-NEXT: s_nop 0
2815 ; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2
2816 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
2817 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
2818 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2819 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each <3 x bfloat> source is the first three lanes of its own inline-asm
; defined <4 x bfloat> value; the shuffled result is stored through %ptr.
2820 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2821 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2822 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2823 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2824 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 3, i32 2>
2825 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle of <3 x bfloat> with mask <5, 4, 2>: result lanes 0 and 1
; are lanes 2 and 1 of the second source (%extract31), lane 2 is lane 2 of the
; first source (%extract3). The checks expect the mask constant 0xffff in an
; SGPR and a v_bfi_b32 of v3 and v2 (both from the second inline-asm def),
; followed by a 16-bit store of v1 at offset 4 and a dword store of the merged
; value. gfx940 additionally expects an s_nop 0 before the v_bfi_b32.
2829 define void @v_shuffle_v3bf16_v3bf16__5_4_2(ptr addrspace(1) inreg %ptr) {
2830 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_4_2:
2832 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2833 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
2834 ; GFX900-NEXT: ;;#ASMSTART
2835 ; GFX900-NEXT: ; def v[0:1]
2836 ; GFX900-NEXT: ;;#ASMEND
2837 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
2838 ; GFX900-NEXT: ;;#ASMSTART
2839 ; GFX900-NEXT: ; def v[2:3]
2840 ; GFX900-NEXT: ;;#ASMEND
2841 ; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v2
2842 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
2843 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
2844 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2845 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2847 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_4_2:
2849 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2850 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2851 ; GFX90A-NEXT: ;;#ASMSTART
2852 ; GFX90A-NEXT: ; def v[0:1]
2853 ; GFX90A-NEXT: ;;#ASMEND
2854 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
2855 ; GFX90A-NEXT: ;;#ASMSTART
2856 ; GFX90A-NEXT: ; def v[2:3]
2857 ; GFX90A-NEXT: ;;#ASMEND
2858 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v2
2859 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
2860 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
2861 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2862 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2864 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_4_2:
2866 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2867 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2868 ; GFX940-NEXT: ;;#ASMSTART
2869 ; GFX940-NEXT: ; def v[0:1]
2870 ; GFX940-NEXT: ;;#ASMEND
2871 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
2872 ; GFX940-NEXT: ;;#ASMSTART
2873 ; GFX940-NEXT: ; def v[2:3]
2874 ; GFX940-NEXT: ;;#ASMEND
2875 ; GFX940-NEXT: s_nop 0
2876 ; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2
2877 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
2878 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
2879 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2880 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each <3 x bfloat> source is the first three lanes of its own inline-asm
; defined <4 x bfloat> value; the shuffled result is stored through %ptr.
2881 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2882 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2883 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2884 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2885 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 4, i32 2>
2886 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Single-source shuffle of <3 x bfloat> with mask <poison, 3, 3>. Index 3
; refers to lane 0 of the second shufflevector operand, which is poison, so no
; result lane reads the inline-asm value. The shared gfx9 checks expect only
; the entry s_waitcnt and the return, with no inline-asm def and no store.
2890 define void @v_shuffle_v3bf16_v3bf16__u_3_3(ptr addrspace(1) inreg %ptr) {
2891 ; GFX9-LABEL: v_shuffle_v3bf16_v3bf16__u_3_3:
2893 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2894 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; The <3 x bfloat> source is the first three lanes of a <4 x bfloat> value
; defined by inline asm; the shuffled result is stored through %ptr.
2895 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2896 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2897 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 poison, i32 3, i32 3>
2898 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Single-source shuffle of <3 x bfloat> with mask <0, 3, 3>. Index 3 refers to
; lane 0 of the second shufflevector operand, which is poison, so only result
; lane 0 carries a defined value. The checks expect no shuffle instruction:
; the inline-asm registers are stored directly as a 16-bit store of v1 at
; offset 4 and a dword store of v0.
2902 define void @v_shuffle_v3bf16_v3bf16__0_3_3(ptr addrspace(1) inreg %ptr) {
2903 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__0_3_3:
2905 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2906 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2907 ; GFX900-NEXT: ;;#ASMSTART
2908 ; GFX900-NEXT: ; def v[0:1]
2909 ; GFX900-NEXT: ;;#ASMEND
2910 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
2911 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2912 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2913 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2915 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__0_3_3:
2917 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2918 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2919 ; GFX90A-NEXT: ;;#ASMSTART
2920 ; GFX90A-NEXT: ; def v[0:1]
2921 ; GFX90A-NEXT: ;;#ASMEND
2922 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
2923 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2924 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2925 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2927 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__0_3_3:
2929 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2930 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2931 ; GFX940-NEXT: ;;#ASMSTART
2932 ; GFX940-NEXT: ; def v[0:1]
2933 ; GFX940-NEXT: ;;#ASMEND
2934 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2935 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2936 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2937 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; The <3 x bfloat> source is the first three lanes of a <4 x bfloat> value
; defined by inline asm; the shuffled result is stored through %ptr.
2938 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2939 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2940 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 0, i32 3, i32 3>
2941 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Single-source shuffle of <3 x bfloat> with mask <1, 3, 3>. Index 3 refers to
; lane 0 of the second shufflevector operand, which is poison, so only result
; lane 0 carries a defined value. The checks expect a v_alignbit_b32 by 16
; whose high input is an SGPR (s4, or s0 on gfx940) that the checked sequence
; never writes, and a single dword store with no 16-bit store.
; NOTE(review): the unwritten SGPR presumably stands in for the poison lanes;
; confirm against the backend before relying on it.
2945 define void @v_shuffle_v3bf16_v3bf16__1_3_3(ptr addrspace(1) inreg %ptr) {
2946 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__1_3_3:
2948 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2949 ; GFX900-NEXT: ;;#ASMSTART
2950 ; GFX900-NEXT: ; def v[0:1]
2951 ; GFX900-NEXT: ;;#ASMEND
2952 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2953 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
2954 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2955 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2956 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2958 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__1_3_3:
2960 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2961 ; GFX90A-NEXT: ;;#ASMSTART
2962 ; GFX90A-NEXT: ; def v[0:1]
2963 ; GFX90A-NEXT: ;;#ASMEND
2964 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2965 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
2966 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2967 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2968 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2970 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__1_3_3:
2972 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2973 ; GFX940-NEXT: ;;#ASMSTART
2974 ; GFX940-NEXT: ; def v[0:1]
2975 ; GFX940-NEXT: ;;#ASMEND
2976 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2977 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
2978 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2979 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2980 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; The <3 x bfloat> source is the first three lanes of a <4 x bfloat> value
; defined by inline asm; the shuffled result is stored through %ptr.
2981 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2982 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
2983 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 1, i32 3, i32 3>
2984 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Single-source shuffle of <3 x bfloat> with mask <2, 3, 3>. Index 3 refers to
; lane 0 of the second shufflevector operand, which is poison, so only result
; lane 0 carries a defined value. The checks expect no shuffle instruction and
; a single dword store of v1, with no 16-bit store.
2988 define void @v_shuffle_v3bf16_v3bf16__2_3_3(ptr addrspace(1) inreg %ptr) {
2989 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__2_3_3:
2991 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2992 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2993 ; GFX900-NEXT: ;;#ASMSTART
2994 ; GFX900-NEXT: ; def v[0:1]
2995 ; GFX900-NEXT: ;;#ASMEND
2996 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
2997 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2998 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3000 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__2_3_3:
3002 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3003 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3004 ; GFX90A-NEXT: ;;#ASMSTART
3005 ; GFX90A-NEXT: ; def v[0:1]
3006 ; GFX90A-NEXT: ;;#ASMEND
3007 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
3008 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3009 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3011 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__2_3_3:
3013 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3014 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3015 ; GFX940-NEXT: ;;#ASMSTART
3016 ; GFX940-NEXT: ; def v[0:1]
3017 ; GFX940-NEXT: ;;#ASMEND
3018 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
3019 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3020 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; The <3 x bfloat> source is the first three lanes of a <4 x bfloat> value
; defined by inline asm; the shuffled result is stored through %ptr.
3021 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3022 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3023 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 2, i32 3, i32 3>
3024 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Single-source shuffle of <3 x bfloat> with mask <3, 3, 3>. Every mask index
; refers to lane 0 of the second shufflevector operand, which is poison, so no
; result lane reads the inline-asm value. The shared gfx9 checks expect only
; the entry s_waitcnt and the return, with no inline-asm def and no store.
3028 define void @v_shuffle_v3bf16_v3bf16__3_3_3(ptr addrspace(1) inreg %ptr) {
3029 ; GFX9-LABEL: v_shuffle_v3bf16_v3bf16__3_3_3:
3031 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3032 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; The <3 x bfloat> source is the first three lanes of a <4 x bfloat> value
; defined by inline asm; the shuffled result is stored through %ptr.
3033 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3034 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3035 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 3, i32 3, i32 3>
3036 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle of <3 x bfloat> with mask <4, 3, 3>: every result lane is
; taken from the second source (%extract31), lane 1 then lane 0 twice; the
; first source (%extract3) is not referenced by the mask. The checks show a
; single inline-asm def (v[0:1]), a v_alignbit_b32 of v0 with itself by 16, a
; 16-bit store of v0 at offset 4 and a dword store of the rotated value.
; gfx940 additionally expects an s_nop 0 before the v_alignbit_b32.
3040 define void @v_shuffle_v3bf16_v3bf16__4_3_3(ptr addrspace(1) inreg %ptr) {
3041 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__4_3_3:
3043 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3044 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3045 ; GFX900-NEXT: ;;#ASMSTART
3046 ; GFX900-NEXT: ; def v[0:1]
3047 ; GFX900-NEXT: ;;#ASMEND
3048 ; GFX900-NEXT: v_alignbit_b32 v1, v0, v0, 16
3049 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
3050 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
3051 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3052 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3054 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__4_3_3:
3056 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3057 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3058 ; GFX90A-NEXT: ;;#ASMSTART
3059 ; GFX90A-NEXT: ; def v[0:1]
3060 ; GFX90A-NEXT: ;;#ASMEND
3061 ; GFX90A-NEXT: v_alignbit_b32 v1, v0, v0, 16
3062 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
3063 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
3064 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3065 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3067 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__4_3_3:
3069 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3070 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3071 ; GFX940-NEXT: ;;#ASMSTART
3072 ; GFX940-NEXT: ; def v[0:1]
3073 ; GFX940-NEXT: ;;#ASMEND
3074 ; GFX940-NEXT: s_nop 0
3075 ; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16
3076 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3077 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
3078 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3079 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each <3 x bfloat> source is the first three lanes of its own inline-asm
; defined <4 x bfloat> value; the shuffled result is stored through %ptr.
3080 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3081 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3082 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3083 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3084 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 4, i32 3, i32 3>
3085 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Two-source shuffle of <3 x bfloat> with mask <5, 3, 3>: every result lane is
; taken from the second source (%extract31), lane 2 then lane 0 twice; the
; first source (%extract3) is not referenced by the mask. The checks show a
; single inline-asm def (v[0:1]), the constant 0x5040100 in an SGPR, a
; v_perm_b32 of v0 and v1 using that SGPR, a 16-bit store of v0 at offset 4
; and a dword store of the permuted value.
3089 define void @v_shuffle_v3bf16_v3bf16__5_3_3(ptr addrspace(1) inreg %ptr) {
3090 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_3_3:
3092 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3093 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3094 ; GFX900-NEXT: ;;#ASMSTART
3095 ; GFX900-NEXT: ; def v[0:1]
3096 ; GFX900-NEXT: ;;#ASMEND
3097 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
3098 ; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4
3099 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
3100 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
3101 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3102 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3104 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_3_3:
3106 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3107 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3108 ; GFX90A-NEXT: ;;#ASMSTART
3109 ; GFX90A-NEXT: ; def v[0:1]
3110 ; GFX90A-NEXT: ;;#ASMEND
3111 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
3112 ; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4
3113 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
3114 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
3115 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3116 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3118 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_3_3:
3120 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3121 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3122 ; GFX940-NEXT: ;;#ASMSTART
3123 ; GFX940-NEXT: ; def v[0:1]
3124 ; GFX940-NEXT: ;;#ASMEND
3125 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
3126 ; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2
3127 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3128 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
3129 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3130 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each <3 x bfloat> source is the first three lanes of its own inline-asm
; defined <4 x bfloat> value; the shuffled result is stored through %ptr.
3131 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3132 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3133 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3134 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3135 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 3, i32 3>
3136 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <5, poison, 3> (the "u" in the name is the poison lane). Mask
; indices 3-5 select from %extract31 (taken from %vec1), so the stored vector
; is { %vec1[2], poison, %vec1[0] }. Only the second operand is referenced.
3140 define void @v_shuffle_v3bf16_v3bf16__5_u_3(ptr addrspace(1) inreg %ptr) {
3141 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_u_3:
3143 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3144 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3145 ; GFX900-NEXT: ;;#ASMSTART
3146 ; GFX900-NEXT: ; def v[0:1]
3147 ; GFX900-NEXT: ;;#ASMEND
3148 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
3149 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
3150 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3151 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3153 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_u_3:
3155 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3156 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3157 ; GFX90A-NEXT: ;;#ASMSTART
3158 ; GFX90A-NEXT: ; def v[0:1]
3159 ; GFX90A-NEXT: ;;#ASMEND
3160 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
3161 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
3162 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3163 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3165 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_u_3:
3167 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3168 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3169 ; GFX940-NEXT: ;;#ASMSTART
3170 ; GFX940-NEXT: ; def v[0:1]
3171 ; GFX940-NEXT: ;;#ASMEND
3172 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3173 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
3174 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3175 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are defined by inline asm ("=v" output constraint) as 4-element
; vectors and narrowed to 3 elements before the shuffle under test.
3176 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3177 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3178 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3179 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3180 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 poison, i32 3>
3181 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <5, 0, 3> (matches the name suffix). Mask indices 0-2 select
; from %extract3 (taken from %vec0) and 3-5 from %extract31 (taken from %vec1),
; so the stored vector is { %vec1[2], %vec0[0], %vec1[0] }. Both operands are
; referenced by the mask.
3185 define void @v_shuffle_v3bf16_v3bf16__5_0_3(ptr addrspace(1) inreg %ptr) {
3186 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_0_3:
3188 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3189 ; GFX900-NEXT: ;;#ASMSTART
3190 ; GFX900-NEXT: ; def v[0:1]
3191 ; GFX900-NEXT: ;;#ASMEND
3192 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
3193 ; GFX900-NEXT: ;;#ASMSTART
3194 ; GFX900-NEXT: ; def v[1:2]
3195 ; GFX900-NEXT: ;;#ASMEND
3196 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
3197 ; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4
3198 ; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4
3199 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
3200 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3201 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3203 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_0_3:
3205 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3206 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3207 ; GFX90A-NEXT: ;;#ASMSTART
3208 ; GFX90A-NEXT: ; def v[0:1]
3209 ; GFX90A-NEXT: ;;#ASMEND
3210 ; GFX90A-NEXT: ;;#ASMSTART
3211 ; GFX90A-NEXT: ; def v[2:3]
3212 ; GFX90A-NEXT: ;;#ASMEND
3213 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
3214 ; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
3215 ; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4
3216 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3217 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3218 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3220 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_0_3:
3222 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3223 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3224 ; GFX940-NEXT: ;;#ASMSTART
3225 ; GFX940-NEXT: ; def v[0:1]
3226 ; GFX940-NEXT: ;;#ASMEND
3227 ; GFX940-NEXT: ;;#ASMSTART
3228 ; GFX940-NEXT: ; def v[2:3]
3229 ; GFX940-NEXT: ;;#ASMEND
3230 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
3231 ; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
3232 ; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
3233 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3234 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3235 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are defined by inline asm ("=v" output constraint) as 4-element
; vectors and narrowed to 3 elements before the shuffle under test.
3236 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3237 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3238 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3239 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3240 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 0, i32 3>
3241 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <5, 1, 3> (matches the name suffix). Mask indices 0-2 select
; from %extract3 (taken from %vec0) and 3-5 from %extract31 (taken from %vec1),
; so the stored vector is { %vec1[2], %vec0[1], %vec1[0] }. Both operands are
; referenced by the mask.
3245 define void @v_shuffle_v3bf16_v3bf16__5_1_3(ptr addrspace(1) inreg %ptr) {
3246 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_1_3:
3248 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3249 ; GFX900-NEXT: ;;#ASMSTART
3250 ; GFX900-NEXT: ; def v[0:1]
3251 ; GFX900-NEXT: ;;#ASMEND
3252 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
3253 ; GFX900-NEXT: ;;#ASMSTART
3254 ; GFX900-NEXT: ; def v[1:2]
3255 ; GFX900-NEXT: ;;#ASMEND
3256 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
3257 ; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0
3258 ; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4
3259 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
3260 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3261 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3263 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_1_3:
3265 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3266 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3267 ; GFX90A-NEXT: ;;#ASMSTART
3268 ; GFX90A-NEXT: ; def v[0:1]
3269 ; GFX90A-NEXT: ;;#ASMEND
3270 ; GFX90A-NEXT: ;;#ASMSTART
3271 ; GFX90A-NEXT: ; def v[2:3]
3272 ; GFX90A-NEXT: ;;#ASMEND
3273 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
3274 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0
3275 ; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4
3276 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3277 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3278 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3280 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_1_3:
3282 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3283 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3284 ; GFX940-NEXT: ;;#ASMSTART
3285 ; GFX940-NEXT: ; def v[0:1]
3286 ; GFX940-NEXT: ;;#ASMEND
3287 ; GFX940-NEXT: ;;#ASMSTART
3288 ; GFX940-NEXT: ; def v[2:3]
3289 ; GFX940-NEXT: ;;#ASMEND
3290 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
3291 ; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0
3292 ; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
3293 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3294 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3295 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are defined by inline asm ("=v" output constraint) as 4-element
; vectors and narrowed to 3 elements before the shuffle under test.
3296 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3297 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3298 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3299 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3300 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 1, i32 3>
3301 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <5, 2, 3> (matches the name suffix). Mask indices 0-2 select
; from %extract3 (taken from %vec0) and 3-5 from %extract31 (taken from %vec1),
; so the stored vector is { %vec1[2], %vec0[2], %vec1[0] }. Both operands are
; referenced by the mask.
3305 define void @v_shuffle_v3bf16_v3bf16__5_2_3(ptr addrspace(1) inreg %ptr) {
3306 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_2_3:
3308 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3309 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3310 ; GFX900-NEXT: ;;#ASMSTART
3311 ; GFX900-NEXT: ; def v[0:1]
3312 ; GFX900-NEXT: ;;#ASMEND
3313 ; GFX900-NEXT: ;;#ASMSTART
3314 ; GFX900-NEXT: ; def v[2:3]
3315 ; GFX900-NEXT: ;;#ASMEND
3316 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
3317 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
3318 ; GFX900-NEXT: global_store_short v4, v2, s[16:17] offset:4
3319 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3320 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3321 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3323 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_2_3:
3325 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3326 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3327 ; GFX90A-NEXT: ;;#ASMSTART
3328 ; GFX90A-NEXT: ; def v[0:1]
3329 ; GFX90A-NEXT: ;;#ASMEND
3330 ; GFX90A-NEXT: ;;#ASMSTART
3331 ; GFX90A-NEXT: ; def v[2:3]
3332 ; GFX90A-NEXT: ;;#ASMEND
3333 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
3334 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
3335 ; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4
3336 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3337 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3338 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3340 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_2_3:
3342 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3343 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3344 ; GFX940-NEXT: ;;#ASMSTART
3345 ; GFX940-NEXT: ; def v[0:1]
3346 ; GFX940-NEXT: ;;#ASMEND
3347 ; GFX940-NEXT: ;;#ASMSTART
3348 ; GFX940-NEXT: ; def v[2:3]
3349 ; GFX940-NEXT: ;;#ASMEND
3350 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
3351 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
3352 ; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
3353 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3354 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3355 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are defined by inline asm ("=v" output constraint) as 4-element
; vectors and narrowed to 3 elements before the shuffle under test.
3356 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3357 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3358 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3359 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3360 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 2, i32 3>
3361 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <5, 4, 3> (matches the name suffix). Mask indices 3-5 select
; from %extract31 (taken from %vec1), so the stored vector is
; { %vec1[2], %vec1[1], %vec1[0] }, i.e. the second operand reversed. Only the
; second operand is referenced by the mask.
3365 define void @v_shuffle_v3bf16_v3bf16__5_4_3(ptr addrspace(1) inreg %ptr) {
3366 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_4_3:
3368 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3369 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3370 ; GFX900-NEXT: ;;#ASMSTART
3371 ; GFX900-NEXT: ; def v[0:1]
3372 ; GFX900-NEXT: ;;#ASMEND
3373 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
3374 ; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0
3375 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
3376 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
3377 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3378 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3380 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_4_3:
3382 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3383 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3384 ; GFX90A-NEXT: ;;#ASMSTART
3385 ; GFX90A-NEXT: ; def v[0:1]
3386 ; GFX90A-NEXT: ;;#ASMEND
3387 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
3388 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v0
3389 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
3390 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
3391 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3392 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3394 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_4_3:
3396 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3397 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3398 ; GFX940-NEXT: ;;#ASMSTART
3399 ; GFX940-NEXT: ; def v[0:1]
3400 ; GFX940-NEXT: ;;#ASMEND
3401 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
3402 ; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0
3403 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3404 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
3405 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3406 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are defined by inline asm ("=v" output constraint) as 4-element
; vectors and narrowed to 3 elements before the shuffle under test.
3407 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3408 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3409 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3410 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3411 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 4, i32 3>
3412 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <poison, 4, 4> (the "u" in the name is the poison lane). Mask
; indices 3-5 select from %extract31 (taken from %vec1), so the stored vector
; is { poison, %vec1[1], %vec1[1] }. Only the second operand is referenced.
3416 define void @v_shuffle_v3bf16_v3bf16__u_4_4(ptr addrspace(1) inreg %ptr) {
3417 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__u_4_4:
3419 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3420 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3421 ; GFX900-NEXT: ;;#ASMSTART
3422 ; GFX900-NEXT: ; def v[0:1]
3423 ; GFX900-NEXT: ;;#ASMEND
3424 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
3425 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3426 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
3427 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3428 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3430 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__u_4_4:
3432 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3433 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3434 ; GFX90A-NEXT: ;;#ASMSTART
3435 ; GFX90A-NEXT: ; def v[0:1]
3436 ; GFX90A-NEXT: ;;#ASMEND
3437 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
3438 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3439 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
3440 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3441 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3443 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__u_4_4:
3445 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3446 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3447 ; GFX940-NEXT: ;;#ASMSTART
3448 ; GFX940-NEXT: ; def v[0:1]
3449 ; GFX940-NEXT: ;;#ASMEND
3450 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
3451 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3452 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3453 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3454 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are defined by inline asm ("=v" output constraint) as 4-element
; vectors and narrowed to 3 elements before the shuffle under test.
3455 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3456 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3457 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3458 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3459 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 poison, i32 4, i32 4>
3460 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <0, 4, 4> (matches the name suffix). Mask indices 0-2 select
; from %extract3 (taken from %vec0) and 3-5 from %extract31 (taken from %vec1),
; so the stored vector is { %vec0[0], %vec1[1], %vec1[1] }. Both operands are
; referenced by the mask.
3464 define void @v_shuffle_v3bf16_v3bf16__0_4_4(ptr addrspace(1) inreg %ptr) {
3465 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__0_4_4:
3467 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3468 ; GFX900-NEXT: ;;#ASMSTART
3469 ; GFX900-NEXT: ; def v[0:1]
3470 ; GFX900-NEXT: ;;#ASMEND
3471 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
3472 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
3473 ; GFX900-NEXT: ;;#ASMSTART
3474 ; GFX900-NEXT: ; def v[1:2]
3475 ; GFX900-NEXT: ;;#ASMEND
3476 ; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1
3477 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
3478 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1
3479 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
3480 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3481 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3483 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__0_4_4:
3485 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3486 ; GFX90A-NEXT: ;;#ASMSTART
3487 ; GFX90A-NEXT: ; def v[0:1]
3488 ; GFX90A-NEXT: ;;#ASMEND
3489 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
3490 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3491 ; GFX90A-NEXT: ;;#ASMSTART
3492 ; GFX90A-NEXT: ; def v[2:3]
3493 ; GFX90A-NEXT: ;;#ASMEND
3494 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v2
3495 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3496 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v2
3497 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
3498 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3499 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3501 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__0_4_4:
3503 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3504 ; GFX940-NEXT: ;;#ASMSTART
3505 ; GFX940-NEXT: ; def v[0:1]
3506 ; GFX940-NEXT: ;;#ASMEND
3507 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
3508 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3509 ; GFX940-NEXT: ;;#ASMSTART
3510 ; GFX940-NEXT: ; def v[2:3]
3511 ; GFX940-NEXT: ;;#ASMEND
3512 ; GFX940-NEXT: s_nop 0
3513 ; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2
3514 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3515 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2
3516 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
3517 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3518 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are defined by inline asm ("=v" output constraint) as 4-element
; vectors and narrowed to 3 elements before the shuffle under test.
3519 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3520 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3521 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3522 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3523 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 0, i32 4, i32 4>
3524 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <1, 4, 4> (matches the name suffix). Mask indices 0-2 select
; from %extract3 (taken from %vec0) and 3-5 from %extract31 (taken from %vec1),
; so the stored vector is { %vec0[1], %vec1[1], %vec1[1] }. Both operands are
; referenced by the mask.
3528 define void @v_shuffle_v3bf16_v3bf16__1_4_4(ptr addrspace(1) inreg %ptr) {
3529 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__1_4_4:
3531 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3532 ; GFX900-NEXT: ;;#ASMSTART
3533 ; GFX900-NEXT: ; def v[0:1]
3534 ; GFX900-NEXT: ;;#ASMEND
3535 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3536 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
3537 ; GFX900-NEXT: ;;#ASMSTART
3538 ; GFX900-NEXT: ; def v[1:2]
3539 ; GFX900-NEXT: ;;#ASMEND
3540 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
3541 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
3542 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1
3543 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
3544 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3545 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3547 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__1_4_4:
3549 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3550 ; GFX90A-NEXT: ;;#ASMSTART
3551 ; GFX90A-NEXT: ; def v[0:1]
3552 ; GFX90A-NEXT: ;;#ASMEND
3553 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3554 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3555 ; GFX90A-NEXT: ;;#ASMSTART
3556 ; GFX90A-NEXT: ; def v[2:3]
3557 ; GFX90A-NEXT: ;;#ASMEND
3558 ; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4
3559 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3560 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v2
3561 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
3562 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3563 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3565 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__1_4_4:
3567 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3568 ; GFX940-NEXT: ;;#ASMSTART
3569 ; GFX940-NEXT: ; def v[0:1]
3570 ; GFX940-NEXT: ;;#ASMEND
3571 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3572 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3573 ; GFX940-NEXT: ;;#ASMSTART
3574 ; GFX940-NEXT: ; def v[2:3]
3575 ; GFX940-NEXT: ;;#ASMEND
3576 ; GFX940-NEXT: s_nop 0
3577 ; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2
3578 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3579 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2
3580 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
3581 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3582 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are defined by inline asm ("=v" output constraint) as 4-element
; vectors and narrowed to 3 elements before the shuffle under test.
3583 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3584 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3585 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3586 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3587 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 1, i32 4, i32 4>
3588 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <2, 4, 4> (matches the name suffix). Mask indices 0-2 select
; from %extract3 (taken from %vec0) and 3-5 from %extract31 (taken from %vec1),
; so the stored vector is { %vec0[2], %vec1[1], %vec1[1] }. Both operands are
; referenced by the mask.
3592 define void @v_shuffle_v3bf16_v3bf16__2_4_4(ptr addrspace(1) inreg %ptr) {
3593 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__2_4_4:
3595 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3596 ; GFX900-NEXT: ;;#ASMSTART
3597 ; GFX900-NEXT: ; def v[0:1]
3598 ; GFX900-NEXT: ;;#ASMEND
3599 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
3600 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3601 ; GFX900-NEXT: ;;#ASMSTART
3602 ; GFX900-NEXT: ; def v[2:3]
3603 ; GFX900-NEXT: ;;#ASMEND
3604 ; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v2
3605 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2
3606 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
3607 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3608 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3609 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3611 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__2_4_4:
3613 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3614 ; GFX90A-NEXT: ;;#ASMSTART
3615 ; GFX90A-NEXT: ; def v[0:1]
3616 ; GFX90A-NEXT: ;;#ASMEND
3617 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
3618 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3619 ; GFX90A-NEXT: ;;#ASMSTART
3620 ; GFX90A-NEXT: ; def v[2:3]
3621 ; GFX90A-NEXT: ;;#ASMEND
3622 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v2
3623 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v2
3624 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
3625 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3626 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3627 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3629 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__2_4_4:
3631 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3632 ; GFX940-NEXT: ;;#ASMSTART
3633 ; GFX940-NEXT: ; def v[0:1]
3634 ; GFX940-NEXT: ;;#ASMEND
3635 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
3636 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3637 ; GFX940-NEXT: ;;#ASMSTART
3638 ; GFX940-NEXT: ; def v[2:3]
3639 ; GFX940-NEXT: ;;#ASMEND
3640 ; GFX940-NEXT: s_nop 0
3641 ; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2
3642 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2
3643 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3644 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3645 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3646 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are defined by inline asm ("=v" output constraint) as 4-element
; vectors and narrowed to 3 elements before the shuffle under test.
3647 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3648 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3649 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3650 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3651 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 2, i32 4, i32 4>
3652 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 4, 4> (matches the name suffix). Mask indices 3-5 select
; from %extract31 (taken from %vec1), so the stored vector is
; { %vec1[0], %vec1[1], %vec1[1] }. Only the second operand is referenced.
3656 define void @v_shuffle_v3bf16_v3bf16__3_4_4(ptr addrspace(1) inreg %ptr) {
3657 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__3_4_4:
3659 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3660 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3661 ; GFX900-NEXT: ;;#ASMSTART
3662 ; GFX900-NEXT: ; def v[0:1]
3663 ; GFX900-NEXT: ;;#ASMEND
3664 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
3665 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3666 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
3667 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3668 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3670 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__3_4_4:
3672 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3673 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3674 ; GFX90A-NEXT: ;;#ASMSTART
3675 ; GFX90A-NEXT: ; def v[0:1]
3676 ; GFX90A-NEXT: ;;#ASMEND
3677 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
3678 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3679 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
3680 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3681 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3683 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__3_4_4:
3685 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3686 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3687 ; GFX940-NEXT: ;;#ASMSTART
3688 ; GFX940-NEXT: ; def v[0:1]
3689 ; GFX940-NEXT: ;;#ASMEND
3690 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
3691 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3692 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3693 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3694 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are defined by inline asm ("=v" output constraint) as 4-element
; vectors and narrowed to 3 elements before the shuffle under test.
3695 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3696 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3697 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3698 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3699 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 3, i32 4, i32 4>
3700 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <4, 4, 4> (matches the name suffix). Mask indices 3-5 select
; from %extract31 (taken from %vec1), so the stored vector is
; { %vec1[1], %vec1[1], %vec1[1] }, i.e. a splat of element 1 of the second
; operand. Only the second operand is referenced by the mask.
3704 define void @v_shuffle_v3bf16_v3bf16__4_4_4(ptr addrspace(1) inreg %ptr) {
3705 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__4_4_4:
3707 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3708 ; GFX900-NEXT: ;;#ASMSTART
3709 ; GFX900-NEXT: ; def v[0:1]
3710 ; GFX900-NEXT: ;;#ASMEND
3711 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3712 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3713 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
3714 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3715 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
3716 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
3717 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3718 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3720 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__4_4_4:
3722 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3723 ; GFX90A-NEXT: ;;#ASMSTART
3724 ; GFX90A-NEXT: ; def v[0:1]
3725 ; GFX90A-NEXT: ;;#ASMEND
3726 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3727 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3728 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
3729 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3730 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
3731 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
3732 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3733 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3735 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__4_4_4:
3737 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3738 ; GFX940-NEXT: ;;#ASMSTART
3739 ; GFX940-NEXT: ; def v[0:1]
3740 ; GFX940-NEXT: ;;#ASMEND
3741 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3742 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3743 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
3744 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3745 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
3746 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3747 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3748 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are defined by inline asm ("=v" output constraint) as 4-element
; vectors and narrowed to 3 elements before the shuffle under test.
3749 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3750 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3751 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3752 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3753 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 4, i32 4, i32 4>
3754 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <5, 4, 4> (matches the name suffix). Mask indices 3-5 select
; from %extract31 (taken from %vec1), so the stored vector is
; { %vec1[2], %vec1[1], %vec1[1] }. Only the second operand is referenced.
3758 define void @v_shuffle_v3bf16_v3bf16__5_4_4(ptr addrspace(1) inreg %ptr) {
3759 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_4_4:
3761 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3762 ; GFX900-NEXT: ;;#ASMSTART
3763 ; GFX900-NEXT: ; def v[0:1]
3764 ; GFX900-NEXT: ;;#ASMEND
3765 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
3766 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3767 ; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0
3768 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3769 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
3770 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
3771 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3772 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3774 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_4_4:
3776 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3777 ; GFX90A-NEXT: ;;#ASMSTART
3778 ; GFX90A-NEXT: ; def v[0:1]
3779 ; GFX90A-NEXT: ;;#ASMEND
3780 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
3781 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3782 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v0
3783 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3784 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
3785 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
3786 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3787 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3789 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_4_4:
3791 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3792 ; GFX940-NEXT: ;;#ASMSTART
3793 ; GFX940-NEXT: ; def v[0:1]
3794 ; GFX940-NEXT: ;;#ASMEND
3795 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
3796 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3797 ; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0
3798 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3799 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3800 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
3801 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3802 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are defined by inline asm ("=v" output constraint) as 4-element
; vectors and narrowed to 3 elements before the shuffle under test.
3803 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3804 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3805 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3806 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3807 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 4, i32 4>
3808 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <5, poison, 4> (the "u" in the name is the poison lane). Mask
; indices 3-5 select from %extract31 (taken from %vec1), so the stored vector
; is { %vec1[2], poison, %vec1[1] }. Only the second operand is referenced.
3812 define void @v_shuffle_v3bf16_v3bf16__5_u_4(ptr addrspace(1) inreg %ptr) {
3813 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_u_4:
3815 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3816 ; GFX900-NEXT: ;;#ASMSTART
3817 ; GFX900-NEXT: ; def v[0:1]
3818 ; GFX900-NEXT: ;;#ASMEND
3819 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3820 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3821 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
3822 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
3823 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3824 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3826 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_u_4:
3828 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3829 ; GFX90A-NEXT: ;;#ASMSTART
3830 ; GFX90A-NEXT: ; def v[0:1]
3831 ; GFX90A-NEXT: ;;#ASMEND
3832 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3833 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3834 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
3835 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
3836 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3837 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3839 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_u_4:
3841 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3842 ; GFX940-NEXT: ;;#ASMSTART
3843 ; GFX940-NEXT: ; def v[0:1]
3844 ; GFX940-NEXT: ;;#ASMEND
3845 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3846 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
3847 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
3848 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3849 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3850 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are defined by inline asm ("=v" output constraint) as 4-element
; vectors and narrowed to 3 elements before the shuffle under test.
3851 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3852 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3853 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3854 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3855 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 poison, i32 4>
3856 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <3 x bfloat> operands with mask <5, 0, 4>.
; Mask indices 0-2 select lanes of %extract3 (built from %vec0) and indices
; 3-5 select lanes of %extract31 (built from %vec1), so the stored vector is
; { %vec1[2], %vec0[0], %vec1[1] }.
3860 define void @v_shuffle_v3bf16_v3bf16__5_0_4(ptr addrspace(1) inreg %ptr) {
3861 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_0_4:
3863 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3864 ; GFX900-NEXT: ;;#ASMSTART
3865 ; GFX900-NEXT: ; def v[0:1]
3866 ; GFX900-NEXT: ;;#ASMEND
3867 ; GFX900-NEXT: ;;#ASMSTART
3868 ; GFX900-NEXT: ; def v[1:2]
3869 ; GFX900-NEXT: ;;#ASMEND
3870 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
3871 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
3872 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3873 ; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4
3874 ; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4
3875 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
3876 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3877 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3879 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_0_4:
3881 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3882 ; GFX90A-NEXT: ;;#ASMSTART
3883 ; GFX90A-NEXT: ; def v[0:1]
3884 ; GFX90A-NEXT: ;;#ASMEND
3885 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3886 ; GFX90A-NEXT: ;;#ASMSTART
3887 ; GFX90A-NEXT: ; def v[2:3]
3888 ; GFX90A-NEXT: ;;#ASMEND
3889 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
3890 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v2
3891 ; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
3892 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
3893 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3894 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3895 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3897 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_0_4:
3899 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3900 ; GFX940-NEXT: ;;#ASMSTART
3901 ; GFX940-NEXT: ; def v[0:1]
3902 ; GFX940-NEXT: ;;#ASMEND
3903 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3904 ; GFX940-NEXT: ;;#ASMSTART
3905 ; GFX940-NEXT: ; def v[2:3]
3906 ; GFX940-NEXT: ;;#ASMEND
3907 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
3908 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2
3909 ; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
3910 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3911 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3912 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3913 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each input is produced by an inline asm "=v" output and then narrowed from
; <4 x bfloat> to its first three lanes before the shuffle under test.
3914 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3915 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3916 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3917 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3918 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 0, i32 4>
3919 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <3 x bfloat> operands with mask <5, 1, 4>.
; Mask indices 0-2 select lanes of %extract3 (built from %vec0) and indices
; 3-5 select lanes of %extract31 (built from %vec1), so the stored vector is
; { %vec1[2], %vec0[1], %vec1[1] }.
3923 define void @v_shuffle_v3bf16_v3bf16__5_1_4(ptr addrspace(1) inreg %ptr) {
3924 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_1_4:
3926 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3927 ; GFX900-NEXT: ;;#ASMSTART
3928 ; GFX900-NEXT: ; def v[0:1]
3929 ; GFX900-NEXT: ;;#ASMEND
3930 ; GFX900-NEXT: ;;#ASMSTART
3931 ; GFX900-NEXT: ; def v[1:2]
3932 ; GFX900-NEXT: ;;#ASMEND
3933 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
3934 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
3935 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3936 ; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0
3937 ; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4
3938 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
3939 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3940 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3942 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_1_4:
3944 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3945 ; GFX90A-NEXT: ;;#ASMSTART
3946 ; GFX90A-NEXT: ; def v[0:1]
3947 ; GFX90A-NEXT: ;;#ASMEND
3948 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3949 ; GFX90A-NEXT: ;;#ASMSTART
3950 ; GFX90A-NEXT: ; def v[2:3]
3951 ; GFX90A-NEXT: ;;#ASMEND
3952 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
3953 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v2
3954 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0
3955 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
3956 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3957 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3958 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3960 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_1_4:
3962 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3963 ; GFX940-NEXT: ;;#ASMSTART
3964 ; GFX940-NEXT: ; def v[0:1]
3965 ; GFX940-NEXT: ;;#ASMEND
3966 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3967 ; GFX940-NEXT: ;;#ASMSTART
3968 ; GFX940-NEXT: ; def v[2:3]
3969 ; GFX940-NEXT: ;;#ASMEND
3970 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
3971 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2
3972 ; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0
3973 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3974 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3975 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3976 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each input is produced by an inline asm "=v" output and then narrowed from
; <4 x bfloat> to its first three lanes before the shuffle under test.
3977 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3978 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3979 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3980 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
3981 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 1, i32 4>
3982 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <3 x bfloat> operands with mask <5, 2, 4>.
; Mask indices 0-2 select lanes of %extract3 (built from %vec0) and indices
; 3-5 select lanes of %extract31 (built from %vec1), so the stored vector is
; { %vec1[2], %vec0[2], %vec1[1] }.
3986 define void @v_shuffle_v3bf16_v3bf16__5_2_4(ptr addrspace(1) inreg %ptr) {
3987 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_2_4:
3989 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3990 ; GFX900-NEXT: ;;#ASMSTART
3991 ; GFX900-NEXT: ; def v[0:1]
3992 ; GFX900-NEXT: ;;#ASMEND
3993 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
3994 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3995 ; GFX900-NEXT: ;;#ASMSTART
3996 ; GFX900-NEXT: ; def v[2:3]
3997 ; GFX900-NEXT: ;;#ASMEND
3998 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
3999 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2
4000 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
4001 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
4002 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4003 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4005 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_2_4:
4007 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4008 ; GFX90A-NEXT: ;;#ASMSTART
4009 ; GFX90A-NEXT: ; def v[0:1]
4010 ; GFX90A-NEXT: ;;#ASMEND
4011 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
4012 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4013 ; GFX90A-NEXT: ;;#ASMSTART
4014 ; GFX90A-NEXT: ; def v[2:3]
4015 ; GFX90A-NEXT: ;;#ASMEND
4016 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
4017 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v2
4018 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
4019 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4020 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4021 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4023 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_2_4:
4025 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4026 ; GFX940-NEXT: ;;#ASMSTART
4027 ; GFX940-NEXT: ; def v[0:1]
4028 ; GFX940-NEXT: ;;#ASMEND
4029 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
4030 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4031 ; GFX940-NEXT: ;;#ASMSTART
4032 ; GFX940-NEXT: ; def v[2:3]
4033 ; GFX940-NEXT: ;;#ASMEND
4034 ; GFX940-NEXT: s_nop 0
4035 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
4036 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2
4037 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
4038 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4039 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4040 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each input is produced by an inline asm "=v" output and then narrowed from
; <4 x bfloat> to its first three lanes before the shuffle under test.
4041 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4042 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4043 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4044 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4045 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 2, i32 4>
4046 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <3 x bfloat> operands with mask <5, 3, 4>.
; Every mask index is in the 3-5 range, so only lanes of %extract31 (built
; from %vec1) are selected; the stored vector is
; { %vec1[2], %vec1[0], %vec1[1] }. The expected output accordingly shows a
; single asm def.
4050 define void @v_shuffle_v3bf16_v3bf16__5_3_4(ptr addrspace(1) inreg %ptr) {
4051 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_3_4:
4053 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4054 ; GFX900-NEXT: ;;#ASMSTART
4055 ; GFX900-NEXT: ; def v[0:1]
4056 ; GFX900-NEXT: ;;#ASMEND
4057 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
4058 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4059 ; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4
4060 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
4061 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
4062 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
4063 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4064 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4066 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_3_4:
4068 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4069 ; GFX90A-NEXT: ;;#ASMSTART
4070 ; GFX90A-NEXT: ; def v[0:1]
4071 ; GFX90A-NEXT: ;;#ASMEND
4072 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
4073 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4074 ; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4
4075 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
4076 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
4077 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
4078 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4079 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4081 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_3_4:
4083 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4084 ; GFX940-NEXT: ;;#ASMSTART
4085 ; GFX940-NEXT: ; def v[0:1]
4086 ; GFX940-NEXT: ;;#ASMEND
4087 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
4088 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4089 ; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2
4090 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
4091 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
4092 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
4093 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4094 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each input is produced by an inline asm "=v" output and then narrowed from
; <4 x bfloat> to its first three lanes; %extract3 is not selected by the mask.
4095 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4096 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4097 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4098 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4099 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 3, i32 4>
4100 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <3 x bfloat> operands with mask <poison, 5, 5>.
; Lane 0 of the result is unspecified (poison mask element); lanes 1 and 2
; both take index 5, i.e. lane 2 of %extract31 (built from %vec1). Only the
; second operand is selected, and the expected output shows a single asm def.
4104 define void @v_shuffle_v3bf16_v3bf16__u_5_5(ptr addrspace(1) inreg %ptr) {
4105 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__u_5_5:
4107 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4108 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4109 ; GFX900-NEXT: ;;#ASMSTART
4110 ; GFX900-NEXT: ; def v[0:1]
4111 ; GFX900-NEXT: ;;#ASMEND
4112 ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1
4113 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
4114 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
4115 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4116 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4118 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__u_5_5:
4120 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4121 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4122 ; GFX90A-NEXT: ;;#ASMSTART
4123 ; GFX90A-NEXT: ; def v[0:1]
4124 ; GFX90A-NEXT: ;;#ASMEND
4125 ; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1
4126 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
4127 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
4128 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4129 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4131 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__u_5_5:
4133 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4134 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4135 ; GFX940-NEXT: ;;#ASMSTART
4136 ; GFX940-NEXT: ; def v[0:1]
4137 ; GFX940-NEXT: ;;#ASMEND
4138 ; GFX940-NEXT: s_nop 0
4139 ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1
4140 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4141 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
4142 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4143 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each input is produced by an inline asm "=v" output and then narrowed from
; <4 x bfloat> to its first three lanes; %extract3 is not selected by the mask.
4144 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4145 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4146 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4147 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4148 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 poison, i32 5, i32 5>
4149 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <3 x bfloat> operands with mask <0, 5, 5>.
; Mask indices 0-2 select lanes of %extract3 (built from %vec0) and indices
; 3-5 select lanes of %extract31 (built from %vec1), so the stored vector is
; { %vec0[0], %vec1[2], %vec1[2] }.
4153 define void @v_shuffle_v3bf16_v3bf16__0_5_5(ptr addrspace(1) inreg %ptr) {
4154 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__0_5_5:
4156 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4157 ; GFX900-NEXT: ;;#ASMSTART
4158 ; GFX900-NEXT: ; def v[0:1]
4159 ; GFX900-NEXT: ;;#ASMEND
4160 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
4161 ; GFX900-NEXT: ;;#ASMSTART
4162 ; GFX900-NEXT: ; def v[1:2]
4163 ; GFX900-NEXT: ;;#ASMEND
4164 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
4165 ; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4
4166 ; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4
4167 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
4168 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4169 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4171 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__0_5_5:
4173 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4174 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4175 ; GFX90A-NEXT: ;;#ASMSTART
4176 ; GFX90A-NEXT: ; def v[0:1]
4177 ; GFX90A-NEXT: ;;#ASMEND
4178 ; GFX90A-NEXT: ;;#ASMSTART
4179 ; GFX90A-NEXT: ; def v[2:3]
4180 ; GFX90A-NEXT: ;;#ASMEND
4181 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
4182 ; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4
4183 ; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4
4184 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4185 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4186 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4188 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__0_5_5:
4190 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4191 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4192 ; GFX940-NEXT: ;;#ASMSTART
4193 ; GFX940-NEXT: ; def v[0:1]
4194 ; GFX940-NEXT: ;;#ASMEND
4195 ; GFX940-NEXT: ;;#ASMSTART
4196 ; GFX940-NEXT: ; def v[2:3]
4197 ; GFX940-NEXT: ;;#ASMEND
4198 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
4199 ; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2
4200 ; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
4201 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4202 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4203 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each input is produced by an inline asm "=v" output and then narrowed from
; <4 x bfloat> to its first three lanes before the shuffle under test.
4204 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4205 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4206 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4207 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4208 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 0, i32 5, i32 5>
4209 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <3 x bfloat> operands with mask <1, 5, 5>.
; Mask indices 0-2 select lanes of %extract3 (built from %vec0) and indices
; 3-5 select lanes of %extract31 (built from %vec1), so the stored vector is
; { %vec0[1], %vec1[2], %vec1[2] }.
4213 define void @v_shuffle_v3bf16_v3bf16__1_5_5(ptr addrspace(1) inreg %ptr) {
4214 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__1_5_5:
4216 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4217 ; GFX900-NEXT: ;;#ASMSTART
4218 ; GFX900-NEXT: ; def v[0:1]
4219 ; GFX900-NEXT: ;;#ASMEND
4220 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
4221 ; GFX900-NEXT: ;;#ASMSTART
4222 ; GFX900-NEXT: ; def v[1:2]
4223 ; GFX900-NEXT: ;;#ASMEND
4224 ; GFX900-NEXT: v_alignbit_b32 v0, v2, v0, 16
4225 ; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4
4226 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
4227 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4228 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4230 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__1_5_5:
4232 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4233 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4234 ; GFX90A-NEXT: ;;#ASMSTART
4235 ; GFX90A-NEXT: ; def v[0:1]
4236 ; GFX90A-NEXT: ;;#ASMEND
4237 ; GFX90A-NEXT: ;;#ASMSTART
4238 ; GFX90A-NEXT: ; def v[2:3]
4239 ; GFX90A-NEXT: ;;#ASMEND
4240 ; GFX90A-NEXT: v_alignbit_b32 v0, v3, v0, 16
4241 ; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4
4242 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4243 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4244 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4246 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__1_5_5:
4248 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4249 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4250 ; GFX940-NEXT: ;;#ASMSTART
4251 ; GFX940-NEXT: ; def v[0:1]
4252 ; GFX940-NEXT: ;;#ASMEND
4253 ; GFX940-NEXT: ;;#ASMSTART
4254 ; GFX940-NEXT: ; def v[2:3]
4255 ; GFX940-NEXT: ;;#ASMEND
4256 ; GFX940-NEXT: s_nop 0
4257 ; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16
4258 ; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
4259 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4260 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4261 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each input is produced by an inline asm "=v" output and then narrowed from
; <4 x bfloat> to its first three lanes before the shuffle under test.
4262 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4263 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4264 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4265 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4266 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 1, i32 5, i32 5>
4267 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <3 x bfloat> operands with mask <2, 5, 5>.
; Mask indices 0-2 select lanes of %extract3 (built from %vec0) and indices
; 3-5 select lanes of %extract31 (built from %vec1), so the stored vector is
; { %vec0[2], %vec1[2], %vec1[2] }.
4271 define void @v_shuffle_v3bf16_v3bf16__2_5_5(ptr addrspace(1) inreg %ptr) {
4272 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__2_5_5:
4274 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4275 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
4276 ; GFX900-NEXT: ;;#ASMSTART
4277 ; GFX900-NEXT: ; def v[0:1]
4278 ; GFX900-NEXT: ;;#ASMEND
4279 ; GFX900-NEXT: ;;#ASMSTART
4280 ; GFX900-NEXT: ; def v[2:3]
4281 ; GFX900-NEXT: ;;#ASMEND
4282 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
4283 ; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4
4284 ; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4
4285 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
4286 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4287 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4289 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__2_5_5:
4291 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4292 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4293 ; GFX90A-NEXT: ;;#ASMSTART
4294 ; GFX90A-NEXT: ; def v[0:1]
4295 ; GFX90A-NEXT: ;;#ASMEND
4296 ; GFX90A-NEXT: ;;#ASMSTART
4297 ; GFX90A-NEXT: ; def v[2:3]
4298 ; GFX90A-NEXT: ;;#ASMEND
4299 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
4300 ; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4
4301 ; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4
4302 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4303 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4304 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4306 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__2_5_5:
4308 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4309 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4310 ; GFX940-NEXT: ;;#ASMSTART
4311 ; GFX940-NEXT: ; def v[0:1]
4312 ; GFX940-NEXT: ;;#ASMEND
4313 ; GFX940-NEXT: ;;#ASMSTART
4314 ; GFX940-NEXT: ; def v[2:3]
4315 ; GFX940-NEXT: ;;#ASMEND
4316 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
4317 ; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2
4318 ; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
4319 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4320 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4321 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each input is produced by an inline asm "=v" output and then narrowed from
; <4 x bfloat> to its first three lanes before the shuffle under test.
4322 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4323 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4324 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4325 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4326 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 2, i32 5, i32 5>
4327 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <3 x bfloat> operands with mask <3, 5, 5>.
; Every mask index is in the 3-5 range, so only lanes of %extract31 (built
; from %vec1) are selected; the stored vector is
; { %vec1[0], %vec1[2], %vec1[2] }. The expected output accordingly shows a
; single asm def.
4331 define void @v_shuffle_v3bf16_v3bf16__3_5_5(ptr addrspace(1) inreg %ptr) {
4332 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__3_5_5:
4334 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4335 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4336 ; GFX900-NEXT: ;;#ASMSTART
4337 ; GFX900-NEXT: ; def v[0:1]
4338 ; GFX900-NEXT: ;;#ASMEND
4339 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
4340 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
4341 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
4342 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
4343 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4344 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4346 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__3_5_5:
4348 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4349 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4350 ; GFX90A-NEXT: ;;#ASMSTART
4351 ; GFX90A-NEXT: ; def v[0:1]
4352 ; GFX90A-NEXT: ;;#ASMEND
4353 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
4354 ; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4
4355 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
4356 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
4357 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4358 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4360 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__3_5_5:
4362 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4363 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4364 ; GFX940-NEXT: ;;#ASMSTART
4365 ; GFX940-NEXT: ; def v[0:1]
4366 ; GFX940-NEXT: ;;#ASMEND
4367 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
4368 ; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2
4369 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4370 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
4371 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4372 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each input is produced by an inline asm "=v" output and then narrowed from
; <4 x bfloat> to its first three lanes; %extract3 is not selected by the mask.
4373 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4374 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4375 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4376 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4377 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 3, i32 5, i32 5>
4378 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <3 x bfloat> operands with mask <4, 5, 5>.
; Every mask index is in the 3-5 range, so only lanes of %extract31 (built
; from %vec1) are selected; the stored vector is
; { %vec1[1], %vec1[2], %vec1[2] }. The expected output accordingly shows a
; single asm def.
4382 define void @v_shuffle_v3bf16_v3bf16__4_5_5(ptr addrspace(1) inreg %ptr) {
4383 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__4_5_5:
4385 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4386 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4387 ; GFX900-NEXT: ;;#ASMSTART
4388 ; GFX900-NEXT: ; def v[0:1]
4389 ; GFX900-NEXT: ;;#ASMEND
4390 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16
4391 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
4392 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
4393 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4394 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4396 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__4_5_5:
4398 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4399 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4400 ; GFX90A-NEXT: ;;#ASMSTART
4401 ; GFX90A-NEXT: ; def v[0:1]
4402 ; GFX90A-NEXT: ;;#ASMEND
4403 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16
4404 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
4405 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
4406 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4407 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4409 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__4_5_5:
4411 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4412 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4413 ; GFX940-NEXT: ;;#ASMSTART
4414 ; GFX940-NEXT: ; def v[0:1]
4415 ; GFX940-NEXT: ;;#ASMEND
4416 ; GFX940-NEXT: s_nop 0
4417 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16
4418 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4419 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
4420 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4421 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each input is produced by an inline asm "=v" output and then narrowed from
; <4 x bfloat> to its first three lanes; %extract3 is not selected by the mask.
4422 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4423 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4424 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4425 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4426 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 4, i32 5, i32 5>
4427 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <3 x bfloat> operands with mask <5, poison, 5>.
; Lanes 0 and 2 both take index 5, i.e. lane 2 of %extract31 (built from
; %vec1); lane 1 is unspecified (poison mask element). Only the second
; operand is selected, and the expected output shows a single asm def.
4431 define void @v_shuffle_v3bf16_v3bf16__5_u_5(ptr addrspace(1) inreg %ptr) {
4432 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_u_5:
4434 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4435 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4436 ; GFX900-NEXT: ;;#ASMSTART
4437 ; GFX900-NEXT: ; def v[0:1]
4438 ; GFX900-NEXT: ;;#ASMEND
4439 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
4440 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
4441 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4442 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4444 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_u_5:
4446 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4447 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4448 ; GFX90A-NEXT: ;;#ASMSTART
4449 ; GFX90A-NEXT: ; def v[0:1]
4450 ; GFX90A-NEXT: ;;#ASMEND
4451 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
4452 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
4453 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4454 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4456 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_u_5:
4458 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4459 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4460 ; GFX940-NEXT: ;;#ASMSTART
4461 ; GFX940-NEXT: ; def v[0:1]
4462 ; GFX940-NEXT: ;;#ASMEND
4463 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4464 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
4465 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4466 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each input is produced by an inline asm "=v" output and then narrowed from
; <4 x bfloat> to its first three lanes; %extract3 is not selected by the mask.
4467 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4468 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4469 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4470 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4471 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 poison, i32 5>
4472 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <3 x bfloat> operands with mask <5, 0, 5>.
; Mask indices 0-2 select lanes of %extract3 (built from %vec0) and indices
; 3-5 select lanes of %extract31 (built from %vec1), so the stored vector is
; { %vec1[2], %vec0[0], %vec1[2] }.
4476 define void @v_shuffle_v3bf16_v3bf16__5_0_5(ptr addrspace(1) inreg %ptr) {
4477 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_0_5:
4479 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4480 ; GFX900-NEXT: ;;#ASMSTART
4481 ; GFX900-NEXT: ; def v[0:1]
4482 ; GFX900-NEXT: ;;#ASMEND
4483 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
4484 ; GFX900-NEXT: ;;#ASMSTART
4485 ; GFX900-NEXT: ; def v[1:2]
4486 ; GFX900-NEXT: ;;#ASMEND
4487 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
4488 ; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4
4489 ; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4
4490 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
4491 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4492 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4494 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_0_5:
4496 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4497 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4498 ; GFX90A-NEXT: ;;#ASMSTART
4499 ; GFX90A-NEXT: ; def v[0:1]
4500 ; GFX90A-NEXT: ;;#ASMEND
4501 ; GFX90A-NEXT: ;;#ASMSTART
4502 ; GFX90A-NEXT: ; def v[2:3]
4503 ; GFX90A-NEXT: ;;#ASMEND
4504 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
4505 ; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
4506 ; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4
4507 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4508 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4509 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4511 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_0_5:
4513 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4514 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4515 ; GFX940-NEXT: ;;#ASMSTART
4516 ; GFX940-NEXT: ; def v[0:1]
4517 ; GFX940-NEXT: ;;#ASMEND
4518 ; GFX940-NEXT: ;;#ASMSTART
4519 ; GFX940-NEXT: ; def v[2:3]
4520 ; GFX940-NEXT: ;;#ASMEND
4521 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
4522 ; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
4523 ; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
4524 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4525 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4526 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each input is produced by an inline asm "=v" output and then narrowed from
; <4 x bfloat> to its first three lanes before the shuffle under test.
4527 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4528 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4529 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4530 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4531 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 0, i32 5>
4532 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <3 x bfloat> operands with mask <5, 1, 5>.
; Mask indices 0-2 select lanes of %extract3 (built from %vec0) and indices
; 3-5 select lanes of %extract31 (built from %vec1), so the stored vector is
; { %vec1[2], %vec0[1], %vec1[2] }.
4536 define void @v_shuffle_v3bf16_v3bf16__5_1_5(ptr addrspace(1) inreg %ptr) {
4537 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_1_5:
4539 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4540 ; GFX900-NEXT: ;;#ASMSTART
4541 ; GFX900-NEXT: ; def v[0:1]
4542 ; GFX900-NEXT: ;;#ASMEND
4543 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
4544 ; GFX900-NEXT: ;;#ASMSTART
4545 ; GFX900-NEXT: ; def v[1:2]
4546 ; GFX900-NEXT: ;;#ASMEND
4547 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
4548 ; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0
4549 ; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4
4550 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
4551 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4552 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4554 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_1_5:
4556 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4557 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4558 ; GFX90A-NEXT: ;;#ASMSTART
4559 ; GFX90A-NEXT: ; def v[0:1]
4560 ; GFX90A-NEXT: ;;#ASMEND
4561 ; GFX90A-NEXT: ;;#ASMSTART
4562 ; GFX90A-NEXT: ; def v[2:3]
4563 ; GFX90A-NEXT: ;;#ASMEND
4564 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
4565 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0
4566 ; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4
4567 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4568 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4569 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4571 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_1_5:
4573 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4574 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4575 ; GFX940-NEXT: ;;#ASMSTART
4576 ; GFX940-NEXT: ; def v[0:1]
4577 ; GFX940-NEXT: ;;#ASMEND
4578 ; GFX940-NEXT: ;;#ASMSTART
4579 ; GFX940-NEXT: ; def v[2:3]
4580 ; GFX940-NEXT: ;;#ASMEND
4581 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
4582 ; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0
4583 ; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
4584 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4585 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4586 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each input is produced by an inline asm "=v" output and then narrowed from
; <4 x bfloat> to its first three lanes before the shuffle under test.
4587 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4588 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4589 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4590 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4591 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 1, i32 5>
4592 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <3 x bfloat> operands with mask <5, 2, 5>.
; Mask indices 0-2 select lanes of %extract3 (built from %vec0) and indices
; 3-5 select lanes of %extract31 (built from %vec1), so the stored vector is
; { %vec1[2], %vec0[2], %vec1[2] }.
4596 define void @v_shuffle_v3bf16_v3bf16__5_2_5(ptr addrspace(1) inreg %ptr) {
4597 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_2_5:
4599 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4600 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
4601 ; GFX900-NEXT: ;;#ASMSTART
4602 ; GFX900-NEXT: ; def v[0:1]
4603 ; GFX900-NEXT: ;;#ASMEND
4604 ; GFX900-NEXT: ;;#ASMSTART
4605 ; GFX900-NEXT: ; def v[2:3]
4606 ; GFX900-NEXT: ;;#ASMEND
4607 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
4608 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
4609 ; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4
4610 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
4611 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4612 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4614 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_2_5:
4616 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4617 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4618 ; GFX90A-NEXT: ;;#ASMSTART
4619 ; GFX90A-NEXT: ; def v[0:1]
4620 ; GFX90A-NEXT: ;;#ASMEND
4621 ; GFX90A-NEXT: ;;#ASMSTART
4622 ; GFX90A-NEXT: ; def v[2:3]
4623 ; GFX90A-NEXT: ;;#ASMEND
4624 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
4625 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
4626 ; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4
4627 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4628 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4629 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4631 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_2_5:
4633 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4634 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4635 ; GFX940-NEXT: ;;#ASMSTART
4636 ; GFX940-NEXT: ; def v[0:1]
4637 ; GFX940-NEXT: ;;#ASMEND
4638 ; GFX940-NEXT: ;;#ASMSTART
4639 ; GFX940-NEXT: ; def v[2:3]
4640 ; GFX940-NEXT: ;;#ASMEND
4641 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
4642 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
4643 ; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
4644 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4645 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4646 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Each input is produced by an inline asm "=v" output and then narrowed from
; <4 x bfloat> to its first three lanes before the shuffle under test.
4647 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4648 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4649 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4650 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4651 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 2, i32 5>
4652 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; VGPR case, mask <5, 3, 5>: every lane reads %vec1 (elements 2, 0, 2), so
; %vec0 does not contribute to the result. The checks expect a single
; inline-asm def (v[0:1]), a v_perm_b32 with selector 0x5040100 combining
; v0 and v1, and a 16-bit store of v1 at offset 4 for the third lane.
4656 define void @v_shuffle_v3bf16_v3bf16__5_3_5(ptr addrspace(1) inreg %ptr) {
4657 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_3_5:
4659 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4660 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4661 ; GFX900-NEXT: ;;#ASMSTART
4662 ; GFX900-NEXT: ; def v[0:1]
4663 ; GFX900-NEXT: ;;#ASMEND
4664 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
4665 ; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4
4666 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
4667 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
4668 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4669 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4671 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_3_5:
4673 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4674 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4675 ; GFX90A-NEXT: ;;#ASMSTART
4676 ; GFX90A-NEXT: ; def v[0:1]
4677 ; GFX90A-NEXT: ;;#ASMEND
4678 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
4679 ; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4
4680 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
4681 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
4682 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4683 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4685 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_3_5:
4687 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4688 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4689 ; GFX940-NEXT: ;;#ASMSTART
4690 ; GFX940-NEXT: ; def v[0:1]
4691 ; GFX940-NEXT: ;;#ASMEND
4692 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
4693 ; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2
4694 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4695 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
4696 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4697 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4698 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4699 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4700 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4701 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4702 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 3, i32 5>
4703 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; VGPR case, mask <5, 4, 5>: every lane reads %vec1 (elements 2, 1, 2), so
; %vec0 does not contribute to the result. The checks expect a single
; inline-asm def (v[0:1]), a v_bfi_b32 with mask 0xffff merging v1 and v0
; for the first two lanes, and a 16-bit store of v1 at offset 4 for the
; third lane.
4707 define void @v_shuffle_v3bf16_v3bf16__5_4_5(ptr addrspace(1) inreg %ptr) {
4708 ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_4_5:
4710 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4711 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4712 ; GFX900-NEXT: ;;#ASMSTART
4713 ; GFX900-NEXT: ; def v[0:1]
4714 ; GFX900-NEXT: ;;#ASMEND
4715 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
4716 ; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v0
4717 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
4718 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
4719 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4720 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4722 ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_4_5:
4724 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4725 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4726 ; GFX90A-NEXT: ;;#ASMSTART
4727 ; GFX90A-NEXT: ; def v[0:1]
4728 ; GFX90A-NEXT: ;;#ASMEND
4729 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
4730 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v0
4731 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
4732 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
4733 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4734 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4736 ; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_4_5:
4738 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4739 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4740 ; GFX940-NEXT: ;;#ASMSTART
4741 ; GFX940-NEXT: ; def v[0:1]
4742 ; GFX940-NEXT: ;;#ASMEND
4743 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
4744 ; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0
4745 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4746 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
4747 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4748 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4749 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4750 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4751 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4752 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4753 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 4, i32 5>
4754 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; SGPR case with an all-poison mask. The shuffle result is widened back to
; <4 x bfloat> and handed to a side-effecting "use" asm bound to s[8:9].
; Since no lane is defined, the checks (shared GFX9 prefix) expect only the
; "use" block: no def and no copy instructions.
4758 define void @s_shuffle_v3bf16_v3bf16__u_u_u() {
4759 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__u_u_u:
4761 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4762 ; GFX9-NEXT: ;;#ASMSTART
4763 ; GFX9-NEXT: ; use s[8:9]
4764 ; GFX9-NEXT: ;;#ASMEND
4765 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4766 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
4767 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4768 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> poison
4769 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4770 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, single-source mask <0, u, u>: lane 0 keeps element 0 of %vec0.
; The checks expect the def to land directly in s[8:9] with no copy before
; the use. The GFX940 checks additionally expect an `s_nop 0` between the
; def and use blocks.
4774 define void @s_shuffle_v3bf16_v3bf16__0_u_u() {
4775 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__0_u_u:
4777 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4778 ; GFX900-NEXT: ;;#ASMSTART
4779 ; GFX900-NEXT: ; def s[8:9]
4780 ; GFX900-NEXT: ;;#ASMEND
4781 ; GFX900-NEXT: ;;#ASMSTART
4782 ; GFX900-NEXT: ; use s[8:9]
4783 ; GFX900-NEXT: ;;#ASMEND
4784 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4786 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__0_u_u:
4788 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4789 ; GFX90A-NEXT: ;;#ASMSTART
4790 ; GFX90A-NEXT: ; def s[8:9]
4791 ; GFX90A-NEXT: ;;#ASMEND
4792 ; GFX90A-NEXT: ;;#ASMSTART
4793 ; GFX90A-NEXT: ; use s[8:9]
4794 ; GFX90A-NEXT: ;;#ASMEND
4795 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4797 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__0_u_u:
4799 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4800 ; GFX940-NEXT: ;;#ASMSTART
4801 ; GFX940-NEXT: ; def s[8:9]
4802 ; GFX940-NEXT: ;;#ASMEND
4803 ; GFX940-NEXT: s_nop 0
4804 ; GFX940-NEXT: ;;#ASMSTART
4805 ; GFX940-NEXT: ; use s[8:9]
4806 ; GFX940-NEXT: ;;#ASMEND
4807 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4808 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
4809 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4810 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
4811 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4812 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, single-source mask <1, u, u>: lane 0 takes element 1 of %vec0.
; The checks expect one def followed by an s_lshr_b32 by 16 of the first def
; register into s8, which is then consumed through s[8:9].
4816 define void @s_shuffle_v3bf16_v3bf16__1_u_u() {
4817 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__1_u_u:
4819 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4820 ; GFX900-NEXT: ;;#ASMSTART
4821 ; GFX900-NEXT: ; def s[4:5]
4822 ; GFX900-NEXT: ;;#ASMEND
4823 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
4824 ; GFX900-NEXT: ;;#ASMSTART
4825 ; GFX900-NEXT: ; use s[8:9]
4826 ; GFX900-NEXT: ;;#ASMEND
4827 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4829 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__1_u_u:
4831 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4832 ; GFX90A-NEXT: ;;#ASMSTART
4833 ; GFX90A-NEXT: ; def s[4:5]
4834 ; GFX90A-NEXT: ;;#ASMEND
4835 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
4836 ; GFX90A-NEXT: ;;#ASMSTART
4837 ; GFX90A-NEXT: ; use s[8:9]
4838 ; GFX90A-NEXT: ;;#ASMEND
4839 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4841 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__1_u_u:
4843 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4844 ; GFX940-NEXT: ;;#ASMSTART
4845 ; GFX940-NEXT: ; def s[0:1]
4846 ; GFX940-NEXT: ;;#ASMEND
4847 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
4848 ; GFX940-NEXT: ;;#ASMSTART
4849 ; GFX940-NEXT: ; use s[8:9]
4850 ; GFX940-NEXT: ;;#ASMEND
4851 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4852 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
4853 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4854 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
4855 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4856 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, single-source mask <2, u, u>: lane 0 takes element 2 of %vec0.
; The checks expect one def followed by an s_mov_b32 of the second def
; register into s8, which is then consumed through s[8:9].
4860 define void @s_shuffle_v3bf16_v3bf16__2_u_u() {
4861 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__2_u_u:
4863 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4864 ; GFX900-NEXT: ;;#ASMSTART
4865 ; GFX900-NEXT: ; def s[4:5]
4866 ; GFX900-NEXT: ;;#ASMEND
4867 ; GFX900-NEXT: s_mov_b32 s8, s5
4868 ; GFX900-NEXT: ;;#ASMSTART
4869 ; GFX900-NEXT: ; use s[8:9]
4870 ; GFX900-NEXT: ;;#ASMEND
4871 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4873 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__2_u_u:
4875 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4876 ; GFX90A-NEXT: ;;#ASMSTART
4877 ; GFX90A-NEXT: ; def s[4:5]
4878 ; GFX90A-NEXT: ;;#ASMEND
4879 ; GFX90A-NEXT: s_mov_b32 s8, s5
4880 ; GFX90A-NEXT: ;;#ASMSTART
4881 ; GFX90A-NEXT: ; use s[8:9]
4882 ; GFX90A-NEXT: ;;#ASMEND
4883 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4885 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__2_u_u:
4887 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4888 ; GFX940-NEXT: ;;#ASMSTART
4889 ; GFX940-NEXT: ; def s[0:1]
4890 ; GFX940-NEXT: ;;#ASMEND
4891 ; GFX940-NEXT: s_mov_b32 s8, s1
4892 ; GFX940-NEXT: ;;#ASMSTART
4893 ; GFX940-NEXT: ; use s[8:9]
4894 ; GFX940-NEXT: ;;#ASMEND
4895 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4896 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
4897 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4898 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 2, i32 poison, i32 poison>
4899 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4900 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, mask <3, u, u> on a single-source shuffle: index 3 addresses
; element 0 of the second operand, which is poison here. No lane is defined,
; so the checks (shared GFX9 prefix) expect only the "use" block, as in the
; all-poison-mask case.
4904 define void @s_shuffle_v3bf16_v3bf16__3_u_u() {
4905 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__3_u_u:
4907 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4908 ; GFX9-NEXT: ;;#ASMSTART
4909 ; GFX9-NEXT: ; use s[8:9]
4910 ; GFX9-NEXT: ;;#ASMEND
4911 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4912 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
4913 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4914 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 3, i32 poison, i32 poison>
4915 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4916 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, two-source mask <4, u, u>: lane 0 takes element 1 of %vec1;
; %vec0 is not referenced by the mask. The checks expect a single def and an
; s_lshr_b32 by 16 of its first register into s8.
4920 define void @s_shuffle_v3bf16_v3bf16__4_u_u() {
4921 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__4_u_u:
4923 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4924 ; GFX900-NEXT: ;;#ASMSTART
4925 ; GFX900-NEXT: ; def s[4:5]
4926 ; GFX900-NEXT: ;;#ASMEND
4927 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
4928 ; GFX900-NEXT: ;;#ASMSTART
4929 ; GFX900-NEXT: ; use s[8:9]
4930 ; GFX900-NEXT: ;;#ASMEND
4931 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4933 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__4_u_u:
4935 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4936 ; GFX90A-NEXT: ;;#ASMSTART
4937 ; GFX90A-NEXT: ; def s[4:5]
4938 ; GFX90A-NEXT: ;;#ASMEND
4939 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
4940 ; GFX90A-NEXT: ;;#ASMSTART
4941 ; GFX90A-NEXT: ; use s[8:9]
4942 ; GFX90A-NEXT: ;;#ASMEND
4943 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4945 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__4_u_u:
4947 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4948 ; GFX940-NEXT: ;;#ASMSTART
4949 ; GFX940-NEXT: ; def s[0:1]
4950 ; GFX940-NEXT: ;;#ASMEND
4951 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
4952 ; GFX940-NEXT: ;;#ASMSTART
4953 ; GFX940-NEXT: ; use s[8:9]
4954 ; GFX940-NEXT: ;;#ASMEND
4955 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4956 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
4957 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
4958 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4959 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
4960 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 4, i32 poison, i32 poison>
4961 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4962 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, two-source mask <5, u, u>: lane 0 takes element 2 of %vec1;
; %vec0 is not referenced by the mask. The checks expect a single def and an
; s_mov_b32 of its second register into s8.
4966 define void @s_shuffle_v3bf16_v3bf16__5_u_u() {
4967 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_u_u:
4969 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4970 ; GFX900-NEXT: ;;#ASMSTART
4971 ; GFX900-NEXT: ; def s[4:5]
4972 ; GFX900-NEXT: ;;#ASMEND
4973 ; GFX900-NEXT: s_mov_b32 s8, s5
4974 ; GFX900-NEXT: ;;#ASMSTART
4975 ; GFX900-NEXT: ; use s[8:9]
4976 ; GFX900-NEXT: ;;#ASMEND
4977 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4979 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_u_u:
4981 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4982 ; GFX90A-NEXT: ;;#ASMSTART
4983 ; GFX90A-NEXT: ; def s[4:5]
4984 ; GFX90A-NEXT: ;;#ASMEND
4985 ; GFX90A-NEXT: s_mov_b32 s8, s5
4986 ; GFX90A-NEXT: ;;#ASMSTART
4987 ; GFX90A-NEXT: ; use s[8:9]
4988 ; GFX90A-NEXT: ;;#ASMEND
4989 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4991 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_u_u:
4993 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4994 ; GFX940-NEXT: ;;#ASMSTART
4995 ; GFX940-NEXT: ; def s[0:1]
4996 ; GFX940-NEXT: ;;#ASMEND
4997 ; GFX940-NEXT: s_mov_b32 s8, s1
4998 ; GFX940-NEXT: ;;#ASMSTART
4999 ; GFX940-NEXT: ; use s[8:9]
5000 ; GFX940-NEXT: ;;#ASMEND
5001 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5002 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5003 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5004 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5005 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5006 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 poison, i32 poison>
5007 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5008 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, mask <5, 0, u>: lane 0 takes element 2 of %vec1 and lane 1 takes
; element 0 of %vec0. The checks expect both defs and a single
; s_pack_ll_b32_b16 into s8 that combines one register from each def.
5012 define void @s_shuffle_v3bf16_v3bf16__5_0_u() {
5013 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_0_u:
5015 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5016 ; GFX900-NEXT: ;;#ASMSTART
5017 ; GFX900-NEXT: ; def s[4:5]
5018 ; GFX900-NEXT: ;;#ASMEND
5019 ; GFX900-NEXT: ;;#ASMSTART
5020 ; GFX900-NEXT: ; def s[6:7]
5021 ; GFX900-NEXT: ;;#ASMEND
5022 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4
5023 ; GFX900-NEXT: ;;#ASMSTART
5024 ; GFX900-NEXT: ; use s[8:9]
5025 ; GFX900-NEXT: ;;#ASMEND
5026 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5028 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_0_u:
5030 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5031 ; GFX90A-NEXT: ;;#ASMSTART
5032 ; GFX90A-NEXT: ; def s[4:5]
5033 ; GFX90A-NEXT: ;;#ASMEND
5034 ; GFX90A-NEXT: ;;#ASMSTART
5035 ; GFX90A-NEXT: ; def s[6:7]
5036 ; GFX90A-NEXT: ;;#ASMEND
5037 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4
5038 ; GFX90A-NEXT: ;;#ASMSTART
5039 ; GFX90A-NEXT: ; use s[8:9]
5040 ; GFX90A-NEXT: ;;#ASMEND
5041 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5043 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_0_u:
5045 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5046 ; GFX940-NEXT: ;;#ASMSTART
5047 ; GFX940-NEXT: ; def s[0:1]
5048 ; GFX940-NEXT: ;;#ASMEND
5049 ; GFX940-NEXT: ;;#ASMSTART
5050 ; GFX940-NEXT: ; def s[2:3]
5051 ; GFX940-NEXT: ;;#ASMEND
5052 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0
5053 ; GFX940-NEXT: ;;#ASMSTART
5054 ; GFX940-NEXT: ; use s[8:9]
5055 ; GFX940-NEXT: ;;#ASMEND
5056 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5057 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5058 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5059 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5060 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5061 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 0, i32 poison>
5062 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5063 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, mask <5, 1, u>: lane 0 takes element 2 of %vec1 and lane 1 takes
; element 1 of %vec0. The checks expect an s_lshr_b32 by 16 on the first
; def's low register (emitted between the two def blocks), then an
; s_pack_ll_b32_b16 into s8.
5067 define void @s_shuffle_v3bf16_v3bf16__5_1_u() {
5068 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_1_u:
5070 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5071 ; GFX900-NEXT: ;;#ASMSTART
5072 ; GFX900-NEXT: ; def s[4:5]
5073 ; GFX900-NEXT: ;;#ASMEND
5074 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
5075 ; GFX900-NEXT: ;;#ASMSTART
5076 ; GFX900-NEXT: ; def s[6:7]
5077 ; GFX900-NEXT: ;;#ASMEND
5078 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4
5079 ; GFX900-NEXT: ;;#ASMSTART
5080 ; GFX900-NEXT: ; use s[8:9]
5081 ; GFX900-NEXT: ;;#ASMEND
5082 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5084 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_1_u:
5086 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5087 ; GFX90A-NEXT: ;;#ASMSTART
5088 ; GFX90A-NEXT: ; def s[4:5]
5089 ; GFX90A-NEXT: ;;#ASMEND
5090 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
5091 ; GFX90A-NEXT: ;;#ASMSTART
5092 ; GFX90A-NEXT: ; def s[6:7]
5093 ; GFX90A-NEXT: ;;#ASMEND
5094 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4
5095 ; GFX90A-NEXT: ;;#ASMSTART
5096 ; GFX90A-NEXT: ; use s[8:9]
5097 ; GFX90A-NEXT: ;;#ASMEND
5098 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5100 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_1_u:
5102 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5103 ; GFX940-NEXT: ;;#ASMSTART
5104 ; GFX940-NEXT: ; def s[0:1]
5105 ; GFX940-NEXT: ;;#ASMEND
5106 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
5107 ; GFX940-NEXT: ;;#ASMSTART
5108 ; GFX940-NEXT: ; def s[2:3]
5109 ; GFX940-NEXT: ;;#ASMEND
5110 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0
5111 ; GFX940-NEXT: ;;#ASMSTART
5112 ; GFX940-NEXT: ; use s[8:9]
5113 ; GFX940-NEXT: ;;#ASMEND
5114 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5115 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5116 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5117 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5118 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5119 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 1, i32 poison>
5120 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5121 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, mask <5, 2, u>: lane 0 takes element 2 of %vec1 and lane 1 takes
; element 2 of %vec0. The checks expect both defs and one s_pack_ll_b32_b16
; into s8 that combines the second register of each def.
5125 define void @s_shuffle_v3bf16_v3bf16__5_2_u() {
5126 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_2_u:
5128 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5129 ; GFX900-NEXT: ;;#ASMSTART
5130 ; GFX900-NEXT: ; def s[4:5]
5131 ; GFX900-NEXT: ;;#ASMEND
5132 ; GFX900-NEXT: ;;#ASMSTART
5133 ; GFX900-NEXT: ; def s[6:7]
5134 ; GFX900-NEXT: ;;#ASMEND
5135 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5
5136 ; GFX900-NEXT: ;;#ASMSTART
5137 ; GFX900-NEXT: ; use s[8:9]
5138 ; GFX900-NEXT: ;;#ASMEND
5139 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5141 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_2_u:
5143 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5144 ; GFX90A-NEXT: ;;#ASMSTART
5145 ; GFX90A-NEXT: ; def s[4:5]
5146 ; GFX90A-NEXT: ;;#ASMEND
5147 ; GFX90A-NEXT: ;;#ASMSTART
5148 ; GFX90A-NEXT: ; def s[6:7]
5149 ; GFX90A-NEXT: ;;#ASMEND
5150 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5
5151 ; GFX90A-NEXT: ;;#ASMSTART
5152 ; GFX90A-NEXT: ; use s[8:9]
5153 ; GFX90A-NEXT: ;;#ASMEND
5154 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5156 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_2_u:
5158 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5159 ; GFX940-NEXT: ;;#ASMSTART
5160 ; GFX940-NEXT: ; def s[0:1]
5161 ; GFX940-NEXT: ;;#ASMEND
5162 ; GFX940-NEXT: ;;#ASMSTART
5163 ; GFX940-NEXT: ; def s[2:3]
5164 ; GFX940-NEXT: ;;#ASMEND
5165 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1
5166 ; GFX940-NEXT: ;;#ASMSTART
5167 ; GFX940-NEXT: ; use s[8:9]
5168 ; GFX940-NEXT: ;;#ASMEND
5169 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5170 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5171 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5172 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5173 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5174 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 2, i32 poison>
5175 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5176 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, mask <5, 3, u>: both defined lanes read %vec1 (elements 2 and
; 0); %vec0 is not referenced by the mask. The checks expect a single def and
; one s_pack_ll_b32_b16 into s8 taking the def's second and first registers.
5180 define void @s_shuffle_v3bf16_v3bf16__5_3_u() {
5181 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_3_u:
5183 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5184 ; GFX900-NEXT: ;;#ASMSTART
5185 ; GFX900-NEXT: ; def s[4:5]
5186 ; GFX900-NEXT: ;;#ASMEND
5187 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5188 ; GFX900-NEXT: ;;#ASMSTART
5189 ; GFX900-NEXT: ; use s[8:9]
5190 ; GFX900-NEXT: ;;#ASMEND
5191 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5193 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_3_u:
5195 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5196 ; GFX90A-NEXT: ;;#ASMSTART
5197 ; GFX90A-NEXT: ; def s[4:5]
5198 ; GFX90A-NEXT: ;;#ASMEND
5199 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5200 ; GFX90A-NEXT: ;;#ASMSTART
5201 ; GFX90A-NEXT: ; use s[8:9]
5202 ; GFX90A-NEXT: ;;#ASMEND
5203 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5205 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_3_u:
5207 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5208 ; GFX940-NEXT: ;;#ASMSTART
5209 ; GFX940-NEXT: ; def s[0:1]
5210 ; GFX940-NEXT: ;;#ASMEND
5211 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
5212 ; GFX940-NEXT: ;;#ASMSTART
5213 ; GFX940-NEXT: ; use s[8:9]
5214 ; GFX940-NEXT: ;;#ASMEND
5215 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5216 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5217 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5218 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5219 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5220 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 3, i32 poison>
5221 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5222 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, mask <5, 4, u>: both defined lanes read %vec1 (elements 2 and
; 1); %vec0 is not referenced by the mask. The checks expect a single def, an
; s_lshr_b32 by 16 of its first register, then an s_pack_ll_b32_b16 into s8.
5226 define void @s_shuffle_v3bf16_v3bf16__5_4_u() {
5227 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_4_u:
5229 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5230 ; GFX900-NEXT: ;;#ASMSTART
5231 ; GFX900-NEXT: ; def s[4:5]
5232 ; GFX900-NEXT: ;;#ASMEND
5233 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
5234 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5235 ; GFX900-NEXT: ;;#ASMSTART
5236 ; GFX900-NEXT: ; use s[8:9]
5237 ; GFX900-NEXT: ;;#ASMEND
5238 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5240 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_4_u:
5242 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5243 ; GFX90A-NEXT: ;;#ASMSTART
5244 ; GFX90A-NEXT: ; def s[4:5]
5245 ; GFX90A-NEXT: ;;#ASMEND
5246 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
5247 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5248 ; GFX90A-NEXT: ;;#ASMSTART
5249 ; GFX90A-NEXT: ; use s[8:9]
5250 ; GFX90A-NEXT: ;;#ASMEND
5251 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5253 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_4_u:
5255 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5256 ; GFX940-NEXT: ;;#ASMSTART
5257 ; GFX940-NEXT: ; def s[0:1]
5258 ; GFX940-NEXT: ;;#ASMEND
5259 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
5260 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
5261 ; GFX940-NEXT: ;;#ASMSTART
5262 ; GFX940-NEXT: ; use s[8:9]
5263 ; GFX940-NEXT: ;;#ASMEND
5264 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5265 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5266 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5267 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5268 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5269 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 4, i32 poison>
5270 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5271 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, mask <5, 5, u>: lanes 0 and 1 both take element 2 of %vec1;
; %vec0 is not referenced by the mask. The checks expect a single def and an
; s_pack_ll_b32_b16 into s8 whose two source operands are the same register.
5275 define void @s_shuffle_v3bf16_v3bf16__5_5_u() {
5276 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_5_u:
5278 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5279 ; GFX900-NEXT: ;;#ASMSTART
5280 ; GFX900-NEXT: ; def s[4:5]
5281 ; GFX900-NEXT: ;;#ASMEND
5282 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
5283 ; GFX900-NEXT: ;;#ASMSTART
5284 ; GFX900-NEXT: ; use s[8:9]
5285 ; GFX900-NEXT: ;;#ASMEND
5286 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5288 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_5_u:
5290 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5291 ; GFX90A-NEXT: ;;#ASMSTART
5292 ; GFX90A-NEXT: ; def s[4:5]
5293 ; GFX90A-NEXT: ;;#ASMEND
5294 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
5295 ; GFX90A-NEXT: ;;#ASMSTART
5296 ; GFX90A-NEXT: ; use s[8:9]
5297 ; GFX90A-NEXT: ;;#ASMEND
5298 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5300 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_5_u:
5302 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5303 ; GFX940-NEXT: ;;#ASMSTART
5304 ; GFX940-NEXT: ; def s[0:1]
5305 ; GFX940-NEXT: ;;#ASMEND
5306 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
5307 ; GFX940-NEXT: ;;#ASMSTART
5308 ; GFX940-NEXT: ; use s[8:9]
5309 ; GFX940-NEXT: ;;#ASMEND
5310 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5311 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5312 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5313 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5314 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5315 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 5, i32 poison>
5316 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5317 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, mask <5, 5, 0>: lanes 0 and 1 take element 2 of %vec1 and lane 2
; takes element 0 of %vec0. The checks expect both defs, an
; s_pack_ll_b32_b16 into s8 for the first two lanes, and an s_mov_b32 into
; s9 for the third lane.
5321 define void @s_shuffle_v3bf16_v3bf16__5_5_0() {
5322 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_5_0:
5324 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5325 ; GFX900-NEXT: ;;#ASMSTART
5326 ; GFX900-NEXT: ; def s[4:5]
5327 ; GFX900-NEXT: ;;#ASMEND
5328 ; GFX900-NEXT: ;;#ASMSTART
5329 ; GFX900-NEXT: ; def s[6:7]
5330 ; GFX900-NEXT: ;;#ASMEND
5331 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7
5332 ; GFX900-NEXT: s_mov_b32 s9, s4
5333 ; GFX900-NEXT: ;;#ASMSTART
5334 ; GFX900-NEXT: ; use s[8:9]
5335 ; GFX900-NEXT: ;;#ASMEND
5336 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5338 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_5_0:
5340 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5341 ; GFX90A-NEXT: ;;#ASMSTART
5342 ; GFX90A-NEXT: ; def s[4:5]
5343 ; GFX90A-NEXT: ;;#ASMEND
5344 ; GFX90A-NEXT: ;;#ASMSTART
5345 ; GFX90A-NEXT: ; def s[6:7]
5346 ; GFX90A-NEXT: ;;#ASMEND
5347 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7
5348 ; GFX90A-NEXT: s_mov_b32 s9, s4
5349 ; GFX90A-NEXT: ;;#ASMSTART
5350 ; GFX90A-NEXT: ; use s[8:9]
5351 ; GFX90A-NEXT: ;;#ASMEND
5352 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5354 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_5_0:
5356 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5357 ; GFX940-NEXT: ;;#ASMSTART
5358 ; GFX940-NEXT: ; def s[0:1]
5359 ; GFX940-NEXT: ;;#ASMEND
5360 ; GFX940-NEXT: ;;#ASMSTART
5361 ; GFX940-NEXT: ; def s[2:3]
5362 ; GFX940-NEXT: ;;#ASMEND
5363 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3
5364 ; GFX940-NEXT: s_mov_b32 s9, s0
5365 ; GFX940-NEXT: ;;#ASMSTART
5366 ; GFX940-NEXT: ; use s[8:9]
5367 ; GFX940-NEXT: ;;#ASMEND
5368 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5369 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5370 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5371 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5372 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5373 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 5, i32 0>
5374 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5375 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, mask <5, 5, 1>: lanes 0 and 1 take element 2 of %vec1 and lane 2
; takes element 1 of %vec0. The checks expect both defs, an s_lshr_b32 by 16
; into s9 for the third lane, and an s_pack_ll_b32_b16 into s8 for the first
; two lanes.
5379 define void @s_shuffle_v3bf16_v3bf16__5_5_1() {
5380 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_5_1:
5382 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5383 ; GFX900-NEXT: ;;#ASMSTART
5384 ; GFX900-NEXT: ; def s[4:5]
5385 ; GFX900-NEXT: ;;#ASMEND
5386 ; GFX900-NEXT: ;;#ASMSTART
5387 ; GFX900-NEXT: ; def s[6:7]
5388 ; GFX900-NEXT: ;;#ASMEND
5389 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
5390 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7
5391 ; GFX900-NEXT: ;;#ASMSTART
5392 ; GFX900-NEXT: ; use s[8:9]
5393 ; GFX900-NEXT: ;;#ASMEND
5394 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5396 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_5_1:
5398 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5399 ; GFX90A-NEXT: ;;#ASMSTART
5400 ; GFX90A-NEXT: ; def s[4:5]
5401 ; GFX90A-NEXT: ;;#ASMEND
5402 ; GFX90A-NEXT: ;;#ASMSTART
5403 ; GFX90A-NEXT: ; def s[6:7]
5404 ; GFX90A-NEXT: ;;#ASMEND
5405 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
5406 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7
5407 ; GFX90A-NEXT: ;;#ASMSTART
5408 ; GFX90A-NEXT: ; use s[8:9]
5409 ; GFX90A-NEXT: ;;#ASMEND
5410 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5412 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_5_1:
5414 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5415 ; GFX940-NEXT: ;;#ASMSTART
5416 ; GFX940-NEXT: ; def s[0:1]
5417 ; GFX940-NEXT: ;;#ASMEND
5418 ; GFX940-NEXT: ;;#ASMSTART
5419 ; GFX940-NEXT: ; def s[2:3]
5420 ; GFX940-NEXT: ;;#ASMEND
5421 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
5422 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3
5423 ; GFX940-NEXT: ;;#ASMSTART
5424 ; GFX940-NEXT: ; use s[8:9]
5425 ; GFX940-NEXT: ;;#ASMEND
5426 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5427 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5428 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5429 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5430 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5431 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 5, i32 1>
5432 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5433 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR case, mask <5, 5, 2>: lanes 0 and 1 take element 2 of %vec1 and lane 2
; takes element 2 of %vec0. The checks expect the first def to land directly
; in s[8:9]; only s8 is then overwritten by an s_pack_ll_b32_b16 fed from the
; second def, and no instruction writes s9 before the use.
5437 define void @s_shuffle_v3bf16_v3bf16__5_5_2() {
5438 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_5_2:
5440 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5441 ; GFX900-NEXT: ;;#ASMSTART
5442 ; GFX900-NEXT: ; def s[8:9]
5443 ; GFX900-NEXT: ;;#ASMEND
5444 ; GFX900-NEXT: ;;#ASMSTART
5445 ; GFX900-NEXT: ; def s[4:5]
5446 ; GFX900-NEXT: ;;#ASMEND
5447 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
5448 ; GFX900-NEXT: ;;#ASMSTART
5449 ; GFX900-NEXT: ; use s[8:9]
5450 ; GFX900-NEXT: ;;#ASMEND
5451 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5453 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_5_2:
5455 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5456 ; GFX90A-NEXT: ;;#ASMSTART
5457 ; GFX90A-NEXT: ; def s[8:9]
5458 ; GFX90A-NEXT: ;;#ASMEND
5459 ; GFX90A-NEXT: ;;#ASMSTART
5460 ; GFX90A-NEXT: ; def s[4:5]
5461 ; GFX90A-NEXT: ;;#ASMEND
5462 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
5463 ; GFX90A-NEXT: ;;#ASMSTART
5464 ; GFX90A-NEXT: ; use s[8:9]
5465 ; GFX90A-NEXT: ;;#ASMEND
5466 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5468 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_5_2:
5470 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5471 ; GFX940-NEXT: ;;#ASMSTART
5472 ; GFX940-NEXT: ; def s[8:9]
5473 ; GFX940-NEXT: ;;#ASMEND
5474 ; GFX940-NEXT: ;;#ASMSTART
5475 ; GFX940-NEXT: ; def s[0:1]
5476 ; GFX940-NEXT: ;;#ASMEND
5477 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
5478 ; GFX940-NEXT: ;;#ASMSTART
5479 ; GFX940-NEXT: ; use s[8:9]
5480 ; GFX940-NEXT: ;;#ASMEND
5481 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5482 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5483 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5484 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5485 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5486 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 5, i32 2>
5487 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5488 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle of two <3 x bfloat> values with mask <5, 5, 3>; both inputs come
; from "=s" inline asm. Mask indices 3-5 select lanes of the second operand
; (%extract31, the first three lanes of %vec1), so the result is
; { %vec1[2], %vec1[2], %vec1[0] }. No lane of %vec0 is selected, and the
; expected output below contains only a single "def". The result is widened
; to <4 x bfloat> (last lane poison) and handed to inline asm pinned to
; s[8:9].
5492 define void @s_shuffle_v3bf16_v3bf16__5_5_3() {
5493 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_5_3:
5495 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5496 ; GFX900-NEXT: ;;#ASMSTART
5497 ; GFX900-NEXT: ; def s[4:5]
5498 ; GFX900-NEXT: ;;#ASMEND
5499 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
5500 ; GFX900-NEXT: s_mov_b32 s9, s4
5501 ; GFX900-NEXT: ;;#ASMSTART
5502 ; GFX900-NEXT: ; use s[8:9]
5503 ; GFX900-NEXT: ;;#ASMEND
5504 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5506 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_5_3:
5508 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5509 ; GFX90A-NEXT: ;;#ASMSTART
5510 ; GFX90A-NEXT: ; def s[4:5]
5511 ; GFX90A-NEXT: ;;#ASMEND
5512 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
5513 ; GFX90A-NEXT: s_mov_b32 s9, s4
5514 ; GFX90A-NEXT: ;;#ASMSTART
5515 ; GFX90A-NEXT: ; use s[8:9]
5516 ; GFX90A-NEXT: ;;#ASMEND
5517 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5519 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_5_3:
5521 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5522 ; GFX940-NEXT: ;;#ASMSTART
5523 ; GFX940-NEXT: ; def s[0:1]
5524 ; GFX940-NEXT: ;;#ASMEND
5525 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
5526 ; GFX940-NEXT: s_mov_b32 s9, s0
5527 ; GFX940-NEXT: ;;#ASMSTART
5528 ; GFX940-NEXT: ; use s[8:9]
5529 ; GFX940-NEXT: ;;#ASMEND
5530 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5531 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5532 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5533 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5534 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5535 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 5, i32 3>
5536 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5537 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle of two <3 x bfloat> values with mask <5, 5, 4>; both inputs come
; from "=s" inline asm. Mask indices 3-5 select lanes of the second operand
; (%extract31, the first three lanes of %vec1), so the result is
; { %vec1[2], %vec1[2], %vec1[1] }. No lane of %vec0 is selected, and the
; expected output below contains only a single "def". The result is widened
; to <4 x bfloat> (last lane poison) and handed to inline asm pinned to
; s[8:9].
5541 define void @s_shuffle_v3bf16_v3bf16__5_5_4() {
5542 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_5_4:
5544 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5545 ; GFX900-NEXT: ;;#ASMSTART
5546 ; GFX900-NEXT: ; def s[4:5]
5547 ; GFX900-NEXT: ;;#ASMEND
5548 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
5549 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
5550 ; GFX900-NEXT: ;;#ASMSTART
5551 ; GFX900-NEXT: ; use s[8:9]
5552 ; GFX900-NEXT: ;;#ASMEND
5553 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5555 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_5_4:
5557 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5558 ; GFX90A-NEXT: ;;#ASMSTART
5559 ; GFX90A-NEXT: ; def s[4:5]
5560 ; GFX90A-NEXT: ;;#ASMEND
5561 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
5562 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
5563 ; GFX90A-NEXT: ;;#ASMSTART
5564 ; GFX90A-NEXT: ; use s[8:9]
5565 ; GFX90A-NEXT: ;;#ASMEND
5566 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5568 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_5_4:
5570 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5571 ; GFX940-NEXT: ;;#ASMSTART
5572 ; GFX940-NEXT: ; def s[0:1]
5573 ; GFX940-NEXT: ;;#ASMEND
5574 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
5575 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
5576 ; GFX940-NEXT: ;;#ASMSTART
5577 ; GFX940-NEXT: ; use s[8:9]
5578 ; GFX940-NEXT: ;;#ASMEND
5579 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5580 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5581 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5582 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5583 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5584 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 5, i32 4>
5585 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5586 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle of two <3 x bfloat> values with mask <5, 5, 5>; both inputs come
; from "=s" inline asm. Index 5 selects lane 2 of the second operand
; (%extract31, the first three lanes of %vec1), so every result lane is
; %vec1[2]. No lane of %vec0 is selected, and the expected output below
; contains only a single "def". The result is widened to <4 x bfloat> (last
; lane poison) and handed to inline asm pinned to s[8:9].
5590 define void @s_shuffle_v3bf16_v3bf16__5_5_5() {
5591 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__5_5_5:
5593 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5594 ; GFX9-NEXT: ;;#ASMSTART
5595 ; GFX9-NEXT: ; def s[8:9]
5596 ; GFX9-NEXT: ;;#ASMEND
5597 ; GFX9-NEXT: s_pack_ll_b32_b16 s8, s9, s9
5598 ; GFX9-NEXT: ;;#ASMSTART
5599 ; GFX9-NEXT: ; use s[8:9]
5600 ; GFX9-NEXT: ;;#ASMEND
5601 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5602 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5603 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5604 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5605 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5606 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 5, i32 5>
5607 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5608 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input shuffle of a <3 x bfloat> value with mask <poison, 0, 0>; the
; input comes from "=s" inline asm and the second shuffle operand is poison.
; %extract3 is the first three lanes of %vec0, so the result is
; { poison, %vec0[0], %vec0[0] }. The result is widened to <4 x bfloat>
; (last lane poison) and handed to inline asm pinned to s[8:9].
5612 define void @s_shuffle_v3bf16_v3bf16__u_0_0() {
5613 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__u_0_0:
5615 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5616 ; GFX900-NEXT: ;;#ASMSTART
5617 ; GFX900-NEXT: ; def s[4:5]
5618 ; GFX900-NEXT: ;;#ASMEND
5619 ; GFX900-NEXT: s_lshl_b32 s8, s4, 16
5620 ; GFX900-NEXT: s_mov_b32 s9, s4
5621 ; GFX900-NEXT: ;;#ASMSTART
5622 ; GFX900-NEXT: ; use s[8:9]
5623 ; GFX900-NEXT: ;;#ASMEND
5624 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5626 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__u_0_0:
5628 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5629 ; GFX90A-NEXT: ;;#ASMSTART
5630 ; GFX90A-NEXT: ; def s[4:5]
5631 ; GFX90A-NEXT: ;;#ASMEND
5632 ; GFX90A-NEXT: s_lshl_b32 s8, s4, 16
5633 ; GFX90A-NEXT: s_mov_b32 s9, s4
5634 ; GFX90A-NEXT: ;;#ASMSTART
5635 ; GFX90A-NEXT: ; use s[8:9]
5636 ; GFX90A-NEXT: ;;#ASMEND
5637 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5639 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__u_0_0:
5641 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5642 ; GFX940-NEXT: ;;#ASMSTART
5643 ; GFX940-NEXT: ; def s[0:1]
5644 ; GFX940-NEXT: ;;#ASMEND
5645 ; GFX940-NEXT: s_lshl_b32 s8, s0, 16
5646 ; GFX940-NEXT: s_mov_b32 s9, s0
5647 ; GFX940-NEXT: ;;#ASMSTART
5648 ; GFX940-NEXT: ; use s[8:9]
5649 ; GFX940-NEXT: ;;#ASMEND
5650 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5651 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5652 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5653 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 poison, i32 0, i32 0>
5654 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5655 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input shuffle of a <3 x bfloat> value with an all-zero mask
; (zeroinitializer, i.e. <0, 0, 0>); the input comes from "=s" inline asm and
; the second shuffle operand is poison. Every result lane is %vec0[0]. The
; result is widened to <4 x bfloat> (last lane poison) and handed to inline
; asm pinned to s[8:9].
5659 define void @s_shuffle_v3bf16_v3bf16__0_0_0() {
5660 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__0_0_0:
5662 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5663 ; GFX900-NEXT: ;;#ASMSTART
5664 ; GFX900-NEXT: ; def s[4:5]
5665 ; GFX900-NEXT: ;;#ASMEND
5666 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
5667 ; GFX900-NEXT: s_mov_b32 s9, s4
5668 ; GFX900-NEXT: ;;#ASMSTART
5669 ; GFX900-NEXT: ; use s[8:9]
5670 ; GFX900-NEXT: ;;#ASMEND
5671 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5673 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__0_0_0:
5675 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5676 ; GFX90A-NEXT: ;;#ASMSTART
5677 ; GFX90A-NEXT: ; def s[4:5]
5678 ; GFX90A-NEXT: ;;#ASMEND
5679 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
5680 ; GFX90A-NEXT: s_mov_b32 s9, s4
5681 ; GFX90A-NEXT: ;;#ASMSTART
5682 ; GFX90A-NEXT: ; use s[8:9]
5683 ; GFX90A-NEXT: ;;#ASMEND
5684 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5686 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__0_0_0:
5688 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5689 ; GFX940-NEXT: ;;#ASMSTART
5690 ; GFX940-NEXT: ; def s[0:1]
5691 ; GFX940-NEXT: ;;#ASMEND
5692 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
5693 ; GFX940-NEXT: s_mov_b32 s9, s0
5694 ; GFX940-NEXT: ;;#ASMSTART
5695 ; GFX940-NEXT: ; use s[8:9]
5696 ; GFX940-NEXT: ;;#ASMEND
5697 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5698 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5699 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5700 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> zeroinitializer
5701 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5702 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input shuffle of a <3 x bfloat> value with mask <1, 0, 0>; the
; input comes from "=s" inline asm and the second shuffle operand is poison.
; %extract3 is the first three lanes of %vec0, so the result is
; { %vec0[1], %vec0[0], %vec0[0] }. The result is widened to <4 x bfloat>
; (last lane poison) and handed to inline asm pinned to s[8:9].
5706 define void @s_shuffle_v3bf16_v3bf16__1_0_0() {
5707 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__1_0_0:
5709 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5710 ; GFX900-NEXT: ;;#ASMSTART
5711 ; GFX900-NEXT: ; def s[4:5]
5712 ; GFX900-NEXT: ;;#ASMEND
5713 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
5714 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5715 ; GFX900-NEXT: s_mov_b32 s9, s4
5716 ; GFX900-NEXT: ;;#ASMSTART
5717 ; GFX900-NEXT: ; use s[8:9]
5718 ; GFX900-NEXT: ;;#ASMEND
5719 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5721 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__1_0_0:
5723 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5724 ; GFX90A-NEXT: ;;#ASMSTART
5725 ; GFX90A-NEXT: ; def s[4:5]
5726 ; GFX90A-NEXT: ;;#ASMEND
5727 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
5728 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5729 ; GFX90A-NEXT: s_mov_b32 s9, s4
5730 ; GFX90A-NEXT: ;;#ASMSTART
5731 ; GFX90A-NEXT: ; use s[8:9]
5732 ; GFX90A-NEXT: ;;#ASMEND
5733 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5735 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__1_0_0:
5737 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5738 ; GFX940-NEXT: ;;#ASMSTART
5739 ; GFX940-NEXT: ; def s[0:1]
5740 ; GFX940-NEXT: ;;#ASMEND
5741 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
5742 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
5743 ; GFX940-NEXT: s_mov_b32 s9, s0
5744 ; GFX940-NEXT: ;;#ASMSTART
5745 ; GFX940-NEXT: ; use s[8:9]
5746 ; GFX940-NEXT: ;;#ASMEND
5747 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5748 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5749 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5750 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 1, i32 0, i32 0>
5751 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5752 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input shuffle of a <3 x bfloat> value with mask <2, 0, 0>; the
; input comes from "=s" inline asm and the second shuffle operand is poison.
; %extract3 is the first three lanes of %vec0, so the result is
; { %vec0[2], %vec0[0], %vec0[0] }. The result is widened to <4 x bfloat>
; (last lane poison) and handed to inline asm pinned to s[8:9].
5756 define void @s_shuffle_v3bf16_v3bf16__2_0_0() {
5757 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__2_0_0:
5759 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5760 ; GFX900-NEXT: ;;#ASMSTART
5761 ; GFX900-NEXT: ; def s[4:5]
5762 ; GFX900-NEXT: ;;#ASMEND
5763 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5764 ; GFX900-NEXT: s_mov_b32 s9, s4
5765 ; GFX900-NEXT: ;;#ASMSTART
5766 ; GFX900-NEXT: ; use s[8:9]
5767 ; GFX900-NEXT: ;;#ASMEND
5768 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5770 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__2_0_0:
5772 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5773 ; GFX90A-NEXT: ;;#ASMSTART
5774 ; GFX90A-NEXT: ; def s[4:5]
5775 ; GFX90A-NEXT: ;;#ASMEND
5776 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5777 ; GFX90A-NEXT: s_mov_b32 s9, s4
5778 ; GFX90A-NEXT: ;;#ASMSTART
5779 ; GFX90A-NEXT: ; use s[8:9]
5780 ; GFX90A-NEXT: ;;#ASMEND
5781 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5783 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__2_0_0:
5785 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5786 ; GFX940-NEXT: ;;#ASMSTART
5787 ; GFX940-NEXT: ; def s[0:1]
5788 ; GFX940-NEXT: ;;#ASMEND
5789 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
5790 ; GFX940-NEXT: s_mov_b32 s9, s0
5791 ; GFX940-NEXT: ;;#ASMSTART
5792 ; GFX940-NEXT: ; use s[8:9]
5793 ; GFX940-NEXT: ;;#ASMEND
5794 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5795 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5796 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5797 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 2, i32 0, i32 0>
5798 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5799 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input shuffle of a <3 x bfloat> value with mask <3, 0, 0>; the
; input comes from "=s" inline asm. Index 3 selects lane 0 of the second
; shuffle operand, which is poison in this function, so the result is
; { poison, %vec0[0], %vec0[0] }. The result is widened to <4 x bfloat>
; (last lane poison) and handed to inline asm pinned to s[8:9].
5803 define void @s_shuffle_v3bf16_v3bf16__3_0_0() {
5804 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__3_0_0:
5806 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5807 ; GFX900-NEXT: ;;#ASMSTART
5808 ; GFX900-NEXT: ; def s[4:5]
5809 ; GFX900-NEXT: ;;#ASMEND
5810 ; GFX900-NEXT: s_lshl_b32 s8, s4, 16
5811 ; GFX900-NEXT: s_mov_b32 s9, s4
5812 ; GFX900-NEXT: ;;#ASMSTART
5813 ; GFX900-NEXT: ; use s[8:9]
5814 ; GFX900-NEXT: ;;#ASMEND
5815 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5817 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__3_0_0:
5819 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5820 ; GFX90A-NEXT: ;;#ASMSTART
5821 ; GFX90A-NEXT: ; def s[4:5]
5822 ; GFX90A-NEXT: ;;#ASMEND
5823 ; GFX90A-NEXT: s_lshl_b32 s8, s4, 16
5824 ; GFX90A-NEXT: s_mov_b32 s9, s4
5825 ; GFX90A-NEXT: ;;#ASMSTART
5826 ; GFX90A-NEXT: ; use s[8:9]
5827 ; GFX90A-NEXT: ;;#ASMEND
5828 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5830 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__3_0_0:
5832 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5833 ; GFX940-NEXT: ;;#ASMSTART
5834 ; GFX940-NEXT: ; def s[0:1]
5835 ; GFX940-NEXT: ;;#ASMEND
5836 ; GFX940-NEXT: s_lshl_b32 s8, s0, 16
5837 ; GFX940-NEXT: s_mov_b32 s9, s0
5838 ; GFX940-NEXT: ;;#ASMSTART
5839 ; GFX940-NEXT: ; use s[8:9]
5840 ; GFX940-NEXT: ;;#ASMEND
5841 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5842 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5843 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5844 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 3, i32 0, i32 0>
5845 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5846 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle of two <3 x bfloat> values with mask <4, 0, 0>; both inputs come
; from "=s" inline asm. %extract3 / %extract31 are the first three lanes of
; %vec0 / %vec1. Mask indices 0-2 select lanes of %extract3 and indices 3-5
; select lanes of %extract31, so the result is
; { %vec1[1], %vec0[0], %vec0[0] }. The result is widened to <4 x bfloat>
; (last lane poison) and handed to inline asm pinned to s[8:9].
5850 define void @s_shuffle_v3bf16_v3bf16__4_0_0() {
5851 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__4_0_0:
5853 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5854 ; GFX900-NEXT: ;;#ASMSTART
5855 ; GFX900-NEXT: ; def s[4:5]
5856 ; GFX900-NEXT: ;;#ASMEND
5857 ; GFX900-NEXT: ;;#ASMSTART
5858 ; GFX900-NEXT: ; def s[6:7]
5859 ; GFX900-NEXT: ;;#ASMEND
5860 ; GFX900-NEXT: s_lshr_b32 s5, s6, 16
5861 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5862 ; GFX900-NEXT: s_mov_b32 s9, s4
5863 ; GFX900-NEXT: ;;#ASMSTART
5864 ; GFX900-NEXT: ; use s[8:9]
5865 ; GFX900-NEXT: ;;#ASMEND
5866 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5868 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__4_0_0:
5870 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5871 ; GFX90A-NEXT: ;;#ASMSTART
5872 ; GFX90A-NEXT: ; def s[4:5]
5873 ; GFX90A-NEXT: ;;#ASMEND
5874 ; GFX90A-NEXT: ;;#ASMSTART
5875 ; GFX90A-NEXT: ; def s[6:7]
5876 ; GFX90A-NEXT: ;;#ASMEND
5877 ; GFX90A-NEXT: s_lshr_b32 s5, s6, 16
5878 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5879 ; GFX90A-NEXT: s_mov_b32 s9, s4
5880 ; GFX90A-NEXT: ;;#ASMSTART
5881 ; GFX90A-NEXT: ; use s[8:9]
5882 ; GFX90A-NEXT: ;;#ASMEND
5883 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5885 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__4_0_0:
5887 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5888 ; GFX940-NEXT: ;;#ASMSTART
5889 ; GFX940-NEXT: ; def s[0:1]
5890 ; GFX940-NEXT: ;;#ASMEND
5891 ; GFX940-NEXT: ;;#ASMSTART
5892 ; GFX940-NEXT: ; def s[2:3]
5893 ; GFX940-NEXT: ;;#ASMEND
5894 ; GFX940-NEXT: s_lshr_b32 s1, s2, 16
5895 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
5896 ; GFX940-NEXT: s_mov_b32 s9, s0
5897 ; GFX940-NEXT: ;;#ASMSTART
5898 ; GFX940-NEXT: ; use s[8:9]
5899 ; GFX940-NEXT: ;;#ASMEND
5900 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5901 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5902 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5903 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5904 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5905 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 4, i32 0, i32 0>
5906 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5907 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle of two <3 x bfloat> values with mask <5, 0, 0>; both inputs come
; from "=s" inline asm. %extract3 / %extract31 are the first three lanes of
; %vec0 / %vec1. Mask indices 0-2 select lanes of %extract3 and indices 3-5
; select lanes of %extract31, so the result is
; { %vec1[2], %vec0[0], %vec0[0] }. The result is widened to <4 x bfloat>
; (last lane poison) and handed to inline asm pinned to s[8:9].
5911 define void @s_shuffle_v3bf16_v3bf16__5_0_0() {
5912 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_0_0:
5914 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5915 ; GFX900-NEXT: ;;#ASMSTART
5916 ; GFX900-NEXT: ; def s[4:5]
5917 ; GFX900-NEXT: ;;#ASMEND
5918 ; GFX900-NEXT: ;;#ASMSTART
5919 ; GFX900-NEXT: ; def s[6:7]
5920 ; GFX900-NEXT: ;;#ASMEND
5921 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4
5922 ; GFX900-NEXT: s_mov_b32 s9, s4
5923 ; GFX900-NEXT: ;;#ASMSTART
5924 ; GFX900-NEXT: ; use s[8:9]
5925 ; GFX900-NEXT: ;;#ASMEND
5926 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5928 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_0_0:
5930 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5931 ; GFX90A-NEXT: ;;#ASMSTART
5932 ; GFX90A-NEXT: ; def s[4:5]
5933 ; GFX90A-NEXT: ;;#ASMEND
5934 ; GFX90A-NEXT: ;;#ASMSTART
5935 ; GFX90A-NEXT: ; def s[6:7]
5936 ; GFX90A-NEXT: ;;#ASMEND
5937 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4
5938 ; GFX90A-NEXT: s_mov_b32 s9, s4
5939 ; GFX90A-NEXT: ;;#ASMSTART
5940 ; GFX90A-NEXT: ; use s[8:9]
5941 ; GFX90A-NEXT: ;;#ASMEND
5942 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5944 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_0_0:
5946 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5947 ; GFX940-NEXT: ;;#ASMSTART
5948 ; GFX940-NEXT: ; def s[0:1]
5949 ; GFX940-NEXT: ;;#ASMEND
5950 ; GFX940-NEXT: ;;#ASMSTART
5951 ; GFX940-NEXT: ; def s[2:3]
5952 ; GFX940-NEXT: ;;#ASMEND
5953 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0
5954 ; GFX940-NEXT: s_mov_b32 s9, s0
5955 ; GFX940-NEXT: ;;#ASMSTART
5956 ; GFX940-NEXT: ; use s[8:9]
5957 ; GFX940-NEXT: ;;#ASMEND
5958 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5959 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
5960 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
5961 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5962 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
5963 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 0, i32 0>
5964 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5965 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle of two <3 x bfloat> values with mask <5, poison, 0>; both inputs
; come from "=s" inline asm. %extract3 / %extract31 are the first three lanes
; of %vec0 / %vec1. Mask indices 0-2 select lanes of %extract3 and indices
; 3-5 select lanes of %extract31, so the result is
; { %vec1[2], poison, %vec0[0] }. The result is widened to <4 x bfloat>
; (last lane poison) and handed to inline asm pinned to s[8:9].
5969 define void @s_shuffle_v3bf16_v3bf16__5_u_0() {
5970 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_u_0:
5972 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5973 ; GFX900-NEXT: ;;#ASMSTART
5974 ; GFX900-NEXT: ; def s[4:5]
5975 ; GFX900-NEXT: ;;#ASMEND
5976 ; GFX900-NEXT: ;;#ASMSTART
5977 ; GFX900-NEXT: ; def s[6:7]
5978 ; GFX900-NEXT: ;;#ASMEND
5979 ; GFX900-NEXT: s_mov_b32 s8, s7
5980 ; GFX900-NEXT: s_mov_b32 s9, s4
5981 ; GFX900-NEXT: ;;#ASMSTART
5982 ; GFX900-NEXT: ; use s[8:9]
5983 ; GFX900-NEXT: ;;#ASMEND
5984 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5986 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_u_0:
5988 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5989 ; GFX90A-NEXT: ;;#ASMSTART
5990 ; GFX90A-NEXT: ; def s[4:5]
5991 ; GFX90A-NEXT: ;;#ASMEND
5992 ; GFX90A-NEXT: ;;#ASMSTART
5993 ; GFX90A-NEXT: ; def s[6:7]
5994 ; GFX90A-NEXT: ;;#ASMEND
5995 ; GFX90A-NEXT: s_mov_b32 s8, s7
5996 ; GFX90A-NEXT: s_mov_b32 s9, s4
5997 ; GFX90A-NEXT: ;;#ASMSTART
5998 ; GFX90A-NEXT: ; use s[8:9]
5999 ; GFX90A-NEXT: ;;#ASMEND
6000 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6002 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_u_0:
6004 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6005 ; GFX940-NEXT: ;;#ASMSTART
6006 ; GFX940-NEXT: ; def s[0:1]
6007 ; GFX940-NEXT: ;;#ASMEND
6008 ; GFX940-NEXT: ;;#ASMSTART
6009 ; GFX940-NEXT: ; def s[2:3]
6010 ; GFX940-NEXT: ;;#ASMEND
6011 ; GFX940-NEXT: s_mov_b32 s8, s3
6012 ; GFX940-NEXT: s_mov_b32 s9, s0
6013 ; GFX940-NEXT: ;;#ASMSTART
6014 ; GFX940-NEXT: ; use s[8:9]
6015 ; GFX940-NEXT: ;;#ASMEND
6016 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6017 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6018 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
6019 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6020 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6021 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 poison, i32 0>
6022 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6023 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle of two <3 x bfloat> values with mask <5, 1, 0>; both inputs come
; from "=s" inline asm. %extract3 / %extract31 are the first three lanes of
; %vec0 / %vec1. Mask indices 0-2 select lanes of %extract3 and indices 3-5
; select lanes of %extract31, so the result is
; { %vec1[2], %vec0[1], %vec0[0] }. The result is widened to <4 x bfloat>
; (last lane poison) and handed to inline asm pinned to s[8:9].
6027 define void @s_shuffle_v3bf16_v3bf16__5_1_0() {
6028 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_1_0:
6030 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6031 ; GFX900-NEXT: ;;#ASMSTART
6032 ; GFX900-NEXT: ; def s[4:5]
6033 ; GFX900-NEXT: ;;#ASMEND
6034 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
6035 ; GFX900-NEXT: ;;#ASMSTART
6036 ; GFX900-NEXT: ; def s[6:7]
6037 ; GFX900-NEXT: ;;#ASMEND
6038 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5
6039 ; GFX900-NEXT: s_mov_b32 s9, s4
6040 ; GFX900-NEXT: ;;#ASMSTART
6041 ; GFX900-NEXT: ; use s[8:9]
6042 ; GFX900-NEXT: ;;#ASMEND
6043 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6045 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_1_0:
6047 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6048 ; GFX90A-NEXT: ;;#ASMSTART
6049 ; GFX90A-NEXT: ; def s[4:5]
6050 ; GFX90A-NEXT: ;;#ASMEND
6051 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
6052 ; GFX90A-NEXT: ;;#ASMSTART
6053 ; GFX90A-NEXT: ; def s[6:7]
6054 ; GFX90A-NEXT: ;;#ASMEND
6055 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5
6056 ; GFX90A-NEXT: s_mov_b32 s9, s4
6057 ; GFX90A-NEXT: ;;#ASMSTART
6058 ; GFX90A-NEXT: ; use s[8:9]
6059 ; GFX90A-NEXT: ;;#ASMEND
6060 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6062 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_1_0:
6064 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6065 ; GFX940-NEXT: ;;#ASMSTART
6066 ; GFX940-NEXT: ; def s[0:1]
6067 ; GFX940-NEXT: ;;#ASMEND
6068 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
6069 ; GFX940-NEXT: ;;#ASMSTART
6070 ; GFX940-NEXT: ; def s[2:3]
6071 ; GFX940-NEXT: ;;#ASMEND
6072 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1
6073 ; GFX940-NEXT: s_mov_b32 s9, s0
6074 ; GFX940-NEXT: ;;#ASMSTART
6075 ; GFX940-NEXT: ; use s[8:9]
6076 ; GFX940-NEXT: ;;#ASMEND
6077 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6078 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6079 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
6080 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6081 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6082 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 1, i32 0>
6083 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6084 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle of two <3 x bfloat> values with mask <5, 2, 0>; both inputs come
; from "=s" inline asm. %extract3 / %extract31 are the first three lanes of
; %vec0 / %vec1. Mask indices 0-2 select lanes of %extract3 and indices 3-5
; select lanes of %extract31, so the result is
; { %vec1[2], %vec0[2], %vec0[0] }. The result is widened to <4 x bfloat>
; (last lane poison) and handed to inline asm pinned to s[8:9].
6088 define void @s_shuffle_v3bf16_v3bf16__5_2_0() {
6089 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_2_0:
6091 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6092 ; GFX900-NEXT: ;;#ASMSTART
6093 ; GFX900-NEXT: ; def s[4:5]
6094 ; GFX900-NEXT: ;;#ASMEND
6095 ; GFX900-NEXT: ;;#ASMSTART
6096 ; GFX900-NEXT: ; def s[6:7]
6097 ; GFX900-NEXT: ;;#ASMEND
6098 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5
6099 ; GFX900-NEXT: s_mov_b32 s9, s4
6100 ; GFX900-NEXT: ;;#ASMSTART
6101 ; GFX900-NEXT: ; use s[8:9]
6102 ; GFX900-NEXT: ;;#ASMEND
6103 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6105 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_2_0:
6107 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6108 ; GFX90A-NEXT: ;;#ASMSTART
6109 ; GFX90A-NEXT: ; def s[4:5]
6110 ; GFX90A-NEXT: ;;#ASMEND
6111 ; GFX90A-NEXT: ;;#ASMSTART
6112 ; GFX90A-NEXT: ; def s[6:7]
6113 ; GFX90A-NEXT: ;;#ASMEND
6114 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5
6115 ; GFX90A-NEXT: s_mov_b32 s9, s4
6116 ; GFX90A-NEXT: ;;#ASMSTART
6117 ; GFX90A-NEXT: ; use s[8:9]
6118 ; GFX90A-NEXT: ;;#ASMEND
6119 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6121 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_2_0:
6123 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6124 ; GFX940-NEXT: ;;#ASMSTART
6125 ; GFX940-NEXT: ; def s[0:1]
6126 ; GFX940-NEXT: ;;#ASMEND
6127 ; GFX940-NEXT: ;;#ASMSTART
6128 ; GFX940-NEXT: ; def s[2:3]
6129 ; GFX940-NEXT: ;;#ASMEND
6130 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1
6131 ; GFX940-NEXT: s_mov_b32 s9, s0
6132 ; GFX940-NEXT: ;;#ASMSTART
6133 ; GFX940-NEXT: ; use s[8:9]
6134 ; GFX940-NEXT: ;;#ASMEND
6135 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6136 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6137 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
6138 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6139 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6140 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 2, i32 0>
6141 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6142 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle of two <3 x bfloat> values with mask <5, 3, 0>; both inputs come
; from "=s" inline asm. %extract3 / %extract31 are the first three lanes of
; %vec0 / %vec1. Mask indices 0-2 select lanes of %extract3 and indices 3-5
; select lanes of %extract31, so the result is
; { %vec1[2], %vec1[0], %vec0[0] }. The result is widened to <4 x bfloat>
; (last lane poison) and handed to inline asm pinned to s[8:9].
6146 define void @s_shuffle_v3bf16_v3bf16__5_3_0() {
6147 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_3_0:
6149 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6150 ; GFX900-NEXT: ;;#ASMSTART
6151 ; GFX900-NEXT: ; def s[4:5]
6152 ; GFX900-NEXT: ;;#ASMEND
6153 ; GFX900-NEXT: ;;#ASMSTART
6154 ; GFX900-NEXT: ; def s[6:7]
6155 ; GFX900-NEXT: ;;#ASMEND
6156 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s6
6157 ; GFX900-NEXT: s_mov_b32 s9, s4
6158 ; GFX900-NEXT: ;;#ASMSTART
6159 ; GFX900-NEXT: ; use s[8:9]
6160 ; GFX900-NEXT: ;;#ASMEND
6161 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6163 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_3_0:
6165 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6166 ; GFX90A-NEXT: ;;#ASMSTART
6167 ; GFX90A-NEXT: ; def s[4:5]
6168 ; GFX90A-NEXT: ;;#ASMEND
6169 ; GFX90A-NEXT: ;;#ASMSTART
6170 ; GFX90A-NEXT: ; def s[6:7]
6171 ; GFX90A-NEXT: ;;#ASMEND
6172 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s6
6173 ; GFX90A-NEXT: s_mov_b32 s9, s4
6174 ; GFX90A-NEXT: ;;#ASMSTART
6175 ; GFX90A-NEXT: ; use s[8:9]
6176 ; GFX90A-NEXT: ;;#ASMEND
6177 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6179 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_3_0:
6181 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6182 ; GFX940-NEXT: ;;#ASMSTART
6183 ; GFX940-NEXT: ; def s[0:1]
6184 ; GFX940-NEXT: ;;#ASMEND
6185 ; GFX940-NEXT: ;;#ASMSTART
6186 ; GFX940-NEXT: ; def s[2:3]
6187 ; GFX940-NEXT: ;;#ASMEND
6188 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2
6189 ; GFX940-NEXT: s_mov_b32 s9, s0
6190 ; GFX940-NEXT: ;;#ASMSTART
6191 ; GFX940-NEXT: ; use s[8:9]
6192 ; GFX940-NEXT: ;;#ASMEND
6193 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6194 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6195 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
6196 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6197 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6198 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 3, i32 0>
6199 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6200 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle of two <3 x bfloat> values with mask <5, 4, 0>; both inputs come
; from "=s" inline asm. %extract3 / %extract31 are the first three lanes of
; %vec0 / %vec1. Mask indices 0-2 select lanes of %extract3 and indices 3-5
; select lanes of %extract31, so the result is
; { %vec1[2], %vec1[1], %vec0[0] }. The result is widened to <4 x bfloat>
; (last lane poison) and handed to inline asm pinned to s[8:9].
6204 define void @s_shuffle_v3bf16_v3bf16__5_4_0() {
6205 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_4_0:
6207 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6208 ; GFX900-NEXT: ;;#ASMSTART
6209 ; GFX900-NEXT: ; def s[4:5]
6210 ; GFX900-NEXT: ;;#ASMEND
6211 ; GFX900-NEXT: ;;#ASMSTART
6212 ; GFX900-NEXT: ; def s[6:7]
6213 ; GFX900-NEXT: ;;#ASMEND
6214 ; GFX900-NEXT: s_lshr_b32 s5, s6, 16
6215 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5
6216 ; GFX900-NEXT: s_mov_b32 s9, s4
6217 ; GFX900-NEXT: ;;#ASMSTART
6218 ; GFX900-NEXT: ; use s[8:9]
6219 ; GFX900-NEXT: ;;#ASMEND
6220 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6222 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_4_0:
6224 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6225 ; GFX90A-NEXT: ;;#ASMSTART
6226 ; GFX90A-NEXT: ; def s[4:5]
6227 ; GFX90A-NEXT: ;;#ASMEND
6228 ; GFX90A-NEXT: ;;#ASMSTART
6229 ; GFX90A-NEXT: ; def s[6:7]
6230 ; GFX90A-NEXT: ;;#ASMEND
6231 ; GFX90A-NEXT: s_lshr_b32 s5, s6, 16
6232 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5
6233 ; GFX90A-NEXT: s_mov_b32 s9, s4
6234 ; GFX90A-NEXT: ;;#ASMSTART
6235 ; GFX90A-NEXT: ; use s[8:9]
6236 ; GFX90A-NEXT: ;;#ASMEND
6237 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6239 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_4_0:
6241 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6242 ; GFX940-NEXT: ;;#ASMSTART
6243 ; GFX940-NEXT: ; def s[0:1]
6244 ; GFX940-NEXT: ;;#ASMEND
6245 ; GFX940-NEXT: ;;#ASMSTART
6246 ; GFX940-NEXT: ; def s[2:3]
6247 ; GFX940-NEXT: ;;#ASMEND
6248 ; GFX940-NEXT: s_lshr_b32 s1, s2, 16
6249 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1
6250 ; GFX940-NEXT: s_mov_b32 s9, s0
6251 ; GFX940-NEXT: ;;#ASMSTART
6252 ; GFX940-NEXT: ; use s[8:9]
6253 ; GFX940-NEXT: ;;#ASMEND
6254 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6255 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6256 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
6257 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6258 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6259 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 4, i32 0>
6260 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6261 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("s" constraint) shuffle of a single <3 x bfloat> source with
; mask <poison, 1, 1>: lane 0 is unconstrained, lanes 1 and 2 both read source
; element 1. The source is the low three lanes of an asm-defined <4 x bfloat>;
; the result is padded back to <4 x bfloat> and pinned to s[8:9] for the "use"
; asm. All three RUN lines emit the same code, so one shared check prefix
; covers them: a single 16-bit right shift of s8 into s9.
6265 define void @s_shuffle_v3bf16_v3bf16__u_1_1() {
6266 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__u_1_1:
6268 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6269 ; GFX9-NEXT: ;;#ASMSTART
6270 ; GFX9-NEXT: ; def s[8:9]
6271 ; GFX9-NEXT: ;;#ASMEND
6272 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
6273 ; GFX9-NEXT: ;;#ASMSTART
6274 ; GFX9-NEXT: ; use s[8:9]
6275 ; GFX9-NEXT: ;;#ASMEND
6276 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6277 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6278 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6279 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 poison, i32 1, i32 1>
6280 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6281 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of a single <3 x bfloat> source with mask <0, 1, 1>:
; lanes 0 and 1 keep their original elements and lane 2 duplicates element 1.
; The source is the low three lanes of an asm-defined <4 x bfloat>; the result
; is padded to <4 x bfloat> and pinned to s[8:9] for the "use" asm. The
; expected code is the same single shift as the <poison, 1, 1> variant, and is
; identical for all three RUN lines.
6285 define void @s_shuffle_v3bf16_v3bf16__0_1_1() {
6286 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__0_1_1:
6288 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6289 ; GFX9-NEXT: ;;#ASMSTART
6290 ; GFX9-NEXT: ; def s[8:9]
6291 ; GFX9-NEXT: ;;#ASMEND
6292 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
6293 ; GFX9-NEXT: ;;#ASMSTART
6294 ; GFX9-NEXT: ; use s[8:9]
6295 ; GFX9-NEXT: ;;#ASMEND
6296 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6297 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6298 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6299 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 1>
6300 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6301 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of a single <3 x bfloat> source with mask <1, 1, 1>:
; source element 1 is broadcast to all three lanes. The source is the low
; three lanes of an asm-defined <4 x bfloat>; the result is padded to
; <4 x bfloat> and pinned to s[8:9] for the "use" asm. Expected code shifts
; element 1 down into s9 and packs two copies of it into s8. The gfx900 and
; gfx90a checks match; gfx940 differs only in the register pair chosen for the
; asm-defined input (s[0:1] instead of s[4:5]).
6305 define void @s_shuffle_v3bf16_v3bf16__1_1_1() {
6306 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__1_1_1:
6308 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6309 ; GFX900-NEXT: ;;#ASMSTART
6310 ; GFX900-NEXT: ; def s[4:5]
6311 ; GFX900-NEXT: ;;#ASMEND
6312 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
6313 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s9
6314 ; GFX900-NEXT: ;;#ASMSTART
6315 ; GFX900-NEXT: ; use s[8:9]
6316 ; GFX900-NEXT: ;;#ASMEND
6317 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6319 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__1_1_1:
6321 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6322 ; GFX90A-NEXT: ;;#ASMSTART
6323 ; GFX90A-NEXT: ; def s[4:5]
6324 ; GFX90A-NEXT: ;;#ASMEND
6325 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
6326 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s9
6327 ; GFX90A-NEXT: ;;#ASMSTART
6328 ; GFX90A-NEXT: ; use s[8:9]
6329 ; GFX90A-NEXT: ;;#ASMEND
6330 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6332 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__1_1_1:
6334 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6335 ; GFX940-NEXT: ;;#ASMSTART
6336 ; GFX940-NEXT: ; def s[0:1]
6337 ; GFX940-NEXT: ;;#ASMEND
6338 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
6339 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9
6340 ; GFX940-NEXT: ;;#ASMSTART
6341 ; GFX940-NEXT: ; use s[8:9]
6342 ; GFX940-NEXT: ;;#ASMEND
6343 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6344 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6345 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6346 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 1, i32 1, i32 1>
6347 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6348 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of a single <3 x bfloat> source with mask <2, 1, 1>:
; lane 0 reads source element 2, lanes 1 and 2 both read element 1. The source
; is the low three lanes of an asm-defined <4 x bfloat>; the result is padded
; to <4 x bfloat> and pinned to s[8:9] for the "use" asm. Expected code shifts
; element 1 down into s9, then packs the low halves of the input's second
; register and s9 into s8. The gfx900 and gfx90a checks match; gfx940 differs
; only in the input register pair (s[0:1] instead of s[4:5]).
6352 define void @s_shuffle_v3bf16_v3bf16__2_1_1() {
6353 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__2_1_1:
6355 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6356 ; GFX900-NEXT: ;;#ASMSTART
6357 ; GFX900-NEXT: ; def s[4:5]
6358 ; GFX900-NEXT: ;;#ASMEND
6359 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
6360 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9
6361 ; GFX900-NEXT: ;;#ASMSTART
6362 ; GFX900-NEXT: ; use s[8:9]
6363 ; GFX900-NEXT: ;;#ASMEND
6364 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6366 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__2_1_1:
6368 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6369 ; GFX90A-NEXT: ;;#ASMSTART
6370 ; GFX90A-NEXT: ; def s[4:5]
6371 ; GFX90A-NEXT: ;;#ASMEND
6372 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
6373 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9
6374 ; GFX90A-NEXT: ;;#ASMSTART
6375 ; GFX90A-NEXT: ; use s[8:9]
6376 ; GFX90A-NEXT: ;;#ASMEND
6377 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6379 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__2_1_1:
6381 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6382 ; GFX940-NEXT: ;;#ASMSTART
6383 ; GFX940-NEXT: ; def s[0:1]
6384 ; GFX940-NEXT: ;;#ASMEND
6385 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
6386 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9
6387 ; GFX940-NEXT: ;;#ASMSTART
6388 ; GFX940-NEXT: ; use s[8:9]
6389 ; GFX940-NEXT: ;;#ASMEND
6390 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6391 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6392 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6393 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 2, i32 1, i32 1>
6394 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6395 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <3, 1, 1> where the second shuffle operand
; is poison: index 3 selects lane 0 of that poison operand, so lane 0 of the
; result is unconstrained, while lanes 1 and 2 both read element 1 of the
; asm-defined source. The result is padded to <4 x bfloat> and pinned to
; s[8:9] for the "use" asm. The expected code is the same single shift as the
; <poison, 1, 1> variant and is identical for all three RUN lines.
6399 define void @s_shuffle_v3bf16_v3bf16__3_1_1() {
6400 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__3_1_1:
6402 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6403 ; GFX9-NEXT: ;;#ASMSTART
6404 ; GFX9-NEXT: ; def s[8:9]
6405 ; GFX9-NEXT: ;;#ASMEND
6406 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
6407 ; GFX9-NEXT: ;;#ASMSTART
6408 ; GFX9-NEXT: ; use s[8:9]
6409 ; GFX9-NEXT: ;;#ASMEND
6410 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6411 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6412 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6413 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 3, i32 1, i32 1>
6414 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6415 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of two <3 x bfloat> sources with mask <4, 1, 1>:
; lane 0 reads element 1 of the second source (index 4 = 3 + 1), lanes 1 and 2
; both read element 1 of the first source. Each source is the low three lanes
; of its own asm-defined <4 x bfloat>; the result is padded to <4 x bfloat>
; and pinned to s[8:9] for the "use" asm. Expected code is two 16-bit right
; shifts followed by one pack into s8. The gfx900 and gfx90a checks match;
; gfx940 differs only in register numbering (s[0:1]/s[2:3] instead of
; s[4:5]/s[6:7], and s0 instead of s4 as the temporary).
6419 define void @s_shuffle_v3bf16_v3bf16__4_1_1() {
6420 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__4_1_1:
6422 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6423 ; GFX900-NEXT: ;;#ASMSTART
6424 ; GFX900-NEXT: ; def s[4:5]
6425 ; GFX900-NEXT: ;;#ASMEND
6426 ; GFX900-NEXT: ;;#ASMSTART
6427 ; GFX900-NEXT: ; def s[6:7]
6428 ; GFX900-NEXT: ;;#ASMEND
6429 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
6430 ; GFX900-NEXT: s_lshr_b32 s4, s6, 16
6431 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
6432 ; GFX900-NEXT: ;;#ASMSTART
6433 ; GFX900-NEXT: ; use s[8:9]
6434 ; GFX900-NEXT: ;;#ASMEND
6435 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6437 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__4_1_1:
6439 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6440 ; GFX90A-NEXT: ;;#ASMSTART
6441 ; GFX90A-NEXT: ; def s[4:5]
6442 ; GFX90A-NEXT: ;;#ASMEND
6443 ; GFX90A-NEXT: ;;#ASMSTART
6444 ; GFX90A-NEXT: ; def s[6:7]
6445 ; GFX90A-NEXT: ;;#ASMEND
6446 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
6447 ; GFX90A-NEXT: s_lshr_b32 s4, s6, 16
6448 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
6449 ; GFX90A-NEXT: ;;#ASMSTART
6450 ; GFX90A-NEXT: ; use s[8:9]
6451 ; GFX90A-NEXT: ;;#ASMEND
6452 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6454 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__4_1_1:
6456 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6457 ; GFX940-NEXT: ;;#ASMSTART
6458 ; GFX940-NEXT: ; def s[0:1]
6459 ; GFX940-NEXT: ;;#ASMEND
6460 ; GFX940-NEXT: ;;#ASMSTART
6461 ; GFX940-NEXT: ; def s[2:3]
6462 ; GFX940-NEXT: ;;#ASMEND
6463 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
6464 ; GFX940-NEXT: s_lshr_b32 s0, s2, 16
6465 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
6466 ; GFX940-NEXT: ;;#ASMSTART
6467 ; GFX940-NEXT: ; use s[8:9]
6468 ; GFX940-NEXT: ;;#ASMEND
6469 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6470 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6471 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
6472 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6473 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6474 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 4, i32 1, i32 1>
6475 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6476 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of two <3 x bfloat> sources with mask <5, 1, 1>:
; lane 0 reads element 2 of the second source (index 5 = 3 + 2), lanes 1 and 2
; both read element 1 of the first source. Each source is the low three lanes
; of its own asm-defined <4 x bfloat>; the result is padded to <4 x bfloat>
; and pinned to s[8:9] for the "use" asm. In the expected code the shift into
; s9 is placed between the two "def" asm blocks, followed by one pack into s8.
; The gfx900 and gfx90a checks match; gfx940 differs only in the input
; register pairs (s[0:1]/s[2:3] instead of s[4:5]/s[6:7]).
6480 define void @s_shuffle_v3bf16_v3bf16__5_1_1() {
6481 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_1_1:
6483 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6484 ; GFX900-NEXT: ;;#ASMSTART
6485 ; GFX900-NEXT: ; def s[4:5]
6486 ; GFX900-NEXT: ;;#ASMEND
6487 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
6488 ; GFX900-NEXT: ;;#ASMSTART
6489 ; GFX900-NEXT: ; def s[6:7]
6490 ; GFX900-NEXT: ;;#ASMEND
6491 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s9
6492 ; GFX900-NEXT: ;;#ASMSTART
6493 ; GFX900-NEXT: ; use s[8:9]
6494 ; GFX900-NEXT: ;;#ASMEND
6495 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6497 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_1_1:
6499 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6500 ; GFX90A-NEXT: ;;#ASMSTART
6501 ; GFX90A-NEXT: ; def s[4:5]
6502 ; GFX90A-NEXT: ;;#ASMEND
6503 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
6504 ; GFX90A-NEXT: ;;#ASMSTART
6505 ; GFX90A-NEXT: ; def s[6:7]
6506 ; GFX90A-NEXT: ;;#ASMEND
6507 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s9
6508 ; GFX90A-NEXT: ;;#ASMSTART
6509 ; GFX90A-NEXT: ; use s[8:9]
6510 ; GFX90A-NEXT: ;;#ASMEND
6511 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6513 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_1_1:
6515 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6516 ; GFX940-NEXT: ;;#ASMSTART
6517 ; GFX940-NEXT: ; def s[0:1]
6518 ; GFX940-NEXT: ;;#ASMEND
6519 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
6520 ; GFX940-NEXT: ;;#ASMSTART
6521 ; GFX940-NEXT: ; def s[2:3]
6522 ; GFX940-NEXT: ;;#ASMEND
6523 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s9
6524 ; GFX940-NEXT: ;;#ASMSTART
6525 ; GFX940-NEXT: ; use s[8:9]
6526 ; GFX940-NEXT: ;;#ASMEND
6527 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6528 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6529 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
6530 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6531 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6532 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 1, i32 1>
6533 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6534 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of two <3 x bfloat> sources with mask
; <5, poison, 1>: lane 0 reads element 2 of the second source (index 5 =
; 3 + 2), lane 1 is unconstrained, lane 2 reads element 1 of the first source.
; Each source is the low three lanes of its own asm-defined <4 x bfloat>; the
; result is padded to <4 x bfloat> and pinned to s[8:9] for the "use" asm.
; Because lane 1 is free, the expected code needs no pack: one shift produces
; s9 and a plain register copy produces s8. The gfx900 and gfx90a checks
; match; gfx940 differs only in the input register pairs (s[0:1]/s[2:3]
; instead of s[4:5]/s[6:7]).
6538 define void @s_shuffle_v3bf16_v3bf16__5_u_1() {
6539 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_u_1:
6541 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6542 ; GFX900-NEXT: ;;#ASMSTART
6543 ; GFX900-NEXT: ; def s[4:5]
6544 ; GFX900-NEXT: ;;#ASMEND
6545 ; GFX900-NEXT: ;;#ASMSTART
6546 ; GFX900-NEXT: ; def s[6:7]
6547 ; GFX900-NEXT: ;;#ASMEND
6548 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
6549 ; GFX900-NEXT: s_mov_b32 s8, s7
6550 ; GFX900-NEXT: ;;#ASMSTART
6551 ; GFX900-NEXT: ; use s[8:9]
6552 ; GFX900-NEXT: ;;#ASMEND
6553 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6555 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_u_1:
6557 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6558 ; GFX90A-NEXT: ;;#ASMSTART
6559 ; GFX90A-NEXT: ; def s[4:5]
6560 ; GFX90A-NEXT: ;;#ASMEND
6561 ; GFX90A-NEXT: ;;#ASMSTART
6562 ; GFX90A-NEXT: ; def s[6:7]
6563 ; GFX90A-NEXT: ;;#ASMEND
6564 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
6565 ; GFX90A-NEXT: s_mov_b32 s8, s7
6566 ; GFX90A-NEXT: ;;#ASMSTART
6567 ; GFX90A-NEXT: ; use s[8:9]
6568 ; GFX90A-NEXT: ;;#ASMEND
6569 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6571 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_u_1:
6573 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6574 ; GFX940-NEXT: ;;#ASMSTART
6575 ; GFX940-NEXT: ; def s[0:1]
6576 ; GFX940-NEXT: ;;#ASMEND
6577 ; GFX940-NEXT: ;;#ASMSTART
6578 ; GFX940-NEXT: ; def s[2:3]
6579 ; GFX940-NEXT: ;;#ASMEND
6580 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
6581 ; GFX940-NEXT: s_mov_b32 s8, s3
6582 ; GFX940-NEXT: ;;#ASMSTART
6583 ; GFX940-NEXT: ; use s[8:9]
6584 ; GFX940-NEXT: ;;#ASMEND
6585 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6586 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6587 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
6588 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6589 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6590 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 poison, i32 1>
6591 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6592 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of two <3 x bfloat> sources with mask <5, 0, 1>:
; lane 0 reads element 2 of the second source (index 5 = 3 + 2), lane 1 reads
; element 0 of the first source, lane 2 reads element 1 of the first source.
; Each source is the low three lanes of its own asm-defined <4 x bfloat>; the
; result is padded to <4 x bfloat> and pinned to s[8:9] for the "use" asm.
; Expected code is one pack into s8 followed by one shift into s9. The gfx900
; and gfx90a checks match; gfx940 differs only in the input register pairs
; (s[0:1]/s[2:3] instead of s[4:5]/s[6:7]).
6596 define void @s_shuffle_v3bf16_v3bf16__5_0_1() {
6597 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_0_1:
6599 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6600 ; GFX900-NEXT: ;;#ASMSTART
6601 ; GFX900-NEXT: ; def s[4:5]
6602 ; GFX900-NEXT: ;;#ASMEND
6603 ; GFX900-NEXT: ;;#ASMSTART
6604 ; GFX900-NEXT: ; def s[6:7]
6605 ; GFX900-NEXT: ;;#ASMEND
6606 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4
6607 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
6608 ; GFX900-NEXT: ;;#ASMSTART
6609 ; GFX900-NEXT: ; use s[8:9]
6610 ; GFX900-NEXT: ;;#ASMEND
6611 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6613 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_0_1:
6615 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6616 ; GFX90A-NEXT: ;;#ASMSTART
6617 ; GFX90A-NEXT: ; def s[4:5]
6618 ; GFX90A-NEXT: ;;#ASMEND
6619 ; GFX90A-NEXT: ;;#ASMSTART
6620 ; GFX90A-NEXT: ; def s[6:7]
6621 ; GFX90A-NEXT: ;;#ASMEND
6622 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4
6623 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
6624 ; GFX90A-NEXT: ;;#ASMSTART
6625 ; GFX90A-NEXT: ; use s[8:9]
6626 ; GFX90A-NEXT: ;;#ASMEND
6627 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6629 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_0_1:
6631 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6632 ; GFX940-NEXT: ;;#ASMSTART
6633 ; GFX940-NEXT: ; def s[0:1]
6634 ; GFX940-NEXT: ;;#ASMEND
6635 ; GFX940-NEXT: ;;#ASMSTART
6636 ; GFX940-NEXT: ; def s[2:3]
6637 ; GFX940-NEXT: ;;#ASMEND
6638 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0
6639 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
6640 ; GFX940-NEXT: ;;#ASMSTART
6641 ; GFX940-NEXT: ; use s[8:9]
6642 ; GFX940-NEXT: ;;#ASMEND
6643 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6644 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6645 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
6646 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6647 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6648 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 0, i32 1>
6649 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6650 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of two <3 x bfloat> sources with mask <5, 2, 1>:
; lane 0 reads element 2 of the second source (index 5 = 3 + 2), lane 1 reads
; element 2 of the first source, lane 2 reads element 1 of the first source.
; Each source is the low three lanes of its own asm-defined <4 x bfloat>; the
; result is padded to <4 x bfloat> and pinned to s[8:9] for the "use" asm.
; Expected code packs the low halves of both inputs' second registers into s8
; and shifts the first input's first register into s9. The gfx900 and gfx90a
; checks match; gfx940 differs only in the input register pairs
; (s[0:1]/s[2:3] instead of s[4:5]/s[6:7]).
6654 define void @s_shuffle_v3bf16_v3bf16__5_2_1() {
6655 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_2_1:
6657 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6658 ; GFX900-NEXT: ;;#ASMSTART
6659 ; GFX900-NEXT: ; def s[4:5]
6660 ; GFX900-NEXT: ;;#ASMEND
6661 ; GFX900-NEXT: ;;#ASMSTART
6662 ; GFX900-NEXT: ; def s[6:7]
6663 ; GFX900-NEXT: ;;#ASMEND
6664 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5
6665 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
6666 ; GFX900-NEXT: ;;#ASMSTART
6667 ; GFX900-NEXT: ; use s[8:9]
6668 ; GFX900-NEXT: ;;#ASMEND
6669 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6671 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_2_1:
6673 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6674 ; GFX90A-NEXT: ;;#ASMSTART
6675 ; GFX90A-NEXT: ; def s[4:5]
6676 ; GFX90A-NEXT: ;;#ASMEND
6677 ; GFX90A-NEXT: ;;#ASMSTART
6678 ; GFX90A-NEXT: ; def s[6:7]
6679 ; GFX90A-NEXT: ;;#ASMEND
6680 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5
6681 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
6682 ; GFX90A-NEXT: ;;#ASMSTART
6683 ; GFX90A-NEXT: ; use s[8:9]
6684 ; GFX90A-NEXT: ;;#ASMEND
6685 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6687 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_2_1:
6689 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6690 ; GFX940-NEXT: ;;#ASMSTART
6691 ; GFX940-NEXT: ; def s[0:1]
6692 ; GFX940-NEXT: ;;#ASMEND
6693 ; GFX940-NEXT: ;;#ASMSTART
6694 ; GFX940-NEXT: ; def s[2:3]
6695 ; GFX940-NEXT: ;;#ASMEND
6696 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1
6697 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
6698 ; GFX940-NEXT: ;;#ASMSTART
6699 ; GFX940-NEXT: ; use s[8:9]
6700 ; GFX940-NEXT: ;;#ASMEND
6701 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6702 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6703 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
6704 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6705 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6706 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 2, i32 1>
6707 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6708 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of two <3 x bfloat> sources with mask <5, 3, 1>:
; lane 0 reads element 2 of the second source (index 5 = 3 + 2), lane 1 reads
; element 0 of the second source (index 3 = 3 + 0), lane 2 reads element 1 of
; the first source. Each source is the low three lanes of its own asm-defined
; <4 x bfloat>; the result is padded to <4 x bfloat> and pinned to s[8:9] for
; the "use" asm. Expected code packs the low halves of the second input's two
; registers into s8 and shifts the first input's first register into s9. The
; gfx900 and gfx90a checks match; gfx940 differs only in the input register
; pairs (s[0:1]/s[2:3] instead of s[4:5]/s[6:7]).
6712 define void @s_shuffle_v3bf16_v3bf16__5_3_1() {
6713 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_3_1:
6715 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6716 ; GFX900-NEXT: ;;#ASMSTART
6717 ; GFX900-NEXT: ; def s[4:5]
6718 ; GFX900-NEXT: ;;#ASMEND
6719 ; GFX900-NEXT: ;;#ASMSTART
6720 ; GFX900-NEXT: ; def s[6:7]
6721 ; GFX900-NEXT: ;;#ASMEND
6722 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s6
6723 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
6724 ; GFX900-NEXT: ;;#ASMSTART
6725 ; GFX900-NEXT: ; use s[8:9]
6726 ; GFX900-NEXT: ;;#ASMEND
6727 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6729 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_3_1:
6731 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6732 ; GFX90A-NEXT: ;;#ASMSTART
6733 ; GFX90A-NEXT: ; def s[4:5]
6734 ; GFX90A-NEXT: ;;#ASMEND
6735 ; GFX90A-NEXT: ;;#ASMSTART
6736 ; GFX90A-NEXT: ; def s[6:7]
6737 ; GFX90A-NEXT: ;;#ASMEND
6738 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s6
6739 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
6740 ; GFX90A-NEXT: ;;#ASMSTART
6741 ; GFX90A-NEXT: ; use s[8:9]
6742 ; GFX90A-NEXT: ;;#ASMEND
6743 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6745 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_3_1:
6747 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6748 ; GFX940-NEXT: ;;#ASMSTART
6749 ; GFX940-NEXT: ; def s[0:1]
6750 ; GFX940-NEXT: ;;#ASMEND
6751 ; GFX940-NEXT: ;;#ASMSTART
6752 ; GFX940-NEXT: ; def s[2:3]
6753 ; GFX940-NEXT: ;;#ASMEND
6754 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2
6755 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
6756 ; GFX940-NEXT: ;;#ASMSTART
6757 ; GFX940-NEXT: ; use s[8:9]
6758 ; GFX940-NEXT: ;;#ASMEND
6759 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6760 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6761 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
6762 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6763 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6764 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 3, i32 1>
6765 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6766 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of two <3 x bfloat> sources with mask <5, 4, 1>:
; lane 0 reads element 2 of the second source (index 5 = 3 + 2), lane 1 reads
; element 1 of the second source (index 4 = 3 + 1), lane 2 reads element 1 of
; the first source. Each source is the low three lanes of its own asm-defined
; <4 x bfloat>; the result is padded to <4 x bfloat> and pinned to s[8:9] for
; the "use" asm. Expected code is a shift of the second input into a
; temporary, a pack into s8, and a shift of the first input into s9. The
; gfx900 and gfx90a checks match; gfx940 differs only in register numbering
; (s[0:1]/s[2:3] instead of s[4:5]/s[6:7], and s1 instead of s5 as the
; temporary).
6770 define void @s_shuffle_v3bf16_v3bf16__5_4_1() {
6771 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_4_1:
6773 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6774 ; GFX900-NEXT: ;;#ASMSTART
6775 ; GFX900-NEXT: ; def s[4:5]
6776 ; GFX900-NEXT: ;;#ASMEND
6777 ; GFX900-NEXT: ;;#ASMSTART
6778 ; GFX900-NEXT: ; def s[6:7]
6779 ; GFX900-NEXT: ;;#ASMEND
6780 ; GFX900-NEXT: s_lshr_b32 s5, s6, 16
6781 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5
6782 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
6783 ; GFX900-NEXT: ;;#ASMSTART
6784 ; GFX900-NEXT: ; use s[8:9]
6785 ; GFX900-NEXT: ;;#ASMEND
6786 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6788 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_4_1:
6790 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6791 ; GFX90A-NEXT: ;;#ASMSTART
6792 ; GFX90A-NEXT: ; def s[4:5]
6793 ; GFX90A-NEXT: ;;#ASMEND
6794 ; GFX90A-NEXT: ;;#ASMSTART
6795 ; GFX90A-NEXT: ; def s[6:7]
6796 ; GFX90A-NEXT: ;;#ASMEND
6797 ; GFX90A-NEXT: s_lshr_b32 s5, s6, 16
6798 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5
6799 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
6800 ; GFX90A-NEXT: ;;#ASMSTART
6801 ; GFX90A-NEXT: ; use s[8:9]
6802 ; GFX90A-NEXT: ;;#ASMEND
6803 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6805 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_4_1:
6807 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6808 ; GFX940-NEXT: ;;#ASMSTART
6809 ; GFX940-NEXT: ; def s[0:1]
6810 ; GFX940-NEXT: ;;#ASMEND
6811 ; GFX940-NEXT: ;;#ASMSTART
6812 ; GFX940-NEXT: ; def s[2:3]
6813 ; GFX940-NEXT: ;;#ASMEND
6814 ; GFX940-NEXT: s_lshr_b32 s1, s2, 16
6815 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1
6816 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
6817 ; GFX940-NEXT: ;;#ASMSTART
6818 ; GFX940-NEXT: ; use s[8:9]
6819 ; GFX940-NEXT: ;;#ASMEND
6820 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6821 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6822 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
6823 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6824 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6825 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 4, i32 1>
6826 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6827 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of a single <3 x bfloat> source with mask
; <poison, 2, 2>: lane 0 is unconstrained, lanes 1 and 2 both read source
; element 2. The source is the low three lanes of an asm-defined
; <4 x bfloat>; the result is padded to <4 x bfloat> and pinned to s[8:9] for
; the "use" asm. All three RUN lines emit the same code, so one shared check
; prefix covers them: a single 16-bit left shift of s9 into s8, with s9 left
; as defined.
6831 define void @s_shuffle_v3bf16_v3bf16__u_2_2() {
6832 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__u_2_2:
6834 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6835 ; GFX9-NEXT: ;;#ASMSTART
6836 ; GFX9-NEXT: ; def s[8:9]
6837 ; GFX9-NEXT: ;;#ASMEND
6838 ; GFX9-NEXT: s_lshl_b32 s8, s9, 16
6839 ; GFX9-NEXT: ;;#ASMSTART
6840 ; GFX9-NEXT: ; use s[8:9]
6841 ; GFX9-NEXT: ;;#ASMEND
6842 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6843 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6844 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6845 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 poison, i32 2, i32 2>
6846 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6847 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of a single <3 x bfloat> source with mask <0, 2, 2>:
; lane 0 keeps source element 0, lanes 1 and 2 both read element 2. The source
; is the low three lanes of an asm-defined <4 x bfloat>; the result is padded
; to <4 x bfloat> and pinned to s[8:9] for the "use" asm. All three RUN lines
; emit the same code, so one shared check prefix covers them: a single pack of
; the low halves of s8 and s9 into s8, with s9 left as defined.
6851 define void @s_shuffle_v3bf16_v3bf16__0_2_2() {
6852 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__0_2_2:
6854 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6855 ; GFX9-NEXT: ;;#ASMSTART
6856 ; GFX9-NEXT: ; def s[8:9]
6857 ; GFX9-NEXT: ;;#ASMEND
6858 ; GFX9-NEXT: s_pack_ll_b32_b16 s8, s8, s9
6859 ; GFX9-NEXT: ;;#ASMSTART
6860 ; GFX9-NEXT: ; use s[8:9]
6861 ; GFX9-NEXT: ;;#ASMEND
6862 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6863 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6864 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6865 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 0, i32 2, i32 2>
6866 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6867 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of a single <3 x bfloat> source with mask <1, 2, 2>:
; lane 0 reads source element 1, lanes 1 and 2 both read element 2. The source
; is the low three lanes of an asm-defined <4 x bfloat>; the result is padded
; to <4 x bfloat> and pinned to s[8:9] for the "use" asm. Expected code shifts
; s8 down into a temporary and packs that temporary with the low half of s9
; into s8. The input is defined directly in s[8:9] on every target; gfx900 and
; gfx90a checks match, and gfx940 differs only in the temporary register
; (s0 instead of s4).
6871 define void @s_shuffle_v3bf16_v3bf16__1_2_2() {
6872 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__1_2_2:
6874 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6875 ; GFX900-NEXT: ;;#ASMSTART
6876 ; GFX900-NEXT: ; def s[8:9]
6877 ; GFX900-NEXT: ;;#ASMEND
6878 ; GFX900-NEXT: s_lshr_b32 s4, s8, 16
6879 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
6880 ; GFX900-NEXT: ;;#ASMSTART
6881 ; GFX900-NEXT: ; use s[8:9]
6882 ; GFX900-NEXT: ;;#ASMEND
6883 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6885 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__1_2_2:
6887 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6888 ; GFX90A-NEXT: ;;#ASMSTART
6889 ; GFX90A-NEXT: ; def s[8:9]
6890 ; GFX90A-NEXT: ;;#ASMEND
6891 ; GFX90A-NEXT: s_lshr_b32 s4, s8, 16
6892 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
6893 ; GFX90A-NEXT: ;;#ASMSTART
6894 ; GFX90A-NEXT: ; use s[8:9]
6895 ; GFX90A-NEXT: ;;#ASMEND
6896 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6898 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__1_2_2:
6900 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6901 ; GFX940-NEXT: ;;#ASMSTART
6902 ; GFX940-NEXT: ; def s[8:9]
6903 ; GFX940-NEXT: ;;#ASMEND
6904 ; GFX940-NEXT: s_lshr_b32 s0, s8, 16
6905 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
6906 ; GFX940-NEXT: ;;#ASMSTART
6907 ; GFX940-NEXT: ; use s[8:9]
6908 ; GFX940-NEXT: ;;#ASMEND
6909 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6910 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6911 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6912 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 1, i32 2, i32 2>
6913 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6914 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of a single <3 x bfloat> source with mask <2, 2, 2>:
; source element 2 is broadcast to all three lanes. The source is the low
; three lanes of an asm-defined <4 x bfloat>; the result is padded to
; <4 x bfloat> and pinned to s[8:9] for the "use" asm. All three RUN lines
; emit the same code, so one shared check prefix covers them: a single pack of
; the low half of s9 with itself into s8, with s9 left as defined.
6918 define void @s_shuffle_v3bf16_v3bf16__2_2_2() {
6919 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__2_2_2:
6921 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6922 ; GFX9-NEXT: ;;#ASMSTART
6923 ; GFX9-NEXT: ; def s[8:9]
6924 ; GFX9-NEXT: ;;#ASMEND
6925 ; GFX9-NEXT: s_pack_ll_b32_b16 s8, s9, s9
6926 ; GFX9-NEXT: ;;#ASMSTART
6927 ; GFX9-NEXT: ; use s[8:9]
6928 ; GFX9-NEXT: ;;#ASMEND
6929 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6930 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6931 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6932 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 2, i32 2, i32 2>
6933 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6934 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle with mask <3, 2, 2> where the second shuffle operand
; is poison: index 3 selects lane 0 of that poison operand, so lane 0 of the
; result is unconstrained, while lanes 1 and 2 both read element 2 of the
; asm-defined source. The result is padded to <4 x bfloat> and pinned to
; s[8:9] for the "use" asm. The expected code is the same single left shift as
; the <poison, 2, 2> variant and is identical for all three RUN lines.
6938 define void @s_shuffle_v3bf16_v3bf16__3_2_2() {
6939 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__3_2_2:
6941 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6942 ; GFX9-NEXT: ;;#ASMSTART
6943 ; GFX9-NEXT: ; def s[8:9]
6944 ; GFX9-NEXT: ;;#ASMEND
6945 ; GFX9-NEXT: s_lshl_b32 s8, s9, 16
6946 ; GFX9-NEXT: ;;#ASMSTART
6947 ; GFX9-NEXT: ; use s[8:9]
6948 ; GFX9-NEXT: ;;#ASMEND
6949 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6950 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
6951 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
6952 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 3, i32 2, i32 2>
6953 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6954 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of two <3 x bfloat> sources with mask <4, 2, 2>:
; lane 0 reads element 1 of the second source (index 4 = 3 + 1), lanes 1 and 2
; both read element 2 of the first source. Each source is the low three lanes
; of its own asm-defined <4 x bfloat>; the result is padded to <4 x bfloat>
; and pinned to s[8:9] for the "use" asm. In the expected code one input is
; defined directly in s[8:9], so only a shift of the other input and one pack
; into s8 are needed. The gfx900 and gfx90a checks match; gfx940 differs only
; in the register pair of the other input (s[0:1] instead of s[4:5]).
6958 define void @s_shuffle_v3bf16_v3bf16__4_2_2() {
6959 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__4_2_2:
6961 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6962 ; GFX900-NEXT: ;;#ASMSTART
6963 ; GFX900-NEXT: ; def s[4:5]
6964 ; GFX900-NEXT: ;;#ASMEND
6965 ; GFX900-NEXT: ;;#ASMSTART
6966 ; GFX900-NEXT: ; def s[8:9]
6967 ; GFX900-NEXT: ;;#ASMEND
6968 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
6969 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
6970 ; GFX900-NEXT: ;;#ASMSTART
6971 ; GFX900-NEXT: ; use s[8:9]
6972 ; GFX900-NEXT: ;;#ASMEND
6973 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6975 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__4_2_2:
6977 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6978 ; GFX90A-NEXT: ;;#ASMSTART
6979 ; GFX90A-NEXT: ; def s[4:5]
6980 ; GFX90A-NEXT: ;;#ASMEND
6981 ; GFX90A-NEXT: ;;#ASMSTART
6982 ; GFX90A-NEXT: ; def s[8:9]
6983 ; GFX90A-NEXT: ;;#ASMEND
6984 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
6985 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
6986 ; GFX90A-NEXT: ;;#ASMSTART
6987 ; GFX90A-NEXT: ; use s[8:9]
6988 ; GFX90A-NEXT: ;;#ASMEND
6989 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6991 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__4_2_2:
6993 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6994 ; GFX940-NEXT: ;;#ASMSTART
6995 ; GFX940-NEXT: ; def s[0:1]
6996 ; GFX940-NEXT: ;;#ASMEND
6997 ; GFX940-NEXT: ;;#ASMSTART
6998 ; GFX940-NEXT: ; def s[8:9]
6999 ; GFX940-NEXT: ;;#ASMEND
7000 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
7001 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
7002 ; GFX940-NEXT: ;;#ASMSTART
7003 ; GFX940-NEXT: ; use s[8:9]
7004 ; GFX940-NEXT: ;;#ASMEND
7005 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7006 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7007 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7008 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7009 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7010 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 4, i32 2, i32 2>
7011 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7012 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle of two <3 x bfloat> sources with mask <5, 2, 2>:
; lane 0 reads element 2 of the second source (index 5 = 3 + 2), lanes 1 and 2
; both read element 2 of the first source. Each source is the low three lanes
; of its own asm-defined <4 x bfloat>; the result is padded to <4 x bfloat>
; and pinned to s[8:9] for the "use" asm. In the expected code one input is
; defined directly in s[8:9], so a single pack into s8 is the only shuffle
; instruction. The gfx900 and gfx90a checks match; gfx940 differs only in the
; register pair of the other input (s[0:1] instead of s[4:5]).
7016 define void @s_shuffle_v3bf16_v3bf16__5_2_2() {
7017 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_2_2:
7019 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7020 ; GFX900-NEXT: ;;#ASMSTART
7021 ; GFX900-NEXT: ; def s[8:9]
7022 ; GFX900-NEXT: ;;#ASMEND
7023 ; GFX900-NEXT: ;;#ASMSTART
7024 ; GFX900-NEXT: ; def s[4:5]
7025 ; GFX900-NEXT: ;;#ASMEND
7026 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9
7027 ; GFX900-NEXT: ;;#ASMSTART
7028 ; GFX900-NEXT: ; use s[8:9]
7029 ; GFX900-NEXT: ;;#ASMEND
7030 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7032 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_2_2:
7034 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7035 ; GFX90A-NEXT: ;;#ASMSTART
7036 ; GFX90A-NEXT: ; def s[8:9]
7037 ; GFX90A-NEXT: ;;#ASMEND
7038 ; GFX90A-NEXT: ;;#ASMSTART
7039 ; GFX90A-NEXT: ; def s[4:5]
7040 ; GFX90A-NEXT: ;;#ASMEND
7041 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9
7042 ; GFX90A-NEXT: ;;#ASMSTART
7043 ; GFX90A-NEXT: ; use s[8:9]
7044 ; GFX90A-NEXT: ;;#ASMEND
7045 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7047 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_2_2:
7049 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7050 ; GFX940-NEXT: ;;#ASMSTART
7051 ; GFX940-NEXT: ; def s[8:9]
7052 ; GFX940-NEXT: ;;#ASMEND
7053 ; GFX940-NEXT: ;;#ASMSTART
7054 ; GFX940-NEXT: ; def s[0:1]
7055 ; GFX940-NEXT: ;;#ASMEND
7056 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9
7057 ; GFX940-NEXT: ;;#ASMSTART
7058 ; GFX940-NEXT: ; use s[8:9]
7059 ; GFX940-NEXT: ;;#ASMEND
7060 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7061 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7062 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7063 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7064 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7065 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 2, i32 2>
7066 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7067 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Two-input scalar shuffle with mask <5, poison, 2>. Indices 0-2 select from
; %extract3 and 3-5 from %extract31, so lane 0 is %extract31[2], lane 1 is
; poison and lane 2 is %extract3[2]. The result is widened to <4 x bfloat>
; (last lane poison) and passed to inline asm pinned to s[8:9]. Every target
; is expected to need one s_mov_b32 into s8 between the defs and the use.
7071 define void @s_shuffle_v3bf16_v3bf16__5_u_2() {
7072 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_u_2:
7074 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7075 ; GFX900-NEXT: ;;#ASMSTART
7076 ; GFX900-NEXT: ; def s[8:9]
7077 ; GFX900-NEXT: ;;#ASMEND
7078 ; GFX900-NEXT: ;;#ASMSTART
7079 ; GFX900-NEXT: ; def s[4:5]
7080 ; GFX900-NEXT: ;;#ASMEND
7081 ; GFX900-NEXT: s_mov_b32 s8, s5
7082 ; GFX900-NEXT: ;;#ASMSTART
7083 ; GFX900-NEXT: ; use s[8:9]
7084 ; GFX900-NEXT: ;;#ASMEND
7085 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7087 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_u_2:
7089 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7090 ; GFX90A-NEXT: ;;#ASMSTART
7091 ; GFX90A-NEXT: ; def s[8:9]
7092 ; GFX90A-NEXT: ;;#ASMEND
7093 ; GFX90A-NEXT: ;;#ASMSTART
7094 ; GFX90A-NEXT: ; def s[4:5]
7095 ; GFX90A-NEXT: ;;#ASMEND
7096 ; GFX90A-NEXT: s_mov_b32 s8, s5
7097 ; GFX90A-NEXT: ;;#ASMSTART
7098 ; GFX90A-NEXT: ; use s[8:9]
7099 ; GFX90A-NEXT: ;;#ASMEND
7100 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7102 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_u_2:
7104 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7105 ; GFX940-NEXT: ;;#ASMSTART
7106 ; GFX940-NEXT: ; def s[8:9]
7107 ; GFX940-NEXT: ;;#ASMEND
7108 ; GFX940-NEXT: ;;#ASMSTART
7109 ; GFX940-NEXT: ; def s[0:1]
7110 ; GFX940-NEXT: ;;#ASMEND
7111 ; GFX940-NEXT: s_mov_b32 s8, s1
7112 ; GFX940-NEXT: ;;#ASMSTART
7113 ; GFX940-NEXT: ; use s[8:9]
7114 ; GFX940-NEXT: ;;#ASMEND
7115 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7116 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7117 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7118 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7119 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7120 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 poison, i32 2>
7121 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7122 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Two-input scalar shuffle with mask <5, 0, 2>. Indices 0-2 select from
; %extract3 and 3-5 from %extract31, so lane 0 is %extract31[2], lane 1 is
; %extract3[0] and lane 2 is %extract3[2]. The result is widened to
; <4 x bfloat> (last lane poison) and passed to inline asm pinned to s[8:9].
; Every target is expected to need one s_pack_ll_b32_b16 into s8.
7126 define void @s_shuffle_v3bf16_v3bf16__5_0_2() {
7127 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_0_2:
7129 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7130 ; GFX900-NEXT: ;;#ASMSTART
7131 ; GFX900-NEXT: ; def s[8:9]
7132 ; GFX900-NEXT: ;;#ASMEND
7133 ; GFX900-NEXT: ;;#ASMSTART
7134 ; GFX900-NEXT: ; def s[4:5]
7135 ; GFX900-NEXT: ;;#ASMEND
7136 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s8
7137 ; GFX900-NEXT: ;;#ASMSTART
7138 ; GFX900-NEXT: ; use s[8:9]
7139 ; GFX900-NEXT: ;;#ASMEND
7140 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7142 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_0_2:
7144 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7145 ; GFX90A-NEXT: ;;#ASMSTART
7146 ; GFX90A-NEXT: ; def s[8:9]
7147 ; GFX90A-NEXT: ;;#ASMEND
7148 ; GFX90A-NEXT: ;;#ASMSTART
7149 ; GFX90A-NEXT: ; def s[4:5]
7150 ; GFX90A-NEXT: ;;#ASMEND
7151 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s8
7152 ; GFX90A-NEXT: ;;#ASMSTART
7153 ; GFX90A-NEXT: ; use s[8:9]
7154 ; GFX90A-NEXT: ;;#ASMEND
7155 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7157 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_0_2:
7159 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7160 ; GFX940-NEXT: ;;#ASMSTART
7161 ; GFX940-NEXT: ; def s[8:9]
7162 ; GFX940-NEXT: ;;#ASMEND
7163 ; GFX940-NEXT: ;;#ASMSTART
7164 ; GFX940-NEXT: ; def s[0:1]
7165 ; GFX940-NEXT: ;;#ASMEND
7166 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s8
7167 ; GFX940-NEXT: ;;#ASMSTART
7168 ; GFX940-NEXT: ; use s[8:9]
7169 ; GFX940-NEXT: ;;#ASMEND
7170 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7171 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7172 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7173 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7174 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7175 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 0, i32 2>
7176 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7177 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Two-input scalar shuffle with mask <5, 1, 2>. Indices 0-2 select from
; %extract3 and 3-5 from %extract31, so lane 0 is %extract31[2], lane 1 is
; %extract3[1] and lane 2 is %extract3[2]. The result is widened to
; <4 x bfloat> (last lane poison) and passed to inline asm pinned to s[8:9].
; Every target is expected to shift right by 16 (s_lshr_b32) and then pack
; into s8 with s_pack_ll_b32_b16.
7181 define void @s_shuffle_v3bf16_v3bf16__5_1_2() {
7182 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_1_2:
7184 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7185 ; GFX900-NEXT: ;;#ASMSTART
7186 ; GFX900-NEXT: ; def s[4:5]
7187 ; GFX900-NEXT: ;;#ASMEND
7188 ; GFX900-NEXT: ;;#ASMSTART
7189 ; GFX900-NEXT: ; def s[8:9]
7190 ; GFX900-NEXT: ;;#ASMEND
7191 ; GFX900-NEXT: s_lshr_b32 s4, s8, 16
7192 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
7193 ; GFX900-NEXT: ;;#ASMSTART
7194 ; GFX900-NEXT: ; use s[8:9]
7195 ; GFX900-NEXT: ;;#ASMEND
7196 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7198 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_1_2:
7200 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7201 ; GFX90A-NEXT: ;;#ASMSTART
7202 ; GFX90A-NEXT: ; def s[4:5]
7203 ; GFX90A-NEXT: ;;#ASMEND
7204 ; GFX90A-NEXT: ;;#ASMSTART
7205 ; GFX90A-NEXT: ; def s[8:9]
7206 ; GFX90A-NEXT: ;;#ASMEND
7207 ; GFX90A-NEXT: s_lshr_b32 s4, s8, 16
7208 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
7209 ; GFX90A-NEXT: ;;#ASMSTART
7210 ; GFX90A-NEXT: ; use s[8:9]
7211 ; GFX90A-NEXT: ;;#ASMEND
7212 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7214 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_1_2:
7216 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7217 ; GFX940-NEXT: ;;#ASMSTART
7218 ; GFX940-NEXT: ; def s[0:1]
7219 ; GFX940-NEXT: ;;#ASMEND
7220 ; GFX940-NEXT: ;;#ASMSTART
7221 ; GFX940-NEXT: ; def s[8:9]
7222 ; GFX940-NEXT: ;;#ASMEND
7223 ; GFX940-NEXT: s_lshr_b32 s0, s8, 16
7224 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
7225 ; GFX940-NEXT: ;;#ASMSTART
7226 ; GFX940-NEXT: ; use s[8:9]
7227 ; GFX940-NEXT: ;;#ASMEND
7228 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7229 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7230 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7231 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7232 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7233 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 1, i32 2>
7234 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7235 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Two-input scalar shuffle with mask <5, 3, 2>. Indices 0-2 select from
; %extract3 and 3-5 from %extract31, so lane 0 is %extract31[2], lane 1 is
; %extract31[0] and lane 2 is %extract3[2]. The result is widened to
; <4 x bfloat> (last lane poison) and passed to inline asm pinned to s[8:9].
; Every target is expected to need one s_pack_ll_b32_b16 into s8.
7239 define void @s_shuffle_v3bf16_v3bf16__5_3_2() {
7240 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_3_2:
7242 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7243 ; GFX900-NEXT: ;;#ASMSTART
7244 ; GFX900-NEXT: ; def s[8:9]
7245 ; GFX900-NEXT: ;;#ASMEND
7246 ; GFX900-NEXT: ;;#ASMSTART
7247 ; GFX900-NEXT: ; def s[4:5]
7248 ; GFX900-NEXT: ;;#ASMEND
7249 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
7250 ; GFX900-NEXT: ;;#ASMSTART
7251 ; GFX900-NEXT: ; use s[8:9]
7252 ; GFX900-NEXT: ;;#ASMEND
7253 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7255 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_3_2:
7257 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7258 ; GFX90A-NEXT: ;;#ASMSTART
7259 ; GFX90A-NEXT: ; def s[8:9]
7260 ; GFX90A-NEXT: ;;#ASMEND
7261 ; GFX90A-NEXT: ;;#ASMSTART
7262 ; GFX90A-NEXT: ; def s[4:5]
7263 ; GFX90A-NEXT: ;;#ASMEND
7264 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
7265 ; GFX90A-NEXT: ;;#ASMSTART
7266 ; GFX90A-NEXT: ; use s[8:9]
7267 ; GFX90A-NEXT: ;;#ASMEND
7268 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7270 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_3_2:
7272 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7273 ; GFX940-NEXT: ;;#ASMSTART
7274 ; GFX940-NEXT: ; def s[8:9]
7275 ; GFX940-NEXT: ;;#ASMEND
7276 ; GFX940-NEXT: ;;#ASMSTART
7277 ; GFX940-NEXT: ; def s[0:1]
7278 ; GFX940-NEXT: ;;#ASMEND
7279 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
7280 ; GFX940-NEXT: ;;#ASMSTART
7281 ; GFX940-NEXT: ; use s[8:9]
7282 ; GFX940-NEXT: ;;#ASMEND
7283 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7284 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7285 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7286 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7287 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7288 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 3, i32 2>
7289 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7290 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Two-input scalar shuffle with mask <5, 4, 2>. Indices 0-2 select from
; %extract3 and 3-5 from %extract31, so lane 0 is %extract31[2], lane 1 is
; %extract31[1] and lane 2 is %extract3[2]. The result is widened to
; <4 x bfloat> (last lane poison) and passed to inline asm pinned to s[8:9].
; Every target is expected to shift right by 16 (s_lshr_b32) and then pack
; into s8 with s_pack_ll_b32_b16.
7294 define void @s_shuffle_v3bf16_v3bf16__5_4_2() {
7295 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_4_2:
7297 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7298 ; GFX900-NEXT: ;;#ASMSTART
7299 ; GFX900-NEXT: ; def s[4:5]
7300 ; GFX900-NEXT: ;;#ASMEND
7301 ; GFX900-NEXT: ;;#ASMSTART
7302 ; GFX900-NEXT: ; def s[8:9]
7303 ; GFX900-NEXT: ;;#ASMEND
7304 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
7305 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
7306 ; GFX900-NEXT: ;;#ASMSTART
7307 ; GFX900-NEXT: ; use s[8:9]
7308 ; GFX900-NEXT: ;;#ASMEND
7309 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7311 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_4_2:
7313 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7314 ; GFX90A-NEXT: ;;#ASMSTART
7315 ; GFX90A-NEXT: ; def s[4:5]
7316 ; GFX90A-NEXT: ;;#ASMEND
7317 ; GFX90A-NEXT: ;;#ASMSTART
7318 ; GFX90A-NEXT: ; def s[8:9]
7319 ; GFX90A-NEXT: ;;#ASMEND
7320 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
7321 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
7322 ; GFX90A-NEXT: ;;#ASMSTART
7323 ; GFX90A-NEXT: ; use s[8:9]
7324 ; GFX90A-NEXT: ;;#ASMEND
7325 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7327 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_4_2:
7329 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7330 ; GFX940-NEXT: ;;#ASMSTART
7331 ; GFX940-NEXT: ; def s[0:1]
7332 ; GFX940-NEXT: ;;#ASMEND
7333 ; GFX940-NEXT: ;;#ASMSTART
7334 ; GFX940-NEXT: ; def s[8:9]
7335 ; GFX940-NEXT: ;;#ASMEND
7336 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
7337 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
7338 ; GFX940-NEXT: ;;#ASMSTART
7339 ; GFX940-NEXT: ; use s[8:9]
7340 ; GFX940-NEXT: ;;#ASMEND
7341 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7342 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7343 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7344 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7345 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7346 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 4, i32 2>
7347 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7348 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input scalar shuffle with mask <poison, 3, 3>. The second shuffle
; operand is poison, so index 3 selects a poison lane and no lane of the
; result reads %extract3. The result is widened to <4 x bfloat> and passed to
; inline asm pinned to s[8:9]. All targets share one set of checks, which
; expect only the use asm; no def of the input appears in the output.
7352 define void @s_shuffle_v3bf16_v3bf16__u_3_3() {
7353 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__u_3_3:
7355 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7356 ; GFX9-NEXT: ;;#ASMSTART
7357 ; GFX9-NEXT: ; use s[8:9]
7358 ; GFX9-NEXT: ;;#ASMEND
7359 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7360 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7361 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7362 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 poison, i32 3, i32 3>
7363 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7364 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input scalar shuffle with mask <0, 3, 3>. Lane 0 is %extract3[0];
; the second shuffle operand is poison, so index 3 makes lanes 1 and 2 poison.
; The result is widened to <4 x bfloat> and passed to inline asm pinned to
; s[8:9]. The checks expect the def to land directly in s[8:9] with no shuffle
; instructions; the gfx940 checks additionally expect an s_nop 0 between the
; def and the use.
7368 define void @s_shuffle_v3bf16_v3bf16__0_3_3() {
7369 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__0_3_3:
7371 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7372 ; GFX900-NEXT: ;;#ASMSTART
7373 ; GFX900-NEXT: ; def s[8:9]
7374 ; GFX900-NEXT: ;;#ASMEND
7375 ; GFX900-NEXT: ;;#ASMSTART
7376 ; GFX900-NEXT: ; use s[8:9]
7377 ; GFX900-NEXT: ;;#ASMEND
7378 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7380 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__0_3_3:
7382 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7383 ; GFX90A-NEXT: ;;#ASMSTART
7384 ; GFX90A-NEXT: ; def s[8:9]
7385 ; GFX90A-NEXT: ;;#ASMEND
7386 ; GFX90A-NEXT: ;;#ASMSTART
7387 ; GFX90A-NEXT: ; use s[8:9]
7388 ; GFX90A-NEXT: ;;#ASMEND
7389 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7391 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__0_3_3:
7393 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7394 ; GFX940-NEXT: ;;#ASMSTART
7395 ; GFX940-NEXT: ; def s[8:9]
7396 ; GFX940-NEXT: ;;#ASMEND
7397 ; GFX940-NEXT: s_nop 0
7398 ; GFX940-NEXT: ;;#ASMSTART
7399 ; GFX940-NEXT: ; use s[8:9]
7400 ; GFX940-NEXT: ;;#ASMEND
7401 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7402 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7403 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7404 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 0, i32 3, i32 3>
7405 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7406 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input scalar shuffle with mask <1, 3, 3>. Lane 0 is %extract3[1];
; the second shuffle operand is poison, so index 3 makes lanes 1 and 2 poison.
; The result is widened to <4 x bfloat> and passed to inline asm pinned to
; s[8:9]. Every target is expected to need one s_lshr_b32 by 16 into s8.
7410 define void @s_shuffle_v3bf16_v3bf16__1_3_3() {
7411 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__1_3_3:
7413 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7414 ; GFX900-NEXT: ;;#ASMSTART
7415 ; GFX900-NEXT: ; def s[4:5]
7416 ; GFX900-NEXT: ;;#ASMEND
7417 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
7418 ; GFX900-NEXT: ;;#ASMSTART
7419 ; GFX900-NEXT: ; use s[8:9]
7420 ; GFX900-NEXT: ;;#ASMEND
7421 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7423 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__1_3_3:
7425 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7426 ; GFX90A-NEXT: ;;#ASMSTART
7427 ; GFX90A-NEXT: ; def s[4:5]
7428 ; GFX90A-NEXT: ;;#ASMEND
7429 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
7430 ; GFX90A-NEXT: ;;#ASMSTART
7431 ; GFX90A-NEXT: ; use s[8:9]
7432 ; GFX90A-NEXT: ;;#ASMEND
7433 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7435 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__1_3_3:
7437 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7438 ; GFX940-NEXT: ;;#ASMSTART
7439 ; GFX940-NEXT: ; def s[0:1]
7440 ; GFX940-NEXT: ;;#ASMEND
7441 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
7442 ; GFX940-NEXT: ;;#ASMSTART
7443 ; GFX940-NEXT: ; use s[8:9]
7444 ; GFX940-NEXT: ;;#ASMEND
7445 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7446 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7447 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7448 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 1, i32 3, i32 3>
7449 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7450 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input scalar shuffle with mask <2, 3, 3>. Lane 0 is %extract3[2];
; the second shuffle operand is poison, so index 3 makes lanes 1 and 2 poison.
; The result is widened to <4 x bfloat> and passed to inline asm pinned to
; s[8:9]. Every target is expected to need one s_mov_b32 into s8.
7454 define void @s_shuffle_v3bf16_v3bf16__2_3_3() {
7455 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__2_3_3:
7457 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7458 ; GFX900-NEXT: ;;#ASMSTART
7459 ; GFX900-NEXT: ; def s[4:5]
7460 ; GFX900-NEXT: ;;#ASMEND
7461 ; GFX900-NEXT: s_mov_b32 s8, s5
7462 ; GFX900-NEXT: ;;#ASMSTART
7463 ; GFX900-NEXT: ; use s[8:9]
7464 ; GFX900-NEXT: ;;#ASMEND
7465 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7467 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__2_3_3:
7469 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7470 ; GFX90A-NEXT: ;;#ASMSTART
7471 ; GFX90A-NEXT: ; def s[4:5]
7472 ; GFX90A-NEXT: ;;#ASMEND
7473 ; GFX90A-NEXT: s_mov_b32 s8, s5
7474 ; GFX90A-NEXT: ;;#ASMSTART
7475 ; GFX90A-NEXT: ; use s[8:9]
7476 ; GFX90A-NEXT: ;;#ASMEND
7477 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7479 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__2_3_3:
7481 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7482 ; GFX940-NEXT: ;;#ASMSTART
7483 ; GFX940-NEXT: ; def s[0:1]
7484 ; GFX940-NEXT: ;;#ASMEND
7485 ; GFX940-NEXT: s_mov_b32 s8, s1
7486 ; GFX940-NEXT: ;;#ASMSTART
7487 ; GFX940-NEXT: ; use s[8:9]
7488 ; GFX940-NEXT: ;;#ASMEND
7489 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7490 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7491 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7492 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 2, i32 3, i32 3>
7493 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7494 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Single-input scalar shuffle with mask <3, 3, 3>. The second shuffle operand
; is poison, so index 3 selects a poison lane and no lane of the result reads
; %extract3. The result is widened to <4 x bfloat> and passed to inline asm
; pinned to s[8:9]. All targets share one set of checks, which expect only
; the use asm; no def of the input appears in the output.
7498 define void @s_shuffle_v3bf16_v3bf16__3_3_3() {
7499 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__3_3_3:
7501 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7502 ; GFX9-NEXT: ;;#ASMSTART
7503 ; GFX9-NEXT: ; use s[8:9]
7504 ; GFX9-NEXT: ;;#ASMEND
7505 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7506 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7507 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7508 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> <i32 3, i32 3, i32 3>
7509 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7510 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Two-input scalar shuffle with mask <4, 3, 3>. Indices 3-5 select from
; %extract31, so lane 0 is %extract31[1] and lanes 1 and 2 are %extract31[0];
; %extract3 is never selected, and the expected output contains only one def.
; The result is widened to <4 x bfloat> (last lane poison) and passed to
; inline asm pinned to s[8:9]. Every target is expected to use s_lshr_b32 by
; 16, s_pack_ll_b32_b16 into s8 and s_mov_b32 into s9.
7514 define void @s_shuffle_v3bf16_v3bf16__4_3_3() {
7515 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__4_3_3:
7517 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7518 ; GFX900-NEXT: ;;#ASMSTART
7519 ; GFX900-NEXT: ; def s[4:5]
7520 ; GFX900-NEXT: ;;#ASMEND
7521 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
7522 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
7523 ; GFX900-NEXT: s_mov_b32 s9, s4
7524 ; GFX900-NEXT: ;;#ASMSTART
7525 ; GFX900-NEXT: ; use s[8:9]
7526 ; GFX900-NEXT: ;;#ASMEND
7527 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7529 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__4_3_3:
7531 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7532 ; GFX90A-NEXT: ;;#ASMSTART
7533 ; GFX90A-NEXT: ; def s[4:5]
7534 ; GFX90A-NEXT: ;;#ASMEND
7535 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
7536 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
7537 ; GFX90A-NEXT: s_mov_b32 s9, s4
7538 ; GFX90A-NEXT: ;;#ASMSTART
7539 ; GFX90A-NEXT: ; use s[8:9]
7540 ; GFX90A-NEXT: ;;#ASMEND
7541 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7543 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__4_3_3:
7545 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7546 ; GFX940-NEXT: ;;#ASMSTART
7547 ; GFX940-NEXT: ; def s[0:1]
7548 ; GFX940-NEXT: ;;#ASMEND
7549 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
7550 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
7551 ; GFX940-NEXT: s_mov_b32 s9, s0
7552 ; GFX940-NEXT: ;;#ASMSTART
7553 ; GFX940-NEXT: ; use s[8:9]
7554 ; GFX940-NEXT: ;;#ASMEND
7555 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7556 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7557 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7558 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7559 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7560 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 4, i32 3, i32 3>
7561 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7562 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Two-input scalar shuffle with mask <5, 3, 3>. Indices 3-5 select from
; %extract31, so lane 0 is %extract31[2] and lanes 1 and 2 are %extract31[0];
; %extract3 is never selected, and the expected output contains only one def.
; The result is widened to <4 x bfloat> (last lane poison) and passed to
; inline asm pinned to s[8:9]. Every target is expected to use
; s_pack_ll_b32_b16 into s8 and s_mov_b32 into s9.
7566 define void @s_shuffle_v3bf16_v3bf16__5_3_3() {
7567 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_3_3:
7569 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7570 ; GFX900-NEXT: ;;#ASMSTART
7571 ; GFX900-NEXT: ; def s[4:5]
7572 ; GFX900-NEXT: ;;#ASMEND
7573 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
7574 ; GFX900-NEXT: s_mov_b32 s9, s4
7575 ; GFX900-NEXT: ;;#ASMSTART
7576 ; GFX900-NEXT: ; use s[8:9]
7577 ; GFX900-NEXT: ;;#ASMEND
7578 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7580 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_3_3:
7582 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7583 ; GFX90A-NEXT: ;;#ASMSTART
7584 ; GFX90A-NEXT: ; def s[4:5]
7585 ; GFX90A-NEXT: ;;#ASMEND
7586 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
7587 ; GFX90A-NEXT: s_mov_b32 s9, s4
7588 ; GFX90A-NEXT: ;;#ASMSTART
7589 ; GFX90A-NEXT: ; use s[8:9]
7590 ; GFX90A-NEXT: ;;#ASMEND
7591 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7593 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_3_3:
7595 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7596 ; GFX940-NEXT: ;;#ASMSTART
7597 ; GFX940-NEXT: ; def s[0:1]
7598 ; GFX940-NEXT: ;;#ASMEND
7599 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
7600 ; GFX940-NEXT: s_mov_b32 s9, s0
7601 ; GFX940-NEXT: ;;#ASMSTART
7602 ; GFX940-NEXT: ; use s[8:9]
7603 ; GFX940-NEXT: ;;#ASMEND
7604 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7605 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7606 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7607 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7608 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7609 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 3, i32 3>
7610 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7611 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Two-input scalar shuffle with mask <5, poison, 3>. Indices 3-5 select from
; %extract31, so lane 0 is %extract31[2], lane 1 is poison and lane 2 is
; %extract31[0]; %extract3 is never selected, and the expected output contains
; only one def. The result is widened to <4 x bfloat> (last lane poison) and
; passed to inline asm pinned to s[8:9]. Every target is expected to need two
; s_mov_b32 copies, one into s8 and one into s9.
7615 define void @s_shuffle_v3bf16_v3bf16__5_u_3() {
7616 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_u_3:
7618 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7619 ; GFX900-NEXT: ;;#ASMSTART
7620 ; GFX900-NEXT: ; def s[4:5]
7621 ; GFX900-NEXT: ;;#ASMEND
7622 ; GFX900-NEXT: s_mov_b32 s8, s5
7623 ; GFX900-NEXT: s_mov_b32 s9, s4
7624 ; GFX900-NEXT: ;;#ASMSTART
7625 ; GFX900-NEXT: ; use s[8:9]
7626 ; GFX900-NEXT: ;;#ASMEND
7627 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7629 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_u_3:
7631 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7632 ; GFX90A-NEXT: ;;#ASMSTART
7633 ; GFX90A-NEXT: ; def s[4:5]
7634 ; GFX90A-NEXT: ;;#ASMEND
7635 ; GFX90A-NEXT: s_mov_b32 s8, s5
7636 ; GFX90A-NEXT: s_mov_b32 s9, s4
7637 ; GFX90A-NEXT: ;;#ASMSTART
7638 ; GFX90A-NEXT: ; use s[8:9]
7639 ; GFX90A-NEXT: ;;#ASMEND
7640 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7642 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_u_3:
7644 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7645 ; GFX940-NEXT: ;;#ASMSTART
7646 ; GFX940-NEXT: ; def s[0:1]
7647 ; GFX940-NEXT: ;;#ASMEND
7648 ; GFX940-NEXT: s_mov_b32 s8, s1
7649 ; GFX940-NEXT: s_mov_b32 s9, s0
7650 ; GFX940-NEXT: ;;#ASMSTART
7651 ; GFX940-NEXT: ; use s[8:9]
7652 ; GFX940-NEXT: ;;#ASMEND
7653 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7654 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7655 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7656 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7657 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7658 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 poison, i32 3>
7659 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7660 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Two-input scalar shuffle with mask <5, 0, 3>. Indices 0-2 select from
; %extract3 and 3-5 from %extract31, so lane 0 is %extract31[2], lane 1 is
; %extract3[0] and lane 2 is %extract31[0]. The result is widened to
; <4 x bfloat> (last lane poison) and passed to inline asm pinned to s[8:9].
; Every target is expected to use s_pack_ll_b32_b16 into s8 and s_mov_b32
; into s9 after the two defs.
7664 define void @s_shuffle_v3bf16_v3bf16__5_0_3() {
7665 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_0_3:
7667 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7668 ; GFX900-NEXT: ;;#ASMSTART
7669 ; GFX900-NEXT: ; def s[4:5]
7670 ; GFX900-NEXT: ;;#ASMEND
7671 ; GFX900-NEXT: ;;#ASMSTART
7672 ; GFX900-NEXT: ; def s[6:7]
7673 ; GFX900-NEXT: ;;#ASMEND
7674 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4
7675 ; GFX900-NEXT: s_mov_b32 s9, s6
7676 ; GFX900-NEXT: ;;#ASMSTART
7677 ; GFX900-NEXT: ; use s[8:9]
7678 ; GFX900-NEXT: ;;#ASMEND
7679 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7681 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_0_3:
7683 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7684 ; GFX90A-NEXT: ;;#ASMSTART
7685 ; GFX90A-NEXT: ; def s[4:5]
7686 ; GFX90A-NEXT: ;;#ASMEND
7687 ; GFX90A-NEXT: ;;#ASMSTART
7688 ; GFX90A-NEXT: ; def s[6:7]
7689 ; GFX90A-NEXT: ;;#ASMEND
7690 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4
7691 ; GFX90A-NEXT: s_mov_b32 s9, s6
7692 ; GFX90A-NEXT: ;;#ASMSTART
7693 ; GFX90A-NEXT: ; use s[8:9]
7694 ; GFX90A-NEXT: ;;#ASMEND
7695 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7697 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_0_3:
7699 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7700 ; GFX940-NEXT: ;;#ASMSTART
7701 ; GFX940-NEXT: ; def s[0:1]
7702 ; GFX940-NEXT: ;;#ASMEND
7703 ; GFX940-NEXT: ;;#ASMSTART
7704 ; GFX940-NEXT: ; def s[2:3]
7705 ; GFX940-NEXT: ;;#ASMEND
7706 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0
7707 ; GFX940-NEXT: s_mov_b32 s9, s2
7708 ; GFX940-NEXT: ;;#ASMSTART
7709 ; GFX940-NEXT: ; use s[8:9]
7710 ; GFX940-NEXT: ;;#ASMEND
7711 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7712 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7713 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7714 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7715 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7716 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 0, i32 3>
7717 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7718 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Two-input scalar shuffle with mask <5, 1, 3>. Indices 0-2 select from
; %extract3 and 3-5 from %extract31, so lane 0 is %extract31[2], lane 1 is
; %extract3[1] and lane 2 is %extract31[0]. The result is widened to
; <4 x bfloat> (last lane poison) and passed to inline asm pinned to s[8:9].
; Every target is expected to emit s_lshr_b32 by 16 between the two defs,
; then s_pack_ll_b32_b16 into s8 and s_mov_b32 into s9.
7722 define void @s_shuffle_v3bf16_v3bf16__5_1_3() {
7723 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_1_3:
7725 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7726 ; GFX900-NEXT: ;;#ASMSTART
7727 ; GFX900-NEXT: ; def s[4:5]
7728 ; GFX900-NEXT: ;;#ASMEND
7729 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
7730 ; GFX900-NEXT: ;;#ASMSTART
7731 ; GFX900-NEXT: ; def s[6:7]
7732 ; GFX900-NEXT: ;;#ASMEND
7733 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4
7734 ; GFX900-NEXT: s_mov_b32 s9, s6
7735 ; GFX900-NEXT: ;;#ASMSTART
7736 ; GFX900-NEXT: ; use s[8:9]
7737 ; GFX900-NEXT: ;;#ASMEND
7738 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7740 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_1_3:
7742 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7743 ; GFX90A-NEXT: ;;#ASMSTART
7744 ; GFX90A-NEXT: ; def s[4:5]
7745 ; GFX90A-NEXT: ;;#ASMEND
7746 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
7747 ; GFX90A-NEXT: ;;#ASMSTART
7748 ; GFX90A-NEXT: ; def s[6:7]
7749 ; GFX90A-NEXT: ;;#ASMEND
7750 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4
7751 ; GFX90A-NEXT: s_mov_b32 s9, s6
7752 ; GFX90A-NEXT: ;;#ASMSTART
7753 ; GFX90A-NEXT: ; use s[8:9]
7754 ; GFX90A-NEXT: ;;#ASMEND
7755 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7757 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_1_3:
7759 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7760 ; GFX940-NEXT: ;;#ASMSTART
7761 ; GFX940-NEXT: ; def s[0:1]
7762 ; GFX940-NEXT: ;;#ASMEND
7763 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
7764 ; GFX940-NEXT: ;;#ASMSTART
7765 ; GFX940-NEXT: ; def s[2:3]
7766 ; GFX940-NEXT: ;;#ASMEND
7767 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0
7768 ; GFX940-NEXT: s_mov_b32 s9, s2
7769 ; GFX940-NEXT: ;;#ASMSTART
7770 ; GFX940-NEXT: ; use s[8:9]
7771 ; GFX940-NEXT: ;;#ASMEND
7772 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7773 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7774 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7775 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7776 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7777 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 1, i32 3>
7778 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7779 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Two-input scalar shuffle with mask <5, 2, 3>. Indices 0-2 select from
; %extract3 and 3-5 from %extract31, so lane 0 is %extract31[2], lane 1 is
; %extract3[2] and lane 2 is %extract31[0]. The result is widened to
; <4 x bfloat> (last lane poison) and passed to inline asm pinned to s[8:9].
; Every target is expected to use s_pack_ll_b32_b16 into s8 and s_mov_b32
; into s9 after the two defs.
7783 define void @s_shuffle_v3bf16_v3bf16__5_2_3() {
7784 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_2_3:
7786 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7787 ; GFX900-NEXT: ;;#ASMSTART
7788 ; GFX900-NEXT: ; def s[4:5]
7789 ; GFX900-NEXT: ;;#ASMEND
7790 ; GFX900-NEXT: ;;#ASMSTART
7791 ; GFX900-NEXT: ; def s[6:7]
7792 ; GFX900-NEXT: ;;#ASMEND
7793 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5
7794 ; GFX900-NEXT: s_mov_b32 s9, s6
7795 ; GFX900-NEXT: ;;#ASMSTART
7796 ; GFX900-NEXT: ; use s[8:9]
7797 ; GFX900-NEXT: ;;#ASMEND
7798 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7800 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_2_3:
7802 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7803 ; GFX90A-NEXT: ;;#ASMSTART
7804 ; GFX90A-NEXT: ; def s[4:5]
7805 ; GFX90A-NEXT: ;;#ASMEND
7806 ; GFX90A-NEXT: ;;#ASMSTART
7807 ; GFX90A-NEXT: ; def s[6:7]
7808 ; GFX90A-NEXT: ;;#ASMEND
7809 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5
7810 ; GFX90A-NEXT: s_mov_b32 s9, s6
7811 ; GFX90A-NEXT: ;;#ASMSTART
7812 ; GFX90A-NEXT: ; use s[8:9]
7813 ; GFX90A-NEXT: ;;#ASMEND
7814 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7816 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_2_3:
7818 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7819 ; GFX940-NEXT: ;;#ASMSTART
7820 ; GFX940-NEXT: ; def s[0:1]
7821 ; GFX940-NEXT: ;;#ASMEND
7822 ; GFX940-NEXT: ;;#ASMSTART
7823 ; GFX940-NEXT: ; def s[2:3]
7824 ; GFX940-NEXT: ;;#ASMEND
7825 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1
7826 ; GFX940-NEXT: s_mov_b32 s9, s2
7827 ; GFX940-NEXT: ;;#ASMSTART
7828 ; GFX940-NEXT: ; use s[8:9]
7829 ; GFX940-NEXT: ;;#ASMEND
7830 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7831 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7832 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7833 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7834 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7835 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 2, i32 3>
7836 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7837 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Two-input scalar shuffle with mask <5, 4, 3>. Indices 3-5 select from
; %extract31, so the result is %extract31 with its three lanes reversed;
; %extract3 is never selected, and the expected output contains only one def.
; The result is widened to <4 x bfloat> (last lane poison) and passed to
; inline asm pinned to s[8:9]. Every target is expected to use s_lshr_b32 by
; 16, s_pack_ll_b32_b16 into s8 and s_mov_b32 into s9.
7841 define void @s_shuffle_v3bf16_v3bf16__5_4_3() {
7842 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_4_3:
7844 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7845 ; GFX900-NEXT: ;;#ASMSTART
7846 ; GFX900-NEXT: ; def s[4:5]
7847 ; GFX900-NEXT: ;;#ASMEND
7848 ; GFX900-NEXT: s_lshr_b32 s6, s4, 16
7849 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s6
7850 ; GFX900-NEXT: s_mov_b32 s9, s4
7851 ; GFX900-NEXT: ;;#ASMSTART
7852 ; GFX900-NEXT: ; use s[8:9]
7853 ; GFX900-NEXT: ;;#ASMEND
7854 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7856 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_4_3:
7858 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7859 ; GFX90A-NEXT: ;;#ASMSTART
7860 ; GFX90A-NEXT: ; def s[4:5]
7861 ; GFX90A-NEXT: ;;#ASMEND
7862 ; GFX90A-NEXT: s_lshr_b32 s6, s4, 16
7863 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s6
7864 ; GFX90A-NEXT: s_mov_b32 s9, s4
7865 ; GFX90A-NEXT: ;;#ASMSTART
7866 ; GFX90A-NEXT: ; use s[8:9]
7867 ; GFX90A-NEXT: ;;#ASMEND
7868 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7870 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_4_3:
7872 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7873 ; GFX940-NEXT: ;;#ASMSTART
7874 ; GFX940-NEXT: ; def s[0:1]
7875 ; GFX940-NEXT: ;;#ASMEND
7876 ; GFX940-NEXT: s_lshr_b32 s2, s0, 16
7877 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2
7878 ; GFX940-NEXT: s_mov_b32 s9, s0
7879 ; GFX940-NEXT: ;;#ASMSTART
7880 ; GFX940-NEXT: ; use s[8:9]
7881 ; GFX940-NEXT: ;;#ASMEND
7882 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7883 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7884 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7885 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7886 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7887 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 4, i32 3>
7888 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7889 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <poison, 4, 4>: lane 0 is
; unspecified, lanes 1 and 2 both take element 1 of the second operand
; (%extract31). No lane selects from %extract3, and the expected output below
; contains a single "; def".
7893 define void @s_shuffle_v3bf16_v3bf16__u_4_4() {
7894 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__u_4_4:
7896 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7897 ; GFX9-NEXT: ;;#ASMSTART
7898 ; GFX9-NEXT: ; def s[8:9]
7899 ; GFX9-NEXT: ;;#ASMEND
7900 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
7901 ; GFX9-NEXT: ;;#ASMSTART
7902 ; GFX9-NEXT: ; use s[8:9]
7903 ; GFX9-NEXT: ;;#ASMEND
7904 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
7905 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7906 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7907 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7908 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7909 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 poison, i32 4, i32 4>
7910 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7911 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <0, 4, 4>: lane 0 takes
; element 0 of the first operand (%extract3); lanes 1 and 2 both take element 1
; of the second operand (%extract31).
7915 define void @s_shuffle_v3bf16_v3bf16__0_4_4() {
7916 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__0_4_4:
7918 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7919 ; GFX900-NEXT: ;;#ASMSTART
7920 ; GFX900-NEXT: ; def s[6:7]
7921 ; GFX900-NEXT: ;;#ASMEND
7922 ; GFX900-NEXT: s_lshr_b32 s9, s6, 16
7923 ; GFX900-NEXT: ;;#ASMSTART
7924 ; GFX900-NEXT: ; def s[4:5]
7925 ; GFX900-NEXT: ;;#ASMEND
7926 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
7927 ; GFX900-NEXT: ;;#ASMSTART
7928 ; GFX900-NEXT: ; use s[8:9]
7929 ; GFX900-NEXT: ;;#ASMEND
7930 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7932 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__0_4_4:
7934 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7935 ; GFX90A-NEXT: ;;#ASMSTART
7936 ; GFX90A-NEXT: ; def s[6:7]
7937 ; GFX90A-NEXT: ;;#ASMEND
7938 ; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
7939 ; GFX90A-NEXT: ;;#ASMSTART
7940 ; GFX90A-NEXT: ; def s[4:5]
7941 ; GFX90A-NEXT: ;;#ASMEND
7942 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
7943 ; GFX90A-NEXT: ;;#ASMSTART
7944 ; GFX90A-NEXT: ; use s[8:9]
7945 ; GFX90A-NEXT: ;;#ASMEND
7946 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7948 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__0_4_4:
7950 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7951 ; GFX940-NEXT: ;;#ASMSTART
7952 ; GFX940-NEXT: ; def s[2:3]
7953 ; GFX940-NEXT: ;;#ASMEND
7954 ; GFX940-NEXT: s_lshr_b32 s9, s2, 16
7955 ; GFX940-NEXT: ;;#ASMSTART
7956 ; GFX940-NEXT: ; def s[0:1]
7957 ; GFX940-NEXT: ;;#ASMEND
7958 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
7959 ; GFX940-NEXT: ;;#ASMSTART
7960 ; GFX940-NEXT: ; use s[8:9]
7961 ; GFX940-NEXT: ;;#ASMEND
7962 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
7963 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7964 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
7965 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7966 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
7967 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 0, i32 4, i32 4>
7968 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7969 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <1, 4, 4>: lane 0 takes
; element 1 of the first operand (%extract3); lanes 1 and 2 both take element 1
; of the second operand (%extract31).
7973 define void @s_shuffle_v3bf16_v3bf16__1_4_4() {
7974 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__1_4_4:
7976 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7977 ; GFX900-NEXT: ;;#ASMSTART
7978 ; GFX900-NEXT: ; def s[4:5]
7979 ; GFX900-NEXT: ;;#ASMEND
7980 ; GFX900-NEXT: ;;#ASMSTART
7981 ; GFX900-NEXT: ; def s[6:7]
7982 ; GFX900-NEXT: ;;#ASMEND
7983 ; GFX900-NEXT: s_lshr_b32 s9, s6, 16
7984 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
7985 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
7986 ; GFX900-NEXT: ;;#ASMSTART
7987 ; GFX900-NEXT: ; use s[8:9]
7988 ; GFX900-NEXT: ;;#ASMEND
7989 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7991 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__1_4_4:
7993 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7994 ; GFX90A-NEXT: ;;#ASMSTART
7995 ; GFX90A-NEXT: ; def s[4:5]
7996 ; GFX90A-NEXT: ;;#ASMEND
7997 ; GFX90A-NEXT: ;;#ASMSTART
7998 ; GFX90A-NEXT: ; def s[6:7]
7999 ; GFX90A-NEXT: ;;#ASMEND
8000 ; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
8001 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
8002 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
8003 ; GFX90A-NEXT: ;;#ASMSTART
8004 ; GFX90A-NEXT: ; use s[8:9]
8005 ; GFX90A-NEXT: ;;#ASMEND
8006 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8008 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__1_4_4:
8010 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8011 ; GFX940-NEXT: ;;#ASMSTART
8012 ; GFX940-NEXT: ; def s[0:1]
8013 ; GFX940-NEXT: ;;#ASMEND
8014 ; GFX940-NEXT: ;;#ASMSTART
8015 ; GFX940-NEXT: ; def s[2:3]
8016 ; GFX940-NEXT: ;;#ASMEND
8017 ; GFX940-NEXT: s_lshr_b32 s9, s2, 16
8018 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
8019 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
8020 ; GFX940-NEXT: ;;#ASMSTART
8021 ; GFX940-NEXT: ; use s[8:9]
8022 ; GFX940-NEXT: ;;#ASMEND
8023 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8024 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8025 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8026 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8027 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8028 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 1, i32 4, i32 4>
8029 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8030 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <2, 4, 4>: lane 0 takes
; element 2 of the first operand (%extract3); lanes 1 and 2 both take element 1
; of the second operand (%extract31).
8034 define void @s_shuffle_v3bf16_v3bf16__2_4_4() {
8035 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__2_4_4:
8037 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8038 ; GFX900-NEXT: ;;#ASMSTART
8039 ; GFX900-NEXT: ; def s[6:7]
8040 ; GFX900-NEXT: ;;#ASMEND
8041 ; GFX900-NEXT: s_lshr_b32 s9, s6, 16
8042 ; GFX900-NEXT: ;;#ASMSTART
8043 ; GFX900-NEXT: ; def s[4:5]
8044 ; GFX900-NEXT: ;;#ASMEND
8045 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9
8046 ; GFX900-NEXT: ;;#ASMSTART
8047 ; GFX900-NEXT: ; use s[8:9]
8048 ; GFX900-NEXT: ;;#ASMEND
8049 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8051 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__2_4_4:
8053 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8054 ; GFX90A-NEXT: ;;#ASMSTART
8055 ; GFX90A-NEXT: ; def s[6:7]
8056 ; GFX90A-NEXT: ;;#ASMEND
8057 ; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
8058 ; GFX90A-NEXT: ;;#ASMSTART
8059 ; GFX90A-NEXT: ; def s[4:5]
8060 ; GFX90A-NEXT: ;;#ASMEND
8061 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9
8062 ; GFX90A-NEXT: ;;#ASMSTART
8063 ; GFX90A-NEXT: ; use s[8:9]
8064 ; GFX90A-NEXT: ;;#ASMEND
8065 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8067 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__2_4_4:
8069 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8070 ; GFX940-NEXT: ;;#ASMSTART
8071 ; GFX940-NEXT: ; def s[2:3]
8072 ; GFX940-NEXT: ;;#ASMEND
8073 ; GFX940-NEXT: s_lshr_b32 s9, s2, 16
8074 ; GFX940-NEXT: ;;#ASMSTART
8075 ; GFX940-NEXT: ; def s[0:1]
8076 ; GFX940-NEXT: ;;#ASMEND
8077 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9
8078 ; GFX940-NEXT: ;;#ASMSTART
8079 ; GFX940-NEXT: ; use s[8:9]
8080 ; GFX940-NEXT: ;;#ASMEND
8081 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8082 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8083 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8084 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8085 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8086 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 2, i32 4, i32 4>
8087 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8088 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <3, 4, 4>: every lane is
; taken from the second operand (%extract31 elements 0, 1, 1). No lane selects
; from %extract3, and the expected output below contains a single "; def".
8092 define void @s_shuffle_v3bf16_v3bf16__3_4_4() {
8093 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__3_4_4:
8095 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8096 ; GFX9-NEXT: ;;#ASMSTART
8097 ; GFX9-NEXT: ; def s[8:9]
8098 ; GFX9-NEXT: ;;#ASMEND
8099 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
8100 ; GFX9-NEXT: ;;#ASMSTART
8101 ; GFX9-NEXT: ; use s[8:9]
8102 ; GFX9-NEXT: ;;#ASMEND
8103 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8104 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8105 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8106 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8107 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8108 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 3, i32 4, i32 4>
8109 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8110 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <4, 4, 4>: all three lanes
; replicate element 1 of the second operand (%extract31). No lane selects from
; %extract3, and the expected output below contains a single "; def".
8114 define void @s_shuffle_v3bf16_v3bf16__4_4_4() {
8115 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__4_4_4:
8117 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8118 ; GFX900-NEXT: ;;#ASMSTART
8119 ; GFX900-NEXT: ; def s[4:5]
8120 ; GFX900-NEXT: ;;#ASMEND
8121 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
8122 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s9
8123 ; GFX900-NEXT: ;;#ASMSTART
8124 ; GFX900-NEXT: ; use s[8:9]
8125 ; GFX900-NEXT: ;;#ASMEND
8126 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8128 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__4_4_4:
8130 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8131 ; GFX90A-NEXT: ;;#ASMSTART
8132 ; GFX90A-NEXT: ; def s[4:5]
8133 ; GFX90A-NEXT: ;;#ASMEND
8134 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
8135 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s9
8136 ; GFX90A-NEXT: ;;#ASMSTART
8137 ; GFX90A-NEXT: ; use s[8:9]
8138 ; GFX90A-NEXT: ;;#ASMEND
8139 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8141 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__4_4_4:
8143 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8144 ; GFX940-NEXT: ;;#ASMSTART
8145 ; GFX940-NEXT: ; def s[0:1]
8146 ; GFX940-NEXT: ;;#ASMEND
8147 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
8148 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9
8149 ; GFX940-NEXT: ;;#ASMSTART
8150 ; GFX940-NEXT: ; use s[8:9]
8151 ; GFX940-NEXT: ;;#ASMEND
8152 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8153 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8154 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8155 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8156 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8157 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 4, i32 4, i32 4>
8158 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8159 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <5, 4, 4>: every lane is
; taken from the second operand (%extract31 elements 2, 1, 1). No lane selects
; from %extract3, and the expected output below contains a single "; def".
8163 define void @s_shuffle_v3bf16_v3bf16__5_4_4() {
8164 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_4_4:
8166 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8167 ; GFX900-NEXT: ;;#ASMSTART
8168 ; GFX900-NEXT: ; def s[4:5]
8169 ; GFX900-NEXT: ;;#ASMEND
8170 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
8171 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9
8172 ; GFX900-NEXT: ;;#ASMSTART
8173 ; GFX900-NEXT: ; use s[8:9]
8174 ; GFX900-NEXT: ;;#ASMEND
8175 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8177 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_4_4:
8179 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8180 ; GFX90A-NEXT: ;;#ASMSTART
8181 ; GFX90A-NEXT: ; def s[4:5]
8182 ; GFX90A-NEXT: ;;#ASMEND
8183 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
8184 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9
8185 ; GFX90A-NEXT: ;;#ASMSTART
8186 ; GFX90A-NEXT: ; use s[8:9]
8187 ; GFX90A-NEXT: ;;#ASMEND
8188 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8190 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_4_4:
8192 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8193 ; GFX940-NEXT: ;;#ASMSTART
8194 ; GFX940-NEXT: ; def s[0:1]
8195 ; GFX940-NEXT: ;;#ASMEND
8196 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
8197 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9
8198 ; GFX940-NEXT: ;;#ASMSTART
8199 ; GFX940-NEXT: ; use s[8:9]
8200 ; GFX940-NEXT: ;;#ASMEND
8201 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8202 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8203 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8204 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8205 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8206 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 4, i32 4>
8207 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8208 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <5, poison, 4>: lane 0
; takes element 2 and lane 2 takes element 1 of the second operand
; (%extract31); lane 1 is unspecified. No lane selects from %extract3, and the
; expected output below contains a single "; def".
8212 define void @s_shuffle_v3bf16_v3bf16__5_u_4() {
8213 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_u_4:
8215 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8216 ; GFX900-NEXT: ;;#ASMSTART
8217 ; GFX900-NEXT: ; def s[4:5]
8218 ; GFX900-NEXT: ;;#ASMEND
8219 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
8220 ; GFX900-NEXT: s_mov_b32 s8, s5
8221 ; GFX900-NEXT: ;;#ASMSTART
8222 ; GFX900-NEXT: ; use s[8:9]
8223 ; GFX900-NEXT: ;;#ASMEND
8224 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8226 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_u_4:
8228 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8229 ; GFX90A-NEXT: ;;#ASMSTART
8230 ; GFX90A-NEXT: ; def s[4:5]
8231 ; GFX90A-NEXT: ;;#ASMEND
8232 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
8233 ; GFX90A-NEXT: s_mov_b32 s8, s5
8234 ; GFX90A-NEXT: ;;#ASMSTART
8235 ; GFX90A-NEXT: ; use s[8:9]
8236 ; GFX90A-NEXT: ;;#ASMEND
8237 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8239 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_u_4:
8241 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8242 ; GFX940-NEXT: ;;#ASMSTART
8243 ; GFX940-NEXT: ; def s[0:1]
8244 ; GFX940-NEXT: ;;#ASMEND
8245 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
8246 ; GFX940-NEXT: s_mov_b32 s8, s1
8247 ; GFX940-NEXT: ;;#ASMSTART
8248 ; GFX940-NEXT: ; use s[8:9]
8249 ; GFX940-NEXT: ;;#ASMEND
8250 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8251 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8252 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8253 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8254 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8255 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 poison, i32 4>
8256 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8257 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <5, 0, 4>: lane 0 takes
; element 2 of the second operand (%extract31), lane 1 takes element 0 of the
; first operand (%extract3), lane 2 takes element 1 of the second operand.
8261 define void @s_shuffle_v3bf16_v3bf16__5_0_4() {
8262 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_0_4:
8264 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8265 ; GFX900-NEXT: ;;#ASMSTART
8266 ; GFX900-NEXT: ; def s[4:5]
8267 ; GFX900-NEXT: ;;#ASMEND
8268 ; GFX900-NEXT: ;;#ASMSTART
8269 ; GFX900-NEXT: ; def s[6:7]
8270 ; GFX900-NEXT: ;;#ASMEND
8271 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4
8272 ; GFX900-NEXT: s_lshr_b32 s9, s6, 16
8273 ; GFX900-NEXT: ;;#ASMSTART
8274 ; GFX900-NEXT: ; use s[8:9]
8275 ; GFX900-NEXT: ;;#ASMEND
8276 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8278 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_0_4:
8280 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8281 ; GFX90A-NEXT: ;;#ASMSTART
8282 ; GFX90A-NEXT: ; def s[4:5]
8283 ; GFX90A-NEXT: ;;#ASMEND
8284 ; GFX90A-NEXT: ;;#ASMSTART
8285 ; GFX90A-NEXT: ; def s[6:7]
8286 ; GFX90A-NEXT: ;;#ASMEND
8287 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4
8288 ; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
8289 ; GFX90A-NEXT: ;;#ASMSTART
8290 ; GFX90A-NEXT: ; use s[8:9]
8291 ; GFX90A-NEXT: ;;#ASMEND
8292 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8294 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_0_4:
8296 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8297 ; GFX940-NEXT: ;;#ASMSTART
8298 ; GFX940-NEXT: ; def s[0:1]
8299 ; GFX940-NEXT: ;;#ASMEND
8300 ; GFX940-NEXT: ;;#ASMSTART
8301 ; GFX940-NEXT: ; def s[2:3]
8302 ; GFX940-NEXT: ;;#ASMEND
8303 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0
8304 ; GFX940-NEXT: s_lshr_b32 s9, s2, 16
8305 ; GFX940-NEXT: ;;#ASMSTART
8306 ; GFX940-NEXT: ; use s[8:9]
8307 ; GFX940-NEXT: ;;#ASMEND
8308 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8309 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8310 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8311 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8312 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8313 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 0, i32 4>
8314 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8315 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <5, 1, 4>: lane 0 takes
; element 2 of the second operand (%extract31), lane 1 takes element 1 of the
; first operand (%extract3), lane 2 takes element 1 of the second operand.
8319 define void @s_shuffle_v3bf16_v3bf16__5_1_4() {
8320 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_1_4:
8322 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8323 ; GFX900-NEXT: ;;#ASMSTART
8324 ; GFX900-NEXT: ; def s[4:5]
8325 ; GFX900-NEXT: ;;#ASMEND
8326 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
8327 ; GFX900-NEXT: ;;#ASMSTART
8328 ; GFX900-NEXT: ; def s[6:7]
8329 ; GFX900-NEXT: ;;#ASMEND
8330 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4
8331 ; GFX900-NEXT: s_lshr_b32 s9, s6, 16
8332 ; GFX900-NEXT: ;;#ASMSTART
8333 ; GFX900-NEXT: ; use s[8:9]
8334 ; GFX900-NEXT: ;;#ASMEND
8335 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8337 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_1_4:
8339 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8340 ; GFX90A-NEXT: ;;#ASMSTART
8341 ; GFX90A-NEXT: ; def s[4:5]
8342 ; GFX90A-NEXT: ;;#ASMEND
8343 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
8344 ; GFX90A-NEXT: ;;#ASMSTART
8345 ; GFX90A-NEXT: ; def s[6:7]
8346 ; GFX90A-NEXT: ;;#ASMEND
8347 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4
8348 ; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
8349 ; GFX90A-NEXT: ;;#ASMSTART
8350 ; GFX90A-NEXT: ; use s[8:9]
8351 ; GFX90A-NEXT: ;;#ASMEND
8352 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8354 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_1_4:
8356 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8357 ; GFX940-NEXT: ;;#ASMSTART
8358 ; GFX940-NEXT: ; def s[0:1]
8359 ; GFX940-NEXT: ;;#ASMEND
8360 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
8361 ; GFX940-NEXT: ;;#ASMSTART
8362 ; GFX940-NEXT: ; def s[2:3]
8363 ; GFX940-NEXT: ;;#ASMEND
8364 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0
8365 ; GFX940-NEXT: s_lshr_b32 s9, s2, 16
8366 ; GFX940-NEXT: ;;#ASMSTART
8367 ; GFX940-NEXT: ; use s[8:9]
8368 ; GFX940-NEXT: ;;#ASMEND
8369 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8370 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8371 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8372 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8373 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8374 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 1, i32 4>
8375 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8376 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <5, 2, 4>: lane 0 takes
; element 2 of the second operand (%extract31), lane 1 takes element 2 of the
; first operand (%extract3), lane 2 takes element 1 of the second operand.
8380 define void @s_shuffle_v3bf16_v3bf16__5_2_4() {
8381 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_2_4:
8383 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8384 ; GFX900-NEXT: ;;#ASMSTART
8385 ; GFX900-NEXT: ; def s[4:5]
8386 ; GFX900-NEXT: ;;#ASMEND
8387 ; GFX900-NEXT: ;;#ASMSTART
8388 ; GFX900-NEXT: ; def s[6:7]
8389 ; GFX900-NEXT: ;;#ASMEND
8390 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5
8391 ; GFX900-NEXT: s_lshr_b32 s9, s6, 16
8392 ; GFX900-NEXT: ;;#ASMSTART
8393 ; GFX900-NEXT: ; use s[8:9]
8394 ; GFX900-NEXT: ;;#ASMEND
8395 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8397 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_2_4:
8399 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8400 ; GFX90A-NEXT: ;;#ASMSTART
8401 ; GFX90A-NEXT: ; def s[4:5]
8402 ; GFX90A-NEXT: ;;#ASMEND
8403 ; GFX90A-NEXT: ;;#ASMSTART
8404 ; GFX90A-NEXT: ; def s[6:7]
8405 ; GFX90A-NEXT: ;;#ASMEND
8406 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5
8407 ; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
8408 ; GFX90A-NEXT: ;;#ASMSTART
8409 ; GFX90A-NEXT: ; use s[8:9]
8410 ; GFX90A-NEXT: ;;#ASMEND
8411 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8413 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_2_4:
8415 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8416 ; GFX940-NEXT: ;;#ASMSTART
8417 ; GFX940-NEXT: ; def s[0:1]
8418 ; GFX940-NEXT: ;;#ASMEND
8419 ; GFX940-NEXT: ;;#ASMSTART
8420 ; GFX940-NEXT: ; def s[2:3]
8421 ; GFX940-NEXT: ;;#ASMEND
8422 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1
8423 ; GFX940-NEXT: s_lshr_b32 s9, s2, 16
8424 ; GFX940-NEXT: ;;#ASMSTART
8425 ; GFX940-NEXT: ; use s[8:9]
8426 ; GFX940-NEXT: ;;#ASMEND
8427 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8428 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8429 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8430 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8431 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8432 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 2, i32 4>
8433 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8434 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <5, 3, 4>: every lane is
; taken from the second operand (%extract31 elements 2, 0, 1). No lane selects
; from %extract3, and the expected output below contains a single "; def".
8438 define void @s_shuffle_v3bf16_v3bf16__5_3_4() {
8439 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_3_4:
8441 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8442 ; GFX900-NEXT: ;;#ASMSTART
8443 ; GFX900-NEXT: ; def s[4:5]
8444 ; GFX900-NEXT: ;;#ASMEND
8445 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
8446 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
8447 ; GFX900-NEXT: ;;#ASMSTART
8448 ; GFX900-NEXT: ; use s[8:9]
8449 ; GFX900-NEXT: ;;#ASMEND
8450 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8452 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_3_4:
8454 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8455 ; GFX90A-NEXT: ;;#ASMSTART
8456 ; GFX90A-NEXT: ; def s[4:5]
8457 ; GFX90A-NEXT: ;;#ASMEND
8458 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
8459 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
8460 ; GFX90A-NEXT: ;;#ASMSTART
8461 ; GFX90A-NEXT: ; use s[8:9]
8462 ; GFX90A-NEXT: ;;#ASMEND
8463 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8465 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_3_4:
8467 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8468 ; GFX940-NEXT: ;;#ASMSTART
8469 ; GFX940-NEXT: ; def s[0:1]
8470 ; GFX940-NEXT: ;;#ASMEND
8471 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
8472 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
8473 ; GFX940-NEXT: ;;#ASMSTART
8474 ; GFX940-NEXT: ; use s[8:9]
8475 ; GFX940-NEXT: ;;#ASMEND
8476 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8477 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8478 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8479 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8480 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8481 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 3, i32 4>
8482 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8483 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <poison, 5, 5>: lane 0 is
; unspecified, lanes 1 and 2 both take element 2 of the second operand
; (%extract31). No lane selects from %extract3, and the expected output below
; contains a single "; def".
8487 define void @s_shuffle_v3bf16_v3bf16__u_5_5() {
8488 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__u_5_5:
8490 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8491 ; GFX9-NEXT: ;;#ASMSTART
8492 ; GFX9-NEXT: ; def s[8:9]
8493 ; GFX9-NEXT: ;;#ASMEND
8494 ; GFX9-NEXT: s_lshl_b32 s8, s9, 16
8495 ; GFX9-NEXT: ;;#ASMSTART
8496 ; GFX9-NEXT: ; use s[8:9]
8497 ; GFX9-NEXT: ;;#ASMEND
8498 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8499 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8500 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8501 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8502 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8503 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 poison, i32 5, i32 5>
8504 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8505 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <0, 5, 5>: lane 0 takes
; element 0 of the first operand (%extract3); lanes 1 and 2 both take element 2
; of the second operand (%extract31).
8509 define void @s_shuffle_v3bf16_v3bf16__0_5_5() {
8510 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__0_5_5:
8512 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8513 ; GFX900-NEXT: ;;#ASMSTART
8514 ; GFX900-NEXT: ; def s[8:9]
8515 ; GFX900-NEXT: ;;#ASMEND
8516 ; GFX900-NEXT: ;;#ASMSTART
8517 ; GFX900-NEXT: ; def s[4:5]
8518 ; GFX900-NEXT: ;;#ASMEND
8519 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
8520 ; GFX900-NEXT: ;;#ASMSTART
8521 ; GFX900-NEXT: ; use s[8:9]
8522 ; GFX900-NEXT: ;;#ASMEND
8523 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8525 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__0_5_5:
8527 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8528 ; GFX90A-NEXT: ;;#ASMSTART
8529 ; GFX90A-NEXT: ; def s[8:9]
8530 ; GFX90A-NEXT: ;;#ASMEND
8531 ; GFX90A-NEXT: ;;#ASMSTART
8532 ; GFX90A-NEXT: ; def s[4:5]
8533 ; GFX90A-NEXT: ;;#ASMEND
8534 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
8535 ; GFX90A-NEXT: ;;#ASMSTART
8536 ; GFX90A-NEXT: ; use s[8:9]
8537 ; GFX90A-NEXT: ;;#ASMEND
8538 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8540 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__0_5_5:
8542 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8543 ; GFX940-NEXT: ;;#ASMSTART
8544 ; GFX940-NEXT: ; def s[8:9]
8545 ; GFX940-NEXT: ;;#ASMEND
8546 ; GFX940-NEXT: ;;#ASMSTART
8547 ; GFX940-NEXT: ; def s[0:1]
8548 ; GFX940-NEXT: ;;#ASMEND
8549 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
8550 ; GFX940-NEXT: ;;#ASMSTART
8551 ; GFX940-NEXT: ; use s[8:9]
8552 ; GFX940-NEXT: ;;#ASMEND
8553 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8554 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8555 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8556 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8557 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8558 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 0, i32 5, i32 5>
8559 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8560 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <1, 5, 5>: lane 0 takes
; element 1 of the first operand (%extract3); lanes 1 and 2 both take element 2
; of the second operand (%extract31).
8564 define void @s_shuffle_v3bf16_v3bf16__1_5_5() {
8565 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__1_5_5:
8567 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8568 ; GFX900-NEXT: ;;#ASMSTART
8569 ; GFX900-NEXT: ; def s[4:5]
8570 ; GFX900-NEXT: ;;#ASMEND
8571 ; GFX900-NEXT: ;;#ASMSTART
8572 ; GFX900-NEXT: ; def s[8:9]
8573 ; GFX900-NEXT: ;;#ASMEND
8574 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
8575 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
8576 ; GFX900-NEXT: ;;#ASMSTART
8577 ; GFX900-NEXT: ; use s[8:9]
8578 ; GFX900-NEXT: ;;#ASMEND
8579 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8581 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__1_5_5:
8583 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8584 ; GFX90A-NEXT: ;;#ASMSTART
8585 ; GFX90A-NEXT: ; def s[4:5]
8586 ; GFX90A-NEXT: ;;#ASMEND
8587 ; GFX90A-NEXT: ;;#ASMSTART
8588 ; GFX90A-NEXT: ; def s[8:9]
8589 ; GFX90A-NEXT: ;;#ASMEND
8590 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
8591 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
8592 ; GFX90A-NEXT: ;;#ASMSTART
8593 ; GFX90A-NEXT: ; use s[8:9]
8594 ; GFX90A-NEXT: ;;#ASMEND
8595 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8597 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__1_5_5:
8599 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8600 ; GFX940-NEXT: ;;#ASMSTART
8601 ; GFX940-NEXT: ; def s[0:1]
8602 ; GFX940-NEXT: ;;#ASMEND
8603 ; GFX940-NEXT: ;;#ASMSTART
8604 ; GFX940-NEXT: ; def s[8:9]
8605 ; GFX940-NEXT: ;;#ASMEND
8606 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
8607 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
8608 ; GFX940-NEXT: ;;#ASMSTART
8609 ; GFX940-NEXT: ; use s[8:9]
8610 ; GFX940-NEXT: ;;#ASMEND
8611 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8612 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8613 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8614 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8615 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8616 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 1, i32 5, i32 5>
8617 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8618 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <2, 5, 5>: lane 0 takes
; element 2 of the first operand (%extract3); lanes 1 and 2 both take element 2
; of the second operand (%extract31).
8622 define void @s_shuffle_v3bf16_v3bf16__2_5_5() {
8623 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__2_5_5:
8625 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8626 ; GFX900-NEXT: ;;#ASMSTART
8627 ; GFX900-NEXT: ; def s[8:9]
8628 ; GFX900-NEXT: ;;#ASMEND
8629 ; GFX900-NEXT: ;;#ASMSTART
8630 ; GFX900-NEXT: ; def s[4:5]
8631 ; GFX900-NEXT: ;;#ASMEND
8632 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9
8633 ; GFX900-NEXT: ;;#ASMSTART
8634 ; GFX900-NEXT: ; use s[8:9]
8635 ; GFX900-NEXT: ;;#ASMEND
8636 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8638 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__2_5_5:
8640 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8641 ; GFX90A-NEXT: ;;#ASMSTART
8642 ; GFX90A-NEXT: ; def s[8:9]
8643 ; GFX90A-NEXT: ;;#ASMEND
8644 ; GFX90A-NEXT: ;;#ASMSTART
8645 ; GFX90A-NEXT: ; def s[4:5]
8646 ; GFX90A-NEXT: ;;#ASMEND
8647 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9
8648 ; GFX90A-NEXT: ;;#ASMSTART
8649 ; GFX90A-NEXT: ; use s[8:9]
8650 ; GFX90A-NEXT: ;;#ASMEND
8651 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8653 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__2_5_5:
8655 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8656 ; GFX940-NEXT: ;;#ASMSTART
8657 ; GFX940-NEXT: ; def s[8:9]
8658 ; GFX940-NEXT: ;;#ASMEND
8659 ; GFX940-NEXT: ;;#ASMSTART
8660 ; GFX940-NEXT: ; def s[0:1]
8661 ; GFX940-NEXT: ;;#ASMEND
8662 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9
8663 ; GFX940-NEXT: ;;#ASMSTART
8664 ; GFX940-NEXT: ; use s[8:9]
8665 ; GFX940-NEXT: ;;#ASMEND
8666 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8667 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8668 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8669 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8670 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8671 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 2, i32 5, i32 5>
8672 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8673 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two <3 x bfloat> values with mask <3, 5, 5>: every lane is
; taken from the second operand (%extract31 elements 0, 2, 2). No lane selects
; from %extract3, and the expected output below contains a single "; def".
8677 define void @s_shuffle_v3bf16_v3bf16__3_5_5() {
8678 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__3_5_5:
8680 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8681 ; GFX9-NEXT: ;;#ASMSTART
8682 ; GFX9-NEXT: ; def s[8:9]
8683 ; GFX9-NEXT: ;;#ASMEND
8684 ; GFX9-NEXT: s_pack_ll_b32_b16 s8, s8, s9
8685 ; GFX9-NEXT: ;;#ASMSTART
8686 ; GFX9-NEXT: ; use s[8:9]
8687 ; GFX9-NEXT: ;;#ASMEND
8688 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; Both inputs are <4 x bfloat> inline-asm results ("=s") narrowed to their
; first three lanes; the shuffled value is widened back to <4 x bfloat> (last
; lane poison) and passed to the "use" asm in s[8:9].
8689 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8690 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8691 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8692 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8693 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 3, i32 5, i32 5>
8694 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8695 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle case with mask <4, 5, 5>.
; Mask indices 0..2 address %extract3 (lanes 0..2 of %vec0) and indices 3..5
; address %extract31 (lanes 0..2 of %vec1), so the selected value is
; <vec1[1], vec1[2], vec1[2]>; no lane of %vec0 is selected.
; NOTE(review): the expected output below contains a single "; def" block,
; presumably because the unused, non-sideeffect %vec0 asm is dropped - confirm.
8699 define void @s_shuffle_v3bf16_v3bf16__4_5_5() {
8700 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__4_5_5:
8702 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8703 ; GFX900-NEXT: ;;#ASMSTART
8704 ; GFX900-NEXT: ; def s[8:9]
8705 ; GFX900-NEXT: ;;#ASMEND
8706 ; GFX900-NEXT: s_lshr_b32 s4, s8, 16
8707 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
8708 ; GFX900-NEXT: ;;#ASMSTART
8709 ; GFX900-NEXT: ; use s[8:9]
8710 ; GFX900-NEXT: ;;#ASMEND
8711 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8713 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__4_5_5:
8715 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8716 ; GFX90A-NEXT: ;;#ASMSTART
8717 ; GFX90A-NEXT: ; def s[8:9]
8718 ; GFX90A-NEXT: ;;#ASMEND
8719 ; GFX90A-NEXT: s_lshr_b32 s4, s8, 16
8720 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
8721 ; GFX90A-NEXT: ;;#ASMSTART
8722 ; GFX90A-NEXT: ; use s[8:9]
8723 ; GFX90A-NEXT: ;;#ASMEND
8724 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8726 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__4_5_5:
8728 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8729 ; GFX940-NEXT: ;;#ASMSTART
8730 ; GFX940-NEXT: ; def s[8:9]
8731 ; GFX940-NEXT: ;;#ASMEND
8732 ; GFX940-NEXT: s_lshr_b32 s0, s8, 16
8733 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
8734 ; GFX940-NEXT: ;;#ASMSTART
8735 ; GFX940-NEXT: ; use s[8:9]
8736 ; GFX940-NEXT: ;;#ASMEND
8737 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are opaque <4 x bfloat> values produced by inline asm with an
; "=s" output constraint.
8738 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8739 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
; Narrow each 4-lane input to the 3-lane type under test (lanes 0, 1, 2).
8740 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8741 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; The shuffle under test.
8742 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 4, i32 5, i32 5>
; Widen back to 4 lanes (lane 3 is poison) and hand the value to a
; side-effecting asm consumer whose operand is constrained to s[8:9].
8743 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8744 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle case with mask <5, poison, 5> ("u" in the name
; marks the poison/undefined lane).
; Mask indices 3..5 address %extract31 (lanes 0..2 of %vec1), so lanes 0 and 2
; of the result are both vec1[2] and lane 1 is left unspecified; no lane of
; %vec0 is selected.
; NOTE(review): the expected output below contains a single "; def" block,
; presumably because the unused, non-sideeffect %vec0 asm is dropped - confirm.
8748 define void @s_shuffle_v3bf16_v3bf16__5_u_5() {
8749 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__5_u_5:
8751 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8752 ; GFX9-NEXT: ;;#ASMSTART
8753 ; GFX9-NEXT: ; def s[8:9]
8754 ; GFX9-NEXT: ;;#ASMEND
8755 ; GFX9-NEXT: s_mov_b32 s8, s9
8756 ; GFX9-NEXT: ;;#ASMSTART
8757 ; GFX9-NEXT: ; use s[8:9]
8758 ; GFX9-NEXT: ;;#ASMEND
8759 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; Both inputs are opaque <4 x bfloat> values produced by inline asm with an
; "=s" output constraint.
8760 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8761 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
; Narrow each 4-lane input to the 3-lane type under test (lanes 0, 1, 2).
8762 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8763 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; The shuffle under test.
8764 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 poison, i32 5>
; Widen back to 4 lanes (lane 3 is poison) and hand the value to a
; side-effecting asm consumer whose operand is constrained to s[8:9].
8765 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8766 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle case with mask <5, 0, 5>.
; Mask indices 0..2 address %extract3 (lanes 0..2 of %vec0) and indices 3..5
; address %extract31 (lanes 0..2 of %vec1), so the selected value is
; <vec1[2], vec0[0], vec1[2]>; both inputs contribute.
8770 define void @s_shuffle_v3bf16_v3bf16__5_0_5() {
8771 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_0_5:
8773 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8774 ; GFX900-NEXT: ;;#ASMSTART
8775 ; GFX900-NEXT: ; def s[8:9]
8776 ; GFX900-NEXT: ;;#ASMEND
8777 ; GFX900-NEXT: ;;#ASMSTART
8778 ; GFX900-NEXT: ; def s[4:5]
8779 ; GFX900-NEXT: ;;#ASMEND
8780 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4
8781 ; GFX900-NEXT: ;;#ASMSTART
8782 ; GFX900-NEXT: ; use s[8:9]
8783 ; GFX900-NEXT: ;;#ASMEND
8784 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8786 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_0_5:
8788 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8789 ; GFX90A-NEXT: ;;#ASMSTART
8790 ; GFX90A-NEXT: ; def s[8:9]
8791 ; GFX90A-NEXT: ;;#ASMEND
8792 ; GFX90A-NEXT: ;;#ASMSTART
8793 ; GFX90A-NEXT: ; def s[4:5]
8794 ; GFX90A-NEXT: ;;#ASMEND
8795 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4
8796 ; GFX90A-NEXT: ;;#ASMSTART
8797 ; GFX90A-NEXT: ; use s[8:9]
8798 ; GFX90A-NEXT: ;;#ASMEND
8799 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8801 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_0_5:
8803 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8804 ; GFX940-NEXT: ;;#ASMSTART
8805 ; GFX940-NEXT: ; def s[8:9]
8806 ; GFX940-NEXT: ;;#ASMEND
8807 ; GFX940-NEXT: ;;#ASMSTART
8808 ; GFX940-NEXT: ; def s[0:1]
8809 ; GFX940-NEXT: ;;#ASMEND
8810 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0
8811 ; GFX940-NEXT: ;;#ASMSTART
8812 ; GFX940-NEXT: ; use s[8:9]
8813 ; GFX940-NEXT: ;;#ASMEND
8814 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are opaque <4 x bfloat> values produced by inline asm with an
; "=s" output constraint.
8815 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8816 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
; Narrow each 4-lane input to the 3-lane type under test (lanes 0, 1, 2).
8817 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8818 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; The shuffle under test.
8819 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 0, i32 5>
; Widen back to 4 lanes (lane 3 is poison) and hand the value to a
; side-effecting asm consumer whose operand is constrained to s[8:9].
8820 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8821 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle case with mask <5, 1, 5>.
; Mask indices 0..2 address %extract3 (lanes 0..2 of %vec0) and indices 3..5
; address %extract31 (lanes 0..2 of %vec1), so the selected value is
; <vec1[2], vec0[1], vec1[2]>; both inputs contribute.
8825 define void @s_shuffle_v3bf16_v3bf16__5_1_5() {
8826 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_1_5:
8828 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8829 ; GFX900-NEXT: ;;#ASMSTART
8830 ; GFX900-NEXT: ; def s[4:5]
8831 ; GFX900-NEXT: ;;#ASMEND
8832 ; GFX900-NEXT: ;;#ASMSTART
8833 ; GFX900-NEXT: ; def s[8:9]
8834 ; GFX900-NEXT: ;;#ASMEND
8835 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
8836 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4
8837 ; GFX900-NEXT: ;;#ASMSTART
8838 ; GFX900-NEXT: ; use s[8:9]
8839 ; GFX900-NEXT: ;;#ASMEND
8840 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8842 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_1_5:
8844 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8845 ; GFX90A-NEXT: ;;#ASMSTART
8846 ; GFX90A-NEXT: ; def s[4:5]
8847 ; GFX90A-NEXT: ;;#ASMEND
8848 ; GFX90A-NEXT: ;;#ASMSTART
8849 ; GFX90A-NEXT: ; def s[8:9]
8850 ; GFX90A-NEXT: ;;#ASMEND
8851 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
8852 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4
8853 ; GFX90A-NEXT: ;;#ASMSTART
8854 ; GFX90A-NEXT: ; use s[8:9]
8855 ; GFX90A-NEXT: ;;#ASMEND
8856 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8858 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_1_5:
8860 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8861 ; GFX940-NEXT: ;;#ASMSTART
8862 ; GFX940-NEXT: ; def s[0:1]
8863 ; GFX940-NEXT: ;;#ASMEND
8864 ; GFX940-NEXT: ;;#ASMSTART
8865 ; GFX940-NEXT: ; def s[8:9]
8866 ; GFX940-NEXT: ;;#ASMEND
8867 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
8868 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0
8869 ; GFX940-NEXT: ;;#ASMSTART
8870 ; GFX940-NEXT: ; use s[8:9]
8871 ; GFX940-NEXT: ;;#ASMEND
8872 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are opaque <4 x bfloat> values produced by inline asm with an
; "=s" output constraint.
8873 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8874 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
; Narrow each 4-lane input to the 3-lane type under test (lanes 0, 1, 2).
8875 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8876 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; The shuffle under test.
8877 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 1, i32 5>
; Widen back to 4 lanes (lane 3 is poison) and hand the value to a
; side-effecting asm consumer whose operand is constrained to s[8:9].
8878 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8879 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle case with mask <5, 2, 5>.
; Mask indices 0..2 address %extract3 (lanes 0..2 of %vec0) and indices 3..5
; address %extract31 (lanes 0..2 of %vec1), so the selected value is
; <vec1[2], vec0[2], vec1[2]>; both inputs contribute.
8883 define void @s_shuffle_v3bf16_v3bf16__5_2_5() {
8884 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_2_5:
8886 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8887 ; GFX900-NEXT: ;;#ASMSTART
8888 ; GFX900-NEXT: ; def s[8:9]
8889 ; GFX900-NEXT: ;;#ASMEND
8890 ; GFX900-NEXT: ;;#ASMSTART
8891 ; GFX900-NEXT: ; def s[4:5]
8892 ; GFX900-NEXT: ;;#ASMEND
8893 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s5
8894 ; GFX900-NEXT: ;;#ASMSTART
8895 ; GFX900-NEXT: ; use s[8:9]
8896 ; GFX900-NEXT: ;;#ASMEND
8897 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8899 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_2_5:
8901 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8902 ; GFX90A-NEXT: ;;#ASMSTART
8903 ; GFX90A-NEXT: ; def s[8:9]
8904 ; GFX90A-NEXT: ;;#ASMEND
8905 ; GFX90A-NEXT: ;;#ASMSTART
8906 ; GFX90A-NEXT: ; def s[4:5]
8907 ; GFX90A-NEXT: ;;#ASMEND
8908 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s5
8909 ; GFX90A-NEXT: ;;#ASMSTART
8910 ; GFX90A-NEXT: ; use s[8:9]
8911 ; GFX90A-NEXT: ;;#ASMEND
8912 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8914 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_2_5:
8916 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8917 ; GFX940-NEXT: ;;#ASMSTART
8918 ; GFX940-NEXT: ; def s[8:9]
8919 ; GFX940-NEXT: ;;#ASMEND
8920 ; GFX940-NEXT: ;;#ASMSTART
8921 ; GFX940-NEXT: ; def s[0:1]
8922 ; GFX940-NEXT: ;;#ASMEND
8923 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1
8924 ; GFX940-NEXT: ;;#ASMSTART
8925 ; GFX940-NEXT: ; use s[8:9]
8926 ; GFX940-NEXT: ;;#ASMEND
8927 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are opaque <4 x bfloat> values produced by inline asm with an
; "=s" output constraint.
8928 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8929 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
; Narrow each 4-lane input to the 3-lane type under test (lanes 0, 1, 2).
8930 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8931 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; The shuffle under test.
8932 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 2, i32 5>
; Widen back to 4 lanes (lane 3 is poison) and hand the value to a
; side-effecting asm consumer whose operand is constrained to s[8:9].
8933 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8934 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle case with mask <5, 3, 5>.
; Mask indices 0..2 address %extract3 (lanes 0..2 of %vec0) and indices 3..5
; address %extract31 (lanes 0..2 of %vec1), so the selected value is
; <vec1[2], vec1[0], vec1[2]>; no lane of %vec0 is selected.
; NOTE(review): the expected output below contains a single "; def" block,
; presumably because the unused, non-sideeffect %vec0 asm is dropped - confirm.
8938 define void @s_shuffle_v3bf16_v3bf16__5_3_5() {
8939 ; GFX9-LABEL: s_shuffle_v3bf16_v3bf16__5_3_5:
8941 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8942 ; GFX9-NEXT: ;;#ASMSTART
8943 ; GFX9-NEXT: ; def s[8:9]
8944 ; GFX9-NEXT: ;;#ASMEND
8945 ; GFX9-NEXT: s_pack_ll_b32_b16 s8, s9, s8
8946 ; GFX9-NEXT: ;;#ASMSTART
8947 ; GFX9-NEXT: ; use s[8:9]
8948 ; GFX9-NEXT: ;;#ASMEND
8949 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; Both inputs are opaque <4 x bfloat> values produced by inline asm with an
; "=s" output constraint.
8950 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8951 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
; Narrow each 4-lane input to the 3-lane type under test (lanes 0, 1, 2).
8952 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
8953 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; The shuffle under test.
8954 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 3, i32 5>
; Widen back to 4 lanes (lane 3 is poison) and hand the value to a
; side-effecting asm consumer whose operand is constrained to s[8:9].
8955 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8956 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register shuffle case with mask <5, 4, 5>.
; Mask indices 0..2 address %extract3 (lanes 0..2 of %vec0) and indices 3..5
; address %extract31 (lanes 0..2 of %vec1), so the selected value is
; <vec1[2], vec1[1], vec1[2]>; no lane of %vec0 is selected.
; NOTE(review): the expected output below contains a single "; def" block,
; presumably because the unused, non-sideeffect %vec0 asm is dropped - confirm.
8960 define void @s_shuffle_v3bf16_v3bf16__5_4_5() {
8961 ; GFX900-LABEL: s_shuffle_v3bf16_v3bf16__5_4_5:
8963 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8964 ; GFX900-NEXT: ;;#ASMSTART
8965 ; GFX900-NEXT: ; def s[8:9]
8966 ; GFX900-NEXT: ;;#ASMEND
8967 ; GFX900-NEXT: s_lshr_b32 s4, s8, 16
8968 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4
8969 ; GFX900-NEXT: ;;#ASMSTART
8970 ; GFX900-NEXT: ; use s[8:9]
8971 ; GFX900-NEXT: ;;#ASMEND
8972 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8974 ; GFX90A-LABEL: s_shuffle_v3bf16_v3bf16__5_4_5:
8976 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8977 ; GFX90A-NEXT: ;;#ASMSTART
8978 ; GFX90A-NEXT: ; def s[8:9]
8979 ; GFX90A-NEXT: ;;#ASMEND
8980 ; GFX90A-NEXT: s_lshr_b32 s4, s8, 16
8981 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4
8982 ; GFX90A-NEXT: ;;#ASMSTART
8983 ; GFX90A-NEXT: ; use s[8:9]
8984 ; GFX90A-NEXT: ;;#ASMEND
8985 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8987 ; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_4_5:
8989 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8990 ; GFX940-NEXT: ;;#ASMSTART
8991 ; GFX940-NEXT: ; def s[8:9]
8992 ; GFX940-NEXT: ;;#ASMEND
8993 ; GFX940-NEXT: s_lshr_b32 s0, s8, 16
8994 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0
8995 ; GFX940-NEXT: ;;#ASMSTART
8996 ; GFX940-NEXT: ; use s[8:9]
8997 ; GFX940-NEXT: ;;#ASMEND
8998 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are opaque <4 x bfloat> values produced by inline asm with an
; "=s" output constraint.
8999 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9000 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
; Narrow each 4-lane input to the 3-lane type under test (lanes 0, 1, 2).
9001 %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
9002 %extract31 = shufflevector <4 x bfloat> %vec1, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 2>
; The shuffle under test.
9003 %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> %extract31, <3 x i32> <i32 5, i32 4, i32 5>
; Widen back to 4 lanes (lane 3 is poison) and hand the value to a
; side-effecting asm consumer whose operand is constrained to s[8:9].
9004 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9005 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
9008 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
9009 ; GFX90APLUS: {{.*}}