1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
; Shuffle with an entirely poison mask: no lane of %shuf is taken from %vec0.
; The expected output below contains only s_waitcnt and s_setpc_b64, i.e. no
; asm def and no store are expected to survive.
7 define void @v_shuffle_v3bf16_v4bf16__u_u_u(ptr addrspace(1) inreg %ptr) {
8 ; GFX9-LABEL: v_shuffle_v3bf16_v4bf16__u_u_u:
10 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
13 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> poison
14 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <0, poison, poison>: result lane 0 is element 0 of %vec0; lanes 1 and 2
; are poison. The expected output stores the asm-defined register pair as-is
; (a dword from v0 plus a short from v1 at offset 4) with no extra ALU op.
18 define void @v_shuffle_v3bf16_v4bf16__0_u_u(ptr addrspace(1) inreg %ptr) {
19 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__0_u_u:
21 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
23 ; GFX900-NEXT: ;;#ASMSTART
24 ; GFX900-NEXT: ; def v[0:1]
25 ; GFX900-NEXT: ;;#ASMEND
26 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
27 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
28 ; GFX900-NEXT: s_waitcnt vmcnt(0)
29 ; GFX900-NEXT: s_setpc_b64 s[30:31]
31 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__0_u_u:
33 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
35 ; GFX90A-NEXT: ;;#ASMSTART
36 ; GFX90A-NEXT: ; def v[0:1]
37 ; GFX90A-NEXT: ;;#ASMEND
38 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
39 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
40 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
41 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
43 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_u_u:
45 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
47 ; GFX940-NEXT: ;;#ASMSTART
48 ; GFX940-NEXT: ; def v[0:1]
49 ; GFX940-NEXT: ;;#ASMEND
50 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
51 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
52 ; GFX940-NEXT: s_waitcnt vmcnt(0)
53 ; GFX940-NEXT: s_setpc_b64 s[30:31]
54 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
55 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
56 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <1, poison, poison>: result lane 0 is element 1 of %vec0; lanes 1 and 2
; are poison. The expected output applies v_alignbit_b32 with a shift of 16 to
; v0 (first register of the asm-defined pair) and emits a single dword store.
60 define void @v_shuffle_v3bf16_v4bf16__1_u_u(ptr addrspace(1) inreg %ptr) {
61 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__1_u_u:
63 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64 ; GFX900-NEXT: ;;#ASMSTART
65 ; GFX900-NEXT: ; def v[0:1]
66 ; GFX900-NEXT: ;;#ASMEND
67 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
68 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
69 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
70 ; GFX900-NEXT: s_waitcnt vmcnt(0)
71 ; GFX900-NEXT: s_setpc_b64 s[30:31]
73 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__1_u_u:
75 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GFX90A-NEXT: ;;#ASMSTART
77 ; GFX90A-NEXT: ; def v[0:1]
78 ; GFX90A-NEXT: ;;#ASMEND
79 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
80 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
81 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
82 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
83 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
85 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_u_u:
87 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88 ; GFX940-NEXT: ;;#ASMSTART
89 ; GFX940-NEXT: ; def v[0:1]
90 ; GFX940-NEXT: ;;#ASMEND
91 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
92 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
93 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
94 ; GFX940-NEXT: s_waitcnt vmcnt(0)
95 ; GFX940-NEXT: s_setpc_b64 s[30:31]
96 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
97 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
98 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <2, poison, poison>: result lane 0 is element 2 of %vec0; lanes 1 and 2
; are poison. The expected output stores v1 (second register of the asm-defined
; pair) directly as one dword, with no ALU op in between.
102 define void @v_shuffle_v3bf16_v4bf16__2_u_u(ptr addrspace(1) inreg %ptr) {
103 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__2_u_u:
105 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
107 ; GFX900-NEXT: ;;#ASMSTART
108 ; GFX900-NEXT: ; def v[0:1]
109 ; GFX900-NEXT: ;;#ASMEND
110 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
111 ; GFX900-NEXT: s_waitcnt vmcnt(0)
112 ; GFX900-NEXT: s_setpc_b64 s[30:31]
114 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__2_u_u:
116 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
118 ; GFX90A-NEXT: ;;#ASMSTART
119 ; GFX90A-NEXT: ; def v[0:1]
120 ; GFX90A-NEXT: ;;#ASMEND
121 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
122 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
123 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
125 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_u_u:
127 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
129 ; GFX940-NEXT: ;;#ASMSTART
130 ; GFX940-NEXT: ; def v[0:1]
131 ; GFX940-NEXT: ;;#ASMEND
132 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
133 ; GFX940-NEXT: s_waitcnt vmcnt(0)
134 ; GFX940-NEXT: s_setpc_b64 s[30:31]
135 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
136 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 2, i32 poison, i32 poison>
137 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, poison, poison>: result lane 0 is element 3 of %vec0; lanes 1 and 2
; are poison. The expected output applies v_alignbit_b32 with a shift of 16 to
; v1 (second register of the asm-defined pair) and emits a single dword store.
141 define void @v_shuffle_v3bf16_v4bf16__3_u_u(ptr addrspace(1) inreg %ptr) {
142 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__3_u_u:
144 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145 ; GFX900-NEXT: ;;#ASMSTART
146 ; GFX900-NEXT: ; def v[0:1]
147 ; GFX900-NEXT: ;;#ASMEND
148 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
149 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16
150 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
151 ; GFX900-NEXT: s_waitcnt vmcnt(0)
152 ; GFX900-NEXT: s_setpc_b64 s[30:31]
154 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__3_u_u:
156 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX90A-NEXT: ;;#ASMSTART
158 ; GFX90A-NEXT: ; def v[0:1]
159 ; GFX90A-NEXT: ;;#ASMEND
160 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
161 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16
162 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
163 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
164 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
166 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_u_u:
168 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
169 ; GFX940-NEXT: ;;#ASMSTART
170 ; GFX940-NEXT: ; def v[0:1]
171 ; GFX940-NEXT: ;;#ASMEND
172 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
173 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16
174 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
175 ; GFX940-NEXT: s_waitcnt vmcnt(0)
176 ; GFX940-NEXT: s_setpc_b64 s[30:31]
177 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
178 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 3, i32 poison, i32 poison>
179 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <4, poison, poison>: index 4 addresses element 0 of the second shuffle
; operand, which is poison in this function, so no lane of %shuf is defined.
; The expected output below contains only s_waitcnt and s_setpc_b64 (no store).
183 define void @v_shuffle_v3bf16_v4bf16__4_u_u(ptr addrspace(1) inreg %ptr) {
184 ; GFX9-LABEL: v_shuffle_v3bf16_v4bf16__4_u_u:
186 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187 ; GFX9-NEXT: s_setpc_b64 s[30:31]
188 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
189 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 4, i32 poison, i32 poison>
190 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <5, poison, poison>: result lane 0 is element 1 of %vec1 (indices 4-7
; address the second operand); %vec0 is not referenced by the mask. The
; expected output shows a single asm def, v_alignbit_b32 by 16 on v0, and one
; dword store.
194 define void @v_shuffle_v3bf16_v4bf16__5_u_u(ptr addrspace(1) inreg %ptr) {
195 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__5_u_u:
197 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198 ; GFX900-NEXT: ;;#ASMSTART
199 ; GFX900-NEXT: ; def v[0:1]
200 ; GFX900-NEXT: ;;#ASMEND
201 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
202 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
203 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
204 ; GFX900-NEXT: s_waitcnt vmcnt(0)
205 ; GFX900-NEXT: s_setpc_b64 s[30:31]
207 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__5_u_u:
209 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210 ; GFX90A-NEXT: ;;#ASMSTART
211 ; GFX90A-NEXT: ; def v[0:1]
212 ; GFX90A-NEXT: ;;#ASMEND
213 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
214 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
215 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
216 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
217 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
219 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_u_u:
221 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222 ; GFX940-NEXT: ;;#ASMSTART
223 ; GFX940-NEXT: ; def v[0:1]
224 ; GFX940-NEXT: ;;#ASMEND
225 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
226 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
227 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
228 ; GFX940-NEXT: s_waitcnt vmcnt(0)
229 ; GFX940-NEXT: s_setpc_b64 s[30:31]
230 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
231 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
232 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 poison, i32 poison>
233 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <6, poison, poison>: result lane 0 is element 2 of %vec1; %vec0 is not
; referenced by the mask. The expected output shows a single asm def and stores
; v1 directly as one dword, with no ALU op in between.
237 define void @v_shuffle_v3bf16_v4bf16__6_u_u(ptr addrspace(1) inreg %ptr) {
238 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__6_u_u:
240 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
241 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
242 ; GFX900-NEXT: ;;#ASMSTART
243 ; GFX900-NEXT: ; def v[0:1]
244 ; GFX900-NEXT: ;;#ASMEND
245 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
246 ; GFX900-NEXT: s_waitcnt vmcnt(0)
247 ; GFX900-NEXT: s_setpc_b64 s[30:31]
249 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__6_u_u:
251 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
252 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
253 ; GFX90A-NEXT: ;;#ASMSTART
254 ; GFX90A-NEXT: ; def v[0:1]
255 ; GFX90A-NEXT: ;;#ASMEND
256 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
257 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
258 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
260 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_u_u:
262 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
264 ; GFX940-NEXT: ;;#ASMSTART
265 ; GFX940-NEXT: ; def v[0:1]
266 ; GFX940-NEXT: ;;#ASMEND
267 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
268 ; GFX940-NEXT: s_waitcnt vmcnt(0)
269 ; GFX940-NEXT: s_setpc_b64 s[30:31]
270 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
271 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
272 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 poison, i32 poison>
273 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <7, poison, poison>: result lane 0 is element 3 of %vec1; %vec0 is not
; referenced by the mask. The expected output shows a single asm def,
; v_alignbit_b32 by 16 on v1, and one dword store.
277 define void @v_shuffle_v3bf16_v4bf16__7_u_u(ptr addrspace(1) inreg %ptr) {
278 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_u_u:
280 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281 ; GFX900-NEXT: ;;#ASMSTART
282 ; GFX900-NEXT: ; def v[0:1]
283 ; GFX900-NEXT: ;;#ASMEND
284 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
285 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16
286 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
287 ; GFX900-NEXT: s_waitcnt vmcnt(0)
288 ; GFX900-NEXT: s_setpc_b64 s[30:31]
290 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_u_u:
292 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293 ; GFX90A-NEXT: ;;#ASMSTART
294 ; GFX90A-NEXT: ; def v[0:1]
295 ; GFX90A-NEXT: ;;#ASMEND
296 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
297 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16
298 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
299 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
300 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
302 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_u:
304 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305 ; GFX940-NEXT: ;;#ASMSTART
306 ; GFX940-NEXT: ; def v[0:1]
307 ; GFX940-NEXT: ;;#ASMEND
308 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
309 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16
310 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
311 ; GFX940-NEXT: s_waitcnt vmcnt(0)
312 ; GFX940-NEXT: s_setpc_b64 s[30:31]
313 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
314 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
315 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 poison>
316 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <7, 0, poison>: lane 0 is element 3 of %vec1, lane 1 is element 0 of
; %vec0, lane 2 is poison. Both asm defs appear in the expected output; the two
; lanes are combined with one v_alignbit_b32 by 16 and written as one dword.
320 define void @v_shuffle_v3bf16_v4bf16__7_0_u(ptr addrspace(1) inreg %ptr) {
321 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_0_u:
323 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
324 ; GFX900-NEXT: ;;#ASMSTART
325 ; GFX900-NEXT: ; def v[0:1]
326 ; GFX900-NEXT: ;;#ASMEND
327 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
328 ; GFX900-NEXT: ;;#ASMSTART
329 ; GFX900-NEXT: ; def v[1:2]
330 ; GFX900-NEXT: ;;#ASMEND
331 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16
332 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
333 ; GFX900-NEXT: s_waitcnt vmcnt(0)
334 ; GFX900-NEXT: s_setpc_b64 s[30:31]
336 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_0_u:
338 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339 ; GFX90A-NEXT: ;;#ASMSTART
340 ; GFX90A-NEXT: ; def v[0:1]
341 ; GFX90A-NEXT: ;;#ASMEND
342 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
343 ; GFX90A-NEXT: ;;#ASMSTART
344 ; GFX90A-NEXT: ; def v[2:3]
345 ; GFX90A-NEXT: ;;#ASMEND
346 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16
347 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
348 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
349 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
351 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_u:
353 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354 ; GFX940-NEXT: ;;#ASMSTART
355 ; GFX940-NEXT: ; def v[0:1]
356 ; GFX940-NEXT: ;;#ASMEND
357 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
358 ; GFX940-NEXT: ;;#ASMSTART
359 ; GFX940-NEXT: ; def v[2:3]
360 ; GFX940-NEXT: ;;#ASMEND
361 ; GFX940-NEXT: s_nop 0
362 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16
363 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
364 ; GFX940-NEXT: s_waitcnt vmcnt(0)
365 ; GFX940-NEXT: s_setpc_b64 s[30:31]
366 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
367 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
368 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 poison>
369 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <7, 1, poison>: lane 0 is element 3 of %vec1, lane 1 is element 1 of
; %vec0, lane 2 is poison. Both asm defs appear in the expected output; the two
; lanes are combined with v_perm_b32 using the selector constant 0x7060302 and
; written as one dword.
373 define void @v_shuffle_v3bf16_v4bf16__7_1_u(ptr addrspace(1) inreg %ptr) {
374 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_1_u:
376 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377 ; GFX900-NEXT: ;;#ASMSTART
378 ; GFX900-NEXT: ; def v[0:1]
379 ; GFX900-NEXT: ;;#ASMEND
380 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
381 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
382 ; GFX900-NEXT: ;;#ASMSTART
383 ; GFX900-NEXT: ; def v[1:2]
384 ; GFX900-NEXT: ;;#ASMEND
385 ; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4
386 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
387 ; GFX900-NEXT: s_waitcnt vmcnt(0)
388 ; GFX900-NEXT: s_setpc_b64 s[30:31]
390 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_1_u:
392 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393 ; GFX90A-NEXT: ;;#ASMSTART
394 ; GFX90A-NEXT: ; def v[0:1]
395 ; GFX90A-NEXT: ;;#ASMEND
396 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
397 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
398 ; GFX90A-NEXT: ;;#ASMSTART
399 ; GFX90A-NEXT: ; def v[2:3]
400 ; GFX90A-NEXT: ;;#ASMEND
401 ; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
402 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
403 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
404 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
406 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_u:
408 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
409 ; GFX940-NEXT: ;;#ASMSTART
410 ; GFX940-NEXT: ; def v[0:1]
411 ; GFX940-NEXT: ;;#ASMEND
412 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
413 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
414 ; GFX940-NEXT: ;;#ASMSTART
415 ; GFX940-NEXT: ; def v[2:3]
416 ; GFX940-NEXT: ;;#ASMEND
417 ; GFX940-NEXT: s_nop 0
418 ; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
419 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
420 ; GFX940-NEXT: s_waitcnt vmcnt(0)
421 ; GFX940-NEXT: s_setpc_b64 s[30:31]
422 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
423 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
424 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 poison>
425 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <7, 2, poison>: lane 0 is element 3 of %vec1, lane 1 is element 2 of
; %vec0, lane 2 is poison. Both asm defs appear in the expected output; the two
; lanes are combined with one v_alignbit_b32 by 16 (v1 and v3) and written as
; one dword.
429 define void @v_shuffle_v3bf16_v4bf16__7_2_u(ptr addrspace(1) inreg %ptr) {
430 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_2_u:
432 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
433 ; GFX900-NEXT: ;;#ASMSTART
434 ; GFX900-NEXT: ; def v[0:1]
435 ; GFX900-NEXT: ;;#ASMEND
436 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
437 ; GFX900-NEXT: ;;#ASMSTART
438 ; GFX900-NEXT: ; def v[2:3]
439 ; GFX900-NEXT: ;;#ASMEND
440 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16
441 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
442 ; GFX900-NEXT: s_waitcnt vmcnt(0)
443 ; GFX900-NEXT: s_setpc_b64 s[30:31]
445 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_2_u:
447 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
448 ; GFX90A-NEXT: ;;#ASMSTART
449 ; GFX90A-NEXT: ; def v[0:1]
450 ; GFX90A-NEXT: ;;#ASMEND
451 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
452 ; GFX90A-NEXT: ;;#ASMSTART
453 ; GFX90A-NEXT: ; def v[2:3]
454 ; GFX90A-NEXT: ;;#ASMEND
455 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16
456 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
457 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
458 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
460 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_u:
462 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
463 ; GFX940-NEXT: ;;#ASMSTART
464 ; GFX940-NEXT: ; def v[0:1]
465 ; GFX940-NEXT: ;;#ASMEND
466 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
467 ; GFX940-NEXT: ;;#ASMSTART
468 ; GFX940-NEXT: ; def v[2:3]
469 ; GFX940-NEXT: ;;#ASMEND
470 ; GFX940-NEXT: s_nop 0
471 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16
472 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
473 ; GFX940-NEXT: s_waitcnt vmcnt(0)
474 ; GFX940-NEXT: s_setpc_b64 s[30:31]
475 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
476 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
477 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 poison>
478 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <7, 3, poison>: lane 0 is element 3 of %vec1, lane 1 is element 3 of
; %vec0, lane 2 is poison. Both asm defs appear in the expected output; the two
; lanes are combined with v_perm_b32 (v1 and v3) using the selector constant
; 0x7060302 and written as one dword.
482 define void @v_shuffle_v3bf16_v4bf16__7_3_u(ptr addrspace(1) inreg %ptr) {
483 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_3_u:
485 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
486 ; GFX900-NEXT: ;;#ASMSTART
487 ; GFX900-NEXT: ; def v[0:1]
488 ; GFX900-NEXT: ;;#ASMEND
489 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
490 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
491 ; GFX900-NEXT: ;;#ASMSTART
492 ; GFX900-NEXT: ; def v[2:3]
493 ; GFX900-NEXT: ;;#ASMEND
494 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
495 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
496 ; GFX900-NEXT: s_waitcnt vmcnt(0)
497 ; GFX900-NEXT: s_setpc_b64 s[30:31]
499 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_3_u:
501 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
502 ; GFX90A-NEXT: ;;#ASMSTART
503 ; GFX90A-NEXT: ; def v[0:1]
504 ; GFX90A-NEXT: ;;#ASMEND
505 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
506 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
507 ; GFX90A-NEXT: ;;#ASMSTART
508 ; GFX90A-NEXT: ; def v[2:3]
509 ; GFX90A-NEXT: ;;#ASMEND
510 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
511 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
512 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
513 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
515 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_u:
517 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
518 ; GFX940-NEXT: ;;#ASMSTART
519 ; GFX940-NEXT: ; def v[0:1]
520 ; GFX940-NEXT: ;;#ASMEND
521 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
522 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
523 ; GFX940-NEXT: ;;#ASMSTART
524 ; GFX940-NEXT: ; def v[2:3]
525 ; GFX940-NEXT: ;;#ASMEND
526 ; GFX940-NEXT: s_nop 0
527 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
528 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
529 ; GFX940-NEXT: s_waitcnt vmcnt(0)
530 ; GFX940-NEXT: s_setpc_b64 s[30:31]
531 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
532 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
533 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 poison>
534 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <7, 4, poison>: lane 0 is element 3 of %vec1, lane 1 is element 0 of
; %vec1, lane 2 is poison; %vec0 is not referenced by the mask. The expected
; output shows a single asm def and one v_alignbit_b32 by 16 over v0 and v1,
; followed by one dword store.
538 define void @v_shuffle_v3bf16_v4bf16__7_4_u(ptr addrspace(1) inreg %ptr) {
539 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_4_u:
541 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
542 ; GFX900-NEXT: ;;#ASMSTART
543 ; GFX900-NEXT: ; def v[0:1]
544 ; GFX900-NEXT: ;;#ASMEND
545 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
546 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16
547 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
548 ; GFX900-NEXT: s_waitcnt vmcnt(0)
549 ; GFX900-NEXT: s_setpc_b64 s[30:31]
551 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_4_u:
553 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554 ; GFX90A-NEXT: ;;#ASMSTART
555 ; GFX90A-NEXT: ; def v[0:1]
556 ; GFX90A-NEXT: ;;#ASMEND
557 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
558 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16
559 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
560 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
561 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
563 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_u:
565 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566 ; GFX940-NEXT: ;;#ASMSTART
567 ; GFX940-NEXT: ; def v[0:1]
568 ; GFX940-NEXT: ;;#ASMEND
569 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
570 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16
571 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
572 ; GFX940-NEXT: s_waitcnt vmcnt(0)
573 ; GFX940-NEXT: s_setpc_b64 s[30:31]
574 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
575 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
576 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 poison>
577 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <7, 5, poison>: lane 0 is element 3 of %vec1, lane 1 is element 1 of
; %vec1, lane 2 is poison; %vec0 is not referenced by the mask. The expected
; output shows a single asm def and one v_perm_b32 over v0 and v1 with the
; selector constant 0x7060302, followed by one dword store.
581 define void @v_shuffle_v3bf16_v4bf16__7_5_u(ptr addrspace(1) inreg %ptr) {
582 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_5_u:
584 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585 ; GFX900-NEXT: ;;#ASMSTART
586 ; GFX900-NEXT: ; def v[0:1]
587 ; GFX900-NEXT: ;;#ASMEND
588 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
589 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
590 ; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4
591 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
592 ; GFX900-NEXT: s_waitcnt vmcnt(0)
593 ; GFX900-NEXT: s_setpc_b64 s[30:31]
595 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_5_u:
597 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598 ; GFX90A-NEXT: ;;#ASMSTART
599 ; GFX90A-NEXT: ; def v[0:1]
600 ; GFX90A-NEXT: ;;#ASMEND
601 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
602 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
603 ; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4
604 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
605 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
606 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
608 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_u:
610 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
611 ; GFX940-NEXT: ;;#ASMSTART
612 ; GFX940-NEXT: ; def v[0:1]
613 ; GFX940-NEXT: ;;#ASMEND
614 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
615 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
616 ; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2
617 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
618 ; GFX940-NEXT: s_waitcnt vmcnt(0)
619 ; GFX940-NEXT: s_setpc_b64 s[30:31]
620 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
621 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
622 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 poison>
623 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <7, 6, poison>: lane 0 is element 3 of %vec1, lane 1 is element 2 of
; %vec1, lane 2 is poison; %vec0 is not referenced by the mask. The expected
; output shows a single asm def and one v_alignbit_b32 by 16 with v1 as both
; sources, followed by one dword store.
627 define void @v_shuffle_v3bf16_v4bf16__7_6_u(ptr addrspace(1) inreg %ptr) {
628 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_6_u:
630 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631 ; GFX900-NEXT: ;;#ASMSTART
632 ; GFX900-NEXT: ; def v[0:1]
633 ; GFX900-NEXT: ;;#ASMEND
634 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
635 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16
636 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
637 ; GFX900-NEXT: s_waitcnt vmcnt(0)
638 ; GFX900-NEXT: s_setpc_b64 s[30:31]
640 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_6_u:
642 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
643 ; GFX90A-NEXT: ;;#ASMSTART
644 ; GFX90A-NEXT: ; def v[0:1]
645 ; GFX90A-NEXT: ;;#ASMEND
646 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
647 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16
648 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
649 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
650 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
652 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_u:
654 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
655 ; GFX940-NEXT: ;;#ASMSTART
656 ; GFX940-NEXT: ; def v[0:1]
657 ; GFX940-NEXT: ;;#ASMEND
658 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
659 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16
660 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
661 ; GFX940-NEXT: s_waitcnt vmcnt(0)
662 ; GFX940-NEXT: s_setpc_b64 s[30:31]
663 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
664 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
665 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 poison>
666 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <7, 7, poison>: lanes 0 and 1 are both element 3 of %vec1, lane 2 is
; poison; %vec0 is not referenced by the mask. The expected output shows a
; single asm def and one v_perm_b32 with v1 as both sources and the selector
; constant 0x7060302, followed by one dword store.
670 define void @v_shuffle_v3bf16_v4bf16__7_7_u(ptr addrspace(1) inreg %ptr) {
671 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_7_u:
673 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
674 ; GFX900-NEXT: ;;#ASMSTART
675 ; GFX900-NEXT: ; def v[0:1]
676 ; GFX900-NEXT: ;;#ASMEND
677 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
678 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
679 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
680 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
681 ; GFX900-NEXT: s_waitcnt vmcnt(0)
682 ; GFX900-NEXT: s_setpc_b64 s[30:31]
684 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_7_u:
686 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
687 ; GFX90A-NEXT: ;;#ASMSTART
688 ; GFX90A-NEXT: ; def v[0:1]
689 ; GFX90A-NEXT: ;;#ASMEND
690 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
691 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
692 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
693 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
694 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
695 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
697 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_u:
699 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
700 ; GFX940-NEXT: ;;#ASMSTART
701 ; GFX940-NEXT: ; def v[0:1]
702 ; GFX940-NEXT: ;;#ASMEND
703 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
704 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
705 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
706 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
707 ; GFX940-NEXT: s_waitcnt vmcnt(0)
708 ; GFX940-NEXT: s_setpc_b64 s[30:31]
709 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
710 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
711 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 poison>
712 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <7, 7, 0>: lanes 0 and 1 are both element 3 of %vec1, lane 2 is element 0
; of %vec0, so all three result lanes are defined. The expected output builds
; the first two lanes with v_perm_b32 (selector 0x7060302) for a dword store and
; writes the third lane with global_store_short from v0 at offset 4.
716 define void @v_shuffle_v3bf16_v4bf16__7_7_0(ptr addrspace(1) inreg %ptr) {
717 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_7_0:
719 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
720 ; GFX900-NEXT: ;;#ASMSTART
721 ; GFX900-NEXT: ; def v[0:1]
722 ; GFX900-NEXT: ;;#ASMEND
723 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
724 ; GFX900-NEXT: ;;#ASMSTART
725 ; GFX900-NEXT: ; def v[1:2]
726 ; GFX900-NEXT: ;;#ASMEND
727 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
728 ; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4
729 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
730 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
731 ; GFX900-NEXT: s_waitcnt vmcnt(0)
732 ; GFX900-NEXT: s_setpc_b64 s[30:31]
734 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_7_0:
736 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
737 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
738 ; GFX90A-NEXT: ;;#ASMSTART
739 ; GFX90A-NEXT: ; def v[0:1]
740 ; GFX90A-NEXT: ;;#ASMEND
741 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
742 ; GFX90A-NEXT: ;;#ASMSTART
743 ; GFX90A-NEXT: ; def v[2:3]
744 ; GFX90A-NEXT: ;;#ASMEND
745 ; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4
746 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
747 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
748 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
749 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
751 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_0:
753 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
754 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
755 ; GFX940-NEXT: ;;#ASMSTART
756 ; GFX940-NEXT: ; def v[0:1]
757 ; GFX940-NEXT: ;;#ASMEND
758 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
759 ; GFX940-NEXT: ;;#ASMSTART
760 ; GFX940-NEXT: ; def v[2:3]
761 ; GFX940-NEXT: ;;#ASMEND
762 ; GFX940-NEXT: s_nop 0
763 ; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2
764 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
765 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
766 ; GFX940-NEXT: s_waitcnt vmcnt(0)
767 ; GFX940-NEXT: s_setpc_b64 s[30:31]
768 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
769 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
770 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 0>
771 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <7, 7, 1>: lanes 0 and 1 are both element 3 of %vec1, lane 2 is element 1
; of %vec0, so all three result lanes are defined. The expected output builds
; the first two lanes with v_perm_b32 (selector 0x7060302) for a dword store and
; writes the third lane with global_store_short_d16_hi from v0 at offset 4.
775 define void @v_shuffle_v3bf16_v4bf16__7_7_1(ptr addrspace(1) inreg %ptr) {
776 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_7_1:
778 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
779 ; GFX900-NEXT: ;;#ASMSTART
780 ; GFX900-NEXT: ; def v[0:1]
781 ; GFX900-NEXT: ;;#ASMEND
782 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
783 ; GFX900-NEXT: ;;#ASMSTART
784 ; GFX900-NEXT: ; def v[1:2]
785 ; GFX900-NEXT: ;;#ASMEND
786 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
787 ; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4
788 ; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4
789 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
790 ; GFX900-NEXT: s_waitcnt vmcnt(0)
791 ; GFX900-NEXT: s_setpc_b64 s[30:31]
793 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_7_1:
795 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
796 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
797 ; GFX90A-NEXT: ;;#ASMSTART
798 ; GFX90A-NEXT: ; def v[0:1]
799 ; GFX90A-NEXT: ;;#ASMEND
800 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
801 ; GFX90A-NEXT: ;;#ASMSTART
802 ; GFX90A-NEXT: ; def v[2:3]
803 ; GFX90A-NEXT: ;;#ASMEND
804 ; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4
805 ; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4
806 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
807 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
808 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
810 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_1:
812 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
813 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
814 ; GFX940-NEXT: ;;#ASMSTART
815 ; GFX940-NEXT: ; def v[0:1]
816 ; GFX940-NEXT: ;;#ASMEND
817 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
818 ; GFX940-NEXT: ;;#ASMSTART
819 ; GFX940-NEXT: ; def v[2:3]
820 ; GFX940-NEXT: ;;#ASMEND
821 ; GFX940-NEXT: s_nop 0
822 ; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2
823 ; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
824 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
825 ; GFX940-NEXT: s_waitcnt vmcnt(0)
826 ; GFX940-NEXT: s_setpc_b64 s[30:31]
827 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
828 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
829 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 1>
830 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <7, 7, 2>: lanes 0 and 1 are both element 3 of %vec1, lane 2 is element 2
; of %vec0, so all three result lanes are defined. The expected output builds
; the first two lanes with v_perm_b32 (selector 0x7060302) for a dword store and
; writes the third lane with global_store_short from v1 at offset 4.
834 define void @v_shuffle_v3bf16_v4bf16__7_7_2(ptr addrspace(1) inreg %ptr) {
835 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_7_2:
837 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
838 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
839 ; GFX900-NEXT: ;;#ASMSTART
840 ; GFX900-NEXT: ; def v[0:1]
841 ; GFX900-NEXT: ;;#ASMEND
842 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
843 ; GFX900-NEXT: ;;#ASMSTART
844 ; GFX900-NEXT: ; def v[2:3]
845 ; GFX900-NEXT: ;;#ASMEND
846 ; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4
847 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
848 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
849 ; GFX900-NEXT: s_waitcnt vmcnt(0)
850 ; GFX900-NEXT: s_setpc_b64 s[30:31]
852 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_7_2:
854 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
855 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
856 ; GFX90A-NEXT: ;;#ASMSTART
857 ; GFX90A-NEXT: ; def v[0:1]
858 ; GFX90A-NEXT: ;;#ASMEND
859 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
860 ; GFX90A-NEXT: ;;#ASMSTART
861 ; GFX90A-NEXT: ; def v[2:3]
862 ; GFX90A-NEXT: ;;#ASMEND
863 ; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
864 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
865 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
866 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
867 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
869 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_2:
871 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
872 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
873 ; GFX940-NEXT: ;;#ASMSTART
874 ; GFX940-NEXT: ; def v[0:1]
875 ; GFX940-NEXT: ;;#ASMEND
876 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
877 ; GFX940-NEXT: ;;#ASMSTART
878 ; GFX940-NEXT: ; def v[2:3]
879 ; GFX940-NEXT: ;;#ASMEND
880 ; GFX940-NEXT: s_nop 0
881 ; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
882 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
883 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
884 ; GFX940-NEXT: s_waitcnt vmcnt(0)
885 ; GFX940-NEXT: s_setpc_b64 s[30:31]
886 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
887 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
888 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 2>
889 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask <7, 7, 3>: indices 0-3 select from %vec0 and 4-7 from
; %vec1 (4 elements each). Lanes 0 and 1 take %vec1[3] and lane 2 takes
; %vec0[3]; the <3 x bfloat> result is stored to %ptr.
; In the autogenerated expectations below, a v_perm_b32 of v3 with itself feeds
; the dword store, and the 16-bit store at offset 4 uses the d16_hi form on v1.
893 define void @v_shuffle_v3bf16_v4bf16__7_7_3(ptr addrspace(1) inreg %ptr) {
894 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_7_3:
896 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
897 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
898 ; GFX900-NEXT: ;;#ASMSTART
899 ; GFX900-NEXT: ; def v[0:1]
900 ; GFX900-NEXT: ;;#ASMEND
901 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
902 ; GFX900-NEXT: ;;#ASMSTART
903 ; GFX900-NEXT: ; def v[2:3]
904 ; GFX900-NEXT: ;;#ASMEND
905 ; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4
906 ; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
907 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
908 ; GFX900-NEXT: s_waitcnt vmcnt(0)
909 ; GFX900-NEXT: s_setpc_b64 s[30:31]
911 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_7_3:
913 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
914 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
915 ; GFX90A-NEXT: ;;#ASMSTART
916 ; GFX90A-NEXT: ; def v[0:1]
917 ; GFX90A-NEXT: ;;#ASMEND
918 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
919 ; GFX90A-NEXT: ;;#ASMSTART
920 ; GFX90A-NEXT: ; def v[2:3]
921 ; GFX90A-NEXT: ;;#ASMEND
922 ; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
923 ; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
924 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
925 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
926 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
928 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_3:
930 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
931 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
932 ; GFX940-NEXT: ;;#ASMSTART
933 ; GFX940-NEXT: ; def v[0:1]
934 ; GFX940-NEXT: ;;#ASMEND
935 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
936 ; GFX940-NEXT: ;;#ASMSTART
937 ; GFX940-NEXT: ; def v[2:3]
938 ; GFX940-NEXT: ;;#ASMEND
939 ; GFX940-NEXT: s_nop 0
940 ; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
941 ; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
942 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
943 ; GFX940-NEXT: s_waitcnt vmcnt(0)
944 ; GFX940-NEXT: s_setpc_b64 s[30:31]
945 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
946 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
947 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 3>
948 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask <7, 7, 4>: every index is in the 4-7 range, so all three
; lanes come from %vec1 (%vec1[3], %vec1[3], %vec1[0]); %vec0 is not referenced
; by the mask. The <3 x bfloat> result is stored to %ptr.
; The autogenerated expectations below contain only one inline-asm def
; (v[0:1]); v_perm_b32 of v1 with itself feeds the dword store and v0 is
; written by the 16-bit store at offset 4.
952 define void @v_shuffle_v3bf16_v4bf16__7_7_4(ptr addrspace(1) inreg %ptr) {
953 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_7_4:
955 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
956 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
957 ; GFX900-NEXT: ;;#ASMSTART
958 ; GFX900-NEXT: ; def v[0:1]
959 ; GFX900-NEXT: ;;#ASMEND
960 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
961 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
962 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
963 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
964 ; GFX900-NEXT: s_waitcnt vmcnt(0)
965 ; GFX900-NEXT: s_setpc_b64 s[30:31]
967 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_7_4:
969 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
970 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
971 ; GFX90A-NEXT: ;;#ASMSTART
972 ; GFX90A-NEXT: ; def v[0:1]
973 ; GFX90A-NEXT: ;;#ASMEND
974 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
975 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
976 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
977 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
978 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
979 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
981 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_4:
983 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
985 ; GFX940-NEXT: ;;#ASMSTART
986 ; GFX940-NEXT: ; def v[0:1]
987 ; GFX940-NEXT: ;;#ASMEND
988 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
989 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
990 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
991 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
992 ; GFX940-NEXT: s_waitcnt vmcnt(0)
993 ; GFX940-NEXT: s_setpc_b64 s[30:31]
994 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
995 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
996 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 4>
997 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask <7, 7, 5>: every index is in the 4-7 range, so all three
; lanes come from %vec1 (%vec1[3], %vec1[3], %vec1[1]); %vec0 is not referenced
; by the mask. The <3 x bfloat> result is stored to %ptr.
; The autogenerated expectations below contain only one inline-asm def
; (v[0:1]); v_perm_b32 of v1 with itself feeds the dword store and the 16-bit
; store at offset 4 uses the d16_hi form on v0.
1001 define void @v_shuffle_v3bf16_v4bf16__7_7_5(ptr addrspace(1) inreg %ptr) {
1002 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_7_5:
1004 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1005 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1006 ; GFX900-NEXT: ;;#ASMSTART
1007 ; GFX900-NEXT: ; def v[0:1]
1008 ; GFX900-NEXT: ;;#ASMEND
1009 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1010 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
1011 ; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4
1012 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1013 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1014 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1016 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_7_5:
1018 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1019 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1020 ; GFX90A-NEXT: ;;#ASMSTART
1021 ; GFX90A-NEXT: ; def v[0:1]
1022 ; GFX90A-NEXT: ;;#ASMEND
1023 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1024 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
1025 ; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4
1026 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1027 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1028 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1030 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_5:
1032 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1033 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1034 ; GFX940-NEXT: ;;#ASMSTART
1035 ; GFX940-NEXT: ; def v[0:1]
1036 ; GFX940-NEXT: ;;#ASMEND
1037 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1038 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
1039 ; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
1040 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1041 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1042 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1043 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1044 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1045 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 5>
1046 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask <7, 7, 6>: every index is in the 4-7 range, so all three
; lanes come from %vec1 (%vec1[3], %vec1[3], %vec1[2]); %vec0 is not referenced
; by the mask. The <3 x bfloat> result is stored to %ptr.
; The autogenerated expectations below contain only one inline-asm def
; (v[0:1]); v_perm_b32 of v1 with itself is written to v0 for the dword store,
; and v1 itself is written by the 16-bit store at offset 4.
1050 define void @v_shuffle_v3bf16_v4bf16__7_7_6(ptr addrspace(1) inreg %ptr) {
1051 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_7_6:
1053 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1054 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1055 ; GFX900-NEXT: ;;#ASMSTART
1056 ; GFX900-NEXT: ; def v[0:1]
1057 ; GFX900-NEXT: ;;#ASMEND
1058 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1059 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
1060 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
1061 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1062 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1063 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1065 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_7_6:
1067 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1068 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1069 ; GFX90A-NEXT: ;;#ASMSTART
1070 ; GFX90A-NEXT: ; def v[0:1]
1071 ; GFX90A-NEXT: ;;#ASMEND
1072 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1073 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
1074 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
1075 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1076 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1077 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1079 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_6:
1081 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1082 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1083 ; GFX940-NEXT: ;;#ASMSTART
1084 ; GFX940-NEXT: ; def v[0:1]
1085 ; GFX940-NEXT: ;;#ASMEND
1086 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1087 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
1088 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
1089 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1090 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1091 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1092 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1093 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1094 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 6>
1095 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask <7, 7, 7>: all three lanes take %vec1[3] (index 7 is the
; last element of the second 4-element operand); %vec0 is not referenced by the
; mask. The <3 x bfloat> result is stored to %ptr.
; The autogenerated expectations below contain only one inline-asm def
; (v[0:1]). Unlike the neighbouring cases, the 16-bit value is produced with
; v_lshrrev_b32 by 16 into v0 and the dword store is checked before the short
; store.
1099 define void @v_shuffle_v3bf16_v4bf16__7_7_7(ptr addrspace(1) inreg %ptr) {
1100 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_7_7:
1102 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1103 ; GFX900-NEXT: ;;#ASMSTART
1104 ; GFX900-NEXT: ; def v[0:1]
1105 ; GFX900-NEXT: ;;#ASMEND
1106 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1107 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1108 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1109 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
1110 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1111 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1112 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1113 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1115 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_7_7:
1117 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118 ; GFX90A-NEXT: ;;#ASMSTART
1119 ; GFX90A-NEXT: ; def v[0:1]
1120 ; GFX90A-NEXT: ;;#ASMEND
1121 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1122 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1123 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1124 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
1125 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1126 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1127 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1128 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1130 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_7:
1132 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1133 ; GFX940-NEXT: ;;#ASMSTART
1134 ; GFX940-NEXT: ; def v[0:1]
1135 ; GFX940-NEXT: ;;#ASMEND
1136 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1137 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1138 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1139 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
1140 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1141 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1142 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1143 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1144 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1145 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1146 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 7>
1147 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask <poison, 0, 0>: single-source shuffle (the second operand
; is poison). Lane 0 is unspecified, lanes 1 and 2 both take %vec0[0]; the
; <3 x bfloat> result is stored to %ptr.
; In the autogenerated expectations below the dword is formed with
; v_lshlrev_b32 by 16 from v0, and v0 itself is written by the 16-bit store at
; offset 4.
1151 define void @v_shuffle_v3bf16_v4bf16__u_0_0(ptr addrspace(1) inreg %ptr) {
1152 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__u_0_0:
1154 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1155 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1156 ; GFX900-NEXT: ;;#ASMSTART
1157 ; GFX900-NEXT: ; def v[0:1]
1158 ; GFX900-NEXT: ;;#ASMEND
1159 ; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v0
1160 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1161 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1162 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1163 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1165 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__u_0_0:
1167 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1168 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1169 ; GFX90A-NEXT: ;;#ASMSTART
1170 ; GFX90A-NEXT: ; def v[0:1]
1171 ; GFX90A-NEXT: ;;#ASMEND
1172 ; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v0
1173 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1174 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1175 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1176 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1178 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__u_0_0:
1180 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1181 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1182 ; GFX940-NEXT: ;;#ASMSTART
1183 ; GFX940-NEXT: ; def v[0:1]
1184 ; GFX940-NEXT: ;;#ASMEND
1185 ; GFX940-NEXT: s_nop 0
1186 ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0
1187 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1188 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1189 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1190 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1191 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1192 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 poison, i32 0, i32 0>
1193 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask zeroinitializer (i.e. <0, 0, 0>): single-source shuffle
; (the second operand is poison) in which all three lanes take %vec0[0]. The
; <3 x bfloat> result is stored to %ptr.
; In the autogenerated expectations below the dword is formed with v_perm_b32
; of v0 with itself using selector 0x5040100, and v0 is written by the 16-bit
; store at offset 4.
1197 define void @v_shuffle_v3bf16_v4bf16__0_0_0(ptr addrspace(1) inreg %ptr) {
1198 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__0_0_0:
1200 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1201 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1202 ; GFX900-NEXT: ;;#ASMSTART
1203 ; GFX900-NEXT: ; def v[0:1]
1204 ; GFX900-NEXT: ;;#ASMEND
1205 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1206 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
1207 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1208 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1209 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1210 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1212 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__0_0_0:
1214 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1215 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1216 ; GFX90A-NEXT: ;;#ASMSTART
1217 ; GFX90A-NEXT: ; def v[0:1]
1218 ; GFX90A-NEXT: ;;#ASMEND
1219 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1220 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
1221 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1222 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1223 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1224 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1226 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_0_0:
1228 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1229 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1230 ; GFX940-NEXT: ;;#ASMSTART
1231 ; GFX940-NEXT: ; def v[0:1]
1232 ; GFX940-NEXT: ;;#ASMEND
1233 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1234 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
1235 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1236 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1237 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1238 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1239 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1240 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> zeroinitializer
1241 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask <1, 0, 0>: single-source shuffle (the second operand is
; poison). Lane 0 takes %vec0[1], lanes 1 and 2 take %vec0[0]; the
; <3 x bfloat> result is stored to %ptr.
; In the autogenerated expectations below the dword is formed with
; v_alignbit_b32 of v0 with itself and a shift of 16, and v0 is written by the
; 16-bit store at offset 4.
1245 define void @v_shuffle_v3bf16_v4bf16__1_0_0(ptr addrspace(1) inreg %ptr) {
1246 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__1_0_0:
1248 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1249 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1250 ; GFX900-NEXT: ;;#ASMSTART
1251 ; GFX900-NEXT: ; def v[0:1]
1252 ; GFX900-NEXT: ;;#ASMEND
1253 ; GFX900-NEXT: v_alignbit_b32 v1, v0, v0, 16
1254 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1255 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1256 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1257 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1259 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__1_0_0:
1261 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1262 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1263 ; GFX90A-NEXT: ;;#ASMSTART
1264 ; GFX90A-NEXT: ; def v[0:1]
1265 ; GFX90A-NEXT: ;;#ASMEND
1266 ; GFX90A-NEXT: v_alignbit_b32 v1, v0, v0, 16
1267 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1268 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1269 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1270 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1272 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_0_0:
1274 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1275 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1276 ; GFX940-NEXT: ;;#ASMSTART
1277 ; GFX940-NEXT: ; def v[0:1]
1278 ; GFX940-NEXT: ;;#ASMEND
1279 ; GFX940-NEXT: s_nop 0
1280 ; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16
1281 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1282 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1283 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1284 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1285 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1286 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 1, i32 0, i32 0>
1287 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask <2, 0, 0>: single-source shuffle (the second operand is
; poison). Lane 0 takes %vec0[2], lanes 1 and 2 take %vec0[0]; the
; <3 x bfloat> result is stored to %ptr.
; In the autogenerated expectations below the dword is formed with v_perm_b32
; of v0 and v1 using selector 0x5040100, and v0 is written by the 16-bit store
; at offset 4.
1291 define void @v_shuffle_v3bf16_v4bf16__2_0_0(ptr addrspace(1) inreg %ptr) {
1292 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__2_0_0:
1294 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1295 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1296 ; GFX900-NEXT: ;;#ASMSTART
1297 ; GFX900-NEXT: ; def v[0:1]
1298 ; GFX900-NEXT: ;;#ASMEND
1299 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1300 ; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4
1301 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1302 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1303 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1304 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1306 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__2_0_0:
1308 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1309 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1310 ; GFX90A-NEXT: ;;#ASMSTART
1311 ; GFX90A-NEXT: ; def v[0:1]
1312 ; GFX90A-NEXT: ;;#ASMEND
1313 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1314 ; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4
1315 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1316 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1317 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1318 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1320 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_0_0:
1322 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1323 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1324 ; GFX940-NEXT: ;;#ASMSTART
1325 ; GFX940-NEXT: ; def v[0:1]
1326 ; GFX940-NEXT: ;;#ASMEND
1327 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1328 ; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2
1329 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1330 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1331 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1332 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1333 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1334 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 2, i32 0, i32 0>
1335 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask <3, 0, 0>: single-source shuffle (the second operand is
; poison). Lane 0 takes %vec0[3], lanes 1 and 2 take %vec0[0]; the
; <3 x bfloat> result is stored to %ptr.
; In the autogenerated expectations below the dword is formed with
; v_alignbit_b32 of v0 and v1 with a shift of 16, and v0 is written by the
; 16-bit store at offset 4.
1339 define void @v_shuffle_v3bf16_v4bf16__3_0_0(ptr addrspace(1) inreg %ptr) {
1340 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__3_0_0:
1342 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1344 ; GFX900-NEXT: ;;#ASMSTART
1345 ; GFX900-NEXT: ; def v[0:1]
1346 ; GFX900-NEXT: ;;#ASMEND
1347 ; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16
1348 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1349 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1350 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1351 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1353 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__3_0_0:
1355 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1357 ; GFX90A-NEXT: ;;#ASMSTART
1358 ; GFX90A-NEXT: ; def v[0:1]
1359 ; GFX90A-NEXT: ;;#ASMEND
1360 ; GFX90A-NEXT: v_alignbit_b32 v1, v0, v1, 16
1361 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1362 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1363 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1364 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1366 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_0_0:
1368 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1369 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1370 ; GFX940-NEXT: ;;#ASMSTART
1371 ; GFX940-NEXT: ; def v[0:1]
1372 ; GFX940-NEXT: ;;#ASMEND
1373 ; GFX940-NEXT: s_nop 0
1374 ; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16
1375 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1376 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1377 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1378 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1379 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1380 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 3, i32 0, i32 0>
1381 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask <4, 0, 0>: lanes 1 and 2 take %vec0[0]. Index 4 is the
; first element of the second operand, which in this function is poison (no
; %vec1 is defined). The <3 x bfloat> result is stored to %ptr.
; NOTE(review): because lane 0 therefore reads poison, the autogenerated
; expectations below are the same instruction sequence as the __u_0_0 case
; (v_lshlrev_b32 by 16 plus a 16-bit store of v0). The sibling tests for
; indices 5-7 pass a second asm-defined vector; confirm whether this test was
; meant to do the same. If the IR is changed, regenerate the checks with the
; script named on the first line of this file.
1385 define void @v_shuffle_v3bf16_v4bf16__4_0_0(ptr addrspace(1) inreg %ptr) {
1386 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__4_0_0:
1388 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1389 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1390 ; GFX900-NEXT: ;;#ASMSTART
1391 ; GFX900-NEXT: ; def v[0:1]
1392 ; GFX900-NEXT: ;;#ASMEND
1393 ; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v0
1394 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
1395 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1396 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1397 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1399 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__4_0_0:
1401 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1402 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1403 ; GFX90A-NEXT: ;;#ASMSTART
1404 ; GFX90A-NEXT: ; def v[0:1]
1405 ; GFX90A-NEXT: ;;#ASMEND
1406 ; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v0
1407 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
1408 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1409 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1410 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1412 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__4_0_0:
1414 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1415 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1416 ; GFX940-NEXT: ;;#ASMSTART
1417 ; GFX940-NEXT: ; def v[0:1]
1418 ; GFX940-NEXT: ;;#ASMEND
1419 ; GFX940-NEXT: s_nop 0
1420 ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0
1421 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1422 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1423 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1424 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1425 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1426 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 4, i32 0, i32 0>
1427 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask <5, 0, 0>: indices 0-3 select from %vec0 and 4-7 from
; %vec1 (4 elements each). Lane 0 takes %vec1[1], lanes 1 and 2 take %vec0[0];
; the <3 x bfloat> result is stored to %ptr.
; In the autogenerated expectations below the dword is formed with
; v_alignbit_b32 (shift 16) and v0 is written by the 16-bit store at offset 4.
; Note the GFX900 checks show the two asm defs in overlapping register ranges
; (v[0:1] then v[1:2]), whereas GFX90A/GFX940 use v[0:1] and v[2:3].
1431 define void @v_shuffle_v3bf16_v4bf16__5_0_0(ptr addrspace(1) inreg %ptr) {
1432 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__5_0_0:
1434 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1435 ; GFX900-NEXT: ;;#ASMSTART
1436 ; GFX900-NEXT: ; def v[0:1]
1437 ; GFX900-NEXT: ;;#ASMEND
1438 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1439 ; GFX900-NEXT: ;;#ASMSTART
1440 ; GFX900-NEXT: ; def v[1:2]
1441 ; GFX900-NEXT: ;;#ASMEND
1442 ; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16
1443 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1444 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1445 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1446 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1448 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__5_0_0:
1450 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1451 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1452 ; GFX90A-NEXT: ;;#ASMSTART
1453 ; GFX90A-NEXT: ; def v[0:1]
1454 ; GFX90A-NEXT: ;;#ASMEND
1455 ; GFX90A-NEXT: ;;#ASMSTART
1456 ; GFX90A-NEXT: ; def v[2:3]
1457 ; GFX90A-NEXT: ;;#ASMEND
1458 ; GFX90A-NEXT: v_alignbit_b32 v1, v0, v2, 16
1459 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1460 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1461 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1462 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1464 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_0_0:
1466 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1467 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1468 ; GFX940-NEXT: ;;#ASMSTART
1469 ; GFX940-NEXT: ; def v[0:1]
1470 ; GFX940-NEXT: ;;#ASMEND
1471 ; GFX940-NEXT: ;;#ASMSTART
1472 ; GFX940-NEXT: ; def v[2:3]
1473 ; GFX940-NEXT: ;;#ASMEND
1474 ; GFX940-NEXT: s_nop 0
1475 ; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16
1476 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1477 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1478 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1479 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1480 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1481 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1482 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 0, i32 0>
1483 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask <6, 0, 0>: indices 0-3 select from %vec0 and 4-7 from
; %vec1 (4 elements each). Lane 0 takes %vec1[2], lanes 1 and 2 take %vec0[0];
; the <3 x bfloat> result is stored to %ptr.
; In the autogenerated expectations below the dword is formed with v_perm_b32
; using selector 0x5040100 and v0 is written by the 16-bit store at offset 4.
; Note the GFX900 checks show the two asm defs in overlapping register ranges
; (v[0:1] then v[1:2]), whereas GFX90A/GFX940 use v[0:1] and v[2:3].
1487 define void @v_shuffle_v3bf16_v4bf16__6_0_0(ptr addrspace(1) inreg %ptr) {
1488 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__6_0_0:
1490 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1491 ; GFX900-NEXT: ;;#ASMSTART
1492 ; GFX900-NEXT: ; def v[0:1]
1493 ; GFX900-NEXT: ;;#ASMEND
1494 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1495 ; GFX900-NEXT: ;;#ASMSTART
1496 ; GFX900-NEXT: ; def v[1:2]
1497 ; GFX900-NEXT: ;;#ASMEND
1498 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1499 ; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4
1500 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1501 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1502 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1503 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1505 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__6_0_0:
1507 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1508 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1509 ; GFX90A-NEXT: ;;#ASMSTART
1510 ; GFX90A-NEXT: ; def v[0:1]
1511 ; GFX90A-NEXT: ;;#ASMEND
1512 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1513 ; GFX90A-NEXT: ;;#ASMSTART
1514 ; GFX90A-NEXT: ; def v[2:3]
1515 ; GFX90A-NEXT: ;;#ASMEND
1516 ; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4
1517 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1518 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1519 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1520 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1522 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_0_0:
1524 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1525 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1526 ; GFX940-NEXT: ;;#ASMSTART
1527 ; GFX940-NEXT: ; def v[0:1]
1528 ; GFX940-NEXT: ;;#ASMEND
1529 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1530 ; GFX940-NEXT: ;;#ASMSTART
1531 ; GFX940-NEXT: ; def v[2:3]
1532 ; GFX940-NEXT: ;;#ASMEND
1533 ; GFX940-NEXT: s_nop 0
1534 ; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2
1535 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1536 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1537 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1538 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1539 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1540 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1541 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 0, i32 0>
1542 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask <7, 0, 0>: indices 0-3 select from %vec0 and 4-7 from
; %vec1 (4 elements each). Lane 0 takes %vec1[3], lanes 1 and 2 take %vec0[0];
; the <3 x bfloat> result is stored to %ptr.
; In the autogenerated expectations below the dword is formed with
; v_alignbit_b32 (shift 16) and v0 is written by the 16-bit store at offset 4.
; Note the GFX900 checks show the two asm defs in overlapping register ranges
; (v[0:1] then v[1:2]), whereas GFX90A/GFX940 use v[0:1] and v[2:3].
1546 define void @v_shuffle_v3bf16_v4bf16__7_0_0(ptr addrspace(1) inreg %ptr) {
1547 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_0_0:
1549 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1550 ; GFX900-NEXT: ;;#ASMSTART
1551 ; GFX900-NEXT: ; def v[0:1]
1552 ; GFX900-NEXT: ;;#ASMEND
1553 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1554 ; GFX900-NEXT: ;;#ASMSTART
1555 ; GFX900-NEXT: ; def v[1:2]
1556 ; GFX900-NEXT: ;;#ASMEND
1557 ; GFX900-NEXT: v_alignbit_b32 v1, v0, v2, 16
1558 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1559 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1560 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1561 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1563 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_0_0:
1565 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1566 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1567 ; GFX90A-NEXT: ;;#ASMSTART
1568 ; GFX90A-NEXT: ; def v[0:1]
1569 ; GFX90A-NEXT: ;;#ASMEND
1570 ; GFX90A-NEXT: ;;#ASMSTART
1571 ; GFX90A-NEXT: ; def v[2:3]
1572 ; GFX90A-NEXT: ;;#ASMEND
1573 ; GFX90A-NEXT: v_alignbit_b32 v1, v0, v3, 16
1574 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1575 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1576 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1577 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1579 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_0:
1581 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1582 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1583 ; GFX940-NEXT: ;;#ASMSTART
1584 ; GFX940-NEXT: ; def v[0:1]
1585 ; GFX940-NEXT: ;;#ASMEND
1586 ; GFX940-NEXT: ;;#ASMSTART
1587 ; GFX940-NEXT: ; def v[2:3]
1588 ; GFX940-NEXT: ;;#ASMEND
1589 ; GFX940-NEXT: s_nop 0
1590 ; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16
1591 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1592 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1593 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1594 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1595 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1596 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1597 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 0>
1598 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle test, mask <7, poison, 0>: indices 0-3 select from %vec0 and 4-7
; from %vec1 (4 elements each). Lane 0 takes %vec1[3], lane 1 is unspecified,
; lane 2 takes %vec0[0]; the <3 x bfloat> result is stored to %ptr.
; In the autogenerated expectations below the dword is formed with
; v_alignbit_b32 (shift 16) whose first source is a scalar register (s4 on
; GFX900/GFX90A, s0 on GFX940) rather than a vector register; presumably that
; operand stands in for the poison lane, so its value is irrelevant.
; v0 is written by the 16-bit store at offset 4.
1602 define void @v_shuffle_v3bf16_v4bf16__7_u_0(ptr addrspace(1) inreg %ptr) {
1603 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_u_0:
1605 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1606 ; GFX900-NEXT: ;;#ASMSTART
1607 ; GFX900-NEXT: ; def v[0:1]
1608 ; GFX900-NEXT: ;;#ASMEND
1609 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1610 ; GFX900-NEXT: ;;#ASMSTART
1611 ; GFX900-NEXT: ; def v[1:2]
1612 ; GFX900-NEXT: ;;#ASMEND
1613 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v2, 16
1614 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1615 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1616 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1617 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1619 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_u_0:
1621 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1622 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1623 ; GFX90A-NEXT: ;;#ASMSTART
1624 ; GFX90A-NEXT: ; def v[0:1]
1625 ; GFX90A-NEXT: ;;#ASMEND
1626 ; GFX90A-NEXT: ;;#ASMSTART
1627 ; GFX90A-NEXT: ; def v[2:3]
1628 ; GFX90A-NEXT: ;;#ASMEND
1629 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v3, 16
1630 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1631 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1632 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1633 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1635 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_0:
1637 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1638 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1639 ; GFX940-NEXT: ;;#ASMSTART
1640 ; GFX940-NEXT: ; def v[0:1]
1641 ; GFX940-NEXT: ;;#ASMEND
1642 ; GFX940-NEXT: ;;#ASMSTART
1643 ; GFX940-NEXT: ; def v[2:3]
1644 ; GFX940-NEXT: ;;#ASMEND
1645 ; GFX940-NEXT: s_nop 0
1646 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v3, 16
1647 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1648 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1649 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1650 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1651 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1652 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1653 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 0>
1654 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
1658 define void @v_shuffle_v3bf16_v4bf16__7_1_0(ptr addrspace(1) inreg %ptr) {
1659 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_1_0:
1661 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1662 ; GFX900-NEXT: ;;#ASMSTART
1663 ; GFX900-NEXT: ; def v[0:1]
1664 ; GFX900-NEXT: ;;#ASMEND
1665 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1666 ; GFX900-NEXT: ;;#ASMSTART
1667 ; GFX900-NEXT: ; def v[1:2]
1668 ; GFX900-NEXT: ;;#ASMEND
1669 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1670 ; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4
1671 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1672 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1673 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1674 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1676 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_1_0:
1678 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1679 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1680 ; GFX90A-NEXT: ;;#ASMSTART
1681 ; GFX90A-NEXT: ; def v[0:1]
1682 ; GFX90A-NEXT: ;;#ASMEND
1683 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1684 ; GFX90A-NEXT: ;;#ASMSTART
1685 ; GFX90A-NEXT: ; def v[2:3]
1686 ; GFX90A-NEXT: ;;#ASMEND
1687 ; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4
1688 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1689 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1690 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1691 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1693 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_0:
1695 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1696 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1697 ; GFX940-NEXT: ;;#ASMSTART
1698 ; GFX940-NEXT: ; def v[0:1]
1699 ; GFX940-NEXT: ;;#ASMEND
1700 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1701 ; GFX940-NEXT: ;;#ASMSTART
1702 ; GFX940-NEXT: ; def v[2:3]
1703 ; GFX940-NEXT: ;;#ASMEND
1704 ; GFX940-NEXT: s_nop 0
1705 ; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2
1706 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1707 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1708 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1709 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <7, 1, 0> yields { %vec1[3], %vec0[1], %vec0[0] } because mask indices
; 0-3 select from %vec0 and 4-7 from %vec1. Both inputs are produced by inline
; asm with a "=v" output constraint. The assertions above are autogenerated;
; regenerate them with the update script instead of editing them by hand.
1710 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1711 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1712 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 0>
1713 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
1717 define void @v_shuffle_v3bf16_v4bf16__7_2_0(ptr addrspace(1) inreg %ptr) {
1718 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_2_0:
1720 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1721 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
1722 ; GFX900-NEXT: ;;#ASMSTART
1723 ; GFX900-NEXT: ; def v[0:1]
1724 ; GFX900-NEXT: ;;#ASMEND
1725 ; GFX900-NEXT: ;;#ASMSTART
1726 ; GFX900-NEXT: ; def v[2:3]
1727 ; GFX900-NEXT: ;;#ASMEND
1728 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16
1729 ; GFX900-NEXT: global_store_short v4, v0, s[16:17] offset:4
1730 ; GFX900-NEXT: global_store_dword v4, v1, s[16:17]
1731 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1732 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1734 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_2_0:
1736 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1737 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1738 ; GFX90A-NEXT: ;;#ASMSTART
1739 ; GFX90A-NEXT: ; def v[0:1]
1740 ; GFX90A-NEXT: ;;#ASMEND
1741 ; GFX90A-NEXT: ;;#ASMSTART
1742 ; GFX90A-NEXT: ; def v[2:3]
1743 ; GFX90A-NEXT: ;;#ASMEND
1744 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v3, 16
1745 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1746 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1747 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1748 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1750 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_0:
1752 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1753 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1754 ; GFX940-NEXT: ;;#ASMSTART
1755 ; GFX940-NEXT: ; def v[0:1]
1756 ; GFX940-NEXT: ;;#ASMEND
1757 ; GFX940-NEXT: ;;#ASMSTART
1758 ; GFX940-NEXT: ; def v[2:3]
1759 ; GFX940-NEXT: ;;#ASMEND
1760 ; GFX940-NEXT: s_nop 0
1761 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16
1762 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1763 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1764 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1765 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <7, 2, 0> yields { %vec1[3], %vec0[2], %vec0[0] } because mask indices
; 0-3 select from %vec0 and 4-7 from %vec1. Both inputs are produced by inline
; asm with a "=v" output constraint. The assertions above are autogenerated;
; regenerate them with the update script instead of editing them by hand.
1766 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1767 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1768 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 0>
1769 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
1773 define void @v_shuffle_v3bf16_v4bf16__7_3_0(ptr addrspace(1) inreg %ptr) {
1774 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_3_0:
1776 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1777 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
1778 ; GFX900-NEXT: ;;#ASMSTART
1779 ; GFX900-NEXT: ; def v[0:1]
1780 ; GFX900-NEXT: ;;#ASMEND
1781 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1782 ; GFX900-NEXT: ;;#ASMSTART
1783 ; GFX900-NEXT: ; def v[2:3]
1784 ; GFX900-NEXT: ;;#ASMEND
1785 ; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4
1786 ; GFX900-NEXT: global_store_short v4, v0, s[16:17] offset:4
1787 ; GFX900-NEXT: global_store_dword v4, v1, s[16:17]
1788 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1789 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1791 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_3_0:
1793 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1794 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1795 ; GFX90A-NEXT: ;;#ASMSTART
1796 ; GFX90A-NEXT: ; def v[0:1]
1797 ; GFX90A-NEXT: ;;#ASMEND
1798 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1799 ; GFX90A-NEXT: ;;#ASMSTART
1800 ; GFX90A-NEXT: ; def v[2:3]
1801 ; GFX90A-NEXT: ;;#ASMEND
1802 ; GFX90A-NEXT: v_perm_b32 v1, v1, v3, s4
1803 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1804 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1805 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1806 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1808 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_0:
1810 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1811 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1812 ; GFX940-NEXT: ;;#ASMSTART
1813 ; GFX940-NEXT: ; def v[0:1]
1814 ; GFX940-NEXT: ;;#ASMEND
1815 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1816 ; GFX940-NEXT: ;;#ASMSTART
1817 ; GFX940-NEXT: ; def v[2:3]
1818 ; GFX940-NEXT: ;;#ASMEND
1819 ; GFX940-NEXT: s_nop 0
1820 ; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2
1821 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1822 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1823 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1824 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <7, 3, 0> yields { %vec1[3], %vec0[3], %vec0[0] } because mask indices
; 0-3 select from %vec0 and 4-7 from %vec1. Both inputs are produced by inline
; asm with a "=v" output constraint. The assertions above are autogenerated;
; regenerate them with the update script instead of editing them by hand.
1825 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1826 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1827 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 0>
1828 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
1832 define void @v_shuffle_v3bf16_v4bf16__7_4_0(ptr addrspace(1) inreg %ptr) {
1833 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_4_0:
1835 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1836 ; GFX900-NEXT: ;;#ASMSTART
1837 ; GFX900-NEXT: ; def v[0:1]
1838 ; GFX900-NEXT: ;;#ASMEND
1839 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1840 ; GFX900-NEXT: ;;#ASMSTART
1841 ; GFX900-NEXT: ; def v[1:2]
1842 ; GFX900-NEXT: ;;#ASMEND
1843 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v2, 16
1844 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1845 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1846 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1847 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1849 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_4_0:
1851 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1852 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1853 ; GFX90A-NEXT: ;;#ASMSTART
1854 ; GFX90A-NEXT: ; def v[0:1]
1855 ; GFX90A-NEXT: ;;#ASMEND
1856 ; GFX90A-NEXT: ;;#ASMSTART
1857 ; GFX90A-NEXT: ; def v[2:3]
1858 ; GFX90A-NEXT: ;;#ASMEND
1859 ; GFX90A-NEXT: v_alignbit_b32 v1, v2, v3, 16
1860 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1861 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1862 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1863 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1865 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_0:
1867 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1868 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1869 ; GFX940-NEXT: ;;#ASMSTART
1870 ; GFX940-NEXT: ; def v[0:1]
1871 ; GFX940-NEXT: ;;#ASMEND
1872 ; GFX940-NEXT: ;;#ASMSTART
1873 ; GFX940-NEXT: ; def v[2:3]
1874 ; GFX940-NEXT: ;;#ASMEND
1875 ; GFX940-NEXT: s_nop 0
1876 ; GFX940-NEXT: v_alignbit_b32 v1, v2, v3, 16
1877 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1878 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1879 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1880 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <7, 4, 0> yields { %vec1[3], %vec1[0], %vec0[0] } because mask indices
; 0-3 select from %vec0 and 4-7 from %vec1. Both inputs are produced by inline
; asm with a "=v" output constraint. The assertions above are autogenerated;
; regenerate them with the update script instead of editing them by hand.
1881 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1882 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1883 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 0>
1884 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
1888 define void @v_shuffle_v3bf16_v4bf16__7_5_0(ptr addrspace(1) inreg %ptr) {
1889 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_5_0:
1891 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1892 ; GFX900-NEXT: ;;#ASMSTART
1893 ; GFX900-NEXT: ; def v[0:1]
1894 ; GFX900-NEXT: ;;#ASMEND
1895 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1896 ; GFX900-NEXT: ;;#ASMSTART
1897 ; GFX900-NEXT: ; def v[1:2]
1898 ; GFX900-NEXT: ;;#ASMEND
1899 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1900 ; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4
1901 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1902 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1903 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1904 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1906 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_5_0:
1908 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1909 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1910 ; GFX90A-NEXT: ;;#ASMSTART
1911 ; GFX90A-NEXT: ; def v[0:1]
1912 ; GFX90A-NEXT: ;;#ASMEND
1913 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1914 ; GFX90A-NEXT: ;;#ASMSTART
1915 ; GFX90A-NEXT: ; def v[2:3]
1916 ; GFX90A-NEXT: ;;#ASMEND
1917 ; GFX90A-NEXT: v_perm_b32 v1, v2, v3, s4
1918 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1919 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1920 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1921 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1923 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_0:
1925 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1927 ; GFX940-NEXT: ;;#ASMSTART
1928 ; GFX940-NEXT: ; def v[0:1]
1929 ; GFX940-NEXT: ;;#ASMEND
1930 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1931 ; GFX940-NEXT: ;;#ASMSTART
1932 ; GFX940-NEXT: ; def v[2:3]
1933 ; GFX940-NEXT: ;;#ASMEND
1934 ; GFX940-NEXT: s_nop 0
1935 ; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2
1936 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1937 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1938 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1939 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <7, 5, 0> yields { %vec1[3], %vec1[1], %vec0[0] } because mask indices
; 0-3 select from %vec0 and 4-7 from %vec1. Both inputs are produced by inline
; asm with a "=v" output constraint. The assertions above are autogenerated;
; regenerate them with the update script instead of editing them by hand.
1940 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1941 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1942 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 0>
1943 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
1947 define void @v_shuffle_v3bf16_v4bf16__7_6_0(ptr addrspace(1) inreg %ptr) {
1948 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_6_0:
1950 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1951 ; GFX900-NEXT: ;;#ASMSTART
1952 ; GFX900-NEXT: ; def v[0:1]
1953 ; GFX900-NEXT: ;;#ASMEND
1954 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1955 ; GFX900-NEXT: ;;#ASMSTART
1956 ; GFX900-NEXT: ; def v[1:2]
1957 ; GFX900-NEXT: ;;#ASMEND
1958 ; GFX900-NEXT: v_alignbit_b32 v1, v2, v2, 16
1959 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
1960 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
1961 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1962 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1964 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_6_0:
1966 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1967 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1968 ; GFX90A-NEXT: ;;#ASMSTART
1969 ; GFX90A-NEXT: ; def v[0:1]
1970 ; GFX90A-NEXT: ;;#ASMEND
1971 ; GFX90A-NEXT: ;;#ASMSTART
1972 ; GFX90A-NEXT: ; def v[2:3]
1973 ; GFX90A-NEXT: ;;#ASMEND
1974 ; GFX90A-NEXT: v_alignbit_b32 v1, v3, v3, 16
1975 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
1976 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
1977 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1978 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1980 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_0:
1982 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1983 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1984 ; GFX940-NEXT: ;;#ASMSTART
1985 ; GFX940-NEXT: ; def v[0:1]
1986 ; GFX940-NEXT: ;;#ASMEND
1987 ; GFX940-NEXT: ;;#ASMSTART
1988 ; GFX940-NEXT: ; def v[2:3]
1989 ; GFX940-NEXT: ;;#ASMEND
1990 ; GFX940-NEXT: s_nop 0
1991 ; GFX940-NEXT: v_alignbit_b32 v1, v3, v3, 16
1992 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1993 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
1994 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1995 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <7, 6, 0> yields { %vec1[3], %vec1[2], %vec0[0] } because mask indices
; 0-3 select from %vec0 and 4-7 from %vec1. Both inputs are produced by inline
; asm with a "=v" output constraint. The assertions above are autogenerated;
; regenerate them with the update script instead of editing them by hand.
1996 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
1997 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
1998 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 0>
1999 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
2003 define void @v_shuffle_v3bf16_v4bf16__u_1_1(ptr addrspace(1) inreg %ptr) {
2004 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__u_1_1:
2006 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2007 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2008 ; GFX900-NEXT: ;;#ASMSTART
2009 ; GFX900-NEXT: ; def v[0:1]
2010 ; GFX900-NEXT: ;;#ASMEND
2011 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2012 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2013 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
2014 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2015 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2017 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__u_1_1:
2019 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2020 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2021 ; GFX90A-NEXT: ;;#ASMSTART
2022 ; GFX90A-NEXT: ; def v[0:1]
2023 ; GFX90A-NEXT: ;;#ASMEND
2024 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2025 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2026 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
2027 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2028 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2030 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__u_1_1:
2032 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2033 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2034 ; GFX940-NEXT: ;;#ASMSTART
2035 ; GFX940-NEXT: ; def v[0:1]
2036 ; GFX940-NEXT: ;;#ASMEND
2037 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2038 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2039 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2040 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2041 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <poison, 1, 1> leaves result lane 0 unconstrained and fills lanes 1 and
; 2 with %vec0[1]. %vec0 is produced by inline asm with a "=v" output
; constraint; the second shuffle operand is poison. The assertions above are
; autogenerated; regenerate them with the update script instead of editing
; them by hand.
2042 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2043 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 poison, i32 1, i32 1>
2044 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
2048 define void @v_shuffle_v3bf16_v4bf16__0_1_1(ptr addrspace(1) inreg %ptr) {
2049 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__0_1_1:
2051 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2052 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2053 ; GFX900-NEXT: ;;#ASMSTART
2054 ; GFX900-NEXT: ; def v[0:1]
2055 ; GFX900-NEXT: ;;#ASMEND
2056 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2057 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2058 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
2059 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2060 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2062 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__0_1_1:
2064 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2065 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2066 ; GFX90A-NEXT: ;;#ASMSTART
2067 ; GFX90A-NEXT: ; def v[0:1]
2068 ; GFX90A-NEXT: ;;#ASMEND
2069 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2070 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2071 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
2072 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2073 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2075 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_1_1:
2077 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2078 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2079 ; GFX940-NEXT: ;;#ASMSTART
2080 ; GFX940-NEXT: ; def v[0:1]
2081 ; GFX940-NEXT: ;;#ASMEND
2082 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2083 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2084 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2085 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2086 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <0, 1, 1> yields { %vec0[0], %vec0[1], %vec0[1] }. %vec0 is produced by
; inline asm with a "=v" output constraint; the second shuffle operand is
; poison and is never selected. The assertions above are autogenerated;
; regenerate them with the update script instead of editing them by hand.
2087 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2088 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 1>
2089 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
2093 define void @v_shuffle_v3bf16_v4bf16__1_1_1(ptr addrspace(1) inreg %ptr) {
2094 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__1_1_1:
2096 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2097 ; GFX900-NEXT: ;;#ASMSTART
2098 ; GFX900-NEXT: ; def v[0:1]
2099 ; GFX900-NEXT: ;;#ASMEND
2100 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2101 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2102 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
2103 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2104 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
2105 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
2106 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2107 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2109 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__1_1_1:
2111 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2112 ; GFX90A-NEXT: ;;#ASMSTART
2113 ; GFX90A-NEXT: ; def v[0:1]
2114 ; GFX90A-NEXT: ;;#ASMEND
2115 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2116 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2117 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
2118 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2119 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
2120 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
2121 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2122 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2124 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_1_1:
2126 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2127 ; GFX940-NEXT: ;;#ASMSTART
2128 ; GFX940-NEXT: ; def v[0:1]
2129 ; GFX940-NEXT: ;;#ASMEND
2130 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2131 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2132 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
2133 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2134 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
2135 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2136 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2137 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <1, 1, 1> broadcasts %vec0[1] into all three result lanes. %vec0 is
; produced by inline asm with a "=v" output constraint; the second shuffle
; operand is poison and is never selected. The assertions above are
; autogenerated; regenerate them with the update script instead of editing
; them by hand.
2138 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2139 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 1, i32 1, i32 1>
2140 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
2144 define void @v_shuffle_v3bf16_v4bf16__2_1_1(ptr addrspace(1) inreg %ptr) {
2145 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__2_1_1:
2147 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2148 ; GFX900-NEXT: ;;#ASMSTART
2149 ; GFX900-NEXT: ; def v[0:1]
2150 ; GFX900-NEXT: ;;#ASMEND
2151 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
2152 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2153 ; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0
2154 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2155 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
2156 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
2157 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2158 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2160 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__2_1_1:
2162 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2163 ; GFX90A-NEXT: ;;#ASMSTART
2164 ; GFX90A-NEXT: ; def v[0:1]
2165 ; GFX90A-NEXT: ;;#ASMEND
2166 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
2167 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2168 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v0
2169 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2170 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
2171 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
2172 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2173 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2175 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_1_1:
2177 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2178 ; GFX940-NEXT: ;;#ASMSTART
2179 ; GFX940-NEXT: ; def v[0:1]
2180 ; GFX940-NEXT: ;;#ASMEND
2181 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
2182 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2183 ; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0
2184 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2185 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2186 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
2187 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2188 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <2, 1, 1> yields { %vec0[2], %vec0[1], %vec0[1] }. %vec0 is produced by
; inline asm with a "=v" output constraint; the second shuffle operand is
; poison and is never selected. The assertions above are autogenerated;
; regenerate them with the update script instead of editing them by hand.
2189 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2190 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 2, i32 1, i32 1>
2191 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
2195 define void @v_shuffle_v3bf16_v4bf16__3_1_1(ptr addrspace(1) inreg %ptr) {
2196 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__3_1_1:
2198 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2199 ; GFX900-NEXT: ;;#ASMSTART
2200 ; GFX900-NEXT: ; def v[0:1]
2201 ; GFX900-NEXT: ;;#ASMEND
2202 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2203 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2204 ; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4
2205 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2206 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
2207 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
2208 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2209 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2211 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__3_1_1:
2213 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2214 ; GFX90A-NEXT: ;;#ASMSTART
2215 ; GFX90A-NEXT: ; def v[0:1]
2216 ; GFX90A-NEXT: ;;#ASMEND
2217 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2218 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2219 ; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4
2220 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2221 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
2222 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
2223 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2224 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2226 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_1_1:
2228 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2229 ; GFX940-NEXT: ;;#ASMSTART
2230 ; GFX940-NEXT: ; def v[0:1]
2231 ; GFX940-NEXT: ;;#ASMEND
2232 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2233 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2234 ; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2
2235 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2236 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
2237 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2238 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2239 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <3, 1, 1> yields { %vec0[3], %vec0[1], %vec0[1] }. %vec0 is produced by
; inline asm with a "=v" output constraint; the second shuffle operand is
; poison and is never selected. The assertions above are autogenerated;
; regenerate them with the update script instead of editing them by hand.
2240 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2241 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 3, i32 1, i32 1>
2242 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
2246 define void @v_shuffle_v3bf16_v4bf16__4_1_1(ptr addrspace(1) inreg %ptr) {
2247 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__4_1_1:
2249 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2250 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2251 ; GFX900-NEXT: ;;#ASMSTART
2252 ; GFX900-NEXT: ; def v[0:1]
2253 ; GFX900-NEXT: ;;#ASMEND
2254 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2255 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2256 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
2257 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2258 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2260 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__4_1_1:
2262 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2263 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2264 ; GFX90A-NEXT: ;;#ASMSTART
2265 ; GFX90A-NEXT: ; def v[0:1]
2266 ; GFX90A-NEXT: ;;#ASMEND
2267 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2268 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2269 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
2270 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2271 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2273 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__4_1_1:
2275 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2276 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2277 ; GFX940-NEXT: ;;#ASMSTART
2278 ; GFX940-NEXT: ; def v[0:1]
2279 ; GFX940-NEXT: ;;#ASMEND
2280 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2281 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2282 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2283 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2284 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <4, 1, 1>: index 4 names element 0 of the second operand, which is
; poison here, so result lane 0 is unconstrained; lanes 1 and 2 are both
; %vec0[1]. %vec0 is produced by inline asm with a "=v" output constraint.
; The assertions above are autogenerated; regenerate them with the update
; script instead of editing them by hand.
2285 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2286 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 4, i32 1, i32 1>
2287 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
2291 define void @v_shuffle_v3bf16_v4bf16__5_1_1(ptr addrspace(1) inreg %ptr) {
2292 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__5_1_1:
2294 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2295 ; GFX900-NEXT: ;;#ASMSTART
2296 ; GFX900-NEXT: ; def v[0:1]
2297 ; GFX900-NEXT: ;;#ASMEND
2298 ; GFX900-NEXT: ;;#ASMSTART
2299 ; GFX900-NEXT: ; def v[1:2]
2300 ; GFX900-NEXT: ;;#ASMEND
2301 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2302 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
2303 ; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4
2304 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2305 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
2306 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
2307 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2308 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2310 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__5_1_1:
2312 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2313 ; GFX90A-NEXT: ;;#ASMSTART
2314 ; GFX90A-NEXT: ; def v[0:1]
2315 ; GFX90A-NEXT: ;;#ASMEND
2316 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2317 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2318 ; GFX90A-NEXT: ;;#ASMSTART
2319 ; GFX90A-NEXT: ; def v[2:3]
2320 ; GFX90A-NEXT: ;;#ASMEND
2321 ; GFX90A-NEXT: v_perm_b32 v1, v0, v2, s4
2322 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2323 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
2324 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
2325 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2326 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2328 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_1_1:
2330 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2331 ; GFX940-NEXT: ;;#ASMSTART
2332 ; GFX940-NEXT: ; def v[0:1]
2333 ; GFX940-NEXT: ;;#ASMEND
2334 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2335 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2336 ; GFX940-NEXT: ;;#ASMSTART
2337 ; GFX940-NEXT: ; def v[2:3]
2338 ; GFX940-NEXT: ;;#ASMEND
2339 ; GFX940-NEXT: s_nop 0
2340 ; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2
2341 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2342 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
2343 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
2344 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2345 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <5, 1, 1> yields { %vec1[1], %vec0[1], %vec0[1] } because mask indices
; 0-3 select from %vec0 and 4-7 from %vec1. Both inputs are produced by inline
; asm with a "=v" output constraint. The assertions above are autogenerated;
; regenerate them with the update script instead of editing them by hand.
2346 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2347 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2348 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
2349 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
2353 define void @v_shuffle_v3bf16_v4bf16__6_1_1(ptr addrspace(1) inreg %ptr) {
2354 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__6_1_1:
2356 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2357 ; GFX900-NEXT: ;;#ASMSTART
2358 ; GFX900-NEXT: ; def v[0:1]
2359 ; GFX900-NEXT: ;;#ASMEND
2360 ; GFX900-NEXT: ;;#ASMSTART
2361 ; GFX900-NEXT: ; def v[1:2]
2362 ; GFX900-NEXT: ;;#ASMEND
2363 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
2364 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
2365 ; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v0
2366 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2367 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
2368 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
2369 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2370 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2372 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__6_1_1:
2374 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2375 ; GFX90A-NEXT: ;;#ASMSTART
2376 ; GFX90A-NEXT: ; def v[0:1]
2377 ; GFX90A-NEXT: ;;#ASMEND
2378 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
2379 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2380 ; GFX90A-NEXT: ;;#ASMSTART
2381 ; GFX90A-NEXT: ; def v[2:3]
2382 ; GFX90A-NEXT: ;;#ASMEND
2383 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v3, v0
2384 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2385 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
2386 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
2387 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2388 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2390 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_1_1:
2392 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2393 ; GFX940-NEXT: ;;#ASMSTART
2394 ; GFX940-NEXT: ; def v[0:1]
2395 ; GFX940-NEXT: ;;#ASMEND
2396 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
2397 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2398 ; GFX940-NEXT: ;;#ASMSTART
2399 ; GFX940-NEXT: ; def v[2:3]
2400 ; GFX940-NEXT: ;;#ASMEND
2401 ; GFX940-NEXT: s_nop 0
2402 ; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0
2403 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2404 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
2405 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
2406 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2407 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <6, 1, 1> yields { %vec1[2], %vec0[1], %vec0[1] } because mask indices
; 0-3 select from %vec0 and 4-7 from %vec1. Both inputs are produced by inline
; asm with a "=v" output constraint. The assertions above are autogenerated;
; regenerate them with the update script instead of editing them by hand.
2408 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2409 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2410 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
2411 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
2415 define void @v_shuffle_v3bf16_v4bf16__7_1_1(ptr addrspace(1) inreg %ptr) {
2416 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_1_1:
2418 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2419 ; GFX900-NEXT: ;;#ASMSTART
2420 ; GFX900-NEXT: ; def v[0:1]
2421 ; GFX900-NEXT: ;;#ASMEND
2422 ; GFX900-NEXT: ;;#ASMSTART
2423 ; GFX900-NEXT: ; def v[1:2]
2424 ; GFX900-NEXT: ;;#ASMEND
2425 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2426 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
2427 ; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4
2428 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2429 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
2430 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
2431 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2432 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2434 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_1_1:
2436 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2437 ; GFX90A-NEXT: ;;#ASMSTART
2438 ; GFX90A-NEXT: ; def v[0:1]
2439 ; GFX90A-NEXT: ;;#ASMEND
2440 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2441 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2442 ; GFX90A-NEXT: ;;#ASMSTART
2443 ; GFX90A-NEXT: ; def v[2:3]
2444 ; GFX90A-NEXT: ;;#ASMEND
2445 ; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4
2446 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2447 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
2448 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
2449 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2450 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2452 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_1:
2454 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2455 ; GFX940-NEXT: ;;#ASMSTART
2456 ; GFX940-NEXT: ; def v[0:1]
2457 ; GFX940-NEXT: ;;#ASMEND
2458 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2459 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2460 ; GFX940-NEXT: ;;#ASMSTART
2461 ; GFX940-NEXT: ; def v[2:3]
2462 ; GFX940-NEXT: ;;#ASMEND
2463 ; GFX940-NEXT: s_nop 0
2464 ; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2
2465 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2466 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
2467 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
2468 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2469 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Mask <7, 1, 1> yields { %vec1[3], %vec0[1], %vec0[1] } because mask indices
; 0-3 select from %vec0 and 4-7 from %vec1. Both inputs are produced by inline
; asm with a "=v" output constraint. The assertions above are autogenerated;
; regenerate them with the update script instead of editing them by hand.
2470 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2471 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2472 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
2473 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of two <4 x bfloat> inputs, mask <7, poison, 1>.
; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
; { %vec1[3], unspecified, %vec0[1] }. The per-target assertions are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script instead of editing them by hand.
2477 define void @v_shuffle_v3bf16_v4bf16__7_u_1(ptr addrspace(1) inreg %ptr) {
2478 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_u_1:
2480 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2481 ; GFX900-NEXT: ;;#ASMSTART
2482 ; GFX900-NEXT: ; def v[0:1]
2483 ; GFX900-NEXT: ;;#ASMEND
2484 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
2485 ; GFX900-NEXT: ;;#ASMSTART
2486 ; GFX900-NEXT: ; def v[1:2]
2487 ; GFX900-NEXT: ;;#ASMEND
2488 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v2, 16
2489 ; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4
2490 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
2491 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2492 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2494 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_u_1:
2496 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2497 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2498 ; GFX90A-NEXT: ;;#ASMSTART
2499 ; GFX90A-NEXT: ; def v[0:1]
2500 ; GFX90A-NEXT: ;;#ASMEND
2501 ; GFX90A-NEXT: ;;#ASMSTART
2502 ; GFX90A-NEXT: ; def v[2:3]
2503 ; GFX90A-NEXT: ;;#ASMEND
2504 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v3, 16
2505 ; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4
2506 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
2507 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2508 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2510 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_1:
2512 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2513 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2514 ; GFX940-NEXT: ;;#ASMSTART
2515 ; GFX940-NEXT: ; def v[0:1]
2516 ; GFX940-NEXT: ;;#ASMEND
2517 ; GFX940-NEXT: ;;#ASMSTART
2518 ; GFX940-NEXT: ; def v[2:3]
2519 ; GFX940-NEXT: ;;#ASMEND
2520 ; GFX940-NEXT: s_nop 0
2521 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v3, 16
2522 ; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2523 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
2524 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2525 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are produced by opaque inline asm ("=v" outputs); the shuffled
; vector is then stored to %ptr.
2526 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2527 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2528 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
2529 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of two <4 x bfloat> inputs, mask <7, 0, 1>.
; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
; { %vec1[3], %vec0[0], %vec0[1] }. The per-target assertions are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script instead of editing them by hand.
2533 define void @v_shuffle_v3bf16_v4bf16__7_0_1(ptr addrspace(1) inreg %ptr) {
2534 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_0_1:
2536 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2537 ; GFX900-NEXT: ;;#ASMSTART
2538 ; GFX900-NEXT: ; def v[0:1]
2539 ; GFX900-NEXT: ;;#ASMEND
2540 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
2541 ; GFX900-NEXT: ;;#ASMSTART
2542 ; GFX900-NEXT: ; def v[1:2]
2543 ; GFX900-NEXT: ;;#ASMEND
2544 ; GFX900-NEXT: v_alignbit_b32 v1, v0, v2, 16
2545 ; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4
2546 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
2547 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2548 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2550 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_0_1:
2552 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2553 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2554 ; GFX90A-NEXT: ;;#ASMSTART
2555 ; GFX90A-NEXT: ; def v[0:1]
2556 ; GFX90A-NEXT: ;;#ASMEND
2557 ; GFX90A-NEXT: ;;#ASMSTART
2558 ; GFX90A-NEXT: ; def v[2:3]
2559 ; GFX90A-NEXT: ;;#ASMEND
2560 ; GFX90A-NEXT: v_alignbit_b32 v1, v0, v3, 16
2561 ; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4
2562 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
2563 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2564 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2566 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_1:
2568 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2569 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2570 ; GFX940-NEXT: ;;#ASMSTART
2571 ; GFX940-NEXT: ; def v[0:1]
2572 ; GFX940-NEXT: ;;#ASMEND
2573 ; GFX940-NEXT: ;;#ASMSTART
2574 ; GFX940-NEXT: ; def v[2:3]
2575 ; GFX940-NEXT: ;;#ASMEND
2576 ; GFX940-NEXT: s_nop 0
2577 ; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16
2578 ; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2579 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
2580 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2581 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are produced by opaque inline asm ("=v" outputs); the shuffled
; vector is then stored to %ptr.
2582 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2583 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2584 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
2585 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of two <4 x bfloat> inputs, mask <7, 2, 1>.
; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
; { %vec1[3], %vec0[2], %vec0[1] }. The per-target assertions are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script instead of editing them by hand.
2589 define void @v_shuffle_v3bf16_v4bf16__7_2_1(ptr addrspace(1) inreg %ptr) {
2590 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_2_1:
2592 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2593 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
2594 ; GFX900-NEXT: ;;#ASMSTART
2595 ; GFX900-NEXT: ; def v[0:1]
2596 ; GFX900-NEXT: ;;#ASMEND
2597 ; GFX900-NEXT: ;;#ASMSTART
2598 ; GFX900-NEXT: ; def v[2:3]
2599 ; GFX900-NEXT: ;;#ASMEND
2600 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16
2601 ; GFX900-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4
2602 ; GFX900-NEXT: global_store_dword v4, v1, s[16:17]
2603 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2604 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2606 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_2_1:
2608 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2609 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2610 ; GFX90A-NEXT: ;;#ASMSTART
2611 ; GFX90A-NEXT: ; def v[0:1]
2612 ; GFX90A-NEXT: ;;#ASMEND
2613 ; GFX90A-NEXT: ;;#ASMSTART
2614 ; GFX90A-NEXT: ; def v[2:3]
2615 ; GFX90A-NEXT: ;;#ASMEND
2616 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v3, 16
2617 ; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4
2618 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
2619 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2620 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2622 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_1:
2624 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2625 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2626 ; GFX940-NEXT: ;;#ASMSTART
2627 ; GFX940-NEXT: ; def v[0:1]
2628 ; GFX940-NEXT: ;;#ASMEND
2629 ; GFX940-NEXT: ;;#ASMSTART
2630 ; GFX940-NEXT: ; def v[2:3]
2631 ; GFX940-NEXT: ;;#ASMEND
2632 ; GFX940-NEXT: s_nop 0
2633 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16
2634 ; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2635 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
2636 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2637 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are produced by opaque inline asm ("=v" outputs); the shuffled
; vector is then stored to %ptr.
2638 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2639 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2640 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
2641 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of two <4 x bfloat> inputs, mask <7, 3, 1>.
; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
; { %vec1[3], %vec0[3], %vec0[1] }. The per-target assertions are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script instead of editing them by hand.
2645 define void @v_shuffle_v3bf16_v4bf16__7_3_1(ptr addrspace(1) inreg %ptr) {
2646 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_3_1:
2648 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2649 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
2650 ; GFX900-NEXT: ;;#ASMSTART
2651 ; GFX900-NEXT: ; def v[0:1]
2652 ; GFX900-NEXT: ;;#ASMEND
2653 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2654 ; GFX900-NEXT: ;;#ASMSTART
2655 ; GFX900-NEXT: ; def v[2:3]
2656 ; GFX900-NEXT: ;;#ASMEND
2657 ; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4
2658 ; GFX900-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4
2659 ; GFX900-NEXT: global_store_dword v4, v1, s[16:17]
2660 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2661 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2663 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_3_1:
2665 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2666 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2667 ; GFX90A-NEXT: ;;#ASMSTART
2668 ; GFX90A-NEXT: ; def v[0:1]
2669 ; GFX90A-NEXT: ;;#ASMEND
2670 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2671 ; GFX90A-NEXT: ;;#ASMSTART
2672 ; GFX90A-NEXT: ; def v[2:3]
2673 ; GFX90A-NEXT: ;;#ASMEND
2674 ; GFX90A-NEXT: v_perm_b32 v1, v1, v3, s4
2675 ; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4
2676 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
2677 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2678 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2680 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_1:
2682 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2683 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2684 ; GFX940-NEXT: ;;#ASMSTART
2685 ; GFX940-NEXT: ; def v[0:1]
2686 ; GFX940-NEXT: ;;#ASMEND
2687 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2688 ; GFX940-NEXT: ;;#ASMSTART
2689 ; GFX940-NEXT: ; def v[2:3]
2690 ; GFX940-NEXT: ;;#ASMEND
2691 ; GFX940-NEXT: s_nop 0
2692 ; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2
2693 ; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2694 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
2695 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2696 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are produced by opaque inline asm ("=v" outputs); the shuffled
; vector is then stored to %ptr.
2697 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2698 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2699 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
2700 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of two <4 x bfloat> inputs, mask <7, 4, 1>.
; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
; { %vec1[3], %vec1[0], %vec0[1] }. The per-target assertions are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script instead of editing them by hand.
2704 define void @v_shuffle_v3bf16_v4bf16__7_4_1(ptr addrspace(1) inreg %ptr) {
2705 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_4_1:
2707 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2708 ; GFX900-NEXT: ;;#ASMSTART
2709 ; GFX900-NEXT: ; def v[0:1]
2710 ; GFX900-NEXT: ;;#ASMEND
2711 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
2712 ; GFX900-NEXT: ;;#ASMSTART
2713 ; GFX900-NEXT: ; def v[1:2]
2714 ; GFX900-NEXT: ;;#ASMEND
2715 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v2, 16
2716 ; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4
2717 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
2718 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2719 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2721 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_4_1:
2723 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2724 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2725 ; GFX90A-NEXT: ;;#ASMSTART
2726 ; GFX90A-NEXT: ; def v[0:1]
2727 ; GFX90A-NEXT: ;;#ASMEND
2728 ; GFX90A-NEXT: ;;#ASMSTART
2729 ; GFX90A-NEXT: ; def v[2:3]
2730 ; GFX90A-NEXT: ;;#ASMEND
2731 ; GFX90A-NEXT: v_alignbit_b32 v1, v2, v3, 16
2732 ; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4
2733 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
2734 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2735 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2737 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_1:
2739 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2740 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2741 ; GFX940-NEXT: ;;#ASMSTART
2742 ; GFX940-NEXT: ; def v[0:1]
2743 ; GFX940-NEXT: ;;#ASMEND
2744 ; GFX940-NEXT: ;;#ASMSTART
2745 ; GFX940-NEXT: ; def v[2:3]
2746 ; GFX940-NEXT: ;;#ASMEND
2747 ; GFX940-NEXT: s_nop 0
2748 ; GFX940-NEXT: v_alignbit_b32 v1, v2, v3, 16
2749 ; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2750 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
2751 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2752 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are produced by opaque inline asm ("=v" outputs); the shuffled
; vector is then stored to %ptr.
2753 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2754 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2755 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
2756 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of two <4 x bfloat> inputs, mask <7, 5, 1>.
; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
; { %vec1[3], %vec1[1], %vec0[1] }. The per-target assertions are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script instead of editing them by hand.
2760 define void @v_shuffle_v3bf16_v4bf16__7_5_1(ptr addrspace(1) inreg %ptr) {
2761 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_5_1:
2763 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2764 ; GFX900-NEXT: ;;#ASMSTART
2765 ; GFX900-NEXT: ; def v[0:1]
2766 ; GFX900-NEXT: ;;#ASMEND
2767 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
2768 ; GFX900-NEXT: ;;#ASMSTART
2769 ; GFX900-NEXT: ; def v[1:2]
2770 ; GFX900-NEXT: ;;#ASMEND
2771 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2772 ; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4
2773 ; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4
2774 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
2775 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2776 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2778 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_5_1:
2780 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2781 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2782 ; GFX90A-NEXT: ;;#ASMSTART
2783 ; GFX90A-NEXT: ; def v[0:1]
2784 ; GFX90A-NEXT: ;;#ASMEND
2785 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2786 ; GFX90A-NEXT: ;;#ASMSTART
2787 ; GFX90A-NEXT: ; def v[2:3]
2788 ; GFX90A-NEXT: ;;#ASMEND
2789 ; GFX90A-NEXT: v_perm_b32 v1, v2, v3, s4
2790 ; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4
2791 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
2792 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2793 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2795 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_1:
2797 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2798 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2799 ; GFX940-NEXT: ;;#ASMSTART
2800 ; GFX940-NEXT: ; def v[0:1]
2801 ; GFX940-NEXT: ;;#ASMEND
2802 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2803 ; GFX940-NEXT: ;;#ASMSTART
2804 ; GFX940-NEXT: ; def v[2:3]
2805 ; GFX940-NEXT: ;;#ASMEND
2806 ; GFX940-NEXT: s_nop 0
2807 ; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2
2808 ; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2809 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
2810 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2811 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are produced by opaque inline asm ("=v" outputs); the shuffled
; vector is then stored to %ptr.
2812 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2813 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2814 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
2815 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of two <4 x bfloat> inputs, mask <7, 6, 1>.
; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
; { %vec1[3], %vec1[2], %vec0[1] }. The per-target assertions are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script instead of editing them by hand.
2819 define void @v_shuffle_v3bf16_v4bf16__7_6_1(ptr addrspace(1) inreg %ptr) {
2820 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_6_1:
2822 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2823 ; GFX900-NEXT: ;;#ASMSTART
2824 ; GFX900-NEXT: ; def v[0:1]
2825 ; GFX900-NEXT: ;;#ASMEND
2826 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
2827 ; GFX900-NEXT: ;;#ASMSTART
2828 ; GFX900-NEXT: ; def v[1:2]
2829 ; GFX900-NEXT: ;;#ASMEND
2830 ; GFX900-NEXT: v_alignbit_b32 v1, v2, v2, 16
2831 ; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4
2832 ; GFX900-NEXT: global_store_dword v3, v1, s[16:17]
2833 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2834 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2836 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_6_1:
2838 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2839 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2840 ; GFX90A-NEXT: ;;#ASMSTART
2841 ; GFX90A-NEXT: ; def v[0:1]
2842 ; GFX90A-NEXT: ;;#ASMEND
2843 ; GFX90A-NEXT: ;;#ASMSTART
2844 ; GFX90A-NEXT: ; def v[2:3]
2845 ; GFX90A-NEXT: ;;#ASMEND
2846 ; GFX90A-NEXT: v_alignbit_b32 v1, v3, v3, 16
2847 ; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4
2848 ; GFX90A-NEXT: global_store_dword v4, v1, s[16:17]
2849 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2850 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2852 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_1:
2854 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2855 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2856 ; GFX940-NEXT: ;;#ASMSTART
2857 ; GFX940-NEXT: ; def v[0:1]
2858 ; GFX940-NEXT: ;;#ASMEND
2859 ; GFX940-NEXT: ;;#ASMSTART
2860 ; GFX940-NEXT: ; def v[2:3]
2861 ; GFX940-NEXT: ;;#ASMEND
2862 ; GFX940-NEXT: s_nop 0
2863 ; GFX940-NEXT: v_alignbit_b32 v1, v3, v3, 16
2864 ; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2865 ; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1
2866 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2867 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are produced by opaque inline asm ("=v" outputs); the shuffled
; vector is then stored to %ptr.
2868 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2869 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
2870 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
2871 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of one <4 x bfloat> input (second operand
; is poison), mask <poison, 2, 2>. The result is
; { unspecified, %vec0[2], %vec0[2] }. The per-target assertions are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script instead of editing them by hand.
2875 define void @v_shuffle_v3bf16_v4bf16__u_2_2(ptr addrspace(1) inreg %ptr) {
2876 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__u_2_2:
2878 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2879 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2880 ; GFX900-NEXT: ;;#ASMSTART
2881 ; GFX900-NEXT: ; def v[0:1]
2882 ; GFX900-NEXT: ;;#ASMEND
2883 ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1
2884 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
2885 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2886 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2887 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2889 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__u_2_2:
2891 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2892 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2893 ; GFX90A-NEXT: ;;#ASMSTART
2894 ; GFX90A-NEXT: ; def v[0:1]
2895 ; GFX90A-NEXT: ;;#ASMEND
2896 ; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1
2897 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
2898 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2899 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2900 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2902 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__u_2_2:
2904 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2905 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2906 ; GFX940-NEXT: ;;#ASMSTART
2907 ; GFX940-NEXT: ; def v[0:1]
2908 ; GFX940-NEXT: ;;#ASMEND
2909 ; GFX940-NEXT: s_nop 0
2910 ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1
2911 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2912 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2913 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2914 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; The single input is produced by opaque inline asm ("=v" output); the
; shuffled vector is then stored to %ptr.
2915 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2916 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 poison, i32 2, i32 2>
2917 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of one <4 x bfloat> input (second operand
; is poison), mask <0, 2, 2>. The result is
; { %vec0[0], %vec0[2], %vec0[2] }. The per-target assertions are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script instead of editing them by hand.
2921 define void @v_shuffle_v3bf16_v4bf16__0_2_2(ptr addrspace(1) inreg %ptr) {
2922 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__0_2_2:
2924 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2925 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2926 ; GFX900-NEXT: ;;#ASMSTART
2927 ; GFX900-NEXT: ; def v[0:1]
2928 ; GFX900-NEXT: ;;#ASMEND
2929 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2930 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
2931 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
2932 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2933 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2934 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2936 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__0_2_2:
2938 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2939 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2940 ; GFX90A-NEXT: ;;#ASMSTART
2941 ; GFX90A-NEXT: ; def v[0:1]
2942 ; GFX90A-NEXT: ;;#ASMEND
2943 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2944 ; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4
2945 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
2946 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2947 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2948 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2950 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_2_2:
2952 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2953 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2954 ; GFX940-NEXT: ;;#ASMSTART
2955 ; GFX940-NEXT: ; def v[0:1]
2956 ; GFX940-NEXT: ;;#ASMEND
2957 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2958 ; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2
2959 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2960 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2961 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2962 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; The single input is produced by opaque inline asm ("=v" output); the
; shuffled vector is then stored to %ptr.
2963 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
2964 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 2, i32 2>
2965 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of one <4 x bfloat> input (second operand
; is poison), mask <1, 2, 2>. The result is
; { %vec0[1], %vec0[2], %vec0[2] }. The per-target assertions are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script instead of editing them by hand.
2969 define void @v_shuffle_v3bf16_v4bf16__1_2_2(ptr addrspace(1) inreg %ptr) {
2970 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__1_2_2:
2972 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2973 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2974 ; GFX900-NEXT: ;;#ASMSTART
2975 ; GFX900-NEXT: ; def v[0:1]
2976 ; GFX900-NEXT: ;;#ASMEND
2977 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16
2978 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
2979 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2980 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2981 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2983 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__1_2_2:
2985 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2986 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2987 ; GFX90A-NEXT: ;;#ASMSTART
2988 ; GFX90A-NEXT: ; def v[0:1]
2989 ; GFX90A-NEXT: ;;#ASMEND
2990 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16
2991 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
2992 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2993 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2994 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2996 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_2_2:
2998 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2999 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3000 ; GFX940-NEXT: ;;#ASMSTART
3001 ; GFX940-NEXT: ; def v[0:1]
3002 ; GFX940-NEXT: ;;#ASMEND
3003 ; GFX940-NEXT: s_nop 0
3004 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16
3005 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3006 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
3007 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3008 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; The single input is produced by opaque inline asm ("=v" output); the
; shuffled vector is then stored to %ptr.
3009 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3010 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 1, i32 2, i32 2>
3011 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of one <4 x bfloat> input (second operand
; is poison), mask <2, 2, 2>, i.e. every result lane is %vec0[2].
; The per-target assertions are autogenerated (see the NOTE on the first line
; of the file); regenerate them with the update script instead of editing them
; by hand.
3015 define void @v_shuffle_v3bf16_v4bf16__2_2_2(ptr addrspace(1) inreg %ptr) {
3016 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__2_2_2:
3018 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3019 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3020 ; GFX900-NEXT: ;;#ASMSTART
3021 ; GFX900-NEXT: ; def v[0:1]
3022 ; GFX900-NEXT: ;;#ASMEND
3023 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
3024 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
3025 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
3026 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
3027 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3028 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3030 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__2_2_2:
3032 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3033 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3034 ; GFX90A-NEXT: ;;#ASMSTART
3035 ; GFX90A-NEXT: ; def v[0:1]
3036 ; GFX90A-NEXT: ;;#ASMEND
3037 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
3038 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
3039 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
3040 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
3041 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3042 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3044 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_2_2:
3046 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3047 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3048 ; GFX940-NEXT: ;;#ASMSTART
3049 ; GFX940-NEXT: ; def v[0:1]
3050 ; GFX940-NEXT: ;;#ASMEND
3051 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
3052 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
3053 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3054 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
3055 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3056 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; The single input is produced by opaque inline asm ("=v" output); the
; shuffled vector is then stored to %ptr.
3057 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3058 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 2, i32 2, i32 2>
3059 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of one <4 x bfloat> input (second operand
; is poison), mask <3, 2, 2>. The result is
; { %vec0[3], %vec0[2], %vec0[2] }. The per-target assertions are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script instead of editing them by hand.
3063 define void @v_shuffle_v3bf16_v4bf16__3_2_2(ptr addrspace(1) inreg %ptr) {
3064 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__3_2_2:
3066 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3067 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3068 ; GFX900-NEXT: ;;#ASMSTART
3069 ; GFX900-NEXT: ; def v[0:1]
3070 ; GFX900-NEXT: ;;#ASMEND
3071 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16
3072 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
3073 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
3074 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3075 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3077 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__3_2_2:
3079 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3080 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3081 ; GFX90A-NEXT: ;;#ASMSTART
3082 ; GFX90A-NEXT: ; def v[0:1]
3083 ; GFX90A-NEXT: ;;#ASMEND
3084 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16
3085 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
3086 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
3087 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3088 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3090 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_2_2:
3092 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3093 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3094 ; GFX940-NEXT: ;;#ASMSTART
3095 ; GFX940-NEXT: ; def v[0:1]
3096 ; GFX940-NEXT: ;;#ASMEND
3097 ; GFX940-NEXT: s_nop 0
3098 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16
3099 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3100 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
3101 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3102 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; The single input is produced by opaque inline asm ("=v" output); the
; shuffled vector is then stored to %ptr.
3103 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3104 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 3, i32 2, i32 2>
3105 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle with mask <4, 2, 2>. Lanes 1 and 2 are
; %vec0[2]; lane 0 uses index 4, which addresses element 0 of the second
; operand.
; NOTE(review): the second operand is poison in this function, so lane 0 is
; unspecified and the expected code is identical to the __u_2_2 case. The
; sibling __5_2_2/__6_2_2/__7_2_2 cases pass a real %vec1; confirm whether
; %vec1 was intended here too. If so, the assertions need regenerating.
; The per-target assertions are autogenerated (see the NOTE on the first line
; of the file); regenerate them with the update script instead of editing them
; by hand.
3109 define void @v_shuffle_v3bf16_v4bf16__4_2_2(ptr addrspace(1) inreg %ptr) {
3110 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__4_2_2:
3112 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3113 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3114 ; GFX900-NEXT: ;;#ASMSTART
3115 ; GFX900-NEXT: ; def v[0:1]
3116 ; GFX900-NEXT: ;;#ASMEND
3117 ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1
3118 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
3119 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
3120 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3121 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3123 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__4_2_2:
3125 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3126 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3127 ; GFX90A-NEXT: ;;#ASMSTART
3128 ; GFX90A-NEXT: ; def v[0:1]
3129 ; GFX90A-NEXT: ;;#ASMEND
3130 ; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1
3131 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
3132 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
3133 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3134 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3136 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__4_2_2:
3138 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3139 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3140 ; GFX940-NEXT: ;;#ASMSTART
3141 ; GFX940-NEXT: ; def v[0:1]
3142 ; GFX940-NEXT: ;;#ASMEND
3143 ; GFX940-NEXT: s_nop 0
3144 ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1
3145 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3146 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
3147 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3148 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; The single input is produced by opaque inline asm ("=v" output); the
; shuffled vector is then stored to %ptr.
3149 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3150 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 4, i32 2, i32 2>
3151 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of two <4 x bfloat> inputs, mask <5, 2, 2>.
; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
; { %vec1[1], %vec0[2], %vec0[2] }. The per-target assertions are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script instead of editing them by hand.
3155 define void @v_shuffle_v3bf16_v4bf16__5_2_2(ptr addrspace(1) inreg %ptr) {
3156 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__5_2_2:
3158 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3159 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3160 ; GFX900-NEXT: ;;#ASMSTART
3161 ; GFX900-NEXT: ; def v[0:1]
3162 ; GFX900-NEXT: ;;#ASMEND
3163 ; GFX900-NEXT: ;;#ASMSTART
3164 ; GFX900-NEXT: ; def v[2:3]
3165 ; GFX900-NEXT: ;;#ASMEND
3166 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v2, 16
3167 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
3168 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3169 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3170 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3172 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__5_2_2:
3174 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3175 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3176 ; GFX90A-NEXT: ;;#ASMSTART
3177 ; GFX90A-NEXT: ; def v[0:1]
3178 ; GFX90A-NEXT: ;;#ASMEND
3179 ; GFX90A-NEXT: ;;#ASMSTART
3180 ; GFX90A-NEXT: ; def v[2:3]
3181 ; GFX90A-NEXT: ;;#ASMEND
3182 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v2, 16
3183 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
3184 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3185 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3186 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3188 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_2_2:
3190 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3191 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3192 ; GFX940-NEXT: ;;#ASMSTART
3193 ; GFX940-NEXT: ; def v[0:1]
3194 ; GFX940-NEXT: ;;#ASMEND
3195 ; GFX940-NEXT: ;;#ASMSTART
3196 ; GFX940-NEXT: ; def v[2:3]
3197 ; GFX940-NEXT: ;;#ASMEND
3198 ; GFX940-NEXT: s_nop 0
3199 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16
3200 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3201 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3202 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3203 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are produced by opaque inline asm ("=v" outputs); the shuffled
; vector is then stored to %ptr.
3204 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3205 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3206 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
3207 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of two <4 x bfloat> inputs, mask <6, 2, 2>.
; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
; { %vec1[2], %vec0[2], %vec0[2] }. The per-target assertions are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script instead of editing them by hand.
3211 define void @v_shuffle_v3bf16_v4bf16__6_2_2(ptr addrspace(1) inreg %ptr) {
3212 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__6_2_2:
3214 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3215 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3216 ; GFX900-NEXT: ;;#ASMSTART
3217 ; GFX900-NEXT: ; def v[0:1]
3218 ; GFX900-NEXT: ;;#ASMEND
3219 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
3220 ; GFX900-NEXT: ;;#ASMSTART
3221 ; GFX900-NEXT: ; def v[2:3]
3222 ; GFX900-NEXT: ;;#ASMEND
3223 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
3224 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
3225 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3226 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3227 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3229 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__6_2_2:
3231 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3232 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3233 ; GFX90A-NEXT: ;;#ASMSTART
3234 ; GFX90A-NEXT: ; def v[0:1]
3235 ; GFX90A-NEXT: ;;#ASMEND
3236 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
3237 ; GFX90A-NEXT: ;;#ASMSTART
3238 ; GFX90A-NEXT: ; def v[2:3]
3239 ; GFX90A-NEXT: ;;#ASMEND
3240 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
3241 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
3242 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3243 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3244 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3246 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_2_2:
3248 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3249 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3250 ; GFX940-NEXT: ;;#ASMSTART
3251 ; GFX940-NEXT: ; def v[0:1]
3252 ; GFX940-NEXT: ;;#ASMEND
3253 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
3254 ; GFX940-NEXT: ;;#ASMSTART
3255 ; GFX940-NEXT: ; def v[2:3]
3256 ; GFX940-NEXT: ;;#ASMEND
3257 ; GFX940-NEXT: s_nop 0
3258 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
3259 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3260 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3261 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3262 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are produced by opaque inline asm ("=v" outputs); the shuffled
; vector is then stored to %ptr.
3263 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3264 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3265 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
3266 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Codegen test: <3 x bfloat> shuffle of two <4 x bfloat> inputs, mask <7, 2, 2>.
; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
; { %vec1[3], %vec0[2], %vec0[2] }. The per-target assertions are
; autogenerated (see the NOTE on the first line of the file); regenerate them
; with the update script instead of editing them by hand.
3270 define void @v_shuffle_v3bf16_v4bf16__7_2_2(ptr addrspace(1) inreg %ptr) {
3271 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_2_2:
3273 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3274 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3275 ; GFX900-NEXT: ;;#ASMSTART
3276 ; GFX900-NEXT: ; def v[0:1]
3277 ; GFX900-NEXT: ;;#ASMEND
3278 ; GFX900-NEXT: ;;#ASMSTART
3279 ; GFX900-NEXT: ; def v[2:3]
3280 ; GFX900-NEXT: ;;#ASMEND
3281 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16
3282 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
3283 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3284 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3285 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3287 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_2_2:
3289 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3290 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3291 ; GFX90A-NEXT: ;;#ASMSTART
3292 ; GFX90A-NEXT: ; def v[0:1]
3293 ; GFX90A-NEXT: ;;#ASMEND
3294 ; GFX90A-NEXT: ;;#ASMSTART
3295 ; GFX90A-NEXT: ; def v[2:3]
3296 ; GFX90A-NEXT: ;;#ASMEND
3297 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16
3298 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
3299 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3300 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3301 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3303 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_2:
3305 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3306 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3307 ; GFX940-NEXT: ;;#ASMSTART
3308 ; GFX940-NEXT: ; def v[0:1]
3309 ; GFX940-NEXT: ;;#ASMEND
3310 ; GFX940-NEXT: ;;#ASMSTART
3311 ; GFX940-NEXT: ; def v[2:3]
3312 ; GFX940-NEXT: ;;#ASMEND
3313 ; GFX940-NEXT: s_nop 0
3314 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16
3315 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3316 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3317 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3318 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are produced by opaque inline asm ("=v" outputs); the shuffled
; vector is then stored to %ptr.
3319 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3320 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3321 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
3322 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles two <4 x bfloat> inputs into a <3 x bfloat> with mask <7, poison, 2>:
; lane 0 is element 3 of %vec1 (mask index 7), lane 1 is poison (unconstrained),
; lane 2 is element 2 of %vec0.
; Both inputs are produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
3326 define void @v_shuffle_v3bf16_v4bf16__7_u_2(ptr addrspace(1) inreg %ptr) {
3327 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_u_2:
3329 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3330 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3331 ; GFX900-NEXT: ;;#ASMSTART
3332 ; GFX900-NEXT: ; def v[0:1]
3333 ; GFX900-NEXT: ;;#ASMEND
3334 ; GFX900-NEXT: ;;#ASMSTART
3335 ; GFX900-NEXT: ; def v[2:3]
3336 ; GFX900-NEXT: ;;#ASMEND
3337 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v3, 16
3338 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
3339 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3340 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3341 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3343 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_u_2:
3345 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3346 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3347 ; GFX90A-NEXT: ;;#ASMSTART
3348 ; GFX90A-NEXT: ; def v[0:1]
3349 ; GFX90A-NEXT: ;;#ASMEND
3350 ; GFX90A-NEXT: ;;#ASMSTART
3351 ; GFX90A-NEXT: ; def v[2:3]
3352 ; GFX90A-NEXT: ;;#ASMEND
3353 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v3, 16
3354 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
3355 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3356 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3357 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3359 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_2:
3361 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3362 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3363 ; GFX940-NEXT: ;;#ASMSTART
3364 ; GFX940-NEXT: ; def v[0:1]
3365 ; GFX940-NEXT: ;;#ASMEND
3366 ; GFX940-NEXT: ;;#ASMSTART
3367 ; GFX940-NEXT: ; def v[2:3]
3368 ; GFX940-NEXT: ;;#ASMEND
3369 ; GFX940-NEXT: s_nop 0
3370 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16
3371 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3372 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3373 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3374 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3375 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3376 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3377 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
3378 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles two <4 x bfloat> inputs into a <3 x bfloat> with mask <7, 0, 2>:
; lane 0 is element 3 of %vec1 (mask index 7), lane 1 is element 0 of %vec0,
; lane 2 is element 2 of %vec0.
; Both inputs are produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
3382 define void @v_shuffle_v3bf16_v4bf16__7_0_2(ptr addrspace(1) inreg %ptr) {
3383 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_0_2:
3385 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3386 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3387 ; GFX900-NEXT: ;;#ASMSTART
3388 ; GFX900-NEXT: ; def v[0:1]
3389 ; GFX900-NEXT: ;;#ASMEND
3390 ; GFX900-NEXT: ;;#ASMSTART
3391 ; GFX900-NEXT: ; def v[2:3]
3392 ; GFX900-NEXT: ;;#ASMEND
3393 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16
3394 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
3395 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3396 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3397 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3399 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_0_2:
3401 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3402 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3403 ; GFX90A-NEXT: ;;#ASMSTART
3404 ; GFX90A-NEXT: ; def v[0:1]
3405 ; GFX90A-NEXT: ;;#ASMEND
3406 ; GFX90A-NEXT: ;;#ASMSTART
3407 ; GFX90A-NEXT: ; def v[2:3]
3408 ; GFX90A-NEXT: ;;#ASMEND
3409 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16
3410 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
3411 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3412 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3413 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3415 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_2:
3417 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3418 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3419 ; GFX940-NEXT: ;;#ASMSTART
3420 ; GFX940-NEXT: ; def v[0:1]
3421 ; GFX940-NEXT: ;;#ASMEND
3422 ; GFX940-NEXT: ;;#ASMSTART
3423 ; GFX940-NEXT: ; def v[2:3]
3424 ; GFX940-NEXT: ;;#ASMEND
3425 ; GFX940-NEXT: s_nop 0
3426 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16
3427 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3428 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3429 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3430 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3431 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3432 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3433 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
3434 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles two <4 x bfloat> inputs into a <3 x bfloat> with mask <7, 1, 2>:
; lane 0 is element 3 of %vec1 (mask index 7), lane 1 is element 1 of %vec0,
; lane 2 is element 2 of %vec0.
; Both inputs are produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
3438 define void @v_shuffle_v3bf16_v4bf16__7_1_2(ptr addrspace(1) inreg %ptr) {
3439 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_1_2:
3441 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3442 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3443 ; GFX900-NEXT: ;;#ASMSTART
3444 ; GFX900-NEXT: ; def v[0:1]
3445 ; GFX900-NEXT: ;;#ASMEND
3446 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3447 ; GFX900-NEXT: ;;#ASMSTART
3448 ; GFX900-NEXT: ; def v[2:3]
3449 ; GFX900-NEXT: ;;#ASMEND
3450 ; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4
3451 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
3452 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3453 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3454 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3456 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_1_2:
3458 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3459 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3460 ; GFX90A-NEXT: ;;#ASMSTART
3461 ; GFX90A-NEXT: ; def v[0:1]
3462 ; GFX90A-NEXT: ;;#ASMEND
3463 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3464 ; GFX90A-NEXT: ;;#ASMSTART
3465 ; GFX90A-NEXT: ; def v[2:3]
3466 ; GFX90A-NEXT: ;;#ASMEND
3467 ; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
3468 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
3469 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3470 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3471 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3473 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_2:
3475 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3476 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3477 ; GFX940-NEXT: ;;#ASMSTART
3478 ; GFX940-NEXT: ; def v[0:1]
3479 ; GFX940-NEXT: ;;#ASMEND
3480 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3481 ; GFX940-NEXT: ;;#ASMSTART
3482 ; GFX940-NEXT: ; def v[2:3]
3483 ; GFX940-NEXT: ;;#ASMEND
3484 ; GFX940-NEXT: s_nop 0
3485 ; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
3486 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3487 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3488 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3489 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3490 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3491 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3492 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
3493 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles two <4 x bfloat> inputs into a <3 x bfloat> with mask <7, 3, 2>:
; lane 0 is element 3 of %vec1 (mask index 7), lane 1 is element 3 of %vec0,
; lane 2 is element 2 of %vec0.
; Both inputs are produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
3497 define void @v_shuffle_v3bf16_v4bf16__7_3_2(ptr addrspace(1) inreg %ptr) {
3498 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_3_2:
3500 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3501 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3502 ; GFX900-NEXT: ;;#ASMSTART
3503 ; GFX900-NEXT: ; def v[0:1]
3504 ; GFX900-NEXT: ;;#ASMEND
3505 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3506 ; GFX900-NEXT: ;;#ASMSTART
3507 ; GFX900-NEXT: ; def v[2:3]
3508 ; GFX900-NEXT: ;;#ASMEND
3509 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
3510 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
3511 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3512 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3513 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3515 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_3_2:
3517 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3518 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3519 ; GFX90A-NEXT: ;;#ASMSTART
3520 ; GFX90A-NEXT: ; def v[0:1]
3521 ; GFX90A-NEXT: ;;#ASMEND
3522 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3523 ; GFX90A-NEXT: ;;#ASMSTART
3524 ; GFX90A-NEXT: ; def v[2:3]
3525 ; GFX90A-NEXT: ;;#ASMEND
3526 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
3527 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
3528 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3529 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3530 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3532 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_2:
3534 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3535 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3536 ; GFX940-NEXT: ;;#ASMSTART
3537 ; GFX940-NEXT: ; def v[0:1]
3538 ; GFX940-NEXT: ;;#ASMEND
3539 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3540 ; GFX940-NEXT: ;;#ASMSTART
3541 ; GFX940-NEXT: ; def v[2:3]
3542 ; GFX940-NEXT: ;;#ASMEND
3543 ; GFX940-NEXT: s_nop 0
3544 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
3545 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3546 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3547 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3548 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3549 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3550 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3551 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
3552 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles two <4 x bfloat> inputs into a <3 x bfloat> with mask <7, 4, 2>:
; lane 0 is element 3 of %vec1 (mask index 7), lane 1 is element 0 of %vec1 (mask index 4),
; lane 2 is element 2 of %vec0.
; Both inputs are produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
3556 define void @v_shuffle_v3bf16_v4bf16__7_4_2(ptr addrspace(1) inreg %ptr) {
3557 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_4_2:
3559 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3560 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3561 ; GFX900-NEXT: ;;#ASMSTART
3562 ; GFX900-NEXT: ; def v[0:1]
3563 ; GFX900-NEXT: ;;#ASMEND
3564 ; GFX900-NEXT: ;;#ASMSTART
3565 ; GFX900-NEXT: ; def v[2:3]
3566 ; GFX900-NEXT: ;;#ASMEND
3567 ; GFX900-NEXT: v_alignbit_b32 v0, v2, v3, 16
3568 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
3569 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3570 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3571 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3573 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_4_2:
3575 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3576 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3577 ; GFX90A-NEXT: ;;#ASMSTART
3578 ; GFX90A-NEXT: ; def v[0:1]
3579 ; GFX90A-NEXT: ;;#ASMEND
3580 ; GFX90A-NEXT: ;;#ASMSTART
3581 ; GFX90A-NEXT: ; def v[2:3]
3582 ; GFX90A-NEXT: ;;#ASMEND
3583 ; GFX90A-NEXT: v_alignbit_b32 v0, v2, v3, 16
3584 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
3585 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3586 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3587 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3589 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_2:
3591 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3592 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3593 ; GFX940-NEXT: ;;#ASMSTART
3594 ; GFX940-NEXT: ; def v[0:1]
3595 ; GFX940-NEXT: ;;#ASMEND
3596 ; GFX940-NEXT: ;;#ASMSTART
3597 ; GFX940-NEXT: ; def v[2:3]
3598 ; GFX940-NEXT: ;;#ASMEND
3599 ; GFX940-NEXT: s_nop 0
3600 ; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16
3601 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3602 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3603 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3604 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3605 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3606 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3607 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
3608 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles two <4 x bfloat> inputs into a <3 x bfloat> with mask <7, 5, 2>:
; lane 0 is element 3 of %vec1 (mask index 7), lane 1 is element 1 of %vec1 (mask index 5),
; lane 2 is element 2 of %vec0.
; Both inputs are produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
3612 define void @v_shuffle_v3bf16_v4bf16__7_5_2(ptr addrspace(1) inreg %ptr) {
3613 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_5_2:
3615 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3616 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3617 ; GFX900-NEXT: ;;#ASMSTART
3618 ; GFX900-NEXT: ; def v[0:1]
3619 ; GFX900-NEXT: ;;#ASMEND
3620 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3621 ; GFX900-NEXT: ;;#ASMSTART
3622 ; GFX900-NEXT: ; def v[2:3]
3623 ; GFX900-NEXT: ;;#ASMEND
3624 ; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4
3625 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
3626 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3627 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3628 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3630 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_5_2:
3632 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3633 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3634 ; GFX90A-NEXT: ;;#ASMSTART
3635 ; GFX90A-NEXT: ; def v[0:1]
3636 ; GFX90A-NEXT: ;;#ASMEND
3637 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3638 ; GFX90A-NEXT: ;;#ASMSTART
3639 ; GFX90A-NEXT: ; def v[2:3]
3640 ; GFX90A-NEXT: ;;#ASMEND
3641 ; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4
3642 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
3643 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3644 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3645 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3647 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_2:
3649 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3650 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3651 ; GFX940-NEXT: ;;#ASMSTART
3652 ; GFX940-NEXT: ; def v[0:1]
3653 ; GFX940-NEXT: ;;#ASMEND
3654 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3655 ; GFX940-NEXT: ;;#ASMSTART
3656 ; GFX940-NEXT: ; def v[2:3]
3657 ; GFX940-NEXT: ;;#ASMEND
3658 ; GFX940-NEXT: s_nop 0
3659 ; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2
3660 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3661 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3662 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3663 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3664 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3665 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3666 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
3667 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles two <4 x bfloat> inputs into a <3 x bfloat> with mask <7, 6, 2>:
; lane 0 is element 3 of %vec1 (mask index 7), lane 1 is element 2 of %vec1 (mask index 6),
; lane 2 is element 2 of %vec0.
; Both inputs are produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
3671 define void @v_shuffle_v3bf16_v4bf16__7_6_2(ptr addrspace(1) inreg %ptr) {
3672 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_6_2:
3674 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3675 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3676 ; GFX900-NEXT: ;;#ASMSTART
3677 ; GFX900-NEXT: ; def v[0:1]
3678 ; GFX900-NEXT: ;;#ASMEND
3679 ; GFX900-NEXT: ;;#ASMSTART
3680 ; GFX900-NEXT: ; def v[2:3]
3681 ; GFX900-NEXT: ;;#ASMEND
3682 ; GFX900-NEXT: v_alignbit_b32 v0, v3, v3, 16
3683 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
3684 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3685 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3686 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3688 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_6_2:
3690 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3691 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3692 ; GFX90A-NEXT: ;;#ASMSTART
3693 ; GFX90A-NEXT: ; def v[0:1]
3694 ; GFX90A-NEXT: ;;#ASMEND
3695 ; GFX90A-NEXT: ;;#ASMSTART
3696 ; GFX90A-NEXT: ; def v[2:3]
3697 ; GFX90A-NEXT: ;;#ASMEND
3698 ; GFX90A-NEXT: v_alignbit_b32 v0, v3, v3, 16
3699 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
3700 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3701 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3702 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3704 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_2:
3706 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3707 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3708 ; GFX940-NEXT: ;;#ASMSTART
3709 ; GFX940-NEXT: ; def v[0:1]
3710 ; GFX940-NEXT: ;;#ASMEND
3711 ; GFX940-NEXT: ;;#ASMSTART
3712 ; GFX940-NEXT: ; def v[2:3]
3713 ; GFX940-NEXT: ;;#ASMEND
3714 ; GFX940-NEXT: s_nop 0
3715 ; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16
3716 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3717 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3718 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3719 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3720 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3721 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
3722 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
3723 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles a single <4 x bfloat> input (second operand is poison) into a <3 x bfloat>
; with mask <poison, 3, 3>: lane 0 is poison (unconstrained), lanes 1 and 2 are element 3 of %vec0.
; The input is produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
3727 define void @v_shuffle_v3bf16_v4bf16__u_3_3(ptr addrspace(1) inreg %ptr) {
3728 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__u_3_3:
3730 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3731 ; GFX900-NEXT: ;;#ASMSTART
3732 ; GFX900-NEXT: ; def v[0:1]
3733 ; GFX900-NEXT: ;;#ASMEND
3734 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
3735 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3736 ; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1
3737 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3738 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
3739 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
3740 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3741 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3743 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__u_3_3:
3745 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3746 ; GFX90A-NEXT: ;;#ASMSTART
3747 ; GFX90A-NEXT: ; def v[0:1]
3748 ; GFX90A-NEXT: ;;#ASMEND
3749 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
3750 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3751 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1
3752 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3753 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
3754 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
3755 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3756 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3758 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__u_3_3:
3760 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3761 ; GFX940-NEXT: ;;#ASMSTART
3762 ; GFX940-NEXT: ; def v[0:1]
3763 ; GFX940-NEXT: ;;#ASMEND
3764 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
3765 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3766 ; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1
3767 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3768 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
3769 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3770 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3771 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3772 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3773 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 poison, i32 3, i32 3>
3774 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles a single <4 x bfloat> input (second operand is poison) into a <3 x bfloat>
; with mask <0, 3, 3>: lane 0 is element 0 of %vec0, lanes 1 and 2 are element 3 of %vec0.
; The input is produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
3778 define void @v_shuffle_v3bf16_v4bf16__0_3_3(ptr addrspace(1) inreg %ptr) {
3779 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__0_3_3:
3781 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3782 ; GFX900-NEXT: ;;#ASMSTART
3783 ; GFX900-NEXT: ; def v[0:1]
3784 ; GFX900-NEXT: ;;#ASMEND
3785 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
3786 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3787 ; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1
3788 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3789 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
3790 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
3791 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3792 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3794 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__0_3_3:
3796 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3797 ; GFX90A-NEXT: ;;#ASMSTART
3798 ; GFX90A-NEXT: ; def v[0:1]
3799 ; GFX90A-NEXT: ;;#ASMEND
3800 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
3801 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3802 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1
3803 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3804 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
3805 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
3806 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3807 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3809 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_3_3:
3811 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3812 ; GFX940-NEXT: ;;#ASMSTART
3813 ; GFX940-NEXT: ; def v[0:1]
3814 ; GFX940-NEXT: ;;#ASMEND
3815 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
3816 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3817 ; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1
3818 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3819 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
3820 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3821 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3822 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3823 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3824 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 3, i32 3>
3825 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles a single <4 x bfloat> input (second operand is poison) into a <3 x bfloat>
; with mask <1, 3, 3>: lane 0 is element 1 of %vec0, lanes 1 and 2 are element 3 of %vec0.
; The input is produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
3829 define void @v_shuffle_v3bf16_v4bf16__1_3_3(ptr addrspace(1) inreg %ptr) {
3830 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__1_3_3:
3832 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3833 ; GFX900-NEXT: ;;#ASMSTART
3834 ; GFX900-NEXT: ; def v[0:1]
3835 ; GFX900-NEXT: ;;#ASMEND
3836 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3837 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3838 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
3839 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3840 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
3841 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
3842 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3843 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3845 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__1_3_3:
3847 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3848 ; GFX90A-NEXT: ;;#ASMSTART
3849 ; GFX90A-NEXT: ; def v[0:1]
3850 ; GFX90A-NEXT: ;;#ASMEND
3851 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3852 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3853 ; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4
3854 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3855 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
3856 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
3857 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3858 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3860 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_3_3:
3862 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3863 ; GFX940-NEXT: ;;#ASMSTART
3864 ; GFX940-NEXT: ; def v[0:1]
3865 ; GFX940-NEXT: ;;#ASMEND
3866 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3867 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3868 ; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2
3869 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3870 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
3871 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3872 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3873 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3874 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3875 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 1, i32 3, i32 3>
3876 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles a single <4 x bfloat> input (second operand is poison) into a <3 x bfloat>
; with mask <2, 3, 3>: lane 0 is element 2 of %vec0, lanes 1 and 2 are element 3 of %vec0.
; The input is produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
3880 define void @v_shuffle_v3bf16_v4bf16__2_3_3(ptr addrspace(1) inreg %ptr) {
3881 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__2_3_3:
3883 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3884 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3885 ; GFX900-NEXT: ;;#ASMSTART
3886 ; GFX900-NEXT: ; def v[0:1]
3887 ; GFX900-NEXT: ;;#ASMEND
3888 ; GFX900-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4
3889 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
3890 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3891 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3893 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__2_3_3:
3895 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3896 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3897 ; GFX90A-NEXT: ;;#ASMSTART
3898 ; GFX90A-NEXT: ; def v[0:1]
3899 ; GFX90A-NEXT: ;;#ASMEND
3900 ; GFX90A-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4
3901 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
3902 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3903 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3905 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_3_3:
3907 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3908 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3909 ; GFX940-NEXT: ;;#ASMSTART
3910 ; GFX940-NEXT: ; def v[0:1]
3911 ; GFX940-NEXT: ;;#ASMEND
3912 ; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1
3913 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
3914 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3915 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3916 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3917 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 2, i32 3, i32 3>
3918 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles a single <4 x bfloat> input (second operand is poison) into a <3 x bfloat>
; with mask <3, 3, 3>: all three lanes are element 3 of %vec0 (a splat of the last element).
; The input is produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
3922 define void @v_shuffle_v3bf16_v4bf16__3_3_3(ptr addrspace(1) inreg %ptr) {
3923 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__3_3_3:
3925 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3926 ; GFX900-NEXT: ;;#ASMSTART
3927 ; GFX900-NEXT: ; def v[0:1]
3928 ; GFX900-NEXT: ;;#ASMEND
3929 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3930 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3931 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1
3932 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
3933 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
3934 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
3935 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3936 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3938 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__3_3_3:
3940 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3941 ; GFX90A-NEXT: ;;#ASMSTART
3942 ; GFX90A-NEXT: ; def v[0:1]
3943 ; GFX90A-NEXT: ;;#ASMEND
3944 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3945 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3946 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v1
3947 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
3948 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
3949 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
3950 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3951 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3953 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_3_3:
3955 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3956 ; GFX940-NEXT: ;;#ASMSTART
3957 ; GFX940-NEXT: ; def v[0:1]
3958 ; GFX940-NEXT: ;;#ASMEND
3959 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3960 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3961 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1
3962 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
3963 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
3964 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3965 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3966 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3967 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
3968 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 3, i32 3, i32 3>
3969 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles a <4 x bfloat> input into a <3 x bfloat> with mask <4, 3, 3>:
; lanes 1 and 2 are element 3 of %vec0; mask index 4 names element 0 of the second operand.
; NOTE(review): the second operand here is poison (no %vec1 is defined), so lane 0 is
; unconstrained and the checked output is identical to the u_3_3 and 0_3_3 cases. Confirm whether
; this case was meant to read a second inline-asm input as the 5_3_3 and 6_3_3 cases do.
; The input is produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
3973 define void @v_shuffle_v3bf16_v4bf16__4_3_3(ptr addrspace(1) inreg %ptr) {
3974 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__4_3_3:
3976 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3977 ; GFX900-NEXT: ;;#ASMSTART
3978 ; GFX900-NEXT: ; def v[0:1]
3979 ; GFX900-NEXT: ;;#ASMEND
3980 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
3981 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3982 ; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1
3983 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3984 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
3985 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
3986 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3987 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3989 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__4_3_3:
3991 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3992 ; GFX90A-NEXT: ;;#ASMSTART
3993 ; GFX90A-NEXT: ; def v[0:1]
3994 ; GFX90A-NEXT: ;;#ASMEND
3995 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
3996 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3997 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1
3998 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3999 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
4000 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
4001 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4002 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4004 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__4_3_3:
4006 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4007 ; GFX940-NEXT: ;;#ASMSTART
4008 ; GFX940-NEXT: ; def v[0:1]
4009 ; GFX940-NEXT: ;;#ASMEND
4010 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
4011 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4012 ; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1
4013 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
4014 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
4015 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4016 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4017 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4018 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4019 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 4, i32 3, i32 3>
4020 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles two <4 x bfloat> inputs into a <3 x bfloat> with mask <5, 3, 3>:
; lane 0 is element 1 of %vec1 (mask index 5), lanes 1 and 2 are element 3 of %vec0.
; Both inputs are produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
4024 define void @v_shuffle_v3bf16_v4bf16__5_3_3(ptr addrspace(1) inreg %ptr) {
4025 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__5_3_3:
4027 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4028 ; GFX900-NEXT: ;;#ASMSTART
4029 ; GFX900-NEXT: ; def v[0:1]
4030 ; GFX900-NEXT: ;;#ASMEND
4031 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
4032 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
4033 ; GFX900-NEXT: ;;#ASMSTART
4034 ; GFX900-NEXT: ; def v[2:3]
4035 ; GFX900-NEXT: ;;#ASMEND
4036 ; GFX900-NEXT: v_perm_b32 v0, v1, v2, s4
4037 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
4038 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
4039 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
4040 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4041 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4043 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__5_3_3:
4045 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4046 ; GFX90A-NEXT: ;;#ASMSTART
4047 ; GFX90A-NEXT: ; def v[0:1]
4048 ; GFX90A-NEXT: ;;#ASMEND
4049 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
4050 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4051 ; GFX90A-NEXT: ;;#ASMSTART
4052 ; GFX90A-NEXT: ; def v[2:3]
4053 ; GFX90A-NEXT: ;;#ASMEND
4054 ; GFX90A-NEXT: v_perm_b32 v0, v1, v2, s4
4055 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
4056 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4057 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
4058 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4059 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4061 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_3_3:
4063 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4064 ; GFX940-NEXT: ;;#ASMSTART
4065 ; GFX940-NEXT: ; def v[0:1]
4066 ; GFX940-NEXT: ;;#ASMEND
4067 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
4068 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4069 ; GFX940-NEXT: ;;#ASMSTART
4070 ; GFX940-NEXT: ; def v[2:3]
4071 ; GFX940-NEXT: ;;#ASMEND
4072 ; GFX940-NEXT: s_nop 0
4073 ; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2
4074 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
4075 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4076 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
4077 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4078 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4079 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4080 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4081 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
4082 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffles two <4 x bfloat> inputs into a <3 x bfloat> with mask <6, 3, 3>:
; lane 0 is element 2 of %vec1 (mask index 6), lanes 1 and 2 are element 3 of %vec0.
; Both inputs are produced by inline asm ("=v" output) and the result is stored through %ptr.
; The check lines are autogenerated (see the file header); regenerate them instead of hand-editing.
4086 define void @v_shuffle_v3bf16_v4bf16__6_3_3(ptr addrspace(1) inreg %ptr) {
4087 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__6_3_3:
4089 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4090 ; GFX900-NEXT: ;;#ASMSTART
4091 ; GFX900-NEXT: ; def v[0:1]
4092 ; GFX900-NEXT: ;;#ASMEND
4093 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
4094 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
4095 ; GFX900-NEXT: ;;#ASMSTART
4096 ; GFX900-NEXT: ; def v[2:3]
4097 ; GFX900-NEXT: ;;#ASMEND
4098 ; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v1
4099 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
4100 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
4101 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
4102 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4103 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4105 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__6_3_3:
4107 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4108 ; GFX90A-NEXT: ;;#ASMSTART
4109 ; GFX90A-NEXT: ; def v[0:1]
4110 ; GFX90A-NEXT: ;;#ASMEND
4111 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
4112 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4113 ; GFX90A-NEXT: ;;#ASMSTART
4114 ; GFX90A-NEXT: ; def v[2:3]
4115 ; GFX90A-NEXT: ;;#ASMEND
4116 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v1
4117 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
4118 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
4119 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4120 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4121 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4123 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_3_3:
4125 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4126 ; GFX940-NEXT: ;;#ASMSTART
4127 ; GFX940-NEXT: ; def v[0:1]
4128 ; GFX940-NEXT: ;;#ASMEND
4129 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
4130 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4131 ; GFX940-NEXT: ;;#ASMSTART
4132 ; GFX940-NEXT: ; def v[2:3]
4133 ; GFX940-NEXT: ;;#ASMEND
4134 ; GFX940-NEXT: s_nop 0
4135 ; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1
4136 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
4137 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
4138 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4139 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4140 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4141 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4142 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4143 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
4144 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 3, 3>: lane 0 takes element 3 of %vec1 (mask indices 4-7
; address the second operand); lanes 1 and 2 both take element 3 of %vec0.
; Both inputs are produced by inline asm with a "=v" output constraint, and the
; <3 x bfloat> result is stored to the global pointer %ptr.
; The check lines are autogenerated (see the note at the top of the file).
4148 define void @v_shuffle_v3bf16_v4bf16__7_3_3(ptr addrspace(1) inreg %ptr) {
4149 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_3_3:
4151 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4152 ; GFX900-NEXT: ;;#ASMSTART
4153 ; GFX900-NEXT: ; def v[0:1]
4154 ; GFX900-NEXT: ;;#ASMEND
4155 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
4156 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
4157 ; GFX900-NEXT: ;;#ASMSTART
4158 ; GFX900-NEXT: ; def v[2:3]
4159 ; GFX900-NEXT: ;;#ASMEND
4160 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
4161 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
4162 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
4163 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
4164 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4165 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4167 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_3_3:
4169 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4170 ; GFX90A-NEXT: ;;#ASMSTART
4171 ; GFX90A-NEXT: ; def v[0:1]
4172 ; GFX90A-NEXT: ;;#ASMEND
4173 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
4174 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4175 ; GFX90A-NEXT: ;;#ASMSTART
4176 ; GFX90A-NEXT: ; def v[2:3]
4177 ; GFX90A-NEXT: ;;#ASMEND
4178 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
4179 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
4180 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4181 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
4182 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4183 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4185 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_3:
4187 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4188 ; GFX940-NEXT: ;;#ASMSTART
4189 ; GFX940-NEXT: ; def v[0:1]
4190 ; GFX940-NEXT: ;;#ASMEND
4191 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
4192 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4193 ; GFX940-NEXT: ;;#ASMSTART
4194 ; GFX940-NEXT: ; def v[2:3]
4195 ; GFX940-NEXT: ;;#ASMEND
4196 ; GFX940-NEXT: s_nop 0
4197 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
4198 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
4199 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4200 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
4201 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4202 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4203 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4204 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4205 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
4206 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, poison, 3>: lane 0 takes element 3 of %vec1 (mask indices
; 4-7 address the second operand), lane 1 is left unspecified by the mask, and
; lane 2 takes element 3 of %vec0.
; Both inputs are produced by inline asm with a "=v" output constraint, and the
; <3 x bfloat> result is stored to the global pointer %ptr.
; The check lines are autogenerated (see the note at the top of the file).
4210 define void @v_shuffle_v3bf16_v4bf16__7_u_3(ptr addrspace(1) inreg %ptr) {
4211 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_u_3:
4213 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4214 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
4215 ; GFX900-NEXT: ;;#ASMSTART
4216 ; GFX900-NEXT: ; def v[0:1]
4217 ; GFX900-NEXT: ;;#ASMEND
4218 ; GFX900-NEXT: ;;#ASMSTART
4219 ; GFX900-NEXT: ; def v[2:3]
4220 ; GFX900-NEXT: ;;#ASMEND
4221 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v3, 16
4222 ; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
4223 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
4224 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4225 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4227 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_u_3:
4229 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4230 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4231 ; GFX90A-NEXT: ;;#ASMSTART
4232 ; GFX90A-NEXT: ; def v[0:1]
4233 ; GFX90A-NEXT: ;;#ASMEND
4234 ; GFX90A-NEXT: ;;#ASMSTART
4235 ; GFX90A-NEXT: ; def v[2:3]
4236 ; GFX90A-NEXT: ;;#ASMEND
4237 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v3, 16
4238 ; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
4239 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4240 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4241 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4243 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_3:
4245 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4246 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4247 ; GFX940-NEXT: ;;#ASMSTART
4248 ; GFX940-NEXT: ; def v[0:1]
4249 ; GFX940-NEXT: ;;#ASMEND
4250 ; GFX940-NEXT: ;;#ASMSTART
4251 ; GFX940-NEXT: ; def v[2:3]
4252 ; GFX940-NEXT: ;;#ASMEND
4253 ; GFX940-NEXT: s_nop 0
4254 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16
4255 ; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4256 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4257 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4258 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4259 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4260 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4261 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
4262 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 0, 3>: lane 0 takes element 3 of %vec1 (mask indices 4-7
; address the second operand), lane 1 takes element 0 of %vec0, and lane 2
; takes element 3 of %vec0.
; Both inputs are produced by inline asm with a "=v" output constraint, and the
; <3 x bfloat> result is stored to the global pointer %ptr.
; The check lines are autogenerated (see the note at the top of the file).
4266 define void @v_shuffle_v3bf16_v4bf16__7_0_3(ptr addrspace(1) inreg %ptr) {
4267 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_0_3:
4269 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4270 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
4271 ; GFX900-NEXT: ;;#ASMSTART
4272 ; GFX900-NEXT: ; def v[0:1]
4273 ; GFX900-NEXT: ;;#ASMEND
4274 ; GFX900-NEXT: ;;#ASMSTART
4275 ; GFX900-NEXT: ; def v[2:3]
4276 ; GFX900-NEXT: ;;#ASMEND
4277 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16
4278 ; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
4279 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
4280 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4281 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4283 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_0_3:
4285 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4286 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4287 ; GFX90A-NEXT: ;;#ASMSTART
4288 ; GFX90A-NEXT: ; def v[0:1]
4289 ; GFX90A-NEXT: ;;#ASMEND
4290 ; GFX90A-NEXT: ;;#ASMSTART
4291 ; GFX90A-NEXT: ; def v[2:3]
4292 ; GFX90A-NEXT: ;;#ASMEND
4293 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16
4294 ; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
4295 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4296 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4297 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4299 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_3:
4301 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4302 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4303 ; GFX940-NEXT: ;;#ASMSTART
4304 ; GFX940-NEXT: ; def v[0:1]
4305 ; GFX940-NEXT: ;;#ASMEND
4306 ; GFX940-NEXT: ;;#ASMSTART
4307 ; GFX940-NEXT: ; def v[2:3]
4308 ; GFX940-NEXT: ;;#ASMEND
4309 ; GFX940-NEXT: s_nop 0
4310 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16
4311 ; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4312 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4313 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4314 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4315 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4316 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4317 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
4318 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 1, 3>: lane 0 takes element 3 of %vec1 (mask indices 4-7
; address the second operand), lane 1 takes element 1 of %vec0, and lane 2
; takes element 3 of %vec0.
; Both inputs are produced by inline asm with a "=v" output constraint, and the
; <3 x bfloat> result is stored to the global pointer %ptr.
; The check lines are autogenerated (see the note at the top of the file).
4322 define void @v_shuffle_v3bf16_v4bf16__7_1_3(ptr addrspace(1) inreg %ptr) {
4323 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_1_3:
4325 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4326 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
4327 ; GFX900-NEXT: ;;#ASMSTART
4328 ; GFX900-NEXT: ; def v[0:1]
4329 ; GFX900-NEXT: ;;#ASMEND
4330 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
4331 ; GFX900-NEXT: ;;#ASMSTART
4332 ; GFX900-NEXT: ; def v[2:3]
4333 ; GFX900-NEXT: ;;#ASMEND
4334 ; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4
4335 ; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
4336 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
4337 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4338 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4340 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_1_3:
4342 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4343 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4344 ; GFX90A-NEXT: ;;#ASMSTART
4345 ; GFX90A-NEXT: ; def v[0:1]
4346 ; GFX90A-NEXT: ;;#ASMEND
4347 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
4348 ; GFX90A-NEXT: ;;#ASMSTART
4349 ; GFX90A-NEXT: ; def v[2:3]
4350 ; GFX90A-NEXT: ;;#ASMEND
4351 ; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
4352 ; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
4353 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4354 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4355 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4357 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_3:
4359 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4360 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4361 ; GFX940-NEXT: ;;#ASMSTART
4362 ; GFX940-NEXT: ; def v[0:1]
4363 ; GFX940-NEXT: ;;#ASMEND
4364 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
4365 ; GFX940-NEXT: ;;#ASMSTART
4366 ; GFX940-NEXT: ; def v[2:3]
4367 ; GFX940-NEXT: ;;#ASMEND
4368 ; GFX940-NEXT: s_nop 0
4369 ; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
4370 ; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4371 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4372 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4373 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4374 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4375 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4376 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
4377 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 2, 3>: lane 0 takes element 3 of %vec1 (mask indices 4-7
; address the second operand), lane 1 takes element 2 of %vec0, and lane 2
; takes element 3 of %vec0.
; Both inputs are produced by inline asm with a "=v" output constraint, and the
; <3 x bfloat> result is stored to the global pointer %ptr.
; The check lines are autogenerated (see the note at the top of the file).
4381 define void @v_shuffle_v3bf16_v4bf16__7_2_3(ptr addrspace(1) inreg %ptr) {
4382 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_2_3:
4384 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4385 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
4386 ; GFX900-NEXT: ;;#ASMSTART
4387 ; GFX900-NEXT: ; def v[0:1]
4388 ; GFX900-NEXT: ;;#ASMEND
4389 ; GFX900-NEXT: ;;#ASMSTART
4390 ; GFX900-NEXT: ; def v[2:3]
4391 ; GFX900-NEXT: ;;#ASMEND
4392 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16
4393 ; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
4394 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
4395 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4396 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4398 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_2_3:
4400 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4401 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4402 ; GFX90A-NEXT: ;;#ASMSTART
4403 ; GFX90A-NEXT: ; def v[0:1]
4404 ; GFX90A-NEXT: ;;#ASMEND
4405 ; GFX90A-NEXT: ;;#ASMSTART
4406 ; GFX90A-NEXT: ; def v[2:3]
4407 ; GFX90A-NEXT: ;;#ASMEND
4408 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16
4409 ; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
4410 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4411 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4412 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4414 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_3:
4416 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4417 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4418 ; GFX940-NEXT: ;;#ASMSTART
4419 ; GFX940-NEXT: ; def v[0:1]
4420 ; GFX940-NEXT: ;;#ASMEND
4421 ; GFX940-NEXT: ;;#ASMSTART
4422 ; GFX940-NEXT: ; def v[2:3]
4423 ; GFX940-NEXT: ;;#ASMEND
4424 ; GFX940-NEXT: s_nop 0
4425 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16
4426 ; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4427 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4428 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4429 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4430 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4431 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4432 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
4433 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 4, 3>: lane 0 takes element 3 of %vec1 and lane 1 takes
; element 0 of %vec1 (mask indices 4-7 address the second operand); lane 2
; takes element 3 of %vec0.
; Both inputs are produced by inline asm with a "=v" output constraint, and the
; <3 x bfloat> result is stored to the global pointer %ptr.
; The check lines are autogenerated (see the note at the top of the file).
4437 define void @v_shuffle_v3bf16_v4bf16__7_4_3(ptr addrspace(1) inreg %ptr) {
4438 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_4_3:
4440 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4441 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
4442 ; GFX900-NEXT: ;;#ASMSTART
4443 ; GFX900-NEXT: ; def v[0:1]
4444 ; GFX900-NEXT: ;;#ASMEND
4445 ; GFX900-NEXT: ;;#ASMSTART
4446 ; GFX900-NEXT: ; def v[2:3]
4447 ; GFX900-NEXT: ;;#ASMEND
4448 ; GFX900-NEXT: v_alignbit_b32 v0, v2, v3, 16
4449 ; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
4450 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
4451 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4452 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4454 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_4_3:
4456 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4457 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4458 ; GFX90A-NEXT: ;;#ASMSTART
4459 ; GFX90A-NEXT: ; def v[0:1]
4460 ; GFX90A-NEXT: ;;#ASMEND
4461 ; GFX90A-NEXT: ;;#ASMSTART
4462 ; GFX90A-NEXT: ; def v[2:3]
4463 ; GFX90A-NEXT: ;;#ASMEND
4464 ; GFX90A-NEXT: v_alignbit_b32 v0, v2, v3, 16
4465 ; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
4466 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4467 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4468 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4470 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_3:
4472 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4473 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4474 ; GFX940-NEXT: ;;#ASMSTART
4475 ; GFX940-NEXT: ; def v[0:1]
4476 ; GFX940-NEXT: ;;#ASMEND
4477 ; GFX940-NEXT: ;;#ASMSTART
4478 ; GFX940-NEXT: ; def v[2:3]
4479 ; GFX940-NEXT: ;;#ASMEND
4480 ; GFX940-NEXT: s_nop 0
4481 ; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16
4482 ; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4483 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4484 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4485 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4486 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4487 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4488 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
4489 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 5, 3>: lane 0 takes element 3 of %vec1 and lane 1 takes
; element 1 of %vec1 (mask indices 4-7 address the second operand); lane 2
; takes element 3 of %vec0.
; Both inputs are produced by inline asm with a "=v" output constraint, and the
; <3 x bfloat> result is stored to the global pointer %ptr.
; The check lines are autogenerated (see the note at the top of the file).
4493 define void @v_shuffle_v3bf16_v4bf16__7_5_3(ptr addrspace(1) inreg %ptr) {
4494 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_5_3:
4496 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4497 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
4498 ; GFX900-NEXT: ;;#ASMSTART
4499 ; GFX900-NEXT: ; def v[0:1]
4500 ; GFX900-NEXT: ;;#ASMEND
4501 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
4502 ; GFX900-NEXT: ;;#ASMSTART
4503 ; GFX900-NEXT: ; def v[2:3]
4504 ; GFX900-NEXT: ;;#ASMEND
4505 ; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4
4506 ; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
4507 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
4508 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4509 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4511 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_5_3:
4513 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4514 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4515 ; GFX90A-NEXT: ;;#ASMSTART
4516 ; GFX90A-NEXT: ; def v[0:1]
4517 ; GFX90A-NEXT: ;;#ASMEND
4518 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
4519 ; GFX90A-NEXT: ;;#ASMSTART
4520 ; GFX90A-NEXT: ; def v[2:3]
4521 ; GFX90A-NEXT: ;;#ASMEND
4522 ; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4
4523 ; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
4524 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4525 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4526 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4528 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_3:
4530 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4531 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4532 ; GFX940-NEXT: ;;#ASMSTART
4533 ; GFX940-NEXT: ; def v[0:1]
4534 ; GFX940-NEXT: ;;#ASMEND
4535 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
4536 ; GFX940-NEXT: ;;#ASMSTART
4537 ; GFX940-NEXT: ; def v[2:3]
4538 ; GFX940-NEXT: ;;#ASMEND
4539 ; GFX940-NEXT: s_nop 0
4540 ; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2
4541 ; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4542 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4543 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4544 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4545 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4546 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4547 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
4548 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 6, 3>: lane 0 takes element 3 of %vec1 and lane 1 takes
; element 2 of %vec1 (mask indices 4-7 address the second operand); lane 2
; takes element 3 of %vec0.
; Both inputs are produced by inline asm with a "=v" output constraint, and the
; <3 x bfloat> result is stored to the global pointer %ptr.
; The check lines are autogenerated (see the note at the top of the file).
4552 define void @v_shuffle_v3bf16_v4bf16__7_6_3(ptr addrspace(1) inreg %ptr) {
4553 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_6_3:
4555 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4556 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
4557 ; GFX900-NEXT: ;;#ASMSTART
4558 ; GFX900-NEXT: ; def v[0:1]
4559 ; GFX900-NEXT: ;;#ASMEND
4560 ; GFX900-NEXT: ;;#ASMSTART
4561 ; GFX900-NEXT: ; def v[2:3]
4562 ; GFX900-NEXT: ;;#ASMEND
4563 ; GFX900-NEXT: v_alignbit_b32 v0, v3, v3, 16
4564 ; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
4565 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
4566 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4567 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4569 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_6_3:
4571 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4572 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
4573 ; GFX90A-NEXT: ;;#ASMSTART
4574 ; GFX90A-NEXT: ; def v[0:1]
4575 ; GFX90A-NEXT: ;;#ASMEND
4576 ; GFX90A-NEXT: ;;#ASMSTART
4577 ; GFX90A-NEXT: ; def v[2:3]
4578 ; GFX90A-NEXT: ;;#ASMEND
4579 ; GFX90A-NEXT: v_alignbit_b32 v0, v3, v3, 16
4580 ; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
4581 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
4582 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4583 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4585 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_3:
4587 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4588 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
4589 ; GFX940-NEXT: ;;#ASMSTART
4590 ; GFX940-NEXT: ; def v[0:1]
4591 ; GFX940-NEXT: ;;#ASMEND
4592 ; GFX940-NEXT: ;;#ASMSTART
4593 ; GFX940-NEXT: ; def v[2:3]
4594 ; GFX940-NEXT: ;;#ASMEND
4595 ; GFX940-NEXT: s_nop 0
4596 ; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16
4597 ; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4598 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
4599 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4600 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4601 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4602 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4603 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
4604 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <poison, 4, 4> with a poison second operand: lane 0 has a
; poison mask index and lanes 1 and 2 read element 0 of the poison operand
; (mask indices 4-7 address the second operand), so no lane reads %vec0.
; The shared GFX9 checks expect only the entry waitcnt and the return, with
; no store instruction.
; The check lines are autogenerated (see the note at the top of the file).
4608 define void @v_shuffle_v3bf16_v4bf16__u_4_4(ptr addrspace(1) inreg %ptr) {
4609 ; GFX9-LABEL: v_shuffle_v3bf16_v4bf16__u_4_4:
4611 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4612 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4613 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4614 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 poison, i32 4, i32 4>
4615 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <0, 4, 4> with a poison second operand: lane 0 takes element 0
; of %vec0; lanes 1 and 2 read element 0 of the poison operand (mask indices
; 4-7 address the second operand).
; %vec0 is produced by inline asm with a "=v" output constraint, and the
; <3 x bfloat> result is stored to the global pointer %ptr.
; The check lines are autogenerated (see the note at the top of the file).
4619 define void @v_shuffle_v3bf16_v4bf16__0_4_4(ptr addrspace(1) inreg %ptr) {
4620 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__0_4_4:
4622 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4623 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4624 ; GFX900-NEXT: ;;#ASMSTART
4625 ; GFX900-NEXT: ; def v[0:1]
4626 ; GFX900-NEXT: ;;#ASMEND
4627 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
4628 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
4629 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4630 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4632 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__0_4_4:
4634 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4635 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4636 ; GFX90A-NEXT: ;;#ASMSTART
4637 ; GFX90A-NEXT: ; def v[0:1]
4638 ; GFX90A-NEXT: ;;#ASMEND
4639 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
4640 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
4641 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4642 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4644 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_4_4:
4646 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4647 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4648 ; GFX940-NEXT: ;;#ASMSTART
4649 ; GFX940-NEXT: ; def v[0:1]
4650 ; GFX940-NEXT: ;;#ASMEND
4651 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4652 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
4653 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4654 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4655 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4656 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 4, i32 4>
4657 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <1, 4, 4> with a poison second operand: lane 0 takes element 1
; of %vec0; lanes 1 and 2 read element 0 of the poison operand (mask indices
; 4-7 address the second operand).
; %vec0 is produced by inline asm with a "=v" output constraint, and the
; <3 x bfloat> result is stored to the global pointer %ptr. The expected
; output contains a single dword store.
; The check lines are autogenerated (see the note at the top of the file).
4661 define void @v_shuffle_v3bf16_v4bf16__1_4_4(ptr addrspace(1) inreg %ptr) {
4662 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__1_4_4:
4664 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4665 ; GFX900-NEXT: ;;#ASMSTART
4666 ; GFX900-NEXT: ; def v[0:1]
4667 ; GFX900-NEXT: ;;#ASMEND
4668 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4669 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
4670 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
4671 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4672 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4674 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__1_4_4:
4676 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4677 ; GFX90A-NEXT: ;;#ASMSTART
4678 ; GFX90A-NEXT: ; def v[0:1]
4679 ; GFX90A-NEXT: ;;#ASMEND
4680 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4681 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
4682 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
4683 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4684 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4686 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_4_4:
4688 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4689 ; GFX940-NEXT: ;;#ASMSTART
4690 ; GFX940-NEXT: ; def v[0:1]
4691 ; GFX940-NEXT: ;;#ASMEND
4692 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4693 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
4694 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
4695 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4696 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4697 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4698 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 1, i32 4, i32 4>
4699 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <2, 4, 4> with a poison second operand: lane 0 takes element 2
; of %vec0; lanes 1 and 2 read element 0 of the poison operand (mask indices
; 4-7 address the second operand).
; %vec0 is produced by inline asm with a "=v" output constraint, and the
; <3 x bfloat> result is stored to the global pointer %ptr. The expected
; output stores v1 directly with a single dword store.
; The check lines are autogenerated (see the note at the top of the file).
4703 define void @v_shuffle_v3bf16_v4bf16__2_4_4(ptr addrspace(1) inreg %ptr) {
4704 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__2_4_4:
4706 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4707 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4708 ; GFX900-NEXT: ;;#ASMSTART
4709 ; GFX900-NEXT: ; def v[0:1]
4710 ; GFX900-NEXT: ;;#ASMEND
4711 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
4712 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4713 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4715 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__2_4_4:
4717 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4718 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4719 ; GFX90A-NEXT: ;;#ASMSTART
4720 ; GFX90A-NEXT: ; def v[0:1]
4721 ; GFX90A-NEXT: ;;#ASMEND
4722 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
4723 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4724 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4726 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_4_4:
4728 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4729 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4730 ; GFX940-NEXT: ;;#ASMSTART
4731 ; GFX940-NEXT: ; def v[0:1]
4732 ; GFX940-NEXT: ;;#ASMEND
4733 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
4734 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4735 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4736 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4737 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 2, i32 4, i32 4>
4738 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 4, 4> with a poison second operand: lane 0 takes element 3
; of %vec0; lanes 1 and 2 read element 0 of the poison operand (mask indices
; 4-7 address the second operand).
; %vec0 is produced by inline asm with a "=v" output constraint, and the
; <3 x bfloat> result is stored to the global pointer %ptr. The expected
; output contains a single dword store.
; The check lines are autogenerated (see the note at the top of the file).
4742 define void @v_shuffle_v3bf16_v4bf16__3_4_4(ptr addrspace(1) inreg %ptr) {
4743 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__3_4_4:
4745 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4746 ; GFX900-NEXT: ;;#ASMSTART
4747 ; GFX900-NEXT: ; def v[0:1]
4748 ; GFX900-NEXT: ;;#ASMEND
4749 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4750 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16
4751 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
4752 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4753 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4755 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__3_4_4:
4757 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4758 ; GFX90A-NEXT: ;;#ASMSTART
4759 ; GFX90A-NEXT: ; def v[0:1]
4760 ; GFX90A-NEXT: ;;#ASMEND
4761 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4762 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16
4763 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
4764 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4765 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4767 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_4_4:
4769 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4770 ; GFX940-NEXT: ;;#ASMSTART
4771 ; GFX940-NEXT: ; def v[0:1]
4772 ; GFX940-NEXT: ;;#ASMEND
4773 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4774 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16
4775 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
4776 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4777 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4778 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4779 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 3, i32 4, i32 4>
4780 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <4, 4, 4> with a poison second operand: every lane reads
; element 0 of the poison operand (mask indices 4-7 address the second
; operand), so no lane reads %vec0.
; The shared GFX9 checks expect only the entry waitcnt and the return, with
; no store instruction.
; The check lines are autogenerated (see the note at the top of the file).
4784 define void @v_shuffle_v3bf16_v4bf16__4_4_4(ptr addrspace(1) inreg %ptr) {
4785 ; GFX9-LABEL: v_shuffle_v3bf16_v4bf16__4_4_4:
4787 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4788 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4789 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4790 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 4, i32 4, i32 4>
4791 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <5, 4, 4>: lane 0 takes element 1 of %vec1; lanes 1 and 2 both
; take element 0 of %vec1 (mask indices 4-7 address the second operand).
; %vec0 is defined but not referenced by the mask; the expected output
; contains only one inline-asm "def" block.
; The <3 x bfloat> result is stored to the global pointer %ptr.
; The check lines are autogenerated (see the note at the top of the file).
4795 define void @v_shuffle_v3bf16_v4bf16__5_4_4(ptr addrspace(1) inreg %ptr) {
4796 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__5_4_4:
4798 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4799 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4800 ; GFX900-NEXT: ;;#ASMSTART
4801 ; GFX900-NEXT: ; def v[0:1]
4802 ; GFX900-NEXT: ;;#ASMEND
4803 ; GFX900-NEXT: v_alignbit_b32 v1, v0, v0, 16
4804 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
4805 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
4806 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4807 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4809 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__5_4_4:
4811 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4812 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4813 ; GFX90A-NEXT: ;;#ASMSTART
4814 ; GFX90A-NEXT: ; def v[0:1]
4815 ; GFX90A-NEXT: ;;#ASMEND
4816 ; GFX90A-NEXT: v_alignbit_b32 v1, v0, v0, 16
4817 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
4818 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
4819 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4820 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4822 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_4_4:
4824 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4825 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4826 ; GFX940-NEXT: ;;#ASMSTART
4827 ; GFX940-NEXT: ; def v[0:1]
4828 ; GFX940-NEXT: ;;#ASMEND
4829 ; GFX940-NEXT: s_nop 0
4830 ; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16
4831 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
4832 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
4833 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4834 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4835 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4836 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4837 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
4838 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <6, 4, 4>: lane 0 takes element 2 of %vec1; lanes 1 and 2 both
; take element 0 of %vec1 (mask indices 4-7 address the second operand).
; %vec0 is defined but not referenced by the mask; the expected output
; contains only one inline-asm "def" block.
; The <3 x bfloat> result is stored to the global pointer %ptr.
; The check lines are autogenerated (see the note at the top of the file).
4842 define void @v_shuffle_v3bf16_v4bf16__6_4_4(ptr addrspace(1) inreg %ptr) {
4843 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__6_4_4:
4845 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4846 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4847 ; GFX900-NEXT: ;;#ASMSTART
4848 ; GFX900-NEXT: ; def v[0:1]
4849 ; GFX900-NEXT: ;;#ASMEND
4850 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
4851 ; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4
4852 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
4853 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
4854 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4855 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4857 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__6_4_4:
4859 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4860 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4861 ; GFX90A-NEXT: ;;#ASMSTART
4862 ; GFX90A-NEXT: ; def v[0:1]
4863 ; GFX90A-NEXT: ;;#ASMEND
4864 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
4865 ; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4
4866 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
4867 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
4868 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4869 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4871 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_4_4:
4873 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4874 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4875 ; GFX940-NEXT: ;;#ASMSTART
4876 ; GFX940-NEXT: ; def v[0:1]
4877 ; GFX940-NEXT: ;;#ASMEND
4878 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
4879 ; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2
4880 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
4881 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
4882 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4883 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4884 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4885 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4886 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
4887 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 4, 4>: lane 0 takes element 3 of %vec1; lanes 1 and 2 both
; take element 0 of %vec1 (mask indices 4-7 address the second operand).
; %vec0 is defined but not referenced by the mask; the expected output
; contains only one inline-asm "def" block.
; The <3 x bfloat> result is stored to the global pointer %ptr.
; The check lines are autogenerated (see the note at the top of the file).
4891 define void @v_shuffle_v3bf16_v4bf16__7_4_4(ptr addrspace(1) inreg %ptr) {
4892 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_4_4:
4894 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4895 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4896 ; GFX900-NEXT: ;;#ASMSTART
4897 ; GFX900-NEXT: ; def v[0:1]
4898 ; GFX900-NEXT: ;;#ASMEND
4899 ; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16
4900 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
4901 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
4902 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4903 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4905 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_4_4:
4907 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4908 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4909 ; GFX90A-NEXT: ;;#ASMSTART
4910 ; GFX90A-NEXT: ; def v[0:1]
4911 ; GFX90A-NEXT: ;;#ASMEND
4912 ; GFX90A-NEXT: v_alignbit_b32 v1, v0, v1, 16
4913 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
4914 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
4915 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4916 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4918 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_4:
4920 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4921 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4922 ; GFX940-NEXT: ;;#ASMSTART
4923 ; GFX940-NEXT: ; def v[0:1]
4924 ; GFX940-NEXT: ;;#ASMEND
4925 ; GFX940-NEXT: s_nop 0
4926 ; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16
4927 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
4928 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
4929 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4930 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4931 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4932 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4933 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
4934 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, poison, 4>: lanes 0 and 2 come from %vec1 (mask indices 4-7
; address the second shufflevector operand); lane 1 is poison. The expected
; v_alignbit_b32 takes a scalar register (s4 or s0) as its first source that
; the sequence does not set up for the shuffle - presumably acceptable because
; that half of the dword is the poison lane (NOTE(review): confirm).
4938 define void @v_shuffle_v3bf16_v4bf16__7_u_4(ptr addrspace(1) inreg %ptr) {
4939 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_u_4:
4941 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4942 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
4943 ; GFX900-NEXT: ;;#ASMSTART
4944 ; GFX900-NEXT: ; def v[0:1]
4945 ; GFX900-NEXT: ;;#ASMEND
4946 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16
4947 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
4948 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
4949 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4950 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4952 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_u_4:
4954 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4955 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
4956 ; GFX90A-NEXT: ;;#ASMSTART
4957 ; GFX90A-NEXT: ; def v[0:1]
4958 ; GFX90A-NEXT: ;;#ASMEND
4959 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16
4960 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
4961 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
4962 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
4963 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4965 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_4:
4967 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4968 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
4969 ; GFX940-NEXT: ;;#ASMSTART
4970 ; GFX940-NEXT: ; def v[0:1]
4971 ; GFX940-NEXT: ;;#ASMEND
4972 ; GFX940-NEXT: s_nop 0
4973 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16
4974 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
4975 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
4976 ; GFX940-NEXT: s_waitcnt vmcnt(0)
4977 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4978 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
4979 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
4980 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
4981 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 0, 4>: lane 0 = %vec1[3], lane 1 = %vec0[0], lane 2 =
; %vec1[0] (mask indices 0-3 address the first operand, 4-7 the second).
; Both asm defs appear in the expected code; the packed dword is formed with
; v_alignbit_b32 by 16 and the third lane is written by a short store.
4985 define void @v_shuffle_v3bf16_v4bf16__7_0_4(ptr addrspace(1) inreg %ptr) {
4986 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_0_4:
4988 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4989 ; GFX900-NEXT: ;;#ASMSTART
4990 ; GFX900-NEXT: ; def v[0:1]
4991 ; GFX900-NEXT: ;;#ASMEND
4992 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
4993 ; GFX900-NEXT: ;;#ASMSTART
4994 ; GFX900-NEXT: ; def v[1:2]
4995 ; GFX900-NEXT: ;;#ASMEND
4996 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16
4997 ; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4
4998 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
4999 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5000 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5002 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_0_4:
5004 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5005 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
5006 ; GFX90A-NEXT: ;;#ASMSTART
5007 ; GFX90A-NEXT: ; def v[0:1]
5008 ; GFX90A-NEXT: ;;#ASMEND
5009 ; GFX90A-NEXT: ;;#ASMSTART
5010 ; GFX90A-NEXT: ; def v[2:3]
5011 ; GFX90A-NEXT: ;;#ASMEND
5012 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16
5013 ; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4
5014 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
5015 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5016 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5018 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_4:
5020 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5021 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
5022 ; GFX940-NEXT: ;;#ASMSTART
5023 ; GFX940-NEXT: ; def v[0:1]
5024 ; GFX940-NEXT: ;;#ASMEND
5025 ; GFX940-NEXT: ;;#ASMSTART
5026 ; GFX940-NEXT: ; def v[2:3]
5027 ; GFX940-NEXT: ;;#ASMEND
5028 ; GFX940-NEXT: s_nop 0
5029 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16
5030 ; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
5031 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
5032 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5033 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5034 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5035 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5036 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
5037 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 1, 4>: lane 0 = %vec1[3], lane 1 = %vec0[1], lane 2 =
; %vec1[0] (mask indices 0-3 address the first operand, 4-7 the second).
; Both asm defs appear in the expected code; the packed dword is formed with
; v_perm_b32 using the selector constant 0x7060302.
5041 define void @v_shuffle_v3bf16_v4bf16__7_1_4(ptr addrspace(1) inreg %ptr) {
5042 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_1_4:
5044 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5045 ; GFX900-NEXT: ;;#ASMSTART
5046 ; GFX900-NEXT: ; def v[0:1]
5047 ; GFX900-NEXT: ;;#ASMEND
5048 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
5049 ; GFX900-NEXT: ;;#ASMSTART
5050 ; GFX900-NEXT: ; def v[1:2]
5051 ; GFX900-NEXT: ;;#ASMEND
5052 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
5053 ; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4
5054 ; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4
5055 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
5056 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5057 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5059 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_1_4:
5061 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5062 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
5063 ; GFX90A-NEXT: ;;#ASMSTART
5064 ; GFX90A-NEXT: ; def v[0:1]
5065 ; GFX90A-NEXT: ;;#ASMEND
5066 ; GFX90A-NEXT: ;;#ASMSTART
5067 ; GFX90A-NEXT: ; def v[2:3]
5068 ; GFX90A-NEXT: ;;#ASMEND
5069 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
5070 ; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
5071 ; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4
5072 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
5073 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5074 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5076 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_4:
5078 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5079 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
5080 ; GFX940-NEXT: ;;#ASMSTART
5081 ; GFX940-NEXT: ; def v[0:1]
5082 ; GFX940-NEXT: ;;#ASMEND
5083 ; GFX940-NEXT: ;;#ASMSTART
5084 ; GFX940-NEXT: ; def v[2:3]
5085 ; GFX940-NEXT: ;;#ASMEND
5086 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
5087 ; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
5088 ; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
5089 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
5090 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5091 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5092 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5093 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5094 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
5095 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 2, 4>: lane 0 = %vec1[3], lane 1 = %vec0[2], lane 2 =
; %vec1[0] (mask indices 0-3 address the first operand, 4-7 the second).
; Both asm defs appear in the expected code; the packed dword is formed with
; v_alignbit_b32 by 16 from the second dword of each def.
5099 define void @v_shuffle_v3bf16_v4bf16__7_2_4(ptr addrspace(1) inreg %ptr) {
5100 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_2_4:
5102 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5103 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
5104 ; GFX900-NEXT: ;;#ASMSTART
5105 ; GFX900-NEXT: ; def v[0:1]
5106 ; GFX900-NEXT: ;;#ASMEND
5107 ; GFX900-NEXT: ;;#ASMSTART
5108 ; GFX900-NEXT: ; def v[2:3]
5109 ; GFX900-NEXT: ;;#ASMEND
5110 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16
5111 ; GFX900-NEXT: global_store_short v4, v2, s[16:17] offset:4
5112 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
5113 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5114 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5116 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_2_4:
5118 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5119 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
5120 ; GFX90A-NEXT: ;;#ASMSTART
5121 ; GFX90A-NEXT: ; def v[0:1]
5122 ; GFX90A-NEXT: ;;#ASMEND
5123 ; GFX90A-NEXT: ;;#ASMSTART
5124 ; GFX90A-NEXT: ; def v[2:3]
5125 ; GFX90A-NEXT: ;;#ASMEND
5126 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16
5127 ; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4
5128 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
5129 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5130 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5132 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_4:
5134 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5135 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
5136 ; GFX940-NEXT: ;;#ASMSTART
5137 ; GFX940-NEXT: ; def v[0:1]
5138 ; GFX940-NEXT: ;;#ASMEND
5139 ; GFX940-NEXT: ;;#ASMSTART
5140 ; GFX940-NEXT: ; def v[2:3]
5141 ; GFX940-NEXT: ;;#ASMEND
5142 ; GFX940-NEXT: s_nop 0
5143 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16
5144 ; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
5145 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
5146 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5147 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5148 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5149 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5150 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
5151 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 3, 4>: lane 0 = %vec1[3], lane 1 = %vec0[3], lane 2 =
; %vec1[0] (mask indices 0-3 address the first operand, 4-7 the second).
; Both asm defs appear in the expected code; the packed dword is formed with
; v_perm_b32 using the selector constant 0x7060302.
5155 define void @v_shuffle_v3bf16_v4bf16__7_3_4(ptr addrspace(1) inreg %ptr) {
5156 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_3_4:
5158 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5159 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
5160 ; GFX900-NEXT: ;;#ASMSTART
5161 ; GFX900-NEXT: ; def v[0:1]
5162 ; GFX900-NEXT: ;;#ASMEND
5163 ; GFX900-NEXT: ;;#ASMSTART
5164 ; GFX900-NEXT: ; def v[2:3]
5165 ; GFX900-NEXT: ;;#ASMEND
5166 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
5167 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
5168 ; GFX900-NEXT: global_store_short v4, v2, s[16:17] offset:4
5169 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
5170 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5171 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5173 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_3_4:
5175 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5176 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
5177 ; GFX90A-NEXT: ;;#ASMSTART
5178 ; GFX90A-NEXT: ; def v[0:1]
5179 ; GFX90A-NEXT: ;;#ASMEND
5180 ; GFX90A-NEXT: ;;#ASMSTART
5181 ; GFX90A-NEXT: ; def v[2:3]
5182 ; GFX90A-NEXT: ;;#ASMEND
5183 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
5184 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
5185 ; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4
5186 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
5187 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5188 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5190 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_4:
5192 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5193 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
5194 ; GFX940-NEXT: ;;#ASMSTART
5195 ; GFX940-NEXT: ; def v[0:1]
5196 ; GFX940-NEXT: ;;#ASMEND
5197 ; GFX940-NEXT: ;;#ASMSTART
5198 ; GFX940-NEXT: ; def v[2:3]
5199 ; GFX940-NEXT: ;;#ASMEND
5200 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
5201 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
5202 ; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
5203 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
5204 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5205 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5206 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5207 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5208 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
5209 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 5, 4>: lanes are %vec1[3], %vec1[1], %vec1[0]; no lane
; selects from %vec0 (mask indices 4-7 address the second operand). The
; expected code contains a single asm def and forms the packed dword with
; v_perm_b32 using the selector constant 0x7060302.
5213 define void @v_shuffle_v3bf16_v4bf16__7_5_4(ptr addrspace(1) inreg %ptr) {
5214 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_5_4:
5216 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5217 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
5218 ; GFX900-NEXT: ;;#ASMSTART
5219 ; GFX900-NEXT: ; def v[0:1]
5220 ; GFX900-NEXT: ;;#ASMEND
5221 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
5222 ; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4
5223 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
5224 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
5225 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5226 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5228 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_5_4:
5230 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5231 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
5232 ; GFX90A-NEXT: ;;#ASMSTART
5233 ; GFX90A-NEXT: ; def v[0:1]
5234 ; GFX90A-NEXT: ;;#ASMEND
5235 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
5236 ; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4
5237 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
5238 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
5239 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5240 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5242 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_4:
5244 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5245 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
5246 ; GFX940-NEXT: ;;#ASMSTART
5247 ; GFX940-NEXT: ; def v[0:1]
5248 ; GFX940-NEXT: ;;#ASMEND
5249 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
5250 ; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2
5251 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5252 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
5253 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5254 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5255 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5256 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5257 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
5258 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 6, 4>: lanes are %vec1[3], %vec1[2], %vec1[0]; no lane
; selects from %vec0 (mask indices 4-7 address the second operand). The
; expected code contains a single asm def and forms the packed dword with
; v_alignbit_b32 by 16 using the same register for both sources.
5262 define void @v_shuffle_v3bf16_v4bf16__7_6_4(ptr addrspace(1) inreg %ptr) {
5263 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_6_4:
5265 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5266 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
5267 ; GFX900-NEXT: ;;#ASMSTART
5268 ; GFX900-NEXT: ; def v[0:1]
5269 ; GFX900-NEXT: ;;#ASMEND
5270 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16
5271 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
5272 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
5273 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5274 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5276 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_6_4:
5278 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5279 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
5280 ; GFX90A-NEXT: ;;#ASMSTART
5281 ; GFX90A-NEXT: ; def v[0:1]
5282 ; GFX90A-NEXT: ;;#ASMEND
5283 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16
5284 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
5285 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
5286 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5287 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5289 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_4:
5291 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5292 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
5293 ; GFX940-NEXT: ;;#ASMSTART
5294 ; GFX940-NEXT: ; def v[0:1]
5295 ; GFX940-NEXT: ;;#ASMEND
5296 ; GFX940-NEXT: s_nop 0
5297 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16
5298 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5299 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
5300 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5301 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5302 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5303 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5304 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
5305 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <poison, 5, 5>: lane 0 is poison, lanes 1 and 2 are both
; %vec1[1] (mask indices 4-7 address the second operand). The expected code
; contains a single asm def, stores its first dword unchanged, and shifts that
; dword right by 16 to produce the value for the short store at offset 4.
5309 define void @v_shuffle_v3bf16_v4bf16__u_5_5(ptr addrspace(1) inreg %ptr) {
5310 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__u_5_5:
5312 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5313 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
5314 ; GFX900-NEXT: ;;#ASMSTART
5315 ; GFX900-NEXT: ; def v[0:1]
5316 ; GFX900-NEXT: ;;#ASMEND
5317 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
5318 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5319 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
5320 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5321 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5323 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__u_5_5:
5325 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5326 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
5327 ; GFX90A-NEXT: ;;#ASMSTART
5328 ; GFX90A-NEXT: ; def v[0:1]
5329 ; GFX90A-NEXT: ;;#ASMEND
5330 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
5331 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5332 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
5333 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5334 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5336 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__u_5_5:
5338 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5339 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
5340 ; GFX940-NEXT: ;;#ASMSTART
5341 ; GFX940-NEXT: ; def v[0:1]
5342 ; GFX940-NEXT: ;;#ASMEND
5343 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
5344 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5345 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5346 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5347 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5348 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5349 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5350 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
5351 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <0, 5, 5>: lane 0 = %vec0[0], lanes 1 and 2 = %vec1[1] (mask
; indices 0-3 address the first operand, 4-7 the second). Both asm defs appear
; in the expected code; the packed dword is merged with v_bfi_b32 under the
; mask 0xffff, and a right shift by 16 feeds the short store at offset 4.
5355 define void @v_shuffle_v3bf16_v4bf16__0_5_5(ptr addrspace(1) inreg %ptr) {
5356 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__0_5_5:
5358 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5359 ; GFX900-NEXT: ;;#ASMSTART
5360 ; GFX900-NEXT: ; def v[0:1]
5361 ; GFX900-NEXT: ;;#ASMEND
5362 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
5363 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
5364 ; GFX900-NEXT: ;;#ASMSTART
5365 ; GFX900-NEXT: ; def v[1:2]
5366 ; GFX900-NEXT: ;;#ASMEND
5367 ; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1
5368 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
5369 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1
5370 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
5371 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5372 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5374 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__0_5_5:
5376 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5377 ; GFX90A-NEXT: ;;#ASMSTART
5378 ; GFX90A-NEXT: ; def v[0:1]
5379 ; GFX90A-NEXT: ;;#ASMEND
5380 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
5381 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
5382 ; GFX90A-NEXT: ;;#ASMSTART
5383 ; GFX90A-NEXT: ; def v[2:3]
5384 ; GFX90A-NEXT: ;;#ASMEND
5385 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v2
5386 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
5387 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v2
5388 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
5389 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5390 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5392 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_5_5:
5394 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5395 ; GFX940-NEXT: ;;#ASMSTART
5396 ; GFX940-NEXT: ; def v[0:1]
5397 ; GFX940-NEXT: ;;#ASMEND
5398 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
5399 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
5400 ; GFX940-NEXT: ;;#ASMSTART
5401 ; GFX940-NEXT: ; def v[2:3]
5402 ; GFX940-NEXT: ;;#ASMEND
5403 ; GFX940-NEXT: s_nop 0
5404 ; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2
5405 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
5406 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2
5407 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
5408 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5409 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5410 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5411 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5412 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 0, i32 5, i32 5>
5413 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <1, 5, 5>: lane 0 = %vec0[1], lanes 1 and 2 = %vec1[1] (mask
; indices 0-3 address the first operand, 4-7 the second). Both asm defs appear
; in the expected code; the packed dword is formed with v_perm_b32 using the
; selector 0x7060302, and a right shift by 16 feeds the short store.
5417 define void @v_shuffle_v3bf16_v4bf16__1_5_5(ptr addrspace(1) inreg %ptr) {
5418 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__1_5_5:
5420 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5421 ; GFX900-NEXT: ;;#ASMSTART
5422 ; GFX900-NEXT: ; def v[0:1]
5423 ; GFX900-NEXT: ;;#ASMEND
5424 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
5425 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
5426 ; GFX900-NEXT: ;;#ASMSTART
5427 ; GFX900-NEXT: ; def v[1:2]
5428 ; GFX900-NEXT: ;;#ASMEND
5429 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
5430 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
5431 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1
5432 ; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4
5433 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5434 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5436 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__1_5_5:
5438 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5439 ; GFX90A-NEXT: ;;#ASMSTART
5440 ; GFX90A-NEXT: ; def v[0:1]
5441 ; GFX90A-NEXT: ;;#ASMEND
5442 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
5443 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
5444 ; GFX90A-NEXT: ;;#ASMSTART
5445 ; GFX90A-NEXT: ; def v[2:3]
5446 ; GFX90A-NEXT: ;;#ASMEND
5447 ; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4
5448 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
5449 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v2
5450 ; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4
5451 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5452 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5454 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_5_5:
5456 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5457 ; GFX940-NEXT: ;;#ASMSTART
5458 ; GFX940-NEXT: ; def v[0:1]
5459 ; GFX940-NEXT: ;;#ASMEND
5460 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
5461 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
5462 ; GFX940-NEXT: ;;#ASMSTART
5463 ; GFX940-NEXT: ; def v[2:3]
5464 ; GFX940-NEXT: ;;#ASMEND
5465 ; GFX940-NEXT: s_nop 0
5466 ; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2
5467 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
5468 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2
5469 ; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
5470 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5471 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5472 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5473 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5474 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 1, i32 5, i32 5>
5475 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <2, 5, 5>: lane 0 = %vec0[2], lanes 1 and 2 = %vec1[1] (mask
; indices 0-3 address the first operand, 4-7 the second). Both asm defs appear
; in the expected code; the packed dword is merged with v_bfi_b32 under the
; mask 0xffff, and a right shift by 16 feeds the short store at offset 4.
5479 define void @v_shuffle_v3bf16_v4bf16__2_5_5(ptr addrspace(1) inreg %ptr) {
5480 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__2_5_5:
5482 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5483 ; GFX900-NEXT: ;;#ASMSTART
5484 ; GFX900-NEXT: ; def v[0:1]
5485 ; GFX900-NEXT: ;;#ASMEND
5486 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
5487 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
5488 ; GFX900-NEXT: ;;#ASMSTART
5489 ; GFX900-NEXT: ; def v[2:3]
5490 ; GFX900-NEXT: ;;#ASMEND
5491 ; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v2
5492 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2
5493 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
5494 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
5495 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5496 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5498 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__2_5_5:
5500 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5501 ; GFX90A-NEXT: ;;#ASMSTART
5502 ; GFX90A-NEXT: ; def v[0:1]
5503 ; GFX90A-NEXT: ;;#ASMEND
5504 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
5505 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
5506 ; GFX90A-NEXT: ;;#ASMSTART
5507 ; GFX90A-NEXT: ; def v[2:3]
5508 ; GFX90A-NEXT: ;;#ASMEND
5509 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v2
5510 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v2
5511 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
5512 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
5513 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5514 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5516 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_5_5:
5518 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5519 ; GFX940-NEXT: ;;#ASMSTART
5520 ; GFX940-NEXT: ; def v[0:1]
5521 ; GFX940-NEXT: ;;#ASMEND
5522 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
5523 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
5524 ; GFX940-NEXT: ;;#ASMSTART
5525 ; GFX940-NEXT: ; def v[2:3]
5526 ; GFX940-NEXT: ;;#ASMEND
5527 ; GFX940-NEXT: s_nop 0
5528 ; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2
5529 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2
5530 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
5531 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
5532 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5533 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5534 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5535 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5536 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 2, i32 5, i32 5>
5537 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 5, 5>: lane 0 = %vec0[3], lanes 1 and 2 = %vec1[1] (mask
; indices 0-3 address the first operand, 4-7 the second). Both asm defs appear
; in the expected code; the packed dword is formed with v_perm_b32 using the
; selector 0x7060302, and a right shift by 16 feeds the short store.
5541 define void @v_shuffle_v3bf16_v4bf16__3_5_5(ptr addrspace(1) inreg %ptr) {
5542 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__3_5_5:
5544 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5545 ; GFX900-NEXT: ;;#ASMSTART
5546 ; GFX900-NEXT: ; def v[0:1]
5547 ; GFX900-NEXT: ;;#ASMEND
5548 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
5549 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
5550 ; GFX900-NEXT: ;;#ASMSTART
5551 ; GFX900-NEXT: ; def v[2:3]
5552 ; GFX900-NEXT: ;;#ASMEND
5553 ; GFX900-NEXT: v_perm_b32 v0, v2, v1, s4
5554 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2
5555 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
5556 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
5557 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5558 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5560 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__3_5_5:
5562 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5563 ; GFX90A-NEXT: ;;#ASMSTART
5564 ; GFX90A-NEXT: ; def v[0:1]
5565 ; GFX90A-NEXT: ;;#ASMEND
5566 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
5567 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
5568 ; GFX90A-NEXT: ;;#ASMSTART
5569 ; GFX90A-NEXT: ; def v[2:3]
5570 ; GFX90A-NEXT: ;;#ASMEND
5571 ; GFX90A-NEXT: v_perm_b32 v0, v2, v1, s4
5572 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v2
5573 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
5574 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
5575 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5576 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5578 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_5_5:
5580 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5581 ; GFX940-NEXT: ;;#ASMSTART
5582 ; GFX940-NEXT: ; def v[0:1]
5583 ; GFX940-NEXT: ;;#ASMEND
5584 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
5585 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
5586 ; GFX940-NEXT: ;;#ASMSTART
5587 ; GFX940-NEXT: ; def v[2:3]
5588 ; GFX940-NEXT: ;;#ASMEND
5589 ; GFX940-NEXT: s_nop 0
5590 ; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2
5591 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2
5592 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
5593 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
5594 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5595 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5596 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5597 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5598 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
5599 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <4, 5, 5>: lanes are %vec1[0], %vec1[1], %vec1[1]; no lane
; selects from %vec0 (mask indices 4-7 address the second operand). The
; expected code contains a single asm def, stores its first dword unchanged,
; and shifts that dword right by 16 for the short store at offset 4.
5603 define void @v_shuffle_v3bf16_v4bf16__4_5_5(ptr addrspace(1) inreg %ptr) {
5604 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__4_5_5:
5606 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5607 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
5608 ; GFX900-NEXT: ;;#ASMSTART
5609 ; GFX900-NEXT: ; def v[0:1]
5610 ; GFX900-NEXT: ;;#ASMEND
5611 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
5612 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5613 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
5614 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5615 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5617 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__4_5_5:
5619 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5620 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
5621 ; GFX90A-NEXT: ;;#ASMSTART
5622 ; GFX90A-NEXT: ; def v[0:1]
5623 ; GFX90A-NEXT: ;;#ASMEND
5624 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
5625 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5626 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
5627 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5628 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5630 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__4_5_5:
5632 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5633 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
5634 ; GFX940-NEXT: ;;#ASMSTART
5635 ; GFX940-NEXT: ; def v[0:1]
5636 ; GFX940-NEXT: ;;#ASMEND
5637 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
5638 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5639 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5640 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5641 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5642 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5643 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5644 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
5645 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <5, 5, 5>: all three lanes are %vec1[1]; no lane selects from
; %vec0 (mask indices 4-7 address the second operand). The expected code
; contains a single asm def, forms the packed dword with v_perm_b32 (selector
; 0x7060302, same register for both sources), and shifts right by 16 for the
; short store at offset 4.
5649 define void @v_shuffle_v3bf16_v4bf16__5_5_5(ptr addrspace(1) inreg %ptr) {
5650 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__5_5_5:
5652 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5653 ; GFX900-NEXT: ;;#ASMSTART
5654 ; GFX900-NEXT: ; def v[0:1]
5655 ; GFX900-NEXT: ;;#ASMEND
5656 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
5657 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
5658 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
5659 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5660 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
5661 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
5662 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5663 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5665 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__5_5_5:
5667 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5668 ; GFX90A-NEXT: ;;#ASMSTART
5669 ; GFX90A-NEXT: ; def v[0:1]
5670 ; GFX90A-NEXT: ;;#ASMEND
5671 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
5672 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
5673 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
5674 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5675 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
5676 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
5677 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5678 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5680 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_5_5:
5682 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5683 ; GFX940-NEXT: ;;#ASMSTART
5684 ; GFX940-NEXT: ; def v[0:1]
5685 ; GFX940-NEXT: ;;#ASMEND
5686 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
5687 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
5688 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
5689 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5690 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
5691 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5692 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5693 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5694 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5695 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5696 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
5697 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <6, 5, 5>: lanes are %vec1[2], %vec1[1], %vec1[1]; no lane
; selects from %vec0 (mask indices 4-7 address the second operand). The
; expected code contains a single asm def, merges its two dwords with
; v_bfi_b32 under the mask 0xffff, and shifts right by 16 for the short store
; at offset 4.
5701 define void @v_shuffle_v3bf16_v4bf16__6_5_5(ptr addrspace(1) inreg %ptr) {
5702 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__6_5_5:
5704 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5705 ; GFX900-NEXT: ;;#ASMSTART
5706 ; GFX900-NEXT: ; def v[0:1]
5707 ; GFX900-NEXT: ;;#ASMEND
5708 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
5709 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
5710 ; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0
5711 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5712 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
5713 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
5714 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5715 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5717 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__6_5_5:
5719 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5720 ; GFX90A-NEXT: ;;#ASMSTART
5721 ; GFX90A-NEXT: ; def v[0:1]
5722 ; GFX90A-NEXT: ;;#ASMEND
5723 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
5724 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
5725 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v0
5726 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5727 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
5728 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
5729 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5730 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5732 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_5_5:
5734 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5735 ; GFX940-NEXT: ;;#ASMSTART
5736 ; GFX940-NEXT: ; def v[0:1]
5737 ; GFX940-NEXT: ;;#ASMEND
5738 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
5739 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
5740 ; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0
5741 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5742 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5743 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
5744 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5745 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5746 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5747 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5748 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 5, i32 5>
5749 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 5, 5>: mask indices 4-7 address the second operand, so the
; result lanes are %vec1[3], %vec1[1], %vec1[1]; %vec0 is not referenced by the mask.
; The <3 x bfloat> result is stored to %ptr.
5753 define void @v_shuffle_v3bf16_v4bf16__7_5_5(ptr addrspace(1) inreg %ptr) {
5754 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_5_5:
5756 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5757 ; GFX900-NEXT: ;;#ASMSTART
5758 ; GFX900-NEXT: ; def v[0:1]
5759 ; GFX900-NEXT: ;;#ASMEND
5760 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
5761 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
5762 ; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4
5763 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5764 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
5765 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
5766 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5767 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5769 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_5_5:
5771 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5772 ; GFX90A-NEXT: ;;#ASMSTART
5773 ; GFX90A-NEXT: ; def v[0:1]
5774 ; GFX90A-NEXT: ;;#ASMEND
5775 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
5776 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
5777 ; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4
5778 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5779 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
5780 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
5781 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5782 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5784 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_5:
5786 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5787 ; GFX940-NEXT: ;;#ASMSTART
5788 ; GFX940-NEXT: ; def v[0:1]
5789 ; GFX940-NEXT: ;;#ASMEND
5790 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
5791 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
5792 ; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2
5793 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5794 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
5795 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5796 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5797 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5798 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5799 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5800 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 5>
5801 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, poison, 5>: mask indices 4-7 address the second operand, so
; lane 0 is %vec1[3], lane 1 is poison (unspecified), and lane 2 is %vec1[1];
; %vec0 is not referenced by the mask. The <3 x bfloat> result is stored to %ptr.
5805 define void @v_shuffle_v3bf16_v4bf16__7_u_5(ptr addrspace(1) inreg %ptr) {
5806 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_u_5:
5808 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5809 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
5810 ; GFX900-NEXT: ;;#ASMSTART
5811 ; GFX900-NEXT: ; def v[0:1]
5812 ; GFX900-NEXT: ;;#ASMEND
5813 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16
5814 ; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4
5815 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
5816 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5817 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5819 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_u_5:
5821 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5822 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
5823 ; GFX90A-NEXT: ;;#ASMSTART
5824 ; GFX90A-NEXT: ; def v[0:1]
5825 ; GFX90A-NEXT: ;;#ASMEND
5826 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16
5827 ; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4
5828 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
5829 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5830 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5832 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_5:
5834 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5835 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
5836 ; GFX940-NEXT: ;;#ASMSTART
5837 ; GFX940-NEXT: ; def v[0:1]
5838 ; GFX940-NEXT: ;;#ASMEND
5839 ; GFX940-NEXT: s_nop 0
5840 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16
5841 ; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
5842 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
5843 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5844 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5845 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5846 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5847 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 5>
5848 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 0, 5>: mask indices 0-3 address the first operand and 4-7 the
; second, so the result lanes are %vec1[3], %vec0[0], %vec1[1] (both inputs are used).
; The <3 x bfloat> result is stored to %ptr.
5852 define void @v_shuffle_v3bf16_v4bf16__7_0_5(ptr addrspace(1) inreg %ptr) {
5853 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_0_5:
5855 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5856 ; GFX900-NEXT: ;;#ASMSTART
5857 ; GFX900-NEXT: ; def v[0:1]
5858 ; GFX900-NEXT: ;;#ASMEND
5859 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
5860 ; GFX900-NEXT: ;;#ASMSTART
5861 ; GFX900-NEXT: ; def v[1:2]
5862 ; GFX900-NEXT: ;;#ASMEND
5863 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16
5864 ; GFX900-NEXT: global_store_short_d16_hi v3, v1, s[16:17] offset:4
5865 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
5866 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5867 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5869 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_0_5:
5871 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5872 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
5873 ; GFX90A-NEXT: ;;#ASMSTART
5874 ; GFX90A-NEXT: ; def v[0:1]
5875 ; GFX90A-NEXT: ;;#ASMEND
5876 ; GFX90A-NEXT: ;;#ASMSTART
5877 ; GFX90A-NEXT: ; def v[2:3]
5878 ; GFX90A-NEXT: ;;#ASMEND
5879 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16
5880 ; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4
5881 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
5882 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5883 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5885 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_5:
5887 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5888 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
5889 ; GFX940-NEXT: ;;#ASMSTART
5890 ; GFX940-NEXT: ; def v[0:1]
5891 ; GFX940-NEXT: ;;#ASMEND
5892 ; GFX940-NEXT: ;;#ASMSTART
5893 ; GFX940-NEXT: ; def v[2:3]
5894 ; GFX940-NEXT: ;;#ASMEND
5895 ; GFX940-NEXT: s_nop 0
5896 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16
5897 ; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
5898 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
5899 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5900 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5901 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5902 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5903 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 5>
5904 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 1, 5>: mask indices 0-3 address the first operand and 4-7 the
; second, so the result lanes are %vec1[3], %vec0[1], %vec1[1] (both inputs are used).
; The <3 x bfloat> result is stored to %ptr.
5908 define void @v_shuffle_v3bf16_v4bf16__7_1_5(ptr addrspace(1) inreg %ptr) {
5909 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_1_5:
5911 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5912 ; GFX900-NEXT: ;;#ASMSTART
5913 ; GFX900-NEXT: ; def v[0:1]
5914 ; GFX900-NEXT: ;;#ASMEND
5915 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
5916 ; GFX900-NEXT: ;;#ASMSTART
5917 ; GFX900-NEXT: ; def v[1:2]
5918 ; GFX900-NEXT: ;;#ASMEND
5919 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
5920 ; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4
5921 ; GFX900-NEXT: global_store_short_d16_hi v3, v1, s[16:17] offset:4
5922 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
5923 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5924 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5926 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_1_5:
5928 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5929 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
5930 ; GFX90A-NEXT: ;;#ASMSTART
5931 ; GFX90A-NEXT: ; def v[0:1]
5932 ; GFX90A-NEXT: ;;#ASMEND
5933 ; GFX90A-NEXT: ;;#ASMSTART
5934 ; GFX90A-NEXT: ; def v[2:3]
5935 ; GFX90A-NEXT: ;;#ASMEND
5936 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
5937 ; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
5938 ; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4
5939 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
5940 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5941 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5943 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_5:
5945 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5946 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
5947 ; GFX940-NEXT: ;;#ASMSTART
5948 ; GFX940-NEXT: ; def v[0:1]
5949 ; GFX940-NEXT: ;;#ASMEND
5950 ; GFX940-NEXT: ;;#ASMSTART
5951 ; GFX940-NEXT: ; def v[2:3]
5952 ; GFX940-NEXT: ;;#ASMEND
5953 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
5954 ; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
5955 ; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
5956 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
5957 ; GFX940-NEXT: s_waitcnt vmcnt(0)
5958 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5959 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
5960 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
5961 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 5>
5962 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 2, 5>: mask indices 0-3 address the first operand and 4-7 the
; second, so the result lanes are %vec1[3], %vec0[2], %vec1[1] (both inputs are used).
; The <3 x bfloat> result is stored to %ptr.
5966 define void @v_shuffle_v3bf16_v4bf16__7_2_5(ptr addrspace(1) inreg %ptr) {
5967 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_2_5:
5969 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5970 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
5971 ; GFX900-NEXT: ;;#ASMSTART
5972 ; GFX900-NEXT: ; def v[0:1]
5973 ; GFX900-NEXT: ;;#ASMEND
5974 ; GFX900-NEXT: ;;#ASMSTART
5975 ; GFX900-NEXT: ; def v[2:3]
5976 ; GFX900-NEXT: ;;#ASMEND
5977 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16
5978 ; GFX900-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4
5979 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
5980 ; GFX900-NEXT: s_waitcnt vmcnt(0)
5981 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5983 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_2_5:
5985 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5986 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
5987 ; GFX90A-NEXT: ;;#ASMSTART
5988 ; GFX90A-NEXT: ; def v[0:1]
5989 ; GFX90A-NEXT: ;;#ASMEND
5990 ; GFX90A-NEXT: ;;#ASMSTART
5991 ; GFX90A-NEXT: ; def v[2:3]
5992 ; GFX90A-NEXT: ;;#ASMEND
5993 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16
5994 ; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4
5995 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
5996 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
5997 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5999 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_5:
6001 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6002 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
6003 ; GFX940-NEXT: ;;#ASMSTART
6004 ; GFX940-NEXT: ; def v[0:1]
6005 ; GFX940-NEXT: ;;#ASMEND
6006 ; GFX940-NEXT: ;;#ASMSTART
6007 ; GFX940-NEXT: ; def v[2:3]
6008 ; GFX940-NEXT: ;;#ASMEND
6009 ; GFX940-NEXT: s_nop 0
6010 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16
6011 ; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
6012 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
6013 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6014 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6015 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6016 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6017 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 5>
6018 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 3, 5>: mask indices 0-3 address the first operand and 4-7 the
; second, so the result lanes are %vec1[3], %vec0[3], %vec1[1] (both inputs are used).
; The <3 x bfloat> result is stored to %ptr.
6022 define void @v_shuffle_v3bf16_v4bf16__7_3_5(ptr addrspace(1) inreg %ptr) {
6023 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_3_5:
6025 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6026 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
6027 ; GFX900-NEXT: ;;#ASMSTART
6028 ; GFX900-NEXT: ; def v[0:1]
6029 ; GFX900-NEXT: ;;#ASMEND
6030 ; GFX900-NEXT: ;;#ASMSTART
6031 ; GFX900-NEXT: ; def v[2:3]
6032 ; GFX900-NEXT: ;;#ASMEND
6033 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
6034 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
6035 ; GFX900-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4
6036 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
6037 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6038 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6040 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_3_5:
6042 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6043 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
6044 ; GFX90A-NEXT: ;;#ASMSTART
6045 ; GFX90A-NEXT: ; def v[0:1]
6046 ; GFX90A-NEXT: ;;#ASMEND
6047 ; GFX90A-NEXT: ;;#ASMSTART
6048 ; GFX90A-NEXT: ; def v[2:3]
6049 ; GFX90A-NEXT: ;;#ASMEND
6050 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
6051 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
6052 ; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4
6053 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
6054 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6055 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6057 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_5:
6059 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6060 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
6061 ; GFX940-NEXT: ;;#ASMSTART
6062 ; GFX940-NEXT: ; def v[0:1]
6063 ; GFX940-NEXT: ;;#ASMEND
6064 ; GFX940-NEXT: ;;#ASMSTART
6065 ; GFX940-NEXT: ; def v[2:3]
6066 ; GFX940-NEXT: ;;#ASMEND
6067 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
6068 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
6069 ; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
6070 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
6071 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6072 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6073 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6074 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6075 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 5>
6076 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 4, 5>: mask indices 4-7 address the second operand, so the
; result lanes are %vec1[3], %vec1[0], %vec1[1]; %vec0 is not referenced by the mask.
; The <3 x bfloat> result is stored to %ptr.
6080 define void @v_shuffle_v3bf16_v4bf16__7_4_5(ptr addrspace(1) inreg %ptr) {
6081 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_4_5:
6083 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6084 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
6085 ; GFX900-NEXT: ;;#ASMSTART
6086 ; GFX900-NEXT: ; def v[0:1]
6087 ; GFX900-NEXT: ;;#ASMEND
6088 ; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16
6089 ; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4
6090 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
6091 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6092 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6094 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_4_5:
6096 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6097 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
6098 ; GFX90A-NEXT: ;;#ASMSTART
6099 ; GFX90A-NEXT: ; def v[0:1]
6100 ; GFX90A-NEXT: ;;#ASMEND
6101 ; GFX90A-NEXT: v_alignbit_b32 v1, v0, v1, 16
6102 ; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4
6103 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
6104 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6105 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6107 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_5:
6109 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6110 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
6111 ; GFX940-NEXT: ;;#ASMSTART
6112 ; GFX940-NEXT: ; def v[0:1]
6113 ; GFX940-NEXT: ;;#ASMEND
6114 ; GFX940-NEXT: s_nop 0
6115 ; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16
6116 ; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
6117 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
6118 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6119 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6120 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6121 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6122 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 5>
6123 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <7, 6, 5>: mask indices 4-7 address the second operand, so the
; result lanes are %vec1[3], %vec1[2], %vec1[1]; %vec0 is not referenced by the mask.
; The <3 x bfloat> result is stored to %ptr.
6127 define void @v_shuffle_v3bf16_v4bf16__7_6_5(ptr addrspace(1) inreg %ptr) {
6128 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_6_5:
6130 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6131 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
6132 ; GFX900-NEXT: ;;#ASMSTART
6133 ; GFX900-NEXT: ; def v[0:1]
6134 ; GFX900-NEXT: ;;#ASMEND
6135 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16
6136 ; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4
6137 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
6138 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6139 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6141 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_6_5:
6143 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6144 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
6145 ; GFX90A-NEXT: ;;#ASMSTART
6146 ; GFX90A-NEXT: ; def v[0:1]
6147 ; GFX90A-NEXT: ;;#ASMEND
6148 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16
6149 ; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4
6150 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
6151 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6152 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6154 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_5:
6156 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6157 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
6158 ; GFX940-NEXT: ;;#ASMSTART
6159 ; GFX940-NEXT: ; def v[0:1]
6160 ; GFX940-NEXT: ;;#ASMEND
6161 ; GFX940-NEXT: s_nop 0
6162 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16
6163 ; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
6164 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
6165 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6166 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6167 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6168 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6169 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 5>
6170 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <poison, 6, 6>: mask indices 4-7 address the second operand, so
; lane 0 is poison (unspecified) and lanes 1 and 2 are both %vec1[2];
; %vec0 is not referenced by the mask. The <3 x bfloat> result is stored to %ptr.
6174 define void @v_shuffle_v3bf16_v4bf16__u_6_6(ptr addrspace(1) inreg %ptr) {
6175 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__u_6_6:
6177 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6178 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
6179 ; GFX900-NEXT: ;;#ASMSTART
6180 ; GFX900-NEXT: ; def v[0:1]
6181 ; GFX900-NEXT: ;;#ASMEND
6182 ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1
6183 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
6184 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
6185 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6186 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6188 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__u_6_6:
6190 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6191 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
6192 ; GFX90A-NEXT: ;;#ASMSTART
6193 ; GFX90A-NEXT: ; def v[0:1]
6194 ; GFX90A-NEXT: ;;#ASMEND
6195 ; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1
6196 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
6197 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
6198 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6199 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6201 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__u_6_6:
6203 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6204 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
6205 ; GFX940-NEXT: ;;#ASMSTART
6206 ; GFX940-NEXT: ; def v[0:1]
6207 ; GFX940-NEXT: ;;#ASMEND
6208 ; GFX940-NEXT: s_nop 0
6209 ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1
6210 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6211 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
6212 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6213 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6214 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6215 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6216 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 poison, i32 6, i32 6>
6217 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <0, 6, 6>: mask indices 0-3 address the first operand and 4-7 the
; second, so the result lanes are %vec0[0], %vec1[2], %vec1[2] (both inputs are used).
; The <3 x bfloat> result is stored to %ptr.
6221 define void @v_shuffle_v3bf16_v4bf16__0_6_6(ptr addrspace(1) inreg %ptr) {
6222 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__0_6_6:
6224 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6225 ; GFX900-NEXT: ;;#ASMSTART
6226 ; GFX900-NEXT: ; def v[0:1]
6227 ; GFX900-NEXT: ;;#ASMEND
6228 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
6229 ; GFX900-NEXT: ;;#ASMSTART
6230 ; GFX900-NEXT: ; def v[1:2]
6231 ; GFX900-NEXT: ;;#ASMEND
6232 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
6233 ; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4
6234 ; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4
6235 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
6236 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6237 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6239 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__0_6_6:
6241 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6242 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
6243 ; GFX90A-NEXT: ;;#ASMSTART
6244 ; GFX90A-NEXT: ; def v[0:1]
6245 ; GFX90A-NEXT: ;;#ASMEND
6246 ; GFX90A-NEXT: ;;#ASMSTART
6247 ; GFX90A-NEXT: ; def v[2:3]
6248 ; GFX90A-NEXT: ;;#ASMEND
6249 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
6250 ; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4
6251 ; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4
6252 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
6253 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6254 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6256 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_6_6:
6258 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6259 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
6260 ; GFX940-NEXT: ;;#ASMSTART
6261 ; GFX940-NEXT: ; def v[0:1]
6262 ; GFX940-NEXT: ;;#ASMEND
6263 ; GFX940-NEXT: ;;#ASMSTART
6264 ; GFX940-NEXT: ; def v[2:3]
6265 ; GFX940-NEXT: ;;#ASMEND
6266 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
6267 ; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2
6268 ; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6269 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
6270 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6271 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6272 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6273 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6274 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 0, i32 6, i32 6>
6275 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <1, 6, 6>: mask indices 0-3 address the first operand and 4-7 the
; second, so the result lanes are %vec0[1], %vec1[2], %vec1[2] (both inputs are used).
; The <3 x bfloat> result is stored to %ptr.
6279 define void @v_shuffle_v3bf16_v4bf16__1_6_6(ptr addrspace(1) inreg %ptr) {
6280 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__1_6_6:
6282 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6283 ; GFX900-NEXT: ;;#ASMSTART
6284 ; GFX900-NEXT: ; def v[0:1]
6285 ; GFX900-NEXT: ;;#ASMEND
6286 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
6287 ; GFX900-NEXT: ;;#ASMSTART
6288 ; GFX900-NEXT: ; def v[1:2]
6289 ; GFX900-NEXT: ;;#ASMEND
6290 ; GFX900-NEXT: v_alignbit_b32 v0, v2, v0, 16
6291 ; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4
6292 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
6293 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6294 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6296 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__1_6_6:
6298 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6299 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
6300 ; GFX90A-NEXT: ;;#ASMSTART
6301 ; GFX90A-NEXT: ; def v[0:1]
6302 ; GFX90A-NEXT: ;;#ASMEND
6303 ; GFX90A-NEXT: ;;#ASMSTART
6304 ; GFX90A-NEXT: ; def v[2:3]
6305 ; GFX90A-NEXT: ;;#ASMEND
6306 ; GFX90A-NEXT: v_alignbit_b32 v0, v3, v0, 16
6307 ; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4
6308 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
6309 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6310 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6312 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_6_6:
6314 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6315 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
6316 ; GFX940-NEXT: ;;#ASMSTART
6317 ; GFX940-NEXT: ; def v[0:1]
6318 ; GFX940-NEXT: ;;#ASMEND
6319 ; GFX940-NEXT: ;;#ASMSTART
6320 ; GFX940-NEXT: ; def v[2:3]
6321 ; GFX940-NEXT: ;;#ASMEND
6322 ; GFX940-NEXT: s_nop 0
6323 ; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16
6324 ; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6325 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
6326 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6327 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6328 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6329 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6330 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 1, i32 6, i32 6>
6331 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <2, 6, 6>: mask indices 0-3 address the first operand and 4-7 the
; second, so the result lanes are %vec0[2], %vec1[2], %vec1[2] (both inputs are used).
; The <3 x bfloat> result is stored to %ptr.
6335 define void @v_shuffle_v3bf16_v4bf16__2_6_6(ptr addrspace(1) inreg %ptr) {
6336 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__2_6_6:
6338 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6339 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
6340 ; GFX900-NEXT: ;;#ASMSTART
6341 ; GFX900-NEXT: ; def v[0:1]
6342 ; GFX900-NEXT: ;;#ASMEND
6343 ; GFX900-NEXT: ;;#ASMSTART
6344 ; GFX900-NEXT: ; def v[2:3]
6345 ; GFX900-NEXT: ;;#ASMEND
6346 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
6347 ; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4
6348 ; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4
6349 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
6350 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6351 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6353 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__2_6_6:
6355 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6356 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
6357 ; GFX90A-NEXT: ;;#ASMSTART
6358 ; GFX90A-NEXT: ; def v[0:1]
6359 ; GFX90A-NEXT: ;;#ASMEND
6360 ; GFX90A-NEXT: ;;#ASMSTART
6361 ; GFX90A-NEXT: ; def v[2:3]
6362 ; GFX90A-NEXT: ;;#ASMEND
6363 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
6364 ; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4
6365 ; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4
6366 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
6367 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6368 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6370 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_6_6:
6372 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6373 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
6374 ; GFX940-NEXT: ;;#ASMSTART
6375 ; GFX940-NEXT: ; def v[0:1]
6376 ; GFX940-NEXT: ;;#ASMEND
6377 ; GFX940-NEXT: ;;#ASMSTART
6378 ; GFX940-NEXT: ; def v[2:3]
6379 ; GFX940-NEXT: ;;#ASMEND
6380 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
6381 ; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2
6382 ; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6383 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
6384 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6385 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6386 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6387 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6388 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 2, i32 6, i32 6>
6389 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 6, 6>: mask indices 0-3 address the first operand and 4-7 the
; second, so the result lanes are %vec0[3], %vec1[2], %vec1[2] (both inputs are used).
; The <3 x bfloat> result is stored to %ptr.
6393 define void @v_shuffle_v3bf16_v4bf16__3_6_6(ptr addrspace(1) inreg %ptr) {
6394 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__3_6_6:
6396 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6397 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
6398 ; GFX900-NEXT: ;;#ASMSTART
6399 ; GFX900-NEXT: ; def v[0:1]
6400 ; GFX900-NEXT: ;;#ASMEND
6401 ; GFX900-NEXT: ;;#ASMSTART
6402 ; GFX900-NEXT: ; def v[2:3]
6403 ; GFX900-NEXT: ;;#ASMEND
6404 ; GFX900-NEXT: v_alignbit_b32 v0, v3, v1, 16
6405 ; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4
6406 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
6407 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6408 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6410 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__3_6_6:
6412 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6413 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
6414 ; GFX90A-NEXT: ;;#ASMSTART
6415 ; GFX90A-NEXT: ; def v[0:1]
6416 ; GFX90A-NEXT: ;;#ASMEND
6417 ; GFX90A-NEXT: ;;#ASMSTART
6418 ; GFX90A-NEXT: ; def v[2:3]
6419 ; GFX90A-NEXT: ;;#ASMEND
6420 ; GFX90A-NEXT: v_alignbit_b32 v0, v3, v1, 16
6421 ; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4
6422 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
6423 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6424 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6426 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_6_6:
6428 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6429 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
6430 ; GFX940-NEXT: ;;#ASMSTART
6431 ; GFX940-NEXT: ; def v[0:1]
6432 ; GFX940-NEXT: ;;#ASMEND
6433 ; GFX940-NEXT: ;;#ASMSTART
6434 ; GFX940-NEXT: ; def v[2:3]
6435 ; GFX940-NEXT: ;;#ASMEND
6436 ; GFX940-NEXT: s_nop 0
6437 ; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16
6438 ; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6439 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
6440 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6441 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6442 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6443 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6444 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 3, i32 6, i32 6>
6445 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <4, 6, 6>: mask indices 4-7 address the second operand, so the
; result lanes are %vec1[0], %vec1[2], %vec1[2]; %vec0 is not referenced by the mask.
; The <3 x bfloat> result is stored to %ptr.
6449 define void @v_shuffle_v3bf16_v4bf16__4_6_6(ptr addrspace(1) inreg %ptr) {
6450 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__4_6_6:
6452 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6453 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
6454 ; GFX900-NEXT: ;;#ASMSTART
6455 ; GFX900-NEXT: ; def v[0:1]
6456 ; GFX900-NEXT: ;;#ASMEND
6457 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
6458 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
6459 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
6460 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
6461 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6462 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6464 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__4_6_6:
6466 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6467 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
6468 ; GFX90A-NEXT: ;;#ASMSTART
6469 ; GFX90A-NEXT: ; def v[0:1]
6470 ; GFX90A-NEXT: ;;#ASMEND
6471 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
6472 ; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4
6473 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
6474 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
6475 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6476 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6478 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__4_6_6:
6480 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6481 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
6482 ; GFX940-NEXT: ;;#ASMSTART
6483 ; GFX940-NEXT: ; def v[0:1]
6484 ; GFX940-NEXT: ;;#ASMEND
6485 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
6486 ; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2
6487 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6488 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
6489 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6490 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6491 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6492 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6493 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 4, i32 6, i32 6>
6494 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <5, 6, 6>: mask indices 4-7 address the second operand, so the
; result lanes are %vec1[1], %vec1[2], %vec1[2]; %vec0 is not referenced by the mask.
; The <3 x bfloat> result is stored to %ptr.
6498 define void @v_shuffle_v3bf16_v4bf16__5_6_6(ptr addrspace(1) inreg %ptr) {
6499 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__5_6_6:
6501 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6502 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
6503 ; GFX900-NEXT: ;;#ASMSTART
6504 ; GFX900-NEXT: ; def v[0:1]
6505 ; GFX900-NEXT: ;;#ASMEND
6506 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16
6507 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
6508 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
6509 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6510 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6512 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__5_6_6:
6514 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6515 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
6516 ; GFX90A-NEXT: ;;#ASMSTART
6517 ; GFX90A-NEXT: ; def v[0:1]
6518 ; GFX90A-NEXT: ;;#ASMEND
6519 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16
6520 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
6521 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
6522 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6523 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6525 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_6_6:
6527 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6528 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
6529 ; GFX940-NEXT: ;;#ASMSTART
6530 ; GFX940-NEXT: ; def v[0:1]
6531 ; GFX940-NEXT: ;;#ASMEND
6532 ; GFX940-NEXT: s_nop 0
6533 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16
6534 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6535 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
6536 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6537 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6538 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6539 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6540 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 6, i32 6>
6541 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <6, 6, 6> (a splat of one %vec1 element), stored through the inreg global
; pointer. Only %vec1 is referenced, so the checks contain a single "; def".
; Expected lowering: lanes 0-1 come from v_perm_b32 with both sources set to
; v1 and selector 0x5040100; lane 2 is a 16-bit store of v1 at offset 4.
6545 define void @v_shuffle_v3bf16_v4bf16__6_6_6(ptr addrspace(1) inreg %ptr) {
6546 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__6_6_6:
6548 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6549 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
6550 ; GFX900-NEXT: ;;#ASMSTART
6551 ; GFX900-NEXT: ; def v[0:1]
6552 ; GFX900-NEXT: ;;#ASMEND
6553 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
6554 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
6555 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
6556 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
6557 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6558 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6560 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__6_6_6:
6562 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6563 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
6564 ; GFX90A-NEXT: ;;#ASMSTART
6565 ; GFX90A-NEXT: ; def v[0:1]
6566 ; GFX90A-NEXT: ;;#ASMEND
6567 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
6568 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
6569 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
6570 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
6571 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6572 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6574 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_6_6:
6576 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6577 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
6578 ; GFX940-NEXT: ;;#ASMSTART
6579 ; GFX940-NEXT: ; def v[0:1]
6580 ; GFX940-NEXT: ;;#ASMEND
6581 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
6582 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
6583 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6584 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
6585 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6586 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6587 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6588 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6589 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 6, i32 6>
6590 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <7, 6, 6>, stored through the inreg global pointer. Only %vec1 is
; referenced, so the checks contain a single "; def".
; Expected lowering: lanes 0-1 come from v_alignbit_b32 with v1 as both
; sources and shift 16 (swapping the two halves of v1); lane 2 is a 16-bit
; store of v1 at offset 4. gfx940 expects an s_nop 0 before the v_alignbit_b32.
6594 define void @v_shuffle_v3bf16_v4bf16__7_6_6(ptr addrspace(1) inreg %ptr) {
6595 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_6_6:
6597 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6598 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
6599 ; GFX900-NEXT: ;;#ASMSTART
6600 ; GFX900-NEXT: ; def v[0:1]
6601 ; GFX900-NEXT: ;;#ASMEND
6602 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16
6603 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
6604 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
6605 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6606 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6608 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_6_6:
6610 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6611 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
6612 ; GFX90A-NEXT: ;;#ASMSTART
6613 ; GFX90A-NEXT: ; def v[0:1]
6614 ; GFX90A-NEXT: ;;#ASMEND
6615 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16
6616 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
6617 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
6618 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6619 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6621 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_6:
6623 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6624 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
6625 ; GFX940-NEXT: ;;#ASMSTART
6626 ; GFX940-NEXT: ; def v[0:1]
6627 ; GFX940-NEXT: ;;#ASMEND
6628 ; GFX940-NEXT: s_nop 0
6629 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16
6630 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6631 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
6632 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6633 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6634 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6635 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6636 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 6>
6637 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <7, poison, 6>, stored through the inreg global pointer. Only %vec1 is
; referenced, so the checks contain a single "; def".
; Expected lowering: v_alignbit_b32 (shift 16) places the high half of v1 in
; lane 0. Its other source is an SGPR (s4, or s0 on gfx940) that the checked
; sequence never writes -- presumably acceptable because lane 1 is poison.
; Lane 2 is a 16-bit store of v1 at offset 4.
6641 define void @v_shuffle_v3bf16_v4bf16__7_u_6(ptr addrspace(1) inreg %ptr) {
6642 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_u_6:
6644 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6645 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
6646 ; GFX900-NEXT: ;;#ASMSTART
6647 ; GFX900-NEXT: ; def v[0:1]
6648 ; GFX900-NEXT: ;;#ASMEND
6649 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16
6650 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
6651 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
6652 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6653 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6655 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_u_6:
6657 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6658 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
6659 ; GFX90A-NEXT: ;;#ASMSTART
6660 ; GFX90A-NEXT: ; def v[0:1]
6661 ; GFX90A-NEXT: ;;#ASMEND
6662 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16
6663 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
6664 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
6665 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6666 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6668 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_6:
6670 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6671 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
6672 ; GFX940-NEXT: ;;#ASMSTART
6673 ; GFX940-NEXT: ; def v[0:1]
6674 ; GFX940-NEXT: ;;#ASMEND
6675 ; GFX940-NEXT: s_nop 0
6676 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16
6677 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6678 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
6679 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6680 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6681 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6682 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6683 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 6>
6684 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <7, 0, 6>, stored through the inreg global pointer. Lane 1 reads %vec0
; (index 0) and lanes 0 and 2 read %vec1 (indices 7 and 6), so both "; def"
; blocks are expected.
; Expected lowering: v_alignbit_b32 (shift 16) combines v0 from the first def
; with the upper register of the second def; lane 2 is a 16-bit store at
; offset 4. The second def is v[1:2] on gfx900 but v[2:3] on gfx90a/gfx940.
6688 define void @v_shuffle_v3bf16_v4bf16__7_0_6(ptr addrspace(1) inreg %ptr) {
6689 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_0_6:
6691 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6692 ; GFX900-NEXT: ;;#ASMSTART
6693 ; GFX900-NEXT: ; def v[0:1]
6694 ; GFX900-NEXT: ;;#ASMEND
6695 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
6696 ; GFX900-NEXT: ;;#ASMSTART
6697 ; GFX900-NEXT: ; def v[1:2]
6698 ; GFX900-NEXT: ;;#ASMEND
6699 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16
6700 ; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4
6701 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
6702 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6703 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6705 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_0_6:
6707 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6708 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
6709 ; GFX90A-NEXT: ;;#ASMSTART
6710 ; GFX90A-NEXT: ; def v[0:1]
6711 ; GFX90A-NEXT: ;;#ASMEND
6712 ; GFX90A-NEXT: ;;#ASMSTART
6713 ; GFX90A-NEXT: ; def v[2:3]
6714 ; GFX90A-NEXT: ;;#ASMEND
6715 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16
6716 ; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4
6717 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
6718 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6719 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6721 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_6:
6723 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6724 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
6725 ; GFX940-NEXT: ;;#ASMSTART
6726 ; GFX940-NEXT: ; def v[0:1]
6727 ; GFX940-NEXT: ;;#ASMEND
6728 ; GFX940-NEXT: ;;#ASMSTART
6729 ; GFX940-NEXT: ; def v[2:3]
6730 ; GFX940-NEXT: ;;#ASMEND
6731 ; GFX940-NEXT: s_nop 0
6732 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16
6733 ; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6734 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
6735 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6736 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6737 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6738 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6739 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 6>
6740 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <7, 1, 6>, stored through the inreg global pointer. Lane 1 reads %vec0
; (index 1) and lanes 0 and 2 read %vec1 (indices 7 and 6), so both "; def"
; blocks are expected.
; Expected lowering: v_perm_b32 with selector 0x7060302 combines v0 from the
; first def with the upper register of the second def; lane 2 is a 16-bit
; store at offset 4. The second def is v[1:2] on gfx900 but v[2:3] on
; gfx90a/gfx940.
6744 define void @v_shuffle_v3bf16_v4bf16__7_1_6(ptr addrspace(1) inreg %ptr) {
6745 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_1_6:
6747 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6748 ; GFX900-NEXT: ;;#ASMSTART
6749 ; GFX900-NEXT: ; def v[0:1]
6750 ; GFX900-NEXT: ;;#ASMEND
6751 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
6752 ; GFX900-NEXT: ;;#ASMSTART
6753 ; GFX900-NEXT: ; def v[1:2]
6754 ; GFX900-NEXT: ;;#ASMEND
6755 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
6756 ; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4
6757 ; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4
6758 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
6759 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6760 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6762 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_1_6:
6764 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6765 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
6766 ; GFX90A-NEXT: ;;#ASMSTART
6767 ; GFX90A-NEXT: ; def v[0:1]
6768 ; GFX90A-NEXT: ;;#ASMEND
6769 ; GFX90A-NEXT: ;;#ASMSTART
6770 ; GFX90A-NEXT: ; def v[2:3]
6771 ; GFX90A-NEXT: ;;#ASMEND
6772 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
6773 ; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
6774 ; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4
6775 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
6776 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6777 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6779 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_6:
6781 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6782 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
6783 ; GFX940-NEXT: ;;#ASMSTART
6784 ; GFX940-NEXT: ; def v[0:1]
6785 ; GFX940-NEXT: ;;#ASMEND
6786 ; GFX940-NEXT: ;;#ASMSTART
6787 ; GFX940-NEXT: ; def v[2:3]
6788 ; GFX940-NEXT: ;;#ASMEND
6789 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
6790 ; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
6791 ; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6792 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
6793 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6794 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6795 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6796 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6797 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 6>
6798 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <7, 2, 6>, stored through the inreg global pointer. Lane 1 reads %vec0
; (index 2) and lanes 0 and 2 read %vec1 (indices 7 and 6), so both "; def"
; blocks are expected (v[0:1] and v[2:3] on every target).
; Expected lowering: v_alignbit_b32 (shift 16) combines v1 with v3; lane 2 is
; a 16-bit store of v3 at offset 4. gfx940 expects an s_nop 0 before the
; v_alignbit_b32.
6802 define void @v_shuffle_v3bf16_v4bf16__7_2_6(ptr addrspace(1) inreg %ptr) {
6803 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_2_6:
6805 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6806 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
6807 ; GFX900-NEXT: ;;#ASMSTART
6808 ; GFX900-NEXT: ; def v[0:1]
6809 ; GFX900-NEXT: ;;#ASMEND
6810 ; GFX900-NEXT: ;;#ASMSTART
6811 ; GFX900-NEXT: ; def v[2:3]
6812 ; GFX900-NEXT: ;;#ASMEND
6813 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16
6814 ; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4
6815 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
6816 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6817 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6819 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_2_6:
6821 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6822 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
6823 ; GFX90A-NEXT: ;;#ASMSTART
6824 ; GFX90A-NEXT: ; def v[0:1]
6825 ; GFX90A-NEXT: ;;#ASMEND
6826 ; GFX90A-NEXT: ;;#ASMSTART
6827 ; GFX90A-NEXT: ; def v[2:3]
6828 ; GFX90A-NEXT: ;;#ASMEND
6829 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16
6830 ; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4
6831 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
6832 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6833 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6835 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_6:
6837 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6838 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
6839 ; GFX940-NEXT: ;;#ASMSTART
6840 ; GFX940-NEXT: ; def v[0:1]
6841 ; GFX940-NEXT: ;;#ASMEND
6842 ; GFX940-NEXT: ;;#ASMSTART
6843 ; GFX940-NEXT: ; def v[2:3]
6844 ; GFX940-NEXT: ;;#ASMEND
6845 ; GFX940-NEXT: s_nop 0
6846 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16
6847 ; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6848 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
6849 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6850 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6851 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6852 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6853 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 6>
6854 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <7, 3, 6>, stored through the inreg global pointer. Lane 1 reads %vec0
; (index 3) and lanes 0 and 2 read %vec1 (indices 7 and 6), so both "; def"
; blocks are expected (v[0:1] and v[2:3] on every target).
; Expected lowering: v_perm_b32 with selector 0x7060302 combines v1 with v3;
; lane 2 is a 16-bit store of v3 at offset 4.
6858 define void @v_shuffle_v3bf16_v4bf16__7_3_6(ptr addrspace(1) inreg %ptr) {
6859 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_3_6:
6861 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6862 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
6863 ; GFX900-NEXT: ;;#ASMSTART
6864 ; GFX900-NEXT: ; def v[0:1]
6865 ; GFX900-NEXT: ;;#ASMEND
6866 ; GFX900-NEXT: ;;#ASMSTART
6867 ; GFX900-NEXT: ; def v[2:3]
6868 ; GFX900-NEXT: ;;#ASMEND
6869 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
6870 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
6871 ; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4
6872 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
6873 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6874 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6876 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_3_6:
6878 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6879 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
6880 ; GFX90A-NEXT: ;;#ASMSTART
6881 ; GFX90A-NEXT: ; def v[0:1]
6882 ; GFX90A-NEXT: ;;#ASMEND
6883 ; GFX90A-NEXT: ;;#ASMSTART
6884 ; GFX90A-NEXT: ; def v[2:3]
6885 ; GFX90A-NEXT: ;;#ASMEND
6886 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
6887 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
6888 ; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4
6889 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
6890 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6891 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6893 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_6:
6895 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6896 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
6897 ; GFX940-NEXT: ;;#ASMSTART
6898 ; GFX940-NEXT: ; def v[0:1]
6899 ; GFX940-NEXT: ;;#ASMEND
6900 ; GFX940-NEXT: ;;#ASMSTART
6901 ; GFX940-NEXT: ; def v[2:3]
6902 ; GFX940-NEXT: ;;#ASMEND
6903 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
6904 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
6905 ; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6906 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
6907 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6908 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6909 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6910 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6911 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 6>
6912 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <7, 4, 6>, stored through the inreg global pointer. All three indices are
; in the 4-7 range (%vec1), so the checks contain a single "; def".
; Expected lowering: v_alignbit_b32 v0, v0, v1 with shift 16 forms lanes 0-1;
; lane 2 is a 16-bit store of v1 at offset 4. gfx940 expects an s_nop 0
; before the v_alignbit_b32.
6916 define void @v_shuffle_v3bf16_v4bf16__7_4_6(ptr addrspace(1) inreg %ptr) {
6917 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_4_6:
6919 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6920 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
6921 ; GFX900-NEXT: ;;#ASMSTART
6922 ; GFX900-NEXT: ; def v[0:1]
6923 ; GFX900-NEXT: ;;#ASMEND
6924 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16
6925 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
6926 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
6927 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6928 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6930 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_4_6:
6932 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6933 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
6934 ; GFX90A-NEXT: ;;#ASMSTART
6935 ; GFX90A-NEXT: ; def v[0:1]
6936 ; GFX90A-NEXT: ;;#ASMEND
6937 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16
6938 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
6939 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
6940 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6941 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6943 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_6:
6945 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6946 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
6947 ; GFX940-NEXT: ;;#ASMSTART
6948 ; GFX940-NEXT: ; def v[0:1]
6949 ; GFX940-NEXT: ;;#ASMEND
6950 ; GFX940-NEXT: s_nop 0
6951 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16
6952 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6953 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
6954 ; GFX940-NEXT: s_waitcnt vmcnt(0)
6955 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6956 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
6957 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
6958 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 6>
6959 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <7, 5, 6>, stored through the inreg global pointer. All three indices are
; in the 4-7 range (%vec1), so the checks contain a single "; def".
; Expected lowering: v_perm_b32 v0, v0, v1 with selector 0x7060302 forms
; lanes 0-1; lane 2 is a 16-bit store of v1 at offset 4.
6963 define void @v_shuffle_v3bf16_v4bf16__7_5_6(ptr addrspace(1) inreg %ptr) {
6964 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_5_6:
6966 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6967 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
6968 ; GFX900-NEXT: ;;#ASMSTART
6969 ; GFX900-NEXT: ; def v[0:1]
6970 ; GFX900-NEXT: ;;#ASMEND
6971 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
6972 ; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4
6973 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
6974 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
6975 ; GFX900-NEXT: s_waitcnt vmcnt(0)
6976 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6978 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_5_6:
6980 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6981 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
6982 ; GFX90A-NEXT: ;;#ASMSTART
6983 ; GFX90A-NEXT: ; def v[0:1]
6984 ; GFX90A-NEXT: ;;#ASMEND
6985 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
6986 ; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4
6987 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
6988 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
6989 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
6990 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6992 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_6:
6994 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6995 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
6996 ; GFX940-NEXT: ;;#ASMSTART
6997 ; GFX940-NEXT: ; def v[0:1]
6998 ; GFX940-NEXT: ;;#ASMEND
6999 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
7000 ; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2
7001 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7002 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
7003 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7004 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7005 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7006 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7007 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 6>
7008 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <poison, 7, 7>, stored through the inreg global pointer. The only defined
; indices refer to %vec1, so the checks contain a single "; def".
; Expected lowering: v_bfi_b32 with mask 0xffff merges v0 and v1 into the
; stored dword, and v_lshrrev_b32 by 16 extracts the high half of v1 for the
; 16-bit store at offset 4. The dword store is expected before the 16-bit
; store.
7012 define void @v_shuffle_v3bf16_v4bf16__u_7_7(ptr addrspace(1) inreg %ptr) {
7013 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__u_7_7:
7015 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7016 ; GFX900-NEXT: ;;#ASMSTART
7017 ; GFX900-NEXT: ; def v[0:1]
7018 ; GFX900-NEXT: ;;#ASMEND
7019 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
7020 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
7021 ; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1
7022 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7023 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
7024 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
7025 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7026 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7028 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__u_7_7:
7030 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7031 ; GFX90A-NEXT: ;;#ASMSTART
7032 ; GFX90A-NEXT: ; def v[0:1]
7033 ; GFX90A-NEXT: ;;#ASMEND
7034 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
7035 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
7036 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1
7037 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7038 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
7039 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
7040 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7041 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7043 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__u_7_7:
7045 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7046 ; GFX940-NEXT: ;;#ASMSTART
7047 ; GFX940-NEXT: ; def v[0:1]
7048 ; GFX940-NEXT: ;;#ASMEND
7049 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
7050 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
7051 ; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1
7052 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7053 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
7054 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7055 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7056 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7057 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7058 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7059 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 poison, i32 7, i32 7>
7060 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <0, 7, 7>, stored through the inreg global pointer. Lane 0 reads %vec0
; (index 0) and lanes 1-2 read %vec1 (index 7), so both "; def" blocks are
; expected.
; Expected lowering: v_bfi_b32 with mask 0xffff merges v0 with the upper
; register of the second def, and v_lshrrev_b32 by 16 extracts that
; register's high half for the 16-bit store at offset 4. The second def is
; v[1:2] on gfx900 but v[2:3] on gfx90a/gfx940; gfx940 also expects an
; s_nop 0 before the v_bfi_b32.
7064 define void @v_shuffle_v3bf16_v4bf16__0_7_7(ptr addrspace(1) inreg %ptr) {
7065 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__0_7_7:
7067 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7068 ; GFX900-NEXT: ;;#ASMSTART
7069 ; GFX900-NEXT: ; def v[0:1]
7070 ; GFX900-NEXT: ;;#ASMEND
7071 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
7072 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
7073 ; GFX900-NEXT: ;;#ASMSTART
7074 ; GFX900-NEXT: ; def v[1:2]
7075 ; GFX900-NEXT: ;;#ASMEND
7076 ; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v2
7077 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2
7078 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
7079 ; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4
7080 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7081 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7083 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__0_7_7:
7085 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7086 ; GFX90A-NEXT: ;;#ASMSTART
7087 ; GFX90A-NEXT: ; def v[0:1]
7088 ; GFX90A-NEXT: ;;#ASMEND
7089 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
7090 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
7091 ; GFX90A-NEXT: ;;#ASMSTART
7092 ; GFX90A-NEXT: ; def v[2:3]
7093 ; GFX90A-NEXT: ;;#ASMEND
7094 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v3
7095 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7096 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
7097 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
7098 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7099 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7101 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_7_7:
7103 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7104 ; GFX940-NEXT: ;;#ASMSTART
7105 ; GFX940-NEXT: ; def v[0:1]
7106 ; GFX940-NEXT: ;;#ASMEND
7107 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
7108 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
7109 ; GFX940-NEXT: ;;#ASMSTART
7110 ; GFX940-NEXT: ; def v[2:3]
7111 ; GFX940-NEXT: ;;#ASMEND
7112 ; GFX940-NEXT: s_nop 0
7113 ; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3
7114 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7115 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
7116 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7117 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7118 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7119 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7120 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7121 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 0, i32 7, i32 7>
7122 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <1, 7, 7>, stored through the inreg global pointer. Lane 0 reads %vec0
; (index 1) and lanes 1-2 read %vec1 (index 7), so both "; def" blocks are
; expected.
; Expected lowering: v_perm_b32 with selector 0x7060302 combines the upper
; register of the second def with v0, and v_lshrrev_b32 by 16 extracts that
; register's high half for the 16-bit store at offset 4. The second def is
; v[1:2] on gfx900 but v[2:3] on gfx90a/gfx940; gfx940 also expects an
; s_nop 0 before the v_perm_b32.
7126 define void @v_shuffle_v3bf16_v4bf16__1_7_7(ptr addrspace(1) inreg %ptr) {
7127 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__1_7_7:
7129 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7130 ; GFX900-NEXT: ;;#ASMSTART
7131 ; GFX900-NEXT: ; def v[0:1]
7132 ; GFX900-NEXT: ;;#ASMEND
7133 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
7134 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
7135 ; GFX900-NEXT: ;;#ASMSTART
7136 ; GFX900-NEXT: ; def v[1:2]
7137 ; GFX900-NEXT: ;;#ASMEND
7138 ; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4
7139 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2
7140 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
7141 ; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4
7142 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7143 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7145 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__1_7_7:
7147 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7148 ; GFX90A-NEXT: ;;#ASMSTART
7149 ; GFX90A-NEXT: ; def v[0:1]
7150 ; GFX90A-NEXT: ;;#ASMEND
7151 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
7152 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
7153 ; GFX90A-NEXT: ;;#ASMSTART
7154 ; GFX90A-NEXT: ; def v[2:3]
7155 ; GFX90A-NEXT: ;;#ASMEND
7156 ; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4
7157 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7158 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
7159 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
7160 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7161 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7163 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_7_7:
7165 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7166 ; GFX940-NEXT: ;;#ASMSTART
7167 ; GFX940-NEXT: ; def v[0:1]
7168 ; GFX940-NEXT: ;;#ASMEND
7169 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
7170 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
7171 ; GFX940-NEXT: ;;#ASMSTART
7172 ; GFX940-NEXT: ; def v[2:3]
7173 ; GFX940-NEXT: ;;#ASMEND
7174 ; GFX940-NEXT: s_nop 0
7175 ; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2
7176 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7177 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
7178 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7179 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7180 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7181 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7182 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7183 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 1, i32 7, i32 7>
7184 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <2, 7, 7>, stored through the inreg global pointer. Lane 0 reads %vec0
; (index 2) and lanes 1-2 read %vec1 (index 7), so both "; def" blocks are
; expected (v[0:1] and v[2:3] on every target).
; Expected lowering: v_bfi_b32 with mask 0xffff merges v1 with v3, and
; v_lshrrev_b32 by 16 extracts the high half of v3 for the 16-bit store at
; offset 4.
; NOTE(review): these checks expect the 16-bit store before the dword store,
; whereas the <0,7,7>, <1,7,7> and <3,7,7> tests expect the dword store first.
; Confirm by regenerating the assertions rather than editing them by hand.
7188 define void @v_shuffle_v3bf16_v4bf16__2_7_7(ptr addrspace(1) inreg %ptr) {
7189 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__2_7_7:
7191 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7192 ; GFX900-NEXT: ;;#ASMSTART
7193 ; GFX900-NEXT: ; def v[0:1]
7194 ; GFX900-NEXT: ;;#ASMEND
7195 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
7196 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
7197 ; GFX900-NEXT: ;;#ASMSTART
7198 ; GFX900-NEXT: ; def v[2:3]
7199 ; GFX900-NEXT: ;;#ASMEND
7200 ; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v3
7201 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7202 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
7203 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
7204 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7205 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7207 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__2_7_7:
7209 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7210 ; GFX90A-NEXT: ;;#ASMSTART
7211 ; GFX90A-NEXT: ; def v[0:1]
7212 ; GFX90A-NEXT: ;;#ASMEND
7213 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
7214 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
7215 ; GFX90A-NEXT: ;;#ASMSTART
7216 ; GFX90A-NEXT: ; def v[2:3]
7217 ; GFX90A-NEXT: ;;#ASMEND
7218 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v3
7219 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7220 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
7221 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
7222 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7223 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7225 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_7_7:
7227 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7228 ; GFX940-NEXT: ;;#ASMSTART
7229 ; GFX940-NEXT: ; def v[0:1]
7230 ; GFX940-NEXT: ;;#ASMEND
7231 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
7232 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
7233 ; GFX940-NEXT: ;;#ASMSTART
7234 ; GFX940-NEXT: ; def v[2:3]
7235 ; GFX940-NEXT: ;;#ASMEND
7236 ; GFX940-NEXT: s_nop 0
7237 ; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3
7238 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7239 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7240 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
7241 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7242 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7243 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7244 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7245 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 2, i32 7, i32 7>
7246 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <3, 7, 7>, stored through the inreg global pointer. Lane 0 reads %vec0
; (index 3) and lanes 1-2 read %vec1 (index 7), so both "; def" blocks are
; expected (v[0:1] and v[2:3] on every target).
; Expected lowering: v_perm_b32 with selector 0x7060302 combines v3 with v1,
; and v_lshrrev_b32 by 16 extracts the high half of v3 for the 16-bit store
; at offset 4. gfx940 expects an s_nop 0 before the v_perm_b32.
7250 define void @v_shuffle_v3bf16_v4bf16__3_7_7(ptr addrspace(1) inreg %ptr) {
7251 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__3_7_7:
7253 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7254 ; GFX900-NEXT: ;;#ASMSTART
7255 ; GFX900-NEXT: ; def v[0:1]
7256 ; GFX900-NEXT: ;;#ASMEND
7257 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
7258 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
7259 ; GFX900-NEXT: ;;#ASMSTART
7260 ; GFX900-NEXT: ; def v[2:3]
7261 ; GFX900-NEXT: ;;#ASMEND
7262 ; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4
7263 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7264 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
7265 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
7266 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7267 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7269 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__3_7_7:
7271 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7272 ; GFX90A-NEXT: ;;#ASMSTART
7273 ; GFX90A-NEXT: ; def v[0:1]
7274 ; GFX90A-NEXT: ;;#ASMEND
7275 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
7276 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
7277 ; GFX90A-NEXT: ;;#ASMSTART
7278 ; GFX90A-NEXT: ; def v[2:3]
7279 ; GFX90A-NEXT: ;;#ASMEND
7280 ; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4
7281 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7282 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
7283 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
7284 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7285 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7287 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_7_7:
7289 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7290 ; GFX940-NEXT: ;;#ASMSTART
7291 ; GFX940-NEXT: ; def v[0:1]
7292 ; GFX940-NEXT: ;;#ASMEND
7293 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
7294 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
7295 ; GFX940-NEXT: ;;#ASMSTART
7296 ; GFX940-NEXT: ; def v[2:3]
7297 ; GFX940-NEXT: ;;#ASMEND
7298 ; GFX940-NEXT: s_nop 0
7299 ; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2
7300 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7301 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
7302 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7303 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7304 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7305 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7306 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7307 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 3, i32 7, i32 7>
7308 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; Test: <3 x bfloat> shuffle of two <4 x bfloat> inline-asm values with mask
; <4, 7, 7>, stored through the inreg global pointer. All three indices are
; in the 4-7 range (%vec1), so the checks contain a single "; def".
; Expected lowering: v_bfi_b32 with mask 0xffff merges v0 and v1 into the
; stored dword, and v_lshrrev_b32 by 16 extracts the high half of v1 for the
; 16-bit store at offset 4. The dword store is expected before the 16-bit
; store.
7312 define void @v_shuffle_v3bf16_v4bf16__4_7_7(ptr addrspace(1) inreg %ptr) {
7313 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__4_7_7:
7315 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7316 ; GFX900-NEXT: ;;#ASMSTART
7317 ; GFX900-NEXT: ; def v[0:1]
7318 ; GFX900-NEXT: ;;#ASMEND
7319 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
7320 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
7321 ; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1
7322 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7323 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
7324 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
7325 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7326 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7328 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__4_7_7:
7330 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7331 ; GFX90A-NEXT: ;;#ASMSTART
7332 ; GFX90A-NEXT: ; def v[0:1]
7333 ; GFX90A-NEXT: ;;#ASMEND
7334 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
7335 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
7336 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1
7337 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7338 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
7339 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
7340 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7341 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7343 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__4_7_7:
7345 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7346 ; GFX940-NEXT: ;;#ASMSTART
7347 ; GFX940-NEXT: ; def v[0:1]
7348 ; GFX940-NEXT: ;;#ASMEND
7349 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
7350 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
7351 ; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1
7352 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7353 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
7354 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7355 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7356 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7357 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7358 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7359 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 4, i32 7, i32 7>
7360 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; VGPR variant, shuffle mask <5, 7, 7>. All three lanes index the second
; operand (%vec1), so %vec0 is unused and the checks contain a single asm def.
; The checks build the first two lanes with v_perm_b32 (selector 0x7060302)
; and obtain the third lane with a 16-bit right shift of v1.
7364 define void @v_shuffle_v3bf16_v4bf16__5_7_7(ptr addrspace(1) inreg %ptr) {
7365 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__5_7_7:
7367 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7368 ; GFX900-NEXT: ;;#ASMSTART
7369 ; GFX900-NEXT: ; def v[0:1]
7370 ; GFX900-NEXT: ;;#ASMEND
7371 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
7372 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
7373 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
7374 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7375 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
7376 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
7377 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7378 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7380 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__5_7_7:
7382 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7383 ; GFX90A-NEXT: ;;#ASMSTART
7384 ; GFX90A-NEXT: ; def v[0:1]
7385 ; GFX90A-NEXT: ;;#ASMEND
7386 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
7387 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
7388 ; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4
7389 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7390 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
7391 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
7392 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7393 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7395 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_7_7:
7397 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7398 ; GFX940-NEXT: ;;#ASMSTART
7399 ; GFX940-NEXT: ; def v[0:1]
7400 ; GFX940-NEXT: ;;#ASMEND
7401 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
7402 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
7403 ; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2
7404 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7405 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
7406 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7407 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7408 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7409 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7410 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7411 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 7, i32 7>
7412 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; VGPR variant, shuffle mask <6, 7, 7>. All lanes come from the second operand
; (%vec1); %vec0 is unused and the checks contain a single asm def. No ALU
; shuffle instruction appears in the checks: v1 is stored whole as a dword and
; its high half is stored again with global_store_short_d16_hi at offset 4.
7416 define void @v_shuffle_v3bf16_v4bf16__6_7_7(ptr addrspace(1) inreg %ptr) {
7417 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__6_7_7:
7419 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7420 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
7421 ; GFX900-NEXT: ;;#ASMSTART
7422 ; GFX900-NEXT: ; def v[0:1]
7423 ; GFX900-NEXT: ;;#ASMEND
7424 ; GFX900-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4
7425 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
7426 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7427 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7429 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__6_7_7:
7431 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7432 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
7433 ; GFX90A-NEXT: ;;#ASMSTART
7434 ; GFX90A-NEXT: ; def v[0:1]
7435 ; GFX90A-NEXT: ;;#ASMEND
7436 ; GFX90A-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4
7437 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
7438 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7439 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7441 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_7_7:
7443 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7444 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
7445 ; GFX940-NEXT: ;;#ASMSTART
7446 ; GFX940-NEXT: ; def v[0:1]
7447 ; GFX940-NEXT: ;;#ASMEND
7448 ; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1
7449 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
7450 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7451 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7452 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7453 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7454 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 7, i32 7>
7455 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; VGPR variant, shuffle mask <7, poison, 7>. Lanes 0 and 2 come from the second
; operand (%vec1); lane 1 is poison. %vec0 is unused and the checks contain a
; single asm def. In the checks the v_alignbit_b32 that forms the stored dword
; takes an otherwise-unwritten SGPR (s4 on gfx900/gfx90a, s0 on gfx940) as its
; high source, which appears to be the filler for the poison lane.
7459 define void @v_shuffle_v3bf16_v4bf16__7_u_7(ptr addrspace(1) inreg %ptr) {
7460 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_u_7:
7462 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7463 ; GFX900-NEXT: ;;#ASMSTART
7464 ; GFX900-NEXT: ; def v[0:1]
7465 ; GFX900-NEXT: ;;#ASMEND
7466 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
7467 ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1
7468 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16
7469 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
7470 ; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
7471 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7472 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7474 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_u_7:
7476 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7477 ; GFX90A-NEXT: ;;#ASMSTART
7478 ; GFX90A-NEXT: ; def v[0:1]
7479 ; GFX90A-NEXT: ;;#ASMEND
7480 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
7481 ; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v1
7482 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16
7483 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
7484 ; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
7485 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7486 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7488 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_7:
7490 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7491 ; GFX940-NEXT: ;;#ASMSTART
7492 ; GFX940-NEXT: ; def v[0:1]
7493 ; GFX940-NEXT: ;;#ASMEND
7494 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
7495 ; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1
7496 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16
7497 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
7498 ; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
7499 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7500 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7501 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7502 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7503 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 7>
7504 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; VGPR variant, shuffle mask <7, 0, 7>. Lane 1 is element 0 of %vec0; lanes 0
; and 2 are element 3 of %vec1 (mask index 7). Both inputs are live, so the
; checks contain two asm defs. The first two lanes are combined with
; v_alignbit_b32 and the third comes from a 16-bit right shift.
7508 define void @v_shuffle_v3bf16_v4bf16__7_0_7(ptr addrspace(1) inreg %ptr) {
7509 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_0_7:
7511 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7512 ; GFX900-NEXT: ;;#ASMSTART
7513 ; GFX900-NEXT: ; def v[0:1]
7514 ; GFX900-NEXT: ;;#ASMEND
7515 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
7516 ; GFX900-NEXT: ;;#ASMSTART
7517 ; GFX900-NEXT: ; def v[1:2]
7518 ; GFX900-NEXT: ;;#ASMEND
7519 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16
7520 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2
7521 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
7522 ; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4
7523 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7524 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7526 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_0_7:
7528 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7529 ; GFX90A-NEXT: ;;#ASMSTART
7530 ; GFX90A-NEXT: ; def v[0:1]
7531 ; GFX90A-NEXT: ;;#ASMEND
7532 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
7533 ; GFX90A-NEXT: ;;#ASMSTART
7534 ; GFX90A-NEXT: ; def v[2:3]
7535 ; GFX90A-NEXT: ;;#ASMEND
7536 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16
7537 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7538 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
7539 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
7540 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7541 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7543 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_7:
7545 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7546 ; GFX940-NEXT: ;;#ASMSTART
7547 ; GFX940-NEXT: ; def v[0:1]
7548 ; GFX940-NEXT: ;;#ASMEND
7549 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
7550 ; GFX940-NEXT: ;;#ASMSTART
7551 ; GFX940-NEXT: ; def v[2:3]
7552 ; GFX940-NEXT: ;;#ASMEND
7553 ; GFX940-NEXT: s_nop 0
7554 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16
7555 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7556 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
7557 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7558 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7559 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7560 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7561 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7562 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 7>
7563 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; VGPR variant, shuffle mask <7, 1, 7>. Lane 1 is element 1 of %vec0; lanes 0
; and 2 are element 3 of %vec1 (mask index 7). Both inputs are live, so the
; checks contain two asm defs. The first two lanes are combined with
; v_perm_b32 (selector 0x7060302); the third comes from a 16-bit right shift.
7567 define void @v_shuffle_v3bf16_v4bf16__7_1_7(ptr addrspace(1) inreg %ptr) {
7568 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_1_7:
7570 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7571 ; GFX900-NEXT: ;;#ASMSTART
7572 ; GFX900-NEXT: ; def v[0:1]
7573 ; GFX900-NEXT: ;;#ASMEND
7574 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
7575 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
7576 ; GFX900-NEXT: ;;#ASMSTART
7577 ; GFX900-NEXT: ; def v[1:2]
7578 ; GFX900-NEXT: ;;#ASMEND
7579 ; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4
7580 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2
7581 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
7582 ; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4
7583 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7584 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7586 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_1_7:
7588 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7589 ; GFX90A-NEXT: ;;#ASMSTART
7590 ; GFX90A-NEXT: ; def v[0:1]
7591 ; GFX90A-NEXT: ;;#ASMEND
7592 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
7593 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
7594 ; GFX90A-NEXT: ;;#ASMSTART
7595 ; GFX90A-NEXT: ; def v[2:3]
7596 ; GFX90A-NEXT: ;;#ASMEND
7597 ; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
7598 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7599 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
7600 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
7601 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7602 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7604 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_7:
7606 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7607 ; GFX940-NEXT: ;;#ASMSTART
7608 ; GFX940-NEXT: ; def v[0:1]
7609 ; GFX940-NEXT: ;;#ASMEND
7610 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
7611 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
7612 ; GFX940-NEXT: ;;#ASMSTART
7613 ; GFX940-NEXT: ; def v[2:3]
7614 ; GFX940-NEXT: ;;#ASMEND
7615 ; GFX940-NEXT: s_nop 0
7616 ; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
7617 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7618 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
7619 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7620 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7621 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7622 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7623 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7624 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 7>
7625 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; VGPR variant, shuffle mask <7, 2, 7>. Lane 1 is element 2 of %vec0; lanes 0
; and 2 are element 3 of %vec1 (mask index 7). Both inputs are live, so the
; checks contain two asm defs. The first two lanes are combined with
; v_alignbit_b32 and the third comes from a 16-bit right shift.
7629 define void @v_shuffle_v3bf16_v4bf16__7_2_7(ptr addrspace(1) inreg %ptr) {
7630 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_2_7:
7632 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7633 ; GFX900-NEXT: ;;#ASMSTART
7634 ; GFX900-NEXT: ; def v[0:1]
7635 ; GFX900-NEXT: ;;#ASMEND
7636 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
7637 ; GFX900-NEXT: ;;#ASMSTART
7638 ; GFX900-NEXT: ; def v[2:3]
7639 ; GFX900-NEXT: ;;#ASMEND
7640 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16
7641 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7642 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
7643 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
7644 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7645 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7647 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_2_7:
7649 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7650 ; GFX90A-NEXT: ;;#ASMSTART
7651 ; GFX90A-NEXT: ; def v[0:1]
7652 ; GFX90A-NEXT: ;;#ASMEND
7653 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
7654 ; GFX90A-NEXT: ;;#ASMSTART
7655 ; GFX90A-NEXT: ; def v[2:3]
7656 ; GFX90A-NEXT: ;;#ASMEND
7657 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16
7658 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7659 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
7660 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
7661 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7662 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7664 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_7:
7666 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7667 ; GFX940-NEXT: ;;#ASMSTART
7668 ; GFX940-NEXT: ; def v[0:1]
7669 ; GFX940-NEXT: ;;#ASMEND
7670 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
7671 ; GFX940-NEXT: ;;#ASMSTART
7672 ; GFX940-NEXT: ; def v[2:3]
7673 ; GFX940-NEXT: ;;#ASMEND
7674 ; GFX940-NEXT: s_nop 0
7675 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16
7676 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7677 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7678 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
7679 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7680 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7681 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7682 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7683 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 7>
7684 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; VGPR variant, shuffle mask <7, 3, 7>. Lane 1 is element 3 of %vec0; lanes 0
; and 2 are element 3 of %vec1 (mask index 7). Both inputs are live, so the
; checks contain two asm defs. The first two lanes are combined with
; v_perm_b32 (selector 0x7060302); the third comes from a 16-bit right shift.
7688 define void @v_shuffle_v3bf16_v4bf16__7_3_7(ptr addrspace(1) inreg %ptr) {
7689 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_3_7:
7691 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7692 ; GFX900-NEXT: ;;#ASMSTART
7693 ; GFX900-NEXT: ; def v[0:1]
7694 ; GFX900-NEXT: ;;#ASMEND
7695 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
7696 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
7697 ; GFX900-NEXT: ;;#ASMSTART
7698 ; GFX900-NEXT: ; def v[2:3]
7699 ; GFX900-NEXT: ;;#ASMEND
7700 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
7701 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7702 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
7703 ; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4
7704 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7705 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7707 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_3_7:
7709 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7710 ; GFX90A-NEXT: ;;#ASMSTART
7711 ; GFX90A-NEXT: ; def v[0:1]
7712 ; GFX90A-NEXT: ;;#ASMEND
7713 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
7714 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
7715 ; GFX90A-NEXT: ;;#ASMSTART
7716 ; GFX90A-NEXT: ; def v[2:3]
7717 ; GFX90A-NEXT: ;;#ASMEND
7718 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
7719 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7720 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
7721 ; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4
7722 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7723 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7725 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_7:
7727 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7728 ; GFX940-NEXT: ;;#ASMSTART
7729 ; GFX940-NEXT: ; def v[0:1]
7730 ; GFX940-NEXT: ;;#ASMEND
7731 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
7732 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
7733 ; GFX940-NEXT: ;;#ASMSTART
7734 ; GFX940-NEXT: ; def v[2:3]
7735 ; GFX940-NEXT: ;;#ASMEND
7736 ; GFX940-NEXT: s_nop 0
7737 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
7738 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3
7739 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
7740 ; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7741 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7742 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7743 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7744 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7745 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 7>
7746 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; VGPR variant, shuffle mask <7, 4, 7>. All three lanes index the second
; operand (%vec1), so %vec0 is unused and the checks contain a single asm def.
; The first two lanes are combined with v_alignbit_b32 over v0 and v1; the
; third lane comes from a 16-bit right shift of v1.
7750 define void @v_shuffle_v3bf16_v4bf16__7_4_7(ptr addrspace(1) inreg %ptr) {
7751 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_4_7:
7753 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7754 ; GFX900-NEXT: ;;#ASMSTART
7755 ; GFX900-NEXT: ; def v[0:1]
7756 ; GFX900-NEXT: ;;#ASMEND
7757 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
7758 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16
7759 ; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v1
7760 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
7761 ; GFX900-NEXT: global_store_short v2, v3, s[16:17] offset:4
7762 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7763 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7765 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_4_7:
7767 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7768 ; GFX90A-NEXT: ;;#ASMSTART
7769 ; GFX90A-NEXT: ; def v[0:1]
7770 ; GFX90A-NEXT: ;;#ASMEND
7771 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
7772 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16
7773 ; GFX90A-NEXT: v_lshrrev_b32_e32 v3, 16, v1
7774 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
7775 ; GFX90A-NEXT: global_store_short v2, v3, s[16:17] offset:4
7776 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7777 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7779 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_7:
7781 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7782 ; GFX940-NEXT: ;;#ASMSTART
7783 ; GFX940-NEXT: ; def v[0:1]
7784 ; GFX940-NEXT: ;;#ASMEND
7785 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
7786 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16
7787 ; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v1
7788 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
7789 ; GFX940-NEXT: global_store_short v2, v3, s[0:1] offset:4 sc0 sc1
7790 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7791 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7792 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7793 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7794 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 7>
7795 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; VGPR variant, shuffle mask <7, 5, 7>. All three lanes index the second
; operand (%vec1), so %vec0 is unused and the checks contain a single asm def.
; The first two lanes are combined with v_perm_b32 (selector 0x7060302); the
; third lane comes from a 16-bit right shift of v1.
7799 define void @v_shuffle_v3bf16_v4bf16__7_5_7(ptr addrspace(1) inreg %ptr) {
7800 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_5_7:
7802 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7803 ; GFX900-NEXT: ;;#ASMSTART
7804 ; GFX900-NEXT: ; def v[0:1]
7805 ; GFX900-NEXT: ;;#ASMEND
7806 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
7807 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
7808 ; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4
7809 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7810 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
7811 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
7812 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7813 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7815 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_5_7:
7817 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7818 ; GFX90A-NEXT: ;;#ASMSTART
7819 ; GFX90A-NEXT: ; def v[0:1]
7820 ; GFX90A-NEXT: ;;#ASMEND
7821 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
7822 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
7823 ; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4
7824 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7825 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
7826 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
7827 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7828 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7830 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_7:
7832 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7833 ; GFX940-NEXT: ;;#ASMSTART
7834 ; GFX940-NEXT: ; def v[0:1]
7835 ; GFX940-NEXT: ;;#ASMEND
7836 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
7837 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
7838 ; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2
7839 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7840 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
7841 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7842 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7843 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7844 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7845 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7846 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 7>
7847 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; VGPR variant, shuffle mask <7, 6, 7>. All three lanes index the second
; operand (%vec1), so %vec0 is unused and the checks contain a single asm def.
; The checks swap the two halves of v1 using v_alignbit_b32 with v1 as both
; sources, and take the third lane from a 16-bit right shift of v1.
7851 define void @v_shuffle_v3bf16_v4bf16__7_6_7(ptr addrspace(1) inreg %ptr) {
7852 ; GFX900-LABEL: v_shuffle_v3bf16_v4bf16__7_6_7:
7854 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7855 ; GFX900-NEXT: ;;#ASMSTART
7856 ; GFX900-NEXT: ; def v[0:1]
7857 ; GFX900-NEXT: ;;#ASMEND
7858 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
7859 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16
7860 ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7861 ; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
7862 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
7863 ; GFX900-NEXT: s_waitcnt vmcnt(0)
7864 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7866 ; GFX90A-LABEL: v_shuffle_v3bf16_v4bf16__7_6_7:
7868 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7869 ; GFX90A-NEXT: ;;#ASMSTART
7870 ; GFX90A-NEXT: ; def v[0:1]
7871 ; GFX90A-NEXT: ;;#ASMEND
7872 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
7873 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16
7874 ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7875 ; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
7876 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
7877 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
7878 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7880 ; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_7:
7882 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7883 ; GFX940-NEXT: ;;#ASMSTART
7884 ; GFX940-NEXT: ; def v[0:1]
7885 ; GFX940-NEXT: ;;#ASMEND
7886 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
7887 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16
7888 ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
7889 ; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7890 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
7891 ; GFX940-NEXT: s_waitcnt vmcnt(0)
7892 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7893 %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
7894 %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
7895 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 7>
7896 store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8
; SGPR variant with an entirely poison shuffle mask. The <3 x bfloat> result is
; widened to <4 x bfloat> (fourth lane poison) and passed to a side-effecting
; asm "use" pinned to s[8:9]. No lane of %vec0 is read, so the shared checks
; contain only the use and no asm def.
7900 define void @s_shuffle_v3bf16_v4bf16__u_u_u() {
7901 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__u_u_u:
7903 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7904 ; GFX9-NEXT: ;;#ASMSTART
7905 ; GFX9-NEXT: ; use s[8:9]
7906 ; GFX9-NEXT: ;;#ASMEND
7907 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7908 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7909 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> poison
7910 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7911 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR variant, shuffle mask <0, poison, poison>: only element 0 of %vec0 is
; required, and it is already in lane 0. The checks define the input directly
; in s[8:9], the register pair the asm "use" is pinned to, with no copy in
; between (the gfx940 checks have an s_nop 0 between the two asm blocks).
7915 define void @s_shuffle_v3bf16_v4bf16__0_u_u() {
7916 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__0_u_u:
7918 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7919 ; GFX900-NEXT: ;;#ASMSTART
7920 ; GFX900-NEXT: ; def s[8:9]
7921 ; GFX900-NEXT: ;;#ASMEND
7922 ; GFX900-NEXT: ;;#ASMSTART
7923 ; GFX900-NEXT: ; use s[8:9]
7924 ; GFX900-NEXT: ;;#ASMEND
7925 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7927 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__0_u_u:
7929 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7930 ; GFX90A-NEXT: ;;#ASMSTART
7931 ; GFX90A-NEXT: ; def s[8:9]
7932 ; GFX90A-NEXT: ;;#ASMEND
7933 ; GFX90A-NEXT: ;;#ASMSTART
7934 ; GFX90A-NEXT: ; use s[8:9]
7935 ; GFX90A-NEXT: ;;#ASMEND
7936 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7938 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__0_u_u:
7940 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7941 ; GFX940-NEXT: ;;#ASMSTART
7942 ; GFX940-NEXT: ; def s[8:9]
7943 ; GFX940-NEXT: ;;#ASMEND
7944 ; GFX940-NEXT: s_nop 0
7945 ; GFX940-NEXT: ;;#ASMSTART
7946 ; GFX940-NEXT: ; use s[8:9]
7947 ; GFX940-NEXT: ;;#ASMEND
7948 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7949 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7950 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
7951 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7952 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR variant, shuffle mask <1, poison, poison>: lane 0 of the result is
; element 1 of %vec0. The checks move it into place with a 16-bit logical
; right shift of the low input dword into s8 before the asm "use" of s[8:9].
7956 define void @s_shuffle_v3bf16_v4bf16__1_u_u() {
7957 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__1_u_u:
7959 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7960 ; GFX900-NEXT: ;;#ASMSTART
7961 ; GFX900-NEXT: ; def s[4:5]
7962 ; GFX900-NEXT: ;;#ASMEND
7963 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
7964 ; GFX900-NEXT: ;;#ASMSTART
7965 ; GFX900-NEXT: ; use s[8:9]
7966 ; GFX900-NEXT: ;;#ASMEND
7967 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7969 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__1_u_u:
7971 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7972 ; GFX90A-NEXT: ;;#ASMSTART
7973 ; GFX90A-NEXT: ; def s[4:5]
7974 ; GFX90A-NEXT: ;;#ASMEND
7975 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
7976 ; GFX90A-NEXT: ;;#ASMSTART
7977 ; GFX90A-NEXT: ; use s[8:9]
7978 ; GFX90A-NEXT: ;;#ASMEND
7979 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7981 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_u_u:
7983 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7984 ; GFX940-NEXT: ;;#ASMSTART
7985 ; GFX940-NEXT: ; def s[0:1]
7986 ; GFX940-NEXT: ;;#ASMEND
7987 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
7988 ; GFX940-NEXT: ;;#ASMSTART
7989 ; GFX940-NEXT: ; use s[8:9]
7990 ; GFX940-NEXT: ;;#ASMEND
7991 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7992 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
7993 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
7994 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7995 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR variant, shuffle mask <2, poison, poison>: lane 0 of the result is
; element 2 of %vec0. The checks copy the high input dword into s8 with a
; plain s_mov_b32 before the asm "use" of s[8:9].
7999 define void @s_shuffle_v3bf16_v4bf16__2_u_u() {
8000 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__2_u_u:
8002 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8003 ; GFX900-NEXT: ;;#ASMSTART
8004 ; GFX900-NEXT: ; def s[4:5]
8005 ; GFX900-NEXT: ;;#ASMEND
8006 ; GFX900-NEXT: s_mov_b32 s8, s5
8007 ; GFX900-NEXT: ;;#ASMSTART
8008 ; GFX900-NEXT: ; use s[8:9]
8009 ; GFX900-NEXT: ;;#ASMEND
8010 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8012 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__2_u_u:
8014 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8015 ; GFX90A-NEXT: ;;#ASMSTART
8016 ; GFX90A-NEXT: ; def s[4:5]
8017 ; GFX90A-NEXT: ;;#ASMEND
8018 ; GFX90A-NEXT: s_mov_b32 s8, s5
8019 ; GFX90A-NEXT: ;;#ASMSTART
8020 ; GFX90A-NEXT: ; use s[8:9]
8021 ; GFX90A-NEXT: ;;#ASMEND
8022 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8024 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_u_u:
8026 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8027 ; GFX940-NEXT: ;;#ASMSTART
8028 ; GFX940-NEXT: ; def s[0:1]
8029 ; GFX940-NEXT: ;;#ASMEND
8030 ; GFX940-NEXT: s_mov_b32 s8, s1
8031 ; GFX940-NEXT: ;;#ASMSTART
8032 ; GFX940-NEXT: ; use s[8:9]
8033 ; GFX940-NEXT: ;;#ASMEND
8034 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8035 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8036 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 2, i32 poison, i32 poison>
8037 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8038 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR variant, shuffle mask <3, poison, poison>: lane 0 of the result is
; element 3 of %vec0. The checks move it into place with a 16-bit logical
; right shift of the high input dword into s8 before the asm "use" of s[8:9].
8042 define void @s_shuffle_v3bf16_v4bf16__3_u_u() {
8043 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__3_u_u:
8045 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8046 ; GFX900-NEXT: ;;#ASMSTART
8047 ; GFX900-NEXT: ; def s[4:5]
8048 ; GFX900-NEXT: ;;#ASMEND
8049 ; GFX900-NEXT: s_lshr_b32 s8, s5, 16
8050 ; GFX900-NEXT: ;;#ASMSTART
8051 ; GFX900-NEXT: ; use s[8:9]
8052 ; GFX900-NEXT: ;;#ASMEND
8053 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8055 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__3_u_u:
8057 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8058 ; GFX90A-NEXT: ;;#ASMSTART
8059 ; GFX90A-NEXT: ; def s[4:5]
8060 ; GFX90A-NEXT: ;;#ASMEND
8061 ; GFX90A-NEXT: s_lshr_b32 s8, s5, 16
8062 ; GFX90A-NEXT: ;;#ASMSTART
8063 ; GFX90A-NEXT: ; use s[8:9]
8064 ; GFX90A-NEXT: ;;#ASMEND
8065 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8067 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_u_u:
8069 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8070 ; GFX940-NEXT: ;;#ASMSTART
8071 ; GFX940-NEXT: ; def s[0:1]
8072 ; GFX940-NEXT: ;;#ASMEND
8073 ; GFX940-NEXT: s_lshr_b32 s8, s1, 16
8074 ; GFX940-NEXT: ;;#ASMSTART
8075 ; GFX940-NEXT: ; use s[8:9]
8076 ; GFX940-NEXT: ;;#ASMEND
8077 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8078 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8079 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 3, i32 poison, i32 poison>
8080 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8081 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR variant, shuffle mask <4, poison, poison>. Mask index 4 addresses lane 0
; of the SECOND shufflevector operand, which is poison in this function, so the
; whole result is poison; the shared checks contain only the asm "use" and no
; asm def.
; NOTE(review): the sibling __5_u_u and __6_u_u tests define a %vec1 and pass
; it as the second operand. As written this test does not exercise selecting
; element 0 of a second input; confirm whether %vec1 was intended here. Adding
; it would require regenerating the checks with update_llc_test_checks.py.
8085 define void @s_shuffle_v3bf16_v4bf16__4_u_u() {
8086 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__4_u_u:
8088 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8089 ; GFX9-NEXT: ;;#ASMSTART
8090 ; GFX9-NEXT: ; use s[8:9]
8091 ; GFX9-NEXT: ;;#ASMEND
8092 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8093 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8094 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 4, i32 poison, i32 poison>
8095 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8096 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR variant, shuffle mask <5, poison, poison>: lane 0 of the result is
; element 1 of the second operand (%vec1). %vec0 is unused, so the checks
; contain a single asm def, followed by a 16-bit logical right shift of the
; low input dword into s8 before the asm "use" of s[8:9].
8100 define void @s_shuffle_v3bf16_v4bf16__5_u_u() {
8101 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__5_u_u:
8103 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8104 ; GFX900-NEXT: ;;#ASMSTART
8105 ; GFX900-NEXT: ; def s[4:5]
8106 ; GFX900-NEXT: ;;#ASMEND
8107 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
8108 ; GFX900-NEXT: ;;#ASMSTART
8109 ; GFX900-NEXT: ; use s[8:9]
8110 ; GFX900-NEXT: ;;#ASMEND
8111 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8113 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__5_u_u:
8115 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8116 ; GFX90A-NEXT: ;;#ASMSTART
8117 ; GFX90A-NEXT: ; def s[4:5]
8118 ; GFX90A-NEXT: ;;#ASMEND
8119 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
8120 ; GFX90A-NEXT: ;;#ASMSTART
8121 ; GFX90A-NEXT: ; use s[8:9]
8122 ; GFX90A-NEXT: ;;#ASMEND
8123 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8125 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_u_u:
8127 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8128 ; GFX940-NEXT: ;;#ASMSTART
8129 ; GFX940-NEXT: ; def s[0:1]
8130 ; GFX940-NEXT: ;;#ASMEND
8131 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
8132 ; GFX940-NEXT: ;;#ASMSTART
8133 ; GFX940-NEXT: ; use s[8:9]
8134 ; GFX940-NEXT: ;;#ASMEND
8135 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8136 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8137 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8138 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 poison, i32 poison>
8139 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8140 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR variant, shuffle mask <6, poison, poison>: lane 0 of the result is
; element 2 of the second operand (%vec1). %vec0 is unused, so the checks
; contain a single asm def, followed by an s_mov_b32 of the high input dword
; into s8 before the asm "use" of s[8:9].
8144 define void @s_shuffle_v3bf16_v4bf16__6_u_u() {
8145 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__6_u_u:
8147 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8148 ; GFX900-NEXT: ;;#ASMSTART
8149 ; GFX900-NEXT: ; def s[4:5]
8150 ; GFX900-NEXT: ;;#ASMEND
8151 ; GFX900-NEXT: s_mov_b32 s8, s5
8152 ; GFX900-NEXT: ;;#ASMSTART
8153 ; GFX900-NEXT: ; use s[8:9]
8154 ; GFX900-NEXT: ;;#ASMEND
8155 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8157 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__6_u_u:
8159 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8160 ; GFX90A-NEXT: ;;#ASMSTART
8161 ; GFX90A-NEXT: ; def s[4:5]
8162 ; GFX90A-NEXT: ;;#ASMEND
8163 ; GFX90A-NEXT: s_mov_b32 s8, s5
8164 ; GFX90A-NEXT: ;;#ASMSTART
8165 ; GFX90A-NEXT: ; use s[8:9]
8166 ; GFX90A-NEXT: ;;#ASMEND
8167 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8169 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_u_u:
8171 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8172 ; GFX940-NEXT: ;;#ASMSTART
8173 ; GFX940-NEXT: ; def s[0:1]
8174 ; GFX940-NEXT: ;;#ASMEND
8175 ; GFX940-NEXT: s_mov_b32 s8, s1
8176 ; GFX940-NEXT: ;;#ASMSTART
8177 ; GFX940-NEXT: ; use s[8:9]
8178 ; GFX940-NEXT: ;;#ASMEND
8179 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8180 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8181 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8182 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 poison, i32 poison>
8183 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8184 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, poison, poison>: result element 0 is element 3 of %vec1
; (mask indices 4-7 address the second operand); elements 1 and 2 are poison.
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8188 define void @s_shuffle_v3bf16_v4bf16__7_u_u() {
8189 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_u_u:
8191 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8192 ; GFX900-NEXT: ;;#ASMSTART
8193 ; GFX900-NEXT: ; def s[4:5]
8194 ; GFX900-NEXT: ;;#ASMEND
8195 ; GFX900-NEXT: s_lshr_b32 s8, s5, 16
8196 ; GFX900-NEXT: ;;#ASMSTART
8197 ; GFX900-NEXT: ; use s[8:9]
8198 ; GFX900-NEXT: ;;#ASMEND
8199 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8201 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_u_u:
8203 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8204 ; GFX90A-NEXT: ;;#ASMSTART
8205 ; GFX90A-NEXT: ; def s[4:5]
8206 ; GFX90A-NEXT: ;;#ASMEND
8207 ; GFX90A-NEXT: s_lshr_b32 s8, s5, 16
8208 ; GFX90A-NEXT: ;;#ASMSTART
8209 ; GFX90A-NEXT: ; use s[8:9]
8210 ; GFX90A-NEXT: ;;#ASMEND
8211 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8213 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_u:
8215 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8216 ; GFX940-NEXT: ;;#ASMSTART
8217 ; GFX940-NEXT: ; def s[0:1]
8218 ; GFX940-NEXT: ;;#ASMEND
8219 ; GFX940-NEXT: s_lshr_b32 s8, s1, 16
8220 ; GFX940-NEXT: ;;#ASMSTART
8221 ; GFX940-NEXT: ; use s[8:9]
8222 ; GFX940-NEXT: ;;#ASMEND
8223 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8224 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8225 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8226 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 poison>
8227 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8228 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 0, poison>: result element 0 is element 3 of %vec1 and
; element 1 is element 0 of %vec0 (mask indices 0-3 address the first operand,
; 4-7 the second); element 2 is poison.
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8232 define void @s_shuffle_v3bf16_v4bf16__7_0_u() {
8233 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_0_u:
8235 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8236 ; GFX900-NEXT: ;;#ASMSTART
8237 ; GFX900-NEXT: ; def s[4:5]
8238 ; GFX900-NEXT: ;;#ASMEND
8239 ; GFX900-NEXT: ;;#ASMSTART
8240 ; GFX900-NEXT: ; def s[6:7]
8241 ; GFX900-NEXT: ;;#ASMEND
8242 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
8243 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
8244 ; GFX900-NEXT: ;;#ASMSTART
8245 ; GFX900-NEXT: ; use s[8:9]
8246 ; GFX900-NEXT: ;;#ASMEND
8247 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8249 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_0_u:
8251 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8252 ; GFX90A-NEXT: ;;#ASMSTART
8253 ; GFX90A-NEXT: ; def s[4:5]
8254 ; GFX90A-NEXT: ;;#ASMEND
8255 ; GFX90A-NEXT: ;;#ASMSTART
8256 ; GFX90A-NEXT: ; def s[6:7]
8257 ; GFX90A-NEXT: ;;#ASMEND
8258 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
8259 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
8260 ; GFX90A-NEXT: ;;#ASMSTART
8261 ; GFX90A-NEXT: ; use s[8:9]
8262 ; GFX90A-NEXT: ;;#ASMEND
8263 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8265 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_u:
8267 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8268 ; GFX940-NEXT: ;;#ASMSTART
8269 ; GFX940-NEXT: ; def s[0:1]
8270 ; GFX940-NEXT: ;;#ASMEND
8271 ; GFX940-NEXT: ;;#ASMSTART
8272 ; GFX940-NEXT: ; def s[2:3]
8273 ; GFX940-NEXT: ;;#ASMEND
8274 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
8275 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
8276 ; GFX940-NEXT: ;;#ASMSTART
8277 ; GFX940-NEXT: ; use s[8:9]
8278 ; GFX940-NEXT: ;;#ASMEND
8279 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8280 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8281 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8282 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 poison>
8283 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8284 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 1, poison>: result element 0 is element 3 of %vec1 and
; element 1 is element 1 of %vec0 (mask indices 0-3 address the first operand,
; 4-7 the second); element 2 is poison.
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8288 define void @s_shuffle_v3bf16_v4bf16__7_1_u() {
8289 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_1_u:
8291 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8292 ; GFX900-NEXT: ;;#ASMSTART
8293 ; GFX900-NEXT: ; def s[4:5]
8294 ; GFX900-NEXT: ;;#ASMEND
8295 ; GFX900-NEXT: ;;#ASMSTART
8296 ; GFX900-NEXT: ; def s[6:7]
8297 ; GFX900-NEXT: ;;#ASMEND
8298 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
8299 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
8300 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
8301 ; GFX900-NEXT: ;;#ASMSTART
8302 ; GFX900-NEXT: ; use s[8:9]
8303 ; GFX900-NEXT: ;;#ASMEND
8304 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8306 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_1_u:
8308 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8309 ; GFX90A-NEXT: ;;#ASMSTART
8310 ; GFX90A-NEXT: ; def s[4:5]
8311 ; GFX90A-NEXT: ;;#ASMEND
8312 ; GFX90A-NEXT: ;;#ASMSTART
8313 ; GFX90A-NEXT: ; def s[6:7]
8314 ; GFX90A-NEXT: ;;#ASMEND
8315 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
8316 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
8317 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
8318 ; GFX90A-NEXT: ;;#ASMSTART
8319 ; GFX90A-NEXT: ; use s[8:9]
8320 ; GFX90A-NEXT: ;;#ASMEND
8321 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8323 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_u:
8325 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8326 ; GFX940-NEXT: ;;#ASMSTART
8327 ; GFX940-NEXT: ; def s[0:1]
8328 ; GFX940-NEXT: ;;#ASMEND
8329 ; GFX940-NEXT: ;;#ASMSTART
8330 ; GFX940-NEXT: ; def s[2:3]
8331 ; GFX940-NEXT: ;;#ASMEND
8332 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
8333 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
8334 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
8335 ; GFX940-NEXT: ;;#ASMSTART
8336 ; GFX940-NEXT: ; use s[8:9]
8337 ; GFX940-NEXT: ;;#ASMEND
8338 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8339 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8340 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8341 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 poison>
8342 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8343 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 2, poison>: result element 0 is element 3 of %vec1 and
; element 1 is element 2 of %vec0 (mask indices 0-3 address the first operand,
; 4-7 the second); element 2 is poison.
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8347 define void @s_shuffle_v3bf16_v4bf16__7_2_u() {
8348 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_2_u:
8350 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8351 ; GFX900-NEXT: ;;#ASMSTART
8352 ; GFX900-NEXT: ; def s[4:5]
8353 ; GFX900-NEXT: ;;#ASMEND
8354 ; GFX900-NEXT: ;;#ASMSTART
8355 ; GFX900-NEXT: ; def s[6:7]
8356 ; GFX900-NEXT: ;;#ASMEND
8357 ; GFX900-NEXT: s_lshr_b32 s4, s7, 16
8358 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
8359 ; GFX900-NEXT: ;;#ASMSTART
8360 ; GFX900-NEXT: ; use s[8:9]
8361 ; GFX900-NEXT: ;;#ASMEND
8362 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8364 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_2_u:
8366 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8367 ; GFX90A-NEXT: ;;#ASMSTART
8368 ; GFX90A-NEXT: ; def s[4:5]
8369 ; GFX90A-NEXT: ;;#ASMEND
8370 ; GFX90A-NEXT: ;;#ASMSTART
8371 ; GFX90A-NEXT: ; def s[6:7]
8372 ; GFX90A-NEXT: ;;#ASMEND
8373 ; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
8374 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
8375 ; GFX90A-NEXT: ;;#ASMSTART
8376 ; GFX90A-NEXT: ; use s[8:9]
8377 ; GFX90A-NEXT: ;;#ASMEND
8378 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8380 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_u:
8382 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8383 ; GFX940-NEXT: ;;#ASMSTART
8384 ; GFX940-NEXT: ; def s[0:1]
8385 ; GFX940-NEXT: ;;#ASMEND
8386 ; GFX940-NEXT: ;;#ASMSTART
8387 ; GFX940-NEXT: ; def s[2:3]
8388 ; GFX940-NEXT: ;;#ASMEND
8389 ; GFX940-NEXT: s_lshr_b32 s0, s3, 16
8390 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
8391 ; GFX940-NEXT: ;;#ASMSTART
8392 ; GFX940-NEXT: ; use s[8:9]
8393 ; GFX940-NEXT: ;;#ASMEND
8394 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8395 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8396 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8397 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 poison>
8398 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8399 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 3, poison>: result element 0 is element 3 of %vec1 and
; element 1 is element 3 of %vec0 (mask indices 0-3 address the first operand,
; 4-7 the second); element 2 is poison.
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8403 define void @s_shuffle_v3bf16_v4bf16__7_3_u() {
8404 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_3_u:
8406 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8407 ; GFX900-NEXT: ;;#ASMSTART
8408 ; GFX900-NEXT: ; def s[4:5]
8409 ; GFX900-NEXT: ;;#ASMEND
8410 ; GFX900-NEXT: ;;#ASMSTART
8411 ; GFX900-NEXT: ; def s[6:7]
8412 ; GFX900-NEXT: ;;#ASMEND
8413 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
8414 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
8415 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
8416 ; GFX900-NEXT: ;;#ASMSTART
8417 ; GFX900-NEXT: ; use s[8:9]
8418 ; GFX900-NEXT: ;;#ASMEND
8419 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8421 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_3_u:
8423 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8424 ; GFX90A-NEXT: ;;#ASMSTART
8425 ; GFX90A-NEXT: ; def s[4:5]
8426 ; GFX90A-NEXT: ;;#ASMEND
8427 ; GFX90A-NEXT: ;;#ASMSTART
8428 ; GFX90A-NEXT: ; def s[6:7]
8429 ; GFX90A-NEXT: ;;#ASMEND
8430 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
8431 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
8432 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
8433 ; GFX90A-NEXT: ;;#ASMSTART
8434 ; GFX90A-NEXT: ; use s[8:9]
8435 ; GFX90A-NEXT: ;;#ASMEND
8436 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8438 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_u:
8440 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8441 ; GFX940-NEXT: ;;#ASMSTART
8442 ; GFX940-NEXT: ; def s[0:1]
8443 ; GFX940-NEXT: ;;#ASMEND
8444 ; GFX940-NEXT: ;;#ASMSTART
8445 ; GFX940-NEXT: ; def s[2:3]
8446 ; GFX940-NEXT: ;;#ASMEND
8447 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
8448 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
8449 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
8450 ; GFX940-NEXT: ;;#ASMSTART
8451 ; GFX940-NEXT: ; use s[8:9]
8452 ; GFX940-NEXT: ;;#ASMEND
8453 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8454 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8455 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8456 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 poison>
8457 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8458 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 4, poison>: result element 0 is element 3 of %vec1 and
; element 1 is element 0 of %vec1 (mask indices 4-7 address the second
; operand); element 2 is poison. %vec0 is not referenced by the mask.
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8462 define void @s_shuffle_v3bf16_v4bf16__7_4_u() {
8463 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_4_u:
8465 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8466 ; GFX900-NEXT: ;;#ASMSTART
8467 ; GFX900-NEXT: ; def s[4:5]
8468 ; GFX900-NEXT: ;;#ASMEND
8469 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
8470 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
8471 ; GFX900-NEXT: ;;#ASMSTART
8472 ; GFX900-NEXT: ; use s[8:9]
8473 ; GFX900-NEXT: ;;#ASMEND
8474 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8476 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_4_u:
8478 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8479 ; GFX90A-NEXT: ;;#ASMSTART
8480 ; GFX90A-NEXT: ; def s[4:5]
8481 ; GFX90A-NEXT: ;;#ASMEND
8482 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
8483 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
8484 ; GFX90A-NEXT: ;;#ASMSTART
8485 ; GFX90A-NEXT: ; use s[8:9]
8486 ; GFX90A-NEXT: ;;#ASMEND
8487 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8489 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_u:
8491 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8492 ; GFX940-NEXT: ;;#ASMSTART
8493 ; GFX940-NEXT: ; def s[0:1]
8494 ; GFX940-NEXT: ;;#ASMEND
8495 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
8496 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
8497 ; GFX940-NEXT: ;;#ASMSTART
8498 ; GFX940-NEXT: ; use s[8:9]
8499 ; GFX940-NEXT: ;;#ASMEND
8500 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8501 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8502 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8503 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 poison>
8504 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8505 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 5, poison>: result element 0 is element 3 of %vec1 and
; element 1 is element 1 of %vec1 (mask indices 4-7 address the second
; operand); element 2 is poison. %vec0 is not referenced by the mask.
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8509 define void @s_shuffle_v3bf16_v4bf16__7_5_u() {
8510 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_5_u:
8512 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8513 ; GFX900-NEXT: ;;#ASMSTART
8514 ; GFX900-NEXT: ; def s[4:5]
8515 ; GFX900-NEXT: ;;#ASMEND
8516 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
8517 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
8518 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
8519 ; GFX900-NEXT: ;;#ASMSTART
8520 ; GFX900-NEXT: ; use s[8:9]
8521 ; GFX900-NEXT: ;;#ASMEND
8522 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8524 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_5_u:
8526 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8527 ; GFX90A-NEXT: ;;#ASMSTART
8528 ; GFX90A-NEXT: ; def s[4:5]
8529 ; GFX90A-NEXT: ;;#ASMEND
8530 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
8531 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
8532 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
8533 ; GFX90A-NEXT: ;;#ASMSTART
8534 ; GFX90A-NEXT: ; use s[8:9]
8535 ; GFX90A-NEXT: ;;#ASMEND
8536 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8538 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_u:
8540 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8541 ; GFX940-NEXT: ;;#ASMSTART
8542 ; GFX940-NEXT: ; def s[0:1]
8543 ; GFX940-NEXT: ;;#ASMEND
8544 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
8545 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
8546 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
8547 ; GFX940-NEXT: ;;#ASMSTART
8548 ; GFX940-NEXT: ; use s[8:9]
8549 ; GFX940-NEXT: ;;#ASMEND
8550 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8551 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8552 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8553 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 poison>
8554 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8555 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 6, poison>: result element 0 is element 3 of %vec1 and
; element 1 is element 2 of %vec1 (mask indices 4-7 address the second
; operand); element 2 is poison. %vec0 is not referenced by the mask.
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8559 define void @s_shuffle_v3bf16_v4bf16__7_6_u() {
8560 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_6_u:
8562 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8563 ; GFX900-NEXT: ;;#ASMSTART
8564 ; GFX900-NEXT: ; def s[4:5]
8565 ; GFX900-NEXT: ;;#ASMEND
8566 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
8567 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
8568 ; GFX900-NEXT: ;;#ASMSTART
8569 ; GFX900-NEXT: ; use s[8:9]
8570 ; GFX900-NEXT: ;;#ASMEND
8571 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8573 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_6_u:
8575 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8576 ; GFX90A-NEXT: ;;#ASMSTART
8577 ; GFX90A-NEXT: ; def s[4:5]
8578 ; GFX90A-NEXT: ;;#ASMEND
8579 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
8580 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
8581 ; GFX90A-NEXT: ;;#ASMSTART
8582 ; GFX90A-NEXT: ; use s[8:9]
8583 ; GFX90A-NEXT: ;;#ASMEND
8584 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8586 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_u:
8588 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8589 ; GFX940-NEXT: ;;#ASMSTART
8590 ; GFX940-NEXT: ; def s[0:1]
8591 ; GFX940-NEXT: ;;#ASMEND
8592 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
8593 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
8594 ; GFX940-NEXT: ;;#ASMSTART
8595 ; GFX940-NEXT: ; use s[8:9]
8596 ; GFX940-NEXT: ;;#ASMEND
8597 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8598 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8599 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8600 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 poison>
8601 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8602 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 7, poison>: result elements 0 and 1 are both element 3 of
; %vec1 (mask indices 4-7 address the second operand); element 2 is poison.
; %vec0 is not referenced by the mask.
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8606 define void @s_shuffle_v3bf16_v4bf16__7_7_u() {
8607 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_7_u:
8609 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8610 ; GFX900-NEXT: ;;#ASMSTART
8611 ; GFX900-NEXT: ; def s[4:5]
8612 ; GFX900-NEXT: ;;#ASMEND
8613 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
8614 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
8615 ; GFX900-NEXT: ;;#ASMSTART
8616 ; GFX900-NEXT: ; use s[8:9]
8617 ; GFX900-NEXT: ;;#ASMEND
8618 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8620 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_7_u:
8622 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8623 ; GFX90A-NEXT: ;;#ASMSTART
8624 ; GFX90A-NEXT: ; def s[4:5]
8625 ; GFX90A-NEXT: ;;#ASMEND
8626 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
8627 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
8628 ; GFX90A-NEXT: ;;#ASMSTART
8629 ; GFX90A-NEXT: ; use s[8:9]
8630 ; GFX90A-NEXT: ;;#ASMEND
8631 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8633 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_u:
8635 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8636 ; GFX940-NEXT: ;;#ASMSTART
8637 ; GFX940-NEXT: ; def s[0:1]
8638 ; GFX940-NEXT: ;;#ASMEND
8639 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
8640 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
8641 ; GFX940-NEXT: ;;#ASMSTART
8642 ; GFX940-NEXT: ; use s[8:9]
8643 ; GFX940-NEXT: ;;#ASMEND
8644 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8645 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8646 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8647 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 poison>
8648 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8649 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 7, 0>: result elements 0 and 1 are both element 3 of %vec1
; and element 2 is element 0 of %vec0 (mask indices 0-3 address the first
; operand, 4-7 the second).
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8653 define void @s_shuffle_v3bf16_v4bf16__7_7_0() {
8654 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_7_0:
8656 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8657 ; GFX900-NEXT: ;;#ASMSTART
8658 ; GFX900-NEXT: ; def s[4:5]
8659 ; GFX900-NEXT: ;;#ASMEND
8660 ; GFX900-NEXT: ;;#ASMSTART
8661 ; GFX900-NEXT: ; def s[6:7]
8662 ; GFX900-NEXT: ;;#ASMEND
8663 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
8664 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
8665 ; GFX900-NEXT: s_mov_b32 s9, s4
8666 ; GFX900-NEXT: ;;#ASMSTART
8667 ; GFX900-NEXT: ; use s[8:9]
8668 ; GFX900-NEXT: ;;#ASMEND
8669 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8671 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_7_0:
8673 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8674 ; GFX90A-NEXT: ;;#ASMSTART
8675 ; GFX90A-NEXT: ; def s[4:5]
8676 ; GFX90A-NEXT: ;;#ASMEND
8677 ; GFX90A-NEXT: ;;#ASMSTART
8678 ; GFX90A-NEXT: ; def s[6:7]
8679 ; GFX90A-NEXT: ;;#ASMEND
8680 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
8681 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
8682 ; GFX90A-NEXT: s_mov_b32 s9, s4
8683 ; GFX90A-NEXT: ;;#ASMSTART
8684 ; GFX90A-NEXT: ; use s[8:9]
8685 ; GFX90A-NEXT: ;;#ASMEND
8686 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8688 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_0:
8690 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8691 ; GFX940-NEXT: ;;#ASMSTART
8692 ; GFX940-NEXT: ; def s[0:1]
8693 ; GFX940-NEXT: ;;#ASMEND
8694 ; GFX940-NEXT: ;;#ASMSTART
8695 ; GFX940-NEXT: ; def s[2:3]
8696 ; GFX940-NEXT: ;;#ASMEND
8697 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
8698 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
8699 ; GFX940-NEXT: s_mov_b32 s9, s0
8700 ; GFX940-NEXT: ;;#ASMSTART
8701 ; GFX940-NEXT: ; use s[8:9]
8702 ; GFX940-NEXT: ;;#ASMEND
8703 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8704 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8705 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8706 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 0>
8707 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8708 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 7, 1>: result elements 0 and 1 are both element 3 of %vec1
; and element 2 is element 1 of %vec0 (mask indices 0-3 address the first
; operand, 4-7 the second).
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8712 define void @s_shuffle_v3bf16_v4bf16__7_7_1() {
8713 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_7_1:
8715 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8716 ; GFX900-NEXT: ;;#ASMSTART
8717 ; GFX900-NEXT: ; def s[4:5]
8718 ; GFX900-NEXT: ;;#ASMEND
8719 ; GFX900-NEXT: ;;#ASMSTART
8720 ; GFX900-NEXT: ; def s[6:7]
8721 ; GFX900-NEXT: ;;#ASMEND
8722 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
8723 ; GFX900-NEXT: s_lshr_b32 s4, s7, 16
8724 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
8725 ; GFX900-NEXT: ;;#ASMSTART
8726 ; GFX900-NEXT: ; use s[8:9]
8727 ; GFX900-NEXT: ;;#ASMEND
8728 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8730 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_7_1:
8732 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8733 ; GFX90A-NEXT: ;;#ASMSTART
8734 ; GFX90A-NEXT: ; def s[4:5]
8735 ; GFX90A-NEXT: ;;#ASMEND
8736 ; GFX90A-NEXT: ;;#ASMSTART
8737 ; GFX90A-NEXT: ; def s[6:7]
8738 ; GFX90A-NEXT: ;;#ASMEND
8739 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
8740 ; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
8741 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
8742 ; GFX90A-NEXT: ;;#ASMSTART
8743 ; GFX90A-NEXT: ; use s[8:9]
8744 ; GFX90A-NEXT: ;;#ASMEND
8745 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8747 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_1:
8749 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8750 ; GFX940-NEXT: ;;#ASMSTART
8751 ; GFX940-NEXT: ; def s[0:1]
8752 ; GFX940-NEXT: ;;#ASMEND
8753 ; GFX940-NEXT: ;;#ASMSTART
8754 ; GFX940-NEXT: ; def s[2:3]
8755 ; GFX940-NEXT: ;;#ASMEND
8756 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
8757 ; GFX940-NEXT: s_lshr_b32 s0, s3, 16
8758 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
8759 ; GFX940-NEXT: ;;#ASMSTART
8760 ; GFX940-NEXT: ; use s[8:9]
8761 ; GFX940-NEXT: ;;#ASMEND
8762 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8763 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8764 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8765 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 1>
8766 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8767 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 7, 2>: result elements 0 and 1 are both element 3 of %vec1
; and element 2 is element 2 of %vec0 (mask indices 0-3 address the first
; operand, 4-7 the second).
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
; NOTE(review): the checks show one "def" landing directly in s[8:9] and only s8
; being rewritten afterwards; presumably that def is %vec0, whose upper dword is
; already in place in s9 -- confirm when regenerating the checks.
8771 define void @s_shuffle_v3bf16_v4bf16__7_7_2() {
8772 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_7_2:
8774 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8775 ; GFX900-NEXT: ;;#ASMSTART
8776 ; GFX900-NEXT: ; def s[4:5]
8777 ; GFX900-NEXT: ;;#ASMEND
8778 ; GFX900-NEXT: ;;#ASMSTART
8779 ; GFX900-NEXT: ; def s[8:9]
8780 ; GFX900-NEXT: ;;#ASMEND
8781 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
8782 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
8783 ; GFX900-NEXT: ;;#ASMSTART
8784 ; GFX900-NEXT: ; use s[8:9]
8785 ; GFX900-NEXT: ;;#ASMEND
8786 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8788 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_7_2:
8790 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8791 ; GFX90A-NEXT: ;;#ASMSTART
8792 ; GFX90A-NEXT: ; def s[4:5]
8793 ; GFX90A-NEXT: ;;#ASMEND
8794 ; GFX90A-NEXT: ;;#ASMSTART
8795 ; GFX90A-NEXT: ; def s[8:9]
8796 ; GFX90A-NEXT: ;;#ASMEND
8797 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
8798 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
8799 ; GFX90A-NEXT: ;;#ASMSTART
8800 ; GFX90A-NEXT: ; use s[8:9]
8801 ; GFX90A-NEXT: ;;#ASMEND
8802 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8804 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_2:
8806 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8807 ; GFX940-NEXT: ;;#ASMSTART
8808 ; GFX940-NEXT: ; def s[0:1]
8809 ; GFX940-NEXT: ;;#ASMEND
8810 ; GFX940-NEXT: ;;#ASMSTART
8811 ; GFX940-NEXT: ; def s[8:9]
8812 ; GFX940-NEXT: ;;#ASMEND
8813 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
8814 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
8815 ; GFX940-NEXT: ;;#ASMSTART
8816 ; GFX940-NEXT: ; use s[8:9]
8817 ; GFX940-NEXT: ;;#ASMEND
8818 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8819 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8820 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8821 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 2>
8822 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8823 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 7, 3>: result elements 0 and 1 are both element 3 of %vec1
; and element 2 is element 3 of %vec0 (mask indices 0-3 address the first
; operand, 4-7 the second).
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8827 define void @s_shuffle_v3bf16_v4bf16__7_7_3() {
8828 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_7_3:
8830 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8831 ; GFX900-NEXT: ;;#ASMSTART
8832 ; GFX900-NEXT: ; def s[4:5]
8833 ; GFX900-NEXT: ;;#ASMEND
8834 ; GFX900-NEXT: ;;#ASMSTART
8835 ; GFX900-NEXT: ; def s[6:7]
8836 ; GFX900-NEXT: ;;#ASMEND
8837 ; GFX900-NEXT: s_lshr_b32 s4, s7, 16
8838 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
8839 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
8840 ; GFX900-NEXT: ;;#ASMSTART
8841 ; GFX900-NEXT: ; use s[8:9]
8842 ; GFX900-NEXT: ;;#ASMEND
8843 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8845 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_7_3:
8847 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8848 ; GFX90A-NEXT: ;;#ASMSTART
8849 ; GFX90A-NEXT: ; def s[4:5]
8850 ; GFX90A-NEXT: ;;#ASMEND
8851 ; GFX90A-NEXT: ;;#ASMSTART
8852 ; GFX90A-NEXT: ; def s[6:7]
8853 ; GFX90A-NEXT: ;;#ASMEND
8854 ; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
8855 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
8856 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
8857 ; GFX90A-NEXT: ;;#ASMSTART
8858 ; GFX90A-NEXT: ; use s[8:9]
8859 ; GFX90A-NEXT: ;;#ASMEND
8860 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8862 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_3:
8864 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8865 ; GFX940-NEXT: ;;#ASMSTART
8866 ; GFX940-NEXT: ; def s[0:1]
8867 ; GFX940-NEXT: ;;#ASMEND
8868 ; GFX940-NEXT: ;;#ASMSTART
8869 ; GFX940-NEXT: ; def s[2:3]
8870 ; GFX940-NEXT: ;;#ASMEND
8871 ; GFX940-NEXT: s_lshr_b32 s0, s3, 16
8872 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
8873 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
8874 ; GFX940-NEXT: ;;#ASMSTART
8875 ; GFX940-NEXT: ; use s[8:9]
8876 ; GFX940-NEXT: ;;#ASMEND
8877 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8878 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8879 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8880 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 3>
8881 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8882 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 7, 4>: result elements 0 and 1 are both element 3 of %vec1
; and element 2 is element 0 of %vec1 (mask indices 4-7 address the second
; operand). %vec0 is not referenced by the mask.
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8886 define void @s_shuffle_v3bf16_v4bf16__7_7_4() {
8887 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_7_4:
8889 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8890 ; GFX900-NEXT: ;;#ASMSTART
8891 ; GFX900-NEXT: ; def s[4:5]
8892 ; GFX900-NEXT: ;;#ASMEND
8893 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
8894 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
8895 ; GFX900-NEXT: s_mov_b32 s9, s4
8896 ; GFX900-NEXT: ;;#ASMSTART
8897 ; GFX900-NEXT: ; use s[8:9]
8898 ; GFX900-NEXT: ;;#ASMEND
8899 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8901 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_7_4:
8903 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8904 ; GFX90A-NEXT: ;;#ASMSTART
8905 ; GFX90A-NEXT: ; def s[4:5]
8906 ; GFX90A-NEXT: ;;#ASMEND
8907 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
8908 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
8909 ; GFX90A-NEXT: s_mov_b32 s9, s4
8910 ; GFX90A-NEXT: ;;#ASMSTART
8911 ; GFX90A-NEXT: ; use s[8:9]
8912 ; GFX90A-NEXT: ;;#ASMEND
8913 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8915 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_4:
8917 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8918 ; GFX940-NEXT: ;;#ASMSTART
8919 ; GFX940-NEXT: ; def s[0:1]
8920 ; GFX940-NEXT: ;;#ASMEND
8921 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
8922 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
8923 ; GFX940-NEXT: s_mov_b32 s9, s0
8924 ; GFX940-NEXT: ;;#ASMSTART
8925 ; GFX940-NEXT: ; use s[8:9]
8926 ; GFX940-NEXT: ;;#ASMEND
8927 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8928 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8929 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8930 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 4>
8931 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8932 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 7, 5>: result elements 0 and 1 are both element 3 of %vec1
; and element 2 is element 1 of %vec1 (mask indices 4-7 address the second
; operand). %vec0 is not referenced by the mask.
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8936 define void @s_shuffle_v3bf16_v4bf16__7_7_5() {
8937 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_7_5:
8939 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8940 ; GFX900-NEXT: ;;#ASMSTART
8941 ; GFX900-NEXT: ; def s[4:5]
8942 ; GFX900-NEXT: ;;#ASMEND
8943 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
8944 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
8945 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
8946 ; GFX900-NEXT: ;;#ASMSTART
8947 ; GFX900-NEXT: ; use s[8:9]
8948 ; GFX900-NEXT: ;;#ASMEND
8949 ; GFX900-NEXT: s_setpc_b64 s[30:31]
8951 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_7_5:
8953 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8954 ; GFX90A-NEXT: ;;#ASMSTART
8955 ; GFX90A-NEXT: ; def s[4:5]
8956 ; GFX90A-NEXT: ;;#ASMEND
8957 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
8958 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
8959 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
8960 ; GFX90A-NEXT: ;;#ASMSTART
8961 ; GFX90A-NEXT: ; use s[8:9]
8962 ; GFX90A-NEXT: ;;#ASMEND
8963 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
8965 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_5:
8967 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8968 ; GFX940-NEXT: ;;#ASMSTART
8969 ; GFX940-NEXT: ; def s[0:1]
8970 ; GFX940-NEXT: ;;#ASMEND
8971 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
8972 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
8973 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
8974 ; GFX940-NEXT: ;;#ASMSTART
8975 ; GFX940-NEXT: ; use s[8:9]
8976 ; GFX940-NEXT: ;;#ASMEND
8977 ; GFX940-NEXT: s_setpc_b64 s[30:31]
8978 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
8979 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
8980 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 5>
8981 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8982 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Shuffle mask <7, 7, 6>: result elements 0 and 1 are both element 3 of %vec1
; and element 2 is element 2 of %vec1 (mask indices 4-7 address the second
; operand). %vec0 is not referenced by the mask.
; The <3 x bfloat> result is widened to <4 x bfloat> and bound to s[8:9] for the "use" asm.
8986 define void @s_shuffle_v3bf16_v4bf16__7_7_6() {
8987 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_7_6:
8989 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8990 ; GFX900-NEXT: ;;#ASMSTART
8991 ; GFX900-NEXT: ; def s[8:9]
8992 ; GFX900-NEXT: ;;#ASMEND
8993 ; GFX900-NEXT: s_lshr_b32 s4, s9, 16
8994 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
8995 ; GFX900-NEXT: ;;#ASMSTART
8996 ; GFX900-NEXT: ; use s[8:9]
8997 ; GFX900-NEXT: ;;#ASMEND
8998 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9000 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_7_6:
9002 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9003 ; GFX90A-NEXT: ;;#ASMSTART
9004 ; GFX90A-NEXT: ; def s[8:9]
9005 ; GFX90A-NEXT: ;;#ASMEND
9006 ; GFX90A-NEXT: s_lshr_b32 s4, s9, 16
9007 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
9008 ; GFX90A-NEXT: ;;#ASMSTART
9009 ; GFX90A-NEXT: ; use s[8:9]
9010 ; GFX90A-NEXT: ;;#ASMEND
9011 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9013 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_6:
9015 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9016 ; GFX940-NEXT: ;;#ASMSTART
9017 ; GFX940-NEXT: ; def s[8:9]
9018 ; GFX940-NEXT: ;;#ASMEND
9019 ; GFX940-NEXT: s_lshr_b32 s0, s9, 16
9020 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
9021 ; GFX940-NEXT: ;;#ASMSTART
9022 ; GFX940-NEXT: ; use s[8:9]
9023 ; GFX940-NEXT: ;;#ASMEND
9024 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9025 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9026 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
9027 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 6>
9028 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9029 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the <4 x bfloat> -> <3 x bfloat> shuffle with
; mask <7, 7, 7>: every result lane is a copy of %vec1[3]; %vec0 is not
; selected by the mask. The expected output below contains only one "def" asm
; block.
9033 define void @s_shuffle_v3bf16_v4bf16__7_7_7() {
9034 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_7_7:
9036 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9037 ; GFX900-NEXT: ;;#ASMSTART
9038 ; GFX900-NEXT: ; def s[4:5]
9039 ; GFX900-NEXT: ;;#ASMEND
9040 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
9041 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s9
9042 ; GFX900-NEXT: ;;#ASMSTART
9043 ; GFX900-NEXT: ; use s[8:9]
9044 ; GFX900-NEXT: ;;#ASMEND
9045 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9047 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_7_7:
9049 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9050 ; GFX90A-NEXT: ;;#ASMSTART
9051 ; GFX90A-NEXT: ; def s[4:5]
9052 ; GFX90A-NEXT: ;;#ASMEND
9053 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
9054 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s9
9055 ; GFX90A-NEXT: ;;#ASMSTART
9056 ; GFX90A-NEXT: ; use s[8:9]
9057 ; GFX90A-NEXT: ;;#ASMEND
9058 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9060 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_7:
9062 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9063 ; GFX940-NEXT: ;;#ASMSTART
9064 ; GFX940-NEXT: ; def s[0:1]
9065 ; GFX940-NEXT: ;;#ASMEND
9066 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
9067 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9
9068 ; GFX940-NEXT: ;;#ASMSTART
9069 ; GFX940-NEXT: ; use s[8:9]
9070 ; GFX940-NEXT: ;;#ASMEND
9071 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; Both inputs are opaque values produced by inline asm in scalar registers.
9072 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9073 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
9074 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 7, i32 7>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9075 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9076 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the <4 x bfloat> -> <3 x bfloat> shuffle with
; mask <poison, 0, 0>: lane 0 is undefined, lanes 1 and 2 are copies of
; %vec0[0]. Single-input form: the second shuffle operand is poison.
9080 define void @s_shuffle_v3bf16_v4bf16__u_0_0() {
9081 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__u_0_0:
9083 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9084 ; GFX900-NEXT: ;;#ASMSTART
9085 ; GFX900-NEXT: ; def s[4:5]
9086 ; GFX900-NEXT: ;;#ASMEND
9087 ; GFX900-NEXT: s_lshl_b32 s8, s4, 16
9088 ; GFX900-NEXT: s_mov_b32 s9, s4
9089 ; GFX900-NEXT: ;;#ASMSTART
9090 ; GFX900-NEXT: ; use s[8:9]
9091 ; GFX900-NEXT: ;;#ASMEND
9092 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9094 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__u_0_0:
9096 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9097 ; GFX90A-NEXT: ;;#ASMSTART
9098 ; GFX90A-NEXT: ; def s[4:5]
9099 ; GFX90A-NEXT: ;;#ASMEND
9100 ; GFX90A-NEXT: s_lshl_b32 s8, s4, 16
9101 ; GFX90A-NEXT: s_mov_b32 s9, s4
9102 ; GFX90A-NEXT: ;;#ASMSTART
9103 ; GFX90A-NEXT: ; use s[8:9]
9104 ; GFX90A-NEXT: ;;#ASMEND
9105 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9107 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__u_0_0:
9109 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9110 ; GFX940-NEXT: ;;#ASMSTART
9111 ; GFX940-NEXT: ; def s[0:1]
9112 ; GFX940-NEXT: ;;#ASMEND
9113 ; GFX940-NEXT: s_lshl_b32 s8, s0, 16
9114 ; GFX940-NEXT: s_mov_b32 s9, s0
9115 ; GFX940-NEXT: ;;#ASMSTART
9116 ; GFX940-NEXT: ; use s[8:9]
9117 ; GFX940-NEXT: ;;#ASMEND
9118 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; The input is an opaque value produced by inline asm in scalar registers.
9119 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9120 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 poison, i32 0, i32 0>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9121 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9122 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the <4 x bfloat> -> <3 x bfloat> shuffle with
; an all-zero mask (written as zeroinitializer): every result lane is a copy of
; %vec0[0]. Single-input form: the second shuffle operand is poison.
9126 define void @s_shuffle_v3bf16_v4bf16__0_0_0() {
9127 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__0_0_0:
9129 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9130 ; GFX900-NEXT: ;;#ASMSTART
9131 ; GFX900-NEXT: ; def s[4:5]
9132 ; GFX900-NEXT: ;;#ASMEND
9133 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
9134 ; GFX900-NEXT: s_mov_b32 s9, s4
9135 ; GFX900-NEXT: ;;#ASMSTART
9136 ; GFX900-NEXT: ; use s[8:9]
9137 ; GFX900-NEXT: ;;#ASMEND
9138 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9140 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__0_0_0:
9142 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9143 ; GFX90A-NEXT: ;;#ASMSTART
9144 ; GFX90A-NEXT: ; def s[4:5]
9145 ; GFX90A-NEXT: ;;#ASMEND
9146 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
9147 ; GFX90A-NEXT: s_mov_b32 s9, s4
9148 ; GFX90A-NEXT: ;;#ASMSTART
9149 ; GFX90A-NEXT: ; use s[8:9]
9150 ; GFX90A-NEXT: ;;#ASMEND
9151 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9153 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__0_0_0:
9155 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9156 ; GFX940-NEXT: ;;#ASMSTART
9157 ; GFX940-NEXT: ; def s[0:1]
9158 ; GFX940-NEXT: ;;#ASMEND
9159 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
9160 ; GFX940-NEXT: s_mov_b32 s9, s0
9161 ; GFX940-NEXT: ;;#ASMSTART
9162 ; GFX940-NEXT: ; use s[8:9]
9163 ; GFX940-NEXT: ;;#ASMEND
9164 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; The input is an opaque value produced by inline asm in scalar registers.
9165 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9166 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> zeroinitializer
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9167 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9168 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the <4 x bfloat> -> <3 x bfloat> shuffle with
; mask <1, 0, 0>: result lanes are %vec0[1], %vec0[0], %vec0[0].
; Single-input form: the second shuffle operand is poison.
9172 define void @s_shuffle_v3bf16_v4bf16__1_0_0() {
9173 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__1_0_0:
9175 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9176 ; GFX900-NEXT: ;;#ASMSTART
9177 ; GFX900-NEXT: ; def s[4:5]
9178 ; GFX900-NEXT: ;;#ASMEND
9179 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
9180 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
9181 ; GFX900-NEXT: s_mov_b32 s9, s4
9182 ; GFX900-NEXT: ;;#ASMSTART
9183 ; GFX900-NEXT: ; use s[8:9]
9184 ; GFX900-NEXT: ;;#ASMEND
9185 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9187 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__1_0_0:
9189 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9190 ; GFX90A-NEXT: ;;#ASMSTART
9191 ; GFX90A-NEXT: ; def s[4:5]
9192 ; GFX90A-NEXT: ;;#ASMEND
9193 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
9194 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
9195 ; GFX90A-NEXT: s_mov_b32 s9, s4
9196 ; GFX90A-NEXT: ;;#ASMSTART
9197 ; GFX90A-NEXT: ; use s[8:9]
9198 ; GFX90A-NEXT: ;;#ASMEND
9199 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9201 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_0_0:
9203 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9204 ; GFX940-NEXT: ;;#ASMSTART
9205 ; GFX940-NEXT: ; def s[0:1]
9206 ; GFX940-NEXT: ;;#ASMEND
9207 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
9208 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
9209 ; GFX940-NEXT: s_mov_b32 s9, s0
9210 ; GFX940-NEXT: ;;#ASMSTART
9211 ; GFX940-NEXT: ; use s[8:9]
9212 ; GFX940-NEXT: ;;#ASMEND
9213 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; The input is an opaque value produced by inline asm in scalar registers.
9214 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9215 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 1, i32 0, i32 0>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9216 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9217 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the <4 x bfloat> -> <3 x bfloat> shuffle with
; mask <2, 0, 0>: result lanes are %vec0[2], %vec0[0], %vec0[0].
; Single-input form: the second shuffle operand is poison.
9221 define void @s_shuffle_v3bf16_v4bf16__2_0_0() {
9222 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__2_0_0:
9224 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9225 ; GFX900-NEXT: ;;#ASMSTART
9226 ; GFX900-NEXT: ; def s[4:5]
9227 ; GFX900-NEXT: ;;#ASMEND
9228 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
9229 ; GFX900-NEXT: s_mov_b32 s9, s4
9230 ; GFX900-NEXT: ;;#ASMSTART
9231 ; GFX900-NEXT: ; use s[8:9]
9232 ; GFX900-NEXT: ;;#ASMEND
9233 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9235 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__2_0_0:
9237 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9238 ; GFX90A-NEXT: ;;#ASMSTART
9239 ; GFX90A-NEXT: ; def s[4:5]
9240 ; GFX90A-NEXT: ;;#ASMEND
9241 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
9242 ; GFX90A-NEXT: s_mov_b32 s9, s4
9243 ; GFX90A-NEXT: ;;#ASMSTART
9244 ; GFX90A-NEXT: ; use s[8:9]
9245 ; GFX90A-NEXT: ;;#ASMEND
9246 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9248 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_0_0:
9250 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9251 ; GFX940-NEXT: ;;#ASMSTART
9252 ; GFX940-NEXT: ; def s[0:1]
9253 ; GFX940-NEXT: ;;#ASMEND
9254 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
9255 ; GFX940-NEXT: s_mov_b32 s9, s0
9256 ; GFX940-NEXT: ;;#ASMSTART
9257 ; GFX940-NEXT: ; use s[8:9]
9258 ; GFX940-NEXT: ;;#ASMEND
9259 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; The input is an opaque value produced by inline asm in scalar registers.
9260 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9261 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 2, i32 0, i32 0>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9262 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9263 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the <4 x bfloat> -> <3 x bfloat> shuffle with
; mask <3, 0, 0>: result lanes are %vec0[3], %vec0[0], %vec0[0].
; Single-input form: the second shuffle operand is poison.
9267 define void @s_shuffle_v3bf16_v4bf16__3_0_0() {
9268 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__3_0_0:
9270 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9271 ; GFX900-NEXT: ;;#ASMSTART
9272 ; GFX900-NEXT: ; def s[4:5]
9273 ; GFX900-NEXT: ;;#ASMEND
9274 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
9275 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
9276 ; GFX900-NEXT: s_mov_b32 s9, s4
9277 ; GFX900-NEXT: ;;#ASMSTART
9278 ; GFX900-NEXT: ; use s[8:9]
9279 ; GFX900-NEXT: ;;#ASMEND
9280 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9282 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__3_0_0:
9284 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9285 ; GFX90A-NEXT: ;;#ASMSTART
9286 ; GFX90A-NEXT: ; def s[4:5]
9287 ; GFX90A-NEXT: ;;#ASMEND
9288 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
9289 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
9290 ; GFX90A-NEXT: s_mov_b32 s9, s4
9291 ; GFX90A-NEXT: ;;#ASMSTART
9292 ; GFX90A-NEXT: ; use s[8:9]
9293 ; GFX90A-NEXT: ;;#ASMEND
9294 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9296 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_0_0:
9298 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9299 ; GFX940-NEXT: ;;#ASMSTART
9300 ; GFX940-NEXT: ; def s[0:1]
9301 ; GFX940-NEXT: ;;#ASMEND
9302 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
9303 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
9304 ; GFX940-NEXT: s_mov_b32 s9, s0
9305 ; GFX940-NEXT: ;;#ASMSTART
9306 ; GFX940-NEXT: ; use s[8:9]
9307 ; GFX940-NEXT: ;;#ASMEND
9308 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; The input is an opaque value produced by inline asm in scalar registers.
9309 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9310 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 3, i32 0, i32 0>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9311 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9312 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the <4 x bfloat> -> <3 x bfloat> shuffle with
; mask <4, 0, 0>. Index 4 addresses element 0 of the second shuffle operand,
; which is poison in this function, so lane 0 carries no defined value; lanes
; 1 and 2 are copies of %vec0[0].
9316 define void @s_shuffle_v3bf16_v4bf16__4_0_0() {
9317 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__4_0_0:
9319 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9320 ; GFX900-NEXT: ;;#ASMSTART
9321 ; GFX900-NEXT: ; def s[4:5]
9322 ; GFX900-NEXT: ;;#ASMEND
9323 ; GFX900-NEXT: s_lshl_b32 s8, s4, 16
9324 ; GFX900-NEXT: s_mov_b32 s9, s4
9325 ; GFX900-NEXT: ;;#ASMSTART
9326 ; GFX900-NEXT: ; use s[8:9]
9327 ; GFX900-NEXT: ;;#ASMEND
9328 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9330 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__4_0_0:
9332 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9333 ; GFX90A-NEXT: ;;#ASMSTART
9334 ; GFX90A-NEXT: ; def s[4:5]
9335 ; GFX90A-NEXT: ;;#ASMEND
9336 ; GFX90A-NEXT: s_lshl_b32 s8, s4, 16
9337 ; GFX90A-NEXT: s_mov_b32 s9, s4
9338 ; GFX90A-NEXT: ;;#ASMSTART
9339 ; GFX90A-NEXT: ; use s[8:9]
9340 ; GFX90A-NEXT: ;;#ASMEND
9341 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9343 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__4_0_0:
9345 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9346 ; GFX940-NEXT: ;;#ASMSTART
9347 ; GFX940-NEXT: ; def s[0:1]
9348 ; GFX940-NEXT: ;;#ASMEND
9349 ; GFX940-NEXT: s_lshl_b32 s8, s0, 16
9350 ; GFX940-NEXT: s_mov_b32 s9, s0
9351 ; GFX940-NEXT: ;;#ASMSTART
9352 ; GFX940-NEXT: ; use s[8:9]
9353 ; GFX940-NEXT: ;;#ASMEND
9354 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; The input is an opaque value produced by inline asm in scalar registers.
9355 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9356 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 4, i32 0, i32 0>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9357 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9358 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the two-input <4 x bfloat> -> <3 x bfloat>
; shuffle with mask <5, 0, 0>: result lanes are %vec1[1], %vec0[0], %vec0[0].
9362 define void @s_shuffle_v3bf16_v4bf16__5_0_0() {
9363 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__5_0_0:
9365 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9366 ; GFX900-NEXT: ;;#ASMSTART
9367 ; GFX900-NEXT: ; def s[4:5]
9368 ; GFX900-NEXT: ;;#ASMEND
9369 ; GFX900-NEXT: ;;#ASMSTART
9370 ; GFX900-NEXT: ; def s[6:7]
9371 ; GFX900-NEXT: ;;#ASMEND
9372 ; GFX900-NEXT: s_lshr_b32 s5, s6, 16
9373 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
9374 ; GFX900-NEXT: s_mov_b32 s9, s4
9375 ; GFX900-NEXT: ;;#ASMSTART
9376 ; GFX900-NEXT: ; use s[8:9]
9377 ; GFX900-NEXT: ;;#ASMEND
9378 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9380 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__5_0_0:
9382 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9383 ; GFX90A-NEXT: ;;#ASMSTART
9384 ; GFX90A-NEXT: ; def s[4:5]
9385 ; GFX90A-NEXT: ;;#ASMEND
9386 ; GFX90A-NEXT: ;;#ASMSTART
9387 ; GFX90A-NEXT: ; def s[6:7]
9388 ; GFX90A-NEXT: ;;#ASMEND
9389 ; GFX90A-NEXT: s_lshr_b32 s5, s6, 16
9390 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
9391 ; GFX90A-NEXT: s_mov_b32 s9, s4
9392 ; GFX90A-NEXT: ;;#ASMSTART
9393 ; GFX90A-NEXT: ; use s[8:9]
9394 ; GFX90A-NEXT: ;;#ASMEND
9395 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9397 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_0_0:
9399 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9400 ; GFX940-NEXT: ;;#ASMSTART
9401 ; GFX940-NEXT: ; def s[0:1]
9402 ; GFX940-NEXT: ;;#ASMEND
9403 ; GFX940-NEXT: ;;#ASMSTART
9404 ; GFX940-NEXT: ; def s[2:3]
9405 ; GFX940-NEXT: ;;#ASMEND
9406 ; GFX940-NEXT: s_lshr_b32 s1, s2, 16
9407 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
9408 ; GFX940-NEXT: s_mov_b32 s9, s0
9409 ; GFX940-NEXT: ;;#ASMSTART
9410 ; GFX940-NEXT: ; use s[8:9]
9411 ; GFX940-NEXT: ;;#ASMEND
9412 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; Both inputs are opaque values produced by inline asm in scalar registers.
9413 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9414 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
9415 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 0, i32 0>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9416 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9417 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the two-input <4 x bfloat> -> <3 x bfloat>
; shuffle with mask <6, 0, 0>: result lanes are %vec1[2], %vec0[0], %vec0[0].
9421 define void @s_shuffle_v3bf16_v4bf16__6_0_0() {
9422 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__6_0_0:
9424 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9425 ; GFX900-NEXT: ;;#ASMSTART
9426 ; GFX900-NEXT: ; def s[4:5]
9427 ; GFX900-NEXT: ;;#ASMEND
9428 ; GFX900-NEXT: ;;#ASMSTART
9429 ; GFX900-NEXT: ; def s[6:7]
9430 ; GFX900-NEXT: ;;#ASMEND
9431 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4
9432 ; GFX900-NEXT: s_mov_b32 s9, s4
9433 ; GFX900-NEXT: ;;#ASMSTART
9434 ; GFX900-NEXT: ; use s[8:9]
9435 ; GFX900-NEXT: ;;#ASMEND
9436 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9438 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__6_0_0:
9440 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9441 ; GFX90A-NEXT: ;;#ASMSTART
9442 ; GFX90A-NEXT: ; def s[4:5]
9443 ; GFX90A-NEXT: ;;#ASMEND
9444 ; GFX90A-NEXT: ;;#ASMSTART
9445 ; GFX90A-NEXT: ; def s[6:7]
9446 ; GFX90A-NEXT: ;;#ASMEND
9447 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4
9448 ; GFX90A-NEXT: s_mov_b32 s9, s4
9449 ; GFX90A-NEXT: ;;#ASMSTART
9450 ; GFX90A-NEXT: ; use s[8:9]
9451 ; GFX90A-NEXT: ;;#ASMEND
9452 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9454 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_0_0:
9456 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9457 ; GFX940-NEXT: ;;#ASMSTART
9458 ; GFX940-NEXT: ; def s[0:1]
9459 ; GFX940-NEXT: ;;#ASMEND
9460 ; GFX940-NEXT: ;;#ASMSTART
9461 ; GFX940-NEXT: ; def s[2:3]
9462 ; GFX940-NEXT: ;;#ASMEND
9463 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0
9464 ; GFX940-NEXT: s_mov_b32 s9, s0
9465 ; GFX940-NEXT: ;;#ASMSTART
9466 ; GFX940-NEXT: ; use s[8:9]
9467 ; GFX940-NEXT: ;;#ASMEND
9468 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; Both inputs are opaque values produced by inline asm in scalar registers.
9469 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9470 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
9471 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 0, i32 0>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9472 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9473 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the two-input <4 x bfloat> -> <3 x bfloat>
; shuffle with mask <7, 0, 0>: result lanes are %vec1[3], %vec0[0], %vec0[0].
9477 define void @s_shuffle_v3bf16_v4bf16__7_0_0() {
9478 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_0_0:
9480 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9481 ; GFX900-NEXT: ;;#ASMSTART
9482 ; GFX900-NEXT: ; def s[4:5]
9483 ; GFX900-NEXT: ;;#ASMEND
9484 ; GFX900-NEXT: ;;#ASMSTART
9485 ; GFX900-NEXT: ; def s[6:7]
9486 ; GFX900-NEXT: ;;#ASMEND
9487 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
9488 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
9489 ; GFX900-NEXT: s_mov_b32 s9, s4
9490 ; GFX900-NEXT: ;;#ASMSTART
9491 ; GFX900-NEXT: ; use s[8:9]
9492 ; GFX900-NEXT: ;;#ASMEND
9493 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9495 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_0_0:
9497 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9498 ; GFX90A-NEXT: ;;#ASMSTART
9499 ; GFX90A-NEXT: ; def s[4:5]
9500 ; GFX90A-NEXT: ;;#ASMEND
9501 ; GFX90A-NEXT: ;;#ASMSTART
9502 ; GFX90A-NEXT: ; def s[6:7]
9503 ; GFX90A-NEXT: ;;#ASMEND
9504 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
9505 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
9506 ; GFX90A-NEXT: s_mov_b32 s9, s4
9507 ; GFX90A-NEXT: ;;#ASMSTART
9508 ; GFX90A-NEXT: ; use s[8:9]
9509 ; GFX90A-NEXT: ;;#ASMEND
9510 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9512 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_0:
9514 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9515 ; GFX940-NEXT: ;;#ASMSTART
9516 ; GFX940-NEXT: ; def s[0:1]
9517 ; GFX940-NEXT: ;;#ASMEND
9518 ; GFX940-NEXT: ;;#ASMSTART
9519 ; GFX940-NEXT: ; def s[2:3]
9520 ; GFX940-NEXT: ;;#ASMEND
9521 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
9522 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
9523 ; GFX940-NEXT: s_mov_b32 s9, s0
9524 ; GFX940-NEXT: ;;#ASMSTART
9525 ; GFX940-NEXT: ; use s[8:9]
9526 ; GFX940-NEXT: ;;#ASMEND
9527 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; Both inputs are opaque values produced by inline asm in scalar registers.
9528 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9529 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
9530 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 0>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9531 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9532 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the two-input <4 x bfloat> -> <3 x bfloat>
; shuffle with mask <7, poison, 0>: lane 0 is %vec1[3], lane 1 is undefined,
; lane 2 is %vec0[0].
9536 define void @s_shuffle_v3bf16_v4bf16__7_u_0() {
9537 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_u_0:
9539 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9540 ; GFX900-NEXT: ;;#ASMSTART
9541 ; GFX900-NEXT: ; def s[4:5]
9542 ; GFX900-NEXT: ;;#ASMEND
9543 ; GFX900-NEXT: ;;#ASMSTART
9544 ; GFX900-NEXT: ; def s[6:7]
9545 ; GFX900-NEXT: ;;#ASMEND
9546 ; GFX900-NEXT: s_lshr_b32 s8, s7, 16
9547 ; GFX900-NEXT: s_mov_b32 s9, s4
9548 ; GFX900-NEXT: ;;#ASMSTART
9549 ; GFX900-NEXT: ; use s[8:9]
9550 ; GFX900-NEXT: ;;#ASMEND
9551 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9553 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_u_0:
9555 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9556 ; GFX90A-NEXT: ;;#ASMSTART
9557 ; GFX90A-NEXT: ; def s[4:5]
9558 ; GFX90A-NEXT: ;;#ASMEND
9559 ; GFX90A-NEXT: ;;#ASMSTART
9560 ; GFX90A-NEXT: ; def s[6:7]
9561 ; GFX90A-NEXT: ;;#ASMEND
9562 ; GFX90A-NEXT: s_lshr_b32 s8, s7, 16
9563 ; GFX90A-NEXT: s_mov_b32 s9, s4
9564 ; GFX90A-NEXT: ;;#ASMSTART
9565 ; GFX90A-NEXT: ; use s[8:9]
9566 ; GFX90A-NEXT: ;;#ASMEND
9567 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9569 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_0:
9571 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9572 ; GFX940-NEXT: ;;#ASMSTART
9573 ; GFX940-NEXT: ; def s[0:1]
9574 ; GFX940-NEXT: ;;#ASMEND
9575 ; GFX940-NEXT: ;;#ASMSTART
9576 ; GFX940-NEXT: ; def s[2:3]
9577 ; GFX940-NEXT: ;;#ASMEND
9578 ; GFX940-NEXT: s_lshr_b32 s8, s3, 16
9579 ; GFX940-NEXT: s_mov_b32 s9, s0
9580 ; GFX940-NEXT: ;;#ASMSTART
9581 ; GFX940-NEXT: ; use s[8:9]
9582 ; GFX940-NEXT: ;;#ASMEND
9583 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; Both inputs are opaque values produced by inline asm in scalar registers.
9584 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9585 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
9586 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 0>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9587 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9588 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the two-input <4 x bfloat> -> <3 x bfloat>
; shuffle with mask <7, 1, 0>: result lanes are %vec1[3], %vec0[1], %vec0[0].
9592 define void @s_shuffle_v3bf16_v4bf16__7_1_0() {
9593 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_1_0:
9595 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9596 ; GFX900-NEXT: ;;#ASMSTART
9597 ; GFX900-NEXT: ; def s[4:5]
9598 ; GFX900-NEXT: ;;#ASMEND
9599 ; GFX900-NEXT: ;;#ASMSTART
9600 ; GFX900-NEXT: ; def s[6:7]
9601 ; GFX900-NEXT: ;;#ASMEND
9602 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
9603 ; GFX900-NEXT: s_lshr_b32 s6, s7, 16
9604 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5
9605 ; GFX900-NEXT: s_mov_b32 s9, s4
9606 ; GFX900-NEXT: ;;#ASMSTART
9607 ; GFX900-NEXT: ; use s[8:9]
9608 ; GFX900-NEXT: ;;#ASMEND
9609 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9611 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_1_0:
9613 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9614 ; GFX90A-NEXT: ;;#ASMSTART
9615 ; GFX90A-NEXT: ; def s[4:5]
9616 ; GFX90A-NEXT: ;;#ASMEND
9617 ; GFX90A-NEXT: ;;#ASMSTART
9618 ; GFX90A-NEXT: ; def s[6:7]
9619 ; GFX90A-NEXT: ;;#ASMEND
9620 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
9621 ; GFX90A-NEXT: s_lshr_b32 s6, s7, 16
9622 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5
9623 ; GFX90A-NEXT: s_mov_b32 s9, s4
9624 ; GFX90A-NEXT: ;;#ASMSTART
9625 ; GFX90A-NEXT: ; use s[8:9]
9626 ; GFX90A-NEXT: ;;#ASMEND
9627 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9629 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_0:
9631 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9632 ; GFX940-NEXT: ;;#ASMSTART
9633 ; GFX940-NEXT: ; def s[0:1]
9634 ; GFX940-NEXT: ;;#ASMEND
9635 ; GFX940-NEXT: ;;#ASMSTART
9636 ; GFX940-NEXT: ; def s[2:3]
9637 ; GFX940-NEXT: ;;#ASMEND
9638 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
9639 ; GFX940-NEXT: s_lshr_b32 s2, s3, 16
9640 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1
9641 ; GFX940-NEXT: s_mov_b32 s9, s0
9642 ; GFX940-NEXT: ;;#ASMSTART
9643 ; GFX940-NEXT: ; use s[8:9]
9644 ; GFX940-NEXT: ;;#ASMEND
9645 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; Both inputs are opaque values produced by inline asm in scalar registers.
9646 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9647 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
9648 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 0>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9649 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9650 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the two-input <4 x bfloat> -> <3 x bfloat>
; shuffle with mask <7, 2, 0>: result lanes are %vec1[3], %vec0[2], %vec0[0].
; In the expected output the two "def" asm blocks appear as s[6:7] first and
; s[4:5] second on gfx900/gfx90a (s[2:3] then s[0:1] on gfx940), with a shift
; scheduled between them.
9654 define void @s_shuffle_v3bf16_v4bf16__7_2_0() {
9655 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_2_0:
9657 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9658 ; GFX900-NEXT: ;;#ASMSTART
9659 ; GFX900-NEXT: ; def s[6:7]
9660 ; GFX900-NEXT: ;;#ASMEND
9661 ; GFX900-NEXT: s_lshr_b32 s6, s7, 16
9662 ; GFX900-NEXT: ;;#ASMSTART
9663 ; GFX900-NEXT: ; def s[4:5]
9664 ; GFX900-NEXT: ;;#ASMEND
9665 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5
9666 ; GFX900-NEXT: s_mov_b32 s9, s4
9667 ; GFX900-NEXT: ;;#ASMSTART
9668 ; GFX900-NEXT: ; use s[8:9]
9669 ; GFX900-NEXT: ;;#ASMEND
9670 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9672 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_2_0:
9674 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9675 ; GFX90A-NEXT: ;;#ASMSTART
9676 ; GFX90A-NEXT: ; def s[6:7]
9677 ; GFX90A-NEXT: ;;#ASMEND
9678 ; GFX90A-NEXT: s_lshr_b32 s6, s7, 16
9679 ; GFX90A-NEXT: ;;#ASMSTART
9680 ; GFX90A-NEXT: ; def s[4:5]
9681 ; GFX90A-NEXT: ;;#ASMEND
9682 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5
9683 ; GFX90A-NEXT: s_mov_b32 s9, s4
9684 ; GFX90A-NEXT: ;;#ASMSTART
9685 ; GFX90A-NEXT: ; use s[8:9]
9686 ; GFX90A-NEXT: ;;#ASMEND
9687 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9689 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_0:
9691 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9692 ; GFX940-NEXT: ;;#ASMSTART
9693 ; GFX940-NEXT: ; def s[2:3]
9694 ; GFX940-NEXT: ;;#ASMEND
9695 ; GFX940-NEXT: s_lshr_b32 s2, s3, 16
9696 ; GFX940-NEXT: ;;#ASMSTART
9697 ; GFX940-NEXT: ; def s[0:1]
9698 ; GFX940-NEXT: ;;#ASMEND
9699 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1
9700 ; GFX940-NEXT: s_mov_b32 s9, s0
9701 ; GFX940-NEXT: ;;#ASMSTART
9702 ; GFX940-NEXT: ; use s[8:9]
9703 ; GFX940-NEXT: ;;#ASMEND
9704 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; Both inputs are opaque values produced by inline asm in scalar registers.
9705 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9706 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
9707 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 0>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9708 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9709 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the two-input <4 x bfloat> -> <3 x bfloat>
; shuffle with mask <7, 3, 0>: result lanes are %vec1[3], %vec0[3], %vec0[0].
9713 define void @s_shuffle_v3bf16_v4bf16__7_3_0() {
9714 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_3_0:
9716 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9717 ; GFX900-NEXT: ;;#ASMSTART
9718 ; GFX900-NEXT: ; def s[4:5]
9719 ; GFX900-NEXT: ;;#ASMEND
9720 ; GFX900-NEXT: ;;#ASMSTART
9721 ; GFX900-NEXT: ; def s[6:7]
9722 ; GFX900-NEXT: ;;#ASMEND
9723 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
9724 ; GFX900-NEXT: s_lshr_b32 s6, s7, 16
9725 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5
9726 ; GFX900-NEXT: s_mov_b32 s9, s4
9727 ; GFX900-NEXT: ;;#ASMSTART
9728 ; GFX900-NEXT: ; use s[8:9]
9729 ; GFX900-NEXT: ;;#ASMEND
9730 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9732 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_3_0:
9734 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9735 ; GFX90A-NEXT: ;;#ASMSTART
9736 ; GFX90A-NEXT: ; def s[4:5]
9737 ; GFX90A-NEXT: ;;#ASMEND
9738 ; GFX90A-NEXT: ;;#ASMSTART
9739 ; GFX90A-NEXT: ; def s[6:7]
9740 ; GFX90A-NEXT: ;;#ASMEND
9741 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
9742 ; GFX90A-NEXT: s_lshr_b32 s6, s7, 16
9743 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5
9744 ; GFX90A-NEXT: s_mov_b32 s9, s4
9745 ; GFX90A-NEXT: ;;#ASMSTART
9746 ; GFX90A-NEXT: ; use s[8:9]
9747 ; GFX90A-NEXT: ;;#ASMEND
9748 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9750 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_0:
9752 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9753 ; GFX940-NEXT: ;;#ASMSTART
9754 ; GFX940-NEXT: ; def s[0:1]
9755 ; GFX940-NEXT: ;;#ASMEND
9756 ; GFX940-NEXT: ;;#ASMSTART
9757 ; GFX940-NEXT: ; def s[2:3]
9758 ; GFX940-NEXT: ;;#ASMEND
9759 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
9760 ; GFX940-NEXT: s_lshr_b32 s2, s3, 16
9761 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1
9762 ; GFX940-NEXT: s_mov_b32 s9, s0
9763 ; GFX940-NEXT: ;;#ASMSTART
9764 ; GFX940-NEXT: ; use s[8:9]
9765 ; GFX940-NEXT: ;;#ASMEND
9766 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; Both inputs are opaque values produced by inline asm in scalar registers.
9767 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9768 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
9769 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 0>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9770 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9771 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the two-input <4 x bfloat> -> <3 x bfloat>
; shuffle with mask <7, 4, 0>: result lanes are %vec1[3], %vec1[0], %vec0[0].
9775 define void @s_shuffle_v3bf16_v4bf16__7_4_0() {
9776 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_4_0:
9778 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9779 ; GFX900-NEXT: ;;#ASMSTART
9780 ; GFX900-NEXT: ; def s[4:5]
9781 ; GFX900-NEXT: ;;#ASMEND
9782 ; GFX900-NEXT: ;;#ASMSTART
9783 ; GFX900-NEXT: ; def s[6:7]
9784 ; GFX900-NEXT: ;;#ASMEND
9785 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
9786 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s6
9787 ; GFX900-NEXT: s_mov_b32 s9, s4
9788 ; GFX900-NEXT: ;;#ASMSTART
9789 ; GFX900-NEXT: ; use s[8:9]
9790 ; GFX900-NEXT: ;;#ASMEND
9791 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9793 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_4_0:
9795 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9796 ; GFX90A-NEXT: ;;#ASMSTART
9797 ; GFX90A-NEXT: ; def s[4:5]
9798 ; GFX90A-NEXT: ;;#ASMEND
9799 ; GFX90A-NEXT: ;;#ASMSTART
9800 ; GFX90A-NEXT: ; def s[6:7]
9801 ; GFX90A-NEXT: ;;#ASMEND
9802 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
9803 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s6
9804 ; GFX90A-NEXT: s_mov_b32 s9, s4
9805 ; GFX90A-NEXT: ;;#ASMSTART
9806 ; GFX90A-NEXT: ; use s[8:9]
9807 ; GFX90A-NEXT: ;;#ASMEND
9808 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9810 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_0:
9812 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9813 ; GFX940-NEXT: ;;#ASMSTART
9814 ; GFX940-NEXT: ; def s[0:1]
9815 ; GFX940-NEXT: ;;#ASMEND
9816 ; GFX940-NEXT: ;;#ASMSTART
9817 ; GFX940-NEXT: ; def s[2:3]
9818 ; GFX940-NEXT: ;;#ASMEND
9819 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
9820 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2
9821 ; GFX940-NEXT: s_mov_b32 s9, s0
9822 ; GFX940-NEXT: ;;#ASMSTART
9823 ; GFX940-NEXT: ; use s[8:9]
9824 ; GFX940-NEXT: ;;#ASMEND
9825 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; Both inputs are opaque values produced by inline asm in scalar registers.
9826 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9827 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
9828 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 0>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9829 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9830 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register (SGPR) case of the two-input <4 x bfloat> -> <3 x bfloat>
; shuffle with mask <7, 5, 0>: result lanes are %vec1[3], %vec1[1], %vec0[0].
9834 define void @s_shuffle_v3bf16_v4bf16__7_5_0() {
9835 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_5_0:
9837 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9838 ; GFX900-NEXT: ;;#ASMSTART
9839 ; GFX900-NEXT: ; def s[4:5]
9840 ; GFX900-NEXT: ;;#ASMEND
9841 ; GFX900-NEXT: ;;#ASMSTART
9842 ; GFX900-NEXT: ; def s[6:7]
9843 ; GFX900-NEXT: ;;#ASMEND
9844 ; GFX900-NEXT: s_lshr_b32 s5, s6, 16
9845 ; GFX900-NEXT: s_lshr_b32 s6, s7, 16
9846 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5
9847 ; GFX900-NEXT: s_mov_b32 s9, s4
9848 ; GFX900-NEXT: ;;#ASMSTART
9849 ; GFX900-NEXT: ; use s[8:9]
9850 ; GFX900-NEXT: ;;#ASMEND
9851 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9853 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_5_0:
9855 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9856 ; GFX90A-NEXT: ;;#ASMSTART
9857 ; GFX90A-NEXT: ; def s[4:5]
9858 ; GFX90A-NEXT: ;;#ASMEND
9859 ; GFX90A-NEXT: ;;#ASMSTART
9860 ; GFX90A-NEXT: ; def s[6:7]
9861 ; GFX90A-NEXT: ;;#ASMEND
9862 ; GFX90A-NEXT: s_lshr_b32 s5, s6, 16
9863 ; GFX90A-NEXT: s_lshr_b32 s6, s7, 16
9864 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5
9865 ; GFX90A-NEXT: s_mov_b32 s9, s4
9866 ; GFX90A-NEXT: ;;#ASMSTART
9867 ; GFX90A-NEXT: ; use s[8:9]
9868 ; GFX90A-NEXT: ;;#ASMEND
9869 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9871 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_0:
9873 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9874 ; GFX940-NEXT: ;;#ASMSTART
9875 ; GFX940-NEXT: ; def s[0:1]
9876 ; GFX940-NEXT: ;;#ASMEND
9877 ; GFX940-NEXT: ;;#ASMSTART
9878 ; GFX940-NEXT: ; def s[2:3]
9879 ; GFX940-NEXT: ;;#ASMEND
9880 ; GFX940-NEXT: s_lshr_b32 s1, s2, 16
9881 ; GFX940-NEXT: s_lshr_b32 s2, s3, 16
9882 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1
9883 ; GFX940-NEXT: s_mov_b32 s9, s0
9884 ; GFX940-NEXT: ;;#ASMSTART
9885 ; GFX940-NEXT: ; use s[8:9]
9886 ; GFX940-NEXT: ;;#ASMEND
9887 ; GFX940-NEXT: s_setpc_b64 s[30:31]
  ; Both inputs are opaque values produced by inline asm in scalar registers.
9888 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9889 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
9890 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 0>
  ; Pad the 3-element result to 4 elements (lane 3 is poison) before handing it
  ; to the inline asm use that is pinned to s[8:9].
9891 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9892 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, mask <7, 6, 0>: result lanes are %vec1[3], %vec1[2],
; %vec0[0] (mask indices 4-7 address the second shufflevector operand).
; The 3-lane result is widened with a poison 4th lane and passed to the
; "use" asm, which is pinned to s[8:9].
9896 define void @s_shuffle_v3bf16_v4bf16__7_6_0() {
9897 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_6_0:
9899 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9900 ; GFX900-NEXT: ;;#ASMSTART
9901 ; GFX900-NEXT: ; def s[4:5]
9902 ; GFX900-NEXT: ;;#ASMEND
9903 ; GFX900-NEXT: ;;#ASMSTART
9904 ; GFX900-NEXT: ; def s[6:7]
9905 ; GFX900-NEXT: ;;#ASMEND
9906 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
9907 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7
9908 ; GFX900-NEXT: s_mov_b32 s9, s4
9909 ; GFX900-NEXT: ;;#ASMSTART
9910 ; GFX900-NEXT: ; use s[8:9]
9911 ; GFX900-NEXT: ;;#ASMEND
9912 ; GFX900-NEXT: s_setpc_b64 s[30:31]
9914 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_6_0:
9916 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9917 ; GFX90A-NEXT: ;;#ASMSTART
9918 ; GFX90A-NEXT: ; def s[4:5]
9919 ; GFX90A-NEXT: ;;#ASMEND
9920 ; GFX90A-NEXT: ;;#ASMSTART
9921 ; GFX90A-NEXT: ; def s[6:7]
9922 ; GFX90A-NEXT: ;;#ASMEND
9923 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
9924 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7
9925 ; GFX90A-NEXT: s_mov_b32 s9, s4
9926 ; GFX90A-NEXT: ;;#ASMSTART
9927 ; GFX90A-NEXT: ; use s[8:9]
9928 ; GFX90A-NEXT: ;;#ASMEND
9929 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
9931 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_0:
9933 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9934 ; GFX940-NEXT: ;;#ASMSTART
9935 ; GFX940-NEXT: ; def s[0:1]
9936 ; GFX940-NEXT: ;;#ASMEND
9937 ; GFX940-NEXT: ;;#ASMSTART
9938 ; GFX940-NEXT: ; def s[2:3]
9939 ; GFX940-NEXT: ;;#ASMEND
9940 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
9941 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3
9942 ; GFX940-NEXT: s_mov_b32 s9, s0
9943 ; GFX940-NEXT: ;;#ASMSTART
9944 ; GFX940-NEXT: ; use s[8:9]
9945 ; GFX940-NEXT: ;;#ASMEND
9946 ; GFX940-NEXT: s_setpc_b64 s[30:31]
9947 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9948 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
9949 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 0>
9950 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9951 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, single input, mask <poison, 1, 1>: result lane 0 is
; unconstrained and lanes 1 and 2 both take %vec0[1]. All three targets share
; one set of checks: the input is defined directly in s[8:9] and only a shift
; into s9 is expected before the "use" asm.
9955 define void @s_shuffle_v3bf16_v4bf16__u_1_1() {
9956 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__u_1_1:
9958 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9959 ; GFX9-NEXT: ;;#ASMSTART
9960 ; GFX9-NEXT: ; def s[8:9]
9961 ; GFX9-NEXT: ;;#ASMEND
9962 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
9963 ; GFX9-NEXT: ;;#ASMSTART
9964 ; GFX9-NEXT: ; use s[8:9]
9965 ; GFX9-NEXT: ;;#ASMEND
9966 ; GFX9-NEXT: s_setpc_b64 s[30:31]
9967 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9968 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 poison, i32 1, i32 1>
9969 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9970 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, single input, mask <0, 1, 1>: result lanes are
; %vec0[0], %vec0[1], %vec0[1]. Lanes 0-1 already sit in the first dword of
; the input, so the shared checks expect only a shift that copies lane 1
; into s9 before the "use" asm pinned to s[8:9].
9974 define void @s_shuffle_v3bf16_v4bf16__0_1_1() {
9975 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__0_1_1:
9977 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9978 ; GFX9-NEXT: ;;#ASMSTART
9979 ; GFX9-NEXT: ; def s[8:9]
9980 ; GFX9-NEXT: ;;#ASMEND
9981 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
9982 ; GFX9-NEXT: ;;#ASMSTART
9983 ; GFX9-NEXT: ; use s[8:9]
9984 ; GFX9-NEXT: ;;#ASMEND
9985 ; GFX9-NEXT: s_setpc_b64 s[30:31]
9986 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
9987 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 1, i32 1>
9988 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9989 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, single input, mask <1, 1, 1>: every result lane is
; %vec0[1] (a splat of lane 1). The checks expect one shift to extract the
; lane into s9 and one s_pack_ll_b32_b16 of s9 with itself to build s8.
; The 3-lane result is widened with a poison 4th lane for the "use" asm.
9993 define void @s_shuffle_v3bf16_v4bf16__1_1_1() {
9994 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__1_1_1:
9996 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9997 ; GFX900-NEXT: ;;#ASMSTART
9998 ; GFX900-NEXT: ; def s[4:5]
9999 ; GFX900-NEXT: ;;#ASMEND
10000 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
10001 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s9
10002 ; GFX900-NEXT: ;;#ASMSTART
10003 ; GFX900-NEXT: ; use s[8:9]
10004 ; GFX900-NEXT: ;;#ASMEND
10005 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10007 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__1_1_1:
10009 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10010 ; GFX90A-NEXT: ;;#ASMSTART
10011 ; GFX90A-NEXT: ; def s[4:5]
10012 ; GFX90A-NEXT: ;;#ASMEND
10013 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
10014 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s9
10015 ; GFX90A-NEXT: ;;#ASMSTART
10016 ; GFX90A-NEXT: ; use s[8:9]
10017 ; GFX90A-NEXT: ;;#ASMEND
10018 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10020 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_1_1:
10022 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10023 ; GFX940-NEXT: ;;#ASMSTART
10024 ; GFX940-NEXT: ; def s[0:1]
10025 ; GFX940-NEXT: ;;#ASMEND
10026 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
10027 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9
10028 ; GFX940-NEXT: ;;#ASMSTART
10029 ; GFX940-NEXT: ; use s[8:9]
10030 ; GFX940-NEXT: ;;#ASMEND
10031 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10032 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10033 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 1, i32 1, i32 1>
10034 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10035 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, single input, mask <2, 1, 1>: result lanes are
; %vec0[2], %vec0[1], %vec0[1]. The checks expect lane 1 to be shifted into
; s9 and then packed with the second input dword (which holds lane 2 in its
; low half) to form s8. The result is widened with a poison 4th lane.
10039 define void @s_shuffle_v3bf16_v4bf16__2_1_1() {
10040 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__2_1_1:
10042 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10043 ; GFX900-NEXT: ;;#ASMSTART
10044 ; GFX900-NEXT: ; def s[4:5]
10045 ; GFX900-NEXT: ;;#ASMEND
10046 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
10047 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9
10048 ; GFX900-NEXT: ;;#ASMSTART
10049 ; GFX900-NEXT: ; use s[8:9]
10050 ; GFX900-NEXT: ;;#ASMEND
10051 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10053 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__2_1_1:
10055 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10056 ; GFX90A-NEXT: ;;#ASMSTART
10057 ; GFX90A-NEXT: ; def s[4:5]
10058 ; GFX90A-NEXT: ;;#ASMEND
10059 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
10060 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9
10061 ; GFX90A-NEXT: ;;#ASMSTART
10062 ; GFX90A-NEXT: ; use s[8:9]
10063 ; GFX90A-NEXT: ;;#ASMEND
10064 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10066 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_1_1:
10068 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10069 ; GFX940-NEXT: ;;#ASMSTART
10070 ; GFX940-NEXT: ; def s[0:1]
10071 ; GFX940-NEXT: ;;#ASMEND
10072 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
10073 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9
10074 ; GFX940-NEXT: ;;#ASMSTART
10075 ; GFX940-NEXT: ; use s[8:9]
10076 ; GFX940-NEXT: ;;#ASMEND
10077 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10078 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10079 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 2, i32 1, i32 1>
10080 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10081 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, single input, mask <3, 1, 1>: result lanes are
; %vec0[3], %vec0[1], %vec0[1]. Both selected lanes live in the high half of
; an input dword, so the checks expect two shifts followed by one
; s_pack_ll_b32_b16. The result is widened with a poison 4th lane.
10085 define void @s_shuffle_v3bf16_v4bf16__3_1_1() {
10086 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__3_1_1:
10088 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10089 ; GFX900-NEXT: ;;#ASMSTART
10090 ; GFX900-NEXT: ; def s[4:5]
10091 ; GFX900-NEXT: ;;#ASMEND
10092 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
10093 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
10094 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
10095 ; GFX900-NEXT: ;;#ASMSTART
10096 ; GFX900-NEXT: ; use s[8:9]
10097 ; GFX900-NEXT: ;;#ASMEND
10098 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10100 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__3_1_1:
10102 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10103 ; GFX90A-NEXT: ;;#ASMSTART
10104 ; GFX90A-NEXT: ; def s[4:5]
10105 ; GFX90A-NEXT: ;;#ASMEND
10106 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
10107 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
10108 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
10109 ; GFX90A-NEXT: ;;#ASMSTART
10110 ; GFX90A-NEXT: ; use s[8:9]
10111 ; GFX90A-NEXT: ;;#ASMEND
10112 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10114 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_1_1:
10116 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10117 ; GFX940-NEXT: ;;#ASMSTART
10118 ; GFX940-NEXT: ; def s[0:1]
10119 ; GFX940-NEXT: ;;#ASMEND
10120 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
10121 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
10122 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
10123 ; GFX940-NEXT: ;;#ASMSTART
10124 ; GFX940-NEXT: ; use s[8:9]
10125 ; GFX940-NEXT: ;;#ASMEND
10126 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10127 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10128 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 3, i32 1, i32 1>
10129 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10130 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, mask <4, 1, 1> with a poison second operand: mask
; index 4 addresses lane 0 of that poison operand, so result lane 0 carries
; no defined value; lanes 1 and 2 both take %vec0[1]. The shared checks
; expect only a shift of lane 1 into s9 before the "use" asm on s[8:9].
10134 define void @s_shuffle_v3bf16_v4bf16__4_1_1() {
10135 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__4_1_1:
10137 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10138 ; GFX9-NEXT: ;;#ASMSTART
10139 ; GFX9-NEXT: ; def s[8:9]
10140 ; GFX9-NEXT: ;;#ASMEND
10141 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
10142 ; GFX9-NEXT: ;;#ASMSTART
10143 ; GFX9-NEXT: ; use s[8:9]
10144 ; GFX9-NEXT: ;;#ASMEND
10145 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10146 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10147 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 4, i32 1, i32 1>
10148 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10149 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, two inputs, mask <5, 1, 1>: result lanes are
; %vec1[1], %vec0[1], %vec0[1] (mask indices 4-7 address the second operand).
; Both source lanes are high halves, so the checks expect two shifts and one
; s_pack_ll_b32_b16. The result is widened with a poison 4th lane.
10153 define void @s_shuffle_v3bf16_v4bf16__5_1_1() {
10154 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__5_1_1:
10156 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10157 ; GFX900-NEXT: ;;#ASMSTART
10158 ; GFX900-NEXT: ; def s[4:5]
10159 ; GFX900-NEXT: ;;#ASMEND
10160 ; GFX900-NEXT: ;;#ASMSTART
10161 ; GFX900-NEXT: ; def s[6:7]
10162 ; GFX900-NEXT: ;;#ASMEND
10163 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
10164 ; GFX900-NEXT: s_lshr_b32 s4, s6, 16
10165 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
10166 ; GFX900-NEXT: ;;#ASMSTART
10167 ; GFX900-NEXT: ; use s[8:9]
10168 ; GFX900-NEXT: ;;#ASMEND
10169 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10171 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__5_1_1:
10173 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10174 ; GFX90A-NEXT: ;;#ASMSTART
10175 ; GFX90A-NEXT: ; def s[4:5]
10176 ; GFX90A-NEXT: ;;#ASMEND
10177 ; GFX90A-NEXT: ;;#ASMSTART
10178 ; GFX90A-NEXT: ; def s[6:7]
10179 ; GFX90A-NEXT: ;;#ASMEND
10180 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
10181 ; GFX90A-NEXT: s_lshr_b32 s4, s6, 16
10182 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
10183 ; GFX90A-NEXT: ;;#ASMSTART
10184 ; GFX90A-NEXT: ; use s[8:9]
10185 ; GFX90A-NEXT: ;;#ASMEND
10186 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10188 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_1_1:
10190 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10191 ; GFX940-NEXT: ;;#ASMSTART
10192 ; GFX940-NEXT: ; def s[0:1]
10193 ; GFX940-NEXT: ;;#ASMEND
10194 ; GFX940-NEXT: ;;#ASMSTART
10195 ; GFX940-NEXT: ; def s[2:3]
10196 ; GFX940-NEXT: ;;#ASMEND
10197 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
10198 ; GFX940-NEXT: s_lshr_b32 s0, s2, 16
10199 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
10200 ; GFX940-NEXT: ;;#ASMSTART
10201 ; GFX940-NEXT: ; use s[8:9]
10202 ; GFX940-NEXT: ;;#ASMEND
10203 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10204 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10205 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
10206 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
10207 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10208 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, two inputs, mask <6, 1, 1>: result lanes are
; %vec1[2], %vec0[1], %vec0[1] (mask indices 4-7 address the second operand).
; The checks expect the shift for lane 1 to be emitted between the two input
; defs, then a single s_pack_ll_b32_b16 using the second dword of the other
; input. The result is widened with a poison 4th lane.
10212 define void @s_shuffle_v3bf16_v4bf16__6_1_1() {
10213 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__6_1_1:
10215 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10216 ; GFX900-NEXT: ;;#ASMSTART
10217 ; GFX900-NEXT: ; def s[4:5]
10218 ; GFX900-NEXT: ;;#ASMEND
10219 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
10220 ; GFX900-NEXT: ;;#ASMSTART
10221 ; GFX900-NEXT: ; def s[6:7]
10222 ; GFX900-NEXT: ;;#ASMEND
10223 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s9
10224 ; GFX900-NEXT: ;;#ASMSTART
10225 ; GFX900-NEXT: ; use s[8:9]
10226 ; GFX900-NEXT: ;;#ASMEND
10227 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10229 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__6_1_1:
10231 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10232 ; GFX90A-NEXT: ;;#ASMSTART
10233 ; GFX90A-NEXT: ; def s[4:5]
10234 ; GFX90A-NEXT: ;;#ASMEND
10235 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
10236 ; GFX90A-NEXT: ;;#ASMSTART
10237 ; GFX90A-NEXT: ; def s[6:7]
10238 ; GFX90A-NEXT: ;;#ASMEND
10239 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s9
10240 ; GFX90A-NEXT: ;;#ASMSTART
10241 ; GFX90A-NEXT: ; use s[8:9]
10242 ; GFX90A-NEXT: ;;#ASMEND
10243 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10245 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_1_1:
10247 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10248 ; GFX940-NEXT: ;;#ASMSTART
10249 ; GFX940-NEXT: ; def s[0:1]
10250 ; GFX940-NEXT: ;;#ASMEND
10251 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
10252 ; GFX940-NEXT: ;;#ASMSTART
10253 ; GFX940-NEXT: ; def s[2:3]
10254 ; GFX940-NEXT: ;;#ASMEND
10255 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s9
10256 ; GFX940-NEXT: ;;#ASMSTART
10257 ; GFX940-NEXT: ; use s[8:9]
10258 ; GFX940-NEXT: ;;#ASMEND
10259 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10260 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10261 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
10262 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
10263 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10264 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, two inputs, mask <7, 1, 1>: result lanes are
; %vec1[3], %vec0[1], %vec0[1] (mask indices 4-7 address the second operand).
; Both source lanes are high halves, so the checks expect two shifts and one
; s_pack_ll_b32_b16. The result is widened with a poison 4th lane.
10268 define void @s_shuffle_v3bf16_v4bf16__7_1_1() {
10269 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_1_1:
10271 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10272 ; GFX900-NEXT: ;;#ASMSTART
10273 ; GFX900-NEXT: ; def s[4:5]
10274 ; GFX900-NEXT: ;;#ASMEND
10275 ; GFX900-NEXT: ;;#ASMSTART
10276 ; GFX900-NEXT: ; def s[6:7]
10277 ; GFX900-NEXT: ;;#ASMEND
10278 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
10279 ; GFX900-NEXT: s_lshr_b32 s4, s7, 16
10280 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
10281 ; GFX900-NEXT: ;;#ASMSTART
10282 ; GFX900-NEXT: ; use s[8:9]
10283 ; GFX900-NEXT: ;;#ASMEND
10284 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10286 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_1_1:
10288 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10289 ; GFX90A-NEXT: ;;#ASMSTART
10290 ; GFX90A-NEXT: ; def s[4:5]
10291 ; GFX90A-NEXT: ;;#ASMEND
10292 ; GFX90A-NEXT: ;;#ASMSTART
10293 ; GFX90A-NEXT: ; def s[6:7]
10294 ; GFX90A-NEXT: ;;#ASMEND
10295 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
10296 ; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
10297 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
10298 ; GFX90A-NEXT: ;;#ASMSTART
10299 ; GFX90A-NEXT: ; use s[8:9]
10300 ; GFX90A-NEXT: ;;#ASMEND
10301 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10303 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_1:
10305 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10306 ; GFX940-NEXT: ;;#ASMSTART
10307 ; GFX940-NEXT: ; def s[0:1]
10308 ; GFX940-NEXT: ;;#ASMEND
10309 ; GFX940-NEXT: ;;#ASMSTART
10310 ; GFX940-NEXT: ; def s[2:3]
10311 ; GFX940-NEXT: ;;#ASMEND
10312 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
10313 ; GFX940-NEXT: s_lshr_b32 s0, s3, 16
10314 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
10315 ; GFX940-NEXT: ;;#ASMSTART
10316 ; GFX940-NEXT: ; use s[8:9]
10317 ; GFX940-NEXT: ;;#ASMEND
10318 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10319 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10320 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
10321 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
10322 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10323 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, two inputs, mask <7, poison, 1>: result lane 0 is
; %vec1[3], lane 1 is unconstrained, lane 2 is %vec0[1]. With the middle
; lane free, the checks expect no pack instruction: just one shift per
; output dword. The result is widened with a poison 4th lane.
10327 define void @s_shuffle_v3bf16_v4bf16__7_u_1() {
10328 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_u_1:
10330 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10331 ; GFX900-NEXT: ;;#ASMSTART
10332 ; GFX900-NEXT: ; def s[4:5]
10333 ; GFX900-NEXT: ;;#ASMEND
10334 ; GFX900-NEXT: ;;#ASMSTART
10335 ; GFX900-NEXT: ; def s[6:7]
10336 ; GFX900-NEXT: ;;#ASMEND
10337 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
10338 ; GFX900-NEXT: s_lshr_b32 s8, s7, 16
10339 ; GFX900-NEXT: ;;#ASMSTART
10340 ; GFX900-NEXT: ; use s[8:9]
10341 ; GFX900-NEXT: ;;#ASMEND
10342 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10344 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_u_1:
10346 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10347 ; GFX90A-NEXT: ;;#ASMSTART
10348 ; GFX90A-NEXT: ; def s[4:5]
10349 ; GFX90A-NEXT: ;;#ASMEND
10350 ; GFX90A-NEXT: ;;#ASMSTART
10351 ; GFX90A-NEXT: ; def s[6:7]
10352 ; GFX90A-NEXT: ;;#ASMEND
10353 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
10354 ; GFX90A-NEXT: s_lshr_b32 s8, s7, 16
10355 ; GFX90A-NEXT: ;;#ASMSTART
10356 ; GFX90A-NEXT: ; use s[8:9]
10357 ; GFX90A-NEXT: ;;#ASMEND
10358 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10360 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_1:
10362 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10363 ; GFX940-NEXT: ;;#ASMSTART
10364 ; GFX940-NEXT: ; def s[0:1]
10365 ; GFX940-NEXT: ;;#ASMEND
10366 ; GFX940-NEXT: ;;#ASMSTART
10367 ; GFX940-NEXT: ; def s[2:3]
10368 ; GFX940-NEXT: ;;#ASMEND
10369 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
10370 ; GFX940-NEXT: s_lshr_b32 s8, s3, 16
10371 ; GFX940-NEXT: ;;#ASMSTART
10372 ; GFX940-NEXT: ; use s[8:9]
10373 ; GFX940-NEXT: ;;#ASMEND
10374 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10375 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10376 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
10377 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
10378 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10379 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, two inputs, mask <7, 0, 1>: result lanes are
; %vec1[3], %vec0[0], %vec0[1] (mask indices 4-7 address the second operand).
; The checks expect a shift plus s_pack_ll_b32_b16 for the first output
; dword and a second shift for lane 2. The result is widened with a poison
; 4th lane before the "use" asm pinned to s[8:9].
10383 define void @s_shuffle_v3bf16_v4bf16__7_0_1() {
10384 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_0_1:
10386 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10387 ; GFX900-NEXT: ;;#ASMSTART
10388 ; GFX900-NEXT: ; def s[4:5]
10389 ; GFX900-NEXT: ;;#ASMEND
10390 ; GFX900-NEXT: ;;#ASMSTART
10391 ; GFX900-NEXT: ; def s[6:7]
10392 ; GFX900-NEXT: ;;#ASMEND
10393 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
10394 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
10395 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
10396 ; GFX900-NEXT: ;;#ASMSTART
10397 ; GFX900-NEXT: ; use s[8:9]
10398 ; GFX900-NEXT: ;;#ASMEND
10399 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10401 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_0_1:
10403 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10404 ; GFX90A-NEXT: ;;#ASMSTART
10405 ; GFX90A-NEXT: ; def s[4:5]
10406 ; GFX90A-NEXT: ;;#ASMEND
10407 ; GFX90A-NEXT: ;;#ASMSTART
10408 ; GFX90A-NEXT: ; def s[6:7]
10409 ; GFX90A-NEXT: ;;#ASMEND
10410 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
10411 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
10412 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
10413 ; GFX90A-NEXT: ;;#ASMSTART
10414 ; GFX90A-NEXT: ; use s[8:9]
10415 ; GFX90A-NEXT: ;;#ASMEND
10416 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10418 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_1:
10420 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10421 ; GFX940-NEXT: ;;#ASMSTART
10422 ; GFX940-NEXT: ; def s[0:1]
10423 ; GFX940-NEXT: ;;#ASMEND
10424 ; GFX940-NEXT: ;;#ASMSTART
10425 ; GFX940-NEXT: ; def s[2:3]
10426 ; GFX940-NEXT: ;;#ASMEND
10427 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
10428 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
10429 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
10430 ; GFX940-NEXT: ;;#ASMSTART
10431 ; GFX940-NEXT: ; use s[8:9]
10432 ; GFX940-NEXT: ;;#ASMEND
10433 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10434 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10435 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
10436 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
10437 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10438 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, two inputs, mask <7, 2, 1>: result lanes are
; %vec1[3], %vec0[2], %vec0[1] (mask indices 4-7 address the second operand).
; NOTE(review): the checks show the higher-numbered register pair defined
; first; presumably harmless because the two "def" asm calls are textually
; identical and not marked sideeffect -- confirm if this ordering changes.
; The result is widened with a poison 4th lane before the "use" asm.
10442 define void @s_shuffle_v3bf16_v4bf16__7_2_1() {
10443 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_2_1:
10445 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10446 ; GFX900-NEXT: ;;#ASMSTART
10447 ; GFX900-NEXT: ; def s[6:7]
10448 ; GFX900-NEXT: ;;#ASMEND
10449 ; GFX900-NEXT: s_lshr_b32 s6, s7, 16
10450 ; GFX900-NEXT: ;;#ASMSTART
10451 ; GFX900-NEXT: ; def s[4:5]
10452 ; GFX900-NEXT: ;;#ASMEND
10453 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5
10454 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
10455 ; GFX900-NEXT: ;;#ASMSTART
10456 ; GFX900-NEXT: ; use s[8:9]
10457 ; GFX900-NEXT: ;;#ASMEND
10458 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10460 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_2_1:
10462 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10463 ; GFX90A-NEXT: ;;#ASMSTART
10464 ; GFX90A-NEXT: ; def s[6:7]
10465 ; GFX90A-NEXT: ;;#ASMEND
10466 ; GFX90A-NEXT: s_lshr_b32 s6, s7, 16
10467 ; GFX90A-NEXT: ;;#ASMSTART
10468 ; GFX90A-NEXT: ; def s[4:5]
10469 ; GFX90A-NEXT: ;;#ASMEND
10470 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5
10471 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
10472 ; GFX90A-NEXT: ;;#ASMSTART
10473 ; GFX90A-NEXT: ; use s[8:9]
10474 ; GFX90A-NEXT: ;;#ASMEND
10475 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10477 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_1:
10479 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10480 ; GFX940-NEXT: ;;#ASMSTART
10481 ; GFX940-NEXT: ; def s[2:3]
10482 ; GFX940-NEXT: ;;#ASMEND
10483 ; GFX940-NEXT: s_lshr_b32 s2, s3, 16
10484 ; GFX940-NEXT: ;;#ASMSTART
10485 ; GFX940-NEXT: ; def s[0:1]
10486 ; GFX940-NEXT: ;;#ASMEND
10487 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1
10488 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
10489 ; GFX940-NEXT: ;;#ASMSTART
10490 ; GFX940-NEXT: ; use s[8:9]
10491 ; GFX940-NEXT: ;;#ASMEND
10492 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10493 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10494 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
10495 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
10496 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10497 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, two inputs, mask <7, 3, 1>: result lanes are
; %vec1[3], %vec0[3], %vec0[1] (mask indices 4-7 address the second operand).
; All three selected lanes are high halves of an input dword, so the checks
; expect three shifts and one s_pack_ll_b32_b16. The result is widened with
; a poison 4th lane before the "use" asm pinned to s[8:9].
10501 define void @s_shuffle_v3bf16_v4bf16__7_3_1() {
10502 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_3_1:
10504 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10505 ; GFX900-NEXT: ;;#ASMSTART
10506 ; GFX900-NEXT: ; def s[4:5]
10507 ; GFX900-NEXT: ;;#ASMEND
10508 ; GFX900-NEXT: ;;#ASMSTART
10509 ; GFX900-NEXT: ; def s[6:7]
10510 ; GFX900-NEXT: ;;#ASMEND
10511 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
10512 ; GFX900-NEXT: s_lshr_b32 s6, s7, 16
10513 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5
10514 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
10515 ; GFX900-NEXT: ;;#ASMSTART
10516 ; GFX900-NEXT: ; use s[8:9]
10517 ; GFX900-NEXT: ;;#ASMEND
10518 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10520 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_3_1:
10522 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10523 ; GFX90A-NEXT: ;;#ASMSTART
10524 ; GFX90A-NEXT: ; def s[4:5]
10525 ; GFX90A-NEXT: ;;#ASMEND
10526 ; GFX90A-NEXT: ;;#ASMSTART
10527 ; GFX90A-NEXT: ; def s[6:7]
10528 ; GFX90A-NEXT: ;;#ASMEND
10529 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
10530 ; GFX90A-NEXT: s_lshr_b32 s6, s7, 16
10531 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5
10532 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
10533 ; GFX90A-NEXT: ;;#ASMSTART
10534 ; GFX90A-NEXT: ; use s[8:9]
10535 ; GFX90A-NEXT: ;;#ASMEND
10536 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10538 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_1:
10540 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10541 ; GFX940-NEXT: ;;#ASMSTART
10542 ; GFX940-NEXT: ; def s[0:1]
10543 ; GFX940-NEXT: ;;#ASMEND
10544 ; GFX940-NEXT: ;;#ASMSTART
10545 ; GFX940-NEXT: ; def s[2:3]
10546 ; GFX940-NEXT: ;;#ASMEND
10547 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
10548 ; GFX940-NEXT: s_lshr_b32 s2, s3, 16
10549 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1
10550 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
10551 ; GFX940-NEXT: ;;#ASMSTART
10552 ; GFX940-NEXT: ; use s[8:9]
10553 ; GFX940-NEXT: ;;#ASMEND
10554 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10555 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10556 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
10557 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
10558 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10559 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, two inputs, mask <7, 4, 1>: result lanes are
; %vec1[3], %vec1[0], %vec0[1] (mask indices 4-7 address the second operand).
; The checks expect a shift plus s_pack_ll_b32_b16 for the first output
; dword and a second shift for lane 2. The result is widened with a poison
; 4th lane before the "use" asm pinned to s[8:9].
10563 define void @s_shuffle_v3bf16_v4bf16__7_4_1() {
10564 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_4_1:
10566 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10567 ; GFX900-NEXT: ;;#ASMSTART
10568 ; GFX900-NEXT: ; def s[4:5]
10569 ; GFX900-NEXT: ;;#ASMEND
10570 ; GFX900-NEXT: ;;#ASMSTART
10571 ; GFX900-NEXT: ; def s[6:7]
10572 ; GFX900-NEXT: ;;#ASMEND
10573 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
10574 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s6
10575 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
10576 ; GFX900-NEXT: ;;#ASMSTART
10577 ; GFX900-NEXT: ; use s[8:9]
10578 ; GFX900-NEXT: ;;#ASMEND
10579 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10581 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_4_1:
10583 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10584 ; GFX90A-NEXT: ;;#ASMSTART
10585 ; GFX90A-NEXT: ; def s[4:5]
10586 ; GFX90A-NEXT: ;;#ASMEND
10587 ; GFX90A-NEXT: ;;#ASMSTART
10588 ; GFX90A-NEXT: ; def s[6:7]
10589 ; GFX90A-NEXT: ;;#ASMEND
10590 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
10591 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s6
10592 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
10593 ; GFX90A-NEXT: ;;#ASMSTART
10594 ; GFX90A-NEXT: ; use s[8:9]
10595 ; GFX90A-NEXT: ;;#ASMEND
10596 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10598 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_1:
10600 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10601 ; GFX940-NEXT: ;;#ASMSTART
10602 ; GFX940-NEXT: ; def s[0:1]
10603 ; GFX940-NEXT: ;;#ASMEND
10604 ; GFX940-NEXT: ;;#ASMSTART
10605 ; GFX940-NEXT: ; def s[2:3]
10606 ; GFX940-NEXT: ;;#ASMEND
10607 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
10608 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2
10609 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
10610 ; GFX940-NEXT: ;;#ASMSTART
10611 ; GFX940-NEXT: ; use s[8:9]
10612 ; GFX940-NEXT: ;;#ASMEND
10613 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10614 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10615 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
10616 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
10617 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10618 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, two inputs, mask <7, 5, 1>: result lanes are
; %vec1[3], %vec1[1], %vec0[1] (mask indices 4-7 address the second operand).
; All three selected lanes are high halves of an input dword, so the checks
; expect three shifts and one s_pack_ll_b32_b16. The result is widened with
; a poison 4th lane before the "use" asm pinned to s[8:9].
10622 define void @s_shuffle_v3bf16_v4bf16__7_5_1() {
10623 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_5_1:
10625 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10626 ; GFX900-NEXT: ;;#ASMSTART
10627 ; GFX900-NEXT: ; def s[4:5]
10628 ; GFX900-NEXT: ;;#ASMEND
10629 ; GFX900-NEXT: ;;#ASMSTART
10630 ; GFX900-NEXT: ; def s[6:7]
10631 ; GFX900-NEXT: ;;#ASMEND
10632 ; GFX900-NEXT: s_lshr_b32 s5, s6, 16
10633 ; GFX900-NEXT: s_lshr_b32 s6, s7, 16
10634 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5
10635 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
10636 ; GFX900-NEXT: ;;#ASMSTART
10637 ; GFX900-NEXT: ; use s[8:9]
10638 ; GFX900-NEXT: ;;#ASMEND
10639 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10641 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_5_1:
10643 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10644 ; GFX90A-NEXT: ;;#ASMSTART
10645 ; GFX90A-NEXT: ; def s[4:5]
10646 ; GFX90A-NEXT: ;;#ASMEND
10647 ; GFX90A-NEXT: ;;#ASMSTART
10648 ; GFX90A-NEXT: ; def s[6:7]
10649 ; GFX90A-NEXT: ;;#ASMEND
10650 ; GFX90A-NEXT: s_lshr_b32 s5, s6, 16
10651 ; GFX90A-NEXT: s_lshr_b32 s6, s7, 16
10652 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5
10653 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
10654 ; GFX90A-NEXT: ;;#ASMSTART
10655 ; GFX90A-NEXT: ; use s[8:9]
10656 ; GFX90A-NEXT: ;;#ASMEND
10657 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10659 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_1:
10661 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10662 ; GFX940-NEXT: ;;#ASMSTART
10663 ; GFX940-NEXT: ; def s[0:1]
10664 ; GFX940-NEXT: ;;#ASMEND
10665 ; GFX940-NEXT: ;;#ASMSTART
10666 ; GFX940-NEXT: ; def s[2:3]
10667 ; GFX940-NEXT: ;;#ASMEND
10668 ; GFX940-NEXT: s_lshr_b32 s1, s2, 16
10669 ; GFX940-NEXT: s_lshr_b32 s2, s3, 16
10670 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1
10671 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
10672 ; GFX940-NEXT: ;;#ASMSTART
10673 ; GFX940-NEXT: ; use s[8:9]
10674 ; GFX940-NEXT: ;;#ASMEND
10675 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10676 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10677 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
10678 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
10679 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10680 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, two inputs, mask <7, 6, 1>: result lanes are
; %vec1[3], %vec1[2], %vec0[1] (mask indices 4-7 address the second operand).
; Lanes 3 and 2 of %vec1 share one input dword in swapped order, so the
; checks expect a shift plus s_pack_ll_b32_b16 on that dword, then a shift
; for lane 2 of the result. The result is widened with a poison 4th lane.
10684 define void @s_shuffle_v3bf16_v4bf16__7_6_1() {
10685 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_6_1:
10687 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10688 ; GFX900-NEXT: ;;#ASMSTART
10689 ; GFX900-NEXT: ; def s[4:5]
10690 ; GFX900-NEXT: ;;#ASMEND
10691 ; GFX900-NEXT: ;;#ASMSTART
10692 ; GFX900-NEXT: ; def s[6:7]
10693 ; GFX900-NEXT: ;;#ASMEND
10694 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
10695 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7
10696 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
10697 ; GFX900-NEXT: ;;#ASMSTART
10698 ; GFX900-NEXT: ; use s[8:9]
10699 ; GFX900-NEXT: ;;#ASMEND
10700 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10702 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_6_1:
10704 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10705 ; GFX90A-NEXT: ;;#ASMSTART
10706 ; GFX90A-NEXT: ; def s[4:5]
10707 ; GFX90A-NEXT: ;;#ASMEND
10708 ; GFX90A-NEXT: ;;#ASMSTART
10709 ; GFX90A-NEXT: ; def s[6:7]
10710 ; GFX90A-NEXT: ;;#ASMEND
10711 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
10712 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7
10713 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
10714 ; GFX90A-NEXT: ;;#ASMSTART
10715 ; GFX90A-NEXT: ; use s[8:9]
10716 ; GFX90A-NEXT: ;;#ASMEND
10717 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10719 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_1:
10721 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10722 ; GFX940-NEXT: ;;#ASMSTART
10723 ; GFX940-NEXT: ; def s[0:1]
10724 ; GFX940-NEXT: ;;#ASMEND
10725 ; GFX940-NEXT: ;;#ASMSTART
10726 ; GFX940-NEXT: ; def s[2:3]
10727 ; GFX940-NEXT: ;;#ASMEND
10728 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
10729 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3
10730 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
10731 ; GFX940-NEXT: ;;#ASMSTART
10732 ; GFX940-NEXT: ; use s[8:9]
10733 ; GFX940-NEXT: ;;#ASMEND
10734 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10735 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10736 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
10737 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
10738 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10739 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register case, single input, mask <poison, 2, 2>: result lane 0 is
; unconstrained and lanes 1 and 2 both take %vec0[2]. Lane 2 is already in
; the low half of s9, so the shared checks expect only a left shift that
; places it in the high half of s8 before the "use" asm pinned to s[8:9].
10743 define void @s_shuffle_v3bf16_v4bf16__u_2_2() {
10744 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__u_2_2:
10746 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10747 ; GFX9-NEXT: ;;#ASMSTART
10748 ; GFX9-NEXT: ; def s[8:9]
10749 ; GFX9-NEXT: ;;#ASMEND
10750 ; GFX9-NEXT: s_lshl_b32 s8, s9, 16
10751 ; GFX9-NEXT: ;;#ASMSTART
10752 ; GFX9-NEXT: ; use s[8:9]
10753 ; GFX9-NEXT: ;;#ASMEND
10754 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10755 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10756 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 poison, i32 2, i32 2>
10757 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10758 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle, mask <0, 2, 2>: lane 0 takes element 0 and lanes 1 and 2 both
; take element 2 of the single "=s" inline-asm definition. The <3 x bfloat>
; result is widened to <4 x bfloat> (lane 3 poison) and consumed by an
; inline-asm use pinned to s[8:9].
10762 define void @s_shuffle_v3bf16_v4bf16__0_2_2() {
10763 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__0_2_2:
10765 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10766 ; GFX9-NEXT: ;;#ASMSTART
10767 ; GFX9-NEXT: ; def s[8:9]
10768 ; GFX9-NEXT: ;;#ASMEND
10769 ; GFX9-NEXT: s_pack_ll_b32_b16 s8, s8, s9
10770 ; GFX9-NEXT: ;;#ASMSTART
10771 ; GFX9-NEXT: ; use s[8:9]
10772 ; GFX9-NEXT: ;;#ASMEND
10773 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10774 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10775 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 2, i32 2>
10776 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10777 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle, mask <1, 2, 2>: lane 0 takes element 1 and lanes 1 and 2 both
; take element 2 of the single "=s" inline-asm definition. The <3 x bfloat>
; result is widened to <4 x bfloat> (lane 3 poison) and consumed by an
; inline-asm use pinned to s[8:9].
10781 define void @s_shuffle_v3bf16_v4bf16__1_2_2() {
10782 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__1_2_2:
10784 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10785 ; GFX900-NEXT: ;;#ASMSTART
10786 ; GFX900-NEXT: ; def s[8:9]
10787 ; GFX900-NEXT: ;;#ASMEND
10788 ; GFX900-NEXT: s_lshr_b32 s4, s8, 16
10789 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
10790 ; GFX900-NEXT: ;;#ASMSTART
10791 ; GFX900-NEXT: ; use s[8:9]
10792 ; GFX900-NEXT: ;;#ASMEND
10793 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10795 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__1_2_2:
10797 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10798 ; GFX90A-NEXT: ;;#ASMSTART
10799 ; GFX90A-NEXT: ; def s[8:9]
10800 ; GFX90A-NEXT: ;;#ASMEND
10801 ; GFX90A-NEXT: s_lshr_b32 s4, s8, 16
10802 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
10803 ; GFX90A-NEXT: ;;#ASMSTART
10804 ; GFX90A-NEXT: ; use s[8:9]
10805 ; GFX90A-NEXT: ;;#ASMEND
10806 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10808 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_2_2:
10810 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10811 ; GFX940-NEXT: ;;#ASMSTART
10812 ; GFX940-NEXT: ; def s[8:9]
10813 ; GFX940-NEXT: ;;#ASMEND
10814 ; GFX940-NEXT: s_lshr_b32 s0, s8, 16
10815 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
10816 ; GFX940-NEXT: ;;#ASMSTART
10817 ; GFX940-NEXT: ; use s[8:9]
10818 ; GFX940-NEXT: ;;#ASMEND
10819 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10820 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10821 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 1, i32 2, i32 2>
10822 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10823 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle, mask <2, 2, 2>: all three result lanes take element 2 of the
; single "=s" inline-asm definition (a splat). The <3 x bfloat> result is
; widened to <4 x bfloat> (lane 3 poison) and consumed by an inline-asm use
; pinned to s[8:9].
10827 define void @s_shuffle_v3bf16_v4bf16__2_2_2() {
10828 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__2_2_2:
10830 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10831 ; GFX9-NEXT: ;;#ASMSTART
10832 ; GFX9-NEXT: ; def s[8:9]
10833 ; GFX9-NEXT: ;;#ASMEND
10834 ; GFX9-NEXT: s_pack_ll_b32_b16 s8, s9, s9
10835 ; GFX9-NEXT: ;;#ASMSTART
10836 ; GFX9-NEXT: ; use s[8:9]
10837 ; GFX9-NEXT: ;;#ASMEND
10838 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10839 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10840 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 2, i32 2, i32 2>
10841 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10842 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle, mask <3, 2, 2>: lane 0 takes element 3 and lanes 1 and 2 both
; take element 2 of the single "=s" inline-asm definition. The <3 x bfloat>
; result is widened to <4 x bfloat> (lane 3 poison) and consumed by an
; inline-asm use pinned to s[8:9].
10846 define void @s_shuffle_v3bf16_v4bf16__3_2_2() {
10847 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__3_2_2:
10849 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10850 ; GFX900-NEXT: ;;#ASMSTART
10851 ; GFX900-NEXT: ; def s[8:9]
10852 ; GFX900-NEXT: ;;#ASMEND
10853 ; GFX900-NEXT: s_lshr_b32 s4, s9, 16
10854 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
10855 ; GFX900-NEXT: ;;#ASMSTART
10856 ; GFX900-NEXT: ; use s[8:9]
10857 ; GFX900-NEXT: ;;#ASMEND
10858 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10860 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__3_2_2:
10862 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10863 ; GFX90A-NEXT: ;;#ASMSTART
10864 ; GFX90A-NEXT: ; def s[8:9]
10865 ; GFX90A-NEXT: ;;#ASMEND
10866 ; GFX90A-NEXT: s_lshr_b32 s4, s9, 16
10867 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
10868 ; GFX90A-NEXT: ;;#ASMSTART
10869 ; GFX90A-NEXT: ; use s[8:9]
10870 ; GFX90A-NEXT: ;;#ASMEND
10871 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10873 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_2_2:
10875 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10876 ; GFX940-NEXT: ;;#ASMSTART
10877 ; GFX940-NEXT: ; def s[8:9]
10878 ; GFX940-NEXT: ;;#ASMEND
10879 ; GFX940-NEXT: s_lshr_b32 s0, s9, 16
10880 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
10881 ; GFX940-NEXT: ;;#ASMSTART
10882 ; GFX940-NEXT: ; use s[8:9]
10883 ; GFX940-NEXT: ;;#ASMEND
10884 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10885 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10886 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 3, i32 2, i32 2>
10887 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10888 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle, mask <4, 2, 2>: lanes 1 and 2 take element 2 of the single
; "=s" inline-asm definition. The <3 x bfloat> result is widened to
; <4 x bfloat> (lane 3 poison) and consumed by an inline-asm use pinned to
; s[8:9].
; NOTE(review): mask index 4 addresses element 0 of the second shuffle operand,
; which is poison in this test, so lane 0 is effectively unspecified and the
; expected code is the same as for the __u_2_2 case. Confirm whether a second
; defined input vector was intended here; if the IR changes, the assertions
; must be regenerated.
10892 define void @s_shuffle_v3bf16_v4bf16__4_2_2() {
10893 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__4_2_2:
10895 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10896 ; GFX9-NEXT: ;;#ASMSTART
10897 ; GFX9-NEXT: ; def s[8:9]
10898 ; GFX9-NEXT: ;;#ASMEND
10899 ; GFX9-NEXT: s_lshl_b32 s8, s9, 16
10900 ; GFX9-NEXT: ;;#ASMSTART
10901 ; GFX9-NEXT: ; use s[8:9]
10902 ; GFX9-NEXT: ;;#ASMEND
10903 ; GFX9-NEXT: s_setpc_b64 s[30:31]
10904 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10905 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 4, i32 2, i32 2>
10906 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10907 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two "=s" inline-asm definitions, mask <5, 2, 2>: lane 0 takes
; element 1 of %vec1 (mask indices 4-7 address the second operand), lanes 1 and
; 2 both take element 2 of %vec0. The <3 x bfloat> result is widened to
; <4 x bfloat> (lane 3 poison) and consumed by an inline-asm use pinned to
; s[8:9].
10911 define void @s_shuffle_v3bf16_v4bf16__5_2_2() {
10912 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__5_2_2:
10914 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10915 ; GFX900-NEXT: ;;#ASMSTART
10916 ; GFX900-NEXT: ; def s[4:5]
10917 ; GFX900-NEXT: ;;#ASMEND
10918 ; GFX900-NEXT: ;;#ASMSTART
10919 ; GFX900-NEXT: ; def s[8:9]
10920 ; GFX900-NEXT: ;;#ASMEND
10921 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
10922 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
10923 ; GFX900-NEXT: ;;#ASMSTART
10924 ; GFX900-NEXT: ; use s[8:9]
10925 ; GFX900-NEXT: ;;#ASMEND
10926 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10928 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__5_2_2:
10930 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10931 ; GFX90A-NEXT: ;;#ASMSTART
10932 ; GFX90A-NEXT: ; def s[4:5]
10933 ; GFX90A-NEXT: ;;#ASMEND
10934 ; GFX90A-NEXT: ;;#ASMSTART
10935 ; GFX90A-NEXT: ; def s[8:9]
10936 ; GFX90A-NEXT: ;;#ASMEND
10937 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
10938 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
10939 ; GFX90A-NEXT: ;;#ASMSTART
10940 ; GFX90A-NEXT: ; use s[8:9]
10941 ; GFX90A-NEXT: ;;#ASMEND
10942 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10944 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_2_2:
10946 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10947 ; GFX940-NEXT: ;;#ASMSTART
10948 ; GFX940-NEXT: ; def s[0:1]
10949 ; GFX940-NEXT: ;;#ASMEND
10950 ; GFX940-NEXT: ;;#ASMSTART
10951 ; GFX940-NEXT: ; def s[8:9]
10952 ; GFX940-NEXT: ;;#ASMEND
10953 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
10954 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
10955 ; GFX940-NEXT: ;;#ASMSTART
10956 ; GFX940-NEXT: ; use s[8:9]
10957 ; GFX940-NEXT: ;;#ASMEND
10958 ; GFX940-NEXT: s_setpc_b64 s[30:31]
10959 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
10960 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
10961 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
10962 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10963 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two "=s" inline-asm definitions, mask <6, 2, 2>: lane 0 takes
; element 2 of %vec1 (mask indices 4-7 address the second operand), lanes 1 and
; 2 both take element 2 of %vec0. The <3 x bfloat> result is widened to
; <4 x bfloat> (lane 3 poison) and consumed by an inline-asm use pinned to
; s[8:9].
10967 define void @s_shuffle_v3bf16_v4bf16__6_2_2() {
10968 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__6_2_2:
10970 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10971 ; GFX900-NEXT: ;;#ASMSTART
10972 ; GFX900-NEXT: ; def s[8:9]
10973 ; GFX900-NEXT: ;;#ASMEND
10974 ; GFX900-NEXT: ;;#ASMSTART
10975 ; GFX900-NEXT: ; def s[4:5]
10976 ; GFX900-NEXT: ;;#ASMEND
10977 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9
10978 ; GFX900-NEXT: ;;#ASMSTART
10979 ; GFX900-NEXT: ; use s[8:9]
10980 ; GFX900-NEXT: ;;#ASMEND
10981 ; GFX900-NEXT: s_setpc_b64 s[30:31]
10983 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__6_2_2:
10985 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10986 ; GFX90A-NEXT: ;;#ASMSTART
10987 ; GFX90A-NEXT: ; def s[8:9]
10988 ; GFX90A-NEXT: ;;#ASMEND
10989 ; GFX90A-NEXT: ;;#ASMSTART
10990 ; GFX90A-NEXT: ; def s[4:5]
10991 ; GFX90A-NEXT: ;;#ASMEND
10992 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9
10993 ; GFX90A-NEXT: ;;#ASMSTART
10994 ; GFX90A-NEXT: ; use s[8:9]
10995 ; GFX90A-NEXT: ;;#ASMEND
10996 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
10998 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_2_2:
11000 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11001 ; GFX940-NEXT: ;;#ASMSTART
11002 ; GFX940-NEXT: ; def s[8:9]
11003 ; GFX940-NEXT: ;;#ASMEND
11004 ; GFX940-NEXT: ;;#ASMSTART
11005 ; GFX940-NEXT: ; def s[0:1]
11006 ; GFX940-NEXT: ;;#ASMEND
11007 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9
11008 ; GFX940-NEXT: ;;#ASMSTART
11009 ; GFX940-NEXT: ; use s[8:9]
11010 ; GFX940-NEXT: ;;#ASMEND
11011 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11012 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11013 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
11014 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
11015 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11016 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two "=s" inline-asm definitions, mask <7, 2, 2>: lane 0 takes
; element 3 of %vec1 (mask indices 4-7 address the second operand), lanes 1 and
; 2 both take element 2 of %vec0. The <3 x bfloat> result is widened to
; <4 x bfloat> (lane 3 poison) and consumed by an inline-asm use pinned to
; s[8:9].
11020 define void @s_shuffle_v3bf16_v4bf16__7_2_2() {
11021 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_2_2:
11023 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11024 ; GFX900-NEXT: ;;#ASMSTART
11025 ; GFX900-NEXT: ; def s[4:5]
11026 ; GFX900-NEXT: ;;#ASMEND
11027 ; GFX900-NEXT: ;;#ASMSTART
11028 ; GFX900-NEXT: ; def s[8:9]
11029 ; GFX900-NEXT: ;;#ASMEND
11030 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
11031 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
11032 ; GFX900-NEXT: ;;#ASMSTART
11033 ; GFX900-NEXT: ; use s[8:9]
11034 ; GFX900-NEXT: ;;#ASMEND
11035 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11037 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_2_2:
11039 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11040 ; GFX90A-NEXT: ;;#ASMSTART
11041 ; GFX90A-NEXT: ; def s[4:5]
11042 ; GFX90A-NEXT: ;;#ASMEND
11043 ; GFX90A-NEXT: ;;#ASMSTART
11044 ; GFX90A-NEXT: ; def s[8:9]
11045 ; GFX90A-NEXT: ;;#ASMEND
11046 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
11047 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
11048 ; GFX90A-NEXT: ;;#ASMSTART
11049 ; GFX90A-NEXT: ; use s[8:9]
11050 ; GFX90A-NEXT: ;;#ASMEND
11051 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11053 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_2:
11055 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11056 ; GFX940-NEXT: ;;#ASMSTART
11057 ; GFX940-NEXT: ; def s[0:1]
11058 ; GFX940-NEXT: ;;#ASMEND
11059 ; GFX940-NEXT: ;;#ASMSTART
11060 ; GFX940-NEXT: ; def s[8:9]
11061 ; GFX940-NEXT: ;;#ASMEND
11062 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
11063 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
11064 ; GFX940-NEXT: ;;#ASMSTART
11065 ; GFX940-NEXT: ; use s[8:9]
11066 ; GFX940-NEXT: ;;#ASMEND
11067 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11068 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11069 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
11070 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
11071 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11072 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two "=s" inline-asm definitions, mask <7, poison, 2>: lane 0
; takes element 3 of %vec1 (mask indices 4-7 address the second operand), lane
; 1 is unspecified, lane 2 takes element 2 of %vec0. The <3 x bfloat> result is
; widened to <4 x bfloat> (lane 3 poison) and consumed by an inline-asm use
; pinned to s[8:9].
11076 define void @s_shuffle_v3bf16_v4bf16__7_u_2() {
11077 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_u_2:
11079 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11080 ; GFX900-NEXT: ;;#ASMSTART
11081 ; GFX900-NEXT: ; def s[8:9]
11082 ; GFX900-NEXT: ;;#ASMEND
11083 ; GFX900-NEXT: ;;#ASMSTART
11084 ; GFX900-NEXT: ; def s[4:5]
11085 ; GFX900-NEXT: ;;#ASMEND
11086 ; GFX900-NEXT: s_lshr_b32 s8, s5, 16
11087 ; GFX900-NEXT: ;;#ASMSTART
11088 ; GFX900-NEXT: ; use s[8:9]
11089 ; GFX900-NEXT: ;;#ASMEND
11090 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11092 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_u_2:
11094 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11095 ; GFX90A-NEXT: ;;#ASMSTART
11096 ; GFX90A-NEXT: ; def s[8:9]
11097 ; GFX90A-NEXT: ;;#ASMEND
11098 ; GFX90A-NEXT: ;;#ASMSTART
11099 ; GFX90A-NEXT: ; def s[4:5]
11100 ; GFX90A-NEXT: ;;#ASMEND
11101 ; GFX90A-NEXT: s_lshr_b32 s8, s5, 16
11102 ; GFX90A-NEXT: ;;#ASMSTART
11103 ; GFX90A-NEXT: ; use s[8:9]
11104 ; GFX90A-NEXT: ;;#ASMEND
11105 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11107 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_2:
11109 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11110 ; GFX940-NEXT: ;;#ASMSTART
11111 ; GFX940-NEXT: ; def s[8:9]
11112 ; GFX940-NEXT: ;;#ASMEND
11113 ; GFX940-NEXT: ;;#ASMSTART
11114 ; GFX940-NEXT: ; def s[0:1]
11115 ; GFX940-NEXT: ;;#ASMEND
11116 ; GFX940-NEXT: s_lshr_b32 s8, s1, 16
11117 ; GFX940-NEXT: ;;#ASMSTART
11118 ; GFX940-NEXT: ; use s[8:9]
11119 ; GFX940-NEXT: ;;#ASMEND
11120 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11121 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11122 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
11123 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
11124 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11125 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two "=s" inline-asm definitions, mask <7, 0, 2>: lane 0 takes
; element 3 of %vec1 (mask indices 4-7 address the second operand), lane 1
; takes element 0 of %vec0, lane 2 takes element 2 of %vec0. The <3 x bfloat>
; result is widened to <4 x bfloat> (lane 3 poison) and consumed by an
; inline-asm use pinned to s[8:9].
11129 define void @s_shuffle_v3bf16_v4bf16__7_0_2() {
11130 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_0_2:
11132 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11133 ; GFX900-NEXT: ;;#ASMSTART
11134 ; GFX900-NEXT: ; def s[4:5]
11135 ; GFX900-NEXT: ;;#ASMEND
11136 ; GFX900-NEXT: ;;#ASMSTART
11137 ; GFX900-NEXT: ; def s[8:9]
11138 ; GFX900-NEXT: ;;#ASMEND
11139 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
11140 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s8
11141 ; GFX900-NEXT: ;;#ASMSTART
11142 ; GFX900-NEXT: ; use s[8:9]
11143 ; GFX900-NEXT: ;;#ASMEND
11144 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11146 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_0_2:
11148 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11149 ; GFX90A-NEXT: ;;#ASMSTART
11150 ; GFX90A-NEXT: ; def s[4:5]
11151 ; GFX90A-NEXT: ;;#ASMEND
11152 ; GFX90A-NEXT: ;;#ASMSTART
11153 ; GFX90A-NEXT: ; def s[8:9]
11154 ; GFX90A-NEXT: ;;#ASMEND
11155 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
11156 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s8
11157 ; GFX90A-NEXT: ;;#ASMSTART
11158 ; GFX90A-NEXT: ; use s[8:9]
11159 ; GFX90A-NEXT: ;;#ASMEND
11160 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11162 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_2:
11164 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11165 ; GFX940-NEXT: ;;#ASMSTART
11166 ; GFX940-NEXT: ; def s[0:1]
11167 ; GFX940-NEXT: ;;#ASMEND
11168 ; GFX940-NEXT: ;;#ASMSTART
11169 ; GFX940-NEXT: ; def s[8:9]
11170 ; GFX940-NEXT: ;;#ASMEND
11171 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
11172 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s8
11173 ; GFX940-NEXT: ;;#ASMSTART
11174 ; GFX940-NEXT: ; use s[8:9]
11175 ; GFX940-NEXT: ;;#ASMEND
11176 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11177 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11178 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
11179 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
11180 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11181 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two "=s" inline-asm definitions, mask <7, 1, 2>: lane 0 takes
; element 3 of %vec1 (mask indices 4-7 address the second operand), lane 1
; takes element 1 of %vec0, lane 2 takes element 2 of %vec0. The <3 x bfloat>
; result is widened to <4 x bfloat> (lane 3 poison) and consumed by an
; inline-asm use pinned to s[8:9].
11185 define void @s_shuffle_v3bf16_v4bf16__7_1_2() {
11186 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_1_2:
11188 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11189 ; GFX900-NEXT: ;;#ASMSTART
11190 ; GFX900-NEXT: ; def s[4:5]
11191 ; GFX900-NEXT: ;;#ASMEND
11192 ; GFX900-NEXT: ;;#ASMSTART
11193 ; GFX900-NEXT: ; def s[8:9]
11194 ; GFX900-NEXT: ;;#ASMEND
11195 ; GFX900-NEXT: s_lshr_b32 s4, s8, 16
11196 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
11197 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
11198 ; GFX900-NEXT: ;;#ASMSTART
11199 ; GFX900-NEXT: ; use s[8:9]
11200 ; GFX900-NEXT: ;;#ASMEND
11201 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11203 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_1_2:
11205 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11206 ; GFX90A-NEXT: ;;#ASMSTART
11207 ; GFX90A-NEXT: ; def s[4:5]
11208 ; GFX90A-NEXT: ;;#ASMEND
11209 ; GFX90A-NEXT: ;;#ASMSTART
11210 ; GFX90A-NEXT: ; def s[8:9]
11211 ; GFX90A-NEXT: ;;#ASMEND
11212 ; GFX90A-NEXT: s_lshr_b32 s4, s8, 16
11213 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
11214 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
11215 ; GFX90A-NEXT: ;;#ASMSTART
11216 ; GFX90A-NEXT: ; use s[8:9]
11217 ; GFX90A-NEXT: ;;#ASMEND
11218 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11220 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_2:
11222 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11223 ; GFX940-NEXT: ;;#ASMSTART
11224 ; GFX940-NEXT: ; def s[0:1]
11225 ; GFX940-NEXT: ;;#ASMEND
11226 ; GFX940-NEXT: ;;#ASMSTART
11227 ; GFX940-NEXT: ; def s[8:9]
11228 ; GFX940-NEXT: ;;#ASMEND
11229 ; GFX940-NEXT: s_lshr_b32 s0, s8, 16
11230 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
11231 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
11232 ; GFX940-NEXT: ;;#ASMSTART
11233 ; GFX940-NEXT: ; use s[8:9]
11234 ; GFX940-NEXT: ;;#ASMEND
11235 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11236 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11237 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
11238 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
11239 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11240 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two "=s" inline-asm definitions, mask <7, 3, 2>: lane 0 takes
; element 3 of %vec1 (mask indices 4-7 address the second operand), lane 1
; takes element 3 of %vec0, lane 2 takes element 2 of %vec0. The <3 x bfloat>
; result is widened to <4 x bfloat> (lane 3 poison) and consumed by an
; inline-asm use pinned to s[8:9].
11244 define void @s_shuffle_v3bf16_v4bf16__7_3_2() {
11245 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_3_2:
11247 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11248 ; GFX900-NEXT: ;;#ASMSTART
11249 ; GFX900-NEXT: ; def s[4:5]
11250 ; GFX900-NEXT: ;;#ASMEND
11251 ; GFX900-NEXT: ;;#ASMSTART
11252 ; GFX900-NEXT: ; def s[8:9]
11253 ; GFX900-NEXT: ;;#ASMEND
11254 ; GFX900-NEXT: s_lshr_b32 s4, s9, 16
11255 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
11256 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
11257 ; GFX900-NEXT: ;;#ASMSTART
11258 ; GFX900-NEXT: ; use s[8:9]
11259 ; GFX900-NEXT: ;;#ASMEND
11260 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11262 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_3_2:
11264 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11265 ; GFX90A-NEXT: ;;#ASMSTART
11266 ; GFX90A-NEXT: ; def s[4:5]
11267 ; GFX90A-NEXT: ;;#ASMEND
11268 ; GFX90A-NEXT: ;;#ASMSTART
11269 ; GFX90A-NEXT: ; def s[8:9]
11270 ; GFX90A-NEXT: ;;#ASMEND
11271 ; GFX90A-NEXT: s_lshr_b32 s4, s9, 16
11272 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
11273 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
11274 ; GFX90A-NEXT: ;;#ASMSTART
11275 ; GFX90A-NEXT: ; use s[8:9]
11276 ; GFX90A-NEXT: ;;#ASMEND
11277 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11279 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_2:
11281 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11282 ; GFX940-NEXT: ;;#ASMSTART
11283 ; GFX940-NEXT: ; def s[0:1]
11284 ; GFX940-NEXT: ;;#ASMEND
11285 ; GFX940-NEXT: ;;#ASMSTART
11286 ; GFX940-NEXT: ; def s[8:9]
11287 ; GFX940-NEXT: ;;#ASMEND
11288 ; GFX940-NEXT: s_lshr_b32 s0, s9, 16
11289 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
11290 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
11291 ; GFX940-NEXT: ;;#ASMSTART
11292 ; GFX940-NEXT: ; use s[8:9]
11293 ; GFX940-NEXT: ;;#ASMEND
11294 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11295 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11296 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
11297 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
11298 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11299 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two "=s" inline-asm definitions, mask <7, 4, 2>: lanes 0 and
; 1 take elements 3 and 0 of %vec1 (mask indices 4-7 address the second
; operand), lane 2 takes element 2 of %vec0. The <3 x bfloat> result is widened
; to <4 x bfloat> (lane 3 poison) and consumed by an inline-asm use pinned to
; s[8:9].
11303 define void @s_shuffle_v3bf16_v4bf16__7_4_2() {
11304 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_4_2:
11306 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11307 ; GFX900-NEXT: ;;#ASMSTART
11308 ; GFX900-NEXT: ; def s[4:5]
11309 ; GFX900-NEXT: ;;#ASMEND
11310 ; GFX900-NEXT: ;;#ASMSTART
11311 ; GFX900-NEXT: ; def s[8:9]
11312 ; GFX900-NEXT: ;;#ASMEND
11313 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
11314 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
11315 ; GFX900-NEXT: ;;#ASMSTART
11316 ; GFX900-NEXT: ; use s[8:9]
11317 ; GFX900-NEXT: ;;#ASMEND
11318 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11320 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_4_2:
11322 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11323 ; GFX90A-NEXT: ;;#ASMSTART
11324 ; GFX90A-NEXT: ; def s[4:5]
11325 ; GFX90A-NEXT: ;;#ASMEND
11326 ; GFX90A-NEXT: ;;#ASMSTART
11327 ; GFX90A-NEXT: ; def s[8:9]
11328 ; GFX90A-NEXT: ;;#ASMEND
11329 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
11330 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
11331 ; GFX90A-NEXT: ;;#ASMSTART
11332 ; GFX90A-NEXT: ; use s[8:9]
11333 ; GFX90A-NEXT: ;;#ASMEND
11334 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11336 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_2:
11338 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11339 ; GFX940-NEXT: ;;#ASMSTART
11340 ; GFX940-NEXT: ; def s[0:1]
11341 ; GFX940-NEXT: ;;#ASMEND
11342 ; GFX940-NEXT: ;;#ASMSTART
11343 ; GFX940-NEXT: ; def s[8:9]
11344 ; GFX940-NEXT: ;;#ASMEND
11345 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
11346 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
11347 ; GFX940-NEXT: ;;#ASMSTART
11348 ; GFX940-NEXT: ; use s[8:9]
11349 ; GFX940-NEXT: ;;#ASMEND
11350 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11351 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11352 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
11353 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
11354 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11355 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two "=s" inline-asm definitions, mask <7, 5, 2>: lanes 0 and
; 1 take elements 3 and 1 of %vec1 (mask indices 4-7 address the second
; operand), lane 2 takes element 2 of %vec0. The <3 x bfloat> result is widened
; to <4 x bfloat> (lane 3 poison) and consumed by an inline-asm use pinned to
; s[8:9].
11359 define void @s_shuffle_v3bf16_v4bf16__7_5_2() {
11360 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_5_2:
11362 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11363 ; GFX900-NEXT: ;;#ASMSTART
11364 ; GFX900-NEXT: ; def s[4:5]
11365 ; GFX900-NEXT: ;;#ASMEND
11366 ; GFX900-NEXT: ;;#ASMSTART
11367 ; GFX900-NEXT: ; def s[8:9]
11368 ; GFX900-NEXT: ;;#ASMEND
11369 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
11370 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
11371 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
11372 ; GFX900-NEXT: ;;#ASMSTART
11373 ; GFX900-NEXT: ; use s[8:9]
11374 ; GFX900-NEXT: ;;#ASMEND
11375 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11377 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_5_2:
11379 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11380 ; GFX90A-NEXT: ;;#ASMSTART
11381 ; GFX90A-NEXT: ; def s[4:5]
11382 ; GFX90A-NEXT: ;;#ASMEND
11383 ; GFX90A-NEXT: ;;#ASMSTART
11384 ; GFX90A-NEXT: ; def s[8:9]
11385 ; GFX90A-NEXT: ;;#ASMEND
11386 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
11387 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
11388 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
11389 ; GFX90A-NEXT: ;;#ASMSTART
11390 ; GFX90A-NEXT: ; use s[8:9]
11391 ; GFX90A-NEXT: ;;#ASMEND
11392 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11394 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_2:
11396 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11397 ; GFX940-NEXT: ;;#ASMSTART
11398 ; GFX940-NEXT: ; def s[0:1]
11399 ; GFX940-NEXT: ;;#ASMEND
11400 ; GFX940-NEXT: ;;#ASMSTART
11401 ; GFX940-NEXT: ; def s[8:9]
11402 ; GFX940-NEXT: ;;#ASMEND
11403 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
11404 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
11405 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
11406 ; GFX940-NEXT: ;;#ASMSTART
11407 ; GFX940-NEXT: ; use s[8:9]
11408 ; GFX940-NEXT: ;;#ASMEND
11409 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11410 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11411 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
11412 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
11413 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11414 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle of two "=s" inline-asm definitions, mask <7, 6, 2>: lanes 0 and
; 1 take elements 3 and 2 of %vec1 (mask indices 4-7 address the second
; operand), lane 2 takes element 2 of %vec0. The <3 x bfloat> result is widened
; to <4 x bfloat> (lane 3 poison) and consumed by an inline-asm use pinned to
; s[8:9].
11418 define void @s_shuffle_v3bf16_v4bf16__7_6_2() {
11419 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_6_2:
11421 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11422 ; GFX900-NEXT: ;;#ASMSTART
11423 ; GFX900-NEXT: ; def s[4:5]
11424 ; GFX900-NEXT: ;;#ASMEND
11425 ; GFX900-NEXT: ;;#ASMSTART
11426 ; GFX900-NEXT: ; def s[8:9]
11427 ; GFX900-NEXT: ;;#ASMEND
11428 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
11429 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
11430 ; GFX900-NEXT: ;;#ASMSTART
11431 ; GFX900-NEXT: ; use s[8:9]
11432 ; GFX900-NEXT: ;;#ASMEND
11433 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11435 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_6_2:
11437 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11438 ; GFX90A-NEXT: ;;#ASMSTART
11439 ; GFX90A-NEXT: ; def s[4:5]
11440 ; GFX90A-NEXT: ;;#ASMEND
11441 ; GFX90A-NEXT: ;;#ASMSTART
11442 ; GFX90A-NEXT: ; def s[8:9]
11443 ; GFX90A-NEXT: ;;#ASMEND
11444 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
11445 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
11446 ; GFX90A-NEXT: ;;#ASMSTART
11447 ; GFX90A-NEXT: ; use s[8:9]
11448 ; GFX90A-NEXT: ;;#ASMEND
11449 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11451 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_2:
11453 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11454 ; GFX940-NEXT: ;;#ASMSTART
11455 ; GFX940-NEXT: ; def s[0:1]
11456 ; GFX940-NEXT: ;;#ASMEND
11457 ; GFX940-NEXT: ;;#ASMSTART
11458 ; GFX940-NEXT: ; def s[8:9]
11459 ; GFX940-NEXT: ;;#ASMEND
11460 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
11461 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
11462 ; GFX940-NEXT: ;;#ASMSTART
11463 ; GFX940-NEXT: ; use s[8:9]
11464 ; GFX940-NEXT: ;;#ASMEND
11465 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11466 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11467 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
11468 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
11469 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11470 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle, mask <poison, 3, 3>: lane 0 is unspecified, lanes 1 and 2 both
; take element 3 of the single "=s" inline-asm definition. The <3 x bfloat>
; result is widened to <4 x bfloat> (lane 3 poison) and consumed by an
; inline-asm use pinned to s[8:9].
11474 define void @s_shuffle_v3bf16_v4bf16__u_3_3() {
11475 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__u_3_3:
11477 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11478 ; GFX900-NEXT: ;;#ASMSTART
11479 ; GFX900-NEXT: ; def s[4:5]
11480 ; GFX900-NEXT: ;;#ASMEND
11481 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
11482 ; GFX900-NEXT: s_mov_b32 s8, s5
11483 ; GFX900-NEXT: ;;#ASMSTART
11484 ; GFX900-NEXT: ; use s[8:9]
11485 ; GFX900-NEXT: ;;#ASMEND
11486 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11488 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__u_3_3:
11490 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11491 ; GFX90A-NEXT: ;;#ASMSTART
11492 ; GFX90A-NEXT: ; def s[4:5]
11493 ; GFX90A-NEXT: ;;#ASMEND
11494 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
11495 ; GFX90A-NEXT: s_mov_b32 s8, s5
11496 ; GFX90A-NEXT: ;;#ASMSTART
11497 ; GFX90A-NEXT: ; use s[8:9]
11498 ; GFX90A-NEXT: ;;#ASMEND
11499 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11501 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__u_3_3:
11503 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11504 ; GFX940-NEXT: ;;#ASMSTART
11505 ; GFX940-NEXT: ; def s[0:1]
11506 ; GFX940-NEXT: ;;#ASMEND
11507 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
11508 ; GFX940-NEXT: s_mov_b32 s8, s1
11509 ; GFX940-NEXT: ;;#ASMSTART
11510 ; GFX940-NEXT: ; use s[8:9]
11511 ; GFX940-NEXT: ;;#ASMEND
11512 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11513 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11514 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 poison, i32 3, i32 3>
11515 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11516 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle, mask <0, 3, 3>: lane 0 takes element 0 and lanes 1 and 2 both
; take element 3 of the single "=s" inline-asm definition. The <3 x bfloat>
; result is widened to <4 x bfloat> (lane 3 poison) and consumed by an
; inline-asm use pinned to s[8:9].
11520 define void @s_shuffle_v3bf16_v4bf16__0_3_3() {
11521 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__0_3_3:
11523 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11524 ; GFX900-NEXT: ;;#ASMSTART
11525 ; GFX900-NEXT: ; def s[4:5]
11526 ; GFX900-NEXT: ;;#ASMEND
11527 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
11528 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
11529 ; GFX900-NEXT: ;;#ASMSTART
11530 ; GFX900-NEXT: ; use s[8:9]
11531 ; GFX900-NEXT: ;;#ASMEND
11532 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11534 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__0_3_3:
11536 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11537 ; GFX90A-NEXT: ;;#ASMSTART
11538 ; GFX90A-NEXT: ; def s[4:5]
11539 ; GFX90A-NEXT: ;;#ASMEND
11540 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
11541 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
11542 ; GFX90A-NEXT: ;;#ASMSTART
11543 ; GFX90A-NEXT: ; use s[8:9]
11544 ; GFX90A-NEXT: ;;#ASMEND
11545 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11547 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__0_3_3:
11549 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11550 ; GFX940-NEXT: ;;#ASMSTART
11551 ; GFX940-NEXT: ; def s[0:1]
11552 ; GFX940-NEXT: ;;#ASMEND
11553 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
11554 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
11555 ; GFX940-NEXT: ;;#ASMSTART
11556 ; GFX940-NEXT: ; use s[8:9]
11557 ; GFX940-NEXT: ;;#ASMEND
11558 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11559 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11560 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 3, i32 3>
11561 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11562 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle, mask <1, 3, 3>: lane 0 takes element 1 and lanes 1 and 2 both
; take element 3 of the single "=s" inline-asm definition. The <3 x bfloat>
; result is widened to <4 x bfloat> (lane 3 poison) and consumed by an
; inline-asm use pinned to s[8:9].
11566 define void @s_shuffle_v3bf16_v4bf16__1_3_3() {
11567 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__1_3_3:
11569 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11570 ; GFX900-NEXT: ;;#ASMSTART
11571 ; GFX900-NEXT: ; def s[4:5]
11572 ; GFX900-NEXT: ;;#ASMEND
11573 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
11574 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
11575 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
11576 ; GFX900-NEXT: ;;#ASMSTART
11577 ; GFX900-NEXT: ; use s[8:9]
11578 ; GFX900-NEXT: ;;#ASMEND
11579 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11581 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__1_3_3:
11583 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11584 ; GFX90A-NEXT: ;;#ASMSTART
11585 ; GFX90A-NEXT: ; def s[4:5]
11586 ; GFX90A-NEXT: ;;#ASMEND
11587 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
11588 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
11589 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
11590 ; GFX90A-NEXT: ;;#ASMSTART
11591 ; GFX90A-NEXT: ; use s[8:9]
11592 ; GFX90A-NEXT: ;;#ASMEND
11593 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11595 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_3_3:
11597 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11598 ; GFX940-NEXT: ;;#ASMSTART
11599 ; GFX940-NEXT: ; def s[0:1]
11600 ; GFX940-NEXT: ;;#ASMEND
11601 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
11602 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
11603 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
11604 ; GFX940-NEXT: ;;#ASMSTART
11605 ; GFX940-NEXT: ; use s[8:9]
11606 ; GFX940-NEXT: ;;#ASMEND
11607 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11608 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11609 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 1, i32 3, i32 3>
11610 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11611 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar shuffle test, mask <2, 3, 3>, single source (second operand is poison).
; The 3-lane result is widened to <4 x bfloat> (last lane poison) and handed to
; inline asm in s[8:9]. Lanes 2 and 3 already sit side by side in the upper
; register of the source pair, so the expected code copies that register to s8
; with s_mov_b32 and produces s9 by shifting the same register right by 16.
11615 define void @s_shuffle_v3bf16_v4bf16__2_3_3() {
11616 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__2_3_3:
11618 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11619 ; GFX900-NEXT: ;;#ASMSTART
11620 ; GFX900-NEXT: ; def s[4:5]
11621 ; GFX900-NEXT: ;;#ASMEND
11622 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
11623 ; GFX900-NEXT: s_mov_b32 s8, s5
11624 ; GFX900-NEXT: ;;#ASMSTART
11625 ; GFX900-NEXT: ; use s[8:9]
11626 ; GFX900-NEXT: ;;#ASMEND
11627 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11629 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__2_3_3:
11631 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11632 ; GFX90A-NEXT: ;;#ASMSTART
11633 ; GFX90A-NEXT: ; def s[4:5]
11634 ; GFX90A-NEXT: ;;#ASMEND
11635 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
11636 ; GFX90A-NEXT: s_mov_b32 s8, s5
11637 ; GFX90A-NEXT: ;;#ASMSTART
11638 ; GFX90A-NEXT: ; use s[8:9]
11639 ; GFX90A-NEXT: ;;#ASMEND
11640 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11642 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_3_3:
11644 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11645 ; GFX940-NEXT: ;;#ASMSTART
11646 ; GFX940-NEXT: ; def s[0:1]
11647 ; GFX940-NEXT: ;;#ASMEND
11648 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
11649 ; GFX940-NEXT: s_mov_b32 s8, s1
11650 ; GFX940-NEXT: ;;#ASMSTART
11651 ; GFX940-NEXT: ; use s[8:9]
11652 ; GFX940-NEXT: ;;#ASMEND
11653 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11654 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11655 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 2, i32 3, i32 3>
11656 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11657 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar shuffle test, mask <3, 3, 3> (splat of lane 3), single source (second
; operand is poison). The 3-lane result is widened to <4 x bfloat> (last lane
; poison) and handed to inline asm in s[8:9]. The expected code shifts lane 3
; down into s9 and then packs s9 with itself to form s8.
11661 define void @s_shuffle_v3bf16_v4bf16__3_3_3() {
11662 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__3_3_3:
11664 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11665 ; GFX900-NEXT: ;;#ASMSTART
11666 ; GFX900-NEXT: ; def s[4:5]
11667 ; GFX900-NEXT: ;;#ASMEND
11668 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
11669 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s9
11670 ; GFX900-NEXT: ;;#ASMSTART
11671 ; GFX900-NEXT: ; use s[8:9]
11672 ; GFX900-NEXT: ;;#ASMEND
11673 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11675 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__3_3_3:
11677 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11678 ; GFX90A-NEXT: ;;#ASMSTART
11679 ; GFX90A-NEXT: ; def s[4:5]
11680 ; GFX90A-NEXT: ;;#ASMEND
11681 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
11682 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s9
11683 ; GFX90A-NEXT: ;;#ASMSTART
11684 ; GFX90A-NEXT: ; use s[8:9]
11685 ; GFX90A-NEXT: ;;#ASMEND
11686 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11688 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_3_3:
11690 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11691 ; GFX940-NEXT: ;;#ASMSTART
11692 ; GFX940-NEXT: ; def s[0:1]
11693 ; GFX940-NEXT: ;;#ASMEND
11694 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
11695 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9
11696 ; GFX940-NEXT: ;;#ASMSTART
11697 ; GFX940-NEXT: ; use s[8:9]
11698 ; GFX940-NEXT: ;;#ASMEND
11699 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11700 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11701 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 3, i32 3, i32 3>
11702 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11703 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar shuffle test, mask <4, 3, 3>. Only %vec0 is defined; the second
; shuffle operand is poison, so mask index 4 (lane 0 of the second operand)
; yields an unspecified lane 0. The 3-lane result is widened to <4 x bfloat>
; (last lane poison) and handed to inline asm in s[8:9]. The expected code
; copies the upper source register to s8 and shifts it right by 16 for s9.
; NOTE(review): index 4 never reads a real second vector here, so this case
; does not exercise a cross-operand select - confirm that is intended.
11707 define void @s_shuffle_v3bf16_v4bf16__4_3_3() {
11708 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__4_3_3:
11710 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11711 ; GFX900-NEXT: ;;#ASMSTART
11712 ; GFX900-NEXT: ; def s[4:5]
11713 ; GFX900-NEXT: ;;#ASMEND
11714 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
11715 ; GFX900-NEXT: s_mov_b32 s8, s5
11716 ; GFX900-NEXT: ;;#ASMSTART
11717 ; GFX900-NEXT: ; use s[8:9]
11718 ; GFX900-NEXT: ;;#ASMEND
11719 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11721 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__4_3_3:
11723 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11724 ; GFX90A-NEXT: ;;#ASMSTART
11725 ; GFX90A-NEXT: ; def s[4:5]
11726 ; GFX90A-NEXT: ;;#ASMEND
11727 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
11728 ; GFX90A-NEXT: s_mov_b32 s8, s5
11729 ; GFX90A-NEXT: ;;#ASMSTART
11730 ; GFX90A-NEXT: ; use s[8:9]
11731 ; GFX90A-NEXT: ;;#ASMEND
11732 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11734 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__4_3_3:
11736 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11737 ; GFX940-NEXT: ;;#ASMSTART
11738 ; GFX940-NEXT: ; def s[0:1]
11739 ; GFX940-NEXT: ;;#ASMEND
11740 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
11741 ; GFX940-NEXT: s_mov_b32 s8, s1
11742 ; GFX940-NEXT: ;;#ASMSTART
11743 ; GFX940-NEXT: ; use s[8:9]
11744 ; GFX940-NEXT: ;;#ASMEND
11745 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11746 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11747 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 4, i32 3, i32 3>
11748 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11749 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar two-source shuffle test, mask <5, 3, 3>: index 5 selects lane 1 of
; %vec1, index 3 selects lane 3 of %vec0. The 3-lane result is widened to
; <4 x bfloat> (last lane poison) and handed to inline asm in s[8:9]. Both
; selected lanes are upper halves, so the expected code shifts each down by 16,
; packs them into s8 and reuses the shifted %vec0 lane 3 as s9.
11753 define void @s_shuffle_v3bf16_v4bf16__5_3_3() {
11754 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__5_3_3:
11756 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11757 ; GFX900-NEXT: ;;#ASMSTART
11758 ; GFX900-NEXT: ; def s[4:5]
11759 ; GFX900-NEXT: ;;#ASMEND
11760 ; GFX900-NEXT: ;;#ASMSTART
11761 ; GFX900-NEXT: ; def s[6:7]
11762 ; GFX900-NEXT: ;;#ASMEND
11763 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
11764 ; GFX900-NEXT: s_lshr_b32 s4, s6, 16
11765 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
11766 ; GFX900-NEXT: ;;#ASMSTART
11767 ; GFX900-NEXT: ; use s[8:9]
11768 ; GFX900-NEXT: ;;#ASMEND
11769 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11771 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__5_3_3:
11773 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11774 ; GFX90A-NEXT: ;;#ASMSTART
11775 ; GFX90A-NEXT: ; def s[4:5]
11776 ; GFX90A-NEXT: ;;#ASMEND
11777 ; GFX90A-NEXT: ;;#ASMSTART
11778 ; GFX90A-NEXT: ; def s[6:7]
11779 ; GFX90A-NEXT: ;;#ASMEND
11780 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
11781 ; GFX90A-NEXT: s_lshr_b32 s4, s6, 16
11782 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
11783 ; GFX90A-NEXT: ;;#ASMSTART
11784 ; GFX90A-NEXT: ; use s[8:9]
11785 ; GFX90A-NEXT: ;;#ASMEND
11786 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11788 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_3_3:
11790 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11791 ; GFX940-NEXT: ;;#ASMSTART
11792 ; GFX940-NEXT: ; def s[0:1]
11793 ; GFX940-NEXT: ;;#ASMEND
11794 ; GFX940-NEXT: ;;#ASMSTART
11795 ; GFX940-NEXT: ; def s[2:3]
11796 ; GFX940-NEXT: ;;#ASMEND
11797 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
11798 ; GFX940-NEXT: s_lshr_b32 s0, s2, 16
11799 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
11800 ; GFX940-NEXT: ;;#ASMSTART
11801 ; GFX940-NEXT: ; use s[8:9]
11802 ; GFX940-NEXT: ;;#ASMEND
11803 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11804 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11805 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
11806 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
11807 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11808 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar two-source shuffle test, mask <6, 3, 3>: index 6 selects lane 2 of
; %vec1, index 3 selects lane 3 of %vec0. The 3-lane result is widened to
; <4 x bfloat> (last lane poison) and handed to inline asm in s[8:9]. Lane 2 of
; %vec1 is the low half of its upper register, so the expected code feeds that
; register to s_pack_ll_b32_b16 without a shift; only %vec0 lane 3 is shifted
; (into s9). In the expected output that shift sits between the two asm defs.
11812 define void @s_shuffle_v3bf16_v4bf16__6_3_3() {
11813 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__6_3_3:
11815 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11816 ; GFX900-NEXT: ;;#ASMSTART
11817 ; GFX900-NEXT: ; def s[4:5]
11818 ; GFX900-NEXT: ;;#ASMEND
11819 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
11820 ; GFX900-NEXT: ;;#ASMSTART
11821 ; GFX900-NEXT: ; def s[6:7]
11822 ; GFX900-NEXT: ;;#ASMEND
11823 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s9
11824 ; GFX900-NEXT: ;;#ASMSTART
11825 ; GFX900-NEXT: ; use s[8:9]
11826 ; GFX900-NEXT: ;;#ASMEND
11827 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11829 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__6_3_3:
11831 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11832 ; GFX90A-NEXT: ;;#ASMSTART
11833 ; GFX90A-NEXT: ; def s[4:5]
11834 ; GFX90A-NEXT: ;;#ASMEND
11835 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
11836 ; GFX90A-NEXT: ;;#ASMSTART
11837 ; GFX90A-NEXT: ; def s[6:7]
11838 ; GFX90A-NEXT: ;;#ASMEND
11839 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s9
11840 ; GFX90A-NEXT: ;;#ASMSTART
11841 ; GFX90A-NEXT: ; use s[8:9]
11842 ; GFX90A-NEXT: ;;#ASMEND
11843 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11845 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_3_3:
11847 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11848 ; GFX940-NEXT: ;;#ASMSTART
11849 ; GFX940-NEXT: ; def s[0:1]
11850 ; GFX940-NEXT: ;;#ASMEND
11851 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
11852 ; GFX940-NEXT: ;;#ASMSTART
11853 ; GFX940-NEXT: ; def s[2:3]
11854 ; GFX940-NEXT: ;;#ASMEND
11855 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s9
11856 ; GFX940-NEXT: ;;#ASMSTART
11857 ; GFX940-NEXT: ; use s[8:9]
11858 ; GFX940-NEXT: ;;#ASMEND
11859 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11860 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11861 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
11862 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
11863 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11864 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar two-source shuffle test, mask <7, 3, 3>: index 7 selects lane 3 of
; %vec1, index 3 selects lane 3 of %vec0. The 3-lane result is widened to
; <4 x bfloat> (last lane poison) and handed to inline asm in s[8:9]. Both
; selected lanes are upper halves of the upper source registers, so the
; expected code shifts each down by 16, packs them into s8 and reuses the
; shifted %vec0 lane 3 as s9.
11868 define void @s_shuffle_v3bf16_v4bf16__7_3_3() {
11869 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_3_3:
11871 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11872 ; GFX900-NEXT: ;;#ASMSTART
11873 ; GFX900-NEXT: ; def s[4:5]
11874 ; GFX900-NEXT: ;;#ASMEND
11875 ; GFX900-NEXT: ;;#ASMSTART
11876 ; GFX900-NEXT: ; def s[6:7]
11877 ; GFX900-NEXT: ;;#ASMEND
11878 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
11879 ; GFX900-NEXT: s_lshr_b32 s4, s7, 16
11880 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
11881 ; GFX900-NEXT: ;;#ASMSTART
11882 ; GFX900-NEXT: ; use s[8:9]
11883 ; GFX900-NEXT: ;;#ASMEND
11884 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11886 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_3_3:
11888 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11889 ; GFX90A-NEXT: ;;#ASMSTART
11890 ; GFX90A-NEXT: ; def s[4:5]
11891 ; GFX90A-NEXT: ;;#ASMEND
11892 ; GFX90A-NEXT: ;;#ASMSTART
11893 ; GFX90A-NEXT: ; def s[6:7]
11894 ; GFX90A-NEXT: ;;#ASMEND
11895 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
11896 ; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
11897 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
11898 ; GFX90A-NEXT: ;;#ASMSTART
11899 ; GFX90A-NEXT: ; use s[8:9]
11900 ; GFX90A-NEXT: ;;#ASMEND
11901 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11903 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_3:
11905 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11906 ; GFX940-NEXT: ;;#ASMSTART
11907 ; GFX940-NEXT: ; def s[0:1]
11908 ; GFX940-NEXT: ;;#ASMEND
11909 ; GFX940-NEXT: ;;#ASMSTART
11910 ; GFX940-NEXT: ; def s[2:3]
11911 ; GFX940-NEXT: ;;#ASMEND
11912 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
11913 ; GFX940-NEXT: s_lshr_b32 s0, s3, 16
11914 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
11915 ; GFX940-NEXT: ;;#ASMSTART
11916 ; GFX940-NEXT: ; use s[8:9]
11917 ; GFX940-NEXT: ;;#ASMEND
11918 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11919 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11920 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
11921 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
11922 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11923 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar two-source shuffle test, mask <7, poison, 3>: index 7 selects lane 3
; of %vec1, the middle lane is unspecified, index 3 selects lane 3 of %vec0.
; The 3-lane result is widened to <4 x bfloat> (last lane poison) and handed to
; inline asm in s[8:9]. With the middle lane unspecified the expected code
; needs no pack: s8 and s9 are each a plain 16-bit right shift of the upper
; source register of %vec1 and %vec0 respectively.
11927 define void @s_shuffle_v3bf16_v4bf16__7_u_3() {
11928 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_u_3:
11930 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11931 ; GFX900-NEXT: ;;#ASMSTART
11932 ; GFX900-NEXT: ; def s[4:5]
11933 ; GFX900-NEXT: ;;#ASMEND
11934 ; GFX900-NEXT: ;;#ASMSTART
11935 ; GFX900-NEXT: ; def s[6:7]
11936 ; GFX900-NEXT: ;;#ASMEND
11937 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
11938 ; GFX900-NEXT: s_lshr_b32 s8, s7, 16
11939 ; GFX900-NEXT: ;;#ASMSTART
11940 ; GFX900-NEXT: ; use s[8:9]
11941 ; GFX900-NEXT: ;;#ASMEND
11942 ; GFX900-NEXT: s_setpc_b64 s[30:31]
11944 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_u_3:
11946 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11947 ; GFX90A-NEXT: ;;#ASMSTART
11948 ; GFX90A-NEXT: ; def s[4:5]
11949 ; GFX90A-NEXT: ;;#ASMEND
11950 ; GFX90A-NEXT: ;;#ASMSTART
11951 ; GFX90A-NEXT: ; def s[6:7]
11952 ; GFX90A-NEXT: ;;#ASMEND
11953 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
11954 ; GFX90A-NEXT: s_lshr_b32 s8, s7, 16
11955 ; GFX90A-NEXT: ;;#ASMSTART
11956 ; GFX90A-NEXT: ; use s[8:9]
11957 ; GFX90A-NEXT: ;;#ASMEND
11958 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
11960 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_3:
11962 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11963 ; GFX940-NEXT: ;;#ASMSTART
11964 ; GFX940-NEXT: ; def s[0:1]
11965 ; GFX940-NEXT: ;;#ASMEND
11966 ; GFX940-NEXT: ;;#ASMSTART
11967 ; GFX940-NEXT: ; def s[2:3]
11968 ; GFX940-NEXT: ;;#ASMEND
11969 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
11970 ; GFX940-NEXT: s_lshr_b32 s8, s3, 16
11971 ; GFX940-NEXT: ;;#ASMSTART
11972 ; GFX940-NEXT: ; use s[8:9]
11973 ; GFX940-NEXT: ;;#ASMEND
11974 ; GFX940-NEXT: s_setpc_b64 s[30:31]
11975 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
11976 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
11977 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
11978 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11979 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar two-source shuffle test, mask <7, 0, 3>: index 7 selects lane 3 of
; %vec1, indices 0 and 3 select lanes 0 and 3 of %vec0. The 3-lane result is
; widened to <4 x bfloat> (last lane poison) and handed to inline asm in
; s[8:9]. The expected code shifts the %vec1 lane down by 16 and packs it with
; the low half of the first %vec0 register into s8; s9 is %vec0 lane 3 shifted
; down. In the expected output the higher-numbered register pair is defined
; first, with the first shift placed between the two asm defs.
11983 define void @s_shuffle_v3bf16_v4bf16__7_0_3() {
11984 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_0_3:
11986 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11987 ; GFX900-NEXT: ;;#ASMSTART
11988 ; GFX900-NEXT: ; def s[6:7]
11989 ; GFX900-NEXT: ;;#ASMEND
11990 ; GFX900-NEXT: s_lshr_b32 s6, s7, 16
11991 ; GFX900-NEXT: ;;#ASMSTART
11992 ; GFX900-NEXT: ; def s[4:5]
11993 ; GFX900-NEXT: ;;#ASMEND
11994 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4
11995 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
11996 ; GFX900-NEXT: ;;#ASMSTART
11997 ; GFX900-NEXT: ; use s[8:9]
11998 ; GFX900-NEXT: ;;#ASMEND
11999 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12001 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_0_3:
12003 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12004 ; GFX90A-NEXT: ;;#ASMSTART
12005 ; GFX90A-NEXT: ; def s[6:7]
12006 ; GFX90A-NEXT: ;;#ASMEND
12007 ; GFX90A-NEXT: s_lshr_b32 s6, s7, 16
12008 ; GFX90A-NEXT: ;;#ASMSTART
12009 ; GFX90A-NEXT: ; def s[4:5]
12010 ; GFX90A-NEXT: ;;#ASMEND
12011 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4
12012 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
12013 ; GFX90A-NEXT: ;;#ASMSTART
12014 ; GFX90A-NEXT: ; use s[8:9]
12015 ; GFX90A-NEXT: ;;#ASMEND
12016 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12018 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_3:
12020 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12021 ; GFX940-NEXT: ;;#ASMSTART
12022 ; GFX940-NEXT: ; def s[2:3]
12023 ; GFX940-NEXT: ;;#ASMEND
12024 ; GFX940-NEXT: s_lshr_b32 s2, s3, 16
12025 ; GFX940-NEXT: ;;#ASMSTART
12026 ; GFX940-NEXT: ; def s[0:1]
12027 ; GFX940-NEXT: ;;#ASMEND
12028 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0
12029 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
12030 ; GFX940-NEXT: ;;#ASMSTART
12031 ; GFX940-NEXT: ; use s[8:9]
12032 ; GFX940-NEXT: ;;#ASMEND
12033 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12034 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12035 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
12036 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
12037 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12038 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar two-source shuffle test, mask <7, 1, 3>: index 7 selects lane 3 of
; %vec1, indices 1 and 3 select lanes 1 and 3 of %vec0. The 3-lane result is
; widened to <4 x bfloat> (last lane poison) and handed to inline asm in
; s[8:9]. All three selected lanes are upper halves, so the expected code uses
; three 16-bit right shifts: two feed s_pack_ll_b32_b16 for s8, the third
; writes s9 directly.
12042 define void @s_shuffle_v3bf16_v4bf16__7_1_3() {
12043 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_1_3:
12045 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12046 ; GFX900-NEXT: ;;#ASMSTART
12047 ; GFX900-NEXT: ; def s[4:5]
12048 ; GFX900-NEXT: ;;#ASMEND
12049 ; GFX900-NEXT: ;;#ASMSTART
12050 ; GFX900-NEXT: ; def s[6:7]
12051 ; GFX900-NEXT: ;;#ASMEND
12052 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
12053 ; GFX900-NEXT: s_lshr_b32 s6, s7, 16
12054 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4
12055 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
12056 ; GFX900-NEXT: ;;#ASMSTART
12057 ; GFX900-NEXT: ; use s[8:9]
12058 ; GFX900-NEXT: ;;#ASMEND
12059 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12061 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_1_3:
12063 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12064 ; GFX90A-NEXT: ;;#ASMSTART
12065 ; GFX90A-NEXT: ; def s[4:5]
12066 ; GFX90A-NEXT: ;;#ASMEND
12067 ; GFX90A-NEXT: ;;#ASMSTART
12068 ; GFX90A-NEXT: ; def s[6:7]
12069 ; GFX90A-NEXT: ;;#ASMEND
12070 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
12071 ; GFX90A-NEXT: s_lshr_b32 s6, s7, 16
12072 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4
12073 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
12074 ; GFX90A-NEXT: ;;#ASMSTART
12075 ; GFX90A-NEXT: ; use s[8:9]
12076 ; GFX90A-NEXT: ;;#ASMEND
12077 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12079 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_3:
12081 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12082 ; GFX940-NEXT: ;;#ASMSTART
12083 ; GFX940-NEXT: ; def s[0:1]
12084 ; GFX940-NEXT: ;;#ASMEND
12085 ; GFX940-NEXT: ;;#ASMSTART
12086 ; GFX940-NEXT: ; def s[2:3]
12087 ; GFX940-NEXT: ;;#ASMEND
12088 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
12089 ; GFX940-NEXT: s_lshr_b32 s2, s3, 16
12090 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0
12091 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
12092 ; GFX940-NEXT: ;;#ASMSTART
12093 ; GFX940-NEXT: ; use s[8:9]
12094 ; GFX940-NEXT: ;;#ASMEND
12095 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12096 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12097 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
12098 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
12099 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12100 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar two-source shuffle test, mask <7, 2, 3>: index 7 selects lane 3 of
; %vec1, indices 2 and 3 select lanes 2 and 3 of %vec0. The 3-lane result is
; widened to <4 x bfloat> (last lane poison) and handed to inline asm in
; s[8:9]. The expected code shifts the %vec1 lane down by 16 and packs it with
; the low half of the upper %vec0 register (lane 2) into s8; s9 is that same
; %vec0 register shifted right by 16 (lane 3).
12104 define void @s_shuffle_v3bf16_v4bf16__7_2_3() {
12105 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_2_3:
12107 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12108 ; GFX900-NEXT: ;;#ASMSTART
12109 ; GFX900-NEXT: ; def s[4:5]
12110 ; GFX900-NEXT: ;;#ASMEND
12111 ; GFX900-NEXT: ;;#ASMSTART
12112 ; GFX900-NEXT: ; def s[6:7]
12113 ; GFX900-NEXT: ;;#ASMEND
12114 ; GFX900-NEXT: s_lshr_b32 s4, s7, 16
12115 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
12116 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
12117 ; GFX900-NEXT: ;;#ASMSTART
12118 ; GFX900-NEXT: ; use s[8:9]
12119 ; GFX900-NEXT: ;;#ASMEND
12120 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12122 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_2_3:
12124 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12125 ; GFX90A-NEXT: ;;#ASMSTART
12126 ; GFX90A-NEXT: ; def s[4:5]
12127 ; GFX90A-NEXT: ;;#ASMEND
12128 ; GFX90A-NEXT: ;;#ASMSTART
12129 ; GFX90A-NEXT: ; def s[6:7]
12130 ; GFX90A-NEXT: ;;#ASMEND
12131 ; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
12132 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
12133 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
12134 ; GFX90A-NEXT: ;;#ASMSTART
12135 ; GFX90A-NEXT: ; use s[8:9]
12136 ; GFX90A-NEXT: ;;#ASMEND
12137 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12139 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_3:
12141 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12142 ; GFX940-NEXT: ;;#ASMSTART
12143 ; GFX940-NEXT: ; def s[0:1]
12144 ; GFX940-NEXT: ;;#ASMEND
12145 ; GFX940-NEXT: ;;#ASMSTART
12146 ; GFX940-NEXT: ; def s[2:3]
12147 ; GFX940-NEXT: ;;#ASMEND
12148 ; GFX940-NEXT: s_lshr_b32 s0, s3, 16
12149 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
12150 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
12151 ; GFX940-NEXT: ;;#ASMSTART
12152 ; GFX940-NEXT: ; use s[8:9]
12153 ; GFX940-NEXT: ;;#ASMEND
12154 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12155 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12156 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
12157 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
12158 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12159 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar two-source shuffle test, mask <7, 4, 3>: indices 7 and 4 select lanes
; 3 and 0 of %vec1, index 3 selects lane 3 of %vec0. The 3-lane result is
; widened to <4 x bfloat> (last lane poison) and handed to inline asm in
; s[8:9]. The expected code shifts %vec1 lane 3 down by 16 and packs it with
; the low half of the first %vec1 register (lane 0) into s8; s9 is %vec0
; lane 3 shifted down.
12163 define void @s_shuffle_v3bf16_v4bf16__7_4_3() {
12164 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_4_3:
12166 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12167 ; GFX900-NEXT: ;;#ASMSTART
12168 ; GFX900-NEXT: ; def s[4:5]
12169 ; GFX900-NEXT: ;;#ASMEND
12170 ; GFX900-NEXT: ;;#ASMSTART
12171 ; GFX900-NEXT: ; def s[6:7]
12172 ; GFX900-NEXT: ;;#ASMEND
12173 ; GFX900-NEXT: s_lshr_b32 s4, s7, 16
12174 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6
12175 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
12176 ; GFX900-NEXT: ;;#ASMSTART
12177 ; GFX900-NEXT: ; use s[8:9]
12178 ; GFX900-NEXT: ;;#ASMEND
12179 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12181 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_4_3:
12183 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12184 ; GFX90A-NEXT: ;;#ASMSTART
12185 ; GFX90A-NEXT: ; def s[4:5]
12186 ; GFX90A-NEXT: ;;#ASMEND
12187 ; GFX90A-NEXT: ;;#ASMSTART
12188 ; GFX90A-NEXT: ; def s[6:7]
12189 ; GFX90A-NEXT: ;;#ASMEND
12190 ; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
12191 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6
12192 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
12193 ; GFX90A-NEXT: ;;#ASMSTART
12194 ; GFX90A-NEXT: ; use s[8:9]
12195 ; GFX90A-NEXT: ;;#ASMEND
12196 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12198 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_3:
12200 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12201 ; GFX940-NEXT: ;;#ASMSTART
12202 ; GFX940-NEXT: ; def s[0:1]
12203 ; GFX940-NEXT: ;;#ASMEND
12204 ; GFX940-NEXT: ;;#ASMSTART
12205 ; GFX940-NEXT: ; def s[2:3]
12206 ; GFX940-NEXT: ;;#ASMEND
12207 ; GFX940-NEXT: s_lshr_b32 s0, s3, 16
12208 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2
12209 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
12210 ; GFX940-NEXT: ;;#ASMSTART
12211 ; GFX940-NEXT: ; use s[8:9]
12212 ; GFX940-NEXT: ;;#ASMEND
12213 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12214 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12215 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
12216 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
12217 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12218 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar two-source shuffle test, mask <7, 5, 3>: indices 7 and 5 select lanes
; 3 and 1 of %vec1, index 3 selects lane 3 of %vec0. The 3-lane result is
; widened to <4 x bfloat> (last lane poison) and handed to inline asm in
; s[8:9]. All three selected lanes are upper halves, so the expected code uses
; three 16-bit right shifts: the two %vec1 lanes are packed into s8 and the
; %vec0 lane is written to s9.
12222 define void @s_shuffle_v3bf16_v4bf16__7_5_3() {
12223 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_5_3:
12225 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12226 ; GFX900-NEXT: ;;#ASMSTART
12227 ; GFX900-NEXT: ; def s[4:5]
12228 ; GFX900-NEXT: ;;#ASMEND
12229 ; GFX900-NEXT: ;;#ASMSTART
12230 ; GFX900-NEXT: ; def s[6:7]
12231 ; GFX900-NEXT: ;;#ASMEND
12232 ; GFX900-NEXT: s_lshr_b32 s4, s6, 16
12233 ; GFX900-NEXT: s_lshr_b32 s6, s7, 16
12234 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4
12235 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
12236 ; GFX900-NEXT: ;;#ASMSTART
12237 ; GFX900-NEXT: ; use s[8:9]
12238 ; GFX900-NEXT: ;;#ASMEND
12239 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12241 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_5_3:
12243 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12244 ; GFX90A-NEXT: ;;#ASMSTART
12245 ; GFX90A-NEXT: ; def s[4:5]
12246 ; GFX90A-NEXT: ;;#ASMEND
12247 ; GFX90A-NEXT: ;;#ASMSTART
12248 ; GFX90A-NEXT: ; def s[6:7]
12249 ; GFX90A-NEXT: ;;#ASMEND
12250 ; GFX90A-NEXT: s_lshr_b32 s4, s6, 16
12251 ; GFX90A-NEXT: s_lshr_b32 s6, s7, 16
12252 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4
12253 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
12254 ; GFX90A-NEXT: ;;#ASMSTART
12255 ; GFX90A-NEXT: ; use s[8:9]
12256 ; GFX90A-NEXT: ;;#ASMEND
12257 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12259 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_3:
12261 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12262 ; GFX940-NEXT: ;;#ASMSTART
12263 ; GFX940-NEXT: ; def s[0:1]
12264 ; GFX940-NEXT: ;;#ASMEND
12265 ; GFX940-NEXT: ;;#ASMSTART
12266 ; GFX940-NEXT: ; def s[2:3]
12267 ; GFX940-NEXT: ;;#ASMEND
12268 ; GFX940-NEXT: s_lshr_b32 s0, s2, 16
12269 ; GFX940-NEXT: s_lshr_b32 s2, s3, 16
12270 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0
12271 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
12272 ; GFX940-NEXT: ;;#ASMSTART
12273 ; GFX940-NEXT: ; use s[8:9]
12274 ; GFX940-NEXT: ;;#ASMEND
12275 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12276 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12277 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
12278 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
12279 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12280 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar two-source shuffle test, mask <7, 6, 3>: indices 7 and 6 select lanes
; 3 and 2 of %vec1 (swapped), index 3 selects lane 3 of %vec0. The 3-lane
; result is widened to <4 x bfloat> (last lane poison) and handed to inline asm
; in s[8:9]. The expected code shifts the upper %vec1 register right by 16 and
; packs that with the unshifted register, swapping its two halves into s8; s9
; is %vec0 lane 3 shifted down.
12284 define void @s_shuffle_v3bf16_v4bf16__7_6_3() {
12285 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_6_3:
12287 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12288 ; GFX900-NEXT: ;;#ASMSTART
12289 ; GFX900-NEXT: ; def s[4:5]
12290 ; GFX900-NEXT: ;;#ASMEND
12291 ; GFX900-NEXT: ;;#ASMSTART
12292 ; GFX900-NEXT: ; def s[6:7]
12293 ; GFX900-NEXT: ;;#ASMEND
12294 ; GFX900-NEXT: s_lshr_b32 s4, s7, 16
12295 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7
12296 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
12297 ; GFX900-NEXT: ;;#ASMSTART
12298 ; GFX900-NEXT: ; use s[8:9]
12299 ; GFX900-NEXT: ;;#ASMEND
12300 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12302 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_6_3:
12304 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12305 ; GFX90A-NEXT: ;;#ASMSTART
12306 ; GFX90A-NEXT: ; def s[4:5]
12307 ; GFX90A-NEXT: ;;#ASMEND
12308 ; GFX90A-NEXT: ;;#ASMSTART
12309 ; GFX90A-NEXT: ; def s[6:7]
12310 ; GFX90A-NEXT: ;;#ASMEND
12311 ; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
12312 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7
12313 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
12314 ; GFX90A-NEXT: ;;#ASMSTART
12315 ; GFX90A-NEXT: ; use s[8:9]
12316 ; GFX90A-NEXT: ;;#ASMEND
12317 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12319 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_3:
12321 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12322 ; GFX940-NEXT: ;;#ASMSTART
12323 ; GFX940-NEXT: ; def s[0:1]
12324 ; GFX940-NEXT: ;;#ASMEND
12325 ; GFX940-NEXT: ;;#ASMSTART
12326 ; GFX940-NEXT: ; def s[2:3]
12327 ; GFX940-NEXT: ;;#ASMEND
12328 ; GFX940-NEXT: s_lshr_b32 s0, s3, 16
12329 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3
12330 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
12331 ; GFX940-NEXT: ;;#ASMSTART
12332 ; GFX940-NEXT: ; use s[8:9]
12333 ; GFX940-NEXT: ;;#ASMEND
12334 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12335 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12336 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
12337 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
12338 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12339 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar shuffle test, mask <poison, 4, 4>. Only %vec0 is defined and the
; second shuffle operand is poison, so index 4 (lane 0 of the second operand)
; and the poison index leave every result lane unspecified. The expected code,
; shared by all three targets, therefore contains neither the asm def nor any
; shuffle instructions: only the asm use of s[8:9] remains.
12343 define void @s_shuffle_v3bf16_v4bf16__u_4_4() {
12344 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__u_4_4:
12346 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12347 ; GFX9-NEXT: ;;#ASMSTART
12348 ; GFX9-NEXT: ; use s[8:9]
12349 ; GFX9-NEXT: ;;#ASMEND
12350 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12351 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12352 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 poison, i32 4, i32 4>
12353 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12354 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar shuffle test, mask <0, 4, 4>. Only %vec0 is defined; the second
; shuffle operand is poison, so index 4 (lane 0 of the second operand) leaves
; result lanes 1 and 2 unspecified and only lane 0 of %vec0 is required. The
; expected code defines the source directly in s[8:9] and passes it to the asm
; use with no shuffle instructions; the gfx940 checks additionally expect an
; s_nop 0 between the two asm blocks.
; NOTE(review): index 4 never reads a real second vector here - confirm that
; leaving the second operand as poison is intended for this case.
12358 define void @s_shuffle_v3bf16_v4bf16__0_4_4() {
12359 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__0_4_4:
12361 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12362 ; GFX900-NEXT: ;;#ASMSTART
12363 ; GFX900-NEXT: ; def s[8:9]
12364 ; GFX900-NEXT: ;;#ASMEND
12365 ; GFX900-NEXT: ;;#ASMSTART
12366 ; GFX900-NEXT: ; use s[8:9]
12367 ; GFX900-NEXT: ;;#ASMEND
12368 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12370 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__0_4_4:
12372 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12373 ; GFX90A-NEXT: ;;#ASMSTART
12374 ; GFX90A-NEXT: ; def s[8:9]
12375 ; GFX90A-NEXT: ;;#ASMEND
12376 ; GFX90A-NEXT: ;;#ASMSTART
12377 ; GFX90A-NEXT: ; use s[8:9]
12378 ; GFX90A-NEXT: ;;#ASMEND
12379 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12381 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__0_4_4:
12383 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12384 ; GFX940-NEXT: ;;#ASMSTART
12385 ; GFX940-NEXT: ; def s[8:9]
12386 ; GFX940-NEXT: ;;#ASMEND
12387 ; GFX940-NEXT: s_nop 0
12388 ; GFX940-NEXT: ;;#ASMSTART
12389 ; GFX940-NEXT: ; use s[8:9]
12390 ; GFX940-NEXT: ;;#ASMEND
12391 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12392 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12393 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 0, i32 4, i32 4>
12394 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12395 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar shuffle test, mask <1, 4, 4>. Only %vec0 is defined; the second
; shuffle operand is poison, so index 4 (lane 0 of the second operand) leaves
; result lanes 1 and 2 unspecified and only lane 1 of %vec0 is required. The
; expected code is a single 16-bit right shift of the first source register
; into s8; s9 is not written before the asm use.
12399 define void @s_shuffle_v3bf16_v4bf16__1_4_4() {
12400 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__1_4_4:
12402 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12403 ; GFX900-NEXT: ;;#ASMSTART
12404 ; GFX900-NEXT: ; def s[4:5]
12405 ; GFX900-NEXT: ;;#ASMEND
12406 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
12407 ; GFX900-NEXT: ;;#ASMSTART
12408 ; GFX900-NEXT: ; use s[8:9]
12409 ; GFX900-NEXT: ;;#ASMEND
12410 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12412 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__1_4_4:
12414 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12415 ; GFX90A-NEXT: ;;#ASMSTART
12416 ; GFX90A-NEXT: ; def s[4:5]
12417 ; GFX90A-NEXT: ;;#ASMEND
12418 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
12419 ; GFX90A-NEXT: ;;#ASMSTART
12420 ; GFX90A-NEXT: ; use s[8:9]
12421 ; GFX90A-NEXT: ;;#ASMEND
12422 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12424 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_4_4:
12426 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12427 ; GFX940-NEXT: ;;#ASMSTART
12428 ; GFX940-NEXT: ; def s[0:1]
12429 ; GFX940-NEXT: ;;#ASMEND
12430 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
12431 ; GFX940-NEXT: ;;#ASMSTART
12432 ; GFX940-NEXT: ; use s[8:9]
12433 ; GFX940-NEXT: ;;#ASMEND
12434 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12435 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12436 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 1, i32 4, i32 4>
12437 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12438 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar shuffle test, mask <2, 4, 4>. Only %vec0 is defined; the second
; shuffle operand is poison, so index 4 (lane 0 of the second operand) leaves
; result lanes 1 and 2 unspecified and only lane 2 of %vec0 is required. Lane 2
; is the low half of the upper source register, so the expected code is a
; single s_mov_b32 of that register into s8; s9 is not written before the asm
; use.
12442 define void @s_shuffle_v3bf16_v4bf16__2_4_4() {
12443 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__2_4_4:
12445 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12446 ; GFX900-NEXT: ;;#ASMSTART
12447 ; GFX900-NEXT: ; def s[4:5]
12448 ; GFX900-NEXT: ;;#ASMEND
12449 ; GFX900-NEXT: s_mov_b32 s8, s5
12450 ; GFX900-NEXT: ;;#ASMSTART
12451 ; GFX900-NEXT: ; use s[8:9]
12452 ; GFX900-NEXT: ;;#ASMEND
12453 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12455 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__2_4_4:
12457 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12458 ; GFX90A-NEXT: ;;#ASMSTART
12459 ; GFX90A-NEXT: ; def s[4:5]
12460 ; GFX90A-NEXT: ;;#ASMEND
12461 ; GFX90A-NEXT: s_mov_b32 s8, s5
12462 ; GFX90A-NEXT: ;;#ASMSTART
12463 ; GFX90A-NEXT: ; use s[8:9]
12464 ; GFX90A-NEXT: ;;#ASMEND
12465 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12467 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_4_4:
12469 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12470 ; GFX940-NEXT: ;;#ASMSTART
12471 ; GFX940-NEXT: ; def s[0:1]
12472 ; GFX940-NEXT: ;;#ASMEND
12473 ; GFX940-NEXT: s_mov_b32 s8, s1
12474 ; GFX940-NEXT: ;;#ASMSTART
12475 ; GFX940-NEXT: ; use s[8:9]
12476 ; GFX940-NEXT: ;;#ASMEND
12477 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12478 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12479 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 2, i32 4, i32 4>
12480 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12481 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle with mask <3, 4, 4> and a poison second operand.
; Lane 0 reads element 3 of %vec0; lanes 1 and 2 index the poison operand.
; Every expected sequence is one s_lshr_b32 by 16 of the second def'd register
; into s8 before the "; use s[8:9]" asm.
12485 define void @s_shuffle_v3bf16_v4bf16__3_4_4() {
12486 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__3_4_4:
12488 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12489 ; GFX900-NEXT: ;;#ASMSTART
12490 ; GFX900-NEXT: ; def s[4:5]
12491 ; GFX900-NEXT: ;;#ASMEND
12492 ; GFX900-NEXT: s_lshr_b32 s8, s5, 16
12493 ; GFX900-NEXT: ;;#ASMSTART
12494 ; GFX900-NEXT: ; use s[8:9]
12495 ; GFX900-NEXT: ;;#ASMEND
12496 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12498 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__3_4_4:
12500 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12501 ; GFX90A-NEXT: ;;#ASMSTART
12502 ; GFX90A-NEXT: ; def s[4:5]
12503 ; GFX90A-NEXT: ;;#ASMEND
12504 ; GFX90A-NEXT: s_lshr_b32 s8, s5, 16
12505 ; GFX90A-NEXT: ;;#ASMSTART
12506 ; GFX90A-NEXT: ; use s[8:9]
12507 ; GFX90A-NEXT: ;;#ASMEND
12508 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12510 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_4_4:
12512 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12513 ; GFX940-NEXT: ;;#ASMSTART
12514 ; GFX940-NEXT: ; def s[0:1]
12515 ; GFX940-NEXT: ;;#ASMEND
12516 ; GFX940-NEXT: s_lshr_b32 s8, s1, 16
12517 ; GFX940-NEXT: ;;#ASMSTART
12518 ; GFX940-NEXT: ; use s[8:9]
12519 ; GFX940-NEXT: ;;#ASMEND
12520 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12521 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12522 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 3, i32 4, i32 4>
12523 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12524 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle with mask <4, 4, 4> and a poison second operand.
; All three lanes index the poison operand, so no lane reads %vec0. The
; expected output (one check prefix shared by all targets) contains no
; "; def" asm and no data movement, only the "; use s[8:9]" asm.
12528 define void @s_shuffle_v3bf16_v4bf16__4_4_4() {
12529 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__4_4_4:
12531 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12532 ; GFX9-NEXT: ;;#ASMSTART
12533 ; GFX9-NEXT: ; use s[8:9]
12534 ; GFX9-NEXT: ;;#ASMEND
12535 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12536 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12537 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> <i32 4, i32 4, i32 4>
12538 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12539 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <5, 4, 4>.
; Mask indices 4-7 select from %vec1, so lane 0 = %vec1[1] and lanes 1 and 2
; = %vec1[0]; no lane reads %vec0. The expected output contains a single
; "; def" asm (presumably the unused, non-sideeffect def is dropped), then
; s_lshr_b32 by 16, s_pack_ll_b32_b16 into s8 and s_mov_b32 into s9.
12543 define void @s_shuffle_v3bf16_v4bf16__5_4_4() {
12544 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__5_4_4:
12546 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12547 ; GFX900-NEXT: ;;#ASMSTART
12548 ; GFX900-NEXT: ; def s[4:5]
12549 ; GFX900-NEXT: ;;#ASMEND
12550 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
12551 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
12552 ; GFX900-NEXT: s_mov_b32 s9, s4
12553 ; GFX900-NEXT: ;;#ASMSTART
12554 ; GFX900-NEXT: ; use s[8:9]
12555 ; GFX900-NEXT: ;;#ASMEND
12556 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12558 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__5_4_4:
12560 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12561 ; GFX90A-NEXT: ;;#ASMSTART
12562 ; GFX90A-NEXT: ; def s[4:5]
12563 ; GFX90A-NEXT: ;;#ASMEND
12564 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
12565 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
12566 ; GFX90A-NEXT: s_mov_b32 s9, s4
12567 ; GFX90A-NEXT: ;;#ASMSTART
12568 ; GFX90A-NEXT: ; use s[8:9]
12569 ; GFX90A-NEXT: ;;#ASMEND
12570 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12572 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_4_4:
12574 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12575 ; GFX940-NEXT: ;;#ASMSTART
12576 ; GFX940-NEXT: ; def s[0:1]
12577 ; GFX940-NEXT: ;;#ASMEND
12578 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
12579 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
12580 ; GFX940-NEXT: s_mov_b32 s9, s0
12581 ; GFX940-NEXT: ;;#ASMSTART
12582 ; GFX940-NEXT: ; use s[8:9]
12583 ; GFX940-NEXT: ;;#ASMEND
12584 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12585 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12586 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
12587 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
12588 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12589 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <6, 4, 4>.
; Lane 0 = %vec1[2], lanes 1 and 2 = %vec1[0]; no lane reads %vec0, and the
; expected output contains a single "; def" asm. No shift is needed: the
; expected code is s_pack_ll_b32_b16 of the second and first def'd registers
; into s8, then s_mov_b32 of the first register into s9.
12593 define void @s_shuffle_v3bf16_v4bf16__6_4_4() {
12594 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__6_4_4:
12596 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12597 ; GFX900-NEXT: ;;#ASMSTART
12598 ; GFX900-NEXT: ; def s[4:5]
12599 ; GFX900-NEXT: ;;#ASMEND
12600 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
12601 ; GFX900-NEXT: s_mov_b32 s9, s4
12602 ; GFX900-NEXT: ;;#ASMSTART
12603 ; GFX900-NEXT: ; use s[8:9]
12604 ; GFX900-NEXT: ;;#ASMEND
12605 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12607 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__6_4_4:
12609 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12610 ; GFX90A-NEXT: ;;#ASMSTART
12611 ; GFX90A-NEXT: ; def s[4:5]
12612 ; GFX90A-NEXT: ;;#ASMEND
12613 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
12614 ; GFX90A-NEXT: s_mov_b32 s9, s4
12615 ; GFX90A-NEXT: ;;#ASMSTART
12616 ; GFX90A-NEXT: ; use s[8:9]
12617 ; GFX90A-NEXT: ;;#ASMEND
12618 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12620 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_4_4:
12622 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12623 ; GFX940-NEXT: ;;#ASMSTART
12624 ; GFX940-NEXT: ; def s[0:1]
12625 ; GFX940-NEXT: ;;#ASMEND
12626 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
12627 ; GFX940-NEXT: s_mov_b32 s9, s0
12628 ; GFX940-NEXT: ;;#ASMSTART
12629 ; GFX940-NEXT: ; use s[8:9]
12630 ; GFX940-NEXT: ;;#ASMEND
12631 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12632 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12633 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
12634 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
12635 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12636 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <7, 4, 4>.
; Lane 0 = %vec1[3], lanes 1 and 2 = %vec1[0]; no lane reads %vec0, and the
; expected output contains a single "; def" asm. Expected code: s_lshr_b32 by
; 16 of the second def'd register, s_pack_ll_b32_b16 into s8, and s_mov_b32 of
; the first register into s9.
12640 define void @s_shuffle_v3bf16_v4bf16__7_4_4() {
12641 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_4_4:
12643 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12644 ; GFX900-NEXT: ;;#ASMSTART
12645 ; GFX900-NEXT: ; def s[4:5]
12646 ; GFX900-NEXT: ;;#ASMEND
12647 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
12648 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
12649 ; GFX900-NEXT: s_mov_b32 s9, s4
12650 ; GFX900-NEXT: ;;#ASMSTART
12651 ; GFX900-NEXT: ; use s[8:9]
12652 ; GFX900-NEXT: ;;#ASMEND
12653 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12655 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_4_4:
12657 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12658 ; GFX90A-NEXT: ;;#ASMSTART
12659 ; GFX90A-NEXT: ; def s[4:5]
12660 ; GFX90A-NEXT: ;;#ASMEND
12661 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
12662 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
12663 ; GFX90A-NEXT: s_mov_b32 s9, s4
12664 ; GFX90A-NEXT: ;;#ASMSTART
12665 ; GFX90A-NEXT: ; use s[8:9]
12666 ; GFX90A-NEXT: ;;#ASMEND
12667 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12669 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_4:
12671 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12672 ; GFX940-NEXT: ;;#ASMSTART
12673 ; GFX940-NEXT: ; def s[0:1]
12674 ; GFX940-NEXT: ;;#ASMEND
12675 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
12676 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
12677 ; GFX940-NEXT: s_mov_b32 s9, s0
12678 ; GFX940-NEXT: ;;#ASMSTART
12679 ; GFX940-NEXT: ; use s[8:9]
12680 ; GFX940-NEXT: ;;#ASMEND
12681 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12682 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12683 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
12684 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
12685 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12686 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <7, poison, 4>.
; Lane 0 = %vec1[3], lane 1 is left undefined by the mask, lane 2 = %vec1[0];
; no lane reads %vec0, and the expected output contains a single "; def" asm.
; With lane 1 undefined no pack is expected: s_lshr_b32 by 16 writes s8
; directly and s_mov_b32 copies the first def'd register into s9.
12690 define void @s_shuffle_v3bf16_v4bf16__7_u_4() {
12691 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_u_4:
12693 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12694 ; GFX900-NEXT: ;;#ASMSTART
12695 ; GFX900-NEXT: ; def s[4:5]
12696 ; GFX900-NEXT: ;;#ASMEND
12697 ; GFX900-NEXT: s_lshr_b32 s8, s5, 16
12698 ; GFX900-NEXT: s_mov_b32 s9, s4
12699 ; GFX900-NEXT: ;;#ASMSTART
12700 ; GFX900-NEXT: ; use s[8:9]
12701 ; GFX900-NEXT: ;;#ASMEND
12702 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12704 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_u_4:
12706 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12707 ; GFX90A-NEXT: ;;#ASMSTART
12708 ; GFX90A-NEXT: ; def s[4:5]
12709 ; GFX90A-NEXT: ;;#ASMEND
12710 ; GFX90A-NEXT: s_lshr_b32 s8, s5, 16
12711 ; GFX90A-NEXT: s_mov_b32 s9, s4
12712 ; GFX90A-NEXT: ;;#ASMSTART
12713 ; GFX90A-NEXT: ; use s[8:9]
12714 ; GFX90A-NEXT: ;;#ASMEND
12715 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12717 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_4:
12719 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12720 ; GFX940-NEXT: ;;#ASMSTART
12721 ; GFX940-NEXT: ; def s[0:1]
12722 ; GFX940-NEXT: ;;#ASMEND
12723 ; GFX940-NEXT: s_lshr_b32 s8, s1, 16
12724 ; GFX940-NEXT: s_mov_b32 s9, s0
12725 ; GFX940-NEXT: ;;#ASMSTART
12726 ; GFX940-NEXT: ; use s[8:9]
12727 ; GFX940-NEXT: ;;#ASMEND
12728 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12729 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12730 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
12731 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
12732 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12733 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <7, 0, 4>.
; Lane 0 = %vec1[3], lane 1 = %vec0[0], lane 2 = %vec1[0], so both inputs are
; read and both "; def" asm blocks appear in the expected output. Expected
; code: one s_lshr_b32 by 16, s_pack_ll_b32_b16 into s8, s_mov_b32 into s9.
12737 define void @s_shuffle_v3bf16_v4bf16__7_0_4() {
12738 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_0_4:
12740 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12741 ; GFX900-NEXT: ;;#ASMSTART
12742 ; GFX900-NEXT: ; def s[4:5]
12743 ; GFX900-NEXT: ;;#ASMEND
12744 ; GFX900-NEXT: ;;#ASMSTART
12745 ; GFX900-NEXT: ; def s[6:7]
12746 ; GFX900-NEXT: ;;#ASMEND
12747 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
12748 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
12749 ; GFX900-NEXT: s_mov_b32 s9, s6
12750 ; GFX900-NEXT: ;;#ASMSTART
12751 ; GFX900-NEXT: ; use s[8:9]
12752 ; GFX900-NEXT: ;;#ASMEND
12753 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12755 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_0_4:
12757 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12758 ; GFX90A-NEXT: ;;#ASMSTART
12759 ; GFX90A-NEXT: ; def s[4:5]
12760 ; GFX90A-NEXT: ;;#ASMEND
12761 ; GFX90A-NEXT: ;;#ASMSTART
12762 ; GFX90A-NEXT: ; def s[6:7]
12763 ; GFX90A-NEXT: ;;#ASMEND
12764 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
12765 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
12766 ; GFX90A-NEXT: s_mov_b32 s9, s6
12767 ; GFX90A-NEXT: ;;#ASMSTART
12768 ; GFX90A-NEXT: ; use s[8:9]
12769 ; GFX90A-NEXT: ;;#ASMEND
12770 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12772 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_4:
12774 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12775 ; GFX940-NEXT: ;;#ASMSTART
12776 ; GFX940-NEXT: ; def s[0:1]
12777 ; GFX940-NEXT: ;;#ASMEND
12778 ; GFX940-NEXT: ;;#ASMSTART
12779 ; GFX940-NEXT: ; def s[2:3]
12780 ; GFX940-NEXT: ;;#ASMEND
12781 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
12782 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
12783 ; GFX940-NEXT: s_mov_b32 s9, s2
12784 ; GFX940-NEXT: ;;#ASMSTART
12785 ; GFX940-NEXT: ; use s[8:9]
12786 ; GFX940-NEXT: ;;#ASMEND
12787 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12788 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12789 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
12790 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
12791 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12792 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <7, 1, 4>.
; Lane 0 = %vec1[3], lane 1 = %vec0[1], lane 2 = %vec1[0]; both "; def" asm
; blocks appear in the expected output. Two lanes come from odd element
; positions, so two s_lshr_b32 by 16 are expected before s_pack_ll_b32_b16
; into s8 and s_mov_b32 into s9.
12796 define void @s_shuffle_v3bf16_v4bf16__7_1_4() {
12797 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_1_4:
12799 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12800 ; GFX900-NEXT: ;;#ASMSTART
12801 ; GFX900-NEXT: ; def s[4:5]
12802 ; GFX900-NEXT: ;;#ASMEND
12803 ; GFX900-NEXT: ;;#ASMSTART
12804 ; GFX900-NEXT: ; def s[6:7]
12805 ; GFX900-NEXT: ;;#ASMEND
12806 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
12807 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
12808 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
12809 ; GFX900-NEXT: s_mov_b32 s9, s6
12810 ; GFX900-NEXT: ;;#ASMSTART
12811 ; GFX900-NEXT: ; use s[8:9]
12812 ; GFX900-NEXT: ;;#ASMEND
12813 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12815 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_1_4:
12817 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12818 ; GFX90A-NEXT: ;;#ASMSTART
12819 ; GFX90A-NEXT: ; def s[4:5]
12820 ; GFX90A-NEXT: ;;#ASMEND
12821 ; GFX90A-NEXT: ;;#ASMSTART
12822 ; GFX90A-NEXT: ; def s[6:7]
12823 ; GFX90A-NEXT: ;;#ASMEND
12824 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
12825 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
12826 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
12827 ; GFX90A-NEXT: s_mov_b32 s9, s6
12828 ; GFX90A-NEXT: ;;#ASMSTART
12829 ; GFX90A-NEXT: ; use s[8:9]
12830 ; GFX90A-NEXT: ;;#ASMEND
12831 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12833 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_4:
12835 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12836 ; GFX940-NEXT: ;;#ASMSTART
12837 ; GFX940-NEXT: ; def s[0:1]
12838 ; GFX940-NEXT: ;;#ASMEND
12839 ; GFX940-NEXT: ;;#ASMSTART
12840 ; GFX940-NEXT: ; def s[2:3]
12841 ; GFX940-NEXT: ;;#ASMEND
12842 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
12843 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
12844 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
12845 ; GFX940-NEXT: s_mov_b32 s9, s2
12846 ; GFX940-NEXT: ;;#ASMSTART
12847 ; GFX940-NEXT: ; use s[8:9]
12848 ; GFX940-NEXT: ;;#ASMEND
12849 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12850 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12851 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
12852 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
12853 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12854 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <7, 2, 4>.
; Lane 0 = %vec1[3], lane 1 = %vec0[2], lane 2 = %vec1[0]; both "; def" asm
; blocks appear in the expected output. Expected code: one s_lshr_b32 by 16,
; s_pack_ll_b32_b16 into s8, and s_mov_b32 into s9.
12858 define void @s_shuffle_v3bf16_v4bf16__7_2_4() {
12859 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_2_4:
12861 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12862 ; GFX900-NEXT: ;;#ASMSTART
12863 ; GFX900-NEXT: ; def s[4:5]
12864 ; GFX900-NEXT: ;;#ASMEND
12865 ; GFX900-NEXT: ;;#ASMSTART
12866 ; GFX900-NEXT: ; def s[6:7]
12867 ; GFX900-NEXT: ;;#ASMEND
12868 ; GFX900-NEXT: s_lshr_b32 s4, s7, 16
12869 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
12870 ; GFX900-NEXT: s_mov_b32 s9, s6
12871 ; GFX900-NEXT: ;;#ASMSTART
12872 ; GFX900-NEXT: ; use s[8:9]
12873 ; GFX900-NEXT: ;;#ASMEND
12874 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12876 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_2_4:
12878 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12879 ; GFX90A-NEXT: ;;#ASMSTART
12880 ; GFX90A-NEXT: ; def s[4:5]
12881 ; GFX90A-NEXT: ;;#ASMEND
12882 ; GFX90A-NEXT: ;;#ASMSTART
12883 ; GFX90A-NEXT: ; def s[6:7]
12884 ; GFX90A-NEXT: ;;#ASMEND
12885 ; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
12886 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
12887 ; GFX90A-NEXT: s_mov_b32 s9, s6
12888 ; GFX90A-NEXT: ;;#ASMSTART
12889 ; GFX90A-NEXT: ; use s[8:9]
12890 ; GFX90A-NEXT: ;;#ASMEND
12891 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12893 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_4:
12895 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12896 ; GFX940-NEXT: ;;#ASMSTART
12897 ; GFX940-NEXT: ; def s[0:1]
12898 ; GFX940-NEXT: ;;#ASMEND
12899 ; GFX940-NEXT: ;;#ASMSTART
12900 ; GFX940-NEXT: ; def s[2:3]
12901 ; GFX940-NEXT: ;;#ASMEND
12902 ; GFX940-NEXT: s_lshr_b32 s0, s3, 16
12903 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
12904 ; GFX940-NEXT: s_mov_b32 s9, s2
12905 ; GFX940-NEXT: ;;#ASMSTART
12906 ; GFX940-NEXT: ; use s[8:9]
12907 ; GFX940-NEXT: ;;#ASMEND
12908 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12909 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12910 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
12911 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
12912 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12913 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <7, 3, 4>.
; Lane 0 = %vec1[3], lane 1 = %vec0[3], lane 2 = %vec1[0]; both "; def" asm
; blocks appear in the expected output. Two s_lshr_b32 by 16 are expected
; (one per input), then s_pack_ll_b32_b16 into s8 and s_mov_b32 into s9.
12917 define void @s_shuffle_v3bf16_v4bf16__7_3_4() {
12918 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_3_4:
12920 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12921 ; GFX900-NEXT: ;;#ASMSTART
12922 ; GFX900-NEXT: ; def s[4:5]
12923 ; GFX900-NEXT: ;;#ASMEND
12924 ; GFX900-NEXT: ;;#ASMSTART
12925 ; GFX900-NEXT: ; def s[6:7]
12926 ; GFX900-NEXT: ;;#ASMEND
12927 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
12928 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
12929 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
12930 ; GFX900-NEXT: s_mov_b32 s9, s6
12931 ; GFX900-NEXT: ;;#ASMSTART
12932 ; GFX900-NEXT: ; use s[8:9]
12933 ; GFX900-NEXT: ;;#ASMEND
12934 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12936 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_3_4:
12938 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12939 ; GFX90A-NEXT: ;;#ASMSTART
12940 ; GFX90A-NEXT: ; def s[4:5]
12941 ; GFX90A-NEXT: ;;#ASMEND
12942 ; GFX90A-NEXT: ;;#ASMSTART
12943 ; GFX90A-NEXT: ; def s[6:7]
12944 ; GFX90A-NEXT: ;;#ASMEND
12945 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
12946 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
12947 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
12948 ; GFX90A-NEXT: s_mov_b32 s9, s6
12949 ; GFX90A-NEXT: ;;#ASMSTART
12950 ; GFX90A-NEXT: ; use s[8:9]
12951 ; GFX90A-NEXT: ;;#ASMEND
12952 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
12954 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_4:
12956 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12957 ; GFX940-NEXT: ;;#ASMSTART
12958 ; GFX940-NEXT: ; def s[0:1]
12959 ; GFX940-NEXT: ;;#ASMEND
12960 ; GFX940-NEXT: ;;#ASMSTART
12961 ; GFX940-NEXT: ; def s[2:3]
12962 ; GFX940-NEXT: ;;#ASMEND
12963 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
12964 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
12965 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
12966 ; GFX940-NEXT: s_mov_b32 s9, s2
12967 ; GFX940-NEXT: ;;#ASMSTART
12968 ; GFX940-NEXT: ; use s[8:9]
12969 ; GFX940-NEXT: ;;#ASMEND
12970 ; GFX940-NEXT: s_setpc_b64 s[30:31]
12971 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
12972 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
12973 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
12974 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12975 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <7, 5, 4>.
; Every lane selects from %vec1 (lane 0 = %vec1[3], lane 1 = %vec1[1],
; lane 2 = %vec1[0]); no lane reads %vec0, and the expected output contains a
; single "; def" asm. Expected code: two s_lshr_b32 by 16 (one per def'd
; register), s_pack_ll_b32_b16 into s8, and s_mov_b32 into s9.
12979 define void @s_shuffle_v3bf16_v4bf16__7_5_4() {
12980 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_5_4:
12982 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12983 ; GFX900-NEXT: ;;#ASMSTART
12984 ; GFX900-NEXT: ; def s[4:5]
12985 ; GFX900-NEXT: ;;#ASMEND
12986 ; GFX900-NEXT: s_lshr_b32 s6, s4, 16
12987 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
12988 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s6
12989 ; GFX900-NEXT: s_mov_b32 s9, s4
12990 ; GFX900-NEXT: ;;#ASMSTART
12991 ; GFX900-NEXT: ; use s[8:9]
12992 ; GFX900-NEXT: ;;#ASMEND
12993 ; GFX900-NEXT: s_setpc_b64 s[30:31]
12995 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_5_4:
12997 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12998 ; GFX90A-NEXT: ;;#ASMSTART
12999 ; GFX90A-NEXT: ; def s[4:5]
13000 ; GFX90A-NEXT: ;;#ASMEND
13001 ; GFX90A-NEXT: s_lshr_b32 s6, s4, 16
13002 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
13003 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s6
13004 ; GFX90A-NEXT: s_mov_b32 s9, s4
13005 ; GFX90A-NEXT: ;;#ASMSTART
13006 ; GFX90A-NEXT: ; use s[8:9]
13007 ; GFX90A-NEXT: ;;#ASMEND
13008 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13010 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_4:
13012 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13013 ; GFX940-NEXT: ;;#ASMSTART
13014 ; GFX940-NEXT: ; def s[0:1]
13015 ; GFX940-NEXT: ;;#ASMEND
13016 ; GFX940-NEXT: s_lshr_b32 s2, s0, 16
13017 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
13018 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2
13019 ; GFX940-NEXT: s_mov_b32 s9, s0
13020 ; GFX940-NEXT: ;;#ASMSTART
13021 ; GFX940-NEXT: ; use s[8:9]
13022 ; GFX940-NEXT: ;;#ASMEND
13023 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13024 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13025 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13026 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
13027 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13028 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <7, 6, 4>.
; Every lane selects from %vec1 (lane 0 = %vec1[3], lane 1 = %vec1[2],
; lane 2 = %vec1[0]); no lane reads %vec0, and the expected output contains a
; single "; def" asm. Expected code: one s_lshr_b32 by 16 of the second def'd
; register, s_pack_ll_b32_b16 into s8, and s_mov_b32 into s9.
13032 define void @s_shuffle_v3bf16_v4bf16__7_6_4() {
13033 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_6_4:
13035 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13036 ; GFX900-NEXT: ;;#ASMSTART
13037 ; GFX900-NEXT: ; def s[4:5]
13038 ; GFX900-NEXT: ;;#ASMEND
13039 ; GFX900-NEXT: s_lshr_b32 s6, s5, 16
13040 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5
13041 ; GFX900-NEXT: s_mov_b32 s9, s4
13042 ; GFX900-NEXT: ;;#ASMSTART
13043 ; GFX900-NEXT: ; use s[8:9]
13044 ; GFX900-NEXT: ;;#ASMEND
13045 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13047 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_6_4:
13049 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13050 ; GFX90A-NEXT: ;;#ASMSTART
13051 ; GFX90A-NEXT: ; def s[4:5]
13052 ; GFX90A-NEXT: ;;#ASMEND
13053 ; GFX90A-NEXT: s_lshr_b32 s6, s5, 16
13054 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5
13055 ; GFX90A-NEXT: s_mov_b32 s9, s4
13056 ; GFX90A-NEXT: ;;#ASMSTART
13057 ; GFX90A-NEXT: ; use s[8:9]
13058 ; GFX90A-NEXT: ;;#ASMEND
13059 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13061 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_4:
13063 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13064 ; GFX940-NEXT: ;;#ASMSTART
13065 ; GFX940-NEXT: ; def s[0:1]
13066 ; GFX940-NEXT: ;;#ASMEND
13067 ; GFX940-NEXT: s_lshr_b32 s2, s1, 16
13068 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1
13069 ; GFX940-NEXT: s_mov_b32 s9, s0
13070 ; GFX940-NEXT: ;;#ASMSTART
13071 ; GFX940-NEXT: ; use s[8:9]
13072 ; GFX940-NEXT: ;;#ASMEND
13073 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13074 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13075 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13076 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
13077 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13078 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <poison, 5, 5>.
; Lane 0 is left undefined by the mask; lanes 1 and 2 both = %vec1[1]; no
; lane reads %vec0. The expected output (one check prefix shared by all
; targets) has a single "; def" asm placed directly in s[8:9], followed by
; one s_lshr_b32 by 16 of s8 into s9.
13082 define void @s_shuffle_v3bf16_v4bf16__u_5_5() {
13083 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__u_5_5:
13085 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13086 ; GFX9-NEXT: ;;#ASMSTART
13087 ; GFX9-NEXT: ; def s[8:9]
13088 ; GFX9-NEXT: ;;#ASMEND
13089 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
13090 ; GFX9-NEXT: ;;#ASMSTART
13091 ; GFX9-NEXT: ; use s[8:9]
13092 ; GFX9-NEXT: ;;#ASMEND
13093 ; GFX9-NEXT: s_setpc_b64 s[30:31]
13094 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13095 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13096 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
13097 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13098 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <0, 5, 5>.
; Lane 0 = %vec0[0]; lanes 1 and 2 both = %vec1[1]. Both "; def" asm blocks
; appear in the expected output, with the s_lshr_b32 by 16 into s9 scheduled
; between them; s_pack_ll_b32_b16 then builds s8 from the other def and s9.
13102 define void @s_shuffle_v3bf16_v4bf16__0_5_5() {
13103 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__0_5_5:
13105 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13106 ; GFX900-NEXT: ;;#ASMSTART
13107 ; GFX900-NEXT: ; def s[6:7]
13108 ; GFX900-NEXT: ;;#ASMEND
13109 ; GFX900-NEXT: s_lshr_b32 s9, s6, 16
13110 ; GFX900-NEXT: ;;#ASMSTART
13111 ; GFX900-NEXT: ; def s[4:5]
13112 ; GFX900-NEXT: ;;#ASMEND
13113 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
13114 ; GFX900-NEXT: ;;#ASMSTART
13115 ; GFX900-NEXT: ; use s[8:9]
13116 ; GFX900-NEXT: ;;#ASMEND
13117 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13119 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__0_5_5:
13121 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13122 ; GFX90A-NEXT: ;;#ASMSTART
13123 ; GFX90A-NEXT: ; def s[6:7]
13124 ; GFX90A-NEXT: ;;#ASMEND
13125 ; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
13126 ; GFX90A-NEXT: ;;#ASMSTART
13127 ; GFX90A-NEXT: ; def s[4:5]
13128 ; GFX90A-NEXT: ;;#ASMEND
13129 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
13130 ; GFX90A-NEXT: ;;#ASMSTART
13131 ; GFX90A-NEXT: ; use s[8:9]
13132 ; GFX90A-NEXT: ;;#ASMEND
13133 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13135 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__0_5_5:
13137 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13138 ; GFX940-NEXT: ;;#ASMSTART
13139 ; GFX940-NEXT: ; def s[2:3]
13140 ; GFX940-NEXT: ;;#ASMEND
13141 ; GFX940-NEXT: s_lshr_b32 s9, s2, 16
13142 ; GFX940-NEXT: ;;#ASMSTART
13143 ; GFX940-NEXT: ; def s[0:1]
13144 ; GFX940-NEXT: ;;#ASMEND
13145 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
13146 ; GFX940-NEXT: ;;#ASMSTART
13147 ; GFX940-NEXT: ; use s[8:9]
13148 ; GFX940-NEXT: ;;#ASMEND
13149 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13150 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13151 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13152 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 0, i32 5, i32 5>
13153 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13154 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <1, 5, 5>.
; Lane 0 = %vec0[1]; lanes 1 and 2 both = %vec1[1]. Both "; def" asm blocks
; appear back to back in the expected output, followed by two s_lshr_b32 by
; 16 (one writing s9) and an s_pack_ll_b32_b16 into s8.
13158 define void @s_shuffle_v3bf16_v4bf16__1_5_5() {
13159 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__1_5_5:
13161 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13162 ; GFX900-NEXT: ;;#ASMSTART
13163 ; GFX900-NEXT: ; def s[4:5]
13164 ; GFX900-NEXT: ;;#ASMEND
13165 ; GFX900-NEXT: ;;#ASMSTART
13166 ; GFX900-NEXT: ; def s[6:7]
13167 ; GFX900-NEXT: ;;#ASMEND
13168 ; GFX900-NEXT: s_lshr_b32 s9, s6, 16
13169 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
13170 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
13171 ; GFX900-NEXT: ;;#ASMSTART
13172 ; GFX900-NEXT: ; use s[8:9]
13173 ; GFX900-NEXT: ;;#ASMEND
13174 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13176 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__1_5_5:
13178 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13179 ; GFX90A-NEXT: ;;#ASMSTART
13180 ; GFX90A-NEXT: ; def s[4:5]
13181 ; GFX90A-NEXT: ;;#ASMEND
13182 ; GFX90A-NEXT: ;;#ASMSTART
13183 ; GFX90A-NEXT: ; def s[6:7]
13184 ; GFX90A-NEXT: ;;#ASMEND
13185 ; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
13186 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
13187 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
13188 ; GFX90A-NEXT: ;;#ASMSTART
13189 ; GFX90A-NEXT: ; use s[8:9]
13190 ; GFX90A-NEXT: ;;#ASMEND
13191 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13193 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_5_5:
13195 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13196 ; GFX940-NEXT: ;;#ASMSTART
13197 ; GFX940-NEXT: ; def s[0:1]
13198 ; GFX940-NEXT: ;;#ASMEND
13199 ; GFX940-NEXT: ;;#ASMSTART
13200 ; GFX940-NEXT: ; def s[2:3]
13201 ; GFX940-NEXT: ;;#ASMEND
13202 ; GFX940-NEXT: s_lshr_b32 s9, s2, 16
13203 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
13204 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
13205 ; GFX940-NEXT: ;;#ASMSTART
13206 ; GFX940-NEXT: ; use s[8:9]
13207 ; GFX940-NEXT: ;;#ASMEND
13208 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13209 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13210 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13211 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 1, i32 5, i32 5>
13212 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13213 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <2, 5, 5>.
; Lane 0 = %vec0[2]; lanes 1 and 2 both = %vec1[1]. Both "; def" asm blocks
; appear in the expected output, with the s_lshr_b32 by 16 into s9 scheduled
; between them; s_pack_ll_b32_b16 then builds s8 from the second register of
; the other def and s9.
13217 define void @s_shuffle_v3bf16_v4bf16__2_5_5() {
13218 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__2_5_5:
13220 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13221 ; GFX900-NEXT: ;;#ASMSTART
13222 ; GFX900-NEXT: ; def s[6:7]
13223 ; GFX900-NEXT: ;;#ASMEND
13224 ; GFX900-NEXT: s_lshr_b32 s9, s6, 16
13225 ; GFX900-NEXT: ;;#ASMSTART
13226 ; GFX900-NEXT: ; def s[4:5]
13227 ; GFX900-NEXT: ;;#ASMEND
13228 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9
13229 ; GFX900-NEXT: ;;#ASMSTART
13230 ; GFX900-NEXT: ; use s[8:9]
13231 ; GFX900-NEXT: ;;#ASMEND
13232 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13234 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__2_5_5:
13236 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13237 ; GFX90A-NEXT: ;;#ASMSTART
13238 ; GFX90A-NEXT: ; def s[6:7]
13239 ; GFX90A-NEXT: ;;#ASMEND
13240 ; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
13241 ; GFX90A-NEXT: ;;#ASMSTART
13242 ; GFX90A-NEXT: ; def s[4:5]
13243 ; GFX90A-NEXT: ;;#ASMEND
13244 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9
13245 ; GFX90A-NEXT: ;;#ASMSTART
13246 ; GFX90A-NEXT: ; use s[8:9]
13247 ; GFX90A-NEXT: ;;#ASMEND
13248 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13250 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_5_5:
13252 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13253 ; GFX940-NEXT: ;;#ASMSTART
13254 ; GFX940-NEXT: ; def s[2:3]
13255 ; GFX940-NEXT: ;;#ASMEND
13256 ; GFX940-NEXT: s_lshr_b32 s9, s2, 16
13257 ; GFX940-NEXT: ;;#ASMSTART
13258 ; GFX940-NEXT: ; def s[0:1]
13259 ; GFX940-NEXT: ;;#ASMEND
13260 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9
13261 ; GFX940-NEXT: ;;#ASMSTART
13262 ; GFX940-NEXT: ; use s[8:9]
13263 ; GFX940-NEXT: ;;#ASMEND
13264 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13265 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13266 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13267 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 2, i32 5, i32 5>
13268 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13269 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR two-operand shuffle with mask <3, 5, 5>.
; Lane 0 = %vec0[3]; lanes 1 and 2 both = %vec1[1]. Both "; def" asm blocks
; appear back to back in the expected output, followed by two s_lshr_b32 by
; 16 (one writing s9) and an s_pack_ll_b32_b16 into s8.
13273 define void @s_shuffle_v3bf16_v4bf16__3_5_5() {
13274 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__3_5_5:
13276 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13277 ; GFX900-NEXT: ;;#ASMSTART
13278 ; GFX900-NEXT: ; def s[4:5]
13279 ; GFX900-NEXT: ;;#ASMEND
13280 ; GFX900-NEXT: ;;#ASMSTART
13281 ; GFX900-NEXT: ; def s[6:7]
13282 ; GFX900-NEXT: ;;#ASMEND
13283 ; GFX900-NEXT: s_lshr_b32 s9, s6, 16
13284 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
13285 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
13286 ; GFX900-NEXT: ;;#ASMSTART
13287 ; GFX900-NEXT: ; use s[8:9]
13288 ; GFX900-NEXT: ;;#ASMEND
13289 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13291 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__3_5_5:
13293 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13294 ; GFX90A-NEXT: ;;#ASMSTART
13295 ; GFX90A-NEXT: ; def s[4:5]
13296 ; GFX90A-NEXT: ;;#ASMEND
13297 ; GFX90A-NEXT: ;;#ASMSTART
13298 ; GFX90A-NEXT: ; def s[6:7]
13299 ; GFX90A-NEXT: ;;#ASMEND
13300 ; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
13301 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
13302 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
13303 ; GFX90A-NEXT: ;;#ASMSTART
13304 ; GFX90A-NEXT: ; use s[8:9]
13305 ; GFX90A-NEXT: ;;#ASMEND
13306 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13308 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_5_5:
13310 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13311 ; GFX940-NEXT: ;;#ASMSTART
13312 ; GFX940-NEXT: ; def s[0:1]
13313 ; GFX940-NEXT: ;;#ASMEND
13314 ; GFX940-NEXT: ;;#ASMSTART
13315 ; GFX940-NEXT: ; def s[2:3]
13316 ; GFX940-NEXT: ;;#ASMEND
13317 ; GFX940-NEXT: s_lshr_b32 s9, s2, 16
13318 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
13319 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
13320 ; GFX940-NEXT: ;;#ASMSTART
13321 ; GFX940-NEXT: ; use s[8:9]
13322 ; GFX940-NEXT: ;;#ASMEND
13323 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13324 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13325 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13326 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
13327 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13328 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <4, 5, 5>. Mask indices 4-7 select
; from %vec1, so the result is { vec1[0], vec1[1], vec1[1] }.
; All three RUN configurations share one set of assertions here (common GFX9
; prefix). The assertions are autogenerated and should be regenerated rather
; than edited by hand.
13332 define void @s_shuffle_v3bf16_v4bf16__4_5_5() {
13333 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__4_5_5:
13335 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13336 ; GFX9-NEXT: ;;#ASMSTART
13337 ; GFX9-NEXT: ; def s[8:9]
13338 ; GFX9-NEXT: ;;#ASMEND
13339 ; GFX9-NEXT: s_lshr_b32 s9, s8, 16
13340 ; GFX9-NEXT: ;;#ASMSTART
13341 ; GFX9-NEXT: ; use s[8:9]
13342 ; GFX9-NEXT: ;;#ASMEND
13343 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; %vec0 is defined but never selected by the mask, and only a single def
; appears in the checked output.
13344 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13345 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13346 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
13347 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13348 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <5, 5, 5>. Mask indices 4-7 select
; from %vec1, so every result lane is vec1[1] (a splat of one element).
; The assertions are autogenerated (see the note at the top of the file) and
; should be regenerated rather than edited by hand.
13352 define void @s_shuffle_v3bf16_v4bf16__5_5_5() {
13353 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__5_5_5:
13355 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13356 ; GFX900-NEXT: ;;#ASMSTART
13357 ; GFX900-NEXT: ; def s[4:5]
13358 ; GFX900-NEXT: ;;#ASMEND
13359 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
13360 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s9
13361 ; GFX900-NEXT: ;;#ASMSTART
13362 ; GFX900-NEXT: ; use s[8:9]
13363 ; GFX900-NEXT: ;;#ASMEND
13364 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13366 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__5_5_5:
13368 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13369 ; GFX90A-NEXT: ;;#ASMSTART
13370 ; GFX90A-NEXT: ; def s[4:5]
13371 ; GFX90A-NEXT: ;;#ASMEND
13372 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
13373 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s9
13374 ; GFX90A-NEXT: ;;#ASMSTART
13375 ; GFX90A-NEXT: ; use s[8:9]
13376 ; GFX90A-NEXT: ;;#ASMEND
13377 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13379 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_5_5:
13381 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13382 ; GFX940-NEXT: ;;#ASMSTART
13383 ; GFX940-NEXT: ; def s[0:1]
13384 ; GFX940-NEXT: ;;#ASMEND
13385 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
13386 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9
13387 ; GFX940-NEXT: ;;#ASMSTART
13388 ; GFX940-NEXT: ; use s[8:9]
13389 ; GFX940-NEXT: ;;#ASMEND
13390 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; %vec0 is defined but never selected by the mask, and only a single def
; appears in the checked output.
13391 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13392 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13393 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
13394 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13395 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <6, 5, 5>. Mask indices 4-7 select
; from %vec1, so the result is { vec1[2], vec1[1], vec1[1] }.
; The assertions are autogenerated (see the note at the top of the file) and
; should be regenerated rather than edited by hand.
13399 define void @s_shuffle_v3bf16_v4bf16__6_5_5() {
13400 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__6_5_5:
13402 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13403 ; GFX900-NEXT: ;;#ASMSTART
13404 ; GFX900-NEXT: ; def s[4:5]
13405 ; GFX900-NEXT: ;;#ASMEND
13406 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
13407 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9
13408 ; GFX900-NEXT: ;;#ASMSTART
13409 ; GFX900-NEXT: ; use s[8:9]
13410 ; GFX900-NEXT: ;;#ASMEND
13411 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13413 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__6_5_5:
13415 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13416 ; GFX90A-NEXT: ;;#ASMSTART
13417 ; GFX90A-NEXT: ; def s[4:5]
13418 ; GFX90A-NEXT: ;;#ASMEND
13419 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
13420 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9
13421 ; GFX90A-NEXT: ;;#ASMSTART
13422 ; GFX90A-NEXT: ; use s[8:9]
13423 ; GFX90A-NEXT: ;;#ASMEND
13424 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13426 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_5_5:
13428 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13429 ; GFX940-NEXT: ;;#ASMSTART
13430 ; GFX940-NEXT: ; def s[0:1]
13431 ; GFX940-NEXT: ;;#ASMEND
13432 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
13433 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9
13434 ; GFX940-NEXT: ;;#ASMSTART
13435 ; GFX940-NEXT: ; use s[8:9]
13436 ; GFX940-NEXT: ;;#ASMEND
13437 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; %vec0 is defined but never selected by the mask, and only a single def
; appears in the checked output.
13438 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13439 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13440 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 5, i32 5>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
13441 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13442 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <7, 5, 5>. Mask indices 4-7 select
; from %vec1, so the result is { vec1[3], vec1[1], vec1[1] }.
; The assertions are autogenerated (see the note at the top of the file) and
; should be regenerated rather than edited by hand.
13446 define void @s_shuffle_v3bf16_v4bf16__7_5_5() {
13447 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_5_5:
13449 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13450 ; GFX900-NEXT: ;;#ASMSTART
13451 ; GFX900-NEXT: ; def s[4:5]
13452 ; GFX900-NEXT: ;;#ASMEND
13453 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
13454 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
13455 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
13456 ; GFX900-NEXT: ;;#ASMSTART
13457 ; GFX900-NEXT: ; use s[8:9]
13458 ; GFX900-NEXT: ;;#ASMEND
13459 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13461 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_5_5:
13463 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13464 ; GFX90A-NEXT: ;;#ASMSTART
13465 ; GFX90A-NEXT: ; def s[4:5]
13466 ; GFX90A-NEXT: ;;#ASMEND
13467 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
13468 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
13469 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
13470 ; GFX90A-NEXT: ;;#ASMSTART
13471 ; GFX90A-NEXT: ; use s[8:9]
13472 ; GFX90A-NEXT: ;;#ASMEND
13473 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13475 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_5:
13477 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13478 ; GFX940-NEXT: ;;#ASMSTART
13479 ; GFX940-NEXT: ; def s[0:1]
13480 ; GFX940-NEXT: ;;#ASMEND
13481 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
13482 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
13483 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
13484 ; GFX940-NEXT: ;;#ASMSTART
13485 ; GFX940-NEXT: ; use s[8:9]
13486 ; GFX940-NEXT: ;;#ASMEND
13487 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; %vec0 is defined but never selected by the mask, and only a single def
; appears in the checked output.
13488 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13489 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13490 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 5>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
13491 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13492 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <7, poison, 5>. Mask indices 4-7
; select from %vec1, so the result is { vec1[3], poison, vec1[1] }.
; The assertions are autogenerated (see the note at the top of the file) and
; should be regenerated rather than edited by hand.
13496 define void @s_shuffle_v3bf16_v4bf16__7_u_5() {
13497 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_u_5:
13499 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13500 ; GFX900-NEXT: ;;#ASMSTART
13501 ; GFX900-NEXT: ; def s[4:5]
13502 ; GFX900-NEXT: ;;#ASMEND
13503 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
13504 ; GFX900-NEXT: s_lshr_b32 s8, s5, 16
13505 ; GFX900-NEXT: ;;#ASMSTART
13506 ; GFX900-NEXT: ; use s[8:9]
13507 ; GFX900-NEXT: ;;#ASMEND
13508 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13510 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_u_5:
13512 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13513 ; GFX90A-NEXT: ;;#ASMSTART
13514 ; GFX90A-NEXT: ; def s[4:5]
13515 ; GFX90A-NEXT: ;;#ASMEND
13516 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
13517 ; GFX90A-NEXT: s_lshr_b32 s8, s5, 16
13518 ; GFX90A-NEXT: ;;#ASMSTART
13519 ; GFX90A-NEXT: ; use s[8:9]
13520 ; GFX90A-NEXT: ;;#ASMEND
13521 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13523 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_5:
13525 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13526 ; GFX940-NEXT: ;;#ASMSTART
13527 ; GFX940-NEXT: ; def s[0:1]
13528 ; GFX940-NEXT: ;;#ASMEND
13529 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
13530 ; GFX940-NEXT: s_lshr_b32 s8, s1, 16
13531 ; GFX940-NEXT: ;;#ASMSTART
13532 ; GFX940-NEXT: ; use s[8:9]
13533 ; GFX940-NEXT: ;;#ASMEND
13534 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; %vec0 is defined but never selected by the mask, and only a single def
; appears in the checked output.
13535 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13536 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13537 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 5>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
13538 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13539 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <7, 0, 5>. Mask indices 0-3 select
; from %vec0 and 4-7 from %vec1, so the result is { vec1[3], vec0[0], vec1[1] }.
; The assertions are autogenerated (see the note at the top of the file) and
; should be regenerated rather than edited by hand.
13543 define void @s_shuffle_v3bf16_v4bf16__7_0_5() {
13544 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_0_5:
13546 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13547 ; GFX900-NEXT: ;;#ASMSTART
13548 ; GFX900-NEXT: ; def s[4:5]
13549 ; GFX900-NEXT: ;;#ASMEND
13550 ; GFX900-NEXT: ;;#ASMSTART
13551 ; GFX900-NEXT: ; def s[6:7]
13552 ; GFX900-NEXT: ;;#ASMEND
13553 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
13554 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
13555 ; GFX900-NEXT: s_lshr_b32 s9, s6, 16
13556 ; GFX900-NEXT: ;;#ASMSTART
13557 ; GFX900-NEXT: ; use s[8:9]
13558 ; GFX900-NEXT: ;;#ASMEND
13559 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13561 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_0_5:
13563 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13564 ; GFX90A-NEXT: ;;#ASMSTART
13565 ; GFX90A-NEXT: ; def s[4:5]
13566 ; GFX90A-NEXT: ;;#ASMEND
13567 ; GFX90A-NEXT: ;;#ASMSTART
13568 ; GFX90A-NEXT: ; def s[6:7]
13569 ; GFX90A-NEXT: ;;#ASMEND
13570 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
13571 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
13572 ; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
13573 ; GFX90A-NEXT: ;;#ASMSTART
13574 ; GFX90A-NEXT: ; use s[8:9]
13575 ; GFX90A-NEXT: ;;#ASMEND
13576 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13578 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_5:
13580 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13581 ; GFX940-NEXT: ;;#ASMSTART
13582 ; GFX940-NEXT: ; def s[0:1]
13583 ; GFX940-NEXT: ;;#ASMEND
13584 ; GFX940-NEXT: ;;#ASMSTART
13585 ; GFX940-NEXT: ; def s[2:3]
13586 ; GFX940-NEXT: ;;#ASMEND
13587 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
13588 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
13589 ; GFX940-NEXT: s_lshr_b32 s9, s2, 16
13590 ; GFX940-NEXT: ;;#ASMSTART
13591 ; GFX940-NEXT: ; use s[8:9]
13592 ; GFX940-NEXT: ;;#ASMEND
13593 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13594 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13595 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13596 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 5>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
13597 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13598 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <7, 1, 5>. Mask indices 0-3 select
; from %vec0 and 4-7 from %vec1, so the result is { vec1[3], vec0[1], vec1[1] }.
; The assertions are autogenerated (see the note at the top of the file) and
; should be regenerated rather than edited by hand.
13602 define void @s_shuffle_v3bf16_v4bf16__7_1_5() {
13603 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_1_5:
13605 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13606 ; GFX900-NEXT: ;;#ASMSTART
13607 ; GFX900-NEXT: ; def s[4:5]
13608 ; GFX900-NEXT: ;;#ASMEND
13609 ; GFX900-NEXT: ;;#ASMSTART
13610 ; GFX900-NEXT: ; def s[6:7]
13611 ; GFX900-NEXT: ;;#ASMEND
13612 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
13613 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
13614 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
13615 ; GFX900-NEXT: s_lshr_b32 s9, s6, 16
13616 ; GFX900-NEXT: ;;#ASMSTART
13617 ; GFX900-NEXT: ; use s[8:9]
13618 ; GFX900-NEXT: ;;#ASMEND
13619 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13621 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_1_5:
13623 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13624 ; GFX90A-NEXT: ;;#ASMSTART
13625 ; GFX90A-NEXT: ; def s[4:5]
13626 ; GFX90A-NEXT: ;;#ASMEND
13627 ; GFX90A-NEXT: ;;#ASMSTART
13628 ; GFX90A-NEXT: ; def s[6:7]
13629 ; GFX90A-NEXT: ;;#ASMEND
13630 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
13631 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
13632 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
13633 ; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
13634 ; GFX90A-NEXT: ;;#ASMSTART
13635 ; GFX90A-NEXT: ; use s[8:9]
13636 ; GFX90A-NEXT: ;;#ASMEND
13637 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13639 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_5:
13641 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13642 ; GFX940-NEXT: ;;#ASMSTART
13643 ; GFX940-NEXT: ; def s[0:1]
13644 ; GFX940-NEXT: ;;#ASMEND
13645 ; GFX940-NEXT: ;;#ASMSTART
13646 ; GFX940-NEXT: ; def s[2:3]
13647 ; GFX940-NEXT: ;;#ASMEND
13648 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
13649 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
13650 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
13651 ; GFX940-NEXT: s_lshr_b32 s9, s2, 16
13652 ; GFX940-NEXT: ;;#ASMSTART
13653 ; GFX940-NEXT: ; use s[8:9]
13654 ; GFX940-NEXT: ;;#ASMEND
13655 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13656 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13657 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13658 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 5>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
13659 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13660 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <7, 2, 5>. Mask indices 0-3 select
; from %vec0 and 4-7 from %vec1, so the result is { vec1[3], vec0[2], vec1[1] }.
; The assertions are autogenerated (see the note at the top of the file) and
; should be regenerated rather than edited by hand.
13664 define void @s_shuffle_v3bf16_v4bf16__7_2_5() {
13665 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_2_5:
13667 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13668 ; GFX900-NEXT: ;;#ASMSTART
13669 ; GFX900-NEXT: ; def s[4:5]
13670 ; GFX900-NEXT: ;;#ASMEND
13671 ; GFX900-NEXT: ;;#ASMSTART
13672 ; GFX900-NEXT: ; def s[6:7]
13673 ; GFX900-NEXT: ;;#ASMEND
13674 ; GFX900-NEXT: s_lshr_b32 s4, s7, 16
13675 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
13676 ; GFX900-NEXT: s_lshr_b32 s9, s6, 16
13677 ; GFX900-NEXT: ;;#ASMSTART
13678 ; GFX900-NEXT: ; use s[8:9]
13679 ; GFX900-NEXT: ;;#ASMEND
13680 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13682 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_2_5:
13684 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13685 ; GFX90A-NEXT: ;;#ASMSTART
13686 ; GFX90A-NEXT: ; def s[4:5]
13687 ; GFX90A-NEXT: ;;#ASMEND
13688 ; GFX90A-NEXT: ;;#ASMSTART
13689 ; GFX90A-NEXT: ; def s[6:7]
13690 ; GFX90A-NEXT: ;;#ASMEND
13691 ; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
13692 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
13693 ; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
13694 ; GFX90A-NEXT: ;;#ASMSTART
13695 ; GFX90A-NEXT: ; use s[8:9]
13696 ; GFX90A-NEXT: ;;#ASMEND
13697 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13699 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_5:
13701 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13702 ; GFX940-NEXT: ;;#ASMSTART
13703 ; GFX940-NEXT: ; def s[0:1]
13704 ; GFX940-NEXT: ;;#ASMEND
13705 ; GFX940-NEXT: ;;#ASMSTART
13706 ; GFX940-NEXT: ; def s[2:3]
13707 ; GFX940-NEXT: ;;#ASMEND
13708 ; GFX940-NEXT: s_lshr_b32 s0, s3, 16
13709 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
13710 ; GFX940-NEXT: s_lshr_b32 s9, s2, 16
13711 ; GFX940-NEXT: ;;#ASMSTART
13712 ; GFX940-NEXT: ; use s[8:9]
13713 ; GFX940-NEXT: ;;#ASMEND
13714 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13715 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13716 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13717 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 5>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
13718 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13719 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <7, 3, 5>. Mask indices 0-3 select
; from %vec0 and 4-7 from %vec1, so the result is { vec1[3], vec0[3], vec1[1] }.
; The assertions are autogenerated (see the note at the top of the file) and
; should be regenerated rather than edited by hand.
13723 define void @s_shuffle_v3bf16_v4bf16__7_3_5() {
13724 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_3_5:
13726 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13727 ; GFX900-NEXT: ;;#ASMSTART
13728 ; GFX900-NEXT: ; def s[4:5]
13729 ; GFX900-NEXT: ;;#ASMEND
13730 ; GFX900-NEXT: ;;#ASMSTART
13731 ; GFX900-NEXT: ; def s[6:7]
13732 ; GFX900-NEXT: ;;#ASMEND
13733 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
13734 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
13735 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
13736 ; GFX900-NEXT: s_lshr_b32 s9, s6, 16
13737 ; GFX900-NEXT: ;;#ASMSTART
13738 ; GFX900-NEXT: ; use s[8:9]
13739 ; GFX900-NEXT: ;;#ASMEND
13740 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13742 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_3_5:
13744 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13745 ; GFX90A-NEXT: ;;#ASMSTART
13746 ; GFX90A-NEXT: ; def s[4:5]
13747 ; GFX90A-NEXT: ;;#ASMEND
13748 ; GFX90A-NEXT: ;;#ASMSTART
13749 ; GFX90A-NEXT: ; def s[6:7]
13750 ; GFX90A-NEXT: ;;#ASMEND
13751 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
13752 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
13753 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
13754 ; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
13755 ; GFX90A-NEXT: ;;#ASMSTART
13756 ; GFX90A-NEXT: ; use s[8:9]
13757 ; GFX90A-NEXT: ;;#ASMEND
13758 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13760 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_5:
13762 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13763 ; GFX940-NEXT: ;;#ASMSTART
13764 ; GFX940-NEXT: ; def s[0:1]
13765 ; GFX940-NEXT: ;;#ASMEND
13766 ; GFX940-NEXT: ;;#ASMSTART
13767 ; GFX940-NEXT: ; def s[2:3]
13768 ; GFX940-NEXT: ;;#ASMEND
13769 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
13770 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
13771 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
13772 ; GFX940-NEXT: s_lshr_b32 s9, s2, 16
13773 ; GFX940-NEXT: ;;#ASMSTART
13774 ; GFX940-NEXT: ; use s[8:9]
13775 ; GFX940-NEXT: ;;#ASMEND
13776 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13777 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13778 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13779 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 5>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
13780 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13781 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <7, 4, 5>. Mask indices 4-7 select
; from %vec1, so the result is { vec1[3], vec1[0], vec1[1] }.
; The assertions are autogenerated (see the note at the top of the file) and
; should be regenerated rather than edited by hand.
13785 define void @s_shuffle_v3bf16_v4bf16__7_4_5() {
13786 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_4_5:
13788 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13789 ; GFX900-NEXT: ;;#ASMSTART
13790 ; GFX900-NEXT: ; def s[4:5]
13791 ; GFX900-NEXT: ;;#ASMEND
13792 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
13793 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
13794 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
13795 ; GFX900-NEXT: ;;#ASMSTART
13796 ; GFX900-NEXT: ; use s[8:9]
13797 ; GFX900-NEXT: ;;#ASMEND
13798 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13800 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_4_5:
13802 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13803 ; GFX90A-NEXT: ;;#ASMSTART
13804 ; GFX90A-NEXT: ; def s[4:5]
13805 ; GFX90A-NEXT: ;;#ASMEND
13806 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
13807 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
13808 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
13809 ; GFX90A-NEXT: ;;#ASMSTART
13810 ; GFX90A-NEXT: ; use s[8:9]
13811 ; GFX90A-NEXT: ;;#ASMEND
13812 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13814 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_5:
13816 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13817 ; GFX940-NEXT: ;;#ASMSTART
13818 ; GFX940-NEXT: ; def s[0:1]
13819 ; GFX940-NEXT: ;;#ASMEND
13820 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
13821 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
13822 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
13823 ; GFX940-NEXT: ;;#ASMSTART
13824 ; GFX940-NEXT: ; use s[8:9]
13825 ; GFX940-NEXT: ;;#ASMEND
13826 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; %vec0 is defined but never selected by the mask, and only a single def
; appears in the checked output.
13827 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13828 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13829 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 5>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
13830 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13831 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <7, 6, 5>. Mask indices 4-7 select
; from %vec1, so the result is { vec1[3], vec1[2], vec1[1] }.
; The assertions are autogenerated (see the note at the top of the file) and
; should be regenerated rather than edited by hand.
13835 define void @s_shuffle_v3bf16_v4bf16__7_6_5() {
13836 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_6_5:
13838 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13839 ; GFX900-NEXT: ;;#ASMSTART
13840 ; GFX900-NEXT: ; def s[4:5]
13841 ; GFX900-NEXT: ;;#ASMEND
13842 ; GFX900-NEXT: s_lshr_b32 s6, s5, 16
13843 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5
13844 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
13845 ; GFX900-NEXT: ;;#ASMSTART
13846 ; GFX900-NEXT: ; use s[8:9]
13847 ; GFX900-NEXT: ;;#ASMEND
13848 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13850 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_6_5:
13852 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13853 ; GFX90A-NEXT: ;;#ASMSTART
13854 ; GFX90A-NEXT: ; def s[4:5]
13855 ; GFX90A-NEXT: ;;#ASMEND
13856 ; GFX90A-NEXT: s_lshr_b32 s6, s5, 16
13857 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5
13858 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
13859 ; GFX90A-NEXT: ;;#ASMSTART
13860 ; GFX90A-NEXT: ; use s[8:9]
13861 ; GFX90A-NEXT: ;;#ASMEND
13862 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13864 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_5:
13866 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13867 ; GFX940-NEXT: ;;#ASMSTART
13868 ; GFX940-NEXT: ; def s[0:1]
13869 ; GFX940-NEXT: ;;#ASMEND
13870 ; GFX940-NEXT: s_lshr_b32 s2, s1, 16
13871 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1
13872 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
13873 ; GFX940-NEXT: ;;#ASMSTART
13874 ; GFX940-NEXT: ; use s[8:9]
13875 ; GFX940-NEXT: ;;#ASMEND
13876 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; %vec0 is defined but never selected by the mask, and only a single def
; appears in the checked output.
13877 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13878 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13879 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 5>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
13880 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13881 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <poison, 6, 6>. Mask indices 4-7
; select from %vec1, so the result is { poison, vec1[2], vec1[2] }.
; All three RUN configurations share one set of assertions here (common GFX9
; prefix). The assertions are autogenerated and should be regenerated rather
; than edited by hand.
13885 define void @s_shuffle_v3bf16_v4bf16__u_6_6() {
13886 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__u_6_6:
13888 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13889 ; GFX9-NEXT: ;;#ASMSTART
13890 ; GFX9-NEXT: ; def s[8:9]
13891 ; GFX9-NEXT: ;;#ASMEND
13892 ; GFX9-NEXT: s_lshl_b32 s8, s9, 16
13893 ; GFX9-NEXT: ;;#ASMSTART
13894 ; GFX9-NEXT: ; use s[8:9]
13895 ; GFX9-NEXT: ;;#ASMEND
13896 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; %vec0 is defined but never selected by the mask, and only a single def
; appears in the checked output.
13897 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13898 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13899 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 poison, i32 6, i32 6>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
13900 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13901 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <0, 6, 6>. Mask indices 0-3 select
; from %vec0 and 4-7 from %vec1, so the result is { vec0[0], vec1[2], vec1[2] }.
; The assertions are autogenerated (see the note at the top of the file) and
; should be regenerated rather than edited by hand.
13905 define void @s_shuffle_v3bf16_v4bf16__0_6_6() {
13906 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__0_6_6:
13908 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13909 ; GFX900-NEXT: ;;#ASMSTART
13910 ; GFX900-NEXT: ; def s[8:9]
13911 ; GFX900-NEXT: ;;#ASMEND
13912 ; GFX900-NEXT: ;;#ASMSTART
13913 ; GFX900-NEXT: ; def s[4:5]
13914 ; GFX900-NEXT: ;;#ASMEND
13915 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
13916 ; GFX900-NEXT: ;;#ASMSTART
13917 ; GFX900-NEXT: ; use s[8:9]
13918 ; GFX900-NEXT: ;;#ASMEND
13919 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13921 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__0_6_6:
13923 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13924 ; GFX90A-NEXT: ;;#ASMSTART
13925 ; GFX90A-NEXT: ; def s[8:9]
13926 ; GFX90A-NEXT: ;;#ASMEND
13927 ; GFX90A-NEXT: ;;#ASMSTART
13928 ; GFX90A-NEXT: ; def s[4:5]
13929 ; GFX90A-NEXT: ;;#ASMEND
13930 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
13931 ; GFX90A-NEXT: ;;#ASMSTART
13932 ; GFX90A-NEXT: ; use s[8:9]
13933 ; GFX90A-NEXT: ;;#ASMEND
13934 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13936 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__0_6_6:
13938 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13939 ; GFX940-NEXT: ;;#ASMSTART
13940 ; GFX940-NEXT: ; def s[8:9]
13941 ; GFX940-NEXT: ;;#ASMEND
13942 ; GFX940-NEXT: ;;#ASMSTART
13943 ; GFX940-NEXT: ; def s[0:1]
13944 ; GFX940-NEXT: ;;#ASMEND
13945 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
13946 ; GFX940-NEXT: ;;#ASMSTART
13947 ; GFX940-NEXT: ; use s[8:9]
13948 ; GFX940-NEXT: ;;#ASMEND
13949 ; GFX940-NEXT: s_setpc_b64 s[30:31]
13950 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
13951 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
13952 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 0, i32 6, i32 6>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
13953 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13954 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <1, 6, 6>. Mask indices 0-3 select
; from %vec0 and 4-7 from %vec1, so the result is { vec0[1], vec1[2], vec1[2] }.
; The assertions are autogenerated (see the note at the top of the file) and
; should be regenerated rather than edited by hand.
13958 define void @s_shuffle_v3bf16_v4bf16__1_6_6() {
13959 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__1_6_6:
13961 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13962 ; GFX900-NEXT: ;;#ASMSTART
13963 ; GFX900-NEXT: ; def s[4:5]
13964 ; GFX900-NEXT: ;;#ASMEND
13965 ; GFX900-NEXT: ;;#ASMSTART
13966 ; GFX900-NEXT: ; def s[8:9]
13967 ; GFX900-NEXT: ;;#ASMEND
13968 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
13969 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
13970 ; GFX900-NEXT: ;;#ASMSTART
13971 ; GFX900-NEXT: ; use s[8:9]
13972 ; GFX900-NEXT: ;;#ASMEND
13973 ; GFX900-NEXT: s_setpc_b64 s[30:31]
13975 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__1_6_6:
13977 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13978 ; GFX90A-NEXT: ;;#ASMSTART
13979 ; GFX90A-NEXT: ; def s[4:5]
13980 ; GFX90A-NEXT: ;;#ASMEND
13981 ; GFX90A-NEXT: ;;#ASMSTART
13982 ; GFX90A-NEXT: ; def s[8:9]
13983 ; GFX90A-NEXT: ;;#ASMEND
13984 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
13985 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
13986 ; GFX90A-NEXT: ;;#ASMSTART
13987 ; GFX90A-NEXT: ; use s[8:9]
13988 ; GFX90A-NEXT: ;;#ASMEND
13989 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
13991 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_6_6:
13993 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13994 ; GFX940-NEXT: ;;#ASMSTART
13995 ; GFX940-NEXT: ; def s[0:1]
13996 ; GFX940-NEXT: ;;#ASMEND
13997 ; GFX940-NEXT: ;;#ASMSTART
13998 ; GFX940-NEXT: ; def s[8:9]
13999 ; GFX940-NEXT: ;;#ASMEND
14000 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
14001 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
14002 ; GFX940-NEXT: ;;#ASMSTART
14003 ; GFX940-NEXT: ; use s[8:9]
14004 ; GFX940-NEXT: ;;#ASMEND
14005 ; GFX940-NEXT: s_setpc_b64 s[30:31]
14006 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14007 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14008 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 1, i32 6, i32 6>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
14009 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14010 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <2, 6, 6>. Mask indices 0-3 select
; from %vec0 and 4-7 from %vec1, so the result is { vec0[2], vec1[2], vec1[2] }.
; The assertions are autogenerated (see the note at the top of the file) and
; should be regenerated rather than edited by hand.
14014 define void @s_shuffle_v3bf16_v4bf16__2_6_6() {
14015 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__2_6_6:
14017 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14018 ; GFX900-NEXT: ;;#ASMSTART
14019 ; GFX900-NEXT: ; def s[8:9]
14020 ; GFX900-NEXT: ;;#ASMEND
14021 ; GFX900-NEXT: ;;#ASMSTART
14022 ; GFX900-NEXT: ; def s[4:5]
14023 ; GFX900-NEXT: ;;#ASMEND
14024 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9
14025 ; GFX900-NEXT: ;;#ASMSTART
14026 ; GFX900-NEXT: ; use s[8:9]
14027 ; GFX900-NEXT: ;;#ASMEND
14028 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14030 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__2_6_6:
14032 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14033 ; GFX90A-NEXT: ;;#ASMSTART
14034 ; GFX90A-NEXT: ; def s[8:9]
14035 ; GFX90A-NEXT: ;;#ASMEND
14036 ; GFX90A-NEXT: ;;#ASMSTART
14037 ; GFX90A-NEXT: ; def s[4:5]
14038 ; GFX90A-NEXT: ;;#ASMEND
14039 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9
14040 ; GFX90A-NEXT: ;;#ASMSTART
14041 ; GFX90A-NEXT: ; use s[8:9]
14042 ; GFX90A-NEXT: ;;#ASMEND
14043 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14045 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_6_6:
14047 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14048 ; GFX940-NEXT: ;;#ASMSTART
14049 ; GFX940-NEXT: ; def s[8:9]
14050 ; GFX940-NEXT: ;;#ASMEND
14051 ; GFX940-NEXT: ;;#ASMSTART
14052 ; GFX940-NEXT: ; def s[0:1]
14053 ; GFX940-NEXT: ;;#ASMEND
14054 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9
14055 ; GFX940-NEXT: ;;#ASMSTART
14056 ; GFX940-NEXT: ; use s[8:9]
14057 ; GFX940-NEXT: ;;#ASMEND
14058 ; GFX940-NEXT: s_setpc_b64 s[30:31]
14059 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14060 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14061 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 2, i32 6, i32 6>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
14062 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14063 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <3, 6, 6>. Mask indices 0-3 select
; from %vec0 and 4-7 from %vec1, so the result is { vec0[3], vec1[2], vec1[2] }.
; The assertions are autogenerated (see the note at the top of the file) and
; should be regenerated rather than edited by hand.
14067 define void @s_shuffle_v3bf16_v4bf16__3_6_6() {
14068 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__3_6_6:
14070 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14071 ; GFX900-NEXT: ;;#ASMSTART
14072 ; GFX900-NEXT: ; def s[4:5]
14073 ; GFX900-NEXT: ;;#ASMEND
14074 ; GFX900-NEXT: ;;#ASMSTART
14075 ; GFX900-NEXT: ; def s[8:9]
14076 ; GFX900-NEXT: ;;#ASMEND
14077 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
14078 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14079 ; GFX900-NEXT: ;;#ASMSTART
14080 ; GFX900-NEXT: ; use s[8:9]
14081 ; GFX900-NEXT: ;;#ASMEND
14082 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14084 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__3_6_6:
14086 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14087 ; GFX90A-NEXT: ;;#ASMSTART
14088 ; GFX90A-NEXT: ; def s[4:5]
14089 ; GFX90A-NEXT: ;;#ASMEND
14090 ; GFX90A-NEXT: ;;#ASMSTART
14091 ; GFX90A-NEXT: ; def s[8:9]
14092 ; GFX90A-NEXT: ;;#ASMEND
14093 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
14094 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14095 ; GFX90A-NEXT: ;;#ASMSTART
14096 ; GFX90A-NEXT: ; use s[8:9]
14097 ; GFX90A-NEXT: ;;#ASMEND
14098 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14100 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_6_6:
14102 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14103 ; GFX940-NEXT: ;;#ASMSTART
14104 ; GFX940-NEXT: ; def s[0:1]
14105 ; GFX940-NEXT: ;;#ASMEND
14106 ; GFX940-NEXT: ;;#ASMSTART
14107 ; GFX940-NEXT: ; def s[8:9]
14108 ; GFX940-NEXT: ;;#ASMEND
14109 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
14110 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
14111 ; GFX940-NEXT: ;;#ASMSTART
14112 ; GFX940-NEXT: ; use s[8:9]
14113 ; GFX940-NEXT: ;;#ASMEND
14114 ; GFX940-NEXT: s_setpc_b64 s[30:31]
14115 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14116 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14117 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 3, i32 6, i32 6>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
14118 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14119 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Scalar-register ("=s") shuffle with mask <4, 6, 6>. Mask indices 4-7 select
; from %vec1, so the result is { vec1[0], vec1[2], vec1[2] }.
; All three RUN configurations share one set of assertions here (common GFX9
; prefix). The assertions are autogenerated and should be regenerated rather
; than edited by hand.
14123 define void @s_shuffle_v3bf16_v4bf16__4_6_6() {
14124 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__4_6_6:
14126 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14127 ; GFX9-NEXT: ;;#ASMSTART
14128 ; GFX9-NEXT: ; def s[8:9]
14129 ; GFX9-NEXT: ;;#ASMEND
14130 ; GFX9-NEXT: s_pack_ll_b32_b16 s8, s8, s9
14131 ; GFX9-NEXT: ;;#ASMSTART
14132 ; GFX9-NEXT: ; use s[8:9]
14133 ; GFX9-NEXT: ;;#ASMEND
14134 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; %vec0 is defined but never selected by the mask, and only a single def
; appears in the checked output.
14135 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14136 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14137 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 4, i32 6, i32 6>
; Pad the 3-lane result to 4 lanes (lane 3 is poison) and hand it to the
; inline asm use that is pinned to s[8:9].
14138 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14139 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <5, 6, 6> over (%vec0, %vec1): mask values 4-7 address lanes 0-3 of
; %vec1, so the shuffle yields <%vec1[1], %vec1[2], %vec1[2]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14143 define void @s_shuffle_v3bf16_v4bf16__5_6_6() {
14144 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__5_6_6:
14146 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14147 ; GFX900-NEXT: ;;#ASMSTART
14148 ; GFX900-NEXT: ; def s[8:9]
14149 ; GFX900-NEXT: ;;#ASMEND
14150 ; GFX900-NEXT: s_lshr_b32 s4, s8, 16
14151 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14152 ; GFX900-NEXT: ;;#ASMSTART
14153 ; GFX900-NEXT: ; use s[8:9]
14154 ; GFX900-NEXT: ;;#ASMEND
14155 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14157 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__5_6_6:
14159 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14160 ; GFX90A-NEXT: ;;#ASMSTART
14161 ; GFX90A-NEXT: ; def s[8:9]
14162 ; GFX90A-NEXT: ;;#ASMEND
14163 ; GFX90A-NEXT: s_lshr_b32 s4, s8, 16
14164 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14165 ; GFX90A-NEXT: ;;#ASMSTART
14166 ; GFX90A-NEXT: ; use s[8:9]
14167 ; GFX90A-NEXT: ;;#ASMEND
14168 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14170 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_6_6:
14172 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14173 ; GFX940-NEXT: ;;#ASMSTART
14174 ; GFX940-NEXT: ; def s[8:9]
14175 ; GFX940-NEXT: ;;#ASMEND
14176 ; GFX940-NEXT: s_lshr_b32 s0, s8, 16
14177 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
14178 ; GFX940-NEXT: ;;#ASMSTART
14179 ; GFX940-NEXT: ; use s[8:9]
14180 ; GFX940-NEXT: ;;#ASMEND
14181 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; NOTE(review): the mask never reads %vec0 and the checks show only one
; `; def`; presumably the unused, non-sideeffect asm is dropped - confirm.
14182 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14183 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14184 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 6, i32 6>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14185 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14186 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <6, 6, 6> over (%vec0, %vec1): mask values 4-7 address lanes 0-3 of
; %vec1, so all three result lanes are %vec1[2] (a splat of one element).
14190 define void @s_shuffle_v3bf16_v4bf16__6_6_6() {
14191 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__6_6_6:
14193 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14194 ; GFX9-NEXT: ;;#ASMSTART
14195 ; GFX9-NEXT: ; def s[8:9]
14196 ; GFX9-NEXT: ;;#ASMEND
14197 ; GFX9-NEXT: s_pack_ll_b32_b16 s8, s9, s9
14198 ; GFX9-NEXT: ;;#ASMSTART
14199 ; GFX9-NEXT: ; use s[8:9]
14200 ; GFX9-NEXT: ;;#ASMEND
14201 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; NOTE(review): the mask never reads %vec0 and the checks show only one
; `; def`; presumably the unused, non-sideeffect asm is dropped - confirm.
14202 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14203 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14204 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 6, i32 6>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14205 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14206 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <7, 6, 6> over (%vec0, %vec1): mask values 4-7 address lanes 0-3 of
; %vec1, so the shuffle yields <%vec1[3], %vec1[2], %vec1[2]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14210 define void @s_shuffle_v3bf16_v4bf16__7_6_6() {
14211 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_6_6:
14213 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14214 ; GFX900-NEXT: ;;#ASMSTART
14215 ; GFX900-NEXT: ; def s[8:9]
14216 ; GFX900-NEXT: ;;#ASMEND
14217 ; GFX900-NEXT: s_lshr_b32 s4, s9, 16
14218 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14219 ; GFX900-NEXT: ;;#ASMSTART
14220 ; GFX900-NEXT: ; use s[8:9]
14221 ; GFX900-NEXT: ;;#ASMEND
14222 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14224 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_6_6:
14226 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14227 ; GFX90A-NEXT: ;;#ASMSTART
14228 ; GFX90A-NEXT: ; def s[8:9]
14229 ; GFX90A-NEXT: ;;#ASMEND
14230 ; GFX90A-NEXT: s_lshr_b32 s4, s9, 16
14231 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14232 ; GFX90A-NEXT: ;;#ASMSTART
14233 ; GFX90A-NEXT: ; use s[8:9]
14234 ; GFX90A-NEXT: ;;#ASMEND
14235 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14237 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_6:
14239 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14240 ; GFX940-NEXT: ;;#ASMSTART
14241 ; GFX940-NEXT: ; def s[8:9]
14242 ; GFX940-NEXT: ;;#ASMEND
14243 ; GFX940-NEXT: s_lshr_b32 s0, s9, 16
14244 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
14245 ; GFX940-NEXT: ;;#ASMSTART
14246 ; GFX940-NEXT: ; use s[8:9]
14247 ; GFX940-NEXT: ;;#ASMEND
14248 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; NOTE(review): the mask never reads %vec0 and the checks show only one
; `; def`; presumably the unused, non-sideeffect asm is dropped - confirm.
14249 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14250 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14251 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 6>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14252 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14253 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <7, poison, 6> over (%vec0, %vec1): mask values 4-7 address lanes 0-3
; of %vec1, so the shuffle yields <%vec1[3], poison, %vec1[2]>.
; With lane 1 unconstrained, the checks expect a lone shift into s8 and no pack.
14257 define void @s_shuffle_v3bf16_v4bf16__7_u_6() {
14258 ; GFX9-LABEL: s_shuffle_v3bf16_v4bf16__7_u_6:
14260 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14261 ; GFX9-NEXT: ;;#ASMSTART
14262 ; GFX9-NEXT: ; def s[8:9]
14263 ; GFX9-NEXT: ;;#ASMEND
14264 ; GFX9-NEXT: s_lshr_b32 s8, s9, 16
14265 ; GFX9-NEXT: ;;#ASMSTART
14266 ; GFX9-NEXT: ; use s[8:9]
14267 ; GFX9-NEXT: ;;#ASMEND
14268 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; NOTE(review): the mask never reads %vec0 and the checks show only one
; `; def`; presumably the unused, non-sideeffect asm is dropped - confirm.
14269 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14270 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14271 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 6>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14272 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14273 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <7, 0, 6> over (%vec0, %vec1): values 0-3 address %vec0 and 4-7 address
; lanes 0-3 of %vec1, so the shuffle yields <%vec1[3], %vec0[0], %vec1[2]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14277 define void @s_shuffle_v3bf16_v4bf16__7_0_6() {
14278 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_0_6:
14280 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14281 ; GFX900-NEXT: ;;#ASMSTART
14282 ; GFX900-NEXT: ; def s[4:5]
14283 ; GFX900-NEXT: ;;#ASMEND
14284 ; GFX900-NEXT: ;;#ASMSTART
14285 ; GFX900-NEXT: ; def s[8:9]
14286 ; GFX900-NEXT: ;;#ASMEND
14287 ; GFX900-NEXT: s_lshr_b32 s5, s9, 16
14288 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
14289 ; GFX900-NEXT: ;;#ASMSTART
14290 ; GFX900-NEXT: ; use s[8:9]
14291 ; GFX900-NEXT: ;;#ASMEND
14292 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14294 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_0_6:
14296 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14297 ; GFX90A-NEXT: ;;#ASMSTART
14298 ; GFX90A-NEXT: ; def s[4:5]
14299 ; GFX90A-NEXT: ;;#ASMEND
14300 ; GFX90A-NEXT: ;;#ASMSTART
14301 ; GFX90A-NEXT: ; def s[8:9]
14302 ; GFX90A-NEXT: ;;#ASMEND
14303 ; GFX90A-NEXT: s_lshr_b32 s5, s9, 16
14304 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
14305 ; GFX90A-NEXT: ;;#ASMSTART
14306 ; GFX90A-NEXT: ; use s[8:9]
14307 ; GFX90A-NEXT: ;;#ASMEND
14308 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14310 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_6:
14312 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14313 ; GFX940-NEXT: ;;#ASMSTART
14314 ; GFX940-NEXT: ; def s[0:1]
14315 ; GFX940-NEXT: ;;#ASMEND
14316 ; GFX940-NEXT: ;;#ASMSTART
14317 ; GFX940-NEXT: ; def s[8:9]
14318 ; GFX940-NEXT: ;;#ASMEND
14319 ; GFX940-NEXT: s_lshr_b32 s1, s9, 16
14320 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
14321 ; GFX940-NEXT: ;;#ASMSTART
14322 ; GFX940-NEXT: ; use s[8:9]
14323 ; GFX940-NEXT: ;;#ASMEND
14324 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are read by the mask, and the checks show two `; def` blocks.
14325 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14326 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14327 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 6>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14328 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14329 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <7, 1, 6> over (%vec0, %vec1): values 0-3 address %vec0 and 4-7 address
; lanes 0-3 of %vec1, so the shuffle yields <%vec1[3], %vec0[1], %vec1[2]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14333 define void @s_shuffle_v3bf16_v4bf16__7_1_6() {
14334 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_1_6:
14336 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14337 ; GFX900-NEXT: ;;#ASMSTART
14338 ; GFX900-NEXT: ; def s[4:5]
14339 ; GFX900-NEXT: ;;#ASMEND
14340 ; GFX900-NEXT: ;;#ASMSTART
14341 ; GFX900-NEXT: ; def s[8:9]
14342 ; GFX900-NEXT: ;;#ASMEND
14343 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
14344 ; GFX900-NEXT: s_lshr_b32 s5, s9, 16
14345 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
14346 ; GFX900-NEXT: ;;#ASMSTART
14347 ; GFX900-NEXT: ; use s[8:9]
14348 ; GFX900-NEXT: ;;#ASMEND
14349 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14351 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_1_6:
14353 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14354 ; GFX90A-NEXT: ;;#ASMSTART
14355 ; GFX90A-NEXT: ; def s[4:5]
14356 ; GFX90A-NEXT: ;;#ASMEND
14357 ; GFX90A-NEXT: ;;#ASMSTART
14358 ; GFX90A-NEXT: ; def s[8:9]
14359 ; GFX90A-NEXT: ;;#ASMEND
14360 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
14361 ; GFX90A-NEXT: s_lshr_b32 s5, s9, 16
14362 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
14363 ; GFX90A-NEXT: ;;#ASMSTART
14364 ; GFX90A-NEXT: ; use s[8:9]
14365 ; GFX90A-NEXT: ;;#ASMEND
14366 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14368 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_6:
14370 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14371 ; GFX940-NEXT: ;;#ASMSTART
14372 ; GFX940-NEXT: ; def s[0:1]
14373 ; GFX940-NEXT: ;;#ASMEND
14374 ; GFX940-NEXT: ;;#ASMSTART
14375 ; GFX940-NEXT: ; def s[8:9]
14376 ; GFX940-NEXT: ;;#ASMEND
14377 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
14378 ; GFX940-NEXT: s_lshr_b32 s1, s9, 16
14379 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
14380 ; GFX940-NEXT: ;;#ASMSTART
14381 ; GFX940-NEXT: ; use s[8:9]
14382 ; GFX940-NEXT: ;;#ASMEND
14383 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are read by the mask, and the checks show two `; def` blocks.
14384 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14385 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14386 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 6>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14387 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14388 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <7, 2, 6> over (%vec0, %vec1): values 0-3 address %vec0 and 4-7 address
; lanes 0-3 of %vec1, so the shuffle yields <%vec1[3], %vec0[2], %vec1[2]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14392 define void @s_shuffle_v3bf16_v4bf16__7_2_6() {
14393 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_2_6:
14395 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14396 ; GFX900-NEXT: ;;#ASMSTART
14397 ; GFX900-NEXT: ; def s[4:5]
14398 ; GFX900-NEXT: ;;#ASMEND
14399 ; GFX900-NEXT: ;;#ASMSTART
14400 ; GFX900-NEXT: ; def s[8:9]
14401 ; GFX900-NEXT: ;;#ASMEND
14402 ; GFX900-NEXT: s_lshr_b32 s4, s9, 16
14403 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
14404 ; GFX900-NEXT: ;;#ASMSTART
14405 ; GFX900-NEXT: ; use s[8:9]
14406 ; GFX900-NEXT: ;;#ASMEND
14407 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14409 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_2_6:
14411 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14412 ; GFX90A-NEXT: ;;#ASMSTART
14413 ; GFX90A-NEXT: ; def s[4:5]
14414 ; GFX90A-NEXT: ;;#ASMEND
14415 ; GFX90A-NEXT: ;;#ASMSTART
14416 ; GFX90A-NEXT: ; def s[8:9]
14417 ; GFX90A-NEXT: ;;#ASMEND
14418 ; GFX90A-NEXT: s_lshr_b32 s4, s9, 16
14419 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
14420 ; GFX90A-NEXT: ;;#ASMSTART
14421 ; GFX90A-NEXT: ; use s[8:9]
14422 ; GFX90A-NEXT: ;;#ASMEND
14423 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14425 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_6:
14427 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14428 ; GFX940-NEXT: ;;#ASMSTART
14429 ; GFX940-NEXT: ; def s[0:1]
14430 ; GFX940-NEXT: ;;#ASMEND
14431 ; GFX940-NEXT: ;;#ASMSTART
14432 ; GFX940-NEXT: ; def s[8:9]
14433 ; GFX940-NEXT: ;;#ASMEND
14434 ; GFX940-NEXT: s_lshr_b32 s0, s9, 16
14435 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
14436 ; GFX940-NEXT: ;;#ASMSTART
14437 ; GFX940-NEXT: ; use s[8:9]
14438 ; GFX940-NEXT: ;;#ASMEND
14439 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are read by the mask, and the checks show two `; def` blocks.
14440 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14441 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14442 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 6>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14443 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14444 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <7, 3, 6> over (%vec0, %vec1): values 0-3 address %vec0 and 4-7 address
; lanes 0-3 of %vec1, so the shuffle yields <%vec1[3], %vec0[3], %vec1[2]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14448 define void @s_shuffle_v3bf16_v4bf16__7_3_6() {
14449 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_3_6:
14451 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14452 ; GFX900-NEXT: ;;#ASMSTART
14453 ; GFX900-NEXT: ; def s[4:5]
14454 ; GFX900-NEXT: ;;#ASMEND
14455 ; GFX900-NEXT: ;;#ASMSTART
14456 ; GFX900-NEXT: ; def s[8:9]
14457 ; GFX900-NEXT: ;;#ASMEND
14458 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
14459 ; GFX900-NEXT: s_lshr_b32 s5, s9, 16
14460 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
14461 ; GFX900-NEXT: ;;#ASMSTART
14462 ; GFX900-NEXT: ; use s[8:9]
14463 ; GFX900-NEXT: ;;#ASMEND
14464 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14466 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_3_6:
14468 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14469 ; GFX90A-NEXT: ;;#ASMSTART
14470 ; GFX90A-NEXT: ; def s[4:5]
14471 ; GFX90A-NEXT: ;;#ASMEND
14472 ; GFX90A-NEXT: ;;#ASMSTART
14473 ; GFX90A-NEXT: ; def s[8:9]
14474 ; GFX90A-NEXT: ;;#ASMEND
14475 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
14476 ; GFX90A-NEXT: s_lshr_b32 s5, s9, 16
14477 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
14478 ; GFX90A-NEXT: ;;#ASMSTART
14479 ; GFX90A-NEXT: ; use s[8:9]
14480 ; GFX90A-NEXT: ;;#ASMEND
14481 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14483 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_6:
14485 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14486 ; GFX940-NEXT: ;;#ASMSTART
14487 ; GFX940-NEXT: ; def s[0:1]
14488 ; GFX940-NEXT: ;;#ASMEND
14489 ; GFX940-NEXT: ;;#ASMSTART
14490 ; GFX940-NEXT: ; def s[8:9]
14491 ; GFX940-NEXT: ;;#ASMEND
14492 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
14493 ; GFX940-NEXT: s_lshr_b32 s1, s9, 16
14494 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
14495 ; GFX940-NEXT: ;;#ASMSTART
14496 ; GFX940-NEXT: ; use s[8:9]
14497 ; GFX940-NEXT: ;;#ASMEND
14498 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are read by the mask, and the checks show two `; def` blocks.
14499 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14500 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14501 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 6>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14502 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14503 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <7, 4, 6> over (%vec0, %vec1): mask values 4-7 address lanes 0-3 of
; %vec1, so the shuffle yields <%vec1[3], %vec1[0], %vec1[2]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14507 define void @s_shuffle_v3bf16_v4bf16__7_4_6() {
14508 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_4_6:
14510 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14511 ; GFX900-NEXT: ;;#ASMSTART
14512 ; GFX900-NEXT: ; def s[8:9]
14513 ; GFX900-NEXT: ;;#ASMEND
14514 ; GFX900-NEXT: s_lshr_b32 s4, s9, 16
14515 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s8
14516 ; GFX900-NEXT: ;;#ASMSTART
14517 ; GFX900-NEXT: ; use s[8:9]
14518 ; GFX900-NEXT: ;;#ASMEND
14519 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14521 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_4_6:
14523 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14524 ; GFX90A-NEXT: ;;#ASMSTART
14525 ; GFX90A-NEXT: ; def s[8:9]
14526 ; GFX90A-NEXT: ;;#ASMEND
14527 ; GFX90A-NEXT: s_lshr_b32 s4, s9, 16
14528 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s8
14529 ; GFX90A-NEXT: ;;#ASMSTART
14530 ; GFX90A-NEXT: ; use s[8:9]
14531 ; GFX90A-NEXT: ;;#ASMEND
14532 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14534 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_6:
14536 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14537 ; GFX940-NEXT: ;;#ASMSTART
14538 ; GFX940-NEXT: ; def s[8:9]
14539 ; GFX940-NEXT: ;;#ASMEND
14540 ; GFX940-NEXT: s_lshr_b32 s0, s9, 16
14541 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s8
14542 ; GFX940-NEXT: ;;#ASMSTART
14543 ; GFX940-NEXT: ; use s[8:9]
14544 ; GFX940-NEXT: ;;#ASMEND
14545 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; NOTE(review): the mask never reads %vec0 and the checks show only one
; `; def`; presumably the unused, non-sideeffect asm is dropped - confirm.
14546 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14547 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14548 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 6>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14549 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14550 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <7, 5, 6> over (%vec0, %vec1): mask values 4-7 address lanes 0-3 of
; %vec1, so the shuffle yields <%vec1[3], %vec1[1], %vec1[2]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14554 define void @s_shuffle_v3bf16_v4bf16__7_5_6() {
14555 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_5_6:
14557 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14558 ; GFX900-NEXT: ;;#ASMSTART
14559 ; GFX900-NEXT: ; def s[8:9]
14560 ; GFX900-NEXT: ;;#ASMEND
14561 ; GFX900-NEXT: s_lshr_b32 s4, s8, 16
14562 ; GFX900-NEXT: s_lshr_b32 s5, s9, 16
14563 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
14564 ; GFX900-NEXT: ;;#ASMSTART
14565 ; GFX900-NEXT: ; use s[8:9]
14566 ; GFX900-NEXT: ;;#ASMEND
14567 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14569 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_5_6:
14571 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14572 ; GFX90A-NEXT: ;;#ASMSTART
14573 ; GFX90A-NEXT: ; def s[8:9]
14574 ; GFX90A-NEXT: ;;#ASMEND
14575 ; GFX90A-NEXT: s_lshr_b32 s4, s8, 16
14576 ; GFX90A-NEXT: s_lshr_b32 s5, s9, 16
14577 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
14578 ; GFX90A-NEXT: ;;#ASMSTART
14579 ; GFX90A-NEXT: ; use s[8:9]
14580 ; GFX90A-NEXT: ;;#ASMEND
14581 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14583 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_6:
14585 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14586 ; GFX940-NEXT: ;;#ASMSTART
14587 ; GFX940-NEXT: ; def s[8:9]
14588 ; GFX940-NEXT: ;;#ASMEND
14589 ; GFX940-NEXT: s_lshr_b32 s0, s8, 16
14590 ; GFX940-NEXT: s_lshr_b32 s1, s9, 16
14591 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
14592 ; GFX940-NEXT: ;;#ASMSTART
14593 ; GFX940-NEXT: ; use s[8:9]
14594 ; GFX940-NEXT: ;;#ASMEND
14595 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; NOTE(review): the mask never reads %vec0 and the checks show only one
; `; def`; presumably the unused, non-sideeffect asm is dropped - confirm.
14596 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14597 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14598 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 6>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14599 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14600 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <poison, 7, 7> over (%vec0, %vec1): mask values 4-7 address lanes 0-3
; of %vec1, so the shuffle yields <poison, %vec1[3], %vec1[3]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14604 define void @s_shuffle_v3bf16_v4bf16__u_7_7() {
14605 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__u_7_7:
14607 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14608 ; GFX900-NEXT: ;;#ASMSTART
14609 ; GFX900-NEXT: ; def s[4:5]
14610 ; GFX900-NEXT: ;;#ASMEND
14611 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
14612 ; GFX900-NEXT: s_mov_b32 s8, s5
14613 ; GFX900-NEXT: ;;#ASMSTART
14614 ; GFX900-NEXT: ; use s[8:9]
14615 ; GFX900-NEXT: ;;#ASMEND
14616 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14618 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__u_7_7:
14620 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14621 ; GFX90A-NEXT: ;;#ASMSTART
14622 ; GFX90A-NEXT: ; def s[4:5]
14623 ; GFX90A-NEXT: ;;#ASMEND
14624 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
14625 ; GFX90A-NEXT: s_mov_b32 s8, s5
14626 ; GFX90A-NEXT: ;;#ASMSTART
14627 ; GFX90A-NEXT: ; use s[8:9]
14628 ; GFX90A-NEXT: ;;#ASMEND
14629 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14631 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__u_7_7:
14633 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14634 ; GFX940-NEXT: ;;#ASMSTART
14635 ; GFX940-NEXT: ; def s[0:1]
14636 ; GFX940-NEXT: ;;#ASMEND
14637 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
14638 ; GFX940-NEXT: s_mov_b32 s8, s1
14639 ; GFX940-NEXT: ;;#ASMSTART
14640 ; GFX940-NEXT: ; use s[8:9]
14641 ; GFX940-NEXT: ;;#ASMEND
14642 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; NOTE(review): the mask never reads %vec0 and the checks show only one
; `; def`; presumably the unused, non-sideeffect asm is dropped - confirm.
14643 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14644 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14645 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 poison, i32 7, i32 7>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14646 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14647 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <0, 7, 7> over (%vec0, %vec1): values 0-3 address %vec0 and 4-7 address
; lanes 0-3 of %vec1, so the shuffle yields <%vec0[0], %vec1[3], %vec1[3]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14651 define void @s_shuffle_v3bf16_v4bf16__0_7_7() {
14652 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__0_7_7:
14654 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14655 ; GFX900-NEXT: ;;#ASMSTART
14656 ; GFX900-NEXT: ; def s[6:7]
14657 ; GFX900-NEXT: ;;#ASMEND
14658 ; GFX900-NEXT: s_lshr_b32 s9, s7, 16
14659 ; GFX900-NEXT: ;;#ASMSTART
14660 ; GFX900-NEXT: ; def s[4:5]
14661 ; GFX900-NEXT: ;;#ASMEND
14662 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14663 ; GFX900-NEXT: ;;#ASMSTART
14664 ; GFX900-NEXT: ; use s[8:9]
14665 ; GFX900-NEXT: ;;#ASMEND
14666 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14668 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__0_7_7:
14670 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14671 ; GFX90A-NEXT: ;;#ASMSTART
14672 ; GFX90A-NEXT: ; def s[6:7]
14673 ; GFX90A-NEXT: ;;#ASMEND
14674 ; GFX90A-NEXT: s_lshr_b32 s9, s7, 16
14675 ; GFX90A-NEXT: ;;#ASMSTART
14676 ; GFX90A-NEXT: ; def s[4:5]
14677 ; GFX90A-NEXT: ;;#ASMEND
14678 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14679 ; GFX90A-NEXT: ;;#ASMSTART
14680 ; GFX90A-NEXT: ; use s[8:9]
14681 ; GFX90A-NEXT: ;;#ASMEND
14682 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14684 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__0_7_7:
14686 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14687 ; GFX940-NEXT: ;;#ASMSTART
14688 ; GFX940-NEXT: ; def s[2:3]
14689 ; GFX940-NEXT: ;;#ASMEND
14690 ; GFX940-NEXT: s_lshr_b32 s9, s3, 16
14691 ; GFX940-NEXT: ;;#ASMSTART
14692 ; GFX940-NEXT: ; def s[0:1]
14693 ; GFX940-NEXT: ;;#ASMEND
14694 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
14695 ; GFX940-NEXT: ;;#ASMSTART
14696 ; GFX940-NEXT: ; use s[8:9]
14697 ; GFX940-NEXT: ;;#ASMEND
14698 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are read by the mask; in the checks the first shift is expected
; between the two `; def` blocks rather than after both.
14699 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14700 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14701 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 0, i32 7, i32 7>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14702 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14703 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <1, 7, 7> over (%vec0, %vec1): values 0-3 address %vec0 and 4-7 address
; lanes 0-3 of %vec1, so the shuffle yields <%vec0[1], %vec1[3], %vec1[3]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14707 define void @s_shuffle_v3bf16_v4bf16__1_7_7() {
14708 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__1_7_7:
14710 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14711 ; GFX900-NEXT: ;;#ASMSTART
14712 ; GFX900-NEXT: ; def s[4:5]
14713 ; GFX900-NEXT: ;;#ASMEND
14714 ; GFX900-NEXT: ;;#ASMSTART
14715 ; GFX900-NEXT: ; def s[6:7]
14716 ; GFX900-NEXT: ;;#ASMEND
14717 ; GFX900-NEXT: s_lshr_b32 s9, s7, 16
14718 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
14719 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14720 ; GFX900-NEXT: ;;#ASMSTART
14721 ; GFX900-NEXT: ; use s[8:9]
14722 ; GFX900-NEXT: ;;#ASMEND
14723 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14725 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__1_7_7:
14727 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14728 ; GFX90A-NEXT: ;;#ASMSTART
14729 ; GFX90A-NEXT: ; def s[4:5]
14730 ; GFX90A-NEXT: ;;#ASMEND
14731 ; GFX90A-NEXT: ;;#ASMSTART
14732 ; GFX90A-NEXT: ; def s[6:7]
14733 ; GFX90A-NEXT: ;;#ASMEND
14734 ; GFX90A-NEXT: s_lshr_b32 s9, s7, 16
14735 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
14736 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14737 ; GFX90A-NEXT: ;;#ASMSTART
14738 ; GFX90A-NEXT: ; use s[8:9]
14739 ; GFX90A-NEXT: ;;#ASMEND
14740 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14742 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_7_7:
14744 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14745 ; GFX940-NEXT: ;;#ASMSTART
14746 ; GFX940-NEXT: ; def s[0:1]
14747 ; GFX940-NEXT: ;;#ASMEND
14748 ; GFX940-NEXT: ;;#ASMSTART
14749 ; GFX940-NEXT: ; def s[2:3]
14750 ; GFX940-NEXT: ;;#ASMEND
14751 ; GFX940-NEXT: s_lshr_b32 s9, s3, 16
14752 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
14753 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
14754 ; GFX940-NEXT: ;;#ASMSTART
14755 ; GFX940-NEXT: ; use s[8:9]
14756 ; GFX940-NEXT: ;;#ASMEND
14757 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are read by the mask, and the checks show two `; def` blocks.
14758 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14759 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14760 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 1, i32 7, i32 7>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14761 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14762 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <2, 7, 7> over (%vec0, %vec1): values 0-3 address %vec0 and 4-7 address
; lanes 0-3 of %vec1, so the shuffle yields <%vec0[2], %vec1[3], %vec1[3]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14766 define void @s_shuffle_v3bf16_v4bf16__2_7_7() {
14767 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__2_7_7:
14769 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14770 ; GFX900-NEXT: ;;#ASMSTART
14771 ; GFX900-NEXT: ; def s[6:7]
14772 ; GFX900-NEXT: ;;#ASMEND
14773 ; GFX900-NEXT: s_lshr_b32 s9, s7, 16
14774 ; GFX900-NEXT: ;;#ASMSTART
14775 ; GFX900-NEXT: ; def s[4:5]
14776 ; GFX900-NEXT: ;;#ASMEND
14777 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9
14778 ; GFX900-NEXT: ;;#ASMSTART
14779 ; GFX900-NEXT: ; use s[8:9]
14780 ; GFX900-NEXT: ;;#ASMEND
14781 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14783 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__2_7_7:
14785 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14786 ; GFX90A-NEXT: ;;#ASMSTART
14787 ; GFX90A-NEXT: ; def s[6:7]
14788 ; GFX90A-NEXT: ;;#ASMEND
14789 ; GFX90A-NEXT: s_lshr_b32 s9, s7, 16
14790 ; GFX90A-NEXT: ;;#ASMSTART
14791 ; GFX90A-NEXT: ; def s[4:5]
14792 ; GFX90A-NEXT: ;;#ASMEND
14793 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9
14794 ; GFX90A-NEXT: ;;#ASMSTART
14795 ; GFX90A-NEXT: ; use s[8:9]
14796 ; GFX90A-NEXT: ;;#ASMEND
14797 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14799 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_7_7:
14801 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14802 ; GFX940-NEXT: ;;#ASMSTART
14803 ; GFX940-NEXT: ; def s[2:3]
14804 ; GFX940-NEXT: ;;#ASMEND
14805 ; GFX940-NEXT: s_lshr_b32 s9, s3, 16
14806 ; GFX940-NEXT: ;;#ASMSTART
14807 ; GFX940-NEXT: ; def s[0:1]
14808 ; GFX940-NEXT: ;;#ASMEND
14809 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9
14810 ; GFX940-NEXT: ;;#ASMSTART
14811 ; GFX940-NEXT: ; use s[8:9]
14812 ; GFX940-NEXT: ;;#ASMEND
14813 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are read by the mask; in the checks the first shift is expected
; between the two `; def` blocks rather than after both.
14814 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14815 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14816 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 2, i32 7, i32 7>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14817 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14818 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <3, 7, 7> over (%vec0, %vec1): values 0-3 address %vec0 and 4-7 address
; lanes 0-3 of %vec1, so the shuffle yields <%vec0[3], %vec1[3], %vec1[3]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14822 define void @s_shuffle_v3bf16_v4bf16__3_7_7() {
14823 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__3_7_7:
14825 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14826 ; GFX900-NEXT: ;;#ASMSTART
14827 ; GFX900-NEXT: ; def s[4:5]
14828 ; GFX900-NEXT: ;;#ASMEND
14829 ; GFX900-NEXT: ;;#ASMSTART
14830 ; GFX900-NEXT: ; def s[6:7]
14831 ; GFX900-NEXT: ;;#ASMEND
14832 ; GFX900-NEXT: s_lshr_b32 s9, s7, 16
14833 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
14834 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14835 ; GFX900-NEXT: ;;#ASMSTART
14836 ; GFX900-NEXT: ; use s[8:9]
14837 ; GFX900-NEXT: ;;#ASMEND
14838 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14840 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__3_7_7:
14842 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14843 ; GFX90A-NEXT: ;;#ASMSTART
14844 ; GFX90A-NEXT: ; def s[4:5]
14845 ; GFX90A-NEXT: ;;#ASMEND
14846 ; GFX90A-NEXT: ;;#ASMSTART
14847 ; GFX90A-NEXT: ; def s[6:7]
14848 ; GFX90A-NEXT: ;;#ASMEND
14849 ; GFX90A-NEXT: s_lshr_b32 s9, s7, 16
14850 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
14851 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14852 ; GFX90A-NEXT: ;;#ASMSTART
14853 ; GFX90A-NEXT: ; use s[8:9]
14854 ; GFX90A-NEXT: ;;#ASMEND
14855 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14857 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_7_7:
14859 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14860 ; GFX940-NEXT: ;;#ASMSTART
14861 ; GFX940-NEXT: ; def s[0:1]
14862 ; GFX940-NEXT: ;;#ASMEND
14863 ; GFX940-NEXT: ;;#ASMSTART
14864 ; GFX940-NEXT: ; def s[2:3]
14865 ; GFX940-NEXT: ;;#ASMEND
14866 ; GFX940-NEXT: s_lshr_b32 s9, s3, 16
14867 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
14868 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
14869 ; GFX940-NEXT: ;;#ASMSTART
14870 ; GFX940-NEXT: ; use s[8:9]
14871 ; GFX940-NEXT: ;;#ASMEND
14872 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are read by the mask, and the checks show two `; def` blocks.
14873 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14874 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14875 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 3, i32 7, i32 7>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14876 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14877 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <4, 7, 7> over (%vec0, %vec1): mask values 4-7 address lanes 0-3 of
; %vec1, so the shuffle yields <%vec1[0], %vec1[3], %vec1[3]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14881 define void @s_shuffle_v3bf16_v4bf16__4_7_7() {
14882 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__4_7_7:
14884 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14885 ; GFX900-NEXT: ;;#ASMSTART
14886 ; GFX900-NEXT: ; def s[4:5]
14887 ; GFX900-NEXT: ;;#ASMEND
14888 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
14889 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14890 ; GFX900-NEXT: ;;#ASMSTART
14891 ; GFX900-NEXT: ; use s[8:9]
14892 ; GFX900-NEXT: ;;#ASMEND
14893 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14895 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__4_7_7:
14897 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14898 ; GFX90A-NEXT: ;;#ASMSTART
14899 ; GFX90A-NEXT: ; def s[4:5]
14900 ; GFX90A-NEXT: ;;#ASMEND
14901 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
14902 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14903 ; GFX90A-NEXT: ;;#ASMSTART
14904 ; GFX90A-NEXT: ; use s[8:9]
14905 ; GFX90A-NEXT: ;;#ASMEND
14906 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14908 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__4_7_7:
14910 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14911 ; GFX940-NEXT: ;;#ASMSTART
14912 ; GFX940-NEXT: ; def s[0:1]
14913 ; GFX940-NEXT: ;;#ASMEND
14914 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
14915 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
14916 ; GFX940-NEXT: ;;#ASMSTART
14917 ; GFX940-NEXT: ; use s[8:9]
14918 ; GFX940-NEXT: ;;#ASMEND
14919 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; NOTE(review): the mask never reads %vec0 and the checks show only one
; `; def`; presumably the unused, non-sideeffect asm is dropped - confirm.
14920 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14921 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14922 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 4, i32 7, i32 7>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14923 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14924 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; Mask <5, 7, 7> over (%vec0, %vec1): mask values 4-7 address lanes 0-3 of
; %vec1, so the shuffle yields <%vec1[1], %vec1[3], %vec1[3]>.
; The gfx900 and gfx90a expectations match; gfx940 differs only in SGPR numbering.
14928 define void @s_shuffle_v3bf16_v4bf16__5_7_7() {
14929 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__5_7_7:
14931 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14932 ; GFX900-NEXT: ;;#ASMSTART
14933 ; GFX900-NEXT: ; def s[4:5]
14934 ; GFX900-NEXT: ;;#ASMEND
14935 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
14936 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
14937 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14938 ; GFX900-NEXT: ;;#ASMSTART
14939 ; GFX900-NEXT: ; use s[8:9]
14940 ; GFX900-NEXT: ;;#ASMEND
14941 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14943 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__5_7_7:
14945 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14946 ; GFX90A-NEXT: ;;#ASMSTART
14947 ; GFX90A-NEXT: ; def s[4:5]
14948 ; GFX90A-NEXT: ;;#ASMEND
14949 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
14950 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
14951 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
14952 ; GFX90A-NEXT: ;;#ASMSTART
14953 ; GFX90A-NEXT: ; use s[8:9]
14954 ; GFX90A-NEXT: ;;#ASMEND
14955 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
14957 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_7_7:
14959 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14960 ; GFX940-NEXT: ;;#ASMSTART
14961 ; GFX940-NEXT: ; def s[0:1]
14962 ; GFX940-NEXT: ;;#ASMEND
14963 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
14964 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
14965 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
14966 ; GFX940-NEXT: ;;#ASMSTART
14967 ; GFX940-NEXT: ; use s[8:9]
14968 ; GFX940-NEXT: ;;#ASMEND
14969 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; NOTE(review): the mask never reads %vec0 and the checks show only one
; `; def`; presumably the unused, non-sideeffect asm is dropped - confirm.
14970 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
14971 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
14972 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 5, i32 7, i32 7>
; Pad to 4 lanes (lane 3 poison) to match the <4 x bfloat> asm operand tied to s[8:9].
14973 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14974 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle test, mask <6, 7, 7>. Per the shufflevector rules, indices 4-7
; address the second operand, so the result is lanes 2, 3, 3 of %vec1 and
; %vec0 is unused; the expected output accordingly contains a single "def".
; Result lanes 0 and 1 are exactly the def's high dword, so the expected code
; copies it into s8 with s_mov_b32 and needs no pack; s9 is that dword shifted
; right by 16, which puts source lane 3 into result lane 2.
; The result is widened to <4 x bfloat> (last lane poison) before being handed
; to the inline asm that is constrained to s[8:9].
14978 define void @s_shuffle_v3bf16_v4bf16__6_7_7() {
14979 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__6_7_7:
14981 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14982 ; GFX900-NEXT: ;;#ASMSTART
14983 ; GFX900-NEXT: ; def s[4:5]
14984 ; GFX900-NEXT: ;;#ASMEND
14985 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
14986 ; GFX900-NEXT: s_mov_b32 s8, s5
14987 ; GFX900-NEXT: ;;#ASMSTART
14988 ; GFX900-NEXT: ; use s[8:9]
14989 ; GFX900-NEXT: ;;#ASMEND
14990 ; GFX900-NEXT: s_setpc_b64 s[30:31]
14992 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__6_7_7:
14994 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14995 ; GFX90A-NEXT: ;;#ASMSTART
14996 ; GFX90A-NEXT: ; def s[4:5]
14997 ; GFX90A-NEXT: ;;#ASMEND
14998 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
14999 ; GFX90A-NEXT: s_mov_b32 s8, s5
15000 ; GFX90A-NEXT: ;;#ASMSTART
15001 ; GFX90A-NEXT: ; use s[8:9]
15002 ; GFX90A-NEXT: ;;#ASMEND
15003 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
15005 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_7_7:
15007 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15008 ; GFX940-NEXT: ;;#ASMSTART
15009 ; GFX940-NEXT: ; def s[0:1]
15010 ; GFX940-NEXT: ;;#ASMEND
15011 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
15012 ; GFX940-NEXT: s_mov_b32 s8, s1
15013 ; GFX940-NEXT: ;;#ASMSTART
15014 ; GFX940-NEXT: ; use s[8:9]
15015 ; GFX940-NEXT: ;;#ASMEND
15016 ; GFX940-NEXT: s_setpc_b64 s[30:31]
15017 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
15018 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
15019 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 6, i32 7, i32 7>
15020 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15021 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle test, mask <7, poison, 7>. Per the shufflevector rules, indices
; 4-7 address the second operand, so result lanes 0 and 2 are both lane 3 of
; %vec1, lane 1 is undefined, and %vec0 is unused; the expected output
; accordingly contains a single "def".
; With lane 1 free, the expected code shifts the def's high dword right by 16
; once into s8 and duplicates it into s9 with s_mov_b32 instead of packing.
; The result is widened to <4 x bfloat> (last lane poison) before being handed
; to the inline asm that is constrained to s[8:9].
15025 define void @s_shuffle_v3bf16_v4bf16__7_u_7() {
15026 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_u_7:
15028 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15029 ; GFX900-NEXT: ;;#ASMSTART
15030 ; GFX900-NEXT: ; def s[4:5]
15031 ; GFX900-NEXT: ;;#ASMEND
15032 ; GFX900-NEXT: s_lshr_b32 s8, s5, 16
15033 ; GFX900-NEXT: s_mov_b32 s9, s8
15034 ; GFX900-NEXT: ;;#ASMSTART
15035 ; GFX900-NEXT: ; use s[8:9]
15036 ; GFX900-NEXT: ;;#ASMEND
15037 ; GFX900-NEXT: s_setpc_b64 s[30:31]
15039 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_u_7:
15041 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15042 ; GFX90A-NEXT: ;;#ASMSTART
15043 ; GFX90A-NEXT: ; def s[4:5]
15044 ; GFX90A-NEXT: ;;#ASMEND
15045 ; GFX90A-NEXT: s_lshr_b32 s8, s5, 16
15046 ; GFX90A-NEXT: s_mov_b32 s9, s8
15047 ; GFX90A-NEXT: ;;#ASMSTART
15048 ; GFX90A-NEXT: ; use s[8:9]
15049 ; GFX90A-NEXT: ;;#ASMEND
15050 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
15052 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_7:
15054 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15055 ; GFX940-NEXT: ;;#ASMSTART
15056 ; GFX940-NEXT: ; def s[0:1]
15057 ; GFX940-NEXT: ;;#ASMEND
15058 ; GFX940-NEXT: s_lshr_b32 s8, s1, 16
15059 ; GFX940-NEXT: s_mov_b32 s9, s8
15060 ; GFX940-NEXT: ;;#ASMSTART
15061 ; GFX940-NEXT: ; use s[8:9]
15062 ; GFX940-NEXT: ;;#ASMEND
15063 ; GFX940-NEXT: s_setpc_b64 s[30:31]
15064 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
15065 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
15066 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 poison, i32 7>
15067 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15068 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle test, mask <7, 0, 7>. Per the shufflevector rules, indices 0-3
; address the first operand and 4-7 the second, so result lanes 0 and 2 are
; lane 3 of %vec1 and lane 1 is lane 0 of %vec0. Both inputs are live, hence
; two "def" blocks in the expected output.
; s9 is the high dword of one def shifted right by 16 (result lane 2); it also
; feeds the low half of s_pack_ll_b32_b16, whose high half comes straight from
; the low dword of the other def, since lane 0 needs no shift.
; The result is widened to <4 x bfloat> (last lane poison) before being handed
; to the inline asm that is constrained to s[8:9].
15072 define void @s_shuffle_v3bf16_v4bf16__7_0_7() {
15073 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_0_7:
15075 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15076 ; GFX900-NEXT: ;;#ASMSTART
15077 ; GFX900-NEXT: ; def s[6:7]
15078 ; GFX900-NEXT: ;;#ASMEND
15079 ; GFX900-NEXT: s_lshr_b32 s9, s7, 16
15080 ; GFX900-NEXT: ;;#ASMSTART
15081 ; GFX900-NEXT: ; def s[4:5]
15082 ; GFX900-NEXT: ;;#ASMEND
15083 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4
15084 ; GFX900-NEXT: ;;#ASMSTART
15085 ; GFX900-NEXT: ; use s[8:9]
15086 ; GFX900-NEXT: ;;#ASMEND
15087 ; GFX900-NEXT: s_setpc_b64 s[30:31]
15089 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_0_7:
15091 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15092 ; GFX90A-NEXT: ;;#ASMSTART
15093 ; GFX90A-NEXT: ; def s[6:7]
15094 ; GFX90A-NEXT: ;;#ASMEND
15095 ; GFX90A-NEXT: s_lshr_b32 s9, s7, 16
15096 ; GFX90A-NEXT: ;;#ASMSTART
15097 ; GFX90A-NEXT: ; def s[4:5]
15098 ; GFX90A-NEXT: ;;#ASMEND
15099 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4
15100 ; GFX90A-NEXT: ;;#ASMSTART
15101 ; GFX90A-NEXT: ; use s[8:9]
15102 ; GFX90A-NEXT: ;;#ASMEND
15103 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
15105 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_7:
15107 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15108 ; GFX940-NEXT: ;;#ASMSTART
15109 ; GFX940-NEXT: ; def s[2:3]
15110 ; GFX940-NEXT: ;;#ASMEND
15111 ; GFX940-NEXT: s_lshr_b32 s9, s3, 16
15112 ; GFX940-NEXT: ;;#ASMSTART
15113 ; GFX940-NEXT: ; def s[0:1]
15114 ; GFX940-NEXT: ;;#ASMEND
15115 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0
15116 ; GFX940-NEXT: ;;#ASMSTART
15117 ; GFX940-NEXT: ; use s[8:9]
15118 ; GFX940-NEXT: ;;#ASMEND
15119 ; GFX940-NEXT: s_setpc_b64 s[30:31]
15120 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
15121 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
15122 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 0, i32 7>
15123 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15124 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle test, mask <7, 1, 7>. Per the shufflevector rules, indices 0-3
; address the first operand and 4-7 the second, so result lanes 0 and 2 are
; lane 3 of %vec1 and lane 1 is lane 1 of %vec0. Both inputs are live, hence
; two "def" blocks in the expected output.
; Both selected source lanes are odd, so each needs a right shift by 16: one
; on the low dword of one def, one on the high dword of the other. The latter
; lands in s9 (result lane 2) and both feed s_pack_ll_b32_b16 to form s8.
; The result is widened to <4 x bfloat> (last lane poison) before being handed
; to the inline asm that is constrained to s[8:9].
15128 define void @s_shuffle_v3bf16_v4bf16__7_1_7() {
15129 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_1_7:
15131 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15132 ; GFX900-NEXT: ;;#ASMSTART
15133 ; GFX900-NEXT: ; def s[4:5]
15134 ; GFX900-NEXT: ;;#ASMEND
15135 ; GFX900-NEXT: ;;#ASMSTART
15136 ; GFX900-NEXT: ; def s[6:7]
15137 ; GFX900-NEXT: ;;#ASMEND
15138 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
15139 ; GFX900-NEXT: s_lshr_b32 s9, s7, 16
15140 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4
15141 ; GFX900-NEXT: ;;#ASMSTART
15142 ; GFX900-NEXT: ; use s[8:9]
15143 ; GFX900-NEXT: ;;#ASMEND
15144 ; GFX900-NEXT: s_setpc_b64 s[30:31]
15146 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_1_7:
15148 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15149 ; GFX90A-NEXT: ;;#ASMSTART
15150 ; GFX90A-NEXT: ; def s[4:5]
15151 ; GFX90A-NEXT: ;;#ASMEND
15152 ; GFX90A-NEXT: ;;#ASMSTART
15153 ; GFX90A-NEXT: ; def s[6:7]
15154 ; GFX90A-NEXT: ;;#ASMEND
15155 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
15156 ; GFX90A-NEXT: s_lshr_b32 s9, s7, 16
15157 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4
15158 ; GFX90A-NEXT: ;;#ASMSTART
15159 ; GFX90A-NEXT: ; use s[8:9]
15160 ; GFX90A-NEXT: ;;#ASMEND
15161 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
15163 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_7:
15165 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15166 ; GFX940-NEXT: ;;#ASMSTART
15167 ; GFX940-NEXT: ; def s[0:1]
15168 ; GFX940-NEXT: ;;#ASMEND
15169 ; GFX940-NEXT: ;;#ASMSTART
15170 ; GFX940-NEXT: ; def s[2:3]
15171 ; GFX940-NEXT: ;;#ASMEND
15172 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
15173 ; GFX940-NEXT: s_lshr_b32 s9, s3, 16
15174 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0
15175 ; GFX940-NEXT: ;;#ASMSTART
15176 ; GFX940-NEXT: ; use s[8:9]
15177 ; GFX940-NEXT: ;;#ASMEND
15178 ; GFX940-NEXT: s_setpc_b64 s[30:31]
15179 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
15180 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
15181 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 1, i32 7>
15182 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15183 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle test, mask <7, 2, 7>. Per the shufflevector rules, indices 0-3
; address the first operand and 4-7 the second, so result lanes 0 and 2 are
; lane 3 of %vec1 and lane 1 is lane 2 of %vec0. Both inputs are live, hence
; two "def" blocks in the expected output.
; s9 is the high dword of one def shifted right by 16 (result lane 2); it also
; feeds the low half of s_pack_ll_b32_b16, whose high half comes straight from
; the high dword of the other def, since lane 2 sits in that dword's low bits.
; The result is widened to <4 x bfloat> (last lane poison) before being handed
; to the inline asm that is constrained to s[8:9].
15187 define void @s_shuffle_v3bf16_v4bf16__7_2_7() {
15188 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_2_7:
15190 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15191 ; GFX900-NEXT: ;;#ASMSTART
15192 ; GFX900-NEXT: ; def s[6:7]
15193 ; GFX900-NEXT: ;;#ASMEND
15194 ; GFX900-NEXT: s_lshr_b32 s9, s7, 16
15195 ; GFX900-NEXT: ;;#ASMSTART
15196 ; GFX900-NEXT: ; def s[4:5]
15197 ; GFX900-NEXT: ;;#ASMEND
15198 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s5
15199 ; GFX900-NEXT: ;;#ASMSTART
15200 ; GFX900-NEXT: ; use s[8:9]
15201 ; GFX900-NEXT: ;;#ASMEND
15202 ; GFX900-NEXT: s_setpc_b64 s[30:31]
15204 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_2_7:
15206 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15207 ; GFX90A-NEXT: ;;#ASMSTART
15208 ; GFX90A-NEXT: ; def s[6:7]
15209 ; GFX90A-NEXT: ;;#ASMEND
15210 ; GFX90A-NEXT: s_lshr_b32 s9, s7, 16
15211 ; GFX90A-NEXT: ;;#ASMSTART
15212 ; GFX90A-NEXT: ; def s[4:5]
15213 ; GFX90A-NEXT: ;;#ASMEND
15214 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s5
15215 ; GFX90A-NEXT: ;;#ASMSTART
15216 ; GFX90A-NEXT: ; use s[8:9]
15217 ; GFX90A-NEXT: ;;#ASMEND
15218 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
15220 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_7:
15222 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15223 ; GFX940-NEXT: ;;#ASMSTART
15224 ; GFX940-NEXT: ; def s[2:3]
15225 ; GFX940-NEXT: ;;#ASMEND
15226 ; GFX940-NEXT: s_lshr_b32 s9, s3, 16
15227 ; GFX940-NEXT: ;;#ASMSTART
15228 ; GFX940-NEXT: ; def s[0:1]
15229 ; GFX940-NEXT: ;;#ASMEND
15230 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1
15231 ; GFX940-NEXT: ;;#ASMSTART
15232 ; GFX940-NEXT: ; use s[8:9]
15233 ; GFX940-NEXT: ;;#ASMEND
15234 ; GFX940-NEXT: s_setpc_b64 s[30:31]
15235 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
15236 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
15237 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 2, i32 7>
15238 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15239 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle test, mask <7, 3, 7>. Per the shufflevector rules, indices 0-3
; address the first operand and 4-7 the second, so result lanes 0 and 2 are
; lane 3 of %vec1 and lane 1 is lane 3 of %vec0. Both inputs are live, hence
; two "def" blocks in the expected output.
; The same lane is taken from each input, so the high dword of each def is
; shifted right by 16. One shift lands in s9 (result lane 2) and both feed
; s_pack_ll_b32_b16 to form s8.
; The result is widened to <4 x bfloat> (last lane poison) before being handed
; to the inline asm that is constrained to s[8:9].
15243 define void @s_shuffle_v3bf16_v4bf16__7_3_7() {
15244 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_3_7:
15246 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15247 ; GFX900-NEXT: ;;#ASMSTART
15248 ; GFX900-NEXT: ; def s[4:5]
15249 ; GFX900-NEXT: ;;#ASMEND
15250 ; GFX900-NEXT: ;;#ASMSTART
15251 ; GFX900-NEXT: ; def s[6:7]
15252 ; GFX900-NEXT: ;;#ASMEND
15253 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
15254 ; GFX900-NEXT: s_lshr_b32 s9, s7, 16
15255 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4
15256 ; GFX900-NEXT: ;;#ASMSTART
15257 ; GFX900-NEXT: ; use s[8:9]
15258 ; GFX900-NEXT: ;;#ASMEND
15259 ; GFX900-NEXT: s_setpc_b64 s[30:31]
15261 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_3_7:
15263 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15264 ; GFX90A-NEXT: ;;#ASMSTART
15265 ; GFX90A-NEXT: ; def s[4:5]
15266 ; GFX90A-NEXT: ;;#ASMEND
15267 ; GFX90A-NEXT: ;;#ASMSTART
15268 ; GFX90A-NEXT: ; def s[6:7]
15269 ; GFX90A-NEXT: ;;#ASMEND
15270 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
15271 ; GFX90A-NEXT: s_lshr_b32 s9, s7, 16
15272 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4
15273 ; GFX90A-NEXT: ;;#ASMSTART
15274 ; GFX90A-NEXT: ; use s[8:9]
15275 ; GFX90A-NEXT: ;;#ASMEND
15276 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
15278 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_7:
15280 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15281 ; GFX940-NEXT: ;;#ASMSTART
15282 ; GFX940-NEXT: ; def s[0:1]
15283 ; GFX940-NEXT: ;;#ASMEND
15284 ; GFX940-NEXT: ;;#ASMSTART
15285 ; GFX940-NEXT: ; def s[2:3]
15286 ; GFX940-NEXT: ;;#ASMEND
15287 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
15288 ; GFX940-NEXT: s_lshr_b32 s9, s3, 16
15289 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0
15290 ; GFX940-NEXT: ;;#ASMSTART
15291 ; GFX940-NEXT: ; use s[8:9]
15292 ; GFX940-NEXT: ;;#ASMEND
15293 ; GFX940-NEXT: s_setpc_b64 s[30:31]
15294 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
15295 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
15296 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 3, i32 7>
15297 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15298 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle test, mask <7, 4, 7>. Per the shufflevector rules, indices 4-7
; address the second operand, so the result is lanes 3, 0, 3 of %vec1 and
; %vec0 is unused; the expected output accordingly contains a single "def".
; s9 is the def's high dword shifted right by 16 (result lane 2); it also
; feeds the low half of s_pack_ll_b32_b16, whose high half is taken directly
; from the def's low dword, since lane 0 needs no shift.
; The result is widened to <4 x bfloat> (last lane poison) before being handed
; to the inline asm that is constrained to s[8:9].
15302 define void @s_shuffle_v3bf16_v4bf16__7_4_7() {
15303 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_4_7:
15305 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15306 ; GFX900-NEXT: ;;#ASMSTART
15307 ; GFX900-NEXT: ; def s[4:5]
15308 ; GFX900-NEXT: ;;#ASMEND
15309 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
15310 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4
15311 ; GFX900-NEXT: ;;#ASMSTART
15312 ; GFX900-NEXT: ; use s[8:9]
15313 ; GFX900-NEXT: ;;#ASMEND
15314 ; GFX900-NEXT: s_setpc_b64 s[30:31]
15316 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_4_7:
15318 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15319 ; GFX90A-NEXT: ;;#ASMSTART
15320 ; GFX90A-NEXT: ; def s[4:5]
15321 ; GFX90A-NEXT: ;;#ASMEND
15322 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
15323 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4
15324 ; GFX90A-NEXT: ;;#ASMSTART
15325 ; GFX90A-NEXT: ; use s[8:9]
15326 ; GFX90A-NEXT: ;;#ASMEND
15327 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
15329 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_7:
15331 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15332 ; GFX940-NEXT: ;;#ASMSTART
15333 ; GFX940-NEXT: ; def s[0:1]
15334 ; GFX940-NEXT: ;;#ASMEND
15335 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
15336 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0
15337 ; GFX940-NEXT: ;;#ASMSTART
15338 ; GFX940-NEXT: ; use s[8:9]
15339 ; GFX940-NEXT: ;;#ASMEND
15340 ; GFX940-NEXT: s_setpc_b64 s[30:31]
15341 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
15342 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
15343 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 4, i32 7>
15344 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15345 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle test, mask <7, 5, 7>. Per the shufflevector rules, indices 4-7
; address the second operand, so the result is lanes 3, 1, 3 of %vec1 and
; %vec0 is unused; the expected output accordingly contains a single "def".
; Both selected lanes are odd, so each dword of the def is shifted right by 16;
; the high-dword shift lands in s9 (result lane 2) and both shifted values feed
; s_pack_ll_b32_b16 to form s8.
; The result is widened to <4 x bfloat> (last lane poison) before being handed
; to the inline asm that is constrained to s[8:9].
15349 define void @s_shuffle_v3bf16_v4bf16__7_5_7() {
15350 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_5_7:
15352 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15353 ; GFX900-NEXT: ;;#ASMSTART
15354 ; GFX900-NEXT: ; def s[4:5]
15355 ; GFX900-NEXT: ;;#ASMEND
15356 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
15357 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
15358 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4
15359 ; GFX900-NEXT: ;;#ASMSTART
15360 ; GFX900-NEXT: ; use s[8:9]
15361 ; GFX900-NEXT: ;;#ASMEND
15362 ; GFX900-NEXT: s_setpc_b64 s[30:31]
15364 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_5_7:
15366 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15367 ; GFX90A-NEXT: ;;#ASMSTART
15368 ; GFX90A-NEXT: ; def s[4:5]
15369 ; GFX90A-NEXT: ;;#ASMEND
15370 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
15371 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
15372 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4
15373 ; GFX90A-NEXT: ;;#ASMSTART
15374 ; GFX90A-NEXT: ; use s[8:9]
15375 ; GFX90A-NEXT: ;;#ASMEND
15376 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
15378 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_7:
15380 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15381 ; GFX940-NEXT: ;;#ASMSTART
15382 ; GFX940-NEXT: ; def s[0:1]
15383 ; GFX940-NEXT: ;;#ASMEND
15384 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
15385 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
15386 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0
15387 ; GFX940-NEXT: ;;#ASMSTART
15388 ; GFX940-NEXT: ; use s[8:9]
15389 ; GFX940-NEXT: ;;#ASMEND
15390 ; GFX940-NEXT: s_setpc_b64 s[30:31]
15391 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
15392 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
15393 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 5, i32 7>
15394 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15395 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
; SGPR shuffle test, mask <7, 6, 7>. Per the shufflevector rules, indices 4-7
; address the second operand, so the result is lanes 3, 2, 3 of %vec1 and
; %vec0 is unused; the expected output accordingly contains a single "def".
; Result lanes 0 and 1 are the two halves of the def's high dword in swapped
; order: s9 is that dword shifted right by 16, and s_pack_ll_b32_b16 combines
; s9 (low half) with the unshifted dword (high half) to form s8.
; The result is widened to <4 x bfloat> (last lane poison) before being handed
; to the inline asm that is constrained to s[8:9].
15399 define void @s_shuffle_v3bf16_v4bf16__7_6_7() {
15400 ; GFX900-LABEL: s_shuffle_v3bf16_v4bf16__7_6_7:
15402 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15403 ; GFX900-NEXT: ;;#ASMSTART
15404 ; GFX900-NEXT: ; def s[4:5]
15405 ; GFX900-NEXT: ;;#ASMEND
15406 ; GFX900-NEXT: s_lshr_b32 s9, s5, 16
15407 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s5
15408 ; GFX900-NEXT: ;;#ASMSTART
15409 ; GFX900-NEXT: ; use s[8:9]
15410 ; GFX900-NEXT: ;;#ASMEND
15411 ; GFX900-NEXT: s_setpc_b64 s[30:31]
15413 ; GFX90A-LABEL: s_shuffle_v3bf16_v4bf16__7_6_7:
15415 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15416 ; GFX90A-NEXT: ;;#ASMSTART
15417 ; GFX90A-NEXT: ; def s[4:5]
15418 ; GFX90A-NEXT: ;;#ASMEND
15419 ; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
15420 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s5
15421 ; GFX90A-NEXT: ;;#ASMSTART
15422 ; GFX90A-NEXT: ; use s[8:9]
15423 ; GFX90A-NEXT: ;;#ASMEND
15424 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
15426 ; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_7:
15428 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15429 ; GFX940-NEXT: ;;#ASMSTART
15430 ; GFX940-NEXT: ; def s[0:1]
15431 ; GFX940-NEXT: ;;#ASMEND
15432 ; GFX940-NEXT: s_lshr_b32 s9, s1, 16
15433 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1
15434 ; GFX940-NEXT: ;;#ASMSTART
15435 ; GFX940-NEXT: ; use s[8:9]
15436 ; GFX940-NEXT: ;;#ASMEND
15437 ; GFX940-NEXT: s_setpc_b64 s[30:31]
15438 %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
15439 %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
15440 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> <i32 7, i32 6, i32 7>
15441 %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15442 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %extend3)
15445 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
15446 ; GFX90APLUS: {{.*}}