1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
; Shuffle with a fully poison mask: none of the four result lanes is defined.
; The GFX9 checks below list only the entry s_waitcnt and the return, with no
; store instruction in between.
7 define void @v_shuffle_v4f16_v2f16__u_u_u_u(ptr addrspace(1) inreg %ptr) {
8 ; GFX9-LABEL: v_shuffle_v4f16_v2f16__u_u_u_u:
10 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12 %vec0 = call <2 x half> asm "; def $0", "=v"()
13 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> poison
14 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <0, poison, poison, poison>: result lane 0 is element 0 of %vec0, the
; remaining lanes are poison. Checked per subtarget; the expected store uses
; s[16:17] on GFX900/GFX90A and s[0:1] with sc0 sc1 on GFX940.
18 define void @v_shuffle_v4f16_v2f16__0_u_u_u(ptr addrspace(1) inreg %ptr) {
19 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__0_u_u_u:
21 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX900-NEXT: v_mov_b32_e32 v1, 0
23 ; GFX900-NEXT: ;;#ASMSTART
24 ; GFX900-NEXT: ; def v0
25 ; GFX900-NEXT: ;;#ASMEND
26 ; GFX900-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
27 ; GFX900-NEXT: s_waitcnt vmcnt(0)
28 ; GFX900-NEXT: s_setpc_b64 s[30:31]
30 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__0_u_u_u:
32 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0
34 ; GFX90A-NEXT: ;;#ASMSTART
35 ; GFX90A-NEXT: ; def v0
36 ; GFX90A-NEXT: ;;#ASMEND
37 ; GFX90A-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
38 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
39 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
41 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__0_u_u_u:
43 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44 ; GFX940-NEXT: v_mov_b32_e32 v1, 0
45 ; GFX940-NEXT: ;;#ASMSTART
46 ; GFX940-NEXT: ; def v0
47 ; GFX940-NEXT: ;;#ASMEND
48 ; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1
49 ; GFX940-NEXT: s_waitcnt vmcnt(0)
50 ; GFX940-NEXT: s_setpc_b64 s[30:31]
51 %vec0 = call <2 x half> asm "; def $0", "=v"()
52 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
53 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <1, poison, poison, poison>: result lane 0 is element 1 of %vec0, the
; remaining lanes are poison. All three subtargets are expected to emit a
; v_alignbit_b32 with shift amount 16 on the asm-defined register before the
; store.
57 define void @v_shuffle_v4f16_v2f16__1_u_u_u(ptr addrspace(1) inreg %ptr) {
58 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__1_u_u_u:
60 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61 ; GFX900-NEXT: ;;#ASMSTART
62 ; GFX900-NEXT: ; def v0
63 ; GFX900-NEXT: ;;#ASMEND
64 ; GFX900-NEXT: v_mov_b32_e32 v1, 0
65 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
66 ; GFX900-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
67 ; GFX900-NEXT: s_waitcnt vmcnt(0)
68 ; GFX900-NEXT: s_setpc_b64 s[30:31]
70 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__1_u_u_u:
72 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73 ; GFX90A-NEXT: ;;#ASMSTART
74 ; GFX90A-NEXT: ; def v0
75 ; GFX90A-NEXT: ;;#ASMEND
76 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0
77 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
78 ; GFX90A-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
79 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
80 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
82 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__1_u_u_u:
84 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GFX940-NEXT: ;;#ASMSTART
86 ; GFX940-NEXT: ; def v0
87 ; GFX940-NEXT: ;;#ASMEND
88 ; GFX940-NEXT: v_mov_b32_e32 v1, 0
89 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
90 ; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1
91 ; GFX940-NEXT: s_waitcnt vmcnt(0)
92 ; GFX940-NEXT: s_setpc_b64 s[30:31]
93 %vec0 = call <2 x half> asm "; def $0", "=v"()
94 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
95 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <2, poison, poison, poison>: index 2 selects element 0 of the second
; shuffle operand, which is poison in this test, so no result lane is defined.
; The GFX9 checks below list only the entry s_waitcnt and the return.
99 define void @v_shuffle_v4f16_v2f16__2_u_u_u(ptr addrspace(1) inreg %ptr) {
100 ; GFX9-LABEL: v_shuffle_v4f16_v2f16__2_u_u_u:
102 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103 ; GFX9-NEXT: s_setpc_b64 s[30:31]
104 %vec0 = call <2 x half> asm "; def $0", "=v"()
105 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
106 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, poison, poison, poison>: result lane 0 is element 1 of %vec1 (the
; second shuffle operand); %vec0 is not referenced by the mask. The checks
; expect a single "; def" asm block followed by a v_alignbit_b32 by 16.
110 define void @v_shuffle_v4f16_v2f16__3_u_u_u(ptr addrspace(1) inreg %ptr) {
111 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_u_u_u:
113 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; GFX900-NEXT: ;;#ASMSTART
115 ; GFX900-NEXT: ; def v0
116 ; GFX900-NEXT: ;;#ASMEND
117 ; GFX900-NEXT: v_mov_b32_e32 v1, 0
118 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
119 ; GFX900-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
120 ; GFX900-NEXT: s_waitcnt vmcnt(0)
121 ; GFX900-NEXT: s_setpc_b64 s[30:31]
123 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_u_u_u:
125 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; GFX90A-NEXT: ;;#ASMSTART
127 ; GFX90A-NEXT: ; def v0
128 ; GFX90A-NEXT: ;;#ASMEND
129 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0
130 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
131 ; GFX90A-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
132 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
133 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
135 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_u_u_u:
137 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; GFX940-NEXT: ;;#ASMSTART
139 ; GFX940-NEXT: ; def v0
140 ; GFX940-NEXT: ;;#ASMEND
141 ; GFX940-NEXT: v_mov_b32_e32 v1, 0
142 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
143 ; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1
144 ; GFX940-NEXT: s_waitcnt vmcnt(0)
145 ; GFX940-NEXT: s_setpc_b64 s[30:31]
146 %vec0 = call <2 x half> asm "; def $0", "=v"()
147 %vec1 = call <2 x half> asm "; def $0", "=v"()
148 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
149 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 0, poison, poison>: result lanes 0 and 1 are element 1 of %vec1 and
; element 0 of %vec0; lanes 2 and 3 are poison. The checks expect both asm
; definitions to be combined by one v_alignbit_b32 with shift amount 16 (GFX940
; additionally expects an s_nop 0 before it).
153 define void @v_shuffle_v4f16_v2f16__3_0_u_u(ptr addrspace(1) inreg %ptr) {
154 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_0_u_u:
156 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX900-NEXT: ;;#ASMSTART
158 ; GFX900-NEXT: ; def v0
159 ; GFX900-NEXT: ;;#ASMEND
160 ; GFX900-NEXT: v_mov_b32_e32 v1, 0
161 ; GFX900-NEXT: ;;#ASMSTART
162 ; GFX900-NEXT: ; def v2
163 ; GFX900-NEXT: ;;#ASMEND
164 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16
165 ; GFX900-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
166 ; GFX900-NEXT: s_waitcnt vmcnt(0)
167 ; GFX900-NEXT: s_setpc_b64 s[30:31]
169 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_0_u_u:
171 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172 ; GFX90A-NEXT: ;;#ASMSTART
173 ; GFX90A-NEXT: ; def v0
174 ; GFX90A-NEXT: ;;#ASMEND
175 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0
176 ; GFX90A-NEXT: ;;#ASMSTART
177 ; GFX90A-NEXT: ; def v2
178 ; GFX90A-NEXT: ;;#ASMEND
179 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v2, 16
180 ; GFX90A-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
181 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
182 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
184 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_0_u_u:
186 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187 ; GFX940-NEXT: ;;#ASMSTART
188 ; GFX940-NEXT: ; def v0
189 ; GFX940-NEXT: ;;#ASMEND
190 ; GFX940-NEXT: v_mov_b32_e32 v1, 0
191 ; GFX940-NEXT: ;;#ASMSTART
192 ; GFX940-NEXT: ; def v2
193 ; GFX940-NEXT: ;;#ASMEND
194 ; GFX940-NEXT: s_nop 0
195 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16
196 ; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1
197 ; GFX940-NEXT: s_waitcnt vmcnt(0)
198 ; GFX940-NEXT: s_setpc_b64 s[30:31]
199 %vec0 = call <2 x half> asm "; def $0", "=v"()
200 %vec1 = call <2 x half> asm "; def $0", "=v"()
201 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 0, i32 poison, i32 poison>
202 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 1, poison, poison>: result lanes 0 and 1 are element 1 of %vec1 and
; element 1 of %vec0; lanes 2 and 3 are poison. The checks expect one
; v_perm_b32 over both asm-defined registers with selector 0x7060302.
206 define void @v_shuffle_v4f16_v2f16__3_1_u_u(ptr addrspace(1) inreg %ptr) {
207 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_1_u_u:
209 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210 ; GFX900-NEXT: ;;#ASMSTART
211 ; GFX900-NEXT: ; def v0
212 ; GFX900-NEXT: ;;#ASMEND
213 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
214 ; GFX900-NEXT: v_mov_b32_e32 v1, 0
215 ; GFX900-NEXT: ;;#ASMSTART
216 ; GFX900-NEXT: ; def v2
217 ; GFX900-NEXT: ;;#ASMEND
218 ; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4
219 ; GFX900-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
220 ; GFX900-NEXT: s_waitcnt vmcnt(0)
221 ; GFX900-NEXT: s_setpc_b64 s[30:31]
223 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_1_u_u:
225 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
226 ; GFX90A-NEXT: ;;#ASMSTART
227 ; GFX90A-NEXT: ; def v0
228 ; GFX90A-NEXT: ;;#ASMEND
229 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
230 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0
231 ; GFX90A-NEXT: ;;#ASMSTART
232 ; GFX90A-NEXT: ; def v2
233 ; GFX90A-NEXT: ;;#ASMEND
234 ; GFX90A-NEXT: v_perm_b32 v0, v0, v2, s4
235 ; GFX90A-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
236 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
237 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
239 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_1_u_u:
241 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242 ; GFX940-NEXT: ;;#ASMSTART
243 ; GFX940-NEXT: ; def v0
244 ; GFX940-NEXT: ;;#ASMEND
245 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
246 ; GFX940-NEXT: v_mov_b32_e32 v1, 0
247 ; GFX940-NEXT: ;;#ASMSTART
248 ; GFX940-NEXT: ; def v2
249 ; GFX940-NEXT: ;;#ASMEND
250 ; GFX940-NEXT: s_nop 0
251 ; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2
252 ; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1
253 ; GFX940-NEXT: s_waitcnt vmcnt(0)
254 ; GFX940-NEXT: s_setpc_b64 s[30:31]
255 %vec0 = call <2 x half> asm "; def $0", "=v"()
256 %vec1 = call <2 x half> asm "; def $0", "=v"()
257 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 1, i32 poison, i32 poison>
258 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 2, poison, poison>: result lanes 0 and 1 are elements 1 and 0 of
; %vec1 (the two halves swapped); %vec0 is not referenced by the mask. The
; checks expect one "; def" asm block and a v_alignbit_b32 by 16 that uses the
; same register for both sources.
262 define void @v_shuffle_v4f16_v2f16__3_2_u_u(ptr addrspace(1) inreg %ptr) {
263 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_2_u_u:
265 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266 ; GFX900-NEXT: ;;#ASMSTART
267 ; GFX900-NEXT: ; def v0
268 ; GFX900-NEXT: ;;#ASMEND
269 ; GFX900-NEXT: v_mov_b32_e32 v1, 0
270 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v0, 16
271 ; GFX900-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
272 ; GFX900-NEXT: s_waitcnt vmcnt(0)
273 ; GFX900-NEXT: s_setpc_b64 s[30:31]
275 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_2_u_u:
277 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
278 ; GFX90A-NEXT: ;;#ASMSTART
279 ; GFX90A-NEXT: ; def v0
280 ; GFX90A-NEXT: ;;#ASMEND
281 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0
282 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v0, 16
283 ; GFX90A-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
284 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
285 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
287 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_2_u_u:
289 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290 ; GFX940-NEXT: ;;#ASMSTART
291 ; GFX940-NEXT: ; def v0
292 ; GFX940-NEXT: ;;#ASMEND
293 ; GFX940-NEXT: v_mov_b32_e32 v1, 0
294 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16
295 ; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1
296 ; GFX940-NEXT: s_waitcnt vmcnt(0)
297 ; GFX940-NEXT: s_setpc_b64 s[30:31]
298 %vec0 = call <2 x half> asm "; def $0", "=v"()
299 %vec1 = call <2 x half> asm "; def $0", "=v"()
300 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 2, i32 poison, i32 poison>
301 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 3, poison, poison>: result lanes 0 and 1 are both element 1 of
; %vec1; %vec0 is not referenced by the mask. The checks expect one "; def"
; asm block and a v_perm_b32 with selector 0x7060302 that uses the same
; register for both sources.
305 define void @v_shuffle_v4f16_v2f16__3_3_u_u(ptr addrspace(1) inreg %ptr) {
306 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_u_u:
308 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309 ; GFX900-NEXT: ;;#ASMSTART
310 ; GFX900-NEXT: ; def v0
311 ; GFX900-NEXT: ;;#ASMEND
312 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
313 ; GFX900-NEXT: v_mov_b32_e32 v1, 0
314 ; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
315 ; GFX900-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
316 ; GFX900-NEXT: s_waitcnt vmcnt(0)
317 ; GFX900-NEXT: s_setpc_b64 s[30:31]
319 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_u_u:
321 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GFX90A-NEXT: ;;#ASMSTART
323 ; GFX90A-NEXT: ; def v0
324 ; GFX90A-NEXT: ;;#ASMEND
325 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
326 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0
327 ; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
328 ; GFX90A-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
329 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
330 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
332 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_u_u:
334 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335 ; GFX940-NEXT: ;;#ASMSTART
336 ; GFX940-NEXT: ; def v0
337 ; GFX940-NEXT: ;;#ASMEND
338 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
339 ; GFX940-NEXT: v_mov_b32_e32 v1, 0
340 ; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
341 ; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1
342 ; GFX940-NEXT: s_waitcnt vmcnt(0)
343 ; GFX940-NEXT: s_setpc_b64 s[30:31]
344 %vec0 = call <2 x half> asm "; def $0", "=v"()
345 %vec1 = call <2 x half> asm "; def $0", "=v"()
346 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 poison>
347 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 3, 0, poison>: result lanes 0 and 1 are element 1 of %vec1, lane 2
; is element 0 of %vec0, lane 3 is poison. The checks expect a v_perm_b32 with
; selector 0x7060302 for the low dword (v0), while the high dword (v1) comes
; straight from a "; def v1" asm block.
351 define void @v_shuffle_v4f16_v2f16__3_3_0_u(ptr addrspace(1) inreg %ptr) {
352 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_0_u:
354 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355 ; GFX900-NEXT: ;;#ASMSTART
356 ; GFX900-NEXT: ; def v0
357 ; GFX900-NEXT: ;;#ASMEND
358 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
359 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
360 ; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
361 ; GFX900-NEXT: ;;#ASMSTART
362 ; GFX900-NEXT: ; def v1
363 ; GFX900-NEXT: ;;#ASMEND
364 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
365 ; GFX900-NEXT: s_waitcnt vmcnt(0)
366 ; GFX900-NEXT: s_setpc_b64 s[30:31]
368 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_0_u:
370 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371 ; GFX90A-NEXT: ;;#ASMSTART
372 ; GFX90A-NEXT: ; def v0
373 ; GFX90A-NEXT: ;;#ASMEND
374 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
375 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
376 ; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
377 ; GFX90A-NEXT: ;;#ASMSTART
378 ; GFX90A-NEXT: ; def v1
379 ; GFX90A-NEXT: ;;#ASMEND
380 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
381 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
382 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
384 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_0_u:
386 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
387 ; GFX940-NEXT: ;;#ASMSTART
388 ; GFX940-NEXT: ; def v0
389 ; GFX940-NEXT: ;;#ASMEND
390 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
391 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
392 ; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
393 ; GFX940-NEXT: ;;#ASMSTART
394 ; GFX940-NEXT: ; def v1
395 ; GFX940-NEXT: ;;#ASMEND
396 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
397 ; GFX940-NEXT: s_waitcnt vmcnt(0)
398 ; GFX940-NEXT: s_setpc_b64 s[30:31]
399 %vec0 = call <2 x half> asm "; def $0", "=v"()
400 %vec1 = call <2 x half> asm "; def $0", "=v"()
401 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 poison>
402 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 3, 1, poison>: result lanes 0 and 1 are element 1 of %vec1, lane 2
; is element 1 of %vec0, lane 3 is poison. The checks expect a v_alignbit_b32
; by 16 for the high dword (v1) and a v_perm_b32 with selector 0x7060302 for
; the low dword (v0); GFX940 expects a different instruction order than
; GFX900/GFX90A.
406 define void @v_shuffle_v4f16_v2f16__3_3_1_u(ptr addrspace(1) inreg %ptr) {
407 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_1_u:
409 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410 ; GFX900-NEXT: ;;#ASMSTART
411 ; GFX900-NEXT: ; def v0
412 ; GFX900-NEXT: ;;#ASMEND
413 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v0, 16
414 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
415 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
416 ; GFX900-NEXT: ;;#ASMSTART
417 ; GFX900-NEXT: ; def v3
418 ; GFX900-NEXT: ;;#ASMEND
419 ; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4
420 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
421 ; GFX900-NEXT: s_waitcnt vmcnt(0)
422 ; GFX900-NEXT: s_setpc_b64 s[30:31]
424 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_1_u:
426 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
427 ; GFX90A-NEXT: ;;#ASMSTART
428 ; GFX90A-NEXT: ; def v0
429 ; GFX90A-NEXT: ;;#ASMEND
430 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v0, 16
431 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
432 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
433 ; GFX90A-NEXT: ;;#ASMSTART
434 ; GFX90A-NEXT: ; def v3
435 ; GFX90A-NEXT: ;;#ASMEND
436 ; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
437 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
438 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
439 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
441 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_1_u:
443 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
444 ; GFX940-NEXT: ;;#ASMSTART
445 ; GFX940-NEXT: ; def v0
446 ; GFX940-NEXT: ;;#ASMEND
447 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
448 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
449 ; GFX940-NEXT: ;;#ASMSTART
450 ; GFX940-NEXT: ; def v3
451 ; GFX940-NEXT: ;;#ASMEND
452 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v0, 16
453 ; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
454 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
455 ; GFX940-NEXT: s_waitcnt vmcnt(0)
456 ; GFX940-NEXT: s_setpc_b64 s[30:31]
457 %vec0 = call <2 x half> asm "; def $0", "=v"()
458 %vec1 = call <2 x half> asm "; def $0", "=v"()
459 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 poison>
460 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 3, 2, poison>: result lanes 0 and 1 are element 1 of %vec1, lane 2
; is element 0 of %vec1, lane 3 is poison; %vec0 is not referenced by the
; mask. The checks expect one "; def v1" asm block whose register is used
; directly as the high dword, plus a v_perm_b32 (selector 0x7060302) of that
; register for the low dword.
464 define void @v_shuffle_v4f16_v2f16__3_3_2_u(ptr addrspace(1) inreg %ptr) {
465 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_2_u:
467 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
469 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
470 ; GFX900-NEXT: ;;#ASMSTART
471 ; GFX900-NEXT: ; def v1
472 ; GFX900-NEXT: ;;#ASMEND
473 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
474 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
475 ; GFX900-NEXT: s_waitcnt vmcnt(0)
476 ; GFX900-NEXT: s_setpc_b64 s[30:31]
478 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_2_u:
480 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
482 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
483 ; GFX90A-NEXT: ;;#ASMSTART
484 ; GFX90A-NEXT: ; def v1
485 ; GFX90A-NEXT: ;;#ASMEND
486 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
487 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
488 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
489 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
491 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_2_u:
493 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
495 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
496 ; GFX940-NEXT: ;;#ASMSTART
497 ; GFX940-NEXT: ; def v1
498 ; GFX940-NEXT: ;;#ASMEND
499 ; GFX940-NEXT: s_nop 0
500 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
501 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
502 ; GFX940-NEXT: s_waitcnt vmcnt(0)
503 ; GFX940-NEXT: s_setpc_b64 s[30:31]
504 %vec0 = call <2 x half> asm "; def $0", "=v"()
505 %vec1 = call <2 x half> asm "; def $0", "=v"()
506 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 poison>
507 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 3, 3, poison>: result lanes 0, 1 and 2 are all element 1 of %vec1,
; lane 3 is poison; %vec0 is not referenced by the mask. The checks expect one
; "; def v0" asm block feeding both a v_alignbit_b32 by 16 (high dword) and a
; v_perm_b32 with selector 0x7060302 (low dword).
511 define void @v_shuffle_v4f16_v2f16__3_3_3_u(ptr addrspace(1) inreg %ptr) {
512 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_3_u:
514 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
515 ; GFX900-NEXT: ;;#ASMSTART
516 ; GFX900-NEXT: ; def v0
517 ; GFX900-NEXT: ;;#ASMEND
518 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v0, 16
519 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
520 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
521 ; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
522 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
523 ; GFX900-NEXT: s_waitcnt vmcnt(0)
524 ; GFX900-NEXT: s_setpc_b64 s[30:31]
526 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_3_u:
528 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
529 ; GFX90A-NEXT: ;;#ASMSTART
530 ; GFX90A-NEXT: ; def v0
531 ; GFX90A-NEXT: ;;#ASMEND
532 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v0, 16
533 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
534 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
535 ; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
536 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
537 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
538 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
540 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_3_u:
542 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
543 ; GFX940-NEXT: ;;#ASMSTART
544 ; GFX940-NEXT: ; def v0
545 ; GFX940-NEXT: ;;#ASMEND
546 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
547 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
548 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v0, 16
549 ; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
550 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
551 ; GFX940-NEXT: s_waitcnt vmcnt(0)
552 ; GFX940-NEXT: s_setpc_b64 s[30:31]
553 %vec0 = call <2 x half> asm "; def $0", "=v"()
554 %vec1 = call <2 x half> asm "; def $0", "=v"()
555 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 poison>
556 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 3, 3, 0>: result lanes 0, 1 and 2 are element 1 of %vec1 and lane 3
; is element 0 of %vec0, so every lane is defined. The checks expect a
; v_perm_b32 with selector 0x7060302 for the low dword (v0) and a
; v_alignbit_b32 by 16 combining both asm-defined registers for the high
; dword (v1).
560 define void @v_shuffle_v4f16_v2f16__3_3_3_0(ptr addrspace(1) inreg %ptr) {
561 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_3_0:
563 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
564 ; GFX900-NEXT: ;;#ASMSTART
565 ; GFX900-NEXT: ; def v1
566 ; GFX900-NEXT: ;;#ASMEND
567 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
568 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
569 ; GFX900-NEXT: ;;#ASMSTART
570 ; GFX900-NEXT: ; def v3
571 ; GFX900-NEXT: ;;#ASMEND
572 ; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4
573 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16
574 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
575 ; GFX900-NEXT: s_waitcnt vmcnt(0)
576 ; GFX900-NEXT: s_setpc_b64 s[30:31]
578 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_3_0:
580 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
581 ; GFX90A-NEXT: ;;#ASMSTART
582 ; GFX90A-NEXT: ; def v1
583 ; GFX90A-NEXT: ;;#ASMEND
584 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
585 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
586 ; GFX90A-NEXT: ;;#ASMSTART
587 ; GFX90A-NEXT: ; def v3
588 ; GFX90A-NEXT: ;;#ASMEND
589 ; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
590 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v3, 16
591 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
592 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
593 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
595 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_3_0:
597 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598 ; GFX940-NEXT: ;;#ASMSTART
599 ; GFX940-NEXT: ; def v1
600 ; GFX940-NEXT: ;;#ASMEND
601 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
602 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
603 ; GFX940-NEXT: ;;#ASMSTART
604 ; GFX940-NEXT: ; def v3
605 ; GFX940-NEXT: ;;#ASMEND
606 ; GFX940-NEXT: s_nop 0
607 ; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
608 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16
609 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
610 ; GFX940-NEXT: s_waitcnt vmcnt(0)
611 ; GFX940-NEXT: s_setpc_b64 s[30:31]
612 %vec0 = call <2 x half> asm "; def $0", "=v"()
613 %vec1 = call <2 x half> asm "; def $0", "=v"()
614 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
615 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 3, 3, 1>: result lanes 0, 1 and 2 are element 1 of %vec1 and lane 3
; is element 1 of %vec0, so every lane is defined. The checks expect two
; v_perm_b32 instructions sharing selector 0x7060302: one over both
; asm-defined registers for the high dword (v1) and one over a single register
; for the low dword (v0).
619 define void @v_shuffle_v4f16_v2f16__3_3_3_1(ptr addrspace(1) inreg %ptr) {
620 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_3_1:
622 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
623 ; GFX900-NEXT: ;;#ASMSTART
624 ; GFX900-NEXT: ; def v0
625 ; GFX900-NEXT: ;;#ASMEND
626 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
627 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
628 ; GFX900-NEXT: ;;#ASMSTART
629 ; GFX900-NEXT: ; def v3
630 ; GFX900-NEXT: ;;#ASMEND
631 ; GFX900-NEXT: v_perm_b32 v1, v0, v3, s4
632 ; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4
633 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
634 ; GFX900-NEXT: s_waitcnt vmcnt(0)
635 ; GFX900-NEXT: s_setpc_b64 s[30:31]
637 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_3_1:
639 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
640 ; GFX90A-NEXT: ;;#ASMSTART
641 ; GFX90A-NEXT: ; def v0
642 ; GFX90A-NEXT: ;;#ASMEND
643 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
644 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
645 ; GFX90A-NEXT: ;;#ASMSTART
646 ; GFX90A-NEXT: ; def v3
647 ; GFX90A-NEXT: ;;#ASMEND
648 ; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4
649 ; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
650 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
651 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
652 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
654 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_3_1:
656 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657 ; GFX940-NEXT: ;;#ASMSTART
658 ; GFX940-NEXT: ; def v0
659 ; GFX940-NEXT: ;;#ASMEND
660 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
661 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
662 ; GFX940-NEXT: ;;#ASMSTART
663 ; GFX940-NEXT: ; def v3
664 ; GFX940-NEXT: ;;#ASMEND
665 ; GFX940-NEXT: s_nop 0
666 ; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2
667 ; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
668 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
669 ; GFX940-NEXT: s_waitcnt vmcnt(0)
670 ; GFX940-NEXT: s_setpc_b64 s[30:31]
671 %vec0 = call <2 x half> asm "; def $0", "=v"()
672 %vec1 = call <2 x half> asm "; def $0", "=v"()
673 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 1>
674 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 3, 3, 2>: result lanes 0, 1 and 2 are element 1 of %vec1 and lane 3
; is element 0 of %vec1; %vec0 is not referenced by the mask. The checks
; expect one "; def v1" asm block feeding a v_perm_b32 with selector 0x7060302
; (low dword) and a v_alignbit_b32 by 16 with identical sources (high dword).
678 define void @v_shuffle_v4f16_v2f16__3_3_3_2(ptr addrspace(1) inreg %ptr) {
679 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_3_2:
681 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
682 ; GFX900-NEXT: ;;#ASMSTART
683 ; GFX900-NEXT: ; def v1
684 ; GFX900-NEXT: ;;#ASMEND
685 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
686 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
687 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
688 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16
689 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
690 ; GFX900-NEXT: s_waitcnt vmcnt(0)
691 ; GFX900-NEXT: s_setpc_b64 s[30:31]
693 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_3_2:
695 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
696 ; GFX90A-NEXT: ;;#ASMSTART
697 ; GFX90A-NEXT: ; def v1
698 ; GFX90A-NEXT: ;;#ASMEND
699 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
700 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
701 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
702 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16
703 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
704 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
705 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
707 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_3_2:
709 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
710 ; GFX940-NEXT: ;;#ASMSTART
711 ; GFX940-NEXT: ; def v1
712 ; GFX940-NEXT: ;;#ASMEND
713 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
714 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
715 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
716 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16
717 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
718 ; GFX940-NEXT: s_waitcnt vmcnt(0)
719 ; GFX940-NEXT: s_setpc_b64 s[30:31]
720 %vec0 = call <2 x half> asm "; def $0", "=v"()
721 %vec1 = call <2 x half> asm "; def $0", "=v"()
722 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 2>
723 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <3, 3, 3, 3>: every result lane is element 1 of %vec1 (a splat); %vec0
; is not referenced by the mask. The checks expect one v_perm_b32 with
; selector 0x7060302 whose result is then copied into the second store
; register with v_mov_b32.
727 define void @v_shuffle_v4f16_v2f16__3_3_3_3(ptr addrspace(1) inreg %ptr) {
728 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_3_3:
730 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
731 ; GFX900-NEXT: ;;#ASMSTART
732 ; GFX900-NEXT: ; def v0
733 ; GFX900-NEXT: ;;#ASMEND
734 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
735 ; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
736 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
737 ; GFX900-NEXT: v_mov_b32_e32 v1, v0
738 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
739 ; GFX900-NEXT: s_waitcnt vmcnt(0)
740 ; GFX900-NEXT: s_setpc_b64 s[30:31]
742 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_3_3:
744 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
745 ; GFX90A-NEXT: ;;#ASMSTART
746 ; GFX90A-NEXT: ; def v0
747 ; GFX90A-NEXT: ;;#ASMEND
748 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
749 ; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
750 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
751 ; GFX90A-NEXT: v_mov_b32_e32 v1, v0
752 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
753 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
754 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
756 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_3_3:
758 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
759 ; GFX940-NEXT: ;;#ASMSTART
760 ; GFX940-NEXT: ; def v0
761 ; GFX940-NEXT: ;;#ASMEND
762 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
763 ; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
764 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
765 ; GFX940-NEXT: v_mov_b32_e32 v1, v0
766 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
767 ; GFX940-NEXT: s_waitcnt vmcnt(0)
768 ; GFX940-NEXT: s_setpc_b64 s[30:31]
769 %vec0 = call <2 x half> asm "; def $0", "=v"()
770 %vec1 = call <2 x half> asm "; def $0", "=v"()
771 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
772 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask <poison, 0, 0, 0>: result lane 0 is poison and lanes 1, 2 and 3 are all
; element 0 of %vec0 (the second shuffle operand is poison). The checks expect
; a v_perm_b32 with selector 0x5040100 for the high dword (v1) and a
; v_lshlrev_b32 by 16 for the low dword (v0).
776 define void @v_shuffle_v4f16_v2f16__u_0_0_0(ptr addrspace(1) inreg %ptr) {
777 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__u_0_0_0:
779 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
780 ; GFX900-NEXT: ;;#ASMSTART
781 ; GFX900-NEXT: ; def v0
782 ; GFX900-NEXT: ;;#ASMEND
783 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
784 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
785 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
786 ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
787 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
788 ; GFX900-NEXT: s_waitcnt vmcnt(0)
789 ; GFX900-NEXT: s_setpc_b64 s[30:31]
791 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__u_0_0_0:
793 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
794 ; GFX90A-NEXT: ;;#ASMSTART
795 ; GFX90A-NEXT: ; def v0
796 ; GFX90A-NEXT: ;;#ASMEND
797 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
798 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
799 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
800 ; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0
801 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
802 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
803 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
805 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__u_0_0_0:
807 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
808 ; GFX940-NEXT: ;;#ASMSTART
809 ; GFX940-NEXT: ; def v0
810 ; GFX940-NEXT: ;;#ASMEND
811 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
812 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
813 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
814 ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0
815 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
816 ; GFX940-NEXT: s_waitcnt vmcnt(0)
817 ; GFX940-NEXT: s_setpc_b64 s[30:31]
818 %vec0 = call <2 x half> asm "; def $0", "=v"()
819 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
820 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Mask zeroinitializer (<0, 0, 0, 0>): every result lane is element 0 of %vec0
; (a splat). The checks expect one v_perm_b32 with selector 0x5040100 whose
; result is then copied into the second store register with v_mov_b32.
824 define void @v_shuffle_v4f16_v2f16__0_0_0_0(ptr addrspace(1) inreg %ptr) {
825 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__0_0_0_0:
827 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
828 ; GFX900-NEXT: ;;#ASMSTART
829 ; GFX900-NEXT: ; def v0
830 ; GFX900-NEXT: ;;#ASMEND
831 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
832 ; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
833 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
834 ; GFX900-NEXT: v_mov_b32_e32 v1, v0
835 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
836 ; GFX900-NEXT: s_waitcnt vmcnt(0)
837 ; GFX900-NEXT: s_setpc_b64 s[30:31]
839 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__0_0_0_0:
841 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
842 ; GFX90A-NEXT: ;;#ASMSTART
843 ; GFX90A-NEXT: ; def v0
844 ; GFX90A-NEXT: ;;#ASMEND
845 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
846 ; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
847 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
848 ; GFX90A-NEXT: v_mov_b32_e32 v1, v0
849 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
850 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
851 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
853 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__0_0_0_0:
855 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
856 ; GFX940-NEXT: ;;#ASMSTART
857 ; GFX940-NEXT: ; def v0
858 ; GFX940-NEXT: ;;#ASMEND
859 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
860 ; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
861 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
862 ; GFX940-NEXT: v_mov_b32_e32 v1, v0
863 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
864 ; GFX940-NEXT: s_waitcnt vmcnt(0)
865 ; GFX940-NEXT: s_setpc_b64 s[30:31]
866 %vec0 = call <2 x half> asm "; def $0", "=v"()
867 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> zeroinitializer
868 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 1,0,0,0: lane 0 selects element 1 of %vec0 and lanes 1-3 select
; element 0 of %vec0 (second operand is poison).
; %vec0 is produced by inline asm with a "=v" output; the result is stored to
; %ptr. The expected-output lines below are autogenerated (see the NOTE on the
; first line of the file), so regenerate them rather than editing by hand.
872 define void @v_shuffle_v4f16_v2f16__1_0_0_0(ptr addrspace(1) inreg %ptr) {
873 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__1_0_0_0:
875 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
876 ; GFX900-NEXT: ;;#ASMSTART
877 ; GFX900-NEXT: ; def v0
878 ; GFX900-NEXT: ;;#ASMEND
879 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
880 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
881 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
882 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v0, 16
883 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
884 ; GFX900-NEXT: s_waitcnt vmcnt(0)
885 ; GFX900-NEXT: s_setpc_b64 s[30:31]
887 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__1_0_0_0:
889 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
890 ; GFX90A-NEXT: ;;#ASMSTART
891 ; GFX90A-NEXT: ; def v0
892 ; GFX90A-NEXT: ;;#ASMEND
893 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
894 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
895 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
896 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v0, 16
897 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
898 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
899 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
901 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__1_0_0_0:
903 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
904 ; GFX940-NEXT: ;;#ASMSTART
905 ; GFX940-NEXT: ; def v0
906 ; GFX940-NEXT: ;;#ASMEND
907 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
908 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
909 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
910 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16
911 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
912 ; GFX940-NEXT: s_waitcnt vmcnt(0)
913 ; GFX940-NEXT: s_setpc_b64 s[30:31]
914 %vec0 = call <2 x half> asm "; def $0", "=v"()
915 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
916 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 2,0,0,0: lane 0 uses mask index 2, which addresses element 0 of
; the second operand; that operand is poison here, so only lanes 1-3 (element 0
; of %vec0) carry defined data.
; %vec0 is produced by inline asm with a "=v" output; the result is stored to
; %ptr. The expected-output lines below are autogenerated (see the NOTE on the
; first line of the file), so regenerate them rather than editing by hand.
920 define void @v_shuffle_v4f16_v2f16__2_0_0_0(ptr addrspace(1) inreg %ptr) {
921 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__2_0_0_0:
923 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
924 ; GFX900-NEXT: ;;#ASMSTART
925 ; GFX900-NEXT: ; def v0
926 ; GFX900-NEXT: ;;#ASMEND
927 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
928 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
929 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
930 ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
931 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
932 ; GFX900-NEXT: s_waitcnt vmcnt(0)
933 ; GFX900-NEXT: s_setpc_b64 s[30:31]
935 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__2_0_0_0:
937 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
938 ; GFX90A-NEXT: ;;#ASMSTART
939 ; GFX90A-NEXT: ; def v0
940 ; GFX90A-NEXT: ;;#ASMEND
941 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
942 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
943 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
944 ; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0
945 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
946 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
947 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
949 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__2_0_0_0:
951 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
952 ; GFX940-NEXT: ;;#ASMSTART
953 ; GFX940-NEXT: ; def v0
954 ; GFX940-NEXT: ;;#ASMEND
955 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
956 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
957 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
958 ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0
959 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
960 ; GFX940-NEXT: s_waitcnt vmcnt(0)
961 ; GFX940-NEXT: s_setpc_b64 s[30:31]
962 %vec0 = call <2 x half> asm "; def $0", "=v"()
963 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
964 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 3,0,0,0 with two inputs: lane 0 selects element 1 of %vec1
; (mask index 3) and lanes 1-3 select element 0 of %vec0.
; Both inputs are produced by inline asm with a "=v" output; the result is
; stored to %ptr. The expected-output lines below are autogenerated (see the
; NOTE on the first line of the file), so regenerate them rather than editing
; by hand.
968 define void @v_shuffle_v4f16_v2f16__3_0_0_0(ptr addrspace(1) inreg %ptr) {
969 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_0_0_0:
971 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
972 ; GFX900-NEXT: ;;#ASMSTART
973 ; GFX900-NEXT: ; def v0
974 ; GFX900-NEXT: ;;#ASMEND
975 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
976 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
977 ; GFX900-NEXT: ;;#ASMSTART
978 ; GFX900-NEXT: ; def v3
979 ; GFX900-NEXT: ;;#ASMEND
980 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
981 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16
982 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
983 ; GFX900-NEXT: s_waitcnt vmcnt(0)
984 ; GFX900-NEXT: s_setpc_b64 s[30:31]
986 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_0_0_0:
988 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
989 ; GFX90A-NEXT: ;;#ASMSTART
990 ; GFX90A-NEXT: ; def v0
991 ; GFX90A-NEXT: ;;#ASMEND
992 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
993 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
994 ; GFX90A-NEXT: ;;#ASMSTART
995 ; GFX90A-NEXT: ; def v3
996 ; GFX90A-NEXT: ;;#ASMEND
997 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
998 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16
999 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1000 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1001 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1003 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_0_0_0:
1005 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1006 ; GFX940-NEXT: ;;#ASMSTART
1007 ; GFX940-NEXT: ; def v0
1008 ; GFX940-NEXT: ;;#ASMEND
1009 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1010 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1011 ; GFX940-NEXT: ;;#ASMSTART
1012 ; GFX940-NEXT: ; def v3
1013 ; GFX940-NEXT: ;;#ASMEND
1014 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
1015 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16
1016 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1017 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1018 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1019 %vec0 = call <2 x half> asm "; def $0", "=v"()
1020 %vec1 = call <2 x half> asm "; def $0", "=v"()
1021 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
1022 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 3,u,0,0 with two inputs: lane 0 selects element 1 of %vec1
; (mask index 3), lane 1 is poison in the mask, and lanes 2-3 select element 0
; of %vec0.
; Both inputs are produced by inline asm with a "=v" output; the result is
; stored to %ptr. The expected-output lines below are autogenerated (see the
; NOTE on the first line of the file), so regenerate them rather than editing
; by hand.
1026 define void @v_shuffle_v4f16_v2f16__3_u_0_0(ptr addrspace(1) inreg %ptr) {
1027 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_u_0_0:
1029 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1030 ; GFX900-NEXT: ;;#ASMSTART
1031 ; GFX900-NEXT: ; def v0
1032 ; GFX900-NEXT: ;;#ASMEND
1033 ; GFX900-NEXT: ;;#ASMSTART
1034 ; GFX900-NEXT: ; def v1
1035 ; GFX900-NEXT: ;;#ASMEND
1036 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
1037 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1038 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1039 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
1040 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1041 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1042 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1044 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_u_0_0:
1046 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1047 ; GFX90A-NEXT: ;;#ASMSTART
1048 ; GFX90A-NEXT: ; def v0
1049 ; GFX90A-NEXT: ;;#ASMEND
1050 ; GFX90A-NEXT: ;;#ASMSTART
1051 ; GFX90A-NEXT: ; def v1
1052 ; GFX90A-NEXT: ;;#ASMEND
1053 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
1054 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1055 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1056 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
1057 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1058 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1059 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1061 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_u_0_0:
1063 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1064 ; GFX940-NEXT: ;;#ASMSTART
1065 ; GFX940-NEXT: ; def v1
1066 ; GFX940-NEXT: ;;#ASMEND
1067 ; GFX940-NEXT: ;;#ASMSTART
1068 ; GFX940-NEXT: ; def v0
1069 ; GFX940-NEXT: ;;#ASMEND
1070 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1071 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1072 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
1073 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
1074 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1075 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1076 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1077 %vec0 = call <2 x half> asm "; def $0", "=v"()
1078 %vec1 = call <2 x half> asm "; def $0", "=v"()
1079 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 poison, i32 0, i32 0>
1080 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 3,1,0,0 with two inputs: lane 0 selects element 1 of %vec1
; (mask index 3), lane 1 selects element 1 of %vec0, and lanes 2-3 select
; element 0 of %vec0.
; Both inputs are produced by inline asm with a "=v" output; the result is
; stored to %ptr. The expected-output lines below are autogenerated (see the
; NOTE on the first line of the file), so regenerate them rather than editing
; by hand.
1084 define void @v_shuffle_v4f16_v2f16__3_1_0_0(ptr addrspace(1) inreg %ptr) {
1085 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_1_0_0:
1087 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1088 ; GFX900-NEXT: ;;#ASMSTART
1089 ; GFX900-NEXT: ; def v0
1090 ; GFX900-NEXT: ;;#ASMEND
1091 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1092 ; GFX900-NEXT: ;;#ASMSTART
1093 ; GFX900-NEXT: ; def v1
1094 ; GFX900-NEXT: ;;#ASMEND
1095 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
1096 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1097 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1098 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
1099 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1100 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1101 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1103 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_1_0_0:
1105 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106 ; GFX90A-NEXT: ;;#ASMSTART
1107 ; GFX90A-NEXT: ; def v0
1108 ; GFX90A-NEXT: ;;#ASMEND
1109 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1110 ; GFX90A-NEXT: ;;#ASMSTART
1111 ; GFX90A-NEXT: ; def v1
1112 ; GFX90A-NEXT: ;;#ASMEND
1113 ; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4
1114 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1115 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1116 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
1117 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1118 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1119 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1121 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_1_0_0:
1123 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1124 ; GFX940-NEXT: ;;#ASMSTART
1125 ; GFX940-NEXT: ; def v0
1126 ; GFX940-NEXT: ;;#ASMEND
1127 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1128 ; GFX940-NEXT: ;;#ASMSTART
1129 ; GFX940-NEXT: ; def v1
1130 ; GFX940-NEXT: ;;#ASMEND
1131 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1132 ; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2
1133 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1134 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
1135 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1136 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1137 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1138 %vec0 = call <2 x half> asm "; def $0", "=v"()
1139 %vec1 = call <2 x half> asm "; def $0", "=v"()
1140 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 1, i32 0, i32 0>
1141 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 3,2,0,0 with two inputs: lanes 0-1 select elements 1 and 0 of
; %vec1 (mask indices 3 and 2), and lanes 2-3 select element 0 of %vec0.
; Both inputs are produced by inline asm with a "=v" output; the result is
; stored to %ptr. The expected-output lines below are autogenerated (see the
; NOTE on the first line of the file), so regenerate them rather than editing
; by hand.
1145 define void @v_shuffle_v4f16_v2f16__3_2_0_0(ptr addrspace(1) inreg %ptr) {
1146 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_2_0_0:
1148 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1149 ; GFX900-NEXT: ;;#ASMSTART
1150 ; GFX900-NEXT: ; def v0
1151 ; GFX900-NEXT: ;;#ASMEND
1152 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1153 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1154 ; GFX900-NEXT: ;;#ASMSTART
1155 ; GFX900-NEXT: ; def v3
1156 ; GFX900-NEXT: ;;#ASMEND
1157 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
1158 ; GFX900-NEXT: v_alignbit_b32 v0, v3, v3, 16
1159 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1160 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1161 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1163 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_2_0_0:
1165 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1166 ; GFX90A-NEXT: ;;#ASMSTART
1167 ; GFX90A-NEXT: ; def v0
1168 ; GFX90A-NEXT: ;;#ASMEND
1169 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1170 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1171 ; GFX90A-NEXT: ;;#ASMSTART
1172 ; GFX90A-NEXT: ; def v3
1173 ; GFX90A-NEXT: ;;#ASMEND
1174 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
1175 ; GFX90A-NEXT: v_alignbit_b32 v0, v3, v3, 16
1176 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1177 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1178 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1180 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_2_0_0:
1182 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1183 ; GFX940-NEXT: ;;#ASMSTART
1184 ; GFX940-NEXT: ; def v0
1185 ; GFX940-NEXT: ;;#ASMEND
1186 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1187 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1188 ; GFX940-NEXT: ;;#ASMSTART
1189 ; GFX940-NEXT: ; def v3
1190 ; GFX940-NEXT: ;;#ASMEND
1191 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
1192 ; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16
1193 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1194 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1195 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1196 %vec0 = call <2 x half> asm "; def $0", "=v"()
1197 %vec1 = call <2 x half> asm "; def $0", "=v"()
1198 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 2, i32 0, i32 0>
1199 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 3,3,0,0 with two inputs: lanes 0-1 both select element 1 of
; %vec1 (mask index 3), and lanes 2-3 both select element 0 of %vec0.
; Both inputs are produced by inline asm with a "=v" output; the result is
; stored to %ptr. The expected-output lines below are autogenerated (see the
; NOTE on the first line of the file), so regenerate them rather than editing
; by hand.
1203 define void @v_shuffle_v4f16_v2f16__3_3_0_0(ptr addrspace(1) inreg %ptr) {
1204 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_0_0:
1206 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1207 ; GFX900-NEXT: ;;#ASMSTART
1208 ; GFX900-NEXT: ; def v0
1209 ; GFX900-NEXT: ;;#ASMEND
1210 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1211 ; GFX900-NEXT: ;;#ASMSTART
1212 ; GFX900-NEXT: ; def v1
1213 ; GFX900-NEXT: ;;#ASMEND
1214 ; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
1215 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1216 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1217 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
1218 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1219 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1220 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1222 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_0_0:
1224 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1225 ; GFX90A-NEXT: ;;#ASMSTART
1226 ; GFX90A-NEXT: ; def v0
1227 ; GFX90A-NEXT: ;;#ASMEND
1228 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1229 ; GFX90A-NEXT: ;;#ASMSTART
1230 ; GFX90A-NEXT: ; def v1
1231 ; GFX90A-NEXT: ;;#ASMEND
1232 ; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
1233 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1234 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1235 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
1236 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1237 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1238 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1240 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_0_0:
1242 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1243 ; GFX940-NEXT: ;;#ASMSTART
1244 ; GFX940-NEXT: ; def v0
1245 ; GFX940-NEXT: ;;#ASMEND
1246 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1247 ; GFX940-NEXT: ;;#ASMSTART
1248 ; GFX940-NEXT: ; def v1
1249 ; GFX940-NEXT: ;;#ASMEND
1250 ; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
1251 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1252 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1253 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
1254 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1255 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1256 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1257 %vec0 = call <2 x half> asm "; def $0", "=v"()
1258 %vec1 = call <2 x half> asm "; def $0", "=v"()
1259 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 0>
1260 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 3,3,u,0 with two inputs: lanes 0-1 both select element 1 of
; %vec1 (mask index 3), lane 2 is poison in the mask, and lane 3 selects
; element 0 of %vec0.
; Both inputs are produced by inline asm with a "=v" output; the result is
; stored to %ptr. The expected-output lines below are autogenerated (see the
; NOTE on the first line of the file), so regenerate them rather than editing
; by hand.
1264 define void @v_shuffle_v4f16_v2f16__3_3_u_0(ptr addrspace(1) inreg %ptr) {
1265 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_u_0:
1267 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1268 ; GFX900-NEXT: ;;#ASMSTART
1269 ; GFX900-NEXT: ; def v1
1270 ; GFX900-NEXT: ;;#ASMEND
1271 ; GFX900-NEXT: ;;#ASMSTART
1272 ; GFX900-NEXT: ; def v0
1273 ; GFX900-NEXT: ;;#ASMEND
1274 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1275 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1276 ; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
1277 ; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1278 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1279 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1280 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1282 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_u_0:
1284 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1285 ; GFX90A-NEXT: ;;#ASMSTART
1286 ; GFX90A-NEXT: ; def v1
1287 ; GFX90A-NEXT: ;;#ASMEND
1288 ; GFX90A-NEXT: ;;#ASMSTART
1289 ; GFX90A-NEXT: ; def v0
1290 ; GFX90A-NEXT: ;;#ASMEND
1291 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1292 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1293 ; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
1294 ; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1295 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1296 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1297 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1299 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_u_0:
1301 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1302 ; GFX940-NEXT: ;;#ASMSTART
1303 ; GFX940-NEXT: ; def v1
1304 ; GFX940-NEXT: ;;#ASMEND
1305 ; GFX940-NEXT: ;;#ASMSTART
1306 ; GFX940-NEXT: ; def v0
1307 ; GFX940-NEXT: ;;#ASMEND
1308 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1309 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1310 ; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
1311 ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1312 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1313 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1314 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1315 %vec0 = call <2 x half> asm "; def $0", "=v"()
1316 %vec1 = call <2 x half> asm "; def $0", "=v"()
1317 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 0>
1318 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 3,3,1,0 with two inputs: lanes 0-1 both select element 1 of
; %vec1 (mask index 3), lane 2 selects element 1 of %vec0, and lane 3 selects
; element 0 of %vec0.
; Both inputs are produced by inline asm with a "=v" output; the result is
; stored to %ptr. The expected-output lines below are autogenerated (see the
; NOTE on the first line of the file), so regenerate them rather than editing
; by hand.
1322 define void @v_shuffle_v4f16_v2f16__3_3_1_0(ptr addrspace(1) inreg %ptr) {
1323 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_1_0:
1325 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1326 ; GFX900-NEXT: ;;#ASMSTART
1327 ; GFX900-NEXT: ; def v1
1328 ; GFX900-NEXT: ;;#ASMEND
1329 ; GFX900-NEXT: ;;#ASMSTART
1330 ; GFX900-NEXT: ; def v0
1331 ; GFX900-NEXT: ;;#ASMEND
1332 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1333 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1334 ; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
1335 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16
1336 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1337 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1338 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1340 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_1_0:
1342 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343 ; GFX90A-NEXT: ;;#ASMSTART
1344 ; GFX90A-NEXT: ; def v1
1345 ; GFX90A-NEXT: ;;#ASMEND
1346 ; GFX90A-NEXT: ;;#ASMSTART
1347 ; GFX90A-NEXT: ; def v0
1348 ; GFX90A-NEXT: ;;#ASMEND
1349 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1350 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1351 ; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
1352 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16
1353 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1354 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1355 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1357 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_1_0:
1359 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1360 ; GFX940-NEXT: ;;#ASMSTART
1361 ; GFX940-NEXT: ; def v1
1362 ; GFX940-NEXT: ;;#ASMEND
1363 ; GFX940-NEXT: ;;#ASMSTART
1364 ; GFX940-NEXT: ; def v0
1365 ; GFX940-NEXT: ;;#ASMEND
1366 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1367 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1368 ; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
1369 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16
1370 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1371 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1372 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1373 %vec0 = call <2 x half> asm "; def $0", "=v"()
1374 %vec1 = call <2 x half> asm "; def $0", "=v"()
1375 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
1376 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 3,3,2,0 with two inputs: lanes 0-1 both select element 1 of
; %vec1 (mask index 3), lane 2 selects element 0 of %vec1 (mask index 2), and
; lane 3 selects element 0 of %vec0.
; Both inputs are produced by inline asm with a "=v" output; the result is
; stored to %ptr. The expected-output lines below are autogenerated (see the
; NOTE on the first line of the file), so regenerate them rather than editing
; by hand.
1380 define void @v_shuffle_v4f16_v2f16__3_3_2_0(ptr addrspace(1) inreg %ptr) {
1381 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_2_0:
1383 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1384 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1385 ; GFX900-NEXT: ;;#ASMSTART
1386 ; GFX900-NEXT: ; def v0
1387 ; GFX900-NEXT: ;;#ASMEND
1388 ; GFX900-NEXT: ;;#ASMSTART
1389 ; GFX900-NEXT: ; def v3
1390 ; GFX900-NEXT: ;;#ASMEND
1391 ; GFX900-NEXT: v_perm_b32 v1, v0, v3, s4
1392 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1393 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1394 ; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4
1395 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1396 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1397 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1399 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_2_0:
1401 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1402 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1403 ; GFX90A-NEXT: ;;#ASMSTART
1404 ; GFX90A-NEXT: ; def v0
1405 ; GFX90A-NEXT: ;;#ASMEND
1406 ; GFX90A-NEXT: ;;#ASMSTART
1407 ; GFX90A-NEXT: ; def v3
1408 ; GFX90A-NEXT: ;;#ASMEND
1409 ; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4
1410 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1411 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1412 ; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
1413 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1414 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1415 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1417 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_2_0:
1419 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1420 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1421 ; GFX940-NEXT: ;;#ASMSTART
1422 ; GFX940-NEXT: ; def v0
1423 ; GFX940-NEXT: ;;#ASMEND
1424 ; GFX940-NEXT: ;;#ASMSTART
1425 ; GFX940-NEXT: ; def v3
1426 ; GFX940-NEXT: ;;#ASMEND
1427 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1428 ; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2
1429 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1430 ; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
1431 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1432 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1433 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1434 %vec0 = call <2 x half> asm "; def $0", "=v"()
1435 %vec1 = call <2 x half> asm "; def $0", "=v"()
1436 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 0>
1437 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case u,1,1,1: lane 0 is poison in the mask and lanes 1-3 select
; element 1 of %vec0 (second operand is poison).
; %vec0 is produced by inline asm with a "=v" output; the result is stored to
; %ptr. The expected-output lines below are autogenerated (see the NOTE on the
; first line of the file), so regenerate them rather than editing by hand.
1441 define void @v_shuffle_v4f16_v2f16__u_1_1_1(ptr addrspace(1) inreg %ptr) {
1442 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__u_1_1_1:
1444 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1445 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1446 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1447 ; GFX900-NEXT: ;;#ASMSTART
1448 ; GFX900-NEXT: ; def v0
1449 ; GFX900-NEXT: ;;#ASMEND
1450 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
1451 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1452 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1453 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1455 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__u_1_1_1:
1457 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1458 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1459 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1460 ; GFX90A-NEXT: ;;#ASMSTART
1461 ; GFX90A-NEXT: ; def v0
1462 ; GFX90A-NEXT: ;;#ASMEND
1463 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
1464 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1465 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1466 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1468 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__u_1_1_1:
1470 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1471 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1472 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1473 ; GFX940-NEXT: ;;#ASMSTART
1474 ; GFX940-NEXT: ; def v0
1475 ; GFX940-NEXT: ;;#ASMEND
1476 ; GFX940-NEXT: s_nop 0
1477 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
1478 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1479 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1480 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1481 %vec0 = call <2 x half> asm "; def $0", "=v"()
1482 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
1483 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 0,1,1,1: lane 0 selects element 0 of %vec0 and lanes 1-3 select
; element 1 of %vec0 (second operand is poison).
; %vec0 is produced by inline asm with a "=v" output; the result is stored to
; %ptr. The expected-output lines below are autogenerated (see the NOTE on the
; first line of the file), so regenerate them rather than editing by hand.
1487 define void @v_shuffle_v4f16_v2f16__0_1_1_1(ptr addrspace(1) inreg %ptr) {
1488 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__0_1_1_1:
1490 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1491 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1492 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1493 ; GFX900-NEXT: ;;#ASMSTART
1494 ; GFX900-NEXT: ; def v0
1495 ; GFX900-NEXT: ;;#ASMEND
1496 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
1497 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1498 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1499 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1501 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__0_1_1_1:
1503 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1504 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1505 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1506 ; GFX90A-NEXT: ;;#ASMSTART
1507 ; GFX90A-NEXT: ; def v0
1508 ; GFX90A-NEXT: ;;#ASMEND
1509 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
1510 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1511 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1512 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1514 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__0_1_1_1:
1516 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1517 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1518 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1519 ; GFX940-NEXT: ;;#ASMSTART
1520 ; GFX940-NEXT: ; def v0
1521 ; GFX940-NEXT: ;;#ASMEND
1522 ; GFX940-NEXT: s_nop 0
1523 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
1524 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1525 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1526 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1527 %vec0 = call <2 x half> asm "; def $0", "=v"()
1528 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
1529 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 1,1,1,1: every lane of the <4 x half> result selects element 1
; of %vec0 (second operand is poison).
; %vec0 is produced by inline asm with a "=v" output; the result is stored to
; %ptr. The expected-output lines below are autogenerated (see the NOTE on the
; first line of the file), so regenerate them rather than editing by hand.
1533 define void @v_shuffle_v4f16_v2f16__1_1_1_1(ptr addrspace(1) inreg %ptr) {
1534 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__1_1_1_1:
1536 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1537 ; GFX900-NEXT: ;;#ASMSTART
1538 ; GFX900-NEXT: ; def v0
1539 ; GFX900-NEXT: ;;#ASMEND
1540 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1541 ; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
1542 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1543 ; GFX900-NEXT: v_mov_b32_e32 v1, v0
1544 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1545 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1546 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1548 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__1_1_1_1:
1550 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1551 ; GFX90A-NEXT: ;;#ASMSTART
1552 ; GFX90A-NEXT: ; def v0
1553 ; GFX90A-NEXT: ;;#ASMEND
1554 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1555 ; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
1556 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1557 ; GFX90A-NEXT: v_mov_b32_e32 v1, v0
1558 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1559 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1560 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1562 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__1_1_1_1:
1564 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1565 ; GFX940-NEXT: ;;#ASMSTART
1566 ; GFX940-NEXT: ; def v0
1567 ; GFX940-NEXT: ;;#ASMEND
1568 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1569 ; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
1570 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1571 ; GFX940-NEXT: v_mov_b32_e32 v1, v0
1572 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1573 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1574 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1575 %vec0 = call <2 x half> asm "; def $0", "=v"()
1576 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1577 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 2,1,1,1: lane 0 uses mask index 2, which addresses element 0 of
; the second operand; that operand is poison here, so only lanes 1-3 (element 1
; of %vec0) carry defined data.
; %vec0 is produced by inline asm with a "=v" output; the result is stored to
; %ptr. The expected-output lines below are autogenerated (see the NOTE on the
; first line of the file), so regenerate them rather than editing by hand.
1581 define void @v_shuffle_v4f16_v2f16__2_1_1_1(ptr addrspace(1) inreg %ptr) {
1582 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__2_1_1_1:
1584 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1585 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1586 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1587 ; GFX900-NEXT: ;;#ASMSTART
1588 ; GFX900-NEXT: ; def v0
1589 ; GFX900-NEXT: ;;#ASMEND
1590 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
1591 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1592 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1593 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1595 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__2_1_1_1:
1597 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1598 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1599 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1600 ; GFX90A-NEXT: ;;#ASMSTART
1601 ; GFX90A-NEXT: ; def v0
1602 ; GFX90A-NEXT: ;;#ASMEND
1603 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
1604 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1605 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1606 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1608 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__2_1_1_1:
1610 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1611 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1612 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1613 ; GFX940-NEXT: ;;#ASMSTART
1614 ; GFX940-NEXT: ; def v0
1615 ; GFX940-NEXT: ;;#ASMEND
1616 ; GFX940-NEXT: s_nop 0
1617 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
1618 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1619 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1620 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1621 %vec0 = call <2 x half> asm "; def $0", "=v"()
1622 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
1623 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 3,1,1,1 with two inputs: lane 0 selects element 1 of %vec1
; (mask index 3) and lanes 1-3 select element 1 of %vec0.
; Both inputs are produced by inline asm with a "=v" output; the result is
; stored to %ptr. The expected-output lines below are autogenerated (see the
; NOTE on the first line of the file), so regenerate them rather than editing
; by hand.
1627 define void @v_shuffle_v4f16_v2f16__3_1_1_1(ptr addrspace(1) inreg %ptr) {
1628 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_1_1_1:
1630 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1631 ; GFX900-NEXT: ;;#ASMSTART
1632 ; GFX900-NEXT: ; def v1
1633 ; GFX900-NEXT: ;;#ASMEND
1634 ; GFX900-NEXT: ;;#ASMSTART
1635 ; GFX900-NEXT: ; def v0
1636 ; GFX900-NEXT: ;;#ASMEND
1637 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1638 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1639 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
1640 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
1641 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1642 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1643 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1645 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_1_1_1:
1647 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1648 ; GFX90A-NEXT: ;;#ASMSTART
1649 ; GFX90A-NEXT: ; def v1
1650 ; GFX90A-NEXT: ;;#ASMEND
1651 ; GFX90A-NEXT: ;;#ASMSTART
1652 ; GFX90A-NEXT: ; def v0
1653 ; GFX90A-NEXT: ;;#ASMEND
1654 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1655 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1656 ; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4
1657 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
1658 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1659 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1660 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1662 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_1_1_1:
1664 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1665 ; GFX940-NEXT: ;;#ASMSTART
1666 ; GFX940-NEXT: ; def v1
1667 ; GFX940-NEXT: ;;#ASMEND
1668 ; GFX940-NEXT: ;;#ASMSTART
1669 ; GFX940-NEXT: ; def v0
1670 ; GFX940-NEXT: ;;#ASMEND
1671 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1672 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1673 ; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2
1674 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
1675 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1676 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1677 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1678 %vec0 = call <2 x half> asm "; def $0", "=v"()
1679 %vec1 = call <2 x half> asm "; def $0", "=v"()
1680 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
1681 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle case 3,u,1,1 with two inputs: lane 0 selects element 1 of %vec1
; (mask index 3), lane 1 is poison in the mask, and lanes 2-3 select element 1
; of %vec0.
; Both inputs are produced by inline asm with a "=v" output; the result is
; stored to %ptr. The expected-output lines below are autogenerated (see the
; NOTE on the first line of the file), so regenerate them rather than editing
; by hand.
1685 define void @v_shuffle_v4f16_v2f16__3_u_1_1(ptr addrspace(1) inreg %ptr) {
1686 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_u_1_1:
1688 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1689 ; GFX900-NEXT: ;;#ASMSTART
1690 ; GFX900-NEXT: ; def v0
1691 ; GFX900-NEXT: ;;#ASMEND
1692 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1693 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1694 ; GFX900-NEXT: ;;#ASMSTART
1695 ; GFX900-NEXT: ; def v3
1696 ; GFX900-NEXT: ;;#ASMEND
1697 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
1698 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v3, 16
1699 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1700 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1701 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1703 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_u_1_1:
1705 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1706 ; GFX90A-NEXT: ;;#ASMSTART
1707 ; GFX90A-NEXT: ; def v0
1708 ; GFX90A-NEXT: ;;#ASMEND
1709 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1710 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1711 ; GFX90A-NEXT: ;;#ASMSTART
1712 ; GFX90A-NEXT: ; def v3
1713 ; GFX90A-NEXT: ;;#ASMEND
1714 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
1715 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v3, 16
1716 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1717 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1718 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1720 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_u_1_1:
1722 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1723 ; GFX940-NEXT: ;;#ASMSTART
1724 ; GFX940-NEXT: ; def v0
1725 ; GFX940-NEXT: ;;#ASMEND
1726 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1727 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1728 ; GFX940-NEXT: ;;#ASMSTART
1729 ; GFX940-NEXT: ; def v3
1730 ; GFX940-NEXT: ;;#ASMEND
1731 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
1732 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16
1733 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1734 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1735 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1736 %vec0 = call <2 x half> asm "; def $0", "=v"()
1737 %vec1 = call <2 x half> asm "; def $0", "=v"()
1738 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 poison, i32 1, i32 1>
1739 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <2 x half> inputs into a <4 x half> that is stored to %ptr.
; Mask indices 0-1 address %vec0 and 2-3 address %vec1 (operands concatenated).
; Mask <3, 0, 1, 1> => result = <%vec1[1], %vec0[0], %vec0[1], %vec0[1]>.
1743 define void @v_shuffle_v4f16_v2f16__3_0_1_1(ptr addrspace(1) inreg %ptr) {
1744 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_0_1_1:
1746 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1747 ; GFX900-NEXT: ;;#ASMSTART
1748 ; GFX900-NEXT: ; def v0
1749 ; GFX900-NEXT: ;;#ASMEND
1750 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1751 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1752 ; GFX900-NEXT: ;;#ASMSTART
1753 ; GFX900-NEXT: ; def v3
1754 ; GFX900-NEXT: ;;#ASMEND
1755 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
1756 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16
1757 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1758 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1759 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1761 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_0_1_1:
1763 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1764 ; GFX90A-NEXT: ;;#ASMSTART
1765 ; GFX90A-NEXT: ; def v0
1766 ; GFX90A-NEXT: ;;#ASMEND
1767 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1768 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1769 ; GFX90A-NEXT: ;;#ASMSTART
1770 ; GFX90A-NEXT: ; def v3
1771 ; GFX90A-NEXT: ;;#ASMEND
1772 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
1773 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16
1774 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1775 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1776 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1778 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_0_1_1:
1780 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1781 ; GFX940-NEXT: ;;#ASMSTART
1782 ; GFX940-NEXT: ; def v0
1783 ; GFX940-NEXT: ;;#ASMEND
1784 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1785 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1786 ; GFX940-NEXT: ;;#ASMSTART
1787 ; GFX940-NEXT: ; def v3
1788 ; GFX940-NEXT: ;;#ASMEND
1789 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
1790 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16
1791 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1792 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1793 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1794 %vec0 = call <2 x half> asm "; def $0", "=v"()
1795 %vec1 = call <2 x half> asm "; def $0", "=v"()
1796 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 0, i32 1, i32 1>
1797 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <2 x half> inputs into a <4 x half> that is stored to %ptr.
; Mask indices 0-1 address %vec0 and 2-3 address %vec1 (operands concatenated).
; Mask <3, 2, 1, 1> => result = <%vec1[1], %vec1[0], %vec0[1], %vec0[1]>,
; i.e. the low pair is %vec1 with its two halves swapped.
1801 define void @v_shuffle_v4f16_v2f16__3_2_1_1(ptr addrspace(1) inreg %ptr) {
1802 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_2_1_1:
1804 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1805 ; GFX900-NEXT: ;;#ASMSTART
1806 ; GFX900-NEXT: ; def v0
1807 ; GFX900-NEXT: ;;#ASMEND
1808 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1809 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1810 ; GFX900-NEXT: ;;#ASMSTART
1811 ; GFX900-NEXT: ; def v3
1812 ; GFX900-NEXT: ;;#ASMEND
1813 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
1814 ; GFX900-NEXT: v_alignbit_b32 v0, v3, v3, 16
1815 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1816 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1817 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1819 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_2_1_1:
1821 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1822 ; GFX90A-NEXT: ;;#ASMSTART
1823 ; GFX90A-NEXT: ; def v0
1824 ; GFX90A-NEXT: ;;#ASMEND
1825 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1826 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1827 ; GFX90A-NEXT: ;;#ASMSTART
1828 ; GFX90A-NEXT: ; def v3
1829 ; GFX90A-NEXT: ;;#ASMEND
1830 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
1831 ; GFX90A-NEXT: v_alignbit_b32 v0, v3, v3, 16
1832 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1833 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1834 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1836 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_2_1_1:
1838 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1839 ; GFX940-NEXT: ;;#ASMSTART
1840 ; GFX940-NEXT: ; def v0
1841 ; GFX940-NEXT: ;;#ASMEND
1842 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1843 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1844 ; GFX940-NEXT: ;;#ASMSTART
1845 ; GFX940-NEXT: ; def v3
1846 ; GFX940-NEXT: ;;#ASMEND
1847 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
1848 ; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16
1849 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1850 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1851 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1852 %vec0 = call <2 x half> asm "; def $0", "=v"()
1853 %vec1 = call <2 x half> asm "; def $0", "=v"()
1854 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 2, i32 1, i32 1>
1855 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <2 x half> inputs into a <4 x half> that is stored to %ptr.
; Mask indices 0-1 address %vec0 and 2-3 address %vec1 (operands concatenated).
; Mask <3, 3, 1, 1> => result = <%vec1[1], %vec1[1], %vec0[1], %vec0[1]>,
; i.e. the high element of each input is duplicated into a pair.
1859 define void @v_shuffle_v4f16_v2f16__3_3_1_1(ptr addrspace(1) inreg %ptr) {
1860 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_1_1:
1862 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1863 ; GFX900-NEXT: ;;#ASMSTART
1864 ; GFX900-NEXT: ; def v0
1865 ; GFX900-NEXT: ;;#ASMEND
1866 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1867 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1868 ; GFX900-NEXT: ;;#ASMSTART
1869 ; GFX900-NEXT: ; def v3
1870 ; GFX900-NEXT: ;;#ASMEND
1871 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
1872 ; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4
1873 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1874 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1875 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1877 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_1_1:
1879 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1880 ; GFX90A-NEXT: ;;#ASMSTART
1881 ; GFX90A-NEXT: ; def v0
1882 ; GFX90A-NEXT: ;;#ASMEND
1883 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1884 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1885 ; GFX90A-NEXT: ;;#ASMSTART
1886 ; GFX90A-NEXT: ; def v3
1887 ; GFX90A-NEXT: ;;#ASMEND
1888 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
1889 ; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
1890 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1891 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1892 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1894 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_1_1:
1896 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1897 ; GFX940-NEXT: ;;#ASMSTART
1898 ; GFX940-NEXT: ; def v0
1899 ; GFX940-NEXT: ;;#ASMEND
1900 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1901 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1902 ; GFX940-NEXT: ;;#ASMSTART
1903 ; GFX940-NEXT: ; def v3
1904 ; GFX940-NEXT: ;;#ASMEND
1905 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
1906 ; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
1907 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1908 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1909 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1910 %vec0 = call <2 x half> asm "; def $0", "=v"()
1911 %vec1 = call <2 x half> asm "; def $0", "=v"()
1912 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
1913 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <2 x half> inputs into a <4 x half> that is stored to %ptr.
; Mask indices 0-1 address %vec0 and 2-3 address %vec1 (operands concatenated).
; Mask <3, 3, poison, 1> => result = <%vec1[1], %vec1[1], poison, %vec0[1]>.
; Lane 2 is unconstrained, so the expected code stores one input register
; unmodified as the upper dword and only permutes the lower one.
1917 define void @v_shuffle_v4f16_v2f16__3_3_u_1(ptr addrspace(1) inreg %ptr) {
1918 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_u_1:
1920 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1921 ; GFX900-NEXT: ;;#ASMSTART
1922 ; GFX900-NEXT: ; def v0
1923 ; GFX900-NEXT: ;;#ASMEND
1924 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1925 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1926 ; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
1927 ; GFX900-NEXT: ;;#ASMSTART
1928 ; GFX900-NEXT: ; def v1
1929 ; GFX900-NEXT: ;;#ASMEND
1930 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1931 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1932 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1934 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_u_1:
1936 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1937 ; GFX90A-NEXT: ;;#ASMSTART
1938 ; GFX90A-NEXT: ; def v0
1939 ; GFX90A-NEXT: ;;#ASMEND
1940 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1941 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1942 ; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
1943 ; GFX90A-NEXT: ;;#ASMSTART
1944 ; GFX90A-NEXT: ; def v1
1945 ; GFX90A-NEXT: ;;#ASMEND
1946 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1947 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1948 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1950 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_u_1:
1952 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1953 ; GFX940-NEXT: ;;#ASMSTART
1954 ; GFX940-NEXT: ; def v0
1955 ; GFX940-NEXT: ;;#ASMEND
1956 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1957 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1958 ; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
1959 ; GFX940-NEXT: ;;#ASMSTART
1960 ; GFX940-NEXT: ; def v1
1961 ; GFX940-NEXT: ;;#ASMEND
1962 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1963 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1964 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1965 %vec0 = call <2 x half> asm "; def $0", "=v"()
1966 %vec1 = call <2 x half> asm "; def $0", "=v"()
1967 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 1>
1968 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <2 x half> inputs into a <4 x half> that is stored to %ptr.
; Mask indices 0-1 address %vec0 and 2-3 address %vec1 (operands concatenated).
; Mask <3, 3, 0, 1> => result = <%vec1[1], %vec1[1], %vec0[0], %vec0[1]>.
; The upper pair is %vec0 in its original order, so the expected code stores
; one input register unmodified as the upper dword.
1972 define void @v_shuffle_v4f16_v2f16__3_3_0_1(ptr addrspace(1) inreg %ptr) {
1973 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_0_1:
1975 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1976 ; GFX900-NEXT: ;;#ASMSTART
1977 ; GFX900-NEXT: ; def v0
1978 ; GFX900-NEXT: ;;#ASMEND
1979 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1980 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1981 ; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
1982 ; GFX900-NEXT: ;;#ASMSTART
1983 ; GFX900-NEXT: ; def v1
1984 ; GFX900-NEXT: ;;#ASMEND
1985 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
1986 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1987 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1989 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_0_1:
1991 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1992 ; GFX90A-NEXT: ;;#ASMSTART
1993 ; GFX90A-NEXT: ; def v0
1994 ; GFX90A-NEXT: ;;#ASMEND
1995 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1996 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1997 ; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
1998 ; GFX90A-NEXT: ;;#ASMSTART
1999 ; GFX90A-NEXT: ; def v1
2000 ; GFX90A-NEXT: ;;#ASMEND
2001 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2002 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2003 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2005 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_0_1:
2007 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2008 ; GFX940-NEXT: ;;#ASMSTART
2009 ; GFX940-NEXT: ; def v0
2010 ; GFX940-NEXT: ;;#ASMEND
2011 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2012 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2013 ; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
2014 ; GFX940-NEXT: ;;#ASMSTART
2015 ; GFX940-NEXT: ; def v1
2016 ; GFX940-NEXT: ;;#ASMEND
2017 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2018 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2019 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2020 %vec0 = call <2 x half> asm "; def $0", "=v"()
2021 %vec1 = call <2 x half> asm "; def $0", "=v"()
2022 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 1>
2023 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <2 x half> inputs into a <4 x half> that is stored to %ptr.
; Mask indices 0-1 address %vec0 and 2-3 address %vec1 (operands concatenated).
; Mask <3, 3, 2, 1> => result = <%vec1[1], %vec1[1], %vec1[0], %vec0[1]>.
; The upper pair mixes one element from each input; the expected code builds
; it with a 0xffff-masked v_bfi_b32.
2027 define void @v_shuffle_v4f16_v2f16__3_3_2_1(ptr addrspace(1) inreg %ptr) {
2028 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_2_1:
2030 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2031 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
2032 ; GFX900-NEXT: ;;#ASMSTART
2033 ; GFX900-NEXT: ; def v0
2034 ; GFX900-NEXT: ;;#ASMEND
2035 ; GFX900-NEXT: ;;#ASMSTART
2036 ; GFX900-NEXT: ; def v3
2037 ; GFX900-NEXT: ;;#ASMEND
2038 ; GFX900-NEXT: v_bfi_b32 v1, s4, v3, v0
2039 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2040 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2041 ; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4
2042 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2043 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2044 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2046 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_2_1:
2048 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2049 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
2050 ; GFX90A-NEXT: ;;#ASMSTART
2051 ; GFX90A-NEXT: ; def v0
2052 ; GFX90A-NEXT: ;;#ASMEND
2053 ; GFX90A-NEXT: ;;#ASMSTART
2054 ; GFX90A-NEXT: ; def v3
2055 ; GFX90A-NEXT: ;;#ASMEND
2056 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v3, v0
2057 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2058 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2059 ; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
2060 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2061 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2062 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2064 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_2_1:
2066 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2067 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
2068 ; GFX940-NEXT: ;;#ASMSTART
2069 ; GFX940-NEXT: ; def v0
2070 ; GFX940-NEXT: ;;#ASMEND
2071 ; GFX940-NEXT: ;;#ASMSTART
2072 ; GFX940-NEXT: ; def v3
2073 ; GFX940-NEXT: ;;#ASMEND
2074 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2075 ; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0
2076 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2077 ; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
2078 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2079 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2080 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2081 %vec0 = call <2 x half> asm "; def $0", "=v"()
2082 %vec1 = call <2 x half> asm "; def $0", "=v"()
2083 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 1>
2084 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Single-input variant: the second shuffle operand is poison.
; Mask <poison, 2, 2, 2>: lane 0 is poison and lanes 1-3 index the poison
; operand, so no lane of the result is defined. The expected output is just
; the entry wait and the return, with no store emitted.
2088 define void @v_shuffle_v4f16_v2f16__u_2_2_2(ptr addrspace(1) inreg %ptr) {
2089 ; GFX9-LABEL: v_shuffle_v4f16_v2f16__u_2_2_2:
2091 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2092 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2093 %vec0 = call <2 x half> asm "; def $0", "=v"()
2094 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
2095 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Single-input variant: the second shuffle operand is poison.
; Mask <0, 2, 2, 2> => result = <%vec0[0], poison, poison, poison>; only
; lane 0 is defined because indices >= 2 select from the poison operand.
; The expected code stores the input register as-is for the low dword.
2099 define void @v_shuffle_v4f16_v2f16__0_2_2_2(ptr addrspace(1) inreg %ptr) {
2100 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__0_2_2_2:
2102 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2103 ; GFX900-NEXT: v_mov_b32_e32 v1, 0
2104 ; GFX900-NEXT: ;;#ASMSTART
2105 ; GFX900-NEXT: ; def v0
2106 ; GFX900-NEXT: ;;#ASMEND
2107 ; GFX900-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
2108 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2109 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2111 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__0_2_2_2:
2113 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2114 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0
2115 ; GFX90A-NEXT: ;;#ASMSTART
2116 ; GFX90A-NEXT: ; def v0
2117 ; GFX90A-NEXT: ;;#ASMEND
2118 ; GFX90A-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
2119 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2120 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2122 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__0_2_2_2:
2124 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2125 ; GFX940-NEXT: v_mov_b32_e32 v1, 0
2126 ; GFX940-NEXT: ;;#ASMSTART
2127 ; GFX940-NEXT: ; def v0
2128 ; GFX940-NEXT: ;;#ASMEND
2129 ; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1
2130 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2131 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2132 %vec0 = call <2 x half> asm "; def $0", "=v"()
2133 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
2134 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Single-input variant: the second shuffle operand is poison.
; Mask <1, 2, 2, 2> => result = <%vec0[1], poison, poison, poison>; only
; lane 0 is defined because indices >= 2 select from the poison operand.
; The expected code uses a 16-bit v_alignbit_b32 on the input register to
; form the low dword.
2138 define void @v_shuffle_v4f16_v2f16__1_2_2_2(ptr addrspace(1) inreg %ptr) {
2139 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__1_2_2_2:
2141 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2142 ; GFX900-NEXT: ;;#ASMSTART
2143 ; GFX900-NEXT: ; def v0
2144 ; GFX900-NEXT: ;;#ASMEND
2145 ; GFX900-NEXT: v_mov_b32_e32 v1, 0
2146 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
2147 ; GFX900-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
2148 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2149 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2151 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__1_2_2_2:
2153 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2154 ; GFX90A-NEXT: ;;#ASMSTART
2155 ; GFX90A-NEXT: ; def v0
2156 ; GFX90A-NEXT: ;;#ASMEND
2157 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0
2158 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
2159 ; GFX90A-NEXT: global_store_dwordx2 v1, v[0:1], s[16:17]
2160 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2161 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2163 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__1_2_2_2:
2165 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2166 ; GFX940-NEXT: ;;#ASMSTART
2167 ; GFX940-NEXT: ; def v0
2168 ; GFX940-NEXT: ;;#ASMEND
2169 ; GFX940-NEXT: v_mov_b32_e32 v1, 0
2170 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
2171 ; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1
2172 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2173 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2174 %vec0 = call <2 x half> asm "; def $0", "=v"()
2175 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
2176 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Single-input variant: the second shuffle operand is poison.
; Mask <2, 2, 2, 2>: every lane indexes the poison operand, so no lane of the
; result is defined. The expected output is just the entry wait and the
; return, with no store emitted.
2180 define void @v_shuffle_v4f16_v2f16__2_2_2_2(ptr addrspace(1) inreg %ptr) {
2181 ; GFX9-LABEL: v_shuffle_v4f16_v2f16__2_2_2_2:
2183 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2184 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2185 %vec0 = call <2 x half> asm "; def $0", "=v"()
2186 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
2187 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <2 x half> inputs into a <4 x half> that is stored to %ptr.
; Mask indices 0-1 address %vec0 and 2-3 address %vec1 (operands concatenated).
; Mask <3, 2, 2, 2> => result = <%vec1[1], %vec1[0], %vec1[0], %vec1[0]>.
; No lane reads %vec0, which is why the expected code contains a single
; inline-asm def.
2191 define void @v_shuffle_v4f16_v2f16__3_2_2_2(ptr addrspace(1) inreg %ptr) {
2192 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_2_2_2:
2194 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2195 ; GFX900-NEXT: ;;#ASMSTART
2196 ; GFX900-NEXT: ; def v0
2197 ; GFX900-NEXT: ;;#ASMEND
2198 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2199 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2200 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
2201 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v0, 16
2202 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2203 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2204 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2206 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_2_2_2:
2208 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2209 ; GFX90A-NEXT: ;;#ASMSTART
2210 ; GFX90A-NEXT: ; def v0
2211 ; GFX90A-NEXT: ;;#ASMEND
2212 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2213 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2214 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
2215 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v0, 16
2216 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2217 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2218 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2220 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_2_2_2:
2222 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2223 ; GFX940-NEXT: ;;#ASMSTART
2224 ; GFX940-NEXT: ; def v0
2225 ; GFX940-NEXT: ;;#ASMEND
2226 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2227 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2228 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
2229 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16
2230 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2231 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2232 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2233 %vec0 = call <2 x half> asm "; def $0", "=v"()
2234 %vec1 = call <2 x half> asm "; def $0", "=v"()
2235 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
2236 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <2 x half> inputs into a <4 x half> that is stored to %ptr.
; Mask indices 0-1 address %vec0 and 2-3 address %vec1 (operands concatenated).
; Mask <3, poison, 2, 2> => result = <%vec1[1], poison, %vec1[0], %vec1[0]>.
; No lane reads %vec0, which is why the expected code contains a single
; inline-asm def.
2240 define void @v_shuffle_v4f16_v2f16__3_u_2_2(ptr addrspace(1) inreg %ptr) {
2241 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_u_2_2:
2243 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2244 ; GFX900-NEXT: ;;#ASMSTART
2245 ; GFX900-NEXT: ; def v1
2246 ; GFX900-NEXT: ;;#ASMEND
2247 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16
2248 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2249 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2250 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
2251 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2252 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2253 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2255 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_u_2_2:
2257 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2258 ; GFX90A-NEXT: ;;#ASMSTART
2259 ; GFX90A-NEXT: ; def v1
2260 ; GFX90A-NEXT: ;;#ASMEND
2261 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16
2262 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2263 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2264 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
2265 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2266 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2267 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2269 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_u_2_2:
2271 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2272 ; GFX940-NEXT: ;;#ASMSTART
2273 ; GFX940-NEXT: ; def v1
2274 ; GFX940-NEXT: ;;#ASMEND
2275 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2276 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2277 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16
2278 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
2279 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2280 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2281 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2282 %vec0 = call <2 x half> asm "; def $0", "=v"()
2283 %vec1 = call <2 x half> asm "; def $0", "=v"()
2284 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 poison, i32 2, i32 2>
2285 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <2 x half> inputs into a <4 x half> that is stored to %ptr.
; Mask indices 0-1 address %vec0 and 2-3 address %vec1 (operands concatenated).
; Mask <3, 0, 2, 2> => result = <%vec1[1], %vec0[0], %vec1[0], %vec1[0]>.
2289 define void @v_shuffle_v4f16_v2f16__3_0_2_2(ptr addrspace(1) inreg %ptr) {
2290 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_0_2_2:
2292 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2293 ; GFX900-NEXT: ;;#ASMSTART
2294 ; GFX900-NEXT: ; def v0
2295 ; GFX900-NEXT: ;;#ASMEND
2296 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2297 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2298 ; GFX900-NEXT: ;;#ASMSTART
2299 ; GFX900-NEXT: ; def v3
2300 ; GFX900-NEXT: ;;#ASMEND
2301 ; GFX900-NEXT: v_perm_b32 v1, v3, v3, s4
2302 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16
2303 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2304 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2305 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2307 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_0_2_2:
2309 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2310 ; GFX90A-NEXT: ;;#ASMSTART
2311 ; GFX90A-NEXT: ; def v0
2312 ; GFX90A-NEXT: ;;#ASMEND
2313 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2314 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2315 ; GFX90A-NEXT: ;;#ASMSTART
2316 ; GFX90A-NEXT: ; def v3
2317 ; GFX90A-NEXT: ;;#ASMEND
2318 ; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4
2319 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16
2320 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2321 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2322 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2324 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_0_2_2:
2326 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2327 ; GFX940-NEXT: ;;#ASMSTART
2328 ; GFX940-NEXT: ; def v0
2329 ; GFX940-NEXT: ;;#ASMEND
2330 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2331 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2332 ; GFX940-NEXT: ;;#ASMSTART
2333 ; GFX940-NEXT: ; def v3
2334 ; GFX940-NEXT: ;;#ASMEND
2335 ; GFX940-NEXT: s_nop 0
2336 ; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2
2337 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16
2338 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2339 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2340 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2341 %vec0 = call <2 x half> asm "; def $0", "=v"()
2342 %vec1 = call <2 x half> asm "; def $0", "=v"()
2343 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 0, i32 2, i32 2>
2344 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <2 x half> inputs into a <4 x half> that is stored to %ptr.
; Mask indices 0-1 address %vec0 and 2-3 address %vec1 (operands concatenated).
; Mask <3, 1, 2, 2> => result = <%vec1[1], %vec0[1], %vec1[0], %vec1[0]>.
; The expected code uses two different v_perm_b32 selectors (0x7060302 and
; 0x5040100), one per result dword.
2348 define void @v_shuffle_v4f16_v2f16__3_1_2_2(ptr addrspace(1) inreg %ptr) {
2349 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_1_2_2:
2351 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2352 ; GFX900-NEXT: ;;#ASMSTART
2353 ; GFX900-NEXT: ; def v0
2354 ; GFX900-NEXT: ;;#ASMEND
2355 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2356 ; GFX900-NEXT: ;;#ASMSTART
2357 ; GFX900-NEXT: ; def v1
2358 ; GFX900-NEXT: ;;#ASMEND
2359 ; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4
2360 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2361 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2362 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
2363 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2364 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2365 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2367 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_1_2_2:
2369 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2370 ; GFX90A-NEXT: ;;#ASMSTART
2371 ; GFX90A-NEXT: ; def v0
2372 ; GFX90A-NEXT: ;;#ASMEND
2373 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2374 ; GFX90A-NEXT: ;;#ASMSTART
2375 ; GFX90A-NEXT: ; def v1
2376 ; GFX90A-NEXT: ;;#ASMEND
2377 ; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4
2378 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2379 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2380 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
2381 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2382 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2383 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2385 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_1_2_2:
2387 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2388 ; GFX940-NEXT: ;;#ASMSTART
2389 ; GFX940-NEXT: ; def v0
2390 ; GFX940-NEXT: ;;#ASMEND
2391 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2392 ; GFX940-NEXT: ;;#ASMSTART
2393 ; GFX940-NEXT: ; def v1
2394 ; GFX940-NEXT: ;;#ASMEND
2395 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2396 ; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2
2397 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2398 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
2399 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2400 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2401 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2402 %vec0 = call <2 x half> asm "; def $0", "=v"()
2403 %vec1 = call <2 x half> asm "; def $0", "=v"()
2404 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 1, i32 2, i32 2>
2405 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <2 x half> inputs into a <4 x half> that is stored to %ptr.
; Mask indices 0-1 address %vec0 and 2-3 address %vec1 (operands concatenated).
; Mask <3, 3, 2, 2> => result = <%vec1[1], %vec1[1], %vec1[0], %vec1[0]>.
; No lane reads %vec0, which is why the expected code contains a single
; inline-asm def.
2409 define void @v_shuffle_v4f16_v2f16__3_3_2_2(ptr addrspace(1) inreg %ptr) {
2410 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_2_2:
2412 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2413 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2414 ; GFX900-NEXT: ;;#ASMSTART
2415 ; GFX900-NEXT: ; def v1
2416 ; GFX900-NEXT: ;;#ASMEND
2417 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
2418 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2419 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2420 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
2421 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2422 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2423 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2425 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_2_2:
2427 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2428 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2429 ; GFX90A-NEXT: ;;#ASMSTART
2430 ; GFX90A-NEXT: ; def v1
2431 ; GFX90A-NEXT: ;;#ASMEND
2432 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
2433 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2434 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2435 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
2436 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2437 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2438 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2440 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_2_2:
2442 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2443 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2444 ; GFX940-NEXT: ;;#ASMSTART
2445 ; GFX940-NEXT: ; def v1
2446 ; GFX940-NEXT: ;;#ASMEND
2447 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2448 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
2449 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2450 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
2451 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2452 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2453 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2454 %vec0 = call <2 x half> asm "; def $0", "=v"()
2455 %vec1 = call <2 x half> asm "; def $0", "=v"()
2456 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 2>
2457 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <2 x half> inputs into a <4 x half> that is stored to %ptr.
; Mask indices 0-1 address %vec0 and 2-3 address %vec1 (operands concatenated).
; Mask <3, 3, poison, 2> => result = <%vec1[1], %vec1[1], poison, %vec1[0]>.
; No lane reads %vec0 (single inline-asm def in the expected code). With lane 2
; unconstrained, the upper dword is formed by a 16-bit left shift.
2461 define void @v_shuffle_v4f16_v2f16__3_3_u_2(ptr addrspace(1) inreg %ptr) {
2462 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_u_2:
2464 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2465 ; GFX900-NEXT: ;;#ASMSTART
2466 ; GFX900-NEXT: ; def v1
2467 ; GFX900-NEXT: ;;#ASMEND
2468 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2469 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2470 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
2471 ; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2472 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2473 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2474 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2476 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_u_2:
2478 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2479 ; GFX90A-NEXT: ;;#ASMSTART
2480 ; GFX90A-NEXT: ; def v1
2481 ; GFX90A-NEXT: ;;#ASMEND
2482 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2483 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2484 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
2485 ; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2486 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2487 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2488 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2490 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_u_2:
2492 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2493 ; GFX940-NEXT: ;;#ASMSTART
2494 ; GFX940-NEXT: ; def v1
2495 ; GFX940-NEXT: ;;#ASMEND
2496 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2497 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2498 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
2499 ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2500 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2501 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2502 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2503 %vec0 = call <2 x half> asm "; def $0", "=v"()
2504 %vec1 = call <2 x half> asm "; def $0", "=v"()
2505 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 2>
2506 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle of two <2 x half> inputs into a <4 x half> that is stored to %ptr.
; Mask indices 0-1 address %vec0 and 2-3 address %vec1 (operands concatenated).
; Mask <3, 3, 0, 2> => result = <%vec1[1], %vec1[1], %vec0[0], %vec1[0]>.
; The upper pair combines the low element of each input; the expected code
; builds it with a two-source v_perm_b32 using selector 0x5040100.
2510 define void @v_shuffle_v4f16_v2f16__3_3_0_2(ptr addrspace(1) inreg %ptr) {
2511 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_0_2:
2513 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2514 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2515 ; GFX900-NEXT: ;;#ASMSTART
2516 ; GFX900-NEXT: ; def v0
2517 ; GFX900-NEXT: ;;#ASMEND
2518 ; GFX900-NEXT: ;;#ASMSTART
2519 ; GFX900-NEXT: ; def v3
2520 ; GFX900-NEXT: ;;#ASMEND
2521 ; GFX900-NEXT: v_perm_b32 v1, v3, v0, s4
2522 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2523 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2524 ; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4
2525 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2526 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2527 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2529 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_0_2:
2531 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2532 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2533 ; GFX90A-NEXT: ;;#ASMSTART
2534 ; GFX90A-NEXT: ; def v0
2535 ; GFX90A-NEXT: ;;#ASMEND
2536 ; GFX90A-NEXT: ;;#ASMSTART
2537 ; GFX90A-NEXT: ; def v3
2538 ; GFX90A-NEXT: ;;#ASMEND
2539 ; GFX90A-NEXT: v_perm_b32 v1, v3, v0, s4
2540 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2541 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2542 ; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
2543 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2544 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2545 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2547 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_0_2:
2549 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2550 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2551 ; GFX940-NEXT: ;;#ASMSTART
2552 ; GFX940-NEXT: ; def v0
2553 ; GFX940-NEXT: ;;#ASMEND
2554 ; GFX940-NEXT: ;;#ASMSTART
2555 ; GFX940-NEXT: ; def v3
2556 ; GFX940-NEXT: ;;#ASMEND
2557 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2558 ; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2
2559 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2560 ; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
2561 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2562 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2563 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2564 %vec0 = call <2 x half> asm "; def $0", "=v"()
2565 %vec1 = call <2 x half> asm "; def $0", "=v"()
2566 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 2>
2567 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 3, 1, 2>: elements 0 and 1 of the result take %vec1[1],
; element 2 takes %vec0[1] and element 3 takes %vec1[0]. The <4 x half>
; result is stored to %ptr. The check lines are autogenerated.
2571 define void @v_shuffle_v4f16_v2f16__3_3_1_2(ptr addrspace(1) inreg %ptr) {
2572 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_1_2:
2574 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2575 ; GFX900-NEXT: ;;#ASMSTART
2576 ; GFX900-NEXT: ; def v1
2577 ; GFX900-NEXT: ;;#ASMEND
2578 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2579 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2580 ; GFX900-NEXT: ;;#ASMSTART
2581 ; GFX900-NEXT: ; def v3
2582 ; GFX900-NEXT: ;;#ASMEND
2583 ; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4
2584 ; GFX900-NEXT: v_alignbit_b32 v1, v3, v1, 16
2585 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2586 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2587 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2589 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_1_2:
2591 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2592 ; GFX90A-NEXT: ;;#ASMSTART
2593 ; GFX90A-NEXT: ; def v1
2594 ; GFX90A-NEXT: ;;#ASMEND
2595 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2596 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2597 ; GFX90A-NEXT: ;;#ASMSTART
2598 ; GFX90A-NEXT: ; def v3
2599 ; GFX90A-NEXT: ;;#ASMEND
2600 ; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
2601 ; GFX90A-NEXT: v_alignbit_b32 v1, v3, v1, 16
2602 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2603 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2604 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2606 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_1_2:
2608 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2609 ; GFX940-NEXT: ;;#ASMSTART
2610 ; GFX940-NEXT: ; def v1
2611 ; GFX940-NEXT: ;;#ASMEND
2612 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2613 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2614 ; GFX940-NEXT: ;;#ASMSTART
2615 ; GFX940-NEXT: ; def v3
2616 ; GFX940-NEXT: ;;#ASMEND
2617 ; GFX940-NEXT: s_nop 0
2618 ; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
2619 ; GFX940-NEXT: v_alignbit_b32 v1, v3, v1, 16
2620 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2621 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2622 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2623 %vec0 = call <2 x half> asm "; def $0", "=v"()
2624 %vec1 = call <2 x half> asm "; def $0", "=v"()
2625 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 2>
2626 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <poison, 3, 3, 3>: element 0 of the result is unspecified and
; elements 1..3 all take %vec1[1]. No mask index refers to %vec0, and the
; expected output contains a single asm def. The <4 x half> result is stored
; to %ptr. The check lines are autogenerated.
2630 define void @v_shuffle_v4f16_v2f16__u_3_3_3(ptr addrspace(1) inreg %ptr) {
2631 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__u_3_3_3:
2633 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2634 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2635 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2636 ; GFX900-NEXT: ;;#ASMSTART
2637 ; GFX900-NEXT: ; def v0
2638 ; GFX900-NEXT: ;;#ASMEND
2639 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
2640 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2641 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2642 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2644 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__u_3_3_3:
2646 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2647 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2648 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2649 ; GFX90A-NEXT: ;;#ASMSTART
2650 ; GFX90A-NEXT: ; def v0
2651 ; GFX90A-NEXT: ;;#ASMEND
2652 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
2653 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2654 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2655 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2657 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__u_3_3_3:
2659 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2660 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2661 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2662 ; GFX940-NEXT: ;;#ASMSTART
2663 ; GFX940-NEXT: ; def v0
2664 ; GFX940-NEXT: ;;#ASMEND
2665 ; GFX940-NEXT: s_nop 0
2666 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
2667 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2668 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2669 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2670 %vec0 = call <2 x half> asm "; def $0", "=v"()
2671 %vec1 = call <2 x half> asm "; def $0", "=v"()
2672 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
2673 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <0, 3, 3, 3>: element 0 of the result takes %vec0[0] and
; elements 1..3 all take %vec1[1]. The <4 x half> result is stored to %ptr.
; The check lines are autogenerated.
2677 define void @v_shuffle_v4f16_v2f16__0_3_3_3(ptr addrspace(1) inreg %ptr) {
2678 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__0_3_3_3:
2680 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2681 ; GFX900-NEXT: ;;#ASMSTART
2682 ; GFX900-NEXT: ; def v0
2683 ; GFX900-NEXT: ;;#ASMEND
2684 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
2685 ; GFX900-NEXT: ;;#ASMSTART
2686 ; GFX900-NEXT: ; def v1
2687 ; GFX900-NEXT: ;;#ASMEND
2688 ; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1
2689 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2690 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2691 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
2692 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2693 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2694 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2696 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__0_3_3_3:
2698 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2699 ; GFX90A-NEXT: ;;#ASMSTART
2700 ; GFX90A-NEXT: ; def v0
2701 ; GFX90A-NEXT: ;;#ASMEND
2702 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
2703 ; GFX90A-NEXT: ;;#ASMSTART
2704 ; GFX90A-NEXT: ; def v1
2705 ; GFX90A-NEXT: ;;#ASMEND
2706 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1
2707 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2708 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2709 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
2710 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2711 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2712 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2714 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__0_3_3_3:
2716 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2717 ; GFX940-NEXT: ;;#ASMSTART
2718 ; GFX940-NEXT: ; def v0
2719 ; GFX940-NEXT: ;;#ASMEND
2720 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
2721 ; GFX940-NEXT: ;;#ASMSTART
2722 ; GFX940-NEXT: ; def v1
2723 ; GFX940-NEXT: ;;#ASMEND
2724 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2725 ; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1
2726 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2727 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
2728 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2729 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2730 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2731 %vec0 = call <2 x half> asm "; def $0", "=v"()
2732 %vec1 = call <2 x half> asm "; def $0", "=v"()
2733 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
2734 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <1, 3, 3, 3>: element 0 of the result takes %vec0[1] and
; elements 1..3 all take %vec1[1]. The <4 x half> result is stored to %ptr.
; The check lines are autogenerated.
2738 define void @v_shuffle_v4f16_v2f16__1_3_3_3(ptr addrspace(1) inreg %ptr) {
2739 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__1_3_3_3:
2741 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2742 ; GFX900-NEXT: ;;#ASMSTART
2743 ; GFX900-NEXT: ; def v0
2744 ; GFX900-NEXT: ;;#ASMEND
2745 ; GFX900-NEXT: ;;#ASMSTART
2746 ; GFX900-NEXT: ; def v1
2747 ; GFX900-NEXT: ;;#ASMEND
2748 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2749 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2750 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
2751 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
2752 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2753 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2754 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2756 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__1_3_3_3:
2758 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2759 ; GFX90A-NEXT: ;;#ASMSTART
2760 ; GFX90A-NEXT: ; def v0
2761 ; GFX90A-NEXT: ;;#ASMEND
2762 ; GFX90A-NEXT: ;;#ASMSTART
2763 ; GFX90A-NEXT: ; def v1
2764 ; GFX90A-NEXT: ;;#ASMEND
2765 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2766 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2767 ; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4
2768 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
2769 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2770 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2771 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2773 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__1_3_3_3:
2775 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2776 ; GFX940-NEXT: ;;#ASMSTART
2777 ; GFX940-NEXT: ; def v0
2778 ; GFX940-NEXT: ;;#ASMEND
2779 ; GFX940-NEXT: ;;#ASMSTART
2780 ; GFX940-NEXT: ; def v1
2781 ; GFX940-NEXT: ;;#ASMEND
2782 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2783 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2784 ; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2
2785 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
2786 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2787 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2788 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2789 %vec0 = call <2 x half> asm "; def $0", "=v"()
2790 %vec1 = call <2 x half> asm "; def $0", "=v"()
2791 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
2792 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <2, 3, 3, 3>: element 0 of the result takes %vec1[0] and
; elements 1..3 all take %vec1[1]. No mask index refers to %vec0, and the
; expected output contains a single asm def. The <4 x half> result is stored
; to %ptr. The check lines are autogenerated.
2796 define void @v_shuffle_v4f16_v2f16__2_3_3_3(ptr addrspace(1) inreg %ptr) {
2797 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__2_3_3_3:
2799 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2800 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2801 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2802 ; GFX900-NEXT: ;;#ASMSTART
2803 ; GFX900-NEXT: ; def v0
2804 ; GFX900-NEXT: ;;#ASMEND
2805 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
2806 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2807 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2808 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2810 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__2_3_3_3:
2812 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2813 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2814 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2815 ; GFX90A-NEXT: ;;#ASMSTART
2816 ; GFX90A-NEXT: ; def v0
2817 ; GFX90A-NEXT: ;;#ASMEND
2818 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
2819 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2820 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2821 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2823 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__2_3_3_3:
2825 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2826 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2827 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2828 ; GFX940-NEXT: ;;#ASMSTART
2829 ; GFX940-NEXT: ; def v0
2830 ; GFX940-NEXT: ;;#ASMEND
2831 ; GFX940-NEXT: s_nop 0
2832 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
2833 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2834 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2835 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2836 %vec0 = call <2 x half> asm "; def $0", "=v"()
2837 %vec1 = call <2 x half> asm "; def $0", "=v"()
2838 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
2839 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, poison, 3, 3>: elements 0, 2 and 3 of the result take
; %vec1[1]; element 1 is unspecified. No mask index refers to %vec0, and the
; expected output contains a single asm def. The <4 x half> result is stored
; to %ptr. The check lines are autogenerated.
2843 define void @v_shuffle_v4f16_v2f16__3_u_3_3(ptr addrspace(1) inreg %ptr) {
2844 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_u_3_3:
2846 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2847 ; GFX900-NEXT: ;;#ASMSTART
2848 ; GFX900-NEXT: ; def v0
2849 ; GFX900-NEXT: ;;#ASMEND
2850 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2851 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2852 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
2853 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
2854 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2855 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2856 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2858 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_u_3_3:
2860 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2861 ; GFX90A-NEXT: ;;#ASMSTART
2862 ; GFX90A-NEXT: ; def v0
2863 ; GFX90A-NEXT: ;;#ASMEND
2864 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2865 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2866 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
2867 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
2868 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2869 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2870 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2872 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_u_3_3:
2874 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2875 ; GFX940-NEXT: ;;#ASMSTART
2876 ; GFX940-NEXT: ; def v0
2877 ; GFX940-NEXT: ;;#ASMEND
2878 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2879 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2880 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
2881 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
2882 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2883 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2884 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2885 %vec0 = call <2 x half> asm "; def $0", "=v"()
2886 %vec1 = call <2 x half> asm "; def $0", "=v"()
2887 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 poison, i32 3, i32 3>
2888 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 0, 3, 3>: elements 0, 2 and 3 of the result take %vec1[1]
; and element 1 takes %vec0[0]. The <4 x half> result is stored to %ptr.
; The check lines are autogenerated.
2892 define void @v_shuffle_v4f16_v2f16__3_0_3_3(ptr addrspace(1) inreg %ptr) {
2893 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_0_3_3:
2895 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2896 ; GFX900-NEXT: ;;#ASMSTART
2897 ; GFX900-NEXT: ; def v0
2898 ; GFX900-NEXT: ;;#ASMEND
2899 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2900 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2901 ; GFX900-NEXT: ;;#ASMSTART
2902 ; GFX900-NEXT: ; def v3
2903 ; GFX900-NEXT: ;;#ASMEND
2904 ; GFX900-NEXT: v_perm_b32 v1, v3, v3, s4
2905 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16
2906 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2907 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2908 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2910 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_0_3_3:
2912 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2913 ; GFX90A-NEXT: ;;#ASMSTART
2914 ; GFX90A-NEXT: ; def v0
2915 ; GFX90A-NEXT: ;;#ASMEND
2916 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2917 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2918 ; GFX90A-NEXT: ;;#ASMSTART
2919 ; GFX90A-NEXT: ; def v3
2920 ; GFX90A-NEXT: ;;#ASMEND
2921 ; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4
2922 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16
2923 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2924 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2925 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2927 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_0_3_3:
2929 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2930 ; GFX940-NEXT: ;;#ASMSTART
2931 ; GFX940-NEXT: ; def v0
2932 ; GFX940-NEXT: ;;#ASMEND
2933 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2934 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2935 ; GFX940-NEXT: ;;#ASMSTART
2936 ; GFX940-NEXT: ; def v3
2937 ; GFX940-NEXT: ;;#ASMEND
2938 ; GFX940-NEXT: s_nop 0
2939 ; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2
2940 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16
2941 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2942 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2943 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2944 %vec0 = call <2 x half> asm "; def $0", "=v"()
2945 %vec1 = call <2 x half> asm "; def $0", "=v"()
2946 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 0, i32 3, i32 3>
2947 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 1, 3, 3>: elements 0, 2 and 3 of the result take %vec1[1]
; and element 1 takes %vec0[1]. The <4 x half> result is stored to %ptr.
; The check lines are autogenerated.
2951 define void @v_shuffle_v4f16_v2f16__3_1_3_3(ptr addrspace(1) inreg %ptr) {
2952 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_1_3_3:
2954 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2955 ; GFX900-NEXT: ;;#ASMSTART
2956 ; GFX900-NEXT: ; def v0
2957 ; GFX900-NEXT: ;;#ASMEND
2958 ; GFX900-NEXT: ;;#ASMSTART
2959 ; GFX900-NEXT: ; def v1
2960 ; GFX900-NEXT: ;;#ASMEND
2961 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2962 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2963 ; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4
2964 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
2965 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2966 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2967 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2969 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_1_3_3:
2971 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2972 ; GFX90A-NEXT: ;;#ASMSTART
2973 ; GFX90A-NEXT: ; def v0
2974 ; GFX90A-NEXT: ;;#ASMEND
2975 ; GFX90A-NEXT: ;;#ASMSTART
2976 ; GFX90A-NEXT: ; def v1
2977 ; GFX90A-NEXT: ;;#ASMEND
2978 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2979 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2980 ; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4
2981 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
2982 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
2983 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2984 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2986 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_1_3_3:
2988 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2989 ; GFX940-NEXT: ;;#ASMSTART
2990 ; GFX940-NEXT: ; def v0
2991 ; GFX940-NEXT: ;;#ASMEND
2992 ; GFX940-NEXT: ;;#ASMSTART
2993 ; GFX940-NEXT: ; def v1
2994 ; GFX940-NEXT: ;;#ASMEND
2995 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2996 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2997 ; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2
2998 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
2999 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
3000 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3001 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3002 %vec0 = call <2 x half> asm "; def $0", "=v"()
3003 %vec1 = call <2 x half> asm "; def $0", "=v"()
3004 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 1, i32 3, i32 3>
3005 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 2, 3, 3>: elements 0, 2 and 3 of the result take %vec1[1]
; and element 1 takes %vec1[0]. No mask index refers to %vec0, and the
; expected output contains a single asm def. The <4 x half> result is stored
; to %ptr. The check lines are autogenerated.
3009 define void @v_shuffle_v4f16_v2f16__3_2_3_3(ptr addrspace(1) inreg %ptr) {
3010 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_2_3_3:
3012 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3013 ; GFX900-NEXT: ;;#ASMSTART
3014 ; GFX900-NEXT: ; def v0
3015 ; GFX900-NEXT: ;;#ASMEND
3016 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3017 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3018 ; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
3019 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v0, 16
3020 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
3021 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3022 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3024 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_2_3_3:
3026 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3027 ; GFX90A-NEXT: ;;#ASMSTART
3028 ; GFX90A-NEXT: ; def v0
3029 ; GFX90A-NEXT: ;;#ASMEND
3030 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3031 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3032 ; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
3033 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v0, 16
3034 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
3035 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3036 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3038 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_2_3_3:
3040 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3041 ; GFX940-NEXT: ;;#ASMSTART
3042 ; GFX940-NEXT: ; def v0
3043 ; GFX940-NEXT: ;;#ASMEND
3044 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3045 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3046 ; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
3047 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16
3048 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
3049 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3050 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3051 %vec0 = call <2 x half> asm "; def $0", "=v"()
3052 %vec1 = call <2 x half> asm "; def $0", "=v"()
3053 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 2, i32 3, i32 3>
3054 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 3, poison, 3>: elements 0, 1 and 3 of the result take
; %vec1[1]; element 2 is unspecified. No mask index refers to %vec0, and the
; expected output contains a single asm def. The <4 x half> result is stored
; to %ptr. The check lines are autogenerated.
3058 define void @v_shuffle_v4f16_v2f16__3_3_u_3(ptr addrspace(1) inreg %ptr) {
3059 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_u_3:
3061 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3062 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3063 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3064 ; GFX900-NEXT: ;;#ASMSTART
3065 ; GFX900-NEXT: ; def v1
3066 ; GFX900-NEXT: ;;#ASMEND
3067 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
3068 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
3069 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3070 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3072 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_u_3:
3074 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3075 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3076 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3077 ; GFX90A-NEXT: ;;#ASMSTART
3078 ; GFX90A-NEXT: ; def v1
3079 ; GFX90A-NEXT: ;;#ASMEND
3080 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
3081 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
3082 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3083 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3085 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_u_3:
3087 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3088 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3089 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3090 ; GFX940-NEXT: ;;#ASMSTART
3091 ; GFX940-NEXT: ; def v1
3092 ; GFX940-NEXT: ;;#ASMEND
3093 ; GFX940-NEXT: s_nop 0
3094 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
3095 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
3096 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3097 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3098 %vec0 = call <2 x half> asm "; def $0", "=v"()
3099 %vec1 = call <2 x half> asm "; def $0", "=v"()
3100 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 3>
3101 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 3, 0, 3>: elements 0, 1 and 3 of the result take %vec1[1]
; and element 2 takes %vec0[0]. The <4 x half> result is stored to %ptr.
; The check lines are autogenerated.
3105 define void @v_shuffle_v4f16_v2f16__3_3_0_3(ptr addrspace(1) inreg %ptr) {
3106 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_0_3:
3108 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3109 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
3110 ; GFX900-NEXT: ;;#ASMSTART
3111 ; GFX900-NEXT: ; def v0
3112 ; GFX900-NEXT: ;;#ASMEND
3113 ; GFX900-NEXT: ;;#ASMSTART
3114 ; GFX900-NEXT: ; def v3
3115 ; GFX900-NEXT: ;;#ASMEND
3116 ; GFX900-NEXT: v_bfi_b32 v1, s4, v0, v3
3117 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3118 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3119 ; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4
3120 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
3121 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3122 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3124 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_0_3:
3126 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3127 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
3128 ; GFX90A-NEXT: ;;#ASMSTART
3129 ; GFX90A-NEXT: ; def v0
3130 ; GFX90A-NEXT: ;;#ASMEND
3131 ; GFX90A-NEXT: ;;#ASMSTART
3132 ; GFX90A-NEXT: ; def v3
3133 ; GFX90A-NEXT: ;;#ASMEND
3134 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v0, v3
3135 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3136 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3137 ; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
3138 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
3139 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3140 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3142 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_0_3:
3144 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3145 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
3146 ; GFX940-NEXT: ;;#ASMSTART
3147 ; GFX940-NEXT: ; def v0
3148 ; GFX940-NEXT: ;;#ASMEND
3149 ; GFX940-NEXT: ;;#ASMSTART
3150 ; GFX940-NEXT: ; def v3
3151 ; GFX940-NEXT: ;;#ASMEND
3152 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3153 ; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v3
3154 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3155 ; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
3156 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
3157 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3158 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3159 %vec0 = call <2 x half> asm "; def $0", "=v"()
3160 %vec1 = call <2 x half> asm "; def $0", "=v"()
3161 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 3>
3162 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 3, 1, 3>: elements 0, 1 and 3 of the result take %vec1[1]
; and element 2 takes %vec0[1]. The <4 x half> result is stored to %ptr.
; The check lines are autogenerated.
3166 define void @v_shuffle_v4f16_v2f16__3_3_1_3(ptr addrspace(1) inreg %ptr) {
3167 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_1_3:
3169 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3170 ; GFX900-NEXT: ;;#ASMSTART
3171 ; GFX900-NEXT: ; def v0
3172 ; GFX900-NEXT: ;;#ASMEND
3173 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3174 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3175 ; GFX900-NEXT: ;;#ASMSTART
3176 ; GFX900-NEXT: ; def v3
3177 ; GFX900-NEXT: ;;#ASMEND
3178 ; GFX900-NEXT: v_perm_b32 v1, v3, v0, s4
3179 ; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4
3180 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
3181 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3182 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3184 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_1_3:
3186 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3187 ; GFX90A-NEXT: ;;#ASMSTART
3188 ; GFX90A-NEXT: ; def v0
3189 ; GFX90A-NEXT: ;;#ASMEND
3190 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3191 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3192 ; GFX90A-NEXT: ;;#ASMSTART
3193 ; GFX90A-NEXT: ; def v3
3194 ; GFX90A-NEXT: ;;#ASMEND
3195 ; GFX90A-NEXT: v_perm_b32 v1, v3, v0, s4
3196 ; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
3197 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
3198 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3199 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3201 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_1_3:
3203 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3204 ; GFX940-NEXT: ;;#ASMSTART
3205 ; GFX940-NEXT: ; def v0
3206 ; GFX940-NEXT: ;;#ASMEND
3207 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3208 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3209 ; GFX940-NEXT: ;;#ASMSTART
3210 ; GFX940-NEXT: ; def v3
3211 ; GFX940-NEXT: ;;#ASMEND
3212 ; GFX940-NEXT: s_nop 0
3213 ; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2
3214 ; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
3215 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
3216 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3217 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3218 %vec0 = call <2 x half> asm "; def $0", "=v"()
3219 %vec1 = call <2 x half> asm "; def $0", "=v"()
3220 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 3>
3221 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; Shuffle mask <3, 3, 2, 3>: elements 0, 1 and 3 of the result take %vec1[1]
; and element 2 takes %vec1[0]. No mask index refers to %vec0, and the
; expected output contains a single asm def. The <4 x half> result is stored
; to %ptr. The check lines are autogenerated.
3225 define void @v_shuffle_v4f16_v2f16__3_3_2_3(ptr addrspace(1) inreg %ptr) {
3226 ; GFX900-LABEL: v_shuffle_v4f16_v2f16__3_3_2_3:
3228 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3229 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3230 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3231 ; GFX900-NEXT: ;;#ASMSTART
3232 ; GFX900-NEXT: ; def v1
3233 ; GFX900-NEXT: ;;#ASMEND
3234 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
3235 ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
3236 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3237 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3239 ; GFX90A-LABEL: v_shuffle_v4f16_v2f16__3_3_2_3:
3241 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3242 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3243 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3244 ; GFX90A-NEXT: ;;#ASMSTART
3245 ; GFX90A-NEXT: ; def v1
3246 ; GFX90A-NEXT: ;;#ASMEND
3247 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
3248 ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
3249 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3250 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3252 ; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_2_3:
3254 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3255 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3256 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3257 ; GFX940-NEXT: ;;#ASMSTART
3258 ; GFX940-NEXT: ; def v1
3259 ; GFX940-NEXT: ;;#ASMEND
3260 ; GFX940-NEXT: s_nop 0
3261 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
3262 ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
3263 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3264 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3265 %vec0 = call <2 x half> asm "; def $0", "=v"()
3266 %vec1 = call <2 x half> asm "; def $0", "=v"()
3267 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 3>
3268 store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8
; "=s" variant with an all-poison shuffle mask and a poison second operand:
; no element of the result is specified. The <4 x half> result is consumed by
; a side-effecting inline asm constrained to s[8:9]; the expected output
; contains only that use and no def. The check lines are autogenerated.
3272 define void @s_shuffle_v4f16_v2f16__u_u_u_u() {
3273 ; GFX9-LABEL: s_shuffle_v4f16_v2f16__u_u_u_u:
3275 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3276 ; GFX9-NEXT: ;;#ASMSTART
3277 ; GFX9-NEXT: ; use s[8:9]
3278 ; GFX9-NEXT: ;;#ASMEND
3279 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3280 %vec0 = call <2 x half> asm "; def $0", "=s"()
3281 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> poison
3282 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; "=s" variant with shuffle mask <0, poison, poison, poison>: element 0 of the
; result takes %vec0[0]; the remaining elements are unspecified. The result is
; consumed by a side-effecting inline asm constrained to s[8:9]; the expected
; output defines s8 directly with no extra move. The check lines are
; autogenerated.
3286 define void @s_shuffle_v4f16_v2f16__0_u_u_u() {
3287 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__0_u_u_u:
3289 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3290 ; GFX900-NEXT: ;;#ASMSTART
3291 ; GFX900-NEXT: ; def s8
3292 ; GFX900-NEXT: ;;#ASMEND
3293 ; GFX900-NEXT: ;;#ASMSTART
3294 ; GFX900-NEXT: ; use s[8:9]
3295 ; GFX900-NEXT: ;;#ASMEND
3296 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3298 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__0_u_u_u:
3300 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3301 ; GFX90A-NEXT: ;;#ASMSTART
3302 ; GFX90A-NEXT: ; def s8
3303 ; GFX90A-NEXT: ;;#ASMEND
3304 ; GFX90A-NEXT: ;;#ASMSTART
3305 ; GFX90A-NEXT: ; use s[8:9]
3306 ; GFX90A-NEXT: ;;#ASMEND
3307 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3309 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__0_u_u_u:
3311 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3312 ; GFX940-NEXT: ;;#ASMSTART
3313 ; GFX940-NEXT: ; def s8
3314 ; GFX940-NEXT: ;;#ASMEND
3315 ; GFX940-NEXT: s_nop 0
3316 ; GFX940-NEXT: ;;#ASMSTART
3317 ; GFX940-NEXT: ; use s[8:9]
3318 ; GFX940-NEXT: ;;#ASMEND
3319 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3320 %vec0 = call <2 x half> asm "; def $0", "=s"()
3321 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
3322 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; "=s" variant with shuffle mask <1, poison, poison, poison>: element 0 of the
; result takes %vec0[1]; the remaining elements are unspecified. The result is
; consumed by a side-effecting inline asm constrained to s[8:9]; the expected
; output shifts the defined register right by 16 into s8. The check lines are
; autogenerated.
3326 define void @s_shuffle_v4f16_v2f16__1_u_u_u() {
3327 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__1_u_u_u:
3329 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3330 ; GFX900-NEXT: ;;#ASMSTART
3331 ; GFX900-NEXT: ; def s4
3332 ; GFX900-NEXT: ;;#ASMEND
3333 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
3334 ; GFX900-NEXT: ;;#ASMSTART
3335 ; GFX900-NEXT: ; use s[8:9]
3336 ; GFX900-NEXT: ;;#ASMEND
3337 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3339 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__1_u_u_u:
3341 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3342 ; GFX90A-NEXT: ;;#ASMSTART
3343 ; GFX90A-NEXT: ; def s4
3344 ; GFX90A-NEXT: ;;#ASMEND
3345 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
3346 ; GFX90A-NEXT: ;;#ASMSTART
3347 ; GFX90A-NEXT: ; use s[8:9]
3348 ; GFX90A-NEXT: ;;#ASMEND
3349 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3351 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__1_u_u_u:
3353 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3354 ; GFX940-NEXT: ;;#ASMSTART
3355 ; GFX940-NEXT: ; def s0
3356 ; GFX940-NEXT: ;;#ASMEND
3357 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
3358 ; GFX940-NEXT: ;;#ASMSTART
3359 ; GFX940-NEXT: ; use s[8:9]
3360 ; GFX940-NEXT: ;;#ASMEND
3361 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3362 %vec0 = call <2 x half> asm "; def $0", "=s"()
3363 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
3364 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; "=s" variant with shuffle mask <2, poison, poison, poison>: index 2 selects
; element 0 of the second operand, which is poison here, so no element of the
; result comes from %vec0. The result is consumed by a side-effecting inline
; asm constrained to s[8:9]; the expected output contains only that use and no
; def. The check lines are autogenerated.
3368 define void @s_shuffle_v4f16_v2f16__2_u_u_u() {
3369 ; GFX9-LABEL: s_shuffle_v4f16_v2f16__2_u_u_u:
3371 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3372 ; GFX9-NEXT: ;;#ASMSTART
3373 ; GFX9-NEXT: ; use s[8:9]
3374 ; GFX9-NEXT: ;;#ASMEND
3375 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3376 %vec0 = call <2 x half> asm "; def $0", "=s"()
3377 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
3378 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <3, poison, poison, poison>.
; Result lane 0 is element 1 of %vec1; %vec0 is not referenced by the mask.
; The checks expect only one "def" block followed by a single s_lshr_b32 by 16
; into s8.
3382 define void @s_shuffle_v4f16_v2f16__3_u_u_u() {
3383 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_u_u_u:
3385 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3386 ; GFX900-NEXT: ;;#ASMSTART
3387 ; GFX900-NEXT: ; def s4
3388 ; GFX900-NEXT: ;;#ASMEND
3389 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
3390 ; GFX900-NEXT: ;;#ASMSTART
3391 ; GFX900-NEXT: ; use s[8:9]
3392 ; GFX900-NEXT: ;;#ASMEND
3393 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3395 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_u_u_u:
3397 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3398 ; GFX90A-NEXT: ;;#ASMSTART
3399 ; GFX90A-NEXT: ; def s4
3400 ; GFX90A-NEXT: ;;#ASMEND
3401 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
3402 ; GFX90A-NEXT: ;;#ASMSTART
3403 ; GFX90A-NEXT: ; use s[8:9]
3404 ; GFX90A-NEXT: ;;#ASMEND
3405 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3407 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_u_u_u:
3409 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3410 ; GFX940-NEXT: ;;#ASMSTART
3411 ; GFX940-NEXT: ; def s0
3412 ; GFX940-NEXT: ;;#ASMEND
3413 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
3414 ; GFX940-NEXT: ;;#ASMSTART
3415 ; GFX940-NEXT: ; use s[8:9]
3416 ; GFX940-NEXT: ;;#ASMEND
3417 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Two scalar inputs are defined, but mask index 3 reads only the second one.
3418 %vec0 = call <2 x half> asm "; def $0", "=s"()
3419 %vec1 = call <2 x half> asm "; def $0", "=s"()
3420 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
3421 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <3, 0, poison, poison>.
; Result lane 0 is element 1 of %vec1 and lane 1 is element 0 of %vec0; the
; upper two lanes are poison. The checks expect one register to be shifted
; right by 16 and then combined with the other, unshifted register by
; s_pack_ll_b32_b16 into s8.
3425 define void @s_shuffle_v4f16_v2f16__3_0_u_u() {
3426 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_0_u_u:
3428 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3429 ; GFX900-NEXT: ;;#ASMSTART
3430 ; GFX900-NEXT: ; def s5
3431 ; GFX900-NEXT: ;;#ASMEND
3432 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
3433 ; GFX900-NEXT: ;;#ASMSTART
3434 ; GFX900-NEXT: ; def s4
3435 ; GFX900-NEXT: ;;#ASMEND
3436 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3437 ; GFX900-NEXT: ;;#ASMSTART
3438 ; GFX900-NEXT: ; use s[8:9]
3439 ; GFX900-NEXT: ;;#ASMEND
3440 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3442 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_0_u_u:
3444 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3445 ; GFX90A-NEXT: ;;#ASMSTART
3446 ; GFX90A-NEXT: ; def s5
3447 ; GFX90A-NEXT: ;;#ASMEND
3448 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
3449 ; GFX90A-NEXT: ;;#ASMSTART
3450 ; GFX90A-NEXT: ; def s4
3451 ; GFX90A-NEXT: ;;#ASMEND
3452 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3453 ; GFX90A-NEXT: ;;#ASMSTART
3454 ; GFX90A-NEXT: ; use s[8:9]
3455 ; GFX90A-NEXT: ;;#ASMEND
3456 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3458 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_0_u_u:
3460 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3461 ; GFX940-NEXT: ;;#ASMSTART
3462 ; GFX940-NEXT: ; def s1
3463 ; GFX940-NEXT: ;;#ASMEND
3464 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
3465 ; GFX940-NEXT: ;;#ASMSTART
3466 ; GFX940-NEXT: ; def s0
3467 ; GFX940-NEXT: ;;#ASMEND
3468 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
3469 ; GFX940-NEXT: ;;#ASMSTART
3470 ; GFX940-NEXT: ; use s[8:9]
3471 ; GFX940-NEXT: ;;#ASMEND
3472 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are read: index 3 from %vec1 and index 0 from %vec0.
3473 %vec0 = call <2 x half> asm "; def $0", "=s"()
3474 %vec1 = call <2 x half> asm "; def $0", "=s"()
3475 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 0, i32 poison, i32 poison>
3476 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <3, 1, poison, poison>.
; Result lane 0 is element 1 of %vec1 and lane 1 is element 1 of %vec0; the
; upper two lanes are poison. The checks expect both defined registers to be
; shifted right by 16 and then combined by s_pack_ll_b32_b16 into s8.
3480 define void @s_shuffle_v4f16_v2f16__3_1_u_u() {
3481 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_1_u_u:
3483 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3484 ; GFX900-NEXT: ;;#ASMSTART
3485 ; GFX900-NEXT: ; def s4
3486 ; GFX900-NEXT: ;;#ASMEND
3487 ; GFX900-NEXT: ;;#ASMSTART
3488 ; GFX900-NEXT: ; def s5
3489 ; GFX900-NEXT: ;;#ASMEND
3490 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
3491 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
3492 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3493 ; GFX900-NEXT: ;;#ASMSTART
3494 ; GFX900-NEXT: ; use s[8:9]
3495 ; GFX900-NEXT: ;;#ASMEND
3496 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3498 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_1_u_u:
3500 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3501 ; GFX90A-NEXT: ;;#ASMSTART
3502 ; GFX90A-NEXT: ; def s4
3503 ; GFX90A-NEXT: ;;#ASMEND
3504 ; GFX90A-NEXT: ;;#ASMSTART
3505 ; GFX90A-NEXT: ; def s5
3506 ; GFX90A-NEXT: ;;#ASMEND
3507 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
3508 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
3509 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3510 ; GFX90A-NEXT: ;;#ASMSTART
3511 ; GFX90A-NEXT: ; use s[8:9]
3512 ; GFX90A-NEXT: ;;#ASMEND
3513 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3515 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_1_u_u:
3517 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3518 ; GFX940-NEXT: ;;#ASMSTART
3519 ; GFX940-NEXT: ; def s0
3520 ; GFX940-NEXT: ;;#ASMEND
3521 ; GFX940-NEXT: ;;#ASMSTART
3522 ; GFX940-NEXT: ; def s1
3523 ; GFX940-NEXT: ;;#ASMEND
3524 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
3525 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
3526 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
3527 ; GFX940-NEXT: ;;#ASMSTART
3528 ; GFX940-NEXT: ; use s[8:9]
3529 ; GFX940-NEXT: ;;#ASMEND
3530 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are read, each through its element 1.
3531 %vec0 = call <2 x half> asm "; def $0", "=s"()
3532 %vec1 = call <2 x half> asm "; def $0", "=s"()
3533 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 1, i32 poison, i32 poison>
3534 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <3, 2, poison, poison>.
; Result lanes 0 and 1 are elements 1 and 0 of %vec1, i.e. %vec1 with its two
; elements swapped; %vec0 is not referenced by the mask. The checks expect a
; single "def" block, a shift right by 16 into a temporary, and one
; s_pack_ll_b32_b16 of the temporary with the original register into s8.
3538 define void @s_shuffle_v4f16_v2f16__3_2_u_u() {
3539 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_2_u_u:
3541 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3542 ; GFX900-NEXT: ;;#ASMSTART
3543 ; GFX900-NEXT: ; def s4
3544 ; GFX900-NEXT: ;;#ASMEND
3545 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
3546 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3547 ; GFX900-NEXT: ;;#ASMSTART
3548 ; GFX900-NEXT: ; use s[8:9]
3549 ; GFX900-NEXT: ;;#ASMEND
3550 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3552 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_2_u_u:
3554 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3555 ; GFX90A-NEXT: ;;#ASMSTART
3556 ; GFX90A-NEXT: ; def s4
3557 ; GFX90A-NEXT: ;;#ASMEND
3558 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
3559 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3560 ; GFX90A-NEXT: ;;#ASMSTART
3561 ; GFX90A-NEXT: ; use s[8:9]
3562 ; GFX90A-NEXT: ;;#ASMEND
3563 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3565 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_2_u_u:
3567 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3568 ; GFX940-NEXT: ;;#ASMSTART
3569 ; GFX940-NEXT: ; def s0
3570 ; GFX940-NEXT: ;;#ASMEND
3571 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
3572 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
3573 ; GFX940-NEXT: ;;#ASMSTART
3574 ; GFX940-NEXT: ; use s[8:9]
3575 ; GFX940-NEXT: ;;#ASMEND
3576 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both mask indices (3 and 2) address the second operand, %vec1.
3577 %vec0 = call <2 x half> asm "; def $0", "=s"()
3578 %vec1 = call <2 x half> asm "; def $0", "=s"()
3579 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 2, i32 poison, i32 poison>
3580 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <3, 3, poison, poison>.
; Result lanes 0 and 1 both hold element 1 of %vec1; %vec0 is not referenced
; by the mask. The checks expect a single "def" block, an in-place shift right
; by 16, and one s_pack_ll_b32_b16 of that register with itself into s8.
3584 define void @s_shuffle_v4f16_v2f16__3_3_u_u() {
3585 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_u_u:
3587 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3588 ; GFX900-NEXT: ;;#ASMSTART
3589 ; GFX900-NEXT: ; def s4
3590 ; GFX900-NEXT: ;;#ASMEND
3591 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
3592 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
3593 ; GFX900-NEXT: ;;#ASMSTART
3594 ; GFX900-NEXT: ; use s[8:9]
3595 ; GFX900-NEXT: ;;#ASMEND
3596 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3598 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_u_u:
3600 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3601 ; GFX90A-NEXT: ;;#ASMSTART
3602 ; GFX90A-NEXT: ; def s4
3603 ; GFX90A-NEXT: ;;#ASMEND
3604 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
3605 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
3606 ; GFX90A-NEXT: ;;#ASMSTART
3607 ; GFX90A-NEXT: ; use s[8:9]
3608 ; GFX90A-NEXT: ;;#ASMEND
3609 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3611 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_u_u:
3613 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3614 ; GFX940-NEXT: ;;#ASMSTART
3615 ; GFX940-NEXT: ; def s0
3616 ; GFX940-NEXT: ;;#ASMEND
3617 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
3618 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
3619 ; GFX940-NEXT: ;;#ASMSTART
3620 ; GFX940-NEXT: ; use s[8:9]
3621 ; GFX940-NEXT: ;;#ASMEND
3622 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Only %vec1 is read; index 3 is repeated for the two defined lanes.
3623 %vec0 = call <2 x half> asm "; def $0", "=s"()
3624 %vec1 = call <2 x half> asm "; def $0", "=s"()
3625 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 poison>
3626 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <3, 3, 0, poison>.
; Result lanes 0 and 1 both hold element 1 of %vec1, lane 2 holds element 0 of
; %vec0 and lane 3 is poison. The checks expect one input to be defined
; directly in s9 with no further instruction writing s9, while the other is
; shifted right by 16 and packed with itself into s8.
3630 define void @s_shuffle_v4f16_v2f16__3_3_0_u() {
3631 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_0_u:
3633 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3634 ; GFX900-NEXT: ;;#ASMSTART
3635 ; GFX900-NEXT: ; def s4
3636 ; GFX900-NEXT: ;;#ASMEND
3637 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
3638 ; GFX900-NEXT: ;;#ASMSTART
3639 ; GFX900-NEXT: ; def s9
3640 ; GFX900-NEXT: ;;#ASMEND
3641 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
3642 ; GFX900-NEXT: ;;#ASMSTART
3643 ; GFX900-NEXT: ; use s[8:9]
3644 ; GFX900-NEXT: ;;#ASMEND
3645 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3647 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_0_u:
3649 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3650 ; GFX90A-NEXT: ;;#ASMSTART
3651 ; GFX90A-NEXT: ; def s4
3652 ; GFX90A-NEXT: ;;#ASMEND
3653 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
3654 ; GFX90A-NEXT: ;;#ASMSTART
3655 ; GFX90A-NEXT: ; def s9
3656 ; GFX90A-NEXT: ;;#ASMEND
3657 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
3658 ; GFX90A-NEXT: ;;#ASMSTART
3659 ; GFX90A-NEXT: ; use s[8:9]
3660 ; GFX90A-NEXT: ;;#ASMEND
3661 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3663 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_0_u:
3665 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3666 ; GFX940-NEXT: ;;#ASMSTART
3667 ; GFX940-NEXT: ; def s0
3668 ; GFX940-NEXT: ;;#ASMEND
3669 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
3670 ; GFX940-NEXT: ;;#ASMSTART
3671 ; GFX940-NEXT: ; def s9
3672 ; GFX940-NEXT: ;;#ASMEND
3673 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
3674 ; GFX940-NEXT: ;;#ASMSTART
3675 ; GFX940-NEXT: ; use s[8:9]
3676 ; GFX940-NEXT: ;;#ASMEND
3677 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Lanes 2 and 3 are element 0 of %vec0 followed by a poison lane.
3678 %vec0 = call <2 x half> asm "; def $0", "=s"()
3679 %vec1 = call <2 x half> asm "; def $0", "=s"()
3680 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 poison>
3681 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <3, 3, 1, poison>.
; Result lanes 0 and 1 both hold element 1 of %vec1, lane 2 holds element 1 of
; %vec0 and lane 3 is poison. The checks expect one defined register to be
; shifted right by 16 straight into s9, and the other to be shifted right by
; 16 and packed with itself into s8.
3685 define void @s_shuffle_v4f16_v2f16__3_3_1_u() {
3686 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_1_u:
3688 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3689 ; GFX900-NEXT: ;;#ASMSTART
3690 ; GFX900-NEXT: ; def s4
3691 ; GFX900-NEXT: ;;#ASMEND
3692 ; GFX900-NEXT: ;;#ASMSTART
3693 ; GFX900-NEXT: ; def s5
3694 ; GFX900-NEXT: ;;#ASMEND
3695 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
3696 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
3697 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
3698 ; GFX900-NEXT: ;;#ASMSTART
3699 ; GFX900-NEXT: ; use s[8:9]
3700 ; GFX900-NEXT: ;;#ASMEND
3701 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3703 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_1_u:
3705 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3706 ; GFX90A-NEXT: ;;#ASMSTART
3707 ; GFX90A-NEXT: ; def s4
3708 ; GFX90A-NEXT: ;;#ASMEND
3709 ; GFX90A-NEXT: ;;#ASMSTART
3710 ; GFX90A-NEXT: ; def s5
3711 ; GFX90A-NEXT: ;;#ASMEND
3712 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
3713 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
3714 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
3715 ; GFX90A-NEXT: ;;#ASMSTART
3716 ; GFX90A-NEXT: ; use s[8:9]
3717 ; GFX90A-NEXT: ;;#ASMEND
3718 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3720 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_1_u:
3722 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3723 ; GFX940-NEXT: ;;#ASMSTART
3724 ; GFX940-NEXT: ; def s0
3725 ; GFX940-NEXT: ;;#ASMEND
3726 ; GFX940-NEXT: ;;#ASMSTART
3727 ; GFX940-NEXT: ; def s1
3728 ; GFX940-NEXT: ;;#ASMEND
3729 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
3730 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
3731 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
3732 ; GFX940-NEXT: ;;#ASMSTART
3733 ; GFX940-NEXT: ; use s[8:9]
3734 ; GFX940-NEXT: ;;#ASMEND
3735 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Lanes 2 and 3 are element 1 of %vec0 followed by a poison lane.
3736 %vec0 = call <2 x half> asm "; def $0", "=s"()
3737 %vec1 = call <2 x half> asm "; def $0", "=s"()
3738 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 poison>
3739 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <3, 3, 2, poison>.
; Result lanes 0 and 1 both hold element 1 of %vec1, lane 2 holds element 0 of
; %vec1 and lane 3 is poison; %vec0 is not referenced by the mask. The checks
; expect a single "def" block writing s9 directly, then a shift right by 16
; into a temporary that is packed with itself into s8.
3743 define void @s_shuffle_v4f16_v2f16__3_3_2_u() {
3744 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_2_u:
3746 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3747 ; GFX900-NEXT: ;;#ASMSTART
3748 ; GFX900-NEXT: ; def s9
3749 ; GFX900-NEXT: ;;#ASMEND
3750 ; GFX900-NEXT: s_lshr_b32 s4, s9, 16
3751 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
3752 ; GFX900-NEXT: ;;#ASMSTART
3753 ; GFX900-NEXT: ; use s[8:9]
3754 ; GFX900-NEXT: ;;#ASMEND
3755 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3757 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_2_u:
3759 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3760 ; GFX90A-NEXT: ;;#ASMSTART
3761 ; GFX90A-NEXT: ; def s9
3762 ; GFX90A-NEXT: ;;#ASMEND
3763 ; GFX90A-NEXT: s_lshr_b32 s4, s9, 16
3764 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
3765 ; GFX90A-NEXT: ;;#ASMSTART
3766 ; GFX90A-NEXT: ; use s[8:9]
3767 ; GFX90A-NEXT: ;;#ASMEND
3768 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3770 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_2_u:
3772 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3773 ; GFX940-NEXT: ;;#ASMSTART
3774 ; GFX940-NEXT: ; def s9
3775 ; GFX940-NEXT: ;;#ASMEND
3776 ; GFX940-NEXT: s_lshr_b32 s0, s9, 16
3777 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
3778 ; GFX940-NEXT: ;;#ASMSTART
3779 ; GFX940-NEXT: ; use s[8:9]
3780 ; GFX940-NEXT: ;;#ASMEND
3781 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; All defined lanes come from %vec1 (indices 3, 3 and 2).
3782 %vec0 = call <2 x half> asm "; def $0", "=s"()
3783 %vec1 = call <2 x half> asm "; def $0", "=s"()
3784 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 poison>
3785 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <3, 3, 3, poison>.
; Result lanes 0, 1 and 2 all hold element 1 of %vec1 and lane 3 is poison;
; %vec0 is not referenced by the mask. The checks expect a single "def" block,
; a shift right by 16 directly into s9, and s8 formed by packing s9 with
; itself.
3789 define void @s_shuffle_v4f16_v2f16__3_3_3_u() {
3790 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_3_u:
3792 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3793 ; GFX900-NEXT: ;;#ASMSTART
3794 ; GFX900-NEXT: ; def s4
3795 ; GFX900-NEXT: ;;#ASMEND
3796 ; GFX900-NEXT: s_lshr_b32 s9, s4, 16
3797 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s9
3798 ; GFX900-NEXT: ;;#ASMSTART
3799 ; GFX900-NEXT: ; use s[8:9]
3800 ; GFX900-NEXT: ;;#ASMEND
3801 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3803 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_3_u:
3805 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3806 ; GFX90A-NEXT: ;;#ASMSTART
3807 ; GFX90A-NEXT: ; def s4
3808 ; GFX90A-NEXT: ;;#ASMEND
3809 ; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
3810 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s9
3811 ; GFX90A-NEXT: ;;#ASMSTART
3812 ; GFX90A-NEXT: ; use s[8:9]
3813 ; GFX90A-NEXT: ;;#ASMEND
3814 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3816 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_3_u:
3818 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3819 ; GFX940-NEXT: ;;#ASMSTART
3820 ; GFX940-NEXT: ; def s0
3821 ; GFX940-NEXT: ;;#ASMEND
3822 ; GFX940-NEXT: s_lshr_b32 s9, s0, 16
3823 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9
3824 ; GFX940-NEXT: ;;#ASMSTART
3825 ; GFX940-NEXT: ; use s[8:9]
3826 ; GFX940-NEXT: ;;#ASMEND
3827 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Only %vec1 is read; index 3 is repeated for the three defined lanes.
3828 %vec0 = call <2 x half> asm "; def $0", "=s"()
3829 %vec1 = call <2 x half> asm "; def $0", "=s"()
3830 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 poison>
3831 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <3, 3, 3, 0>.
; Result lanes 0, 1 and 2 all hold element 1 of %vec1 and lane 3 holds
; element 0 of %vec0. The checks expect one register to be shifted right by
; 16; s9 is then a pack of that shifted register with the other, unshifted
; register, and s8 is a pack of the shifted register with itself.
3835 define void @s_shuffle_v4f16_v2f16__3_3_3_0() {
3836 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_3_0:
3838 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3839 ; GFX900-NEXT: ;;#ASMSTART
3840 ; GFX900-NEXT: ; def s5
3841 ; GFX900-NEXT: ;;#ASMEND
3842 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
3843 ; GFX900-NEXT: ;;#ASMSTART
3844 ; GFX900-NEXT: ; def s4
3845 ; GFX900-NEXT: ;;#ASMEND
3846 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4
3847 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
3848 ; GFX900-NEXT: ;;#ASMSTART
3849 ; GFX900-NEXT: ; use s[8:9]
3850 ; GFX900-NEXT: ;;#ASMEND
3851 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3853 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_3_0:
3855 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3856 ; GFX90A-NEXT: ;;#ASMSTART
3857 ; GFX90A-NEXT: ; def s5
3858 ; GFX90A-NEXT: ;;#ASMEND
3859 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
3860 ; GFX90A-NEXT: ;;#ASMSTART
3861 ; GFX90A-NEXT: ; def s4
3862 ; GFX90A-NEXT: ;;#ASMEND
3863 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4
3864 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
3865 ; GFX90A-NEXT: ;;#ASMSTART
3866 ; GFX90A-NEXT: ; use s[8:9]
3867 ; GFX90A-NEXT: ;;#ASMEND
3868 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3870 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_3_0:
3872 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3873 ; GFX940-NEXT: ;;#ASMSTART
3874 ; GFX940-NEXT: ; def s1
3875 ; GFX940-NEXT: ;;#ASMEND
3876 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
3877 ; GFX940-NEXT: ;;#ASMSTART
3878 ; GFX940-NEXT: ; def s0
3879 ; GFX940-NEXT: ;;#ASMEND
3880 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0
3881 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
3882 ; GFX940-NEXT: ;;#ASMSTART
3883 ; GFX940-NEXT: ; use s[8:9]
3884 ; GFX940-NEXT: ;;#ASMEND
3885 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; All four lanes are defined; only the last one reads %vec0.
3886 %vec0 = call <2 x half> asm "; def $0", "=s"()
3887 %vec1 = call <2 x half> asm "; def $0", "=s"()
3888 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
3889 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <3, 3, 3, 1>.
; Result lanes 0, 1 and 2 all hold element 1 of %vec1 and lane 3 holds
; element 1 of %vec0. The checks expect both defined registers to be shifted
; right by 16; s9 is then a pack of the two shifted registers and s8 is a pack
; of one shifted register with itself.
3893 define void @s_shuffle_v4f16_v2f16__3_3_3_1() {
3894 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_3_1:
3896 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3897 ; GFX900-NEXT: ;;#ASMSTART
3898 ; GFX900-NEXT: ; def s4
3899 ; GFX900-NEXT: ;;#ASMEND
3900 ; GFX900-NEXT: ;;#ASMSTART
3901 ; GFX900-NEXT: ; def s5
3902 ; GFX900-NEXT: ;;#ASMEND
3903 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
3904 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
3905 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4
3906 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
3907 ; GFX900-NEXT: ;;#ASMSTART
3908 ; GFX900-NEXT: ; use s[8:9]
3909 ; GFX900-NEXT: ;;#ASMEND
3910 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3912 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_3_1:
3914 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3915 ; GFX90A-NEXT: ;;#ASMSTART
3916 ; GFX90A-NEXT: ; def s4
3917 ; GFX90A-NEXT: ;;#ASMEND
3918 ; GFX90A-NEXT: ;;#ASMSTART
3919 ; GFX90A-NEXT: ; def s5
3920 ; GFX90A-NEXT: ;;#ASMEND
3921 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
3922 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
3923 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4
3924 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
3925 ; GFX90A-NEXT: ;;#ASMSTART
3926 ; GFX90A-NEXT: ; use s[8:9]
3927 ; GFX90A-NEXT: ;;#ASMEND
3928 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3930 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_3_1:
3932 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3933 ; GFX940-NEXT: ;;#ASMSTART
3934 ; GFX940-NEXT: ; def s0
3935 ; GFX940-NEXT: ;;#ASMEND
3936 ; GFX940-NEXT: ;;#ASMSTART
3937 ; GFX940-NEXT: ; def s1
3938 ; GFX940-NEXT: ;;#ASMEND
3939 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
3940 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
3941 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0
3942 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
3943 ; GFX940-NEXT: ;;#ASMSTART
3944 ; GFX940-NEXT: ; use s[8:9]
3945 ; GFX940-NEXT: ;;#ASMEND
3946 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; All four lanes are defined; only the last one reads %vec0.
3947 %vec0 = call <2 x half> asm "; def $0", "=s"()
3948 %vec1 = call <2 x half> asm "; def $0", "=s"()
3949 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 1>
3950 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <3, 3, 3, 2>.
; Result lanes 0, 1 and 2 all hold element 1 of %vec1 and lane 3 holds
; element 0 of %vec1; %vec0 is not referenced by the mask. The checks expect a
; single "def" block, a shift right by 16 into a temporary, s9 packed from the
; temporary and the original register, and s8 packed from the temporary with
; itself.
3954 define void @s_shuffle_v4f16_v2f16__3_3_3_2() {
3955 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_3_2:
3957 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3958 ; GFX900-NEXT: ;;#ASMSTART
3959 ; GFX900-NEXT: ; def s4
3960 ; GFX900-NEXT: ;;#ASMEND
3961 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
3962 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4
3963 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
3964 ; GFX900-NEXT: ;;#ASMSTART
3965 ; GFX900-NEXT: ; use s[8:9]
3966 ; GFX900-NEXT: ;;#ASMEND
3967 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3969 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_3_2:
3971 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3972 ; GFX90A-NEXT: ;;#ASMSTART
3973 ; GFX90A-NEXT: ; def s4
3974 ; GFX90A-NEXT: ;;#ASMEND
3975 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
3976 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4
3977 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
3978 ; GFX90A-NEXT: ;;#ASMSTART
3979 ; GFX90A-NEXT: ; use s[8:9]
3980 ; GFX90A-NEXT: ;;#ASMEND
3981 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3983 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_3_2:
3985 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3986 ; GFX940-NEXT: ;;#ASMSTART
3987 ; GFX940-NEXT: ; def s0
3988 ; GFX940-NEXT: ;;#ASMEND
3989 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
3990 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0
3991 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
3992 ; GFX940-NEXT: ;;#ASMSTART
3993 ; GFX940-NEXT: ; use s[8:9]
3994 ; GFX940-NEXT: ;;#ASMEND
3995 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Every mask index (3, 3, 3, 2) addresses the second operand, %vec1.
3996 %vec0 = call <2 x half> asm "; def $0", "=s"()
3997 %vec1 = call <2 x half> asm "; def $0", "=s"()
3998 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 2>
3999 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <3, 3, 3, 3>.
; All four result lanes hold element 1 of %vec1; %vec0 is not referenced by
; the mask. The checks expect a single "def" block, an in-place shift right by
; 16, one s_pack_ll_b32_b16 of that register with itself into s8, and s9
; produced by copying s8 with s_mov_b32.
4003 define void @s_shuffle_v4f16_v2f16__3_3_3_3() {
4004 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_3_3:
4006 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4007 ; GFX900-NEXT: ;;#ASMSTART
4008 ; GFX900-NEXT: ; def s4
4009 ; GFX900-NEXT: ;;#ASMEND
4010 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
4011 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
4012 ; GFX900-NEXT: s_mov_b32 s9, s8
4013 ; GFX900-NEXT: ;;#ASMSTART
4014 ; GFX900-NEXT: ; use s[8:9]
4015 ; GFX900-NEXT: ;;#ASMEND
4016 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4018 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_3_3:
4020 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4021 ; GFX90A-NEXT: ;;#ASMSTART
4022 ; GFX90A-NEXT: ; def s4
4023 ; GFX90A-NEXT: ;;#ASMEND
4024 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
4025 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
4026 ; GFX90A-NEXT: s_mov_b32 s9, s8
4027 ; GFX90A-NEXT: ;;#ASMSTART
4028 ; GFX90A-NEXT: ; use s[8:9]
4029 ; GFX90A-NEXT: ;;#ASMEND
4030 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4032 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_3_3:
4034 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4035 ; GFX940-NEXT: ;;#ASMSTART
4036 ; GFX940-NEXT: ; def s0
4037 ; GFX940-NEXT: ;;#ASMEND
4038 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
4039 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
4040 ; GFX940-NEXT: s_mov_b32 s9, s8
4041 ; GFX940-NEXT: ;;#ASMSTART
4042 ; GFX940-NEXT: ; use s[8:9]
4043 ; GFX940-NEXT: ;;#ASMEND
4044 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Only %vec1 is read; index 3 is repeated for all four lanes.
4045 %vec0 = call <2 x half> asm "; def $0", "=s"()
4046 %vec1 = call <2 x half> asm "; def $0", "=s"()
4047 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
4048 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <poison, 0, 0, 0>.
; Result lane 0 is poison and lanes 1, 2 and 3 all hold element 0 of %vec0;
; the second shuffle operand is poison. The checks expect s9 to be a pack of
; the defined register with itself and s8 to be the defined register shifted
; left by 16.
4052 define void @s_shuffle_v4f16_v2f16__u_0_0_0() {
4053 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__u_0_0_0:
4055 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4056 ; GFX900-NEXT: ;;#ASMSTART
4057 ; GFX900-NEXT: ; def s4
4058 ; GFX900-NEXT: ;;#ASMEND
4059 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4060 ; GFX900-NEXT: s_lshl_b32 s8, s4, 16
4061 ; GFX900-NEXT: ;;#ASMSTART
4062 ; GFX900-NEXT: ; use s[8:9]
4063 ; GFX900-NEXT: ;;#ASMEND
4064 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4066 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__u_0_0_0:
4068 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4069 ; GFX90A-NEXT: ;;#ASMSTART
4070 ; GFX90A-NEXT: ; def s4
4071 ; GFX90A-NEXT: ;;#ASMEND
4072 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4073 ; GFX90A-NEXT: s_lshl_b32 s8, s4, 16
4074 ; GFX90A-NEXT: ;;#ASMSTART
4075 ; GFX90A-NEXT: ; use s[8:9]
4076 ; GFX90A-NEXT: ;;#ASMEND
4077 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4079 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__u_0_0_0:
4081 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4082 ; GFX940-NEXT: ;;#ASMSTART
4083 ; GFX940-NEXT: ; def s0
4084 ; GFX940-NEXT: ;;#ASMEND
4085 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
4086 ; GFX940-NEXT: s_lshl_b32 s8, s0, 16
4087 ; GFX940-NEXT: ;;#ASMSTART
4088 ; GFX940-NEXT: ; use s[8:9]
4089 ; GFX940-NEXT: ;;#ASMEND
4090 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Single-input form: one scalar def, shuffled against a poison operand.
4091 %vec0 = call <2 x half> asm "; def $0", "=s"()
4092 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
4093 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with an all-zero mask (written as zeroinitializer).
; All four result lanes hold element 0 of %vec0; the second shuffle operand is
; poison. The checks expect one s_pack_ll_b32_b16 of the defined register with
; itself into s8, and s9 produced by copying s8 with s_mov_b32.
4097 define void @s_shuffle_v4f16_v2f16__0_0_0_0() {
4098 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__0_0_0_0:
4100 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4101 ; GFX900-NEXT: ;;#ASMSTART
4102 ; GFX900-NEXT: ; def s4
4103 ; GFX900-NEXT: ;;#ASMEND
4104 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
4105 ; GFX900-NEXT: s_mov_b32 s9, s8
4106 ; GFX900-NEXT: ;;#ASMSTART
4107 ; GFX900-NEXT: ; use s[8:9]
4108 ; GFX900-NEXT: ;;#ASMEND
4109 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4111 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__0_0_0_0:
4113 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4114 ; GFX90A-NEXT: ;;#ASMSTART
4115 ; GFX90A-NEXT: ; def s4
4116 ; GFX90A-NEXT: ;;#ASMEND
4117 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
4118 ; GFX90A-NEXT: s_mov_b32 s9, s8
4119 ; GFX90A-NEXT: ;;#ASMSTART
4120 ; GFX90A-NEXT: ; use s[8:9]
4121 ; GFX90A-NEXT: ;;#ASMEND
4122 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4124 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__0_0_0_0:
4126 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4127 ; GFX940-NEXT: ;;#ASMSTART
4128 ; GFX940-NEXT: ; def s0
4129 ; GFX940-NEXT: ;;#ASMEND
4130 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
4131 ; GFX940-NEXT: s_mov_b32 s9, s8
4132 ; GFX940-NEXT: ;;#ASMSTART
4133 ; GFX940-NEXT: ; use s[8:9]
4134 ; GFX940-NEXT: ;;#ASMEND
4135 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; zeroinitializer as the mask means index 0 in every lane.
4136 %vec0 = call <2 x half> asm "; def $0", "=s"()
4137 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> zeroinitializer
4138 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <1, 0, 0, 0>.
; Result lane 0 holds element 1 of %vec0 and lanes 1, 2 and 3 hold element 0
; of %vec0; the second shuffle operand is poison. The checks expect a shift
; right by 16 into a temporary, s8 packed from the temporary and the original
; register, and s9 packed from the original register with itself.
4142 define void @s_shuffle_v4f16_v2f16__1_0_0_0() {
4143 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__1_0_0_0:
4145 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4146 ; GFX900-NEXT: ;;#ASMSTART
4147 ; GFX900-NEXT: ; def s4
4148 ; GFX900-NEXT: ;;#ASMEND
4149 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
4150 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4151 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4152 ; GFX900-NEXT: ;;#ASMSTART
4153 ; GFX900-NEXT: ; use s[8:9]
4154 ; GFX900-NEXT: ;;#ASMEND
4155 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4157 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__1_0_0_0:
4159 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4160 ; GFX90A-NEXT: ;;#ASMSTART
4161 ; GFX90A-NEXT: ; def s4
4162 ; GFX90A-NEXT: ;;#ASMEND
4163 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
4164 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4165 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4166 ; GFX90A-NEXT: ;;#ASMSTART
4167 ; GFX90A-NEXT: ; use s[8:9]
4168 ; GFX90A-NEXT: ;;#ASMEND
4169 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4171 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__1_0_0_0:
4173 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4174 ; GFX940-NEXT: ;;#ASMSTART
4175 ; GFX940-NEXT: ; def s0
4176 ; GFX940-NEXT: ;;#ASMEND
4177 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
4178 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
4179 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
4180 ; GFX940-NEXT: ;;#ASMSTART
4181 ; GFX940-NEXT: ; use s[8:9]
4182 ; GFX940-NEXT: ;;#ASMEND
4183 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Single-input form: every mask index (1, 0, 0, 0) addresses %vec0.
4184 %vec0 = call <2 x half> asm "; def $0", "=s"()
4185 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
4186 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <2, 0, 0, 0>.
; Index 2 addresses element 0 of the second shuffle operand, which is poison
; here, so only lanes 1, 2 and 3 (all element 0 of %vec0) carry defined data.
; The checks expect s9 to be a pack of the defined register with itself and s8
; to be the defined register shifted left by 16.
4190 define void @s_shuffle_v4f16_v2f16__2_0_0_0() {
4191 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__2_0_0_0:
4193 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4194 ; GFX900-NEXT: ;;#ASMSTART
4195 ; GFX900-NEXT: ; def s4
4196 ; GFX900-NEXT: ;;#ASMEND
4197 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4198 ; GFX900-NEXT: s_lshl_b32 s8, s4, 16
4199 ; GFX900-NEXT: ;;#ASMSTART
4200 ; GFX900-NEXT: ; use s[8:9]
4201 ; GFX900-NEXT: ;;#ASMEND
4202 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4204 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__2_0_0_0:
4206 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4207 ; GFX90A-NEXT: ;;#ASMSTART
4208 ; GFX90A-NEXT: ; def s4
4209 ; GFX90A-NEXT: ;;#ASMEND
4210 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4211 ; GFX90A-NEXT: s_lshl_b32 s8, s4, 16
4212 ; GFX90A-NEXT: ;;#ASMSTART
4213 ; GFX90A-NEXT: ; use s[8:9]
4214 ; GFX90A-NEXT: ;;#ASMEND
4215 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4217 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__2_0_0_0:
4219 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4220 ; GFX940-NEXT: ;;#ASMSTART
4221 ; GFX940-NEXT: ; def s0
4222 ; GFX940-NEXT: ;;#ASMEND
4223 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
4224 ; GFX940-NEXT: s_lshl_b32 s8, s0, 16
4225 ; GFX940-NEXT: ;;#ASMSTART
4226 ; GFX940-NEXT: ; use s[8:9]
4227 ; GFX940-NEXT: ;;#ASMEND
4228 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Single-input form: lane 0 selects from the poison operand.
4229 %vec0 = call <2 x half> asm "; def $0", "=s"()
4230 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
4231 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar shuffle with mask <3, 0, 0, 0>.
; Result lane 0 holds element 1 of %vec1 and lanes 1, 2 and 3 hold element 0
; of %vec0. The checks expect one register to be shifted right by 16; s8 is
; then a pack of that shifted register with the other, unshifted register, and
; s9 is a pack of the unshifted register with itself.
4235 define void @s_shuffle_v4f16_v2f16__3_0_0_0() {
4236 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_0_0_0:
4238 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4239 ; GFX900-NEXT: ;;#ASMSTART
4240 ; GFX900-NEXT: ; def s5
4241 ; GFX900-NEXT: ;;#ASMEND
4242 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
4243 ; GFX900-NEXT: ;;#ASMSTART
4244 ; GFX900-NEXT: ; def s4
4245 ; GFX900-NEXT: ;;#ASMEND
4246 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4247 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4248 ; GFX900-NEXT: ;;#ASMSTART
4249 ; GFX900-NEXT: ; use s[8:9]
4250 ; GFX900-NEXT: ;;#ASMEND
4251 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4253 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_0_0_0:
4255 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4256 ; GFX90A-NEXT: ;;#ASMSTART
4257 ; GFX90A-NEXT: ; def s5
4258 ; GFX90A-NEXT: ;;#ASMEND
4259 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
4260 ; GFX90A-NEXT: ;;#ASMSTART
4261 ; GFX90A-NEXT: ; def s4
4262 ; GFX90A-NEXT: ;;#ASMEND
4263 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4264 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4265 ; GFX90A-NEXT: ;;#ASMSTART
4266 ; GFX90A-NEXT: ; use s[8:9]
4267 ; GFX90A-NEXT: ;;#ASMEND
4268 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4270 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_0_0_0:
4272 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4273 ; GFX940-NEXT: ;;#ASMSTART
4274 ; GFX940-NEXT: ; def s1
4275 ; GFX940-NEXT: ;;#ASMEND
4276 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
4277 ; GFX940-NEXT: ;;#ASMSTART
4278 ; GFX940-NEXT: ; def s0
4279 ; GFX940-NEXT: ;;#ASMEND
4280 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
4281 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
4282 ; GFX940-NEXT: ;;#ASMSTART
4283 ; GFX940-NEXT: ; use s[8:9]
4284 ; GFX940-NEXT: ;;#ASMEND
4285 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; All four lanes are defined; only the first one reads %vec1.
4286 %vec0 = call <2 x half> asm "; def $0", "=s"()
4287 %vec1 = call <2 x half> asm "; def $0", "=s"()
4288 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
4289 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of two <2 x half> inline-asm values ("=s" constraint) with mask
; <3, poison, 0, 0>: the result lanes are <vec1[1], don't-care, vec0[0], vec0[0]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
4293 define void @s_shuffle_v4f16_v2f16__3_u_0_0() {
4294 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_u_0_0:
4296 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4297 ; GFX900-NEXT: ;;#ASMSTART
4298 ; GFX900-NEXT: ; def s4
4299 ; GFX900-NEXT: ;;#ASMEND
4300 ; GFX900-NEXT: ;;#ASMSTART
4301 ; GFX900-NEXT: ; def s5
4302 ; GFX900-NEXT: ;;#ASMEND
4303 ; GFX900-NEXT: s_lshr_b32 s8, s5, 16
4304 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4305 ; GFX900-NEXT: ;;#ASMSTART
4306 ; GFX900-NEXT: ; use s[8:9]
4307 ; GFX900-NEXT: ;;#ASMEND
4308 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4310 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_u_0_0:
4312 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4313 ; GFX90A-NEXT: ;;#ASMSTART
4314 ; GFX90A-NEXT: ; def s4
4315 ; GFX90A-NEXT: ;;#ASMEND
4316 ; GFX90A-NEXT: ;;#ASMSTART
4317 ; GFX90A-NEXT: ; def s5
4318 ; GFX90A-NEXT: ;;#ASMEND
4319 ; GFX90A-NEXT: s_lshr_b32 s8, s5, 16
4320 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4321 ; GFX90A-NEXT: ;;#ASMSTART
4322 ; GFX90A-NEXT: ; use s[8:9]
4323 ; GFX90A-NEXT: ;;#ASMEND
4324 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4326 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_u_0_0:
4328 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4329 ; GFX940-NEXT: ;;#ASMSTART
4330 ; GFX940-NEXT: ; def s0
4331 ; GFX940-NEXT: ;;#ASMEND
4332 ; GFX940-NEXT: ;;#ASMSTART
4333 ; GFX940-NEXT: ; def s1
4334 ; GFX940-NEXT: ;;#ASMEND
4335 ; GFX940-NEXT: s_lshr_b32 s8, s1, 16
4336 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
4337 ; GFX940-NEXT: ;;#ASMSTART
4338 ; GFX940-NEXT: ; use s[8:9]
4339 ; GFX940-NEXT: ;;#ASMEND
4340 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4341 %vec0 = call <2 x half> asm "; def $0", "=s"()
4342 %vec1 = call <2 x half> asm "; def $0", "=s"()
4343 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 poison, i32 0, i32 0>
4344 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of two <2 x half> inline-asm values ("=s" constraint) with mask
; <3, 1, 0, 0>: the result lanes are <vec1[1], vec0[1], vec0[0], vec0[0]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
4348 define void @s_shuffle_v4f16_v2f16__3_1_0_0() {
4349 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_1_0_0:
4351 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4352 ; GFX900-NEXT: ;;#ASMSTART
4353 ; GFX900-NEXT: ; def s5
4354 ; GFX900-NEXT: ;;#ASMEND
4355 ; GFX900-NEXT: ;;#ASMSTART
4356 ; GFX900-NEXT: ; def s4
4357 ; GFX900-NEXT: ;;#ASMEND
4358 ; GFX900-NEXT: s_lshr_b32 s6, s4, 16
4359 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
4360 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s6
4361 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4362 ; GFX900-NEXT: ;;#ASMSTART
4363 ; GFX900-NEXT: ; use s[8:9]
4364 ; GFX900-NEXT: ;;#ASMEND
4365 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4367 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_1_0_0:
4369 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4370 ; GFX90A-NEXT: ;;#ASMSTART
4371 ; GFX90A-NEXT: ; def s5
4372 ; GFX90A-NEXT: ;;#ASMEND
4373 ; GFX90A-NEXT: ;;#ASMSTART
4374 ; GFX90A-NEXT: ; def s4
4375 ; GFX90A-NEXT: ;;#ASMEND
4376 ; GFX90A-NEXT: s_lshr_b32 s6, s4, 16
4377 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
4378 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s6
4379 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4380 ; GFX90A-NEXT: ;;#ASMSTART
4381 ; GFX90A-NEXT: ; use s[8:9]
4382 ; GFX90A-NEXT: ;;#ASMEND
4383 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4385 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_1_0_0:
4387 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4388 ; GFX940-NEXT: ;;#ASMSTART
4389 ; GFX940-NEXT: ; def s1
4390 ; GFX940-NEXT: ;;#ASMEND
4391 ; GFX940-NEXT: ;;#ASMSTART
4392 ; GFX940-NEXT: ; def s0
4393 ; GFX940-NEXT: ;;#ASMEND
4394 ; GFX940-NEXT: s_lshr_b32 s2, s0, 16
4395 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
4396 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2
4397 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
4398 ; GFX940-NEXT: ;;#ASMSTART
4399 ; GFX940-NEXT: ; use s[8:9]
4400 ; GFX940-NEXT: ;;#ASMEND
4401 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4402 %vec0 = call <2 x half> asm "; def $0", "=s"()
4403 %vec1 = call <2 x half> asm "; def $0", "=s"()
4404 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 1, i32 0, i32 0>
4405 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of two <2 x half> inline-asm values ("=s" constraint) with mask
; <3, 2, 0, 0>: the result lanes are <vec1[1], vec1[0], vec0[0], vec0[0]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
4409 define void @s_shuffle_v4f16_v2f16__3_2_0_0() {
4410 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_2_0_0:
4412 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4413 ; GFX900-NEXT: ;;#ASMSTART
4414 ; GFX900-NEXT: ; def s5
4415 ; GFX900-NEXT: ;;#ASMEND
4416 ; GFX900-NEXT: s_lshr_b32 s6, s5, 16
4417 ; GFX900-NEXT: ;;#ASMSTART
4418 ; GFX900-NEXT: ; def s4
4419 ; GFX900-NEXT: ;;#ASMEND
4420 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5
4421 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4422 ; GFX900-NEXT: ;;#ASMSTART
4423 ; GFX900-NEXT: ; use s[8:9]
4424 ; GFX900-NEXT: ;;#ASMEND
4425 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4427 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_2_0_0:
4429 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4430 ; GFX90A-NEXT: ;;#ASMSTART
4431 ; GFX90A-NEXT: ; def s5
4432 ; GFX90A-NEXT: ;;#ASMEND
4433 ; GFX90A-NEXT: s_lshr_b32 s6, s5, 16
4434 ; GFX90A-NEXT: ;;#ASMSTART
4435 ; GFX90A-NEXT: ; def s4
4436 ; GFX90A-NEXT: ;;#ASMEND
4437 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5
4438 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4439 ; GFX90A-NEXT: ;;#ASMSTART
4440 ; GFX90A-NEXT: ; use s[8:9]
4441 ; GFX90A-NEXT: ;;#ASMEND
4442 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4444 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_2_0_0:
4446 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4447 ; GFX940-NEXT: ;;#ASMSTART
4448 ; GFX940-NEXT: ; def s1
4449 ; GFX940-NEXT: ;;#ASMEND
4450 ; GFX940-NEXT: s_lshr_b32 s2, s1, 16
4451 ; GFX940-NEXT: ;;#ASMSTART
4452 ; GFX940-NEXT: ; def s0
4453 ; GFX940-NEXT: ;;#ASMEND
4454 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1
4455 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
4456 ; GFX940-NEXT: ;;#ASMSTART
4457 ; GFX940-NEXT: ; use s[8:9]
4458 ; GFX940-NEXT: ;;#ASMEND
4459 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4460 %vec0 = call <2 x half> asm "; def $0", "=s"()
4461 %vec1 = call <2 x half> asm "; def $0", "=s"()
4462 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 2, i32 0, i32 0>
4463 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of two <2 x half> inline-asm values ("=s" constraint) with mask
; <3, 3, 0, 0>: the result lanes are <vec1[1], vec1[1], vec0[0], vec0[0]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
4467 define void @s_shuffle_v4f16_v2f16__3_3_0_0() {
4468 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_0_0:
4470 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4471 ; GFX900-NEXT: ;;#ASMSTART
4472 ; GFX900-NEXT: ; def s5
4473 ; GFX900-NEXT: ;;#ASMEND
4474 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
4475 ; GFX900-NEXT: ;;#ASMSTART
4476 ; GFX900-NEXT: ; def s4
4477 ; GFX900-NEXT: ;;#ASMEND
4478 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
4479 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4480 ; GFX900-NEXT: ;;#ASMSTART
4481 ; GFX900-NEXT: ; use s[8:9]
4482 ; GFX900-NEXT: ;;#ASMEND
4483 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4485 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_0_0:
4487 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4488 ; GFX90A-NEXT: ;;#ASMSTART
4489 ; GFX90A-NEXT: ; def s5
4490 ; GFX90A-NEXT: ;;#ASMEND
4491 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
4492 ; GFX90A-NEXT: ;;#ASMSTART
4493 ; GFX90A-NEXT: ; def s4
4494 ; GFX90A-NEXT: ;;#ASMEND
4495 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
4496 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4497 ; GFX90A-NEXT: ;;#ASMSTART
4498 ; GFX90A-NEXT: ; use s[8:9]
4499 ; GFX90A-NEXT: ;;#ASMEND
4500 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4502 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_0_0:
4504 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4505 ; GFX940-NEXT: ;;#ASMSTART
4506 ; GFX940-NEXT: ; def s1
4507 ; GFX940-NEXT: ;;#ASMEND
4508 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
4509 ; GFX940-NEXT: ;;#ASMSTART
4510 ; GFX940-NEXT: ; def s0
4511 ; GFX940-NEXT: ;;#ASMEND
4512 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
4513 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
4514 ; GFX940-NEXT: ;;#ASMSTART
4515 ; GFX940-NEXT: ; use s[8:9]
4516 ; GFX940-NEXT: ;;#ASMEND
4517 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4518 %vec0 = call <2 x half> asm "; def $0", "=s"()
4519 %vec1 = call <2 x half> asm "; def $0", "=s"()
4520 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 0>
4521 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of two <2 x half> inline-asm values ("=s" constraint) with mask
; <3, 3, poison, 0>: the result lanes are <vec1[1], vec1[1], don't-care, vec0[0]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
4525 define void @s_shuffle_v4f16_v2f16__3_3_u_0() {
4526 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_u_0:
4528 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4529 ; GFX900-NEXT: ;;#ASMSTART
4530 ; GFX900-NEXT: ; def s5
4531 ; GFX900-NEXT: ;;#ASMEND
4532 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
4533 ; GFX900-NEXT: ;;#ASMSTART
4534 ; GFX900-NEXT: ; def s4
4535 ; GFX900-NEXT: ;;#ASMEND
4536 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
4537 ; GFX900-NEXT: s_lshl_b32 s9, s4, 16
4538 ; GFX900-NEXT: ;;#ASMSTART
4539 ; GFX900-NEXT: ; use s[8:9]
4540 ; GFX900-NEXT: ;;#ASMEND
4541 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4543 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_u_0:
4545 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4546 ; GFX90A-NEXT: ;;#ASMSTART
4547 ; GFX90A-NEXT: ; def s5
4548 ; GFX90A-NEXT: ;;#ASMEND
4549 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
4550 ; GFX90A-NEXT: ;;#ASMSTART
4551 ; GFX90A-NEXT: ; def s4
4552 ; GFX90A-NEXT: ;;#ASMEND
4553 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
4554 ; GFX90A-NEXT: s_lshl_b32 s9, s4, 16
4555 ; GFX90A-NEXT: ;;#ASMSTART
4556 ; GFX90A-NEXT: ; use s[8:9]
4557 ; GFX90A-NEXT: ;;#ASMEND
4558 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4560 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_u_0:
4562 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4563 ; GFX940-NEXT: ;;#ASMSTART
4564 ; GFX940-NEXT: ; def s1
4565 ; GFX940-NEXT: ;;#ASMEND
4566 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
4567 ; GFX940-NEXT: ;;#ASMSTART
4568 ; GFX940-NEXT: ; def s0
4569 ; GFX940-NEXT: ;;#ASMEND
4570 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
4571 ; GFX940-NEXT: s_lshl_b32 s9, s0, 16
4572 ; GFX940-NEXT: ;;#ASMSTART
4573 ; GFX940-NEXT: ; use s[8:9]
4574 ; GFX940-NEXT: ;;#ASMEND
4575 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4576 %vec0 = call <2 x half> asm "; def $0", "=s"()
4577 %vec1 = call <2 x half> asm "; def $0", "=s"()
4578 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 0>
4579 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of two <2 x half> inline-asm values ("=s" constraint) with mask
; <3, 3, 1, 0>: the result lanes are <vec1[1], vec1[1], vec0[1], vec0[0]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
4583 define void @s_shuffle_v4f16_v2f16__3_3_1_0() {
4584 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_1_0:
4586 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4587 ; GFX900-NEXT: ;;#ASMSTART
4588 ; GFX900-NEXT: ; def s4
4589 ; GFX900-NEXT: ;;#ASMEND
4590 ; GFX900-NEXT: s_lshr_b32 s6, s4, 16
4591 ; GFX900-NEXT: ;;#ASMSTART
4592 ; GFX900-NEXT: ; def s5
4593 ; GFX900-NEXT: ;;#ASMEND
4594 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s4
4595 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
4596 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
4597 ; GFX900-NEXT: ;;#ASMSTART
4598 ; GFX900-NEXT: ; use s[8:9]
4599 ; GFX900-NEXT: ;;#ASMEND
4600 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4602 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_1_0:
4604 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4605 ; GFX90A-NEXT: ;;#ASMSTART
4606 ; GFX90A-NEXT: ; def s4
4607 ; GFX90A-NEXT: ;;#ASMEND
4608 ; GFX90A-NEXT: s_lshr_b32 s6, s4, 16
4609 ; GFX90A-NEXT: ;;#ASMSTART
4610 ; GFX90A-NEXT: ; def s5
4611 ; GFX90A-NEXT: ;;#ASMEND
4612 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s4
4613 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
4614 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
4615 ; GFX90A-NEXT: ;;#ASMSTART
4616 ; GFX90A-NEXT: ; use s[8:9]
4617 ; GFX90A-NEXT: ;;#ASMEND
4618 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4620 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_1_0:
4622 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4623 ; GFX940-NEXT: ;;#ASMSTART
4624 ; GFX940-NEXT: ; def s0
4625 ; GFX940-NEXT: ;;#ASMEND
4626 ; GFX940-NEXT: s_lshr_b32 s2, s0, 16
4627 ; GFX940-NEXT: ;;#ASMSTART
4628 ; GFX940-NEXT: ; def s1
4629 ; GFX940-NEXT: ;;#ASMEND
4630 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0
4631 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
4632 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
4633 ; GFX940-NEXT: ;;#ASMSTART
4634 ; GFX940-NEXT: ; use s[8:9]
4635 ; GFX940-NEXT: ;;#ASMEND
4636 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4637 %vec0 = call <2 x half> asm "; def $0", "=s"()
4638 %vec1 = call <2 x half> asm "; def $0", "=s"()
4639 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
4640 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of two <2 x half> inline-asm values ("=s" constraint) with mask
; <3, 3, 2, 0>: the result lanes are <vec1[1], vec1[1], vec1[0], vec0[0]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
4644 define void @s_shuffle_v4f16_v2f16__3_3_2_0() {
4645 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_2_0:
4647 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4648 ; GFX900-NEXT: ;;#ASMSTART
4649 ; GFX900-NEXT: ; def s4
4650 ; GFX900-NEXT: ;;#ASMEND
4651 ; GFX900-NEXT: ;;#ASMSTART
4652 ; GFX900-NEXT: ; def s5
4653 ; GFX900-NEXT: ;;#ASMEND
4654 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4
4655 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
4656 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
4657 ; GFX900-NEXT: ;;#ASMSTART
4658 ; GFX900-NEXT: ; use s[8:9]
4659 ; GFX900-NEXT: ;;#ASMEND
4660 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4662 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_2_0:
4664 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4665 ; GFX90A-NEXT: ;;#ASMSTART
4666 ; GFX90A-NEXT: ; def s4
4667 ; GFX90A-NEXT: ;;#ASMEND
4668 ; GFX90A-NEXT: ;;#ASMSTART
4669 ; GFX90A-NEXT: ; def s5
4670 ; GFX90A-NEXT: ;;#ASMEND
4671 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4
4672 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
4673 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
4674 ; GFX90A-NEXT: ;;#ASMSTART
4675 ; GFX90A-NEXT: ; use s[8:9]
4676 ; GFX90A-NEXT: ;;#ASMEND
4677 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4679 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_2_0:
4681 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4682 ; GFX940-NEXT: ;;#ASMSTART
4683 ; GFX940-NEXT: ; def s0
4684 ; GFX940-NEXT: ;;#ASMEND
4685 ; GFX940-NEXT: ;;#ASMSTART
4686 ; GFX940-NEXT: ; def s1
4687 ; GFX940-NEXT: ;;#ASMEND
4688 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0
4689 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
4690 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
4691 ; GFX940-NEXT: ;;#ASMSTART
4692 ; GFX940-NEXT: ; use s[8:9]
4693 ; GFX940-NEXT: ;;#ASMEND
4694 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4695 %vec0 = call <2 x half> asm "; def $0", "=s"()
4696 %vec1 = call <2 x half> asm "; def $0", "=s"()
4697 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 0>
4698 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of one <2 x half> inline-asm value ("=s" constraint) against a poison
; second operand with mask <poison, 1, 1, 1>: the result lanes are
; <don't-care, vec0[1], vec0[1], vec0[1]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
4702 define void @s_shuffle_v4f16_v2f16__u_1_1_1() {
4703 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__u_1_1_1:
4705 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4706 ; GFX900-NEXT: ;;#ASMSTART
4707 ; GFX900-NEXT: ; def s8
4708 ; GFX900-NEXT: ;;#ASMEND
4709 ; GFX900-NEXT: s_lshr_b32 s4, s8, 16
4710 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4711 ; GFX900-NEXT: ;;#ASMSTART
4712 ; GFX900-NEXT: ; use s[8:9]
4713 ; GFX900-NEXT: ;;#ASMEND
4714 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4716 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__u_1_1_1:
4718 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4719 ; GFX90A-NEXT: ;;#ASMSTART
4720 ; GFX90A-NEXT: ; def s8
4721 ; GFX90A-NEXT: ;;#ASMEND
4722 ; GFX90A-NEXT: s_lshr_b32 s4, s8, 16
4723 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4724 ; GFX90A-NEXT: ;;#ASMSTART
4725 ; GFX90A-NEXT: ; use s[8:9]
4726 ; GFX90A-NEXT: ;;#ASMEND
4727 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4729 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__u_1_1_1:
4731 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4732 ; GFX940-NEXT: ;;#ASMSTART
4733 ; GFX940-NEXT: ; def s8
4734 ; GFX940-NEXT: ;;#ASMEND
4735 ; GFX940-NEXT: s_lshr_b32 s0, s8, 16
4736 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
4737 ; GFX940-NEXT: ;;#ASMSTART
4738 ; GFX940-NEXT: ; use s[8:9]
4739 ; GFX940-NEXT: ;;#ASMEND
4740 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4741 %vec0 = call <2 x half> asm "; def $0", "=s"()
4742 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
4743 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of one <2 x half> inline-asm value ("=s" constraint) against a poison
; second operand with mask <0, 1, 1, 1>: the result lanes are
; <vec0[0], vec0[1], vec0[1], vec0[1]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
4747 define void @s_shuffle_v4f16_v2f16__0_1_1_1() {
4748 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__0_1_1_1:
4750 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4751 ; GFX900-NEXT: ;;#ASMSTART
4752 ; GFX900-NEXT: ; def s8
4753 ; GFX900-NEXT: ;;#ASMEND
4754 ; GFX900-NEXT: s_lshr_b32 s4, s8, 16
4755 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4756 ; GFX900-NEXT: ;;#ASMSTART
4757 ; GFX900-NEXT: ; use s[8:9]
4758 ; GFX900-NEXT: ;;#ASMEND
4759 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4761 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__0_1_1_1:
4763 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4764 ; GFX90A-NEXT: ;;#ASMSTART
4765 ; GFX90A-NEXT: ; def s8
4766 ; GFX90A-NEXT: ;;#ASMEND
4767 ; GFX90A-NEXT: s_lshr_b32 s4, s8, 16
4768 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4769 ; GFX90A-NEXT: ;;#ASMSTART
4770 ; GFX90A-NEXT: ; use s[8:9]
4771 ; GFX90A-NEXT: ;;#ASMEND
4772 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4774 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__0_1_1_1:
4776 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4777 ; GFX940-NEXT: ;;#ASMSTART
4778 ; GFX940-NEXT: ; def s8
4779 ; GFX940-NEXT: ;;#ASMEND
4780 ; GFX940-NEXT: s_lshr_b32 s0, s8, 16
4781 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
4782 ; GFX940-NEXT: ;;#ASMSTART
4783 ; GFX940-NEXT: ; use s[8:9]
4784 ; GFX940-NEXT: ;;#ASMEND
4785 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4786 %vec0 = call <2 x half> asm "; def $0", "=s"()
4787 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
4788 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of one <2 x half> inline-asm value ("=s" constraint) against a poison
; second operand with mask <1, 1, 1, 1>: every result lane is vec0[1].
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
4792 define void @s_shuffle_v4f16_v2f16__1_1_1_1() {
4793 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__1_1_1_1:
4795 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4796 ; GFX900-NEXT: ;;#ASMSTART
4797 ; GFX900-NEXT: ; def s4
4798 ; GFX900-NEXT: ;;#ASMEND
4799 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
4800 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
4801 ; GFX900-NEXT: s_mov_b32 s9, s8
4802 ; GFX900-NEXT: ;;#ASMSTART
4803 ; GFX900-NEXT: ; use s[8:9]
4804 ; GFX900-NEXT: ;;#ASMEND
4805 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4807 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__1_1_1_1:
4809 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4810 ; GFX90A-NEXT: ;;#ASMSTART
4811 ; GFX90A-NEXT: ; def s4
4812 ; GFX90A-NEXT: ;;#ASMEND
4813 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
4814 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
4815 ; GFX90A-NEXT: s_mov_b32 s9, s8
4816 ; GFX90A-NEXT: ;;#ASMSTART
4817 ; GFX90A-NEXT: ; use s[8:9]
4818 ; GFX90A-NEXT: ;;#ASMEND
4819 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4821 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__1_1_1_1:
4823 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4824 ; GFX940-NEXT: ;;#ASMSTART
4825 ; GFX940-NEXT: ; def s0
4826 ; GFX940-NEXT: ;;#ASMEND
4827 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
4828 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
4829 ; GFX940-NEXT: s_mov_b32 s9, s8
4830 ; GFX940-NEXT: ;;#ASMSTART
4831 ; GFX940-NEXT: ; use s[8:9]
4832 ; GFX940-NEXT: ;;#ASMEND
4833 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4834 %vec0 = call <2 x half> asm "; def $0", "=s"()
4835 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
4836 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of one <2 x half> inline-asm value ("=s" constraint) against a poison
; second operand with mask <2, 1, 1, 1>: lane 0 indexes the poison operand, so
; the result lanes are <don't-care, vec0[1], vec0[1], vec0[1]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
4840 define void @s_shuffle_v4f16_v2f16__2_1_1_1() {
4841 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__2_1_1_1:
4843 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4844 ; GFX900-NEXT: ;;#ASMSTART
4845 ; GFX900-NEXT: ; def s8
4846 ; GFX900-NEXT: ;;#ASMEND
4847 ; GFX900-NEXT: s_lshr_b32 s4, s8, 16
4848 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4849 ; GFX900-NEXT: ;;#ASMSTART
4850 ; GFX900-NEXT: ; use s[8:9]
4851 ; GFX900-NEXT: ;;#ASMEND
4852 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4854 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__2_1_1_1:
4856 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4857 ; GFX90A-NEXT: ;;#ASMSTART
4858 ; GFX90A-NEXT: ; def s8
4859 ; GFX90A-NEXT: ;;#ASMEND
4860 ; GFX90A-NEXT: s_lshr_b32 s4, s8, 16
4861 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4862 ; GFX90A-NEXT: ;;#ASMSTART
4863 ; GFX90A-NEXT: ; use s[8:9]
4864 ; GFX90A-NEXT: ;;#ASMEND
4865 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4867 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__2_1_1_1:
4869 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4870 ; GFX940-NEXT: ;;#ASMSTART
4871 ; GFX940-NEXT: ; def s8
4872 ; GFX940-NEXT: ;;#ASMEND
4873 ; GFX940-NEXT: s_lshr_b32 s0, s8, 16
4874 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
4875 ; GFX940-NEXT: ;;#ASMSTART
4876 ; GFX940-NEXT: ; use s[8:9]
4877 ; GFX940-NEXT: ;;#ASMEND
4878 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4879 %vec0 = call <2 x half> asm "; def $0", "=s"()
4880 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
4881 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of two <2 x half> inline-asm values ("=s" constraint) with mask
; <3, 1, 1, 1>: the result lanes are <vec1[1], vec0[1], vec0[1], vec0[1]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
4885 define void @s_shuffle_v4f16_v2f16__3_1_1_1() {
4886 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_1_1_1:
4888 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4889 ; GFX900-NEXT: ;;#ASMSTART
4890 ; GFX900-NEXT: ; def s4
4891 ; GFX900-NEXT: ;;#ASMEND
4892 ; GFX900-NEXT: ;;#ASMSTART
4893 ; GFX900-NEXT: ; def s5
4894 ; GFX900-NEXT: ;;#ASMEND
4895 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
4896 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
4897 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4898 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4899 ; GFX900-NEXT: ;;#ASMSTART
4900 ; GFX900-NEXT: ; use s[8:9]
4901 ; GFX900-NEXT: ;;#ASMEND
4902 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4904 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_1_1_1:
4906 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4907 ; GFX90A-NEXT: ;;#ASMSTART
4908 ; GFX90A-NEXT: ; def s4
4909 ; GFX90A-NEXT: ;;#ASMEND
4910 ; GFX90A-NEXT: ;;#ASMSTART
4911 ; GFX90A-NEXT: ; def s5
4912 ; GFX90A-NEXT: ;;#ASMEND
4913 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
4914 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
4915 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4916 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4917 ; GFX90A-NEXT: ;;#ASMSTART
4918 ; GFX90A-NEXT: ; use s[8:9]
4919 ; GFX90A-NEXT: ;;#ASMEND
4920 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4922 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_1_1_1:
4924 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4925 ; GFX940-NEXT: ;;#ASMSTART
4926 ; GFX940-NEXT: ; def s0
4927 ; GFX940-NEXT: ;;#ASMEND
4928 ; GFX940-NEXT: ;;#ASMSTART
4929 ; GFX940-NEXT: ; def s1
4930 ; GFX940-NEXT: ;;#ASMEND
4931 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
4932 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
4933 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
4934 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
4935 ; GFX940-NEXT: ;;#ASMSTART
4936 ; GFX940-NEXT: ; use s[8:9]
4937 ; GFX940-NEXT: ;;#ASMEND
4938 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4939 %vec0 = call <2 x half> asm "; def $0", "=s"()
4940 %vec1 = call <2 x half> asm "; def $0", "=s"()
4941 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
4942 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of two <2 x half> inline-asm values ("=s" constraint) with mask
; <3, poison, 1, 1>: the result lanes are <vec1[1], don't-care, vec0[1], vec0[1]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
4946 define void @s_shuffle_v4f16_v2f16__3_u_1_1() {
4947 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_u_1_1:
4949 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4950 ; GFX900-NEXT: ;;#ASMSTART
4951 ; GFX900-NEXT: ; def s4
4952 ; GFX900-NEXT: ;;#ASMEND
4953 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
4954 ; GFX900-NEXT: ;;#ASMSTART
4955 ; GFX900-NEXT: ; def s5
4956 ; GFX900-NEXT: ;;#ASMEND
4957 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4958 ; GFX900-NEXT: s_lshr_b32 s8, s5, 16
4959 ; GFX900-NEXT: ;;#ASMSTART
4960 ; GFX900-NEXT: ; use s[8:9]
4961 ; GFX900-NEXT: ;;#ASMEND
4962 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4964 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_u_1_1:
4966 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4967 ; GFX90A-NEXT: ;;#ASMSTART
4968 ; GFX90A-NEXT: ; def s4
4969 ; GFX90A-NEXT: ;;#ASMEND
4970 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
4971 ; GFX90A-NEXT: ;;#ASMSTART
4972 ; GFX90A-NEXT: ; def s5
4973 ; GFX90A-NEXT: ;;#ASMEND
4974 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
4975 ; GFX90A-NEXT: s_lshr_b32 s8, s5, 16
4976 ; GFX90A-NEXT: ;;#ASMSTART
4977 ; GFX90A-NEXT: ; use s[8:9]
4978 ; GFX90A-NEXT: ;;#ASMEND
4979 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4981 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_u_1_1:
4983 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4984 ; GFX940-NEXT: ;;#ASMSTART
4985 ; GFX940-NEXT: ; def s0
4986 ; GFX940-NEXT: ;;#ASMEND
4987 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
4988 ; GFX940-NEXT: ;;#ASMSTART
4989 ; GFX940-NEXT: ; def s1
4990 ; GFX940-NEXT: ;;#ASMEND
4991 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
4992 ; GFX940-NEXT: s_lshr_b32 s8, s1, 16
4993 ; GFX940-NEXT: ;;#ASMSTART
4994 ; GFX940-NEXT: ; use s[8:9]
4995 ; GFX940-NEXT: ;;#ASMEND
4996 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4997 %vec0 = call <2 x half> asm "; def $0", "=s"()
4998 %vec1 = call <2 x half> asm "; def $0", "=s"()
4999 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 poison, i32 1, i32 1>
5000 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of two <2 x half> inline-asm values ("=s" constraint) with mask
; <3, 0, 1, 1>: the result lanes are <vec1[1], vec0[0], vec0[1], vec0[1]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
5004 define void @s_shuffle_v4f16_v2f16__3_0_1_1() {
5005 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_0_1_1:
5007 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5008 ; GFX900-NEXT: ;;#ASMSTART
5009 ; GFX900-NEXT: ; def s5
5010 ; GFX900-NEXT: ;;#ASMEND
5011 ; GFX900-NEXT: ;;#ASMSTART
5012 ; GFX900-NEXT: ; def s4
5013 ; GFX900-NEXT: ;;#ASMEND
5014 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
5015 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5016 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
5017 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
5018 ; GFX900-NEXT: ;;#ASMSTART
5019 ; GFX900-NEXT: ; use s[8:9]
5020 ; GFX900-NEXT: ;;#ASMEND
5021 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5023 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_0_1_1:
5025 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5026 ; GFX90A-NEXT: ;;#ASMSTART
5027 ; GFX90A-NEXT: ; def s5
5028 ; GFX90A-NEXT: ;;#ASMEND
5029 ; GFX90A-NEXT: ;;#ASMSTART
5030 ; GFX90A-NEXT: ; def s4
5031 ; GFX90A-NEXT: ;;#ASMEND
5032 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
5033 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5034 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
5035 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
5036 ; GFX90A-NEXT: ;;#ASMSTART
5037 ; GFX90A-NEXT: ; use s[8:9]
5038 ; GFX90A-NEXT: ;;#ASMEND
5039 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5041 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_0_1_1:
5043 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5044 ; GFX940-NEXT: ;;#ASMSTART
5045 ; GFX940-NEXT: ; def s1
5046 ; GFX940-NEXT: ;;#ASMEND
5047 ; GFX940-NEXT: ;;#ASMSTART
5048 ; GFX940-NEXT: ; def s0
5049 ; GFX940-NEXT: ;;#ASMEND
5050 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
5051 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
5052 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
5053 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
5054 ; GFX940-NEXT: ;;#ASMSTART
5055 ; GFX940-NEXT: ; use s[8:9]
5056 ; GFX940-NEXT: ;;#ASMEND
5057 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5058 %vec0 = call <2 x half> asm "; def $0", "=s"()
5059 %vec1 = call <2 x half> asm "; def $0", "=s"()
5060 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 0, i32 1, i32 1>
5061 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of two <2 x half> inline-asm values ("=s" constraint) with mask
; <3, 2, 1, 1>: the result lanes are <vec1[1], vec1[0], vec0[1], vec0[1]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
5065 define void @s_shuffle_v4f16_v2f16__3_2_1_1() {
5066 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_2_1_1:
5068 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5069 ; GFX900-NEXT: ;;#ASMSTART
5070 ; GFX900-NEXT: ; def s4
5071 ; GFX900-NEXT: ;;#ASMEND
5072 ; GFX900-NEXT: ;;#ASMSTART
5073 ; GFX900-NEXT: ; def s5
5074 ; GFX900-NEXT: ;;#ASMEND
5075 ; GFX900-NEXT: s_lshr_b32 s6, s5, 16
5076 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
5077 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5
5078 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
5079 ; GFX900-NEXT: ;;#ASMSTART
5080 ; GFX900-NEXT: ; use s[8:9]
5081 ; GFX900-NEXT: ;;#ASMEND
5082 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5084 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_2_1_1:
5086 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5087 ; GFX90A-NEXT: ;;#ASMSTART
5088 ; GFX90A-NEXT: ; def s4
5089 ; GFX90A-NEXT: ;;#ASMEND
5090 ; GFX90A-NEXT: ;;#ASMSTART
5091 ; GFX90A-NEXT: ; def s5
5092 ; GFX90A-NEXT: ;;#ASMEND
5093 ; GFX90A-NEXT: s_lshr_b32 s6, s5, 16
5094 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
5095 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5
5096 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
5097 ; GFX90A-NEXT: ;;#ASMSTART
5098 ; GFX90A-NEXT: ; use s[8:9]
5099 ; GFX90A-NEXT: ;;#ASMEND
5100 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5102 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_2_1_1:
5104 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5105 ; GFX940-NEXT: ;;#ASMSTART
5106 ; GFX940-NEXT: ; def s0
5107 ; GFX940-NEXT: ;;#ASMEND
5108 ; GFX940-NEXT: ;;#ASMSTART
5109 ; GFX940-NEXT: ; def s1
5110 ; GFX940-NEXT: ;;#ASMEND
5111 ; GFX940-NEXT: s_lshr_b32 s2, s1, 16
5112 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
5113 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1
5114 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
5115 ; GFX940-NEXT: ;;#ASMSTART
5116 ; GFX940-NEXT: ; use s[8:9]
5117 ; GFX940-NEXT: ;;#ASMEND
5118 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5119 %vec0 = call <2 x half> asm "; def $0", "=s"()
5120 %vec1 = call <2 x half> asm "; def $0", "=s"()
5121 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 2, i32 1, i32 1>
5122 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of two <2 x half> inline-asm values ("=s" constraint) with mask
; <3, 3, 1, 1>: the result lanes are <vec1[1], vec1[1], vec0[1], vec0[1]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
5126 define void @s_shuffle_v4f16_v2f16__3_3_1_1() {
5127 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_1_1:
5129 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5130 ; GFX900-NEXT: ;;#ASMSTART
5131 ; GFX900-NEXT: ; def s4
5132 ; GFX900-NEXT: ;;#ASMEND
5133 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
5134 ; GFX900-NEXT: ;;#ASMSTART
5135 ; GFX900-NEXT: ; def s5
5136 ; GFX900-NEXT: ;;#ASMEND
5137 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
5138 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
5139 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
5140 ; GFX900-NEXT: ;;#ASMSTART
5141 ; GFX900-NEXT: ; use s[8:9]
5142 ; GFX900-NEXT: ;;#ASMEND
5143 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5145 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_1_1:
5147 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5148 ; GFX90A-NEXT: ;;#ASMSTART
5149 ; GFX90A-NEXT: ; def s4
5150 ; GFX90A-NEXT: ;;#ASMEND
5151 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
5152 ; GFX90A-NEXT: ;;#ASMSTART
5153 ; GFX90A-NEXT: ; def s5
5154 ; GFX90A-NEXT: ;;#ASMEND
5155 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
5156 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
5157 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
5158 ; GFX90A-NEXT: ;;#ASMSTART
5159 ; GFX90A-NEXT: ; use s[8:9]
5160 ; GFX90A-NEXT: ;;#ASMEND
5161 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5163 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_1_1:
5165 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5166 ; GFX940-NEXT: ;;#ASMSTART
5167 ; GFX940-NEXT: ; def s0
5168 ; GFX940-NEXT: ;;#ASMEND
5169 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
5170 ; GFX940-NEXT: ;;#ASMSTART
5171 ; GFX940-NEXT: ; def s1
5172 ; GFX940-NEXT: ;;#ASMEND
5173 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
5174 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
5175 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
5176 ; GFX940-NEXT: ;;#ASMSTART
5177 ; GFX940-NEXT: ; use s[8:9]
5178 ; GFX940-NEXT: ;;#ASMEND
5179 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5180 %vec0 = call <2 x half> asm "; def $0", "=s"()
5181 %vec1 = call <2 x half> asm "; def $0", "=s"()
5182 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
5183 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle of two <2 x half> inline-asm values ("=s" constraint) with mask
; <3, 3, poison, 1>: the result lanes are <vec1[1], vec1[1], don't-care, vec0[1]>.
; The <4 x half> result is pinned to s[8:9] by the "use" inline asm.
; The check lines below are autogenerated; regenerate them instead of editing.
5187 define void @s_shuffle_v4f16_v2f16__3_3_u_1() {
5188 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_u_1:
5190 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5191 ; GFX900-NEXT: ;;#ASMSTART
5192 ; GFX900-NEXT: ; def s4
5193 ; GFX900-NEXT: ;;#ASMEND
5194 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
5195 ; GFX900-NEXT: ;;#ASMSTART
5196 ; GFX900-NEXT: ; def s9
5197 ; GFX900-NEXT: ;;#ASMEND
5198 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
5199 ; GFX900-NEXT: ;;#ASMSTART
5200 ; GFX900-NEXT: ; use s[8:9]
5201 ; GFX900-NEXT: ;;#ASMEND
5202 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5204 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_u_1:
5206 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5207 ; GFX90A-NEXT: ;;#ASMSTART
5208 ; GFX90A-NEXT: ; def s4
5209 ; GFX90A-NEXT: ;;#ASMEND
5210 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
5211 ; GFX90A-NEXT: ;;#ASMSTART
5212 ; GFX90A-NEXT: ; def s9
5213 ; GFX90A-NEXT: ;;#ASMEND
5214 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
5215 ; GFX90A-NEXT: ;;#ASMSTART
5216 ; GFX90A-NEXT: ; use s[8:9]
5217 ; GFX90A-NEXT: ;;#ASMEND
5218 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5220 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_u_1:
5222 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5223 ; GFX940-NEXT: ;;#ASMSTART
5224 ; GFX940-NEXT: ; def s0
5225 ; GFX940-NEXT: ;;#ASMEND
5226 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
5227 ; GFX940-NEXT: ;;#ASMSTART
5228 ; GFX940-NEXT: ; def s9
5229 ; GFX940-NEXT: ;;#ASMEND
5230 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
5231 ; GFX940-NEXT: ;;#ASMSTART
5232 ; GFX940-NEXT: ; use s[8:9]
5233 ; GFX940-NEXT: ;;#ASMEND
5234 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5235 %vec0 = call <2 x half> asm "; def $0", "=s"()
5236 %vec1 = call <2 x half> asm "; def $0", "=s"()
5237 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 1>
5238 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar-register shuffle of two <2 x half> inline-asm values into a <4 x half>
; with mask <3, 3, 0, 1>: lanes 0-1 both take element 1 of %vec1, and lanes 2-3
; are %vec0 unchanged. The expected output defines one input directly in s9 and
; only builds s8 with a shift and a pack. The result is consumed in s[8:9].
5242 define void @s_shuffle_v4f16_v2f16__3_3_0_1() {
5243 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_0_1:
5245 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5246 ; GFX900-NEXT: ;;#ASMSTART
5247 ; GFX900-NEXT: ; def s4
5248 ; GFX900-NEXT: ;;#ASMEND
5249 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
5250 ; GFX900-NEXT: ;;#ASMSTART
5251 ; GFX900-NEXT: ; def s9
5252 ; GFX900-NEXT: ;;#ASMEND
5253 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
5254 ; GFX900-NEXT: ;;#ASMSTART
5255 ; GFX900-NEXT: ; use s[8:9]
5256 ; GFX900-NEXT: ;;#ASMEND
5257 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5259 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_0_1:
5261 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5262 ; GFX90A-NEXT: ;;#ASMSTART
5263 ; GFX90A-NEXT: ; def s4
5264 ; GFX90A-NEXT: ;;#ASMEND
5265 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
5266 ; GFX90A-NEXT: ;;#ASMSTART
5267 ; GFX90A-NEXT: ; def s9
5268 ; GFX90A-NEXT: ;;#ASMEND
5269 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
5270 ; GFX90A-NEXT: ;;#ASMSTART
5271 ; GFX90A-NEXT: ; use s[8:9]
5272 ; GFX90A-NEXT: ;;#ASMEND
5273 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5275 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_0_1:
5277 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5278 ; GFX940-NEXT: ;;#ASMSTART
5279 ; GFX940-NEXT: ; def s0
5280 ; GFX940-NEXT: ;;#ASMEND
5281 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
5282 ; GFX940-NEXT: ;;#ASMSTART
5283 ; GFX940-NEXT: ; def s9
5284 ; GFX940-NEXT: ;;#ASMEND
5285 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
5286 ; GFX940-NEXT: ;;#ASMSTART
5287 ; GFX940-NEXT: ; use s[8:9]
5288 ; GFX940-NEXT: ;;#ASMEND
5289 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5290 %vec0 = call <2 x half> asm "; def $0", "=s"()
5291 %vec1 = call <2 x half> asm "; def $0", "=s"()
5292 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 1>
5293 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar-register shuffle of two <2 x half> inline-asm values into a <4 x half>
; with mask <3, 3, 2, 1>: lanes 0-1 both take element 1 of %vec1, lane 2 takes
; element 0 of %vec1, and lane 3 takes element 1 of %vec0. Both result dwords
; are built with s_pack_ll_b32_b16 before being consumed in s[8:9].
5297 define void @s_shuffle_v4f16_v2f16__3_3_2_1() {
5298 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_2_1:
5300 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5301 ; GFX900-NEXT: ;;#ASMSTART
5302 ; GFX900-NEXT: ; def s4
5303 ; GFX900-NEXT: ;;#ASMEND
5304 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
5305 ; GFX900-NEXT: ;;#ASMSTART
5306 ; GFX900-NEXT: ; def s5
5307 ; GFX900-NEXT: ;;#ASMEND
5308 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4
5309 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
5310 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
5311 ; GFX900-NEXT: ;;#ASMSTART
5312 ; GFX900-NEXT: ; use s[8:9]
5313 ; GFX900-NEXT: ;;#ASMEND
5314 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5316 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_2_1:
5318 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5319 ; GFX90A-NEXT: ;;#ASMSTART
5320 ; GFX90A-NEXT: ; def s4
5321 ; GFX90A-NEXT: ;;#ASMEND
5322 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
5323 ; GFX90A-NEXT: ;;#ASMSTART
5324 ; GFX90A-NEXT: ; def s5
5325 ; GFX90A-NEXT: ;;#ASMEND
5326 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4
5327 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
5328 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
5329 ; GFX90A-NEXT: ;;#ASMSTART
5330 ; GFX90A-NEXT: ; use s[8:9]
5331 ; GFX90A-NEXT: ;;#ASMEND
5332 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5334 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_2_1:
5336 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5337 ; GFX940-NEXT: ;;#ASMSTART
5338 ; GFX940-NEXT: ; def s0
5339 ; GFX940-NEXT: ;;#ASMEND
5340 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
5341 ; GFX940-NEXT: ;;#ASMSTART
5342 ; GFX940-NEXT: ; def s1
5343 ; GFX940-NEXT: ;;#ASMEND
5344 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0
5345 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
5346 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
5347 ; GFX940-NEXT: ;;#ASMSTART
5348 ; GFX940-NEXT: ; use s[8:9]
5349 ; GFX940-NEXT: ;;#ASMEND
5350 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5351 %vec0 = call <2 x half> asm "; def $0", "=s"()
5352 %vec1 = call <2 x half> asm "; def $0", "=s"()
5353 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 1>
5354 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle with mask <poison, 2, 2, 2> where the second shuffle operand is
; poison. Index 2 therefore selects a poison lane, no lane depends on %vec0,
; and the expected output contains only the "use" of s[8:9] with no def.
; NOTE(review): the function name suggests lanes taken from a second input, but
; only one inline-asm value is created here - confirm this is intended.
5358 define void @s_shuffle_v4f16_v2f16__u_2_2_2() {
5359 ; GFX9-LABEL: s_shuffle_v4f16_v2f16__u_2_2_2:
5361 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5362 ; GFX9-NEXT: ;;#ASMSTART
5363 ; GFX9-NEXT: ; use s[8:9]
5364 ; GFX9-NEXT: ;;#ASMEND
5365 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5366 %vec0 = call <2 x half> asm "; def $0", "=s"()
5367 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
5368 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle with mask <0, 2, 2, 2> where the second shuffle operand is poison.
; Only lane 0 (element 0 of %vec0) is meaningful, and the expected output
; defines the input directly in s8 with no shift or pack before the use of
; s[8:9]. The gfx940 expectation differs from the other two targets only by an
; s_nop 0 between the def and the use.
; NOTE(review): the function name suggests lanes taken from a second input, but
; only one inline-asm value is created here - confirm this is intended.
5372 define void @s_shuffle_v4f16_v2f16__0_2_2_2() {
5373 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__0_2_2_2:
5375 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5376 ; GFX900-NEXT: ;;#ASMSTART
5377 ; GFX900-NEXT: ; def s8
5378 ; GFX900-NEXT: ;;#ASMEND
5379 ; GFX900-NEXT: ;;#ASMSTART
5380 ; GFX900-NEXT: ; use s[8:9]
5381 ; GFX900-NEXT: ;;#ASMEND
5382 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5384 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__0_2_2_2:
5386 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5387 ; GFX90A-NEXT: ;;#ASMSTART
5388 ; GFX90A-NEXT: ; def s8
5389 ; GFX90A-NEXT: ;;#ASMEND
5390 ; GFX90A-NEXT: ;;#ASMSTART
5391 ; GFX90A-NEXT: ; use s[8:9]
5392 ; GFX90A-NEXT: ;;#ASMEND
5393 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5395 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__0_2_2_2:
5397 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5398 ; GFX940-NEXT: ;;#ASMSTART
5399 ; GFX940-NEXT: ; def s8
5400 ; GFX940-NEXT: ;;#ASMEND
5401 ; GFX940-NEXT: s_nop 0
5402 ; GFX940-NEXT: ;;#ASMSTART
5403 ; GFX940-NEXT: ; use s[8:9]
5404 ; GFX940-NEXT: ;;#ASMEND
5405 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5406 %vec0 = call <2 x half> asm "; def $0", "=s"()
5407 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
5408 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle with mask <1, 2, 2, 2> where the second shuffle operand is poison.
; Only lane 0 (element 1 of %vec0) is meaningful, and the expected output moves
; it into the low half of s8 with a single 16-bit right shift before the use of
; s[8:9].
; NOTE(review): the function name suggests lanes taken from a second input, but
; only one inline-asm value is created here - confirm this is intended.
5412 define void @s_shuffle_v4f16_v2f16__1_2_2_2() {
5413 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__1_2_2_2:
5415 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5416 ; GFX900-NEXT: ;;#ASMSTART
5417 ; GFX900-NEXT: ; def s4
5418 ; GFX900-NEXT: ;;#ASMEND
5419 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
5420 ; GFX900-NEXT: ;;#ASMSTART
5421 ; GFX900-NEXT: ; use s[8:9]
5422 ; GFX900-NEXT: ;;#ASMEND
5423 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5425 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__1_2_2_2:
5427 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5428 ; GFX90A-NEXT: ;;#ASMSTART
5429 ; GFX90A-NEXT: ; def s4
5430 ; GFX90A-NEXT: ;;#ASMEND
5431 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
5432 ; GFX90A-NEXT: ;;#ASMSTART
5433 ; GFX90A-NEXT: ; use s[8:9]
5434 ; GFX90A-NEXT: ;;#ASMEND
5435 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5437 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__1_2_2_2:
5439 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5440 ; GFX940-NEXT: ;;#ASMSTART
5441 ; GFX940-NEXT: ; def s0
5442 ; GFX940-NEXT: ;;#ASMEND
5443 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
5444 ; GFX940-NEXT: ;;#ASMSTART
5445 ; GFX940-NEXT: ; use s[8:9]
5446 ; GFX940-NEXT: ;;#ASMEND
5447 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5448 %vec0 = call <2 x half> asm "; def $0", "=s"()
5449 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
5450 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Shuffle with mask <2, 2, 2, 2> where the second shuffle operand is poison.
; Every lane selects from the poison operand, so nothing depends on %vec0 and
; the expected output contains only the "use" of s[8:9] with no def.
; NOTE(review): the function name suggests a splat of a second input's element
; 0, but only one inline-asm value is created here - confirm this is intended.
5454 define void @s_shuffle_v4f16_v2f16__2_2_2_2() {
5455 ; GFX9-LABEL: s_shuffle_v4f16_v2f16__2_2_2_2:
5457 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5458 ; GFX9-NEXT: ;;#ASMSTART
5459 ; GFX9-NEXT: ; use s[8:9]
5460 ; GFX9-NEXT: ;;#ASMEND
5461 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5462 %vec0 = call <2 x half> asm "; def $0", "=s"()
5463 %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
5464 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar-register shuffle with mask <3, 2, 2, 2>: lane 0 takes element 1 of
; %vec1 and lanes 1-3 take element 0 of %vec1. No mask index refers to %vec0,
; and the expected output contains a single def. The low dword (s8) is the
; swapped pair and the high dword (s9) is a splat of the low half.
5468 define void @s_shuffle_v4f16_v2f16__3_2_2_2() {
5469 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_2_2_2:
5471 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5472 ; GFX900-NEXT: ;;#ASMSTART
5473 ; GFX900-NEXT: ; def s4
5474 ; GFX900-NEXT: ;;#ASMEND
5475 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
5476 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5477 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
5478 ; GFX900-NEXT: ;;#ASMSTART
5479 ; GFX900-NEXT: ; use s[8:9]
5480 ; GFX900-NEXT: ;;#ASMEND
5481 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5483 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_2_2_2:
5485 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5486 ; GFX90A-NEXT: ;;#ASMSTART
5487 ; GFX90A-NEXT: ; def s4
5488 ; GFX90A-NEXT: ;;#ASMEND
5489 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
5490 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5491 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
5492 ; GFX90A-NEXT: ;;#ASMSTART
5493 ; GFX90A-NEXT: ; use s[8:9]
5494 ; GFX90A-NEXT: ;;#ASMEND
5495 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5497 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_2_2_2:
5499 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5500 ; GFX940-NEXT: ;;#ASMSTART
5501 ; GFX940-NEXT: ; def s0
5502 ; GFX940-NEXT: ;;#ASMEND
5503 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
5504 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
5505 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
5506 ; GFX940-NEXT: ;;#ASMSTART
5507 ; GFX940-NEXT: ; use s[8:9]
5508 ; GFX940-NEXT: ;;#ASMEND
5509 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5510 %vec0 = call <2 x half> asm "; def $0", "=s"()
5511 %vec1 = call <2 x half> asm "; def $0", "=s"()
5512 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
5513 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar-register shuffle with mask <3, poison, 2, 2>: lane 0 takes element 1
; of %vec1, lane 1 is unconstrained, and lanes 2-3 take element 0 of %vec1. No
; mask index refers to %vec0, and the expected output contains a single def.
; The low dword needs only a right shift because lane 1 may hold anything.
5517 define void @s_shuffle_v4f16_v2f16__3_u_2_2() {
5518 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_u_2_2:
5520 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5521 ; GFX900-NEXT: ;;#ASMSTART
5522 ; GFX900-NEXT: ; def s4
5523 ; GFX900-NEXT: ;;#ASMEND
5524 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
5525 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
5526 ; GFX900-NEXT: ;;#ASMSTART
5527 ; GFX900-NEXT: ; use s[8:9]
5528 ; GFX900-NEXT: ;;#ASMEND
5529 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5531 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_u_2_2:
5533 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5534 ; GFX90A-NEXT: ;;#ASMSTART
5535 ; GFX90A-NEXT: ; def s4
5536 ; GFX90A-NEXT: ;;#ASMEND
5537 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
5538 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
5539 ; GFX90A-NEXT: ;;#ASMSTART
5540 ; GFX90A-NEXT: ; use s[8:9]
5541 ; GFX90A-NEXT: ;;#ASMEND
5542 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5544 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_u_2_2:
5546 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5547 ; GFX940-NEXT: ;;#ASMSTART
5548 ; GFX940-NEXT: ; def s0
5549 ; GFX940-NEXT: ;;#ASMEND
5550 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
5551 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
5552 ; GFX940-NEXT: ;;#ASMSTART
5553 ; GFX940-NEXT: ; use s[8:9]
5554 ; GFX940-NEXT: ;;#ASMEND
5555 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5556 %vec0 = call <2 x half> asm "; def $0", "=s"()
5557 %vec1 = call <2 x half> asm "; def $0", "=s"()
5558 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 poison, i32 2, i32 2>
5559 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar-register shuffle with mask <3, 0, 2, 2>: lane 0 takes element 1 of
; %vec1, lane 1 takes element 0 of %vec0, and lanes 2-3 take element 0 of
; %vec1. Both inputs are live, so the expected output has two defs, one shift,
; and two packs feeding s[8:9].
5563 define void @s_shuffle_v4f16_v2f16__3_0_2_2() {
5564 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_0_2_2:
5566 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5567 ; GFX900-NEXT: ;;#ASMSTART
5568 ; GFX900-NEXT: ; def s5
5569 ; GFX900-NEXT: ;;#ASMEND
5570 ; GFX900-NEXT: s_lshr_b32 s6, s5, 16
5571 ; GFX900-NEXT: ;;#ASMSTART
5572 ; GFX900-NEXT: ; def s4
5573 ; GFX900-NEXT: ;;#ASMEND
5574 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4
5575 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5
5576 ; GFX900-NEXT: ;;#ASMSTART
5577 ; GFX900-NEXT: ; use s[8:9]
5578 ; GFX900-NEXT: ;;#ASMEND
5579 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5581 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_0_2_2:
5583 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5584 ; GFX90A-NEXT: ;;#ASMSTART
5585 ; GFX90A-NEXT: ; def s5
5586 ; GFX90A-NEXT: ;;#ASMEND
5587 ; GFX90A-NEXT: s_lshr_b32 s6, s5, 16
5588 ; GFX90A-NEXT: ;;#ASMSTART
5589 ; GFX90A-NEXT: ; def s4
5590 ; GFX90A-NEXT: ;;#ASMEND
5591 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4
5592 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5
5593 ; GFX90A-NEXT: ;;#ASMSTART
5594 ; GFX90A-NEXT: ; use s[8:9]
5595 ; GFX90A-NEXT: ;;#ASMEND
5596 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5598 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_0_2_2:
5600 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5601 ; GFX940-NEXT: ;;#ASMSTART
5602 ; GFX940-NEXT: ; def s1
5603 ; GFX940-NEXT: ;;#ASMEND
5604 ; GFX940-NEXT: s_lshr_b32 s2, s1, 16
5605 ; GFX940-NEXT: ;;#ASMSTART
5606 ; GFX940-NEXT: ; def s0
5607 ; GFX940-NEXT: ;;#ASMEND
5608 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0
5609 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1
5610 ; GFX940-NEXT: ;;#ASMSTART
5611 ; GFX940-NEXT: ; use s[8:9]
5612 ; GFX940-NEXT: ;;#ASMEND
5613 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5614 %vec0 = call <2 x half> asm "; def $0", "=s"()
5615 %vec1 = call <2 x half> asm "; def $0", "=s"()
5616 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 0, i32 2, i32 2>
5617 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar-register shuffle with mask <3, 1, 2, 2>: lane 0 takes element 1 of
; %vec1, lane 1 takes element 1 of %vec0, and lanes 2-3 take element 0 of
; %vec1. Both high halves are extracted with a right shift before the two
; packs that build s[8:9].
5621 define void @s_shuffle_v4f16_v2f16__3_1_2_2() {
5622 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_1_2_2:
5624 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5625 ; GFX900-NEXT: ;;#ASMSTART
5626 ; GFX900-NEXT: ; def s4
5627 ; GFX900-NEXT: ;;#ASMEND
5628 ; GFX900-NEXT: ;;#ASMSTART
5629 ; GFX900-NEXT: ; def s5
5630 ; GFX900-NEXT: ;;#ASMEND
5631 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
5632 ; GFX900-NEXT: s_lshr_b32 s6, s5, 16
5633 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4
5634 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5
5635 ; GFX900-NEXT: ;;#ASMSTART
5636 ; GFX900-NEXT: ; use s[8:9]
5637 ; GFX900-NEXT: ;;#ASMEND
5638 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5640 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_1_2_2:
5642 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5643 ; GFX90A-NEXT: ;;#ASMSTART
5644 ; GFX90A-NEXT: ; def s4
5645 ; GFX90A-NEXT: ;;#ASMEND
5646 ; GFX90A-NEXT: ;;#ASMSTART
5647 ; GFX90A-NEXT: ; def s5
5648 ; GFX90A-NEXT: ;;#ASMEND
5649 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
5650 ; GFX90A-NEXT: s_lshr_b32 s6, s5, 16
5651 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4
5652 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5
5653 ; GFX90A-NEXT: ;;#ASMSTART
5654 ; GFX90A-NEXT: ; use s[8:9]
5655 ; GFX90A-NEXT: ;;#ASMEND
5656 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5658 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_1_2_2:
5660 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5661 ; GFX940-NEXT: ;;#ASMSTART
5662 ; GFX940-NEXT: ; def s0
5663 ; GFX940-NEXT: ;;#ASMEND
5664 ; GFX940-NEXT: ;;#ASMSTART
5665 ; GFX940-NEXT: ; def s1
5666 ; GFX940-NEXT: ;;#ASMEND
5667 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
5668 ; GFX940-NEXT: s_lshr_b32 s2, s1, 16
5669 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0
5670 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1
5671 ; GFX940-NEXT: ;;#ASMSTART
5672 ; GFX940-NEXT: ; use s[8:9]
5673 ; GFX940-NEXT: ;;#ASMEND
5674 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5675 %vec0 = call <2 x half> asm "; def $0", "=s"()
5676 %vec1 = call <2 x half> asm "; def $0", "=s"()
5677 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 1, i32 2, i32 2>
5678 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar-register shuffle with mask <3, 3, 2, 2>: lanes 0-1 splat element 1 of
; %vec1 and lanes 2-3 splat element 0 of %vec1. No mask index refers to %vec0,
; and the expected output contains a single def followed by one shift and two
; packs feeding s[8:9].
5682 define void @s_shuffle_v4f16_v2f16__3_3_2_2() {
5683 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_2_2:
5685 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5686 ; GFX900-NEXT: ;;#ASMSTART
5687 ; GFX900-NEXT: ; def s4
5688 ; GFX900-NEXT: ;;#ASMEND
5689 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
5690 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
5691 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
5692 ; GFX900-NEXT: ;;#ASMSTART
5693 ; GFX900-NEXT: ; use s[8:9]
5694 ; GFX900-NEXT: ;;#ASMEND
5695 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5697 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_2_2:
5699 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5700 ; GFX90A-NEXT: ;;#ASMSTART
5701 ; GFX90A-NEXT: ; def s4
5702 ; GFX90A-NEXT: ;;#ASMEND
5703 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
5704 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
5705 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
5706 ; GFX90A-NEXT: ;;#ASMSTART
5707 ; GFX90A-NEXT: ; use s[8:9]
5708 ; GFX90A-NEXT: ;;#ASMEND
5709 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5711 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_2_2:
5713 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5714 ; GFX940-NEXT: ;;#ASMSTART
5715 ; GFX940-NEXT: ; def s0
5716 ; GFX940-NEXT: ;;#ASMEND
5717 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
5718 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
5719 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
5720 ; GFX940-NEXT: ;;#ASMSTART
5721 ; GFX940-NEXT: ; use s[8:9]
5722 ; GFX940-NEXT: ;;#ASMEND
5723 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5724 %vec0 = call <2 x half> asm "; def $0", "=s"()
5725 %vec1 = call <2 x half> asm "; def $0", "=s"()
5726 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 2>
5727 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar-register shuffle with mask <3, 3, poison, 2>: lanes 0-1 splat element
; 1 of %vec1, lane 2 is unconstrained, and lane 3 takes element 0 of %vec1. No
; mask index refers to %vec0, and the expected output contains a single def.
; The high dword needs only a left shift because lane 2 may hold anything.
5731 define void @s_shuffle_v4f16_v2f16__3_3_u_2() {
5732 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_u_2:
5734 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5735 ; GFX900-NEXT: ;;#ASMSTART
5736 ; GFX900-NEXT: ; def s4
5737 ; GFX900-NEXT: ;;#ASMEND
5738 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
5739 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
5740 ; GFX900-NEXT: s_lshl_b32 s9, s4, 16
5741 ; GFX900-NEXT: ;;#ASMSTART
5742 ; GFX900-NEXT: ; use s[8:9]
5743 ; GFX900-NEXT: ;;#ASMEND
5744 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5746 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_u_2:
5748 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5749 ; GFX90A-NEXT: ;;#ASMSTART
5750 ; GFX90A-NEXT: ; def s4
5751 ; GFX90A-NEXT: ;;#ASMEND
5752 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
5753 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
5754 ; GFX90A-NEXT: s_lshl_b32 s9, s4, 16
5755 ; GFX90A-NEXT: ;;#ASMSTART
5756 ; GFX90A-NEXT: ; use s[8:9]
5757 ; GFX90A-NEXT: ;;#ASMEND
5758 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5760 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_u_2:
5762 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5763 ; GFX940-NEXT: ;;#ASMSTART
5764 ; GFX940-NEXT: ; def s0
5765 ; GFX940-NEXT: ;;#ASMEND
5766 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
5767 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
5768 ; GFX940-NEXT: s_lshl_b32 s9, s0, 16
5769 ; GFX940-NEXT: ;;#ASMSTART
5770 ; GFX940-NEXT: ; use s[8:9]
5771 ; GFX940-NEXT: ;;#ASMEND
5772 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5773 %vec0 = call <2 x half> asm "; def $0", "=s"()
5774 %vec1 = call <2 x half> asm "; def $0", "=s"()
5775 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 2>
5776 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar-register shuffle with mask <3, 3, 0, 2>: lanes 0-1 splat element 1 of
; %vec1, lane 2 takes element 0 of %vec0, and lane 3 takes element 0 of %vec1.
; The high dword packs the two low halves directly. The low dword needs a
; shift before its pack. The result is consumed in s[8:9].
5780 define void @s_shuffle_v4f16_v2f16__3_3_0_2() {
5781 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_0_2:
5783 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5784 ; GFX900-NEXT: ;;#ASMSTART
5785 ; GFX900-NEXT: ; def s4
5786 ; GFX900-NEXT: ;;#ASMEND
5787 ; GFX900-NEXT: ;;#ASMSTART
5788 ; GFX900-NEXT: ; def s5
5789 ; GFX900-NEXT: ;;#ASMEND
5790 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5
5791 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
5792 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
5793 ; GFX900-NEXT: ;;#ASMSTART
5794 ; GFX900-NEXT: ; use s[8:9]
5795 ; GFX900-NEXT: ;;#ASMEND
5796 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5798 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_0_2:
5800 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5801 ; GFX90A-NEXT: ;;#ASMSTART
5802 ; GFX90A-NEXT: ; def s4
5803 ; GFX90A-NEXT: ;;#ASMEND
5804 ; GFX90A-NEXT: ;;#ASMSTART
5805 ; GFX90A-NEXT: ; def s5
5806 ; GFX90A-NEXT: ;;#ASMEND
5807 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5
5808 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
5809 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
5810 ; GFX90A-NEXT: ;;#ASMSTART
5811 ; GFX90A-NEXT: ; use s[8:9]
5812 ; GFX90A-NEXT: ;;#ASMEND
5813 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5815 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_0_2:
5817 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5818 ; GFX940-NEXT: ;;#ASMSTART
5819 ; GFX940-NEXT: ; def s0
5820 ; GFX940-NEXT: ;;#ASMEND
5821 ; GFX940-NEXT: ;;#ASMSTART
5822 ; GFX940-NEXT: ; def s1
5823 ; GFX940-NEXT: ;;#ASMEND
5824 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1
5825 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
5826 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
5827 ; GFX940-NEXT: ;;#ASMSTART
5828 ; GFX940-NEXT: ; use s[8:9]
5829 ; GFX940-NEXT: ;;#ASMEND
5830 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5831 %vec0 = call <2 x half> asm "; def $0", "=s"()
5832 %vec1 = call <2 x half> asm "; def $0", "=s"()
5833 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 2>
5834 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar-register shuffle with mask <3, 3, 1, 2>: lanes 0-1 splat element 1 of
; %vec1, lane 2 takes element 1 of %vec0, and lane 3 takes element 0 of %vec1.
; Each result dword is assembled from a shifted high half via
; s_pack_ll_b32_b16 before being consumed in s[8:9].
5838 define void @s_shuffle_v4f16_v2f16__3_3_1_2() {
5839 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_1_2:
5841 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5842 ; GFX900-NEXT: ;;#ASMSTART
5843 ; GFX900-NEXT: ; def s4
5844 ; GFX900-NEXT: ;;#ASMEND
5845 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
5846 ; GFX900-NEXT: ;;#ASMSTART
5847 ; GFX900-NEXT: ; def s5
5848 ; GFX900-NEXT: ;;#ASMEND
5849 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5
5850 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
5851 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
5852 ; GFX900-NEXT: ;;#ASMSTART
5853 ; GFX900-NEXT: ; use s[8:9]
5854 ; GFX900-NEXT: ;;#ASMEND
5855 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5857 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_1_2:
5859 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5860 ; GFX90A-NEXT: ;;#ASMSTART
5861 ; GFX90A-NEXT: ; def s4
5862 ; GFX90A-NEXT: ;;#ASMEND
5863 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
5864 ; GFX90A-NEXT: ;;#ASMSTART
5865 ; GFX90A-NEXT: ; def s5
5866 ; GFX90A-NEXT: ;;#ASMEND
5867 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5
5868 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
5869 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
5870 ; GFX90A-NEXT: ;;#ASMSTART
5871 ; GFX90A-NEXT: ; use s[8:9]
5872 ; GFX90A-NEXT: ;;#ASMEND
5873 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5875 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_1_2:
5877 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5878 ; GFX940-NEXT: ;;#ASMSTART
5879 ; GFX940-NEXT: ; def s0
5880 ; GFX940-NEXT: ;;#ASMEND
5881 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
5882 ; GFX940-NEXT: ;;#ASMSTART
5883 ; GFX940-NEXT: ; def s1
5884 ; GFX940-NEXT: ;;#ASMEND
5885 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1
5886 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
5887 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
5888 ; GFX940-NEXT: ;;#ASMSTART
5889 ; GFX940-NEXT: ; use s[8:9]
5890 ; GFX940-NEXT: ;;#ASMEND
5891 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5892 %vec0 = call <2 x half> asm "; def $0", "=s"()
5893 %vec1 = call <2 x half> asm "; def $0", "=s"()
5894 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 2>
5895 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar-register shuffle with mask <poison, 3, 3, 3>: lane 0 is unconstrained
; and lanes 1-3 take element 1 of %vec1. No mask index refers to %vec0, and the
; expected output contains a single def. That def lands directly in s8, so
; only the high dword s9 has to be built with a shift and a pack.
5899 define void @s_shuffle_v4f16_v2f16__u_3_3_3() {
5900 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__u_3_3_3:
5902 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5903 ; GFX900-NEXT: ;;#ASMSTART
5904 ; GFX900-NEXT: ; def s8
5905 ; GFX900-NEXT: ;;#ASMEND
5906 ; GFX900-NEXT: s_lshr_b32 s4, s8, 16
5907 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
5908 ; GFX900-NEXT: ;;#ASMSTART
5909 ; GFX900-NEXT: ; use s[8:9]
5910 ; GFX900-NEXT: ;;#ASMEND
5911 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5913 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__u_3_3_3:
5915 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5916 ; GFX90A-NEXT: ;;#ASMSTART
5917 ; GFX90A-NEXT: ; def s8
5918 ; GFX90A-NEXT: ;;#ASMEND
5919 ; GFX90A-NEXT: s_lshr_b32 s4, s8, 16
5920 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
5921 ; GFX90A-NEXT: ;;#ASMSTART
5922 ; GFX90A-NEXT: ; use s[8:9]
5923 ; GFX90A-NEXT: ;;#ASMEND
5924 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5926 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__u_3_3_3:
5928 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5929 ; GFX940-NEXT: ;;#ASMSTART
5930 ; GFX940-NEXT: ; def s8
5931 ; GFX940-NEXT: ;;#ASMEND
5932 ; GFX940-NEXT: s_lshr_b32 s0, s8, 16
5933 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
5934 ; GFX940-NEXT: ;;#ASMSTART
5935 ; GFX940-NEXT: ; use s[8:9]
5936 ; GFX940-NEXT: ;;#ASMEND
5937 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5938 %vec0 = call <2 x half> asm "; def $0", "=s"()
5939 %vec1 = call <2 x half> asm "; def $0", "=s"()
5940 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
5941 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar-register shuffle with mask <0, 3, 3, 3>: lane 0 takes element 0 of
; %vec0 and lanes 1-3 take element 1 of %vec1. Both inputs are live, so the
; expected output has two defs, one shift, and two packs feeding s[8:9].
5945 define void @s_shuffle_v4f16_v2f16__0_3_3_3() {
5946 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__0_3_3_3:
5948 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5949 ; GFX900-NEXT: ;;#ASMSTART
5950 ; GFX900-NEXT: ; def s5
5951 ; GFX900-NEXT: ;;#ASMEND
5952 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
5953 ; GFX900-NEXT: ;;#ASMSTART
5954 ; GFX900-NEXT: ; def s4
5955 ; GFX900-NEXT: ;;#ASMEND
5956 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
5957 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5
5958 ; GFX900-NEXT: ;;#ASMSTART
5959 ; GFX900-NEXT: ; use s[8:9]
5960 ; GFX900-NEXT: ;;#ASMEND
5961 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5963 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__0_3_3_3:
5965 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5966 ; GFX90A-NEXT: ;;#ASMSTART
5967 ; GFX90A-NEXT: ; def s5
5968 ; GFX90A-NEXT: ;;#ASMEND
5969 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
5970 ; GFX90A-NEXT: ;;#ASMSTART
5971 ; GFX90A-NEXT: ; def s4
5972 ; GFX90A-NEXT: ;;#ASMEND
5973 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
5974 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5
5975 ; GFX90A-NEXT: ;;#ASMSTART
5976 ; GFX90A-NEXT: ; use s[8:9]
5977 ; GFX90A-NEXT: ;;#ASMEND
5978 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5980 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__0_3_3_3:
5982 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5983 ; GFX940-NEXT: ;;#ASMSTART
5984 ; GFX940-NEXT: ; def s1
5985 ; GFX940-NEXT: ;;#ASMEND
5986 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
5987 ; GFX940-NEXT: ;;#ASMSTART
5988 ; GFX940-NEXT: ; def s0
5989 ; GFX940-NEXT: ;;#ASMEND
5990 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
5991 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1
5992 ; GFX940-NEXT: ;;#ASMSTART
5993 ; GFX940-NEXT: ; use s[8:9]
5994 ; GFX940-NEXT: ;;#ASMEND
5995 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5996 %vec0 = call <2 x half> asm "; def $0", "=s"()
5997 %vec1 = call <2 x half> asm "; def $0", "=s"()
5998 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
5999 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar-register shuffle with mask <1, 3, 3, 3>: lane 0 takes element 1 of
; %vec0 and lanes 1-3 take element 1 of %vec1. Only the high halves of the
; inputs are used, so both defs are shifted right by 16 before the two packs
; that build s[8:9].
6003 define void @s_shuffle_v4f16_v2f16__1_3_3_3() {
6004 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__1_3_3_3:
6006 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6007 ; GFX900-NEXT: ;;#ASMSTART
6008 ; GFX900-NEXT: ; def s4
6009 ; GFX900-NEXT: ;;#ASMEND
6010 ; GFX900-NEXT: ;;#ASMSTART
6011 ; GFX900-NEXT: ; def s5
6012 ; GFX900-NEXT: ;;#ASMEND
6013 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
6014 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
6015 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
6016 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5
6017 ; GFX900-NEXT: ;;#ASMSTART
6018 ; GFX900-NEXT: ; use s[8:9]
6019 ; GFX900-NEXT: ;;#ASMEND
6020 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6022 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__1_3_3_3:
6024 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6025 ; GFX90A-NEXT: ;;#ASMSTART
6026 ; GFX90A-NEXT: ; def s4
6027 ; GFX90A-NEXT: ;;#ASMEND
6028 ; GFX90A-NEXT: ;;#ASMSTART
6029 ; GFX90A-NEXT: ; def s5
6030 ; GFX90A-NEXT: ;;#ASMEND
6031 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
6032 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
6033 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
6034 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5
6035 ; GFX90A-NEXT: ;;#ASMSTART
6036 ; GFX90A-NEXT: ; use s[8:9]
6037 ; GFX90A-NEXT: ;;#ASMEND
6038 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6040 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__1_3_3_3:
6042 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6043 ; GFX940-NEXT: ;;#ASMSTART
6044 ; GFX940-NEXT: ; def s0
6045 ; GFX940-NEXT: ;;#ASMEND
6046 ; GFX940-NEXT: ;;#ASMSTART
6047 ; GFX940-NEXT: ; def s1
6048 ; GFX940-NEXT: ;;#ASMEND
6049 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
6050 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
6051 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
6052 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1
6053 ; GFX940-NEXT: ;;#ASMSTART
6054 ; GFX940-NEXT: ; use s[8:9]
6055 ; GFX940-NEXT: ;;#ASMEND
6056 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6057 %vec0 = call <2 x half> asm "; def $0", "=s"()
6058 %vec1 = call <2 x half> asm "; def $0", "=s"()
6059 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
6060 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Scalar-register shuffle with mask <2, 3, 3, 3>: lanes 0-1 are %vec1 unchanged
; and lanes 2-3 splat element 1 of %vec1. No mask index refers to %vec0, and
; the expected output contains a single def. That def lands directly in s8,
; so only the high dword s9 has to be built with a shift and a pack.
6064 define void @s_shuffle_v4f16_v2f16__2_3_3_3() {
6065 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__2_3_3_3:
6067 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6068 ; GFX900-NEXT: ;;#ASMSTART
6069 ; GFX900-NEXT: ; def s8
6070 ; GFX900-NEXT: ;;#ASMEND
6071 ; GFX900-NEXT: s_lshr_b32 s4, s8, 16
6072 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4
6073 ; GFX900-NEXT: ;;#ASMSTART
6074 ; GFX900-NEXT: ; use s[8:9]
6075 ; GFX900-NEXT: ;;#ASMEND
6076 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6078 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__2_3_3_3:
6080 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6081 ; GFX90A-NEXT: ;;#ASMSTART
6082 ; GFX90A-NEXT: ; def s8
6083 ; GFX90A-NEXT: ;;#ASMEND
6084 ; GFX90A-NEXT: s_lshr_b32 s4, s8, 16
6085 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4
6086 ; GFX90A-NEXT: ;;#ASMSTART
6087 ; GFX90A-NEXT: ; use s[8:9]
6088 ; GFX90A-NEXT: ;;#ASMEND
6089 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6091 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__2_3_3_3:
6093 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6094 ; GFX940-NEXT: ;;#ASMSTART
6095 ; GFX940-NEXT: ; def s8
6096 ; GFX940-NEXT: ;;#ASMEND
6097 ; GFX940-NEXT: s_lshr_b32 s0, s8, 16
6098 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0
6099 ; GFX940-NEXT: ;;#ASMSTART
6100 ; GFX940-NEXT: ; use s[8:9]
6101 ; GFX940-NEXT: ;;#ASMEND
6102 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6103 %vec0 = call <2 x half> asm "; def $0", "=s"()
6104 %vec1 = call <2 x half> asm "; def $0", "=s"()
6105 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
6106 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Mask <3,poison,3,3> on "=s" inputs: lanes 0, 2 and 3 take element 1 of %vec1
; and lane 1 is poison, so %vec0 is unused and only one "; def" is expected.
; The checks expect s8 to be just the input shifted right by 16 (nothing is
; packed into the poison lane) and s9 to pack s8 with itself. The result is
; consumed through the "{s[8:9]}" constraint.
6110 define void @s_shuffle_v4f16_v2f16__3_u_3_3() {
6111 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_u_3_3:
6113 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6114 ; GFX900-NEXT: ;;#ASMSTART
6115 ; GFX900-NEXT: ; def s4
6116 ; GFX900-NEXT: ;;#ASMEND
6117 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
6118 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s8, s8
6119 ; GFX900-NEXT: ;;#ASMSTART
6120 ; GFX900-NEXT: ; use s[8:9]
6121 ; GFX900-NEXT: ;;#ASMEND
6122 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6124 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_u_3_3:
6126 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6127 ; GFX90A-NEXT: ;;#ASMSTART
6128 ; GFX90A-NEXT: ; def s4
6129 ; GFX90A-NEXT: ;;#ASMEND
6130 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
6131 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s8, s8
6132 ; GFX90A-NEXT: ;;#ASMSTART
6133 ; GFX90A-NEXT: ; use s[8:9]
6134 ; GFX90A-NEXT: ;;#ASMEND
6135 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6137 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_u_3_3:
6139 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6140 ; GFX940-NEXT: ;;#ASMSTART
6141 ; GFX940-NEXT: ; def s0
6142 ; GFX940-NEXT: ;;#ASMEND
6143 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
6144 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s8, s8
6145 ; GFX940-NEXT: ;;#ASMSTART
6146 ; GFX940-NEXT: ; use s[8:9]
6147 ; GFX940-NEXT: ;;#ASMEND
6148 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6149 %vec0 = call <2 x half> asm "; def $0", "=s"()
6150 %vec1 = call <2 x half> asm "; def $0", "=s"()
6151 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 poison, i32 3, i32 3>
6152 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Mask <3,0,3,3> on "=s" inputs: lane 1 takes element 0 of %vec0, the other
; lanes take element 1 of %vec1. Both inputs are live, so two "; def" blocks
; are expected. In the expected output the register that is shifted right by
; 16 (feeding lanes 0, 2 and 3) is defined first; the other register is used
; unshifted as the second pack source for s8. The result is consumed through
; the "{s[8:9]}" constraint.
6156 define void @s_shuffle_v4f16_v2f16__3_0_3_3() {
6157 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_0_3_3:
6159 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6160 ; GFX900-NEXT: ;;#ASMSTART
6161 ; GFX900-NEXT: ; def s5
6162 ; GFX900-NEXT: ;;#ASMEND
6163 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
6164 ; GFX900-NEXT: ;;#ASMSTART
6165 ; GFX900-NEXT: ; def s4
6166 ; GFX900-NEXT: ;;#ASMEND
6167 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
6168 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5
6169 ; GFX900-NEXT: ;;#ASMSTART
6170 ; GFX900-NEXT: ; use s[8:9]
6171 ; GFX900-NEXT: ;;#ASMEND
6172 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6174 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_0_3_3:
6176 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6177 ; GFX90A-NEXT: ;;#ASMSTART
6178 ; GFX90A-NEXT: ; def s5
6179 ; GFX90A-NEXT: ;;#ASMEND
6180 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
6181 ; GFX90A-NEXT: ;;#ASMSTART
6182 ; GFX90A-NEXT: ; def s4
6183 ; GFX90A-NEXT: ;;#ASMEND
6184 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
6185 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5
6186 ; GFX90A-NEXT: ;;#ASMSTART
6187 ; GFX90A-NEXT: ; use s[8:9]
6188 ; GFX90A-NEXT: ;;#ASMEND
6189 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6191 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_0_3_3:
6193 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6194 ; GFX940-NEXT: ;;#ASMSTART
6195 ; GFX940-NEXT: ; def s1
6196 ; GFX940-NEXT: ;;#ASMEND
6197 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
6198 ; GFX940-NEXT: ;;#ASMSTART
6199 ; GFX940-NEXT: ; def s0
6200 ; GFX940-NEXT: ;;#ASMEND
6201 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
6202 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1
6203 ; GFX940-NEXT: ;;#ASMSTART
6204 ; GFX940-NEXT: ; use s[8:9]
6205 ; GFX940-NEXT: ;;#ASMEND
6206 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6207 %vec0 = call <2 x half> asm "; def $0", "=s"()
6208 %vec1 = call <2 x half> asm "; def $0", "=s"()
6209 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 0, i32 3, i32 3>
6210 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Mask <3,1,3,3> on "=s" inputs: lane 1 takes element 1 of %vec0, the other
; lanes take element 1 of %vec1. Only high halves are selected, so the checks
; expect both defined registers to be shifted right by 16 before the two
; packs that form s8 and s9. The result is consumed through the "{s[8:9]}"
; constraint.
6214 define void @s_shuffle_v4f16_v2f16__3_1_3_3() {
6215 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_1_3_3:
6217 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6218 ; GFX900-NEXT: ;;#ASMSTART
6219 ; GFX900-NEXT: ; def s4
6220 ; GFX900-NEXT: ;;#ASMEND
6221 ; GFX900-NEXT: ;;#ASMSTART
6222 ; GFX900-NEXT: ; def s5
6223 ; GFX900-NEXT: ;;#ASMEND
6224 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
6225 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
6226 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
6227 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5
6228 ; GFX900-NEXT: ;;#ASMSTART
6229 ; GFX900-NEXT: ; use s[8:9]
6230 ; GFX900-NEXT: ;;#ASMEND
6231 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6233 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_1_3_3:
6235 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6236 ; GFX90A-NEXT: ;;#ASMSTART
6237 ; GFX90A-NEXT: ; def s4
6238 ; GFX90A-NEXT: ;;#ASMEND
6239 ; GFX90A-NEXT: ;;#ASMSTART
6240 ; GFX90A-NEXT: ; def s5
6241 ; GFX90A-NEXT: ;;#ASMEND
6242 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
6243 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
6244 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
6245 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5
6246 ; GFX90A-NEXT: ;;#ASMSTART
6247 ; GFX90A-NEXT: ; use s[8:9]
6248 ; GFX90A-NEXT: ;;#ASMEND
6249 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6251 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_1_3_3:
6253 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6254 ; GFX940-NEXT: ;;#ASMSTART
6255 ; GFX940-NEXT: ; def s0
6256 ; GFX940-NEXT: ;;#ASMEND
6257 ; GFX940-NEXT: ;;#ASMSTART
6258 ; GFX940-NEXT: ; def s1
6259 ; GFX940-NEXT: ;;#ASMEND
6260 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
6261 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
6262 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
6263 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1
6264 ; GFX940-NEXT: ;;#ASMSTART
6265 ; GFX940-NEXT: ; use s[8:9]
6266 ; GFX940-NEXT: ;;#ASMEND
6267 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6268 %vec0 = call <2 x half> asm "; def $0", "=s"()
6269 %vec1 = call <2 x half> asm "; def $0", "=s"()
6270 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 1, i32 3, i32 3>
6271 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Mask <3,2,3,3> on "=s" inputs: every lane selects from %vec1 (lane 1 takes
; its element 0, the others its element 1), so %vec0 is unused and a single
; "; def" is expected. The checks expect the high half to be shifted into a
; scratch register, s8 to pack that half with the unshifted input (the two
; halves swapped), and s9 to pack the high half with itself. The result is
; consumed through the "{s[8:9]}" constraint.
6275 define void @s_shuffle_v4f16_v2f16__3_2_3_3() {
6276 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_2_3_3:
6278 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6279 ; GFX900-NEXT: ;;#ASMSTART
6280 ; GFX900-NEXT: ; def s4
6281 ; GFX900-NEXT: ;;#ASMEND
6282 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
6283 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
6284 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5
6285 ; GFX900-NEXT: ;;#ASMSTART
6286 ; GFX900-NEXT: ; use s[8:9]
6287 ; GFX900-NEXT: ;;#ASMEND
6288 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6290 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_2_3_3:
6292 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6293 ; GFX90A-NEXT: ;;#ASMSTART
6294 ; GFX90A-NEXT: ; def s4
6295 ; GFX90A-NEXT: ;;#ASMEND
6296 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
6297 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
6298 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5
6299 ; GFX90A-NEXT: ;;#ASMSTART
6300 ; GFX90A-NEXT: ; use s[8:9]
6301 ; GFX90A-NEXT: ;;#ASMEND
6302 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6304 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_2_3_3:
6306 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6307 ; GFX940-NEXT: ;;#ASMSTART
6308 ; GFX940-NEXT: ; def s0
6309 ; GFX940-NEXT: ;;#ASMEND
6310 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
6311 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
6312 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1
6313 ; GFX940-NEXT: ;;#ASMSTART
6314 ; GFX940-NEXT: ; use s[8:9]
6315 ; GFX940-NEXT: ;;#ASMEND
6316 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6317 %vec0 = call <2 x half> asm "; def $0", "=s"()
6318 %vec1 = call <2 x half> asm "; def $0", "=s"()
6319 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 2, i32 3, i32 3>
6320 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Mask <3,3,poison,3> on "=s" inputs: lanes 0, 1 and 3 take element 1 of %vec1
; and lane 2 is poison, so %vec0 is unused and a single "; def" is expected.
; The checks expect the input to be defined directly in s9 (its high half is
; already in the lane-3 position and lane 2 may hold anything) and s8 to pack
; the shifted-out high half with itself. The result is consumed through the
; "{s[8:9]}" constraint.
6324 define void @s_shuffle_v4f16_v2f16__3_3_u_3() {
6325 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_u_3:
6327 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6328 ; GFX900-NEXT: ;;#ASMSTART
6329 ; GFX900-NEXT: ; def s9
6330 ; GFX900-NEXT: ;;#ASMEND
6331 ; GFX900-NEXT: s_lshr_b32 s4, s9, 16
6332 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
6333 ; GFX900-NEXT: ;;#ASMSTART
6334 ; GFX900-NEXT: ; use s[8:9]
6335 ; GFX900-NEXT: ;;#ASMEND
6336 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6338 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_u_3:
6340 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6341 ; GFX90A-NEXT: ;;#ASMSTART
6342 ; GFX90A-NEXT: ; def s9
6343 ; GFX90A-NEXT: ;;#ASMEND
6344 ; GFX90A-NEXT: s_lshr_b32 s4, s9, 16
6345 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
6346 ; GFX90A-NEXT: ;;#ASMSTART
6347 ; GFX90A-NEXT: ; use s[8:9]
6348 ; GFX90A-NEXT: ;;#ASMEND
6349 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6351 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_u_3:
6353 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6354 ; GFX940-NEXT: ;;#ASMSTART
6355 ; GFX940-NEXT: ; def s9
6356 ; GFX940-NEXT: ;;#ASMEND
6357 ; GFX940-NEXT: s_lshr_b32 s0, s9, 16
6358 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
6359 ; GFX940-NEXT: ;;#ASMSTART
6360 ; GFX940-NEXT: ; use s[8:9]
6361 ; GFX940-NEXT: ;;#ASMEND
6362 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6363 %vec0 = call <2 x half> asm "; def $0", "=s"()
6364 %vec1 = call <2 x half> asm "; def $0", "=s"()
6365 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 3>
6366 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Mask <3,3,0,3> on "=s" inputs: lane 2 takes element 0 of %vec0, the other
; lanes take element 1 of %vec1. Both inputs are live, so two "; def" blocks
; are expected. In the expected output the register that is shifted right by
; 16 (feeding lanes 0, 1 and 3) is defined first; the other register is used
; unshifted as the first pack source for s9. The result is consumed through
; the "{s[8:9]}" constraint.
6370 define void @s_shuffle_v4f16_v2f16__3_3_0_3() {
6371 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_0_3:
6373 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6374 ; GFX900-NEXT: ;;#ASMSTART
6375 ; GFX900-NEXT: ; def s5
6376 ; GFX900-NEXT: ;;#ASMEND
6377 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
6378 ; GFX900-NEXT: ;;#ASMSTART
6379 ; GFX900-NEXT: ; def s4
6380 ; GFX900-NEXT: ;;#ASMEND
6381 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5
6382 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
6383 ; GFX900-NEXT: ;;#ASMSTART
6384 ; GFX900-NEXT: ; use s[8:9]
6385 ; GFX900-NEXT: ;;#ASMEND
6386 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6388 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_0_3:
6390 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6391 ; GFX90A-NEXT: ;;#ASMSTART
6392 ; GFX90A-NEXT: ; def s5
6393 ; GFX90A-NEXT: ;;#ASMEND
6394 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
6395 ; GFX90A-NEXT: ;;#ASMSTART
6396 ; GFX90A-NEXT: ; def s4
6397 ; GFX90A-NEXT: ;;#ASMEND
6398 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5
6399 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
6400 ; GFX90A-NEXT: ;;#ASMSTART
6401 ; GFX90A-NEXT: ; use s[8:9]
6402 ; GFX90A-NEXT: ;;#ASMEND
6403 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6405 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_0_3:
6407 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6408 ; GFX940-NEXT: ;;#ASMSTART
6409 ; GFX940-NEXT: ; def s1
6410 ; GFX940-NEXT: ;;#ASMEND
6411 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
6412 ; GFX940-NEXT: ;;#ASMSTART
6413 ; GFX940-NEXT: ; def s0
6414 ; GFX940-NEXT: ;;#ASMEND
6415 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1
6416 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
6417 ; GFX940-NEXT: ;;#ASMSTART
6418 ; GFX940-NEXT: ; use s[8:9]
6419 ; GFX940-NEXT: ;;#ASMEND
6420 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6421 %vec0 = call <2 x half> asm "; def $0", "=s"()
6422 %vec1 = call <2 x half> asm "; def $0", "=s"()
6423 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 3>
6424 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Mask <3,3,1,3> on "=s" inputs: lane 2 takes element 1 of %vec0, the other
; lanes take element 1 of %vec1. Only high halves are selected, so the checks
; expect both defined registers to be shifted right by 16 before the two
; packs that form s9 and s8. The result is consumed through the "{s[8:9]}"
; constraint.
6428 define void @s_shuffle_v4f16_v2f16__3_3_1_3() {
6429 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_1_3:
6431 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6432 ; GFX900-NEXT: ;;#ASMSTART
6433 ; GFX900-NEXT: ; def s4
6434 ; GFX900-NEXT: ;;#ASMEND
6435 ; GFX900-NEXT: ;;#ASMSTART
6436 ; GFX900-NEXT: ; def s5
6437 ; GFX900-NEXT: ;;#ASMEND
6438 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
6439 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
6440 ; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5
6441 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
6442 ; GFX900-NEXT: ;;#ASMSTART
6443 ; GFX900-NEXT: ; use s[8:9]
6444 ; GFX900-NEXT: ;;#ASMEND
6445 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6447 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_1_3:
6449 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6450 ; GFX90A-NEXT: ;;#ASMSTART
6451 ; GFX90A-NEXT: ; def s4
6452 ; GFX90A-NEXT: ;;#ASMEND
6453 ; GFX90A-NEXT: ;;#ASMSTART
6454 ; GFX90A-NEXT: ; def s5
6455 ; GFX90A-NEXT: ;;#ASMEND
6456 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
6457 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
6458 ; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5
6459 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
6460 ; GFX90A-NEXT: ;;#ASMSTART
6461 ; GFX90A-NEXT: ; use s[8:9]
6462 ; GFX90A-NEXT: ;;#ASMEND
6463 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6465 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_1_3:
6467 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6468 ; GFX940-NEXT: ;;#ASMSTART
6469 ; GFX940-NEXT: ; def s0
6470 ; GFX940-NEXT: ;;#ASMEND
6471 ; GFX940-NEXT: ;;#ASMSTART
6472 ; GFX940-NEXT: ; def s1
6473 ; GFX940-NEXT: ;;#ASMEND
6474 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
6475 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
6476 ; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1
6477 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
6478 ; GFX940-NEXT: ;;#ASMSTART
6479 ; GFX940-NEXT: ; use s[8:9]
6480 ; GFX940-NEXT: ;;#ASMEND
6481 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6482 %vec0 = call <2 x half> asm "; def $0", "=s"()
6483 %vec1 = call <2 x half> asm "; def $0", "=s"()
6484 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 3>
6485 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
; Mask <3,3,2,3> on "=s" inputs: every lane selects from %vec1, and lanes 2-3
; are its elements 0-1 in order, so %vec0 is unused and a single "; def" is
; expected. The checks expect the input to be defined directly in s9 (the
; upper two lanes are the input unchanged) and s8 to pack the shifted-out
; high half with itself. The result is consumed through the "{s[8:9]}"
; constraint.
6489 define void @s_shuffle_v4f16_v2f16__3_3_2_3() {
6490 ; GFX900-LABEL: s_shuffle_v4f16_v2f16__3_3_2_3:
6492 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6493 ; GFX900-NEXT: ;;#ASMSTART
6494 ; GFX900-NEXT: ; def s9
6495 ; GFX900-NEXT: ;;#ASMEND
6496 ; GFX900-NEXT: s_lshr_b32 s4, s9, 16
6497 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
6498 ; GFX900-NEXT: ;;#ASMSTART
6499 ; GFX900-NEXT: ; use s[8:9]
6500 ; GFX900-NEXT: ;;#ASMEND
6501 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6503 ; GFX90A-LABEL: s_shuffle_v4f16_v2f16__3_3_2_3:
6505 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6506 ; GFX90A-NEXT: ;;#ASMSTART
6507 ; GFX90A-NEXT: ; def s9
6508 ; GFX90A-NEXT: ;;#ASMEND
6509 ; GFX90A-NEXT: s_lshr_b32 s4, s9, 16
6510 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
6511 ; GFX90A-NEXT: ;;#ASMSTART
6512 ; GFX90A-NEXT: ; use s[8:9]
6513 ; GFX90A-NEXT: ;;#ASMEND
6514 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6516 ; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_2_3:
6518 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6519 ; GFX940-NEXT: ;;#ASMSTART
6520 ; GFX940-NEXT: ; def s9
6521 ; GFX940-NEXT: ;;#ASMEND
6522 ; GFX940-NEXT: s_lshr_b32 s0, s9, 16
6523 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
6524 ; GFX940-NEXT: ;;#ASMSTART
6525 ; GFX940-NEXT: ; use s[8:9]
6526 ; GFX940-NEXT: ;;#ASMEND
6527 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6528 %vec0 = call <2 x half> asm "; def $0", "=s"()
6529 %vec1 = call <2 x half> asm "; def $0", "=s"()
6530 %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 3>
6531 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf)
6534 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
6535 ; GFX90APLUS: {{.*}}