1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
; Shuffle of an asm-defined <2 x i16> with a fully poison mask: no element of
; %vec0 is selected. The checks expect only an s_waitcnt followed by the
; s_setpc_b64 return, i.e. neither the asm def nor a store is expected.
7 define void @v_shuffle_v2i16_v2i16__u_u(ptr addrspace(1) inreg %ptr) {
8 ; GFX9-LABEL: v_shuffle_v2i16_v2i16__u_u:
10 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12 %vec0 = call <2 x i16> asm "; def $0", "=v"()
13 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> poison
14 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <0, poison>: result element 0 is element 0 of %vec0, result element 1
; is unspecified. The checks expect the asm-defined VGPR to be stored directly
; with global_store_dword, with no instruction between the asm def and the store.
18 define void @v_shuffle_v2i16_v2i16__0_u(ptr addrspace(1) inreg %ptr) {
19 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__0_u:
21 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
23 ; GFX900-NEXT: ;;#ASMSTART
24 ; GFX900-NEXT: ; def v1
25 ; GFX900-NEXT: ;;#ASMEND
26 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
27 ; GFX900-NEXT: s_waitcnt vmcnt(0)
28 ; GFX900-NEXT: s_setpc_b64 s[30:31]
30 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__0_u:
32 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
34 ; GFX90A-NEXT: ;;#ASMSTART
35 ; GFX90A-NEXT: ; def v1
36 ; GFX90A-NEXT: ;;#ASMEND
37 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
38 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
39 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
41 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__0_u:
43 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
45 ; GFX940-NEXT: ;;#ASMSTART
46 ; GFX940-NEXT: ; def v1
47 ; GFX940-NEXT: ;;#ASMEND
48 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
49 ; GFX940-NEXT: s_waitcnt vmcnt(0)
50 ; GFX940-NEXT: s_setpc_b64 s[30:31]
51 %vec0 = call <2 x i16> asm "; def $0", "=v"()
52 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 0, i32 poison>
53 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <1, poison>: result element 0 is element 1 of %vec0, result element 1
; is unspecified. The checks expect a single v_alignbit_b32 with a shift of 16
; applied to the asm-defined VGPR before the global_store_dword.
57 define void @v_shuffle_v2i16_v2i16__1_u(ptr addrspace(1) inreg %ptr) {
58 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__1_u:
60 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61 ; GFX900-NEXT: ;;#ASMSTART
62 ; GFX900-NEXT: ; def v1
63 ; GFX900-NEXT: ;;#ASMEND
64 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
65 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16
66 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
67 ; GFX900-NEXT: s_waitcnt vmcnt(0)
68 ; GFX900-NEXT: s_setpc_b64 s[30:31]
70 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__1_u:
72 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73 ; GFX90A-NEXT: ;;#ASMSTART
74 ; GFX90A-NEXT: ; def v1
75 ; GFX90A-NEXT: ;;#ASMEND
76 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
77 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16
78 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
79 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
80 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
82 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__1_u:
84 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GFX940-NEXT: ;;#ASMSTART
86 ; GFX940-NEXT: ; def v1
87 ; GFX940-NEXT: ;;#ASMEND
88 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
89 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16
90 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
91 ; GFX940-NEXT: s_waitcnt vmcnt(0)
92 ; GFX940-NEXT: s_setpc_b64 s[30:31]
93 %vec0 = call <2 x i16> asm "; def $0", "=v"()
94 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
95 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <2, poison> with a poison second operand: shufflevector index 2 names
; element 0 of the second operand, so nothing from %vec0 is selected. The
; checks expect only an s_waitcnt followed by the s_setpc_b64 return.
99 define void @v_shuffle_v2i16_v2i16__2_u(ptr addrspace(1) inreg %ptr) {
100 ; GFX9-LABEL: v_shuffle_v2i16_v2i16__2_u:
102 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103 ; GFX9-NEXT: s_setpc_b64 s[30:31]
104 %vec0 = call <2 x i16> asm "; def $0", "=v"()
105 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 2, i32 poison>
106 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <3, poison> over %vec0 and %vec1: result element 0 is element 1 of
; %vec1, result element 1 is unspecified. Although the IR creates two asm
; values, the checks expect a single asm def, then one v_alignbit_b32 with a
; shift of 16 and the global_store_dword.
110 define void @v_shuffle_v2i16_v2i16__3_u(ptr addrspace(1) inreg %ptr) {
111 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__3_u:
113 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; GFX900-NEXT: ;;#ASMSTART
115 ; GFX900-NEXT: ; def v1
116 ; GFX900-NEXT: ;;#ASMEND
117 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
118 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16
119 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
120 ; GFX900-NEXT: s_waitcnt vmcnt(0)
121 ; GFX900-NEXT: s_setpc_b64 s[30:31]
123 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__3_u:
125 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; GFX90A-NEXT: ;;#ASMSTART
127 ; GFX90A-NEXT: ; def v1
128 ; GFX90A-NEXT: ;;#ASMEND
129 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
130 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16
131 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
132 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
133 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
135 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__3_u:
137 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; GFX940-NEXT: ;;#ASMSTART
139 ; GFX940-NEXT: ; def v1
140 ; GFX940-NEXT: ;;#ASMEND
141 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
142 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16
143 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
144 ; GFX940-NEXT: s_waitcnt vmcnt(0)
145 ; GFX940-NEXT: s_setpc_b64 s[30:31]
146 %vec0 = call <2 x i16> asm "; def $0", "=v"()
147 %vec1 = call <2 x i16> asm "; def $0", "=v"()
148 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 3, i32 poison>
149 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <3, 0>: result element 0 is element 1 of %vec1, result element 1 is
; element 0 of %vec0. The checks expect two asm defs (v1, v2) combined by one
; v_alignbit_b32 with a shift of 16; the gfx940 checks additionally expect an
; s_nop 0 between the second asm block and the v_alignbit_b32.
153 define void @v_shuffle_v2i16_v2i16__3_0(ptr addrspace(1) inreg %ptr) {
154 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__3_0:
156 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX900-NEXT: ;;#ASMSTART
158 ; GFX900-NEXT: ; def v1
159 ; GFX900-NEXT: ;;#ASMEND
160 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
161 ; GFX900-NEXT: ;;#ASMSTART
162 ; GFX900-NEXT: ; def v2
163 ; GFX900-NEXT: ;;#ASMEND
164 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v2, 16
165 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
166 ; GFX900-NEXT: s_waitcnt vmcnt(0)
167 ; GFX900-NEXT: s_setpc_b64 s[30:31]
169 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__3_0:
171 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172 ; GFX90A-NEXT: ;;#ASMSTART
173 ; GFX90A-NEXT: ; def v1
174 ; GFX90A-NEXT: ;;#ASMEND
175 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
176 ; GFX90A-NEXT: ;;#ASMSTART
177 ; GFX90A-NEXT: ; def v2
178 ; GFX90A-NEXT: ;;#ASMEND
179 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v2, 16
180 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
181 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
182 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
184 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__3_0:
186 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187 ; GFX940-NEXT: ;;#ASMSTART
188 ; GFX940-NEXT: ; def v1
189 ; GFX940-NEXT: ;;#ASMEND
190 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
191 ; GFX940-NEXT: ;;#ASMSTART
192 ; GFX940-NEXT: ; def v2
193 ; GFX940-NEXT: ;;#ASMEND
194 ; GFX940-NEXT: s_nop 0
195 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16
196 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
197 ; GFX940-NEXT: s_waitcnt vmcnt(0)
198 ; GFX940-NEXT: s_setpc_b64 s[30:31]
199 %vec0 = call <2 x i16> asm "; def $0", "=v"()
200 %vec1 = call <2 x i16> asm "; def $0", "=v"()
201 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 3, i32 0>
202 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <3, 1>: result element 0 is element 1 of %vec1, result element 1 is
; element 1 of %vec0. The checks expect two asm defs combined by a v_perm_b32
; whose selector 0x7060302 is first materialized into an SGPR with s_mov_b32;
; the gfx940 checks additionally expect an s_nop 0 before the v_perm_b32.
206 define void @v_shuffle_v2i16_v2i16__3_1(ptr addrspace(1) inreg %ptr) {
207 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__3_1:
209 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210 ; GFX900-NEXT: ;;#ASMSTART
211 ; GFX900-NEXT: ; def v1
212 ; GFX900-NEXT: ;;#ASMEND
213 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
214 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
215 ; GFX900-NEXT: ;;#ASMSTART
216 ; GFX900-NEXT: ; def v2
217 ; GFX900-NEXT: ;;#ASMEND
218 ; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4
219 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
220 ; GFX900-NEXT: s_waitcnt vmcnt(0)
221 ; GFX900-NEXT: s_setpc_b64 s[30:31]
223 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__3_1:
225 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
226 ; GFX90A-NEXT: ;;#ASMSTART
227 ; GFX90A-NEXT: ; def v1
228 ; GFX90A-NEXT: ;;#ASMEND
229 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
230 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
231 ; GFX90A-NEXT: ;;#ASMSTART
232 ; GFX90A-NEXT: ; def v2
233 ; GFX90A-NEXT: ;;#ASMEND
234 ; GFX90A-NEXT: v_perm_b32 v1, v1, v2, s4
235 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
236 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
237 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
239 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__3_1:
241 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242 ; GFX940-NEXT: ;;#ASMSTART
243 ; GFX940-NEXT: ; def v1
244 ; GFX940-NEXT: ;;#ASMEND
245 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
246 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
247 ; GFX940-NEXT: ;;#ASMSTART
248 ; GFX940-NEXT: ; def v2
249 ; GFX940-NEXT: ;;#ASMEND
250 ; GFX940-NEXT: s_nop 0
251 ; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2
252 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
253 ; GFX940-NEXT: s_waitcnt vmcnt(0)
254 ; GFX940-NEXT: s_setpc_b64 s[30:31]
255 %vec0 = call <2 x i16> asm "; def $0", "=v"()
256 %vec1 = call <2 x i16> asm "; def $0", "=v"()
257 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 3, i32 1>
258 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <3, 2>: both result elements come from %vec1, in swapped order. The
; checks expect a single asm def and one v_alignbit_b32 v1, v1, v1, 16 (the
; same VGPR used for both sources) before the global_store_dword.
262 define void @v_shuffle_v2i16_v2i16__3_2(ptr addrspace(1) inreg %ptr) {
263 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__3_2:
265 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266 ; GFX900-NEXT: ;;#ASMSTART
267 ; GFX900-NEXT: ; def v1
268 ; GFX900-NEXT: ;;#ASMEND
269 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
270 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16
271 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
272 ; GFX900-NEXT: s_waitcnt vmcnt(0)
273 ; GFX900-NEXT: s_setpc_b64 s[30:31]
275 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__3_2:
277 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
278 ; GFX90A-NEXT: ;;#ASMSTART
279 ; GFX90A-NEXT: ; def v1
280 ; GFX90A-NEXT: ;;#ASMEND
281 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
282 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16
283 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
284 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
285 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
287 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__3_2:
289 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290 ; GFX940-NEXT: ;;#ASMSTART
291 ; GFX940-NEXT: ; def v1
292 ; GFX940-NEXT: ;;#ASMEND
293 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
294 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16
295 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
296 ; GFX940-NEXT: s_waitcnt vmcnt(0)
297 ; GFX940-NEXT: s_setpc_b64 s[30:31]
298 %vec0 = call <2 x i16> asm "; def $0", "=v"()
299 %vec1 = call <2 x i16> asm "; def $0", "=v"()
300 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 3, i32 2>
301 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <3, 3>: both result elements are element 1 of %vec1 (a splat). The
; checks expect a single asm def and a v_perm_b32 v1, v1, v1 whose selector
; 0x7060302 is materialized into an SGPR with s_mov_b32.
305 define void @v_shuffle_v2i16_v2i16__3_3(ptr addrspace(1) inreg %ptr) {
306 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__3_3:
308 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309 ; GFX900-NEXT: ;;#ASMSTART
310 ; GFX900-NEXT: ; def v1
311 ; GFX900-NEXT: ;;#ASMEND
312 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
313 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
314 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
315 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
316 ; GFX900-NEXT: s_waitcnt vmcnt(0)
317 ; GFX900-NEXT: s_setpc_b64 s[30:31]
319 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__3_3:
321 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GFX90A-NEXT: ;;#ASMSTART
323 ; GFX90A-NEXT: ; def v1
324 ; GFX90A-NEXT: ;;#ASMEND
325 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
326 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
327 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
328 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
329 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
330 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
332 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__3_3:
334 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335 ; GFX940-NEXT: ;;#ASMSTART
336 ; GFX940-NEXT: ; def v1
337 ; GFX940-NEXT: ;;#ASMEND
338 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
339 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
340 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
341 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
342 ; GFX940-NEXT: s_waitcnt vmcnt(0)
343 ; GFX940-NEXT: s_setpc_b64 s[30:31]
344 %vec0 = call <2 x i16> asm "; def $0", "=v"()
345 %vec1 = call <2 x i16> asm "; def $0", "=v"()
346 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 3, i32 3>
347 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <poison, 0>: result element 1 is element 0 of %vec0, result element 0
; is unspecified. The checks expect a v_lshlrev_b32 by 16 on the asm-defined
; VGPR before the global_store_dword.
351 define void @v_shuffle_v2i16_v2i16__u_0(ptr addrspace(1) inreg %ptr) {
352 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__u_0:
354 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355 ; GFX900-NEXT: ;;#ASMSTART
356 ; GFX900-NEXT: ; def v1
357 ; GFX900-NEXT: ;;#ASMEND
358 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
359 ; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1
360 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
361 ; GFX900-NEXT: s_waitcnt vmcnt(0)
362 ; GFX900-NEXT: s_setpc_b64 s[30:31]
364 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__u_0:
366 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
367 ; GFX90A-NEXT: ;;#ASMSTART
368 ; GFX90A-NEXT: ; def v1
369 ; GFX90A-NEXT: ;;#ASMEND
370 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
371 ; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v1
372 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
373 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
374 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
376 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__u_0:
378 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
379 ; GFX940-NEXT: ;;#ASMSTART
380 ; GFX940-NEXT: ; def v1
381 ; GFX940-NEXT: ;;#ASMEND
382 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
383 ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1
384 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
385 ; GFX940-NEXT: s_waitcnt vmcnt(0)
386 ; GFX940-NEXT: s_setpc_b64 s[30:31]
387 %vec0 = call <2 x i16> asm "; def $0", "=v"()
388 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 poison, i32 0>
389 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask zeroinitializer (i.e. <0, 0>): both result elements are element 0 of
; %vec0 (a splat). The checks expect a v_perm_b32 v1, v1, v1 whose selector
; 0x5040100 is materialized into an SGPR with s_mov_b32.
393 define void @v_shuffle_v2i16_v2i16__0_0(ptr addrspace(1) inreg %ptr) {
394 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__0_0:
396 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
397 ; GFX900-NEXT: ;;#ASMSTART
398 ; GFX900-NEXT: ; def v1
399 ; GFX900-NEXT: ;;#ASMEND
400 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
401 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
402 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
403 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
404 ; GFX900-NEXT: s_waitcnt vmcnt(0)
405 ; GFX900-NEXT: s_setpc_b64 s[30:31]
407 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__0_0:
409 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410 ; GFX90A-NEXT: ;;#ASMSTART
411 ; GFX90A-NEXT: ; def v1
412 ; GFX90A-NEXT: ;;#ASMEND
413 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
414 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
415 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
416 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
417 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
418 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
420 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__0_0:
422 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
423 ; GFX940-NEXT: ;;#ASMSTART
424 ; GFX940-NEXT: ; def v1
425 ; GFX940-NEXT: ;;#ASMEND
426 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
427 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
428 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
429 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
430 ; GFX940-NEXT: s_waitcnt vmcnt(0)
431 ; GFX940-NEXT: s_setpc_b64 s[30:31]
432 %vec0 = call <2 x i16> asm "; def $0", "=v"()
433 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> zeroinitializer
434 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <1, 0>: the two elements of %vec0 are swapped. The checks expect one
; v_alignbit_b32 v1, v1, v1, 16 (the same VGPR used for both sources) before
; the global_store_dword.
438 define void @v_shuffle_v2i16_v2i16__1_0(ptr addrspace(1) inreg %ptr) {
439 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__1_0:
441 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
442 ; GFX900-NEXT: ;;#ASMSTART
443 ; GFX900-NEXT: ; def v1
444 ; GFX900-NEXT: ;;#ASMEND
445 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
446 ; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16
447 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
448 ; GFX900-NEXT: s_waitcnt vmcnt(0)
449 ; GFX900-NEXT: s_setpc_b64 s[30:31]
451 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__1_0:
453 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
454 ; GFX90A-NEXT: ;;#ASMSTART
455 ; GFX90A-NEXT: ; def v1
456 ; GFX90A-NEXT: ;;#ASMEND
457 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
458 ; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16
459 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
460 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
461 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
463 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__1_0:
465 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
466 ; GFX940-NEXT: ;;#ASMSTART
467 ; GFX940-NEXT: ; def v1
468 ; GFX940-NEXT: ;;#ASMEND
469 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
470 ; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16
471 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
472 ; GFX940-NEXT: s_waitcnt vmcnt(0)
473 ; GFX940-NEXT: s_setpc_b64 s[30:31]
474 %vec0 = call <2 x i16> asm "; def $0", "=v"()
475 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
476 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <2, 0> with a poison second operand: result element 0 is taken from the
; poison operand (index 2), result element 1 is element 0 of %vec0. The checks
; expect a v_lshlrev_b32 by 16 on the asm-defined VGPR before the store.
480 define void @v_shuffle_v2i16_v2i16__2_0(ptr addrspace(1) inreg %ptr) {
481 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__2_0:
483 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
484 ; GFX900-NEXT: ;;#ASMSTART
485 ; GFX900-NEXT: ; def v1
486 ; GFX900-NEXT: ;;#ASMEND
487 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
488 ; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1
489 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
490 ; GFX900-NEXT: s_waitcnt vmcnt(0)
491 ; GFX900-NEXT: s_setpc_b64 s[30:31]
493 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__2_0:
495 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
496 ; GFX90A-NEXT: ;;#ASMSTART
497 ; GFX90A-NEXT: ; def v1
498 ; GFX90A-NEXT: ;;#ASMEND
499 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
500 ; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v1
501 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
502 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
503 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
505 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__2_0:
507 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508 ; GFX940-NEXT: ;;#ASMSTART
509 ; GFX940-NEXT: ; def v1
510 ; GFX940-NEXT: ;;#ASMEND
511 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
512 ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1
513 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
514 ; GFX940-NEXT: s_waitcnt vmcnt(0)
515 ; GFX940-NEXT: s_setpc_b64 s[30:31]
516 %vec0 = call <2 x i16> asm "; def $0", "=v"()
517 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 2, i32 0>
518 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <poison, 1>: result element 1 is element 1 of %vec0 (already in place),
; result element 0 is unspecified. The checks expect the asm-defined VGPR to
; be stored directly, with no instruction between the asm def and the store.
522 define void @v_shuffle_v2i16_v2i16__u_1(ptr addrspace(1) inreg %ptr) {
523 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__u_1:
525 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
526 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
527 ; GFX900-NEXT: ;;#ASMSTART
528 ; GFX900-NEXT: ; def v1
529 ; GFX900-NEXT: ;;#ASMEND
530 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
531 ; GFX900-NEXT: s_waitcnt vmcnt(0)
532 ; GFX900-NEXT: s_setpc_b64 s[30:31]
534 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__u_1:
536 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
538 ; GFX90A-NEXT: ;;#ASMSTART
539 ; GFX90A-NEXT: ; def v1
540 ; GFX90A-NEXT: ;;#ASMEND
541 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
542 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
543 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
545 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__u_1:
547 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
548 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
549 ; GFX940-NEXT: ;;#ASMSTART
550 ; GFX940-NEXT: ; def v1
551 ; GFX940-NEXT: ;;#ASMEND
552 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
553 ; GFX940-NEXT: s_waitcnt vmcnt(0)
554 ; GFX940-NEXT: s_setpc_b64 s[30:31]
555 %vec0 = call <2 x i16> asm "; def $0", "=v"()
556 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 poison, i32 1>
557 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <0, 1>: the identity shuffle of %vec0. The checks expect the
; asm-defined VGPR to be stored directly with global_store_dword, with no
; instruction between the asm def and the store.
561 define void @v_shuffle_v2i16_v2i16__0_1(ptr addrspace(1) inreg %ptr) {
562 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__0_1:
564 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
565 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
566 ; GFX900-NEXT: ;;#ASMSTART
567 ; GFX900-NEXT: ; def v1
568 ; GFX900-NEXT: ;;#ASMEND
569 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
570 ; GFX900-NEXT: s_waitcnt vmcnt(0)
571 ; GFX900-NEXT: s_setpc_b64 s[30:31]
573 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__0_1:
575 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
576 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
577 ; GFX90A-NEXT: ;;#ASMSTART
578 ; GFX90A-NEXT: ; def v1
579 ; GFX90A-NEXT: ;;#ASMEND
580 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
581 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
582 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
584 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__0_1:
586 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
587 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
588 ; GFX940-NEXT: ;;#ASMSTART
589 ; GFX940-NEXT: ; def v1
590 ; GFX940-NEXT: ;;#ASMEND
591 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
592 ; GFX940-NEXT: s_waitcnt vmcnt(0)
593 ; GFX940-NEXT: s_setpc_b64 s[30:31]
594 %vec0 = call <2 x i16> asm "; def $0", "=v"()
595 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 0, i32 1>
596 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <1, 1>: both result elements are element 1 of %vec0 (a splat). The
; checks expect a v_perm_b32 v1, v1, v1 whose selector 0x7060302 is
; materialized into an SGPR with s_mov_b32.
600 define void @v_shuffle_v2i16_v2i16__1_1(ptr addrspace(1) inreg %ptr) {
601 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__1_1:
603 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
604 ; GFX900-NEXT: ;;#ASMSTART
605 ; GFX900-NEXT: ; def v1
606 ; GFX900-NEXT: ;;#ASMEND
607 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
608 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
609 ; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4
610 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
611 ; GFX900-NEXT: s_waitcnt vmcnt(0)
612 ; GFX900-NEXT: s_setpc_b64 s[30:31]
614 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__1_1:
616 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
617 ; GFX90A-NEXT: ;;#ASMSTART
618 ; GFX90A-NEXT: ; def v1
619 ; GFX90A-NEXT: ;;#ASMEND
620 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
621 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
622 ; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4
623 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
624 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
625 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
627 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__1_1:
629 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
630 ; GFX940-NEXT: ;;#ASMSTART
631 ; GFX940-NEXT: ; def v1
632 ; GFX940-NEXT: ;;#ASMEND
633 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
634 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
635 ; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2
636 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
637 ; GFX940-NEXT: s_waitcnt vmcnt(0)
638 ; GFX940-NEXT: s_setpc_b64 s[30:31]
639 %vec0 = call <2 x i16> asm "; def $0", "=v"()
640 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 1, i32 1>
641 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <2, 1> with a poison second operand: result element 0 is taken from the
; poison operand (index 2), result element 1 is element 1 of %vec0 (already
; in place). The checks expect the asm-defined VGPR to be stored directly.
645 define void @v_shuffle_v2i16_v2i16__2_1(ptr addrspace(1) inreg %ptr) {
646 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__2_1:
648 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
649 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
650 ; GFX900-NEXT: ;;#ASMSTART
651 ; GFX900-NEXT: ; def v1
652 ; GFX900-NEXT: ;;#ASMEND
653 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
654 ; GFX900-NEXT: s_waitcnt vmcnt(0)
655 ; GFX900-NEXT: s_setpc_b64 s[30:31]
657 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__2_1:
659 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
660 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
661 ; GFX90A-NEXT: ;;#ASMSTART
662 ; GFX90A-NEXT: ; def v1
663 ; GFX90A-NEXT: ;;#ASMEND
664 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
665 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
666 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
668 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__2_1:
670 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
671 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
672 ; GFX940-NEXT: ;;#ASMSTART
673 ; GFX940-NEXT: ; def v1
674 ; GFX940-NEXT: ;;#ASMEND
675 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
676 ; GFX940-NEXT: s_waitcnt vmcnt(0)
677 ; GFX940-NEXT: s_setpc_b64 s[30:31]
678 %vec0 = call <2 x i16> asm "; def $0", "=v"()
679 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 2, i32 1>
680 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <poison, 2> with a poison second operand: index 2 names element 0 of
; the second operand, so nothing from %vec0 is selected. The checks expect
; only an s_waitcnt followed by the s_setpc_b64 return.
684 define void @v_shuffle_v2i16_v2i16__u_2(ptr addrspace(1) inreg %ptr) {
685 ; GFX9-LABEL: v_shuffle_v2i16_v2i16__u_2:
687 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688 ; GFX9-NEXT: s_setpc_b64 s[30:31]
689 %vec0 = call <2 x i16> asm "; def $0", "=v"()
690 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 poison, i32 2>
691 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <0, 2> with a poison second operand: result element 0 is element 0 of
; %vec0 (already in place), result element 1 is taken from the poison operand.
; The checks expect the asm-defined VGPR to be stored directly.
695 define void @v_shuffle_v2i16_v2i16__0_2(ptr addrspace(1) inreg %ptr) {
696 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__0_2:
698 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
699 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
700 ; GFX900-NEXT: ;;#ASMSTART
701 ; GFX900-NEXT: ; def v1
702 ; GFX900-NEXT: ;;#ASMEND
703 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
704 ; GFX900-NEXT: s_waitcnt vmcnt(0)
705 ; GFX900-NEXT: s_setpc_b64 s[30:31]
707 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__0_2:
709 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
710 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
711 ; GFX90A-NEXT: ;;#ASMSTART
712 ; GFX90A-NEXT: ; def v1
713 ; GFX90A-NEXT: ;;#ASMEND
714 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
715 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
716 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
718 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__0_2:
720 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
721 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
722 ; GFX940-NEXT: ;;#ASMSTART
723 ; GFX940-NEXT: ; def v1
724 ; GFX940-NEXT: ;;#ASMEND
725 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
726 ; GFX940-NEXT: s_waitcnt vmcnt(0)
727 ; GFX940-NEXT: s_setpc_b64 s[30:31]
728 %vec0 = call <2 x i16> asm "; def $0", "=v"()
729 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 0, i32 2>
730 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <1, 2> with a poison second operand: result element 0 is element 1 of
; %vec0, result element 1 is taken from the poison operand. The checks expect
; a single v_alignbit_b32 with a shift of 16 before the global_store_dword.
734 define void @v_shuffle_v2i16_v2i16__1_2(ptr addrspace(1) inreg %ptr) {
735 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__1_2:
737 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
738 ; GFX900-NEXT: ;;#ASMSTART
739 ; GFX900-NEXT: ; def v1
740 ; GFX900-NEXT: ;;#ASMEND
741 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
742 ; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16
743 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
744 ; GFX900-NEXT: s_waitcnt vmcnt(0)
745 ; GFX900-NEXT: s_setpc_b64 s[30:31]
747 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__1_2:
749 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
750 ; GFX90A-NEXT: ;;#ASMSTART
751 ; GFX90A-NEXT: ; def v1
752 ; GFX90A-NEXT: ;;#ASMEND
753 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
754 ; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16
755 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
756 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
757 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
759 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__1_2:
761 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
762 ; GFX940-NEXT: ;;#ASMSTART
763 ; GFX940-NEXT: ; def v1
764 ; GFX940-NEXT: ;;#ASMEND
765 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
766 ; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16
767 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
768 ; GFX940-NEXT: s_waitcnt vmcnt(0)
769 ; GFX940-NEXT: s_setpc_b64 s[30:31]
770 %vec0 = call <2 x i16> asm "; def $0", "=v"()
771 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 1, i32 2>
772 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <2, 2> with a poison second operand: both result elements are taken
; from the poison operand, so nothing from %vec0 is selected. The checks
; expect only an s_waitcnt followed by the s_setpc_b64 return.
776 define void @v_shuffle_v2i16_v2i16__2_2(ptr addrspace(1) inreg %ptr) {
777 ; GFX9-LABEL: v_shuffle_v2i16_v2i16__2_2:
779 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
780 ; GFX9-NEXT: s_setpc_b64 s[30:31]
781 %vec0 = call <2 x i16> asm "; def $0", "=v"()
782 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 2, i32 2>
783 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <poison, 3> over %vec0 and %vec1: result element 1 is element 1 of
; %vec1 (already in place), result element 0 is unspecified. Although the IR
; creates two asm values, the checks expect a single asm def whose VGPR is
; stored directly.
787 define void @v_shuffle_v2i16_v2i16__u_3(ptr addrspace(1) inreg %ptr) {
788 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__u_3:
790 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
791 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
792 ; GFX900-NEXT: ;;#ASMSTART
793 ; GFX900-NEXT: ; def v1
794 ; GFX900-NEXT: ;;#ASMEND
795 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
796 ; GFX900-NEXT: s_waitcnt vmcnt(0)
797 ; GFX900-NEXT: s_setpc_b64 s[30:31]
799 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__u_3:
801 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
802 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
803 ; GFX90A-NEXT: ;;#ASMSTART
804 ; GFX90A-NEXT: ; def v1
805 ; GFX90A-NEXT: ;;#ASMEND
806 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
807 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
808 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
810 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__u_3:
812 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
813 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
814 ; GFX940-NEXT: ;;#ASMSTART
815 ; GFX940-NEXT: ; def v1
816 ; GFX940-NEXT: ;;#ASMEND
817 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
818 ; GFX940-NEXT: s_waitcnt vmcnt(0)
819 ; GFX940-NEXT: s_setpc_b64 s[30:31]
820 %vec0 = call <2 x i16> asm "; def $0", "=v"()
821 %vec1 = call <2 x i16> asm "; def $0", "=v"()
822 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 poison, i32 3>
823 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <0, 3>: result element 0 is element 0 of %vec0, result element 1 is
; element 1 of %vec1. The checks expect two asm defs merged by a v_bfi_b32
; whose bit mask 0xffff is materialized into an SGPR with s_mov_b32; the
; gfx940 checks additionally expect an s_nop 0 before the v_bfi_b32.
827 define void @v_shuffle_v2i16_v2i16__0_3(ptr addrspace(1) inreg %ptr) {
828 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__0_3:
830 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
831 ; GFX900-NEXT: ;;#ASMSTART
832 ; GFX900-NEXT: ; def v1
833 ; GFX900-NEXT: ;;#ASMEND
834 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
835 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
836 ; GFX900-NEXT: ;;#ASMSTART
837 ; GFX900-NEXT: ; def v2
838 ; GFX900-NEXT: ;;#ASMEND
839 ; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v2
840 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
841 ; GFX900-NEXT: s_waitcnt vmcnt(0)
842 ; GFX900-NEXT: s_setpc_b64 s[30:31]
844 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__0_3:
846 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
847 ; GFX90A-NEXT: ;;#ASMSTART
848 ; GFX90A-NEXT: ; def v1
849 ; GFX90A-NEXT: ;;#ASMEND
850 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
851 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
852 ; GFX90A-NEXT: ;;#ASMSTART
853 ; GFX90A-NEXT: ; def v2
854 ; GFX90A-NEXT: ;;#ASMEND
855 ; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v2
856 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
857 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
858 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
860 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__0_3:
862 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
863 ; GFX940-NEXT: ;;#ASMSTART
864 ; GFX940-NEXT: ; def v1
865 ; GFX940-NEXT: ;;#ASMEND
866 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
867 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
868 ; GFX940-NEXT: ;;#ASMSTART
869 ; GFX940-NEXT: ; def v2
870 ; GFX940-NEXT: ;;#ASMEND
871 ; GFX940-NEXT: s_nop 0
872 ; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2
873 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
874 ; GFX940-NEXT: s_waitcnt vmcnt(0)
875 ; GFX940-NEXT: s_setpc_b64 s[30:31]
876 %vec0 = call <2 x i16> asm "; def $0", "=v"()
877 %vec1 = call <2 x i16> asm "; def $0", "=v"()
878 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 3>
879 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <1, 3>: result element 0 is element 1 of %vec0, result element 1 is
; element 1 of %vec1. The checks expect two asm defs combined by a
; v_perm_b32 v1, v2, v1 whose selector 0x7060302 is materialized into an SGPR
; with s_mov_b32; the gfx940 checks additionally expect an s_nop 0 before it.
883 define void @v_shuffle_v2i16_v2i16__1_3(ptr addrspace(1) inreg %ptr) {
884 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__1_3:
886 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
887 ; GFX900-NEXT: ;;#ASMSTART
888 ; GFX900-NEXT: ; def v1
889 ; GFX900-NEXT: ;;#ASMEND
890 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
891 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
892 ; GFX900-NEXT: ;;#ASMSTART
893 ; GFX900-NEXT: ; def v2
894 ; GFX900-NEXT: ;;#ASMEND
895 ; GFX900-NEXT: v_perm_b32 v1, v2, v1, s4
896 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
897 ; GFX900-NEXT: s_waitcnt vmcnt(0)
898 ; GFX900-NEXT: s_setpc_b64 s[30:31]
900 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__1_3:
902 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
903 ; GFX90A-NEXT: ;;#ASMSTART
904 ; GFX90A-NEXT: ; def v1
905 ; GFX90A-NEXT: ;;#ASMEND
906 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
907 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
908 ; GFX90A-NEXT: ;;#ASMSTART
909 ; GFX90A-NEXT: ; def v2
910 ; GFX90A-NEXT: ;;#ASMEND
911 ; GFX90A-NEXT: v_perm_b32 v1, v2, v1, s4
912 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
913 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
914 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
916 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__1_3:
918 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
919 ; GFX940-NEXT: ;;#ASMSTART
920 ; GFX940-NEXT: ; def v1
921 ; GFX940-NEXT: ;;#ASMEND
922 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
923 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
924 ; GFX940-NEXT: ;;#ASMSTART
925 ; GFX940-NEXT: ; def v2
926 ; GFX940-NEXT: ;;#ASMEND
927 ; GFX940-NEXT: s_nop 0
928 ; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2
929 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
930 ; GFX940-NEXT: s_waitcnt vmcnt(0)
931 ; GFX940-NEXT: s_setpc_b64 s[30:31]
932 %vec0 = call <2 x i16> asm "; def $0", "=v"()
933 %vec1 = call <2 x i16> asm "; def $0", "=v"()
934 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 1, i32 3>
935 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; VGPR shuffle with mask <2, 3>: both result elements come from %vec1 in their
; original order, so %vec0 is not selected at all.
; The checks expect only one `; def` in the output and a direct dword store of
; that register, with no permute or bitfield instruction.
939 define void @v_shuffle_v2i16_v2i16__2_3(ptr addrspace(1) inreg %ptr) {
940 ; GFX900-LABEL: v_shuffle_v2i16_v2i16__2_3:
942 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
943 ; GFX900-NEXT: v_mov_b32_e32 v0, 0
944 ; GFX900-NEXT: ;;#ASMSTART
945 ; GFX900-NEXT: ; def v1
946 ; GFX900-NEXT: ;;#ASMEND
947 ; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
948 ; GFX900-NEXT: s_waitcnt vmcnt(0)
949 ; GFX900-NEXT: s_setpc_b64 s[30:31]
951 ; GFX90A-LABEL: v_shuffle_v2i16_v2i16__2_3:
953 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
954 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
955 ; GFX90A-NEXT: ;;#ASMSTART
956 ; GFX90A-NEXT: ; def v1
957 ; GFX90A-NEXT: ;;#ASMEND
958 ; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
959 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
960 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
962 ; GFX940-LABEL: v_shuffle_v2i16_v2i16__2_3:
964 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
965 ; GFX940-NEXT: v_mov_b32_e32 v0, 0
966 ; GFX940-NEXT: ;;#ASMSTART
967 ; GFX940-NEXT: ; def v1
968 ; GFX940-NEXT: ;;#ASMEND
969 ; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
970 ; GFX940-NEXT: s_waitcnt vmcnt(0)
971 ; GFX940-NEXT: s_setpc_b64 s[30:31]
972 %vec0 = call <2 x i16> asm "; def $0", "=v"()
973 %vec1 = call <2 x i16> asm "; def $0", "=v"()
974 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 3>
975 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; SGPR shuffle with an entirely poison mask: no source element is selected.
; The result is passed to an inline-asm use constrained to s8. The checks
; (shared by all three targets) expect only the `; use s8` asm, with no def
; and no ALU instruction.
979 define void @s_shuffle_v2i16_v2i16__u_u() {
980 ; GFX9-LABEL: s_shuffle_v2i16_v2i16__u_u:
982 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
983 ; GFX9-NEXT: ;;#ASMSTART
984 ; GFX9-NEXT: ; use s8
985 ; GFX9-NEXT: ;;#ASMEND
986 ; GFX9-NEXT: s_setpc_b64 s[30:31]
987 %vec0 = call <2 x i16> asm "; def $0", "=s"()
988 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> poison
989 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <0, poison>: result element 0 is %vec0[0]; element 1
; is unconstrained.
; The checks expect the def to land directly in s8 and be used as-is, with no
; shift or pack; the gfx940 checks additionally expect an s_nop 0 between the
; def and the use.
993 define void @s_shuffle_v2i16_v2i16__0_u() {
994 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__0_u:
996 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
997 ; GFX900-NEXT: ;;#ASMSTART
998 ; GFX900-NEXT: ; def s8
999 ; GFX900-NEXT: ;;#ASMEND
1000 ; GFX900-NEXT: ;;#ASMSTART
1001 ; GFX900-NEXT: ; use s8
1002 ; GFX900-NEXT: ;;#ASMEND
1003 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1005 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__0_u:
1007 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1008 ; GFX90A-NEXT: ;;#ASMSTART
1009 ; GFX90A-NEXT: ; def s8
1010 ; GFX90A-NEXT: ;;#ASMEND
1011 ; GFX90A-NEXT: ;;#ASMSTART
1012 ; GFX90A-NEXT: ; use s8
1013 ; GFX90A-NEXT: ;;#ASMEND
1014 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1016 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__0_u:
1018 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1019 ; GFX940-NEXT: ;;#ASMSTART
1020 ; GFX940-NEXT: ; def s8
1021 ; GFX940-NEXT: ;;#ASMEND
1022 ; GFX940-NEXT: s_nop 0
1023 ; GFX940-NEXT: ;;#ASMSTART
1024 ; GFX940-NEXT: ; use s8
1025 ; GFX940-NEXT: ;;#ASMEND
1026 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1027 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1028 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 0, i32 poison>
1029 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <1, poison>: result element 0 is %vec0[1]; element 1
; is unconstrained.
; The checks expect a single s_lshr_b32 by 16 that writes the result into s8.
1033 define void @s_shuffle_v2i16_v2i16__1_u() {
1034 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__1_u:
1036 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1037 ; GFX900-NEXT: ;;#ASMSTART
1038 ; GFX900-NEXT: ; def s4
1039 ; GFX900-NEXT: ;;#ASMEND
1040 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
1041 ; GFX900-NEXT: ;;#ASMSTART
1042 ; GFX900-NEXT: ; use s8
1043 ; GFX900-NEXT: ;;#ASMEND
1044 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1046 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__1_u:
1048 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1049 ; GFX90A-NEXT: ;;#ASMSTART
1050 ; GFX90A-NEXT: ; def s4
1051 ; GFX90A-NEXT: ;;#ASMEND
1052 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
1053 ; GFX90A-NEXT: ;;#ASMSTART
1054 ; GFX90A-NEXT: ; use s8
1055 ; GFX90A-NEXT: ;;#ASMEND
1056 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1058 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__1_u:
1060 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1061 ; GFX940-NEXT: ;;#ASMSTART
1062 ; GFX940-NEXT: ; def s0
1063 ; GFX940-NEXT: ;;#ASMEND
1064 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
1065 ; GFX940-NEXT: ;;#ASMSTART
1066 ; GFX940-NEXT: ; use s8
1067 ; GFX940-NEXT: ;;#ASMEND
1068 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1069 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1070 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
1071 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <2, poison>: index 2 addresses element 0 of the second
; operand, which is poison in this test, so nothing from %vec0 is selected.
; The checks (shared by all three targets) expect only the `; use s8` asm.
1075 define void @s_shuffle_v2i16_v2i16__2_u() {
1076 ; GFX9-LABEL: s_shuffle_v2i16_v2i16__2_u:
1078 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1079 ; GFX9-NEXT: ;;#ASMSTART
1080 ; GFX9-NEXT: ; use s8
1081 ; GFX9-NEXT: ;;#ASMEND
1082 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1083 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1084 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 2, i32 poison>
1085 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <3, poison>: result element 0 is %vec1[1]; element 1
; is unconstrained. %vec0 is defined in the IR but not selected by the mask.
; The checks expect only one `; def` in the output followed by a single
; s_lshr_b32 by 16 that writes the result into s8.
1089 define void @s_shuffle_v2i16_v2i16__3_u() {
1090 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__3_u:
1092 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1093 ; GFX900-NEXT: ;;#ASMSTART
1094 ; GFX900-NEXT: ; def s4
1095 ; GFX900-NEXT: ;;#ASMEND
1096 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
1097 ; GFX900-NEXT: ;;#ASMSTART
1098 ; GFX900-NEXT: ; use s8
1099 ; GFX900-NEXT: ;;#ASMEND
1100 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1102 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__3_u:
1104 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1105 ; GFX90A-NEXT: ;;#ASMSTART
1106 ; GFX90A-NEXT: ; def s4
1107 ; GFX90A-NEXT: ;;#ASMEND
1108 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
1109 ; GFX90A-NEXT: ;;#ASMSTART
1110 ; GFX90A-NEXT: ; use s8
1111 ; GFX90A-NEXT: ;;#ASMEND
1112 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1114 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__3_u:
1116 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1117 ; GFX940-NEXT: ;;#ASMSTART
1118 ; GFX940-NEXT: ; def s0
1119 ; GFX940-NEXT: ;;#ASMEND
1120 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
1121 ; GFX940-NEXT: ;;#ASMSTART
1122 ; GFX940-NEXT: ; use s8
1123 ; GFX940-NEXT: ;;#ASMEND
1124 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1125 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1126 %vec1 = call <2 x i16> asm "; def $0", "=s"()
1127 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 3, i32 poison>
1128 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <3, 0>: result element 0 is %vec1[1] and result
; element 1 is %vec0[0], so both inputs are needed.
; The checks expect two defs, an s_lshr_b32 by 16 on one of them, and an
; s_pack_ll_b32_b16 that writes the combined value into s8.
1132 define void @s_shuffle_v2i16_v2i16__3_0() {
1133 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__3_0:
1135 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1136 ; GFX900-NEXT: ;;#ASMSTART
1137 ; GFX900-NEXT: ; def s5
1138 ; GFX900-NEXT: ;;#ASMEND
1139 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
1140 ; GFX900-NEXT: ;;#ASMSTART
1141 ; GFX900-NEXT: ; def s4
1142 ; GFX900-NEXT: ;;#ASMEND
1143 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
1144 ; GFX900-NEXT: ;;#ASMSTART
1145 ; GFX900-NEXT: ; use s8
1146 ; GFX900-NEXT: ;;#ASMEND
1147 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1149 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__3_0:
1151 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1152 ; GFX90A-NEXT: ;;#ASMSTART
1153 ; GFX90A-NEXT: ; def s5
1154 ; GFX90A-NEXT: ;;#ASMEND
1155 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
1156 ; GFX90A-NEXT: ;;#ASMSTART
1157 ; GFX90A-NEXT: ; def s4
1158 ; GFX90A-NEXT: ;;#ASMEND
1159 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
1160 ; GFX90A-NEXT: ;;#ASMSTART
1161 ; GFX90A-NEXT: ; use s8
1162 ; GFX90A-NEXT: ;;#ASMEND
1163 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1165 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__3_0:
1167 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1168 ; GFX940-NEXT: ;;#ASMSTART
1169 ; GFX940-NEXT: ; def s1
1170 ; GFX940-NEXT: ;;#ASMEND
1171 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
1172 ; GFX940-NEXT: ;;#ASMSTART
1173 ; GFX940-NEXT: ; def s0
1174 ; GFX940-NEXT: ;;#ASMEND
1175 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
1176 ; GFX940-NEXT: ;;#ASMSTART
1177 ; GFX940-NEXT: ; use s8
1178 ; GFX940-NEXT: ;;#ASMEND
1179 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1180 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1181 %vec1 = call <2 x i16> asm "; def $0", "=s"()
1182 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 3, i32 0>
1183 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <3, 1>: result element 0 is %vec1[1] and result
; element 1 is %vec0[1], i.e. element 1 of each input.
; The checks expect two defs and a single s_pack_hh_b32_b16 into s8, with no
; separate shift.
1187 define void @s_shuffle_v2i16_v2i16__3_1() {
1188 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__3_1:
1190 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1191 ; GFX900-NEXT: ;;#ASMSTART
1192 ; GFX900-NEXT: ; def s4
1193 ; GFX900-NEXT: ;;#ASMEND
1194 ; GFX900-NEXT: ;;#ASMSTART
1195 ; GFX900-NEXT: ; def s5
1196 ; GFX900-NEXT: ;;#ASMEND
1197 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4
1198 ; GFX900-NEXT: ;;#ASMSTART
1199 ; GFX900-NEXT: ; use s8
1200 ; GFX900-NEXT: ;;#ASMEND
1201 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1203 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__3_1:
1205 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1206 ; GFX90A-NEXT: ;;#ASMSTART
1207 ; GFX90A-NEXT: ; def s4
1208 ; GFX90A-NEXT: ;;#ASMEND
1209 ; GFX90A-NEXT: ;;#ASMSTART
1210 ; GFX90A-NEXT: ; def s5
1211 ; GFX90A-NEXT: ;;#ASMEND
1212 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4
1213 ; GFX90A-NEXT: ;;#ASMSTART
1214 ; GFX90A-NEXT: ; use s8
1215 ; GFX90A-NEXT: ;;#ASMEND
1216 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1218 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__3_1:
1220 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1221 ; GFX940-NEXT: ;;#ASMSTART
1222 ; GFX940-NEXT: ; def s0
1223 ; GFX940-NEXT: ;;#ASMEND
1224 ; GFX940-NEXT: ;;#ASMSTART
1225 ; GFX940-NEXT: ; def s1
1226 ; GFX940-NEXT: ;;#ASMEND
1227 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0
1228 ; GFX940-NEXT: ;;#ASMSTART
1229 ; GFX940-NEXT: ; use s8
1230 ; GFX940-NEXT: ;;#ASMEND
1231 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1232 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1233 %vec1 = call <2 x i16> asm "; def $0", "=s"()
1234 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 3, i32 1>
1235 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <3, 2>: both result elements come from %vec1 with
; their positions swapped; %vec0 is not selected.
; The checks expect only one `; def` in the output, then an s_lshr_b32 by 16
; into a temporary and an s_pack_ll_b32_b16 that writes the result into s8.
1239 define void @s_shuffle_v2i16_v2i16__3_2() {
1240 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__3_2:
1242 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1243 ; GFX900-NEXT: ;;#ASMSTART
1244 ; GFX900-NEXT: ; def s4
1245 ; GFX900-NEXT: ;;#ASMEND
1246 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
1247 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
1248 ; GFX900-NEXT: ;;#ASMSTART
1249 ; GFX900-NEXT: ; use s8
1250 ; GFX900-NEXT: ;;#ASMEND
1251 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1253 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__3_2:
1255 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1256 ; GFX90A-NEXT: ;;#ASMSTART
1257 ; GFX90A-NEXT: ; def s4
1258 ; GFX90A-NEXT: ;;#ASMEND
1259 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
1260 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
1261 ; GFX90A-NEXT: ;;#ASMSTART
1262 ; GFX90A-NEXT: ; use s8
1263 ; GFX90A-NEXT: ;;#ASMEND
1264 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1266 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__3_2:
1268 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1269 ; GFX940-NEXT: ;;#ASMSTART
1270 ; GFX940-NEXT: ; def s0
1271 ; GFX940-NEXT: ;;#ASMEND
1272 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
1273 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
1274 ; GFX940-NEXT: ;;#ASMSTART
1275 ; GFX940-NEXT: ; use s8
1276 ; GFX940-NEXT: ;;#ASMEND
1277 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1278 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1279 %vec1 = call <2 x i16> asm "; def $0", "=s"()
1280 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 3, i32 2>
1281 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <3, 3>: both result elements are %vec1[1] (a splat of
; that element); %vec0 is not selected.
; The checks expect only one `; def` in the output and a single
; s_pack_hh_b32_b16 using the same source register for both operands.
1285 define void @s_shuffle_v2i16_v2i16__3_3() {
1286 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__3_3:
1288 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1289 ; GFX900-NEXT: ;;#ASMSTART
1290 ; GFX900-NEXT: ; def s4
1291 ; GFX900-NEXT: ;;#ASMEND
1292 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s4
1293 ; GFX900-NEXT: ;;#ASMSTART
1294 ; GFX900-NEXT: ; use s8
1295 ; GFX900-NEXT: ;;#ASMEND
1296 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1298 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__3_3:
1300 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1301 ; GFX90A-NEXT: ;;#ASMSTART
1302 ; GFX90A-NEXT: ; def s4
1303 ; GFX90A-NEXT: ;;#ASMEND
1304 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s4
1305 ; GFX90A-NEXT: ;;#ASMSTART
1306 ; GFX90A-NEXT: ; use s8
1307 ; GFX90A-NEXT: ;;#ASMEND
1308 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1310 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__3_3:
1312 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1313 ; GFX940-NEXT: ;;#ASMSTART
1314 ; GFX940-NEXT: ; def s0
1315 ; GFX940-NEXT: ;;#ASMEND
1316 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0
1317 ; GFX940-NEXT: ;;#ASMSTART
1318 ; GFX940-NEXT: ; use s8
1319 ; GFX940-NEXT: ;;#ASMEND
1320 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1321 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1322 %vec1 = call <2 x i16> asm "; def $0", "=s"()
1323 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 3, i32 3>
1324 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <poison, 0>: result element 1 is %vec0[0]; element 0
; is unconstrained.
; The checks expect a single s_lshl_b32 by 16 that writes the result into s8.
1328 define void @s_shuffle_v2i16_v2i16__u_0() {
1329 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__u_0:
1331 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1332 ; GFX900-NEXT: ;;#ASMSTART
1333 ; GFX900-NEXT: ; def s4
1334 ; GFX900-NEXT: ;;#ASMEND
1335 ; GFX900-NEXT: s_lshl_b32 s8, s4, 16
1336 ; GFX900-NEXT: ;;#ASMSTART
1337 ; GFX900-NEXT: ; use s8
1338 ; GFX900-NEXT: ;;#ASMEND
1339 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1341 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__u_0:
1343 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1344 ; GFX90A-NEXT: ;;#ASMSTART
1345 ; GFX90A-NEXT: ; def s4
1346 ; GFX90A-NEXT: ;;#ASMEND
1347 ; GFX90A-NEXT: s_lshl_b32 s8, s4, 16
1348 ; GFX90A-NEXT: ;;#ASMSTART
1349 ; GFX90A-NEXT: ; use s8
1350 ; GFX90A-NEXT: ;;#ASMEND
1351 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1353 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__u_0:
1355 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356 ; GFX940-NEXT: ;;#ASMSTART
1357 ; GFX940-NEXT: ; def s0
1358 ; GFX940-NEXT: ;;#ASMEND
1359 ; GFX940-NEXT: s_lshl_b32 s8, s0, 16
1360 ; GFX940-NEXT: ;;#ASMSTART
1361 ; GFX940-NEXT: ; use s8
1362 ; GFX940-NEXT: ;;#ASMEND
1363 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1364 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1365 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 poison, i32 0>
1366 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with a zeroinitializer mask, i.e. <0, 0>: both result elements
; are %vec0[0] (a splat of that element).
; The checks expect a single s_pack_ll_b32_b16 using the same source register
; for both operands, writing the result into s8.
1370 define void @s_shuffle_v2i16_v2i16__0_0() {
1371 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__0_0:
1373 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1374 ; GFX900-NEXT: ;;#ASMSTART
1375 ; GFX900-NEXT: ; def s4
1376 ; GFX900-NEXT: ;;#ASMEND
1377 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
1378 ; GFX900-NEXT: ;;#ASMSTART
1379 ; GFX900-NEXT: ; use s8
1380 ; GFX900-NEXT: ;;#ASMEND
1381 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1383 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__0_0:
1385 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1386 ; GFX90A-NEXT: ;;#ASMSTART
1387 ; GFX90A-NEXT: ; def s4
1388 ; GFX90A-NEXT: ;;#ASMEND
1389 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
1390 ; GFX90A-NEXT: ;;#ASMSTART
1391 ; GFX90A-NEXT: ; use s8
1392 ; GFX90A-NEXT: ;;#ASMEND
1393 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1395 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__0_0:
1397 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1398 ; GFX940-NEXT: ;;#ASMSTART
1399 ; GFX940-NEXT: ; def s0
1400 ; GFX940-NEXT: ;;#ASMEND
1401 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
1402 ; GFX940-NEXT: ;;#ASMSTART
1403 ; GFX940-NEXT: ; use s8
1404 ; GFX940-NEXT: ;;#ASMEND
1405 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1406 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1407 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> zeroinitializer
1408 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <1, 0>: the two elements of %vec0 are swapped.
; The checks expect an s_lshr_b32 by 16 into a temporary followed by an
; s_pack_ll_b32_b16 that writes the result into s8.
1412 define void @s_shuffle_v2i16_v2i16__1_0() {
1413 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__1_0:
1415 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1416 ; GFX900-NEXT: ;;#ASMSTART
1417 ; GFX900-NEXT: ; def s4
1418 ; GFX900-NEXT: ;;#ASMEND
1419 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
1420 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
1421 ; GFX900-NEXT: ;;#ASMSTART
1422 ; GFX900-NEXT: ; use s8
1423 ; GFX900-NEXT: ;;#ASMEND
1424 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1426 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__1_0:
1428 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1429 ; GFX90A-NEXT: ;;#ASMSTART
1430 ; GFX90A-NEXT: ; def s4
1431 ; GFX90A-NEXT: ;;#ASMEND
1432 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
1433 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
1434 ; GFX90A-NEXT: ;;#ASMSTART
1435 ; GFX90A-NEXT: ; use s8
1436 ; GFX90A-NEXT: ;;#ASMEND
1437 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1439 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__1_0:
1441 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1442 ; GFX940-NEXT: ;;#ASMSTART
1443 ; GFX940-NEXT: ; def s0
1444 ; GFX940-NEXT: ;;#ASMEND
1445 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
1446 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
1447 ; GFX940-NEXT: ;;#ASMSTART
1448 ; GFX940-NEXT: ; use s8
1449 ; GFX940-NEXT: ;;#ASMEND
1450 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1451 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1452 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
1453 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <2, 0>: result element 1 is %vec0[0]; index 2 in
; element 0 addresses the second operand, which is poison in this test.
; The checks expect a single s_lshl_b32 by 16 that writes the result into s8.
1457 define void @s_shuffle_v2i16_v2i16__2_0() {
1458 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__2_0:
1460 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1461 ; GFX900-NEXT: ;;#ASMSTART
1462 ; GFX900-NEXT: ; def s4
1463 ; GFX900-NEXT: ;;#ASMEND
1464 ; GFX900-NEXT: s_lshl_b32 s8, s4, 16
1465 ; GFX900-NEXT: ;;#ASMSTART
1466 ; GFX900-NEXT: ; use s8
1467 ; GFX900-NEXT: ;;#ASMEND
1468 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1470 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__2_0:
1472 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1473 ; GFX90A-NEXT: ;;#ASMSTART
1474 ; GFX90A-NEXT: ; def s4
1475 ; GFX90A-NEXT: ;;#ASMEND
1476 ; GFX90A-NEXT: s_lshl_b32 s8, s4, 16
1477 ; GFX90A-NEXT: ;;#ASMSTART
1478 ; GFX90A-NEXT: ; use s8
1479 ; GFX90A-NEXT: ;;#ASMEND
1480 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1482 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__2_0:
1484 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1485 ; GFX940-NEXT: ;;#ASMSTART
1486 ; GFX940-NEXT: ; def s0
1487 ; GFX940-NEXT: ;;#ASMEND
1488 ; GFX940-NEXT: s_lshl_b32 s8, s0, 16
1489 ; GFX940-NEXT: ;;#ASMSTART
1490 ; GFX940-NEXT: ; use s8
1491 ; GFX940-NEXT: ;;#ASMEND
1492 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1493 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1494 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 2, i32 0>
1495 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <poison, 1>: result element 1 is %vec0[1], already in
; position; element 0 is unconstrained.
; The checks expect the def to land directly in s8 and be used as-is, with no
; shift or pack; the gfx940 checks additionally expect an s_nop 0 between the
; def and the use.
1499 define void @s_shuffle_v2i16_v2i16__u_1() {
1500 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__u_1:
1502 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1503 ; GFX900-NEXT: ;;#ASMSTART
1504 ; GFX900-NEXT: ; def s8
1505 ; GFX900-NEXT: ;;#ASMEND
1506 ; GFX900-NEXT: ;;#ASMSTART
1507 ; GFX900-NEXT: ; use s8
1508 ; GFX900-NEXT: ;;#ASMEND
1509 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1511 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__u_1:
1513 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1514 ; GFX90A-NEXT: ;;#ASMSTART
1515 ; GFX90A-NEXT: ; def s8
1516 ; GFX90A-NEXT: ;;#ASMEND
1517 ; GFX90A-NEXT: ;;#ASMSTART
1518 ; GFX90A-NEXT: ; use s8
1519 ; GFX90A-NEXT: ;;#ASMEND
1520 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1522 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__u_1:
1524 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1525 ; GFX940-NEXT: ;;#ASMSTART
1526 ; GFX940-NEXT: ; def s8
1527 ; GFX940-NEXT: ;;#ASMEND
1528 ; GFX940-NEXT: s_nop 0
1529 ; GFX940-NEXT: ;;#ASMSTART
1530 ; GFX940-NEXT: ; use s8
1531 ; GFX940-NEXT: ;;#ASMEND
1532 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1533 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1534 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 poison, i32 1>
1535 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with the identity mask <0, 1>: the result is %vec0 unchanged.
; The checks expect the def to land directly in s8 and be used as-is, with no
; shift or pack; the gfx940 checks additionally expect an s_nop 0 between the
; def and the use.
1539 define void @s_shuffle_v2i16_v2i16__0_1() {
1540 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__0_1:
1542 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1543 ; GFX900-NEXT: ;;#ASMSTART
1544 ; GFX900-NEXT: ; def s8
1545 ; GFX900-NEXT: ;;#ASMEND
1546 ; GFX900-NEXT: ;;#ASMSTART
1547 ; GFX900-NEXT: ; use s8
1548 ; GFX900-NEXT: ;;#ASMEND
1549 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1551 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__0_1:
1553 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1554 ; GFX90A-NEXT: ;;#ASMSTART
1555 ; GFX90A-NEXT: ; def s8
1556 ; GFX90A-NEXT: ;;#ASMEND
1557 ; GFX90A-NEXT: ;;#ASMSTART
1558 ; GFX90A-NEXT: ; use s8
1559 ; GFX90A-NEXT: ;;#ASMEND
1560 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1562 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__0_1:
1564 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1565 ; GFX940-NEXT: ;;#ASMSTART
1566 ; GFX940-NEXT: ; def s8
1567 ; GFX940-NEXT: ;;#ASMEND
1568 ; GFX940-NEXT: s_nop 0
1569 ; GFX940-NEXT: ;;#ASMSTART
1570 ; GFX940-NEXT: ; use s8
1571 ; GFX940-NEXT: ;;#ASMEND
1572 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1573 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1574 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 0, i32 1>
1575 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <1, 1>: both result elements are %vec0[1] (a splat of
; that element).
; The checks expect a single s_pack_hh_b32_b16 using the same source register
; for both operands, writing the result into s8.
1579 define void @s_shuffle_v2i16_v2i16__1_1() {
1580 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__1_1:
1582 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1583 ; GFX900-NEXT: ;;#ASMSTART
1584 ; GFX900-NEXT: ; def s4
1585 ; GFX900-NEXT: ;;#ASMEND
1586 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s4
1587 ; GFX900-NEXT: ;;#ASMSTART
1588 ; GFX900-NEXT: ; use s8
1589 ; GFX900-NEXT: ;;#ASMEND
1590 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1592 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__1_1:
1594 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1595 ; GFX90A-NEXT: ;;#ASMSTART
1596 ; GFX90A-NEXT: ; def s4
1597 ; GFX90A-NEXT: ;;#ASMEND
1598 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s4
1599 ; GFX90A-NEXT: ;;#ASMSTART
1600 ; GFX90A-NEXT: ; use s8
1601 ; GFX90A-NEXT: ;;#ASMEND
1602 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1604 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__1_1:
1606 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1607 ; GFX940-NEXT: ;;#ASMSTART
1608 ; GFX940-NEXT: ; def s0
1609 ; GFX940-NEXT: ;;#ASMEND
1610 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0
1611 ; GFX940-NEXT: ;;#ASMSTART
1612 ; GFX940-NEXT: ; use s8
1613 ; GFX940-NEXT: ;;#ASMEND
1614 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1615 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1616 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 1, i32 1>
1617 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <2, 1>: result element 1 is %vec0[1], already in
; position; index 2 in element 0 addresses the second operand, which is poison
; in this test.
; The checks expect the def to land directly in s8 and be used as-is; the
; gfx940 checks additionally expect an s_nop 0 between the def and the use.
1621 define void @s_shuffle_v2i16_v2i16__2_1() {
1622 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__2_1:
1624 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1625 ; GFX900-NEXT: ;;#ASMSTART
1626 ; GFX900-NEXT: ; def s8
1627 ; GFX900-NEXT: ;;#ASMEND
1628 ; GFX900-NEXT: ;;#ASMSTART
1629 ; GFX900-NEXT: ; use s8
1630 ; GFX900-NEXT: ;;#ASMEND
1631 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1633 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__2_1:
1635 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1636 ; GFX90A-NEXT: ;;#ASMSTART
1637 ; GFX90A-NEXT: ; def s8
1638 ; GFX90A-NEXT: ;;#ASMEND
1639 ; GFX90A-NEXT: ;;#ASMSTART
1640 ; GFX90A-NEXT: ; use s8
1641 ; GFX90A-NEXT: ;;#ASMEND
1642 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1644 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__2_1:
1646 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1647 ; GFX940-NEXT: ;;#ASMSTART
1648 ; GFX940-NEXT: ; def s8
1649 ; GFX940-NEXT: ;;#ASMEND
1650 ; GFX940-NEXT: s_nop 0
1651 ; GFX940-NEXT: ;;#ASMSTART
1652 ; GFX940-NEXT: ; use s8
1653 ; GFX940-NEXT: ;;#ASMEND
1654 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1655 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1656 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 2, i32 1>
1657 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <poison, 2>: the only non-poison index (2) addresses
; the second operand, which is poison in this test, so nothing from %vec0 is
; selected.
; The checks (shared by all three targets) expect only the `; use s8` asm.
1661 define void @s_shuffle_v2i16_v2i16__u_2() {
1662 ; GFX9-LABEL: s_shuffle_v2i16_v2i16__u_2:
1664 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1665 ; GFX9-NEXT: ;;#ASMSTART
1666 ; GFX9-NEXT: ; use s8
1667 ; GFX9-NEXT: ;;#ASMEND
1668 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1669 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1670 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 poison, i32 2>
1671 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <0, 2>: result element 0 is %vec0[0], already in
; position; index 2 in element 1 addresses the second operand, which is poison
; in this test.
; The checks expect the def to land directly in s8 and be used as-is; the
; gfx940 checks additionally expect an s_nop 0 between the def and the use.
1675 define void @s_shuffle_v2i16_v2i16__0_2() {
1676 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__0_2:
1678 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1679 ; GFX900-NEXT: ;;#ASMSTART
1680 ; GFX900-NEXT: ; def s8
1681 ; GFX900-NEXT: ;;#ASMEND
1682 ; GFX900-NEXT: ;;#ASMSTART
1683 ; GFX900-NEXT: ; use s8
1684 ; GFX900-NEXT: ;;#ASMEND
1685 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1687 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__0_2:
1689 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1690 ; GFX90A-NEXT: ;;#ASMSTART
1691 ; GFX90A-NEXT: ; def s8
1692 ; GFX90A-NEXT: ;;#ASMEND
1693 ; GFX90A-NEXT: ;;#ASMSTART
1694 ; GFX90A-NEXT: ; use s8
1695 ; GFX90A-NEXT: ;;#ASMEND
1696 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1698 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__0_2:
1700 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1701 ; GFX940-NEXT: ;;#ASMSTART
1702 ; GFX940-NEXT: ; def s8
1703 ; GFX940-NEXT: ;;#ASMEND
1704 ; GFX940-NEXT: s_nop 0
1705 ; GFX940-NEXT: ;;#ASMSTART
1706 ; GFX940-NEXT: ; use s8
1707 ; GFX940-NEXT: ;;#ASMEND
1708 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1709 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1710 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 0, i32 2>
1711 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <1, 2>: result element 0 is %vec0[1]; index 2 in
; element 1 addresses the second operand, which is poison in this test.
; The checks expect a single s_lshr_b32 by 16 that writes the result into s8.
1715 define void @s_shuffle_v2i16_v2i16__1_2() {
1716 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__1_2:
1718 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1719 ; GFX900-NEXT: ;;#ASMSTART
1720 ; GFX900-NEXT: ; def s4
1721 ; GFX900-NEXT: ;;#ASMEND
1722 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
1723 ; GFX900-NEXT: ;;#ASMSTART
1724 ; GFX900-NEXT: ; use s8
1725 ; GFX900-NEXT: ;;#ASMEND
1726 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1728 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__1_2:
1730 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1731 ; GFX90A-NEXT: ;;#ASMSTART
1732 ; GFX90A-NEXT: ; def s4
1733 ; GFX90A-NEXT: ;;#ASMEND
1734 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
1735 ; GFX90A-NEXT: ;;#ASMSTART
1736 ; GFX90A-NEXT: ; use s8
1737 ; GFX90A-NEXT: ;;#ASMEND
1738 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1740 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__1_2:
1742 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1743 ; GFX940-NEXT: ;;#ASMSTART
1744 ; GFX940-NEXT: ; def s0
1745 ; GFX940-NEXT: ;;#ASMEND
1746 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
1747 ; GFX940-NEXT: ;;#ASMSTART
1748 ; GFX940-NEXT: ; use s8
1749 ; GFX940-NEXT: ;;#ASMEND
1750 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1751 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1752 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 1, i32 2>
1753 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <2, 2>: both indices address the second operand,
; which is poison in this test, so nothing from %vec0 is selected.
; The checks (shared by all three targets) expect only the `; use s8` asm.
1757 define void @s_shuffle_v2i16_v2i16__2_2() {
1758 ; GFX9-LABEL: s_shuffle_v2i16_v2i16__2_2:
1760 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1761 ; GFX9-NEXT: ;;#ASMSTART
1762 ; GFX9-NEXT: ; use s8
1763 ; GFX9-NEXT: ;;#ASMEND
1764 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1765 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1766 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> <i32 2, i32 2>
1767 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <poison, 3>: result element 1 is %vec1[1], already in
; position; element 0 is unconstrained. %vec0 is defined in the IR but not
; selected by the mask.
; The checks expect only one `; def`, landing directly in s8, and no shift or
; pack; the gfx940 checks additionally expect an s_nop 0 between the def and
; the use.
1771 define void @s_shuffle_v2i16_v2i16__u_3() {
1772 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__u_3:
1774 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1775 ; GFX900-NEXT: ;;#ASMSTART
1776 ; GFX900-NEXT: ; def s8
1777 ; GFX900-NEXT: ;;#ASMEND
1778 ; GFX900-NEXT: ;;#ASMSTART
1779 ; GFX900-NEXT: ; use s8
1780 ; GFX900-NEXT: ;;#ASMEND
1781 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1783 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__u_3:
1785 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1786 ; GFX90A-NEXT: ;;#ASMSTART
1787 ; GFX90A-NEXT: ; def s8
1788 ; GFX90A-NEXT: ;;#ASMEND
1789 ; GFX90A-NEXT: ;;#ASMSTART
1790 ; GFX90A-NEXT: ; use s8
1791 ; GFX90A-NEXT: ;;#ASMEND
1792 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1794 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__u_3:
1796 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1797 ; GFX940-NEXT: ;;#ASMSTART
1798 ; GFX940-NEXT: ; def s8
1799 ; GFX940-NEXT: ;;#ASMEND
1800 ; GFX940-NEXT: s_nop 0
1801 ; GFX940-NEXT: ;;#ASMSTART
1802 ; GFX940-NEXT: ; use s8
1803 ; GFX940-NEXT: ;;#ASMEND
1804 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1805 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1806 %vec1 = call <2 x i16> asm "; def $0", "=s"()
1807 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 poison, i32 3>
1808 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR shuffle with mask <0, 3>: result element 0 is %vec0[0] and result
; element 1 is %vec1[1], so each input contributes the element that is already
; in the right position.
; The checks expect two defs and a single s_pack_lh_b32_b16 into s8, with no
; separate shift.
1812 define void @s_shuffle_v2i16_v2i16__0_3() {
1813 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__0_3:
1815 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1816 ; GFX900-NEXT: ;;#ASMSTART
1817 ; GFX900-NEXT: ; def s4
1818 ; GFX900-NEXT: ;;#ASMEND
1819 ; GFX900-NEXT: ;;#ASMSTART
1820 ; GFX900-NEXT: ; def s5
1821 ; GFX900-NEXT: ;;#ASMEND
1822 ; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s5
1823 ; GFX900-NEXT: ;;#ASMSTART
1824 ; GFX900-NEXT: ; use s8
1825 ; GFX900-NEXT: ;;#ASMEND
1826 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1828 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__0_3:
1830 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1831 ; GFX90A-NEXT: ;;#ASMSTART
1832 ; GFX90A-NEXT: ; def s4
1833 ; GFX90A-NEXT: ;;#ASMEND
1834 ; GFX90A-NEXT: ;;#ASMSTART
1835 ; GFX90A-NEXT: ; def s5
1836 ; GFX90A-NEXT: ;;#ASMEND
1837 ; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s5
1838 ; GFX90A-NEXT: ;;#ASMSTART
1839 ; GFX90A-NEXT: ; use s8
1840 ; GFX90A-NEXT: ;;#ASMEND
1841 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1843 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__0_3:
1845 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1846 ; GFX940-NEXT: ;;#ASMSTART
1847 ; GFX940-NEXT: ; def s0
1848 ; GFX940-NEXT: ;;#ASMEND
1849 ; GFX940-NEXT: ;;#ASMSTART
1850 ; GFX940-NEXT: ; def s1
1851 ; GFX940-NEXT: ;;#ASMEND
1852 ; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1
1853 ; GFX940-NEXT: ;;#ASMSTART
1854 ; GFX940-NEXT: ; use s8
1855 ; GFX940-NEXT: ;;#ASMEND
1856 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1857 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1858 %vec1 = call <2 x i16> asm "; def $0", "=s"()
1859 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 3>
1860 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar-register shuffle of two <2 x i16> inputs with mask <1, 3>: lane 0 of
; the result is element 1 of %vec0 and lane 1 is element 3 of the concatenated
; inputs, i.e. element 1 of %vec1.
; The assertions below expect one s_pack_hh_b32_b16 writing s8 on every run
; line; the gfx940 run differs only in the input registers (s0/s1 instead of
; s4/s5).
1864 define void @s_shuffle_v2i16_v2i16__1_3() {
1865 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__1_3:
1867 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1868 ; GFX900-NEXT: ;;#ASMSTART
1869 ; GFX900-NEXT: ; def s4
1870 ; GFX900-NEXT: ;;#ASMEND
1871 ; GFX900-NEXT: ;;#ASMSTART
1872 ; GFX900-NEXT: ; def s5
1873 ; GFX900-NEXT: ;;#ASMEND
1874 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s5
1875 ; GFX900-NEXT: ;;#ASMSTART
1876 ; GFX900-NEXT: ; use s8
1877 ; GFX900-NEXT: ;;#ASMEND
1878 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1880 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__1_3:
1882 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1883 ; GFX90A-NEXT: ;;#ASMSTART
1884 ; GFX90A-NEXT: ; def s4
1885 ; GFX90A-NEXT: ;;#ASMEND
1886 ; GFX90A-NEXT: ;;#ASMSTART
1887 ; GFX90A-NEXT: ; def s5
1888 ; GFX90A-NEXT: ;;#ASMEND
1889 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s5
1890 ; GFX90A-NEXT: ;;#ASMSTART
1891 ; GFX90A-NEXT: ; use s8
1892 ; GFX90A-NEXT: ;;#ASMEND
1893 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1895 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__1_3:
1897 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1898 ; GFX940-NEXT: ;;#ASMSTART
1899 ; GFX940-NEXT: ; def s0
1900 ; GFX940-NEXT: ;;#ASMEND
1901 ; GFX940-NEXT: ;;#ASMSTART
1902 ; GFX940-NEXT: ; def s1
1903 ; GFX940-NEXT: ;;#ASMEND
1904 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1
1905 ; GFX940-NEXT: ;;#ASMSTART
1906 ; GFX940-NEXT: ; use s8
1907 ; GFX940-NEXT: ;;#ASMEND
1908 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are opaque values produced by inline asm with an "=s" output
; constraint, so the shuffle cannot be folded away at the IR level.
1909 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1910 %vec1 = call <2 x i16> asm "; def $0", "=s"()
1911 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 1, i32 3>
; The "{s8}" constraint pins the shuffled value to s8 for the side-effecting use.
1912 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar-register shuffle of two <2 x i16> inputs with mask <2, 3>: both result
; lanes come from %vec1, in order, so the result equals %vec1.
; The assertions below expect no pack instruction at all: a single asm def
; lands directly in s8 and is then used. Only one "; def" block appears in the
; expected output even though the IR has two defining asm calls; %vec0 is not
; referenced by the mask.
; The gfx940 run additionally expects an s_nop 0 between the def and the use.
1916 define void @s_shuffle_v2i16_v2i16__2_3() {
1917 ; GFX900-LABEL: s_shuffle_v2i16_v2i16__2_3:
1919 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1920 ; GFX900-NEXT: ;;#ASMSTART
1921 ; GFX900-NEXT: ; def s8
1922 ; GFX900-NEXT: ;;#ASMEND
1923 ; GFX900-NEXT: ;;#ASMSTART
1924 ; GFX900-NEXT: ; use s8
1925 ; GFX900-NEXT: ;;#ASMEND
1926 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1928 ; GFX90A-LABEL: s_shuffle_v2i16_v2i16__2_3:
1930 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1931 ; GFX90A-NEXT: ;;#ASMSTART
1932 ; GFX90A-NEXT: ; def s8
1933 ; GFX90A-NEXT: ;;#ASMEND
1934 ; GFX90A-NEXT: ;;#ASMSTART
1935 ; GFX90A-NEXT: ; use s8
1936 ; GFX90A-NEXT: ;;#ASMEND
1937 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1939 ; GFX940-LABEL: s_shuffle_v2i16_v2i16__2_3:
1941 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1942 ; GFX940-NEXT: ;;#ASMSTART
1943 ; GFX940-NEXT: ; def s8
1944 ; GFX940-NEXT: ;;#ASMEND
1945 ; GFX940-NEXT: s_nop 0
1946 ; GFX940-NEXT: ;;#ASMSTART
1947 ; GFX940-NEXT: ; use s8
1948 ; GFX940-NEXT: ;;#ASMEND
1949 ; GFX940-NEXT: s_setpc_b64 s[30:31]
; Both inputs are produced by inline asm with an "=s" output constraint; the
; defining asm calls are not marked sideeffect.
1950 %vec0 = call <2 x i16> asm "; def $0", "=s"()
1951 %vec1 = call <2 x i16> asm "; def $0", "=s"()
1952 %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 3>
; The "{s8}" constraint pins the shuffled value to s8 for the side-effecting use.
1953 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
1956 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1957 ; GFX90APLUS: {{.*}}