1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
; Shuffle with an entirely poison mask: no particular input element is requested.
; The expected output for every target contains no inline-asm def and no store,
; only the entry wait and the return.
7 define void @v_shuffle_v2i16_v4i16__u_u(ptr addrspace(1) inreg %ptr) {
8 ; GFX9-LABEL: v_shuffle_v2i16_v4i16__u_u:
10 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12 %vec0 = call <4 x i16> asm "; def $0", "=v"()
13 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> poison
14 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <0, poison>: result element 0 is %vec0[0]; result element 1 is unconstrained.
; The expected code stores v0, the first register of the asm-defined pair, unchanged.
; The gfx940 output differs only in the address registers and the sc0 sc1 store modifiers.
18 define void @v_shuffle_v2i16_v4i16__0_u(ptr addrspace(1) inreg %ptr) {
19 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__0_u:
21 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
23 ; GFX900-NEXT: ;;#ASMSTART
24 ; GFX900-NEXT: ; def v[0:1]
25 ; GFX900-NEXT: ;;#ASMEND
26 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
27 ; GFX900-NEXT: s_waitcnt vmcnt(0)
28 ; GFX900-NEXT: s_setpc_b64 s[30:31]
30 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__0_u:
32 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
34 ; GFX90A-NEXT: ;;#ASMSTART
35 ; GFX90A-NEXT: ; def v[0:1]
36 ; GFX90A-NEXT: ;;#ASMEND
37 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
38 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
39 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
41 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_u:
43 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
45 ; GFX940-NEXT: ;;#ASMSTART
46 ; GFX940-NEXT: ; def v[0:1]
47 ; GFX940-NEXT: ;;#ASMEND
48 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
49 ; GFX940-NEXT: s_waitcnt vmcnt(0)
50 ; GFX940-NEXT: s_setpc_b64 s[30:31]
51 %vec0 = call <4 x i16> asm "; def $0", "=v"()
52 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 0, i32 poison>
53 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <1, poison>: result element 0 is %vec0[1]; result element 1 is unconstrained.
; The checks expect a v_alignbit_b32 by 16 on v0 ahead of the store. Its other source
; is an SGPR (s4, or s0 on gfx940) that is not written anywhere in the expected output.
57 define void @v_shuffle_v2i16_v4i16__1_u(ptr addrspace(1) inreg %ptr) {
58 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__1_u:
60 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61 ; GFX900-NEXT: ;;#ASMSTART
62 ; GFX900-NEXT: ; def v[0:1]
63 ; GFX900-NEXT: ;;#ASMEND
64 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
65 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
66 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
67 ; GFX900-NEXT: s_waitcnt vmcnt(0)
68 ; GFX900-NEXT: s_setpc_b64 s[30:31]
70 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__1_u:
72 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73 ; GFX90A-NEXT: ;;#ASMSTART
74 ; GFX90A-NEXT: ; def v[0:1]
75 ; GFX90A-NEXT: ;;#ASMEND
76 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
77 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
78 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
79 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
80 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
82 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_u:
84 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GFX940-NEXT: ;;#ASMSTART
86 ; GFX940-NEXT: ; def v[0:1]
87 ; GFX940-NEXT: ;;#ASMEND
88 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
89 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
90 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
91 ; GFX940-NEXT: s_waitcnt vmcnt(0)
92 ; GFX940-NEXT: s_setpc_b64 s[30:31]
93 %vec0 = call <4 x i16> asm "; def $0", "=v"()
94 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 1, i32 poison>
95 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <2, poison>: result element 0 is %vec0[2]; result element 1 is unconstrained.
; The expected code stores v1, the second register of the asm-defined pair, unchanged.
99 define void @v_shuffle_v2i16_v4i16__2_u(ptr addrspace(1) inreg %ptr) {
100 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__2_u:
102 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
104 ; GFX900-NEXT: ;;#ASMSTART
105 ; GFX900-NEXT: ; def v[0:1]
106 ; GFX900-NEXT: ;;#ASMEND
107 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
108 ; GFX900-NEXT: s_waitcnt vmcnt(0)
109 ; GFX900-NEXT: s_setpc_b64 s[30:31]
111 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__2_u:
113 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
115 ; GFX90A-NEXT: ;;#ASMSTART
116 ; GFX90A-NEXT: ; def v[0:1]
117 ; GFX90A-NEXT: ;;#ASMEND
118 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
119 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
120 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
122 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_u:
124 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
126 ; GFX940-NEXT: ;;#ASMSTART
127 ; GFX940-NEXT: ; def v[0:1]
128 ; GFX940-NEXT: ;;#ASMEND
129 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
130 ; GFX940-NEXT: s_waitcnt vmcnt(0)
131 ; GFX940-NEXT: s_setpc_b64 s[30:31]
132 %vec0 = call <4 x i16> asm "; def $0", "=v"()
133 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 2, i32 poison>
134 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <3, poison>: result element 0 is %vec0[3]; result element 1 is unconstrained.
; The checks expect a v_alignbit_b32 by 16 on v1, the second register of the asm-defined
; pair. Its other source is an SGPR (s4, or s0 on gfx940) that is not written anywhere in
; the expected output.
138 define void @v_shuffle_v2i16_v4i16__3_u(ptr addrspace(1) inreg %ptr) {
139 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__3_u:
141 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142 ; GFX900-NEXT: ;;#ASMSTART
143 ; GFX900-NEXT: ; def v[0:1]
144 ; GFX900-NEXT: ;;#ASMEND
145 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
146 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16
147 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
148 ; GFX900-NEXT: s_waitcnt vmcnt(0)
149 ; GFX900-NEXT: s_setpc_b64 s[30:31]
151 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__3_u:
153 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154 ; GFX90A-NEXT: ;;#ASMSTART
155 ; GFX90A-NEXT: ; def v[0:1]
156 ; GFX90A-NEXT: ;;#ASMEND
157 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
158 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16
159 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
160 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
161 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
163 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_u:
165 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166 ; GFX940-NEXT: ;;#ASMSTART
167 ; GFX940-NEXT: ; def v[0:1]
168 ; GFX940-NEXT: ;;#ASMEND
169 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
170 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16
171 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
172 ; GFX940-NEXT: s_waitcnt vmcnt(0)
173 ; GFX940-NEXT: s_setpc_b64 s[30:31]
174 %vec0 = call <4 x i16> asm "; def $0", "=v"()
175 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 3, i32 poison>
176 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <4, poison>: index 4 names element 0 of the second shuffle operand, which is
; poison in this function. The expected output contains neither the inline-asm def nor
; a store, the same as the all-poison __u_u case.
; NOTE(review): the __5_u, __6_u and __7_u variants pass an asm-defined %vec1 as the
; second operand, while this one passes poison, so no real element is selected here.
; Confirm that is intentional before relying on this test for coverage of index 4; if
; the IR is changed, the assertions must be regenerated with the update script.
180 define void @v_shuffle_v2i16_v4i16__4_u(ptr addrspace(1) inreg %ptr) {
181 ; GFX9-LABEL: v_shuffle_v2i16_v4i16__4_u:
183 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184 ; GFX9-NEXT: s_setpc_b64 s[30:31]
185 %vec0 = call <4 x i16> asm "; def $0", "=v"()
186 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 4, i32 poison>
187 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <5, poison>: result element 0 is %vec1[1] (indices 4-7 address the second operand);
; result element 1 is unconstrained.
; %vec0 is never read by the shuffle, and the expected output shows a single asm def,
; v[0:1], followed by the same v_alignbit_b32-by-16 sequence as the __1_u test.
191 define void @v_shuffle_v2i16_v4i16__5_u(ptr addrspace(1) inreg %ptr) {
192 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__5_u:
194 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
195 ; GFX900-NEXT: ;;#ASMSTART
196 ; GFX900-NEXT: ; def v[0:1]
197 ; GFX900-NEXT: ;;#ASMEND
198 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
199 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
200 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
201 ; GFX900-NEXT: s_waitcnt vmcnt(0)
202 ; GFX900-NEXT: s_setpc_b64 s[30:31]
204 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__5_u:
206 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
207 ; GFX90A-NEXT: ;;#ASMSTART
208 ; GFX90A-NEXT: ; def v[0:1]
209 ; GFX90A-NEXT: ;;#ASMEND
210 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
211 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
212 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
213 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
214 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
216 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_u:
218 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
219 ; GFX940-NEXT: ;;#ASMSTART
220 ; GFX940-NEXT: ; def v[0:1]
221 ; GFX940-NEXT: ;;#ASMEND
222 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
223 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
224 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
225 ; GFX940-NEXT: s_waitcnt vmcnt(0)
226 ; GFX940-NEXT: s_setpc_b64 s[30:31]
227 %vec0 = call <4 x i16> asm "; def $0", "=v"()
228 %vec1 = call <4 x i16> asm "; def $0", "=v"()
229 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 poison>
230 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <6, poison>: result element 0 is %vec1[2] (indices 4-7 address the second operand);
; result element 1 is unconstrained.
; %vec0 is never read by the shuffle, and the expected output shows a single asm def,
; v[0:1], whose second register v1 is stored unchanged.
234 define void @v_shuffle_v2i16_v4i16__6_u(ptr addrspace(1) inreg %ptr) {
235 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__6_u:
237 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
239 ; GFX900-NEXT: ;;#ASMSTART
240 ; GFX900-NEXT: ; def v[0:1]
241 ; GFX900-NEXT: ;;#ASMEND
242 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
243 ; GFX900-NEXT: s_waitcnt vmcnt(0)
244 ; GFX900-NEXT: s_setpc_b64 s[30:31]
246 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__6_u:
248 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
250 ; GFX90A-NEXT: ;;#ASMSTART
251 ; GFX90A-NEXT: ; def v[0:1]
252 ; GFX90A-NEXT: ;;#ASMEND
253 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
254 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
255 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
257 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_u:
259 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
260 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
261 ; GFX940-NEXT: ;;#ASMSTART
262 ; GFX940-NEXT: ; def v[0:1]
263 ; GFX940-NEXT: ;;#ASMEND
264 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
265 ; GFX940-NEXT: s_waitcnt vmcnt(0)
266 ; GFX940-NEXT: s_setpc_b64 s[30:31]
267 %vec0 = call <4 x i16> asm "; def $0", "=v"()
268 %vec1 = call <4 x i16> asm "; def $0", "=v"()
269 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 poison>
270 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <7, poison>: result element 0 is %vec1[3] (indices 4-7 address the second operand);
; result element 1 is unconstrained.
; %vec0 is never read by the shuffle, and the expected output shows a single asm def,
; v[0:1], followed by the same v_alignbit_b32-by-16 on v1 as the __3_u test.
274 define void @v_shuffle_v2i16_v4i16__7_u(ptr addrspace(1) inreg %ptr) {
275 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__7_u:
277 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
278 ; GFX900-NEXT: ;;#ASMSTART
279 ; GFX900-NEXT: ; def v[0:1]
280 ; GFX900-NEXT: ;;#ASMEND
281 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
282 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16
283 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
284 ; GFX900-NEXT: s_waitcnt vmcnt(0)
285 ; GFX900-NEXT: s_setpc_b64 s[30:31]
287 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__7_u:
289 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290 ; GFX90A-NEXT: ;;#ASMSTART
291 ; GFX90A-NEXT: ; def v[0:1]
292 ; GFX90A-NEXT: ;;#ASMEND
293 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
294 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16
295 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
296 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
297 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
299 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_u:
301 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
302 ; GFX940-NEXT: ;;#ASMSTART
303 ; GFX940-NEXT: ; def v[0:1]
304 ; GFX940-NEXT: ;;#ASMEND
305 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
306 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16
307 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
308 ; GFX940-NEXT: s_waitcnt vmcnt(0)
309 ; GFX940-NEXT: s_setpc_b64 s[30:31]
310 %vec0 = call <4 x i16> asm "; def $0", "=v"()
311 %vec1 = call <4 x i16> asm "; def $0", "=v"()
312 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 poison>
313 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <7, 0>: result element 0 is %vec1[3], result element 1 is %vec0[0].
; Both asm values feed the shuffle, so two defs appear in the expected output, and one
; v_alignbit_b32 by 16 combines the first register of the first def with the last
; register of the second def.
; On gfx900 the second def is v[1:2], reusing v1 from the first def; gfx90a and gfx940
; use v[2:3]. The gfx940 checks also expect an s_nop 0 after the second def.
317 define void @v_shuffle_v2i16_v4i16__7_0(ptr addrspace(1) inreg %ptr) {
318 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__7_0:
320 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
321 ; GFX900-NEXT: ;;#ASMSTART
322 ; GFX900-NEXT: ; def v[0:1]
323 ; GFX900-NEXT: ;;#ASMEND
324 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
325 ; GFX900-NEXT: ;;#ASMSTART
326 ; GFX900-NEXT: ; def v[1:2]
327 ; GFX900-NEXT: ;;#ASMEND
328 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16
329 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
330 ; GFX900-NEXT: s_waitcnt vmcnt(0)
331 ; GFX900-NEXT: s_setpc_b64 s[30:31]
333 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__7_0:
335 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
336 ; GFX90A-NEXT: ;;#ASMSTART
337 ; GFX90A-NEXT: ; def v[0:1]
338 ; GFX90A-NEXT: ;;#ASMEND
339 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
340 ; GFX90A-NEXT: ;;#ASMSTART
341 ; GFX90A-NEXT: ; def v[2:3]
342 ; GFX90A-NEXT: ;;#ASMEND
343 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16
344 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
345 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
346 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
348 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_0:
350 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351 ; GFX940-NEXT: ;;#ASMSTART
352 ; GFX940-NEXT: ; def v[0:1]
353 ; GFX940-NEXT: ;;#ASMEND
354 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
355 ; GFX940-NEXT: ;;#ASMSTART
356 ; GFX940-NEXT: ; def v[2:3]
357 ; GFX940-NEXT: ;;#ASMEND
358 ; GFX940-NEXT: s_nop 0
359 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16
360 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
361 ; GFX940-NEXT: s_waitcnt vmcnt(0)
362 ; GFX940-NEXT: s_setpc_b64 s[30:31]
363 %vec0 = call <4 x i16> asm "; def $0", "=v"()
364 %vec1 = call <4 x i16> asm "; def $0", "=v"()
365 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 0>
366 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <7, 1>: result element 0 is %vec1[3], result element 1 is %vec0[1].
; Both asm values feed the shuffle, so two defs appear in the expected output. The two
; halves are combined by a v_perm_b32 whose selector 0x7060302 is materialized in an
; SGPR (s4, or s2 on gfx940).
; On gfx900 the second def is v[1:2], reusing v1 from the first def; gfx90a and gfx940
; use v[2:3]. The gfx940 checks also expect an s_nop 0 after the second def.
370 define void @v_shuffle_v2i16_v4i16__7_1(ptr addrspace(1) inreg %ptr) {
371 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__7_1:
373 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374 ; GFX900-NEXT: ;;#ASMSTART
375 ; GFX900-NEXT: ; def v[0:1]
376 ; GFX900-NEXT: ;;#ASMEND
377 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
378 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
379 ; GFX900-NEXT: ;;#ASMSTART
380 ; GFX900-NEXT: ; def v[1:2]
381 ; GFX900-NEXT: ;;#ASMEND
382 ; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4
383 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
384 ; GFX900-NEXT: s_waitcnt vmcnt(0)
385 ; GFX900-NEXT: s_setpc_b64 s[30:31]
387 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__7_1:
389 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390 ; GFX90A-NEXT: ;;#ASMSTART
391 ; GFX90A-NEXT: ; def v[0:1]
392 ; GFX90A-NEXT: ;;#ASMEND
393 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
394 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
395 ; GFX90A-NEXT: ;;#ASMSTART
396 ; GFX90A-NEXT: ; def v[2:3]
397 ; GFX90A-NEXT: ;;#ASMEND
398 ; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
399 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
400 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
401 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
403 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_1:
405 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406 ; GFX940-NEXT: ;;#ASMSTART
407 ; GFX940-NEXT: ; def v[0:1]
408 ; GFX940-NEXT: ;;#ASMEND
409 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
410 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
411 ; GFX940-NEXT: ;;#ASMSTART
412 ; GFX940-NEXT: ; def v[2:3]
413 ; GFX940-NEXT: ;;#ASMEND
414 ; GFX940-NEXT: s_nop 0
415 ; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
416 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
417 ; GFX940-NEXT: s_waitcnt vmcnt(0)
418 ; GFX940-NEXT: s_setpc_b64 s[30:31]
419 %vec0 = call <4 x i16> asm "; def $0", "=v"()
420 %vec1 = call <4 x i16> asm "; def $0", "=v"()
421 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 1>
422 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <7, 2>: result element 0 is %vec1[3], result element 1 is %vec0[2].
; Both asm values feed the shuffle; all three targets expect defs v[0:1] and v[2:3], and
; one v_alignbit_b32 by 16 combines the second register of each pair (v1 and v3).
; The gfx940 checks also expect an s_nop 0 after the second def.
426 define void @v_shuffle_v2i16_v4i16__7_2(ptr addrspace(1) inreg %ptr) {
427 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__7_2:
429 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430 ; GFX900-NEXT: ;;#ASMSTART
431 ; GFX900-NEXT: ; def v[0:1]
432 ; GFX900-NEXT: ;;#ASMEND
433 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
434 ; GFX900-NEXT: ;;#ASMSTART
435 ; GFX900-NEXT: ; def v[2:3]
436 ; GFX900-NEXT: ;;#ASMEND
437 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16
438 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
439 ; GFX900-NEXT: s_waitcnt vmcnt(0)
440 ; GFX900-NEXT: s_setpc_b64 s[30:31]
442 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__7_2:
444 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
445 ; GFX90A-NEXT: ;;#ASMSTART
446 ; GFX90A-NEXT: ; def v[0:1]
447 ; GFX90A-NEXT: ;;#ASMEND
448 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
449 ; GFX90A-NEXT: ;;#ASMSTART
450 ; GFX90A-NEXT: ; def v[2:3]
451 ; GFX90A-NEXT: ;;#ASMEND
452 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16
453 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
454 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
455 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
457 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_2:
459 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
460 ; GFX940-NEXT: ;;#ASMSTART
461 ; GFX940-NEXT: ; def v[0:1]
462 ; GFX940-NEXT: ;;#ASMEND
463 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
464 ; GFX940-NEXT: ;;#ASMSTART
465 ; GFX940-NEXT: ; def v[2:3]
466 ; GFX940-NEXT: ;;#ASMEND
467 ; GFX940-NEXT: s_nop 0
468 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16
469 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
470 ; GFX940-NEXT: s_waitcnt vmcnt(0)
471 ; GFX940-NEXT: s_setpc_b64 s[30:31]
472 %vec0 = call <4 x i16> asm "; def $0", "=v"()
473 %vec1 = call <4 x i16> asm "; def $0", "=v"()
474 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 2>
475 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <7, 3>: result element 0 is %vec1[3], result element 1 is %vec0[3].
; Both asm values feed the shuffle; all three targets expect defs v[0:1] and v[2:3]. The
; second register of each pair (v1 and v3) is combined by a v_perm_b32 whose selector
; 0x7060302 is materialized in an SGPR (s4, or s2 on gfx940).
; The gfx940 checks also expect an s_nop 0 after the second def.
479 define void @v_shuffle_v2i16_v4i16__7_3(ptr addrspace(1) inreg %ptr) {
480 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__7_3:
482 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
483 ; GFX900-NEXT: ;;#ASMSTART
484 ; GFX900-NEXT: ; def v[0:1]
485 ; GFX900-NEXT: ;;#ASMEND
486 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
487 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
488 ; GFX900-NEXT: ;;#ASMSTART
489 ; GFX900-NEXT: ; def v[2:3]
490 ; GFX900-NEXT: ;;#ASMEND
491 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
492 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
493 ; GFX900-NEXT: s_waitcnt vmcnt(0)
494 ; GFX900-NEXT: s_setpc_b64 s[30:31]
496 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__7_3:
498 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499 ; GFX90A-NEXT: ;;#ASMSTART
500 ; GFX90A-NEXT: ; def v[0:1]
501 ; GFX90A-NEXT: ;;#ASMEND
502 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
503 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
504 ; GFX90A-NEXT: ;;#ASMSTART
505 ; GFX90A-NEXT: ; def v[2:3]
506 ; GFX90A-NEXT: ;;#ASMEND
507 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
508 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
509 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
510 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
512 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_3:
514 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
515 ; GFX940-NEXT: ;;#ASMSTART
516 ; GFX940-NEXT: ; def v[0:1]
517 ; GFX940-NEXT: ;;#ASMEND
518 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
519 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
520 ; GFX940-NEXT: ;;#ASMSTART
521 ; GFX940-NEXT: ; def v[2:3]
522 ; GFX940-NEXT: ;;#ASMEND
523 ; GFX940-NEXT: s_nop 0
524 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
525 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
526 ; GFX940-NEXT: s_waitcnt vmcnt(0)
527 ; GFX940-NEXT: s_setpc_b64 s[30:31]
528 %vec0 = call <4 x i16> asm "; def $0", "=v"()
529 %vec1 = call <4 x i16> asm "; def $0", "=v"()
530 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 3>
531 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <7, 4>: result element 0 is %vec1[3], result element 1 is %vec1[0]; both indices
; address the second operand, so %vec0 is never read by the shuffle.
; The expected output shows a single asm def, v[0:1], and one v_alignbit_b32 by 16 that
; takes both registers of that pair as sources.
535 define void @v_shuffle_v2i16_v4i16__7_4(ptr addrspace(1) inreg %ptr) {
536 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__7_4:
538 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
539 ; GFX900-NEXT: ;;#ASMSTART
540 ; GFX900-NEXT: ; def v[0:1]
541 ; GFX900-NEXT: ;;#ASMEND
542 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
543 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16
544 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
545 ; GFX900-NEXT: s_waitcnt vmcnt(0)
546 ; GFX900-NEXT: s_setpc_b64 s[30:31]
548 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__7_4:
550 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
551 ; GFX90A-NEXT: ;;#ASMSTART
552 ; GFX90A-NEXT: ; def v[0:1]
553 ; GFX90A-NEXT: ;;#ASMEND
554 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
555 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16
556 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
557 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
558 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
560 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_4:
562 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
563 ; GFX940-NEXT: ;;#ASMSTART
564 ; GFX940-NEXT: ; def v[0:1]
565 ; GFX940-NEXT: ;;#ASMEND
566 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
567 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16
568 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
569 ; GFX940-NEXT: s_waitcnt vmcnt(0)
570 ; GFX940-NEXT: s_setpc_b64 s[30:31]
571 %vec0 = call <4 x i16> asm "; def $0", "=v"()
572 %vec1 = call <4 x i16> asm "; def $0", "=v"()
573 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 4>
574 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <7, 5>: result element 0 is %vec1[3], result element 1 is %vec1[1]; both indices
; address the second operand, so %vec0 is never read by the shuffle.
; The expected output shows a single asm def, v[0:1], whose two registers are combined
; by a v_perm_b32 with selector 0x7060302 held in an SGPR (s4, or s2 on gfx940).
578 define void @v_shuffle_v2i16_v4i16__7_5(ptr addrspace(1) inreg %ptr) {
579 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__7_5:
581 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
582 ; GFX900-NEXT: ;;#ASMSTART
583 ; GFX900-NEXT: ; def v[0:1]
584 ; GFX900-NEXT: ;;#ASMEND
585 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
586 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
587 ; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4
588 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
589 ; GFX900-NEXT: s_waitcnt vmcnt(0)
590 ; GFX900-NEXT: s_setpc_b64 s[30:31]
592 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__7_5:
594 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
595 ; GFX90A-NEXT: ;;#ASMSTART
596 ; GFX90A-NEXT: ; def v[0:1]
597 ; GFX90A-NEXT: ;;#ASMEND
598 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
599 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
600 ; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4
601 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
602 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
603 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
605 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_5:
607 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
608 ; GFX940-NEXT: ;;#ASMSTART
609 ; GFX940-NEXT: ; def v[0:1]
610 ; GFX940-NEXT: ;;#ASMEND
611 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
612 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
613 ; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2
614 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
615 ; GFX940-NEXT: s_waitcnt vmcnt(0)
616 ; GFX940-NEXT: s_setpc_b64 s[30:31]
617 %vec0 = call <4 x i16> asm "; def $0", "=v"()
618 %vec1 = call <4 x i16> asm "; def $0", "=v"()
619 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 5>
620 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <7, 6>: result element 0 is %vec1[3], result element 1 is %vec1[2]; both indices
; address the second operand, so %vec0 is never read by the shuffle.
; The expected output shows a single asm def, v[0:1], and a v_alignbit_b32 by 16 that
; uses v1 for both sources, swapping the two 16-bit halves of that register.
624 define void @v_shuffle_v2i16_v4i16__7_6(ptr addrspace(1) inreg %ptr) {
625 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__7_6:
627 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
628 ; GFX900-NEXT: ;;#ASMSTART
629 ; GFX900-NEXT: ; def v[0:1]
630 ; GFX900-NEXT: ;;#ASMEND
631 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
632 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16
633 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
634 ; GFX900-NEXT: s_waitcnt vmcnt(0)
635 ; GFX900-NEXT: s_setpc_b64 s[30:31]
637 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__7_6:
639 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
640 ; GFX90A-NEXT: ;;#ASMSTART
641 ; GFX90A-NEXT: ; def v[0:1]
642 ; GFX90A-NEXT: ;;#ASMEND
643 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
644 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16
645 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
646 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
647 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
649 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_6:
651 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652 ; GFX940-NEXT: ;;#ASMSTART
653 ; GFX940-NEXT: ; def v[0:1]
654 ; GFX940-NEXT: ;;#ASMEND
655 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
656 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16
657 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
658 ; GFX940-NEXT: s_waitcnt vmcnt(0)
659 ; GFX940-NEXT: s_setpc_b64 s[30:31]
660 %vec0 = call <4 x i16> asm "; def $0", "=v"()
661 %vec1 = call <4 x i16> asm "; def $0", "=v"()
662 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 6>
663 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <7, 7>: both result elements are %vec1[3]; %vec0 is never read by the shuffle.
; The expected output shows a single asm def, v[0:1], and a v_perm_b32 that uses v1 for
; both sources with selector 0x7060302 held in an SGPR (s4, or s2 on gfx940).
667 define void @v_shuffle_v2i16_v4i16__7_7(ptr addrspace(1) inreg %ptr) {
668 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__7_7:
670 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
671 ; GFX900-NEXT: ;;#ASMSTART
672 ; GFX900-NEXT: ; def v[0:1]
673 ; GFX900-NEXT: ;;#ASMEND
674 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
675 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
676 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
677 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
678 ; GFX900-NEXT: s_waitcnt vmcnt(0)
679 ; GFX900-NEXT: s_setpc_b64 s[30:31]
681 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__7_7:
683 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
684 ; GFX90A-NEXT: ;;#ASMSTART
685 ; GFX90A-NEXT: ; def v[0:1]
686 ; GFX90A-NEXT: ;;#ASMEND
687 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
688 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
689 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
690 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
691 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
692 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
694 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_7:
696 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
697 ; GFX940-NEXT: ;;#ASMSTART
698 ; GFX940-NEXT: ; def v[0:1]
699 ; GFX940-NEXT: ;;#ASMEND
700 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
701 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
702 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
703 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
704 ; GFX940-NEXT: s_waitcnt vmcnt(0)
705 ; GFX940-NEXT: s_setpc_b64 s[30:31]
706 %vec0 = call <4 x i16> asm "; def $0", "=v"()
707 %vec1 = call <4 x i16> asm "; def $0", "=v"()
708 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 7>
709 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <poison, 0>: result element 1 is %vec0[0]; result element 0 is unconstrained.
; The checks expect v0 to be shifted left by 16 (v_lshlrev_b32) before the store, which
; places its low half in the upper 16 bits of the stored dword.
713 define void @v_shuffle_v2i16_v4i16__u_0(ptr addrspace(1) inreg %ptr) {
714 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__u_0:
716 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
717 ; GFX900-NEXT: ;;#ASMSTART
718 ; GFX900-NEXT: ; def v[0:1]
719 ; GFX900-NEXT: ;;#ASMEND
720 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
721 ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
722 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
723 ; GFX900-NEXT: s_waitcnt vmcnt(0)
724 ; GFX900-NEXT: s_setpc_b64 s[30:31]
726 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__u_0:
728 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
729 ; GFX90A-NEXT: ;;#ASMSTART
730 ; GFX90A-NEXT: ; def v[0:1]
731 ; GFX90A-NEXT: ;;#ASMEND
732 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
733 ; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0
734 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
735 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
736 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
738 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__u_0:
740 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
741 ; GFX940-NEXT: ;;#ASMSTART
742 ; GFX940-NEXT: ; def v[0:1]
743 ; GFX940-NEXT: ;;#ASMEND
744 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
745 ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0
746 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
747 ; GFX940-NEXT: s_waitcnt vmcnt(0)
748 ; GFX940-NEXT: s_setpc_b64 s[30:31]
749 %vec0 = call <4 x i16> asm "; def $0", "=v"()
750 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 poison, i32 0>
751 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask zeroinitializer, i.e. <0, 0>: both result elements are %vec0[0].
; The checks expect a v_perm_b32 that uses v0 for both sources with selector 0x5040100
; held in an SGPR (s4, or s2 on gfx940).
755 define void @v_shuffle_v2i16_v4i16__0_0(ptr addrspace(1) inreg %ptr) {
756 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__0_0:
758 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
759 ; GFX900-NEXT: ;;#ASMSTART
760 ; GFX900-NEXT: ; def v[0:1]
761 ; GFX900-NEXT: ;;#ASMEND
762 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
763 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
764 ; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
765 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
766 ; GFX900-NEXT: s_waitcnt vmcnt(0)
767 ; GFX900-NEXT: s_setpc_b64 s[30:31]
769 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__0_0:
771 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
772 ; GFX90A-NEXT: ;;#ASMSTART
773 ; GFX90A-NEXT: ; def v[0:1]
774 ; GFX90A-NEXT: ;;#ASMEND
775 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
776 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
777 ; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
778 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
779 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
780 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
782 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_0:
784 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
785 ; GFX940-NEXT: ;;#ASMSTART
786 ; GFX940-NEXT: ; def v[0:1]
787 ; GFX940-NEXT: ;;#ASMEND
788 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
789 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
790 ; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
791 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
792 ; GFX940-NEXT: s_waitcnt vmcnt(0)
793 ; GFX940-NEXT: s_setpc_b64 s[30:31]
794 %vec0 = call <4 x i16> asm "; def $0", "=v"()
795 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> zeroinitializer
796 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <1, 0>: result element 0 is %vec0[1], result element 1 is %vec0[0].
; The checks expect a v_alignbit_b32 by 16 that uses v0 for both sources, swapping the
; two 16-bit halves of that register.
800 define void @v_shuffle_v2i16_v4i16__1_0(ptr addrspace(1) inreg %ptr) {
801 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__1_0:
803 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
804 ; GFX900-NEXT: ;;#ASMSTART
805 ; GFX900-NEXT: ; def v[0:1]
806 ; GFX900-NEXT: ;;#ASMEND
807 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
808 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v0, 16
809 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
810 ; GFX900-NEXT: s_waitcnt vmcnt(0)
811 ; GFX900-NEXT: s_setpc_b64 s[30:31]
813 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__1_0:
815 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
816 ; GFX90A-NEXT: ;;#ASMSTART
817 ; GFX90A-NEXT: ; def v[0:1]
818 ; GFX90A-NEXT: ;;#ASMEND
819 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
820 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v0, 16
821 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
822 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
823 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
825 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_0:
827 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
828 ; GFX940-NEXT: ;;#ASMSTART
829 ; GFX940-NEXT: ; def v[0:1]
830 ; GFX940-NEXT: ;;#ASMEND
831 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
832 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16
833 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
834 ; GFX940-NEXT: s_waitcnt vmcnt(0)
835 ; GFX940-NEXT: s_setpc_b64 s[30:31]
836 %vec0 = call <4 x i16> asm "; def $0", "=v"()
837 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 1, i32 0>
838 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Mask <2, 0>: result element 0 is %vec0[2], result element 1 is %vec0[0].
; The checks expect the two registers of the asm-defined pair (v0 and v1) to be combined
; by a v_perm_b32 with selector 0x5040100 held in an SGPR (s4, or s2 on gfx940).
842 define void @v_shuffle_v2i16_v4i16__2_0(ptr addrspace(1) inreg %ptr) {
843 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__2_0:
845 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
846 ; GFX900-NEXT: ;;#ASMSTART
847 ; GFX900-NEXT: ; def v[0:1]
848 ; GFX900-NEXT: ;;#ASMEND
849 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
850 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
851 ; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4
852 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
853 ; GFX900-NEXT: s_waitcnt vmcnt(0)
854 ; GFX900-NEXT: s_setpc_b64 s[30:31]
856 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__2_0:
858 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
859 ; GFX90A-NEXT: ;;#ASMSTART
860 ; GFX90A-NEXT: ; def v[0:1]
861 ; GFX90A-NEXT: ;;#ASMEND
862 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
863 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
864 ; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4
865 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
866 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
867 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
869 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_0:
871 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
872 ; GFX940-NEXT: ;;#ASMSTART
873 ; GFX940-NEXT: ; def v[0:1]
874 ; GFX940-NEXT: ;;#ASMEND
875 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
876 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
877 ; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2
878 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
879 ; GFX940-NEXT: s_waitcnt vmcnt(0)
880 ; GFX940-NEXT: s_setpc_b64 s[30:31]
881 %vec0 = call <4 x i16> asm "; def $0", "=v"()
882 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 2, i32 0>
883 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <3, 0> on a single <4 x i16> input: result lane 0 is %vec0[3]
; and lane 1 is %vec0[0]; the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect one v_alignbit_b32 (shift 16) over the
; two def'd registers for gfx900, gfx90a and gfx940.
887 define void @v_shuffle_v2i16_v4i16__3_0(ptr addrspace(1) inreg %ptr) {
888 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__3_0:
890 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
891 ; GFX900-NEXT: ;;#ASMSTART
892 ; GFX900-NEXT: ; def v[0:1]
893 ; GFX900-NEXT: ;;#ASMEND
894 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
895 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16
896 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
897 ; GFX900-NEXT: s_waitcnt vmcnt(0)
898 ; GFX900-NEXT: s_setpc_b64 s[30:31]
900 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__3_0:
902 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
903 ; GFX90A-NEXT: ;;#ASMSTART
904 ; GFX90A-NEXT: ; def v[0:1]
905 ; GFX90A-NEXT: ;;#ASMEND
906 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
907 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16
908 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
909 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
910 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
912 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_0:
914 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
915 ; GFX940-NEXT: ;;#ASMSTART
916 ; GFX940-NEXT: ; def v[0:1]
917 ; GFX940-NEXT: ;;#ASMEND
918 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
919 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16
920 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
921 ; GFX940-NEXT: s_waitcnt vmcnt(0)
922 ; GFX940-NEXT: s_setpc_b64 s[30:31]
923 %vec0 = call <4 x i16> asm "; def $0", "=v"()
924 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 3, i32 0>
925 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <4, 0> with a poison second operand: index 4 names element 0 of
; the second operand, so result lane 0 is poison and lane 1 is %vec0[0].
; The autogenerated assertions expect only a v_lshlrev_b32 by 16 of the first
; def'd register (moving %vec0[0] into the high half) before the store.
929 define void @v_shuffle_v2i16_v4i16__4_0(ptr addrspace(1) inreg %ptr) {
930 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__4_0:
932 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
933 ; GFX900-NEXT: ;;#ASMSTART
934 ; GFX900-NEXT: ; def v[0:1]
935 ; GFX900-NEXT: ;;#ASMEND
936 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
937 ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
938 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
939 ; GFX900-NEXT: s_waitcnt vmcnt(0)
940 ; GFX900-NEXT: s_setpc_b64 s[30:31]
942 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__4_0:
944 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
945 ; GFX90A-NEXT: ;;#ASMSTART
946 ; GFX90A-NEXT: ; def v[0:1]
947 ; GFX90A-NEXT: ;;#ASMEND
948 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
949 ; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0
950 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
951 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
952 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
954 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__4_0:
956 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
957 ; GFX940-NEXT: ;;#ASMSTART
958 ; GFX940-NEXT: ; def v[0:1]
959 ; GFX940-NEXT: ;;#ASMEND
960 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
961 ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0
962 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
963 ; GFX940-NEXT: s_waitcnt vmcnt(0)
964 ; GFX940-NEXT: s_setpc_b64 s[30:31]
965 %vec0 = call <4 x i16> asm "; def $0", "=v"()
966 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 4, i32 0>
967 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Two-input shuffle with mask <5, 0>: result lane 0 is %vec1[1] and lane 1 is
; %vec0[0]; the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect one v_alignbit_b32 (shift 16). The
; register pair chosen for the second inline-asm def differs between gfx900
; (v[1:2]) and gfx90a/gfx940 (v[2:3]).
; NOTE(review): the gfx940 output also has an `s_nop 0` after the second def,
; presumably hazard padding - confirm before relying on it.
971 define void @v_shuffle_v2i16_v4i16__5_0(ptr addrspace(1) inreg %ptr) {
972 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__5_0:
974 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
975 ; GFX900-NEXT: ;;#ASMSTART
976 ; GFX900-NEXT: ; def v[0:1]
977 ; GFX900-NEXT: ;;#ASMEND
978 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
979 ; GFX900-NEXT: ;;#ASMSTART
980 ; GFX900-NEXT: ; def v[1:2]
981 ; GFX900-NEXT: ;;#ASMEND
982 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16
983 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
984 ; GFX900-NEXT: s_waitcnt vmcnt(0)
985 ; GFX900-NEXT: s_setpc_b64 s[30:31]
987 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__5_0:
989 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
990 ; GFX90A-NEXT: ;;#ASMSTART
991 ; GFX90A-NEXT: ; def v[0:1]
992 ; GFX90A-NEXT: ;;#ASMEND
993 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
994 ; GFX90A-NEXT: ;;#ASMSTART
995 ; GFX90A-NEXT: ; def v[2:3]
996 ; GFX90A-NEXT: ;;#ASMEND
997 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v2, 16
998 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
999 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1000 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1002 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_0:
1004 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1005 ; GFX940-NEXT: ;;#ASMSTART
1006 ; GFX940-NEXT: ; def v[0:1]
1007 ; GFX940-NEXT: ;;#ASMEND
1008 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1009 ; GFX940-NEXT: ;;#ASMSTART
1010 ; GFX940-NEXT: ; def v[2:3]
1011 ; GFX940-NEXT: ;;#ASMEND
1012 ; GFX940-NEXT: s_nop 0
1013 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16
1014 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
1015 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1016 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1017 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1018 %vec1 = call <4 x i16> asm "; def $0", "=v"()
1019 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 0>
1020 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Two-input shuffle with mask <6, 0>: result lane 0 is %vec1[2] and lane 1 is
; %vec0[0]; the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect one v_perm_b32 with selector 0x5040100
; reading the second register of the %vec1 def and the first of the %vec0 def.
; NOTE(review): the gfx940 output also has an `s_nop 0` after the second def,
; presumably hazard padding - confirm before relying on it.
1024 define void @v_shuffle_v2i16_v4i16__6_0(ptr addrspace(1) inreg %ptr) {
1025 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__6_0:
1027 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1028 ; GFX900-NEXT: ;;#ASMSTART
1029 ; GFX900-NEXT: ; def v[0:1]
1030 ; GFX900-NEXT: ;;#ASMEND
1031 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1032 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1033 ; GFX900-NEXT: ;;#ASMSTART
1034 ; GFX900-NEXT: ; def v[1:2]
1035 ; GFX900-NEXT: ;;#ASMEND
1036 ; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4
1037 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
1038 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1039 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1041 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__6_0:
1043 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1044 ; GFX90A-NEXT: ;;#ASMSTART
1045 ; GFX90A-NEXT: ; def v[0:1]
1046 ; GFX90A-NEXT: ;;#ASMEND
1047 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1048 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1049 ; GFX90A-NEXT: ;;#ASMSTART
1050 ; GFX90A-NEXT: ; def v[2:3]
1051 ; GFX90A-NEXT: ;;#ASMEND
1052 ; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
1053 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
1054 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1055 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1057 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_0:
1059 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1060 ; GFX940-NEXT: ;;#ASMSTART
1061 ; GFX940-NEXT: ; def v[0:1]
1062 ; GFX940-NEXT: ;;#ASMEND
1063 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1064 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1065 ; GFX940-NEXT: ;;#ASMSTART
1066 ; GFX940-NEXT: ; def v[2:3]
1067 ; GFX940-NEXT: ;;#ASMEND
1068 ; GFX940-NEXT: s_nop 0
1069 ; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
1070 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
1071 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1072 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1073 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1074 %vec1 = call <4 x i16> asm "; def $0", "=v"()
1075 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 0>
1076 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <poison, 1> on a single <4 x i16> input: result lane 0 is
; unspecified and lane 1 is %vec0[1]; the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect no shuffle instruction at all: the first
; def'd register is stored as-is.
1080 define void @v_shuffle_v2i16_v4i16__u_1(ptr addrspace(1) inreg %ptr) {
1081 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__u_1:
1083 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1084 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1085 ; GFX900-NEXT: ;;#ASMSTART
1086 ; GFX900-NEXT: ; def v[0:1]
1087 ; GFX900-NEXT: ;;#ASMEND
1088 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1089 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1090 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1092 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__u_1:
1094 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1095 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1096 ; GFX90A-NEXT: ;;#ASMSTART
1097 ; GFX90A-NEXT: ; def v[0:1]
1098 ; GFX90A-NEXT: ;;#ASMEND
1099 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1100 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1101 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1103 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__u_1:
1105 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1107 ; GFX940-NEXT: ;;#ASMSTART
1108 ; GFX940-NEXT: ; def v[0:1]
1109 ; GFX940-NEXT: ;;#ASMEND
1110 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1111 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1112 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1113 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1114 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 poison, i32 1>
1115 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <0, 1> on a single <4 x i16> input: extracts the low two
; elements of %vec0 in order; the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect no shuffle instruction: the first def'd
; register is stored as-is.
1119 define void @v_shuffle_v2i16_v4i16__0_1(ptr addrspace(1) inreg %ptr) {
1120 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__0_1:
1122 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1123 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1124 ; GFX900-NEXT: ;;#ASMSTART
1125 ; GFX900-NEXT: ; def v[0:1]
1126 ; GFX900-NEXT: ;;#ASMEND
1127 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1128 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1129 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1131 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__0_1:
1133 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1134 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1135 ; GFX90A-NEXT: ;;#ASMSTART
1136 ; GFX90A-NEXT: ; def v[0:1]
1137 ; GFX90A-NEXT: ;;#ASMEND
1138 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1139 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1140 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1142 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_1:
1144 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1145 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1146 ; GFX940-NEXT: ;;#ASMSTART
1147 ; GFX940-NEXT: ; def v[0:1]
1148 ; GFX940-NEXT: ;;#ASMEND
1149 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1150 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1151 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1152 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1153 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 0, i32 1>
1154 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <1, 1> on a single <4 x i16> input: both result lanes are
; %vec0[1] (a splat); the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect one v_perm_b32 with selector 0x7060302
; using the first def'd register for both sources.
1158 define void @v_shuffle_v2i16_v4i16__1_1(ptr addrspace(1) inreg %ptr) {
1159 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__1_1:
1161 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1162 ; GFX900-NEXT: ;;#ASMSTART
1163 ; GFX900-NEXT: ; def v[0:1]
1164 ; GFX900-NEXT: ;;#ASMEND
1165 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1166 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1167 ; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
1168 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1169 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1170 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1172 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__1_1:
1174 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1175 ; GFX90A-NEXT: ;;#ASMSTART
1176 ; GFX90A-NEXT: ; def v[0:1]
1177 ; GFX90A-NEXT: ;;#ASMEND
1178 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1179 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1180 ; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
1181 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1182 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1183 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1185 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_1:
1187 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1188 ; GFX940-NEXT: ;;#ASMSTART
1189 ; GFX940-NEXT: ; def v[0:1]
1190 ; GFX940-NEXT: ;;#ASMEND
1191 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1192 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1193 ; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
1194 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1195 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1196 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1197 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1198 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 1, i32 1>
1199 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <2, 1> on a single <4 x i16> input: result lane 0 is %vec0[2]
; and lane 1 is %vec0[1]; the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect one v_bfi_b32 with bit mask 0xffff
; merging the two def'd registers.
1203 define void @v_shuffle_v2i16_v4i16__2_1(ptr addrspace(1) inreg %ptr) {
1204 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__2_1:
1206 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1207 ; GFX900-NEXT: ;;#ASMSTART
1208 ; GFX900-NEXT: ; def v[0:1]
1209 ; GFX900-NEXT: ;;#ASMEND
1210 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
1211 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1212 ; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v0
1213 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1214 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1215 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1217 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__2_1:
1219 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1220 ; GFX90A-NEXT: ;;#ASMSTART
1221 ; GFX90A-NEXT: ; def v[0:1]
1222 ; GFX90A-NEXT: ;;#ASMEND
1223 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
1224 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1225 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v0
1226 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1227 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1228 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1230 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_1:
1232 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1233 ; GFX940-NEXT: ;;#ASMSTART
1234 ; GFX940-NEXT: ; def v[0:1]
1235 ; GFX940-NEXT: ;;#ASMEND
1236 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
1237 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1238 ; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0
1239 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1240 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1241 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1242 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1243 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 2, i32 1>
1244 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <3, 1> on a single <4 x i16> input: result lane 0 is %vec0[3]
; and lane 1 is %vec0[1]; the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect one v_perm_b32 with selector 0x7060302
; over the two def'd registers.
1248 define void @v_shuffle_v2i16_v4i16__3_1(ptr addrspace(1) inreg %ptr) {
1249 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__3_1:
1251 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1252 ; GFX900-NEXT: ;;#ASMSTART
1253 ; GFX900-NEXT: ; def v[0:1]
1254 ; GFX900-NEXT: ;;#ASMEND
1255 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1256 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1257 ; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4
1258 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1259 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1260 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1262 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__3_1:
1264 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1265 ; GFX90A-NEXT: ;;#ASMSTART
1266 ; GFX90A-NEXT: ; def v[0:1]
1267 ; GFX90A-NEXT: ;;#ASMEND
1268 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1269 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1270 ; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4
1271 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1272 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1273 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1275 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_1:
1277 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1278 ; GFX940-NEXT: ;;#ASMSTART
1279 ; GFX940-NEXT: ; def v[0:1]
1280 ; GFX940-NEXT: ;;#ASMEND
1281 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1282 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1283 ; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2
1284 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1285 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1286 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1287 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1288 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 3, i32 1>
1289 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <4, 1> with a poison second operand: index 4 names element 0 of
; the second operand, so result lane 0 is poison and lane 1 is %vec0[1].
; The autogenerated assertions expect no shuffle instruction: the first def'd
; register is stored as-is.
1293 define void @v_shuffle_v2i16_v4i16__4_1(ptr addrspace(1) inreg %ptr) {
1294 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__4_1:
1296 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1297 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1298 ; GFX900-NEXT: ;;#ASMSTART
1299 ; GFX900-NEXT: ; def v[0:1]
1300 ; GFX900-NEXT: ;;#ASMEND
1301 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1302 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1303 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1305 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__4_1:
1307 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1308 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1309 ; GFX90A-NEXT: ;;#ASMSTART
1310 ; GFX90A-NEXT: ; def v[0:1]
1311 ; GFX90A-NEXT: ;;#ASMEND
1312 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1313 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1314 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1316 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__4_1:
1318 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1319 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1320 ; GFX940-NEXT: ;;#ASMSTART
1321 ; GFX940-NEXT: ; def v[0:1]
1322 ; GFX940-NEXT: ;;#ASMEND
1323 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1324 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1325 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1326 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1327 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 4, i32 1>
1328 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Two-input shuffle with mask <5, 1>: result lane 0 is %vec1[1] and lane 1 is
; %vec0[1]; the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect one v_perm_b32 with selector 0x7060302
; reading the first register of each inline-asm def.
; NOTE(review): the gfx940 output also has an `s_nop 0` after the second def,
; presumably hazard padding - confirm before relying on it.
1332 define void @v_shuffle_v2i16_v4i16__5_1(ptr addrspace(1) inreg %ptr) {
1333 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__5_1:
1335 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1336 ; GFX900-NEXT: ;;#ASMSTART
1337 ; GFX900-NEXT: ; def v[0:1]
1338 ; GFX900-NEXT: ;;#ASMEND
1339 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1340 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1341 ; GFX900-NEXT: ;;#ASMSTART
1342 ; GFX900-NEXT: ; def v[1:2]
1343 ; GFX900-NEXT: ;;#ASMEND
1344 ; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4
1345 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
1346 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1347 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1349 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__5_1:
1351 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1352 ; GFX90A-NEXT: ;;#ASMSTART
1353 ; GFX90A-NEXT: ; def v[0:1]
1354 ; GFX90A-NEXT: ;;#ASMEND
1355 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1356 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1357 ; GFX90A-NEXT: ;;#ASMSTART
1358 ; GFX90A-NEXT: ; def v[2:3]
1359 ; GFX90A-NEXT: ;;#ASMEND
1360 ; GFX90A-NEXT: v_perm_b32 v0, v0, v2, s4
1361 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
1362 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1363 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1365 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_1:
1367 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1368 ; GFX940-NEXT: ;;#ASMSTART
1369 ; GFX940-NEXT: ; def v[0:1]
1370 ; GFX940-NEXT: ;;#ASMEND
1371 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1372 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1373 ; GFX940-NEXT: ;;#ASMSTART
1374 ; GFX940-NEXT: ; def v[2:3]
1375 ; GFX940-NEXT: ;;#ASMEND
1376 ; GFX940-NEXT: s_nop 0
1377 ; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2
1378 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
1379 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1380 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1381 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1382 %vec1 = call <4 x i16> asm "; def $0", "=v"()
1383 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 1>
1384 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Two-input shuffle with mask <6, 1>: result lane 0 is %vec1[2] and lane 1 is
; %vec0[1]; the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect one v_bfi_b32 with bit mask 0xffff
; merging the second register of the %vec1 def with the first of the %vec0 def.
; NOTE(review): the gfx940 output also has an `s_nop 0` after the second def,
; presumably hazard padding - confirm before relying on it.
1388 define void @v_shuffle_v2i16_v4i16__6_1(ptr addrspace(1) inreg %ptr) {
1389 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__6_1:
1391 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1392 ; GFX900-NEXT: ;;#ASMSTART
1393 ; GFX900-NEXT: ; def v[0:1]
1394 ; GFX900-NEXT: ;;#ASMEND
1395 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
1396 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
1397 ; GFX900-NEXT: ;;#ASMSTART
1398 ; GFX900-NEXT: ; def v[1:2]
1399 ; GFX900-NEXT: ;;#ASMEND
1400 ; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0
1401 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
1402 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1403 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1405 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__6_1:
1407 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1408 ; GFX90A-NEXT: ;;#ASMSTART
1409 ; GFX90A-NEXT: ; def v[0:1]
1410 ; GFX90A-NEXT: ;;#ASMEND
1411 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
1412 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1413 ; GFX90A-NEXT: ;;#ASMSTART
1414 ; GFX90A-NEXT: ; def v[2:3]
1415 ; GFX90A-NEXT: ;;#ASMEND
1416 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0
1417 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
1418 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1419 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1421 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_1:
1423 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1424 ; GFX940-NEXT: ;;#ASMSTART
1425 ; GFX940-NEXT: ; def v[0:1]
1426 ; GFX940-NEXT: ;;#ASMEND
1427 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
1428 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1429 ; GFX940-NEXT: ;;#ASMSTART
1430 ; GFX940-NEXT: ; def v[2:3]
1431 ; GFX940-NEXT: ;;#ASMEND
1432 ; GFX940-NEXT: s_nop 0
1433 ; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0
1434 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
1435 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1436 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1437 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1438 %vec1 = call <4 x i16> asm "; def $0", "=v"()
1439 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 1>
1440 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <poison, 2> on a single <4 x i16> input: result lane 0 is
; unspecified and lane 1 is %vec0[2]; the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect only a v_lshlrev_b32 by 16 of the second
; def'd register (moving %vec0[2] into the high half) before the store.
1444 define void @v_shuffle_v2i16_v4i16__u_2(ptr addrspace(1) inreg %ptr) {
1445 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__u_2:
1447 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1448 ; GFX900-NEXT: ;;#ASMSTART
1449 ; GFX900-NEXT: ; def v[0:1]
1450 ; GFX900-NEXT: ;;#ASMEND
1451 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1452 ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1
1453 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1454 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1455 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1457 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__u_2:
1459 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1460 ; GFX90A-NEXT: ;;#ASMSTART
1461 ; GFX90A-NEXT: ; def v[0:1]
1462 ; GFX90A-NEXT: ;;#ASMEND
1463 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1464 ; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1
1465 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1466 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1467 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1469 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__u_2:
1471 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1472 ; GFX940-NEXT: ;;#ASMSTART
1473 ; GFX940-NEXT: ; def v[0:1]
1474 ; GFX940-NEXT: ;;#ASMEND
1475 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1476 ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1
1477 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1478 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1479 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1480 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1481 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 poison, i32 2>
1482 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <0, 2> on a single <4 x i16> input: result lane 0 is %vec0[0]
; and lane 1 is %vec0[2]; the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect one v_perm_b32 with selector 0x5040100
; over the two def'd registers.
1486 define void @v_shuffle_v2i16_v4i16__0_2(ptr addrspace(1) inreg %ptr) {
1487 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__0_2:
1489 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1490 ; GFX900-NEXT: ;;#ASMSTART
1491 ; GFX900-NEXT: ; def v[0:1]
1492 ; GFX900-NEXT: ;;#ASMEND
1493 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1494 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1495 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
1496 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1497 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1498 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1500 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__0_2:
1502 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1503 ; GFX90A-NEXT: ;;#ASMSTART
1504 ; GFX90A-NEXT: ; def v[0:1]
1505 ; GFX90A-NEXT: ;;#ASMEND
1506 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1507 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1508 ; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4
1509 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1510 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1511 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1513 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_2:
1515 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1516 ; GFX940-NEXT: ;;#ASMSTART
1517 ; GFX940-NEXT: ; def v[0:1]
1518 ; GFX940-NEXT: ;;#ASMEND
1519 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1520 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1521 ; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2
1522 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1523 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1524 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1525 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1526 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 0, i32 2>
1527 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <1, 2> on a single <4 x i16> input: result lane 0 is %vec0[1]
; and lane 1 is %vec0[2]; the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect one v_alignbit_b32 (shift 16) over the
; two def'd registers.
1531 define void @v_shuffle_v2i16_v4i16__1_2(ptr addrspace(1) inreg %ptr) {
1532 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__1_2:
1534 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1535 ; GFX900-NEXT: ;;#ASMSTART
1536 ; GFX900-NEXT: ; def v[0:1]
1537 ; GFX900-NEXT: ;;#ASMEND
1538 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1539 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16
1540 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1541 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1542 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1544 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__1_2:
1546 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1547 ; GFX90A-NEXT: ;;#ASMSTART
1548 ; GFX90A-NEXT: ; def v[0:1]
1549 ; GFX90A-NEXT: ;;#ASMEND
1550 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1551 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16
1552 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1553 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1554 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1556 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_2:
1558 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1559 ; GFX940-NEXT: ;;#ASMSTART
1560 ; GFX940-NEXT: ; def v[0:1]
1561 ; GFX940-NEXT: ;;#ASMEND
1562 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1563 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16
1564 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1565 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1566 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1567 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1568 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 1, i32 2>
1569 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <2, 2> on a single <4 x i16> input: both result lanes are
; %vec0[2] (a splat); the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect one v_perm_b32 with selector 0x5040100
; using the second def'd register for both sources.
1573 define void @v_shuffle_v2i16_v4i16__2_2(ptr addrspace(1) inreg %ptr) {
1574 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__2_2:
1576 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1577 ; GFX900-NEXT: ;;#ASMSTART
1578 ; GFX900-NEXT: ; def v[0:1]
1579 ; GFX900-NEXT: ;;#ASMEND
1580 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1581 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1582 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
1583 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1584 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1585 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1587 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__2_2:
1589 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1590 ; GFX90A-NEXT: ;;#ASMSTART
1591 ; GFX90A-NEXT: ; def v[0:1]
1592 ; GFX90A-NEXT: ;;#ASMEND
1593 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1594 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1595 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
1596 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1597 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1598 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1600 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_2:
1602 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1603 ; GFX940-NEXT: ;;#ASMSTART
1604 ; GFX940-NEXT: ; def v[0:1]
1605 ; GFX940-NEXT: ;;#ASMEND
1606 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1607 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1608 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
1609 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1610 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1611 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1612 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1613 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 2, i32 2>
1614 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <3, 2> on a single <4 x i16> input: result lane 0 is %vec0[3]
; and lane 1 is %vec0[2] (the upper pair swapped); the result is stored to %ptr.
; The autogenerated assertions expect one v_alignbit_b32 (shift 16) using the
; second def'd register for both sources.
1618 define void @v_shuffle_v2i16_v4i16__3_2(ptr addrspace(1) inreg %ptr) {
1619 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__3_2:
1621 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1622 ; GFX900-NEXT: ;;#ASMSTART
1623 ; GFX900-NEXT: ; def v[0:1]
1624 ; GFX900-NEXT: ;;#ASMEND
1625 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1626 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16
1627 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1628 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1629 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1631 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__3_2:
1633 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1634 ; GFX90A-NEXT: ;;#ASMSTART
1635 ; GFX90A-NEXT: ; def v[0:1]
1636 ; GFX90A-NEXT: ;;#ASMEND
1637 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1638 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16
1639 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1640 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1641 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1643 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_2:
1645 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1646 ; GFX940-NEXT: ;;#ASMSTART
1647 ; GFX940-NEXT: ; def v[0:1]
1648 ; GFX940-NEXT: ;;#ASMEND
1649 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1650 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16
1651 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1652 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1653 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1654 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1655 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 3, i32 2>
1656 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <4, 2> with a poison second operand: index 4 names element 0 of
; the second operand, so result lane 0 is poison and lane 1 is %vec0[2].
; The autogenerated assertions expect only a v_lshlrev_b32 by 16 of the second
; def'd register (moving %vec0[2] into the high half) before the store.
1660 define void @v_shuffle_v2i16_v4i16__4_2(ptr addrspace(1) inreg %ptr) {
1661 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__4_2:
1663 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1664 ; GFX900-NEXT: ;;#ASMSTART
1665 ; GFX900-NEXT: ; def v[0:1]
1666 ; GFX900-NEXT: ;;#ASMEND
1667 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1668 ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1
1669 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1670 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1671 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1673 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__4_2:
1675 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1676 ; GFX90A-NEXT: ;;#ASMSTART
1677 ; GFX90A-NEXT: ; def v[0:1]
1678 ; GFX90A-NEXT: ;;#ASMEND
1679 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1680 ; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1
1681 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1682 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1683 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1685 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__4_2:
1687 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1688 ; GFX940-NEXT: ;;#ASMSTART
1689 ; GFX940-NEXT: ; def v[0:1]
1690 ; GFX940-NEXT: ;;#ASMEND
1691 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1692 ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1
1693 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1694 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1695 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1696 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1697 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 4, i32 2>
1698 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Two-input shuffle with mask <5, 2>: result lane 0 is %vec1[1] and lane 1 is
; %vec0[2]; the <2 x i16> result is stored to %ptr.
; The autogenerated assertions expect one v_alignbit_b32 (shift 16) reading the
; second register of the %vec0 def and the first register of the %vec1 def.
; NOTE(review): the gfx940 output also has an `s_nop 0` after the second def,
; presumably hazard padding - confirm before relying on it.
1702 define void @v_shuffle_v2i16_v4i16__5_2(ptr addrspace(1) inreg %ptr) {
1703 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__5_2:
1705 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1706 ; GFX900-NEXT: ;;#ASMSTART
1707 ; GFX900-NEXT: ; def v[0:1]
1708 ; GFX900-NEXT: ;;#ASMEND
1709 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
1710 ; GFX900-NEXT: ;;#ASMSTART
1711 ; GFX900-NEXT: ; def v[2:3]
1712 ; GFX900-NEXT: ;;#ASMEND
1713 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v2, 16
1714 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
1715 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1716 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1718 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__5_2:
1720 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1721 ; GFX90A-NEXT: ;;#ASMSTART
1722 ; GFX90A-NEXT: ; def v[0:1]
1723 ; GFX90A-NEXT: ;;#ASMEND
1724 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1725 ; GFX90A-NEXT: ;;#ASMSTART
1726 ; GFX90A-NEXT: ; def v[2:3]
1727 ; GFX90A-NEXT: ;;#ASMEND
1728 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v2, 16
1729 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
1730 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1731 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1733 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_2:
1735 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1736 ; GFX940-NEXT: ;;#ASMSTART
1737 ; GFX940-NEXT: ; def v[0:1]
1738 ; GFX940-NEXT: ;;#ASMEND
1739 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1740 ; GFX940-NEXT: ;;#ASMSTART
1741 ; GFX940-NEXT: ; def v[2:3]
1742 ; GFX940-NEXT: ;;#ASMEND
1743 ; GFX940-NEXT: s_nop 0
1744 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16
1745 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
1746 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1747 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1748 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1749 %vec1 = call <4 x i16> asm "; def $0", "=v"()
1750 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 2>
1751 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <6, 2>: lane 0 takes element 2 of %vec1 and lane 1 takes
; element 2 of %vec0. Both inputs are opaque <4 x i16> values produced by
; inline asm in VGPRs; the <2 x i16> result is stored to %ptr.
1755 define void @v_shuffle_v2i16_v4i16__6_2(ptr addrspace(1) inreg %ptr) {
1756 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__6_2:
1758 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1759 ; GFX900-NEXT: ;;#ASMSTART
1760 ; GFX900-NEXT: ; def v[0:1]
1761 ; GFX900-NEXT: ;;#ASMEND
1762 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
1763 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
1764 ; GFX900-NEXT: ;;#ASMSTART
1765 ; GFX900-NEXT: ; def v[2:3]
1766 ; GFX900-NEXT: ;;#ASMEND
1767 ; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
1768 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
1769 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1770 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1772 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__6_2:
1774 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1775 ; GFX90A-NEXT: ;;#ASMSTART
1776 ; GFX90A-NEXT: ; def v[0:1]
1777 ; GFX90A-NEXT: ;;#ASMEND
1778 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
1779 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
1780 ; GFX90A-NEXT: ;;#ASMSTART
1781 ; GFX90A-NEXT: ; def v[2:3]
1782 ; GFX90A-NEXT: ;;#ASMEND
1783 ; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
1784 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
1785 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1786 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1788 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_2:
1790 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1791 ; GFX940-NEXT: ;;#ASMSTART
1792 ; GFX940-NEXT: ; def v[0:1]
1793 ; GFX940-NEXT: ;;#ASMEND
1794 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
1795 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
1796 ; GFX940-NEXT: ;;#ASMSTART
1797 ; GFX940-NEXT: ; def v[2:3]
1798 ; GFX940-NEXT: ;;#ASMEND
1799 ; GFX940-NEXT: s_nop 0
1800 ; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
1801 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
1802 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1803 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1804 %vec0 = call <4 x i16> asm "; def $0", "=v"()
1805 %vec1 = call <4 x i16> asm "; def $0", "=v"()
; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
1806 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 2>
1807 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <poison, 3>: lane 0 is unspecified and lane 1 takes
; element 3 of %vec0 (an opaque <4 x i16> produced by inline asm in VGPRs).
; The expected output for every run stores the second asm result register (v1)
; unchanged.
1811 define void @v_shuffle_v2i16_v4i16__u_3(ptr addrspace(1) inreg %ptr) {
1812 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__u_3:
1814 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1815 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1816 ; GFX900-NEXT: ;;#ASMSTART
1817 ; GFX900-NEXT: ; def v[0:1]
1818 ; GFX900-NEXT: ;;#ASMEND
1819 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1820 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1821 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1823 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__u_3:
1825 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1826 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1827 ; GFX90A-NEXT: ;;#ASMSTART
1828 ; GFX90A-NEXT: ; def v[0:1]
1829 ; GFX90A-NEXT: ;;#ASMEND
1830 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1831 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1832 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1834 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__u_3:
1836 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1837 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1838 ; GFX940-NEXT: ;;#ASMSTART
1839 ; GFX940-NEXT: ; def v[0:1]
1840 ; GFX940-NEXT: ;;#ASMEND
1841 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1842 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1843 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1844 %vec0 = call <4 x i16> asm "; def $0", "=v"()
; Only lane 1 is constrained by the mask; the second operand is poison.
1845 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 poison, i32 3>
1846 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <0, 3>: lane 0 takes element 0 and lane 1 takes element 3
; of %vec0 (an opaque <4 x i16> produced by inline asm in VGPRs). The result is
; stored to %ptr. The expected output combines the two registers with
; v_bfi_b32 and a 0xffff mask.
1850 define void @v_shuffle_v2i16_v4i16__0_3(ptr addrspace(1) inreg %ptr) {
1851 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__0_3:
1853 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1854 ; GFX900-NEXT: ;;#ASMSTART
1855 ; GFX900-NEXT: ; def v[0:1]
1856 ; GFX900-NEXT: ;;#ASMEND
1857 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
1858 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1859 ; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1
1860 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1861 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1862 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1864 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__0_3:
1866 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1867 ; GFX90A-NEXT: ;;#ASMSTART
1868 ; GFX90A-NEXT: ; def v[0:1]
1869 ; GFX90A-NEXT: ;;#ASMEND
1870 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
1871 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1872 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1
1873 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1874 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1875 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1877 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_3:
1879 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1880 ; GFX940-NEXT: ;;#ASMSTART
1881 ; GFX940-NEXT: ; def v[0:1]
1882 ; GFX940-NEXT: ;;#ASMEND
1883 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
1884 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1885 ; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1
1886 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1887 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1888 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1889 %vec0 = call <4 x i16> asm "; def $0", "=v"()
; Both mask indices are below 4, so the poison second operand is never read.
1890 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 0, i32 3>
1891 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <1, 3>: lane 0 takes element 1 and lane 1 takes element 3
; of %vec0 (an opaque <4 x i16> produced by inline asm in VGPRs). The result is
; stored to %ptr. The expected output is a single v_perm_b32 with selector
; 0x7060302.
1895 define void @v_shuffle_v2i16_v4i16__1_3(ptr addrspace(1) inreg %ptr) {
1896 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__1_3:
1898 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1899 ; GFX900-NEXT: ;;#ASMSTART
1900 ; GFX900-NEXT: ; def v[0:1]
1901 ; GFX900-NEXT: ;;#ASMEND
1902 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1903 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1904 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
1905 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1906 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1907 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1909 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__1_3:
1911 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1912 ; GFX90A-NEXT: ;;#ASMSTART
1913 ; GFX90A-NEXT: ; def v[0:1]
1914 ; GFX90A-NEXT: ;;#ASMEND
1915 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
1916 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1917 ; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4
1918 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
1919 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1920 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1922 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_3:
1924 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1925 ; GFX940-NEXT: ;;#ASMSTART
1926 ; GFX940-NEXT: ; def v[0:1]
1927 ; GFX940-NEXT: ;;#ASMEND
1928 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
1929 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1930 ; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2
1931 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
1932 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1933 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1934 %vec0 = call <4 x i16> asm "; def $0", "=v"()
; Both mask indices are below 4, so the poison second operand is never read.
1935 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 1, i32 3>
1936 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <2, 3>: extracts the upper two elements of %vec0 (an
; opaque <4 x i16> produced by inline asm in VGPRs) in order and stores them to
; %ptr. The expected output stores the second asm result register (v1)
; directly, with no extra ALU instruction.
1940 define void @v_shuffle_v2i16_v4i16__2_3(ptr addrspace(1) inreg %ptr) {
1941 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__2_3:
1943 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1944 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1945 ; GFX900-NEXT: ;;#ASMSTART
1946 ; GFX900-NEXT: ; def v[0:1]
1947 ; GFX900-NEXT: ;;#ASMEND
1948 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
1949 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1950 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1952 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__2_3:
1954 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1955 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1956 ; GFX90A-NEXT: ;;#ASMSTART
1957 ; GFX90A-NEXT: ; def v[0:1]
1958 ; GFX90A-NEXT: ;;#ASMEND
1959 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
1960 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
1961 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1963 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_3:
1965 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1966 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
1967 ; GFX940-NEXT: ;;#ASMSTART
1968 ; GFX940-NEXT: ; def v[0:1]
1969 ; GFX940-NEXT: ;;#ASMEND
1970 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
1971 ; GFX940-NEXT: s_waitcnt vmcnt(0)
1972 ; GFX940-NEXT: s_setpc_b64 s[30:31]
1973 %vec0 = call <4 x i16> asm "; def $0", "=v"()
; Both mask indices are below 4, so the poison second operand is never read.
1974 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 2, i32 3>
1975 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <3, 3>: both result lanes take element 3 of %vec0 (an
; opaque <4 x i16> produced by inline asm in VGPRs); the result is stored to
; %ptr. The expected output is a v_perm_b32 that reads v1 for both sources.
1979 define void @v_shuffle_v2i16_v4i16__3_3(ptr addrspace(1) inreg %ptr) {
1980 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__3_3:
1982 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1983 ; GFX900-NEXT: ;;#ASMSTART
1984 ; GFX900-NEXT: ; def v[0:1]
1985 ; GFX900-NEXT: ;;#ASMEND
1986 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
1987 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
1988 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
1989 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
1990 ; GFX900-NEXT: s_waitcnt vmcnt(0)
1991 ; GFX900-NEXT: s_setpc_b64 s[30:31]
1993 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__3_3:
1995 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1996 ; GFX90A-NEXT: ;;#ASMSTART
1997 ; GFX90A-NEXT: ; def v[0:1]
1998 ; GFX90A-NEXT: ;;#ASMEND
1999 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2000 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2001 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
2002 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2003 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2004 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2006 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_3:
2008 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2009 ; GFX940-NEXT: ;;#ASMSTART
2010 ; GFX940-NEXT: ; def v[0:1]
2011 ; GFX940-NEXT: ;;#ASMEND
2012 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2013 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2014 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
2015 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2016 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2017 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2018 %vec0 = call <4 x i16> asm "; def $0", "=v"()
; The same source element (index 3 of %vec0) is broadcast to both lanes.
2019 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 3, i32 3>
2020 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <4, 3>: lane 0 selects element 0 of the second operand,
; which is poison here, and lane 1 takes element 3 of %vec0 (an opaque
; <4 x i16> produced by inline asm in VGPRs). The expected output stores the
; second asm result register (v1) unchanged.
2024 define void @v_shuffle_v2i16_v4i16__4_3(ptr addrspace(1) inreg %ptr) {
2025 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__4_3:
2027 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2028 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2029 ; GFX900-NEXT: ;;#ASMSTART
2030 ; GFX900-NEXT: ; def v[0:1]
2031 ; GFX900-NEXT: ;;#ASMEND
2032 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
2033 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2034 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2036 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__4_3:
2038 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2040 ; GFX90A-NEXT: ;;#ASMSTART
2041 ; GFX90A-NEXT: ; def v[0:1]
2042 ; GFX90A-NEXT: ;;#ASMEND
2043 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
2044 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2045 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2047 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__4_3:
2049 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2050 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2051 ; GFX940-NEXT: ;;#ASMSTART
2052 ; GFX940-NEXT: ; def v[0:1]
2053 ; GFX940-NEXT: ;;#ASMEND
2054 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
2055 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2056 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2057 %vec0 = call <4 x i16> asm "; def $0", "=v"()
; Mask index 4 refers to element 0 of the poison operand, so lane 0 is poison.
2058 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 4, i32 3>
2059 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <5, 3>: lane 0 takes element 1 of %vec1 and lane 1 takes
; element 3 of %vec0. Both inputs are opaque <4 x i16> values produced by
; inline asm in VGPRs; the <2 x i16> result is stored to %ptr.
2063 define void @v_shuffle_v2i16_v4i16__5_3(ptr addrspace(1) inreg %ptr) {
2064 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__5_3:
2066 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2067 ; GFX900-NEXT: ;;#ASMSTART
2068 ; GFX900-NEXT: ; def v[0:1]
2069 ; GFX900-NEXT: ;;#ASMEND
2070 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2071 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
2072 ; GFX900-NEXT: ;;#ASMSTART
2073 ; GFX900-NEXT: ; def v[2:3]
2074 ; GFX900-NEXT: ;;#ASMEND
2075 ; GFX900-NEXT: v_perm_b32 v0, v1, v2, s4
2076 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
2077 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2078 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2080 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__5_3:
2082 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2083 ; GFX90A-NEXT: ;;#ASMSTART
2084 ; GFX90A-NEXT: ; def v[0:1]
2085 ; GFX90A-NEXT: ;;#ASMEND
2086 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2087 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2088 ; GFX90A-NEXT: ;;#ASMSTART
2089 ; GFX90A-NEXT: ; def v[2:3]
2090 ; GFX90A-NEXT: ;;#ASMEND
2091 ; GFX90A-NEXT: v_perm_b32 v0, v1, v2, s4
2092 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
2093 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2094 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2096 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_3:
2098 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2099 ; GFX940-NEXT: ;;#ASMSTART
2100 ; GFX940-NEXT: ; def v[0:1]
2101 ; GFX940-NEXT: ;;#ASMEND
2102 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2103 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2104 ; GFX940-NEXT: ;;#ASMSTART
2105 ; GFX940-NEXT: ; def v[2:3]
2106 ; GFX940-NEXT: ;;#ASMEND
2107 ; GFX940-NEXT: s_nop 0
2108 ; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2
2109 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
2110 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2111 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2112 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2113 %vec1 = call <4 x i16> asm "; def $0", "=v"()
; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
2114 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 3>
2115 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <6, 3>: lane 0 takes element 2 of %vec1 and lane 1 takes
; element 3 of %vec0. Both inputs are opaque <4 x i16> values produced by
; inline asm in VGPRs; the <2 x i16> result is stored to %ptr.
2119 define void @v_shuffle_v2i16_v4i16__6_3(ptr addrspace(1) inreg %ptr) {
2120 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__6_3:
2122 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2123 ; GFX900-NEXT: ;;#ASMSTART
2124 ; GFX900-NEXT: ; def v[0:1]
2125 ; GFX900-NEXT: ;;#ASMEND
2126 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
2127 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
2128 ; GFX900-NEXT: ;;#ASMSTART
2129 ; GFX900-NEXT: ; def v[2:3]
2130 ; GFX900-NEXT: ;;#ASMEND
2131 ; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v1
2132 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
2133 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2134 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2136 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__6_3:
2138 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2139 ; GFX90A-NEXT: ;;#ASMSTART
2140 ; GFX90A-NEXT: ; def v[0:1]
2141 ; GFX90A-NEXT: ;;#ASMEND
2142 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
2143 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2144 ; GFX90A-NEXT: ;;#ASMSTART
2145 ; GFX90A-NEXT: ; def v[2:3]
2146 ; GFX90A-NEXT: ;;#ASMEND
2147 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v1
2148 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
2149 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2150 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2152 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_3:
2154 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2155 ; GFX940-NEXT: ;;#ASMSTART
2156 ; GFX940-NEXT: ; def v[0:1]
2157 ; GFX940-NEXT: ;;#ASMEND
2158 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
2159 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2160 ; GFX940-NEXT: ;;#ASMSTART
2161 ; GFX940-NEXT: ; def v[2:3]
2162 ; GFX940-NEXT: ;;#ASMEND
2163 ; GFX940-NEXT: s_nop 0
2164 ; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1
2165 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
2166 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2167 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2168 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2169 %vec1 = call <4 x i16> asm "; def $0", "=v"()
; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
2170 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 3>
2171 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <poison, 4>: lane 0 is unspecified and lane 1 selects
; element 0 of the poison second operand, so no lane reads %vec0. All three
; runs share one expected output, which contains only the entry wait and the
; return (no asm block and no store).
2175 define void @v_shuffle_v2i16_v4i16__u_4(ptr addrspace(1) inreg %ptr) {
2176 ; GFX9-LABEL: v_shuffle_v2i16_v4i16__u_4:
2178 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2179 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2180 %vec0 = call <4 x i16> asm "; def $0", "=v"()
; Mask index 4 refers to element 0 of the poison operand.
2181 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 poison, i32 4>
2182 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <0, 4>: lane 0 takes element 0 of %vec0 (an opaque
; <4 x i16> produced by inline asm in VGPRs) and lane 1 selects element 0 of
; the poison second operand. The expected output stores the first asm result
; register (v0) unchanged.
2186 define void @v_shuffle_v2i16_v4i16__0_4(ptr addrspace(1) inreg %ptr) {
2187 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__0_4:
2189 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2190 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2191 ; GFX900-NEXT: ;;#ASMSTART
2192 ; GFX900-NEXT: ; def v[0:1]
2193 ; GFX900-NEXT: ;;#ASMEND
2194 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2195 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2196 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2198 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__0_4:
2200 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2201 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2202 ; GFX90A-NEXT: ;;#ASMSTART
2203 ; GFX90A-NEXT: ; def v[0:1]
2204 ; GFX90A-NEXT: ;;#ASMEND
2205 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2206 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2207 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2209 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_4:
2211 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2212 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2213 ; GFX940-NEXT: ;;#ASMSTART
2214 ; GFX940-NEXT: ; def v[0:1]
2215 ; GFX940-NEXT: ;;#ASMEND
2216 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2217 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2218 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2219 %vec0 = call <4 x i16> asm "; def $0", "=v"()
; Mask index 4 refers to element 0 of the poison operand, so lane 1 is poison.
2220 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 0, i32 4>
2221 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <1, 4>: lane 0 takes element 1 of %vec0 (an opaque
; <4 x i16> produced by inline asm in VGPRs) and lane 1 selects element 0 of
; the poison second operand. The expected output is a v_alignbit_b32 by 16
; whose first source is an SGPR with no def in the checked sequence.
2225 define void @v_shuffle_v2i16_v4i16__1_4(ptr addrspace(1) inreg %ptr) {
2226 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__1_4:
2228 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2229 ; GFX900-NEXT: ;;#ASMSTART
2230 ; GFX900-NEXT: ; def v[0:1]
2231 ; GFX900-NEXT: ;;#ASMEND
2232 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2233 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
2234 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2235 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2236 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2238 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__1_4:
2240 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2241 ; GFX90A-NEXT: ;;#ASMSTART
2242 ; GFX90A-NEXT: ; def v[0:1]
2243 ; GFX90A-NEXT: ;;#ASMEND
2244 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2245 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
2246 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2247 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2248 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2250 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_4:
2252 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2253 ; GFX940-NEXT: ;;#ASMSTART
2254 ; GFX940-NEXT: ; def v[0:1]
2255 ; GFX940-NEXT: ;;#ASMEND
2256 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2257 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
2258 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2259 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2260 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2261 %vec0 = call <4 x i16> asm "; def $0", "=v"()
; Mask index 4 refers to element 0 of the poison operand, so lane 1 is poison.
2262 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 1, i32 4>
2263 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <2, 4>: lane 0 takes element 2 of %vec0 (an opaque
; <4 x i16> produced by inline asm in VGPRs) and lane 1 selects element 0 of
; the poison second operand. The expected output stores the second asm result
; register (v1) unchanged.
2267 define void @v_shuffle_v2i16_v4i16__2_4(ptr addrspace(1) inreg %ptr) {
2268 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__2_4:
2270 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2271 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2272 ; GFX900-NEXT: ;;#ASMSTART
2273 ; GFX900-NEXT: ; def v[0:1]
2274 ; GFX900-NEXT: ;;#ASMEND
2275 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
2276 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2277 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2279 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__2_4:
2281 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2282 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2283 ; GFX90A-NEXT: ;;#ASMSTART
2284 ; GFX90A-NEXT: ; def v[0:1]
2285 ; GFX90A-NEXT: ;;#ASMEND
2286 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
2287 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2288 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2290 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_4:
2292 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2293 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2294 ; GFX940-NEXT: ;;#ASMSTART
2295 ; GFX940-NEXT: ; def v[0:1]
2296 ; GFX940-NEXT: ;;#ASMEND
2297 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
2298 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2299 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2300 %vec0 = call <4 x i16> asm "; def $0", "=v"()
; Mask index 4 refers to element 0 of the poison operand, so lane 1 is poison.
2301 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 2, i32 4>
2302 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <3, 4>: lane 0 takes element 3 of %vec0 (an opaque
; <4 x i16> produced by inline asm in VGPRs) and lane 1 selects element 0 of
; the poison second operand. The expected output is a v_alignbit_b32 by 16 on
; v1 whose first source is an SGPR with no def in the checked sequence.
2306 define void @v_shuffle_v2i16_v4i16__3_4(ptr addrspace(1) inreg %ptr) {
2307 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__3_4:
2309 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2310 ; GFX900-NEXT: ;;#ASMSTART
2311 ; GFX900-NEXT: ; def v[0:1]
2312 ; GFX900-NEXT: ;;#ASMEND
2313 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2314 ; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16
2315 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2316 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2317 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2319 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__3_4:
2321 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2322 ; GFX90A-NEXT: ;;#ASMSTART
2323 ; GFX90A-NEXT: ; def v[0:1]
2324 ; GFX90A-NEXT: ;;#ASMEND
2325 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2326 ; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16
2327 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2328 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2329 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2331 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_4:
2333 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2334 ; GFX940-NEXT: ;;#ASMSTART
2335 ; GFX940-NEXT: ; def v[0:1]
2336 ; GFX940-NEXT: ;;#ASMEND
2337 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2338 ; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16
2339 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2340 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2341 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2342 %vec0 = call <4 x i16> asm "; def $0", "=v"()
; Mask index 4 refers to element 0 of the poison operand, so lane 1 is poison.
2343 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 3, i32 4>
2344 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <4, 4>: both lanes select element 0 of the poison second
; operand, so no lane reads %vec0. All three runs share one expected output,
; which contains only the entry wait and the return (no asm block and no
; store).
2348 define void @v_shuffle_v2i16_v4i16__4_4(ptr addrspace(1) inreg %ptr) {
2349 ; GFX9-LABEL: v_shuffle_v2i16_v4i16__4_4:
2351 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2352 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2353 %vec0 = call <4 x i16> asm "; def $0", "=v"()
; Mask index 4 refers to element 0 of the poison operand.
2354 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 4, i32 4>
2355 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <5, 4>: lane 0 takes element 1 and lane 1 takes element 0
; of %vec1, i.e. the low two elements of %vec1 swapped. %vec0 is not referenced
; by the mask, and the expected output contains a single asm def block. The
; swap is expected as a v_alignbit_b32 of v0 with itself by 16.
2359 define void @v_shuffle_v2i16_v4i16__5_4(ptr addrspace(1) inreg %ptr) {
2360 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__5_4:
2362 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2363 ; GFX900-NEXT: ;;#ASMSTART
2364 ; GFX900-NEXT: ; def v[0:1]
2365 ; GFX900-NEXT: ;;#ASMEND
2366 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2367 ; GFX900-NEXT: v_alignbit_b32 v0, v0, v0, 16
2368 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2369 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2370 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2372 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__5_4:
2374 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2375 ; GFX90A-NEXT: ;;#ASMSTART
2376 ; GFX90A-NEXT: ; def v[0:1]
2377 ; GFX90A-NEXT: ;;#ASMEND
2378 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2379 ; GFX90A-NEXT: v_alignbit_b32 v0, v0, v0, 16
2380 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2381 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2382 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2384 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_4:
2386 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2387 ; GFX940-NEXT: ;;#ASMSTART
2388 ; GFX940-NEXT: ; def v[0:1]
2389 ; GFX940-NEXT: ;;#ASMEND
2390 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2391 ; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16
2392 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2393 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2394 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2395 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2396 %vec1 = call <4 x i16> asm "; def $0", "=v"()
; Both mask indices are >= 4, so only %vec1 contributes to the result.
2397 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 4>
2398 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <6, 4>: lane 0 takes element 2 and lane 1 takes element 0
; of %vec1. %vec0 is not referenced by the mask, and the expected output
; contains a single asm def block. The two registers are combined with a
; v_perm_b32 using selector 0x5040100.
2402 define void @v_shuffle_v2i16_v4i16__6_4(ptr addrspace(1) inreg %ptr) {
2403 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__6_4:
2405 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2406 ; GFX900-NEXT: ;;#ASMSTART
2407 ; GFX900-NEXT: ; def v[0:1]
2408 ; GFX900-NEXT: ;;#ASMEND
2409 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2410 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2411 ; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4
2412 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2413 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2414 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2416 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__6_4:
2418 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2419 ; GFX90A-NEXT: ;;#ASMSTART
2420 ; GFX90A-NEXT: ; def v[0:1]
2421 ; GFX90A-NEXT: ;;#ASMEND
2422 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2423 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2424 ; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4
2425 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2426 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2427 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2429 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_4:
2431 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2432 ; GFX940-NEXT: ;;#ASMSTART
2433 ; GFX940-NEXT: ; def v[0:1]
2434 ; GFX940-NEXT: ;;#ASMEND
2435 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2436 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2437 ; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2
2438 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2439 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2440 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2441 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2442 %vec1 = call <4 x i16> asm "; def $0", "=v"()
; Both mask indices are >= 4, so only %vec1 contributes to the result.
2443 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 4>
2444 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <poison, 5>: lane 0 is unspecified and lane 1 takes
; element 1 of %vec1. %vec0 is not referenced by the mask, and the expected
; output contains a single asm def block whose first register (v0) is stored
; unchanged.
2448 define void @v_shuffle_v2i16_v4i16__u_5(ptr addrspace(1) inreg %ptr) {
2449 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__u_5:
2451 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2452 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2453 ; GFX900-NEXT: ;;#ASMSTART
2454 ; GFX900-NEXT: ; def v[0:1]
2455 ; GFX900-NEXT: ;;#ASMEND
2456 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2457 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2458 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2460 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__u_5:
2462 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2463 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2464 ; GFX90A-NEXT: ;;#ASMSTART
2465 ; GFX90A-NEXT: ; def v[0:1]
2466 ; GFX90A-NEXT: ;;#ASMEND
2467 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2468 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2469 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2471 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__u_5:
2473 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2474 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2475 ; GFX940-NEXT: ;;#ASMSTART
2476 ; GFX940-NEXT: ; def v[0:1]
2477 ; GFX940-NEXT: ;;#ASMEND
2478 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2479 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2480 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2481 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2482 %vec1 = call <4 x i16> asm "; def $0", "=v"()
; Only lane 1 is constrained by the mask; index 5 is element 1 of %vec1.
2483 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 poison, i32 5>
2484 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <0, 5>: lane 0 takes element 0 of %vec0 and lane 1 takes
; element 1 of %vec1. Both inputs are opaque <4 x i16> values produced by
; inline asm in VGPRs; the <2 x i16> result is stored to %ptr.
; Note that the expected register assignment differs between runs: the gfx900
; checks place the second asm result in v[1:2], while the gfx90a and gfx940
; checks place it in v[2:3].
2488 define void @v_shuffle_v2i16_v4i16__0_5(ptr addrspace(1) inreg %ptr) {
2489 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__0_5:
2491 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2492 ; GFX900-NEXT: ;;#ASMSTART
2493 ; GFX900-NEXT: ; def v[0:1]
2494 ; GFX900-NEXT: ;;#ASMEND
2495 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
2496 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
2497 ; GFX900-NEXT: ;;#ASMSTART
2498 ; GFX900-NEXT: ; def v[1:2]
2499 ; GFX900-NEXT: ;;#ASMEND
2500 ; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1
2501 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
2502 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2503 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2505 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__0_5:
2507 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2508 ; GFX90A-NEXT: ;;#ASMSTART
2509 ; GFX90A-NEXT: ; def v[0:1]
2510 ; GFX90A-NEXT: ;;#ASMEND
2511 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
2512 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2513 ; GFX90A-NEXT: ;;#ASMSTART
2514 ; GFX90A-NEXT: ; def v[2:3]
2515 ; GFX90A-NEXT: ;;#ASMEND
2516 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v2
2517 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
2518 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2519 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2521 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_5:
2523 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2524 ; GFX940-NEXT: ;;#ASMSTART
2525 ; GFX940-NEXT: ; def v[0:1]
2526 ; GFX940-NEXT: ;;#ASMEND
2527 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
2528 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2529 ; GFX940-NEXT: ;;#ASMSTART
2530 ; GFX940-NEXT: ; def v[2:3]
2531 ; GFX940-NEXT: ;;#ASMEND
2532 ; GFX940-NEXT: s_nop 0
2533 ; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2
2534 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
2535 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2536 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2537 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2538 %vec1 = call <4 x i16> asm "; def $0", "=v"()
; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
2539 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 0, i32 5>
2540 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <1, 5>: lane 0 takes element 1 of %vec0 and lane 1 takes
; element 1 of %vec1. Both inputs are opaque <4 x i16> values produced by
; inline asm in VGPRs; the <2 x i16> result is stored to %ptr.
; Note that the expected register assignment differs between runs: the gfx900
; checks place the second asm result in v[1:2], while the gfx90a and gfx940
; checks place it in v[2:3].
2544 define void @v_shuffle_v2i16_v4i16__1_5(ptr addrspace(1) inreg %ptr) {
2545 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__1_5:
2547 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2548 ; GFX900-NEXT: ;;#ASMSTART
2549 ; GFX900-NEXT: ; def v[0:1]
2550 ; GFX900-NEXT: ;;#ASMEND
2551 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2552 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
2553 ; GFX900-NEXT: ;;#ASMSTART
2554 ; GFX900-NEXT: ; def v[1:2]
2555 ; GFX900-NEXT: ;;#ASMEND
2556 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
2557 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
2558 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2559 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2561 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__1_5:
2563 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2564 ; GFX90A-NEXT: ;;#ASMSTART
2565 ; GFX90A-NEXT: ; def v[0:1]
2566 ; GFX90A-NEXT: ;;#ASMEND
2567 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2568 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2569 ; GFX90A-NEXT: ;;#ASMSTART
2570 ; GFX90A-NEXT: ; def v[2:3]
2571 ; GFX90A-NEXT: ;;#ASMEND
2572 ; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4
2573 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
2574 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2575 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2577 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_5:
2579 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2580 ; GFX940-NEXT: ;;#ASMSTART
2581 ; GFX940-NEXT: ; def v[0:1]
2582 ; GFX940-NEXT: ;;#ASMEND
2583 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2584 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2585 ; GFX940-NEXT: ;;#ASMSTART
2586 ; GFX940-NEXT: ; def v[2:3]
2587 ; GFX940-NEXT: ;;#ASMEND
2588 ; GFX940-NEXT: s_nop 0
2589 ; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2
2590 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
2591 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2592 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2593 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2594 %vec1 = call <4 x i16> asm "; def $0", "=v"()
; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
2595 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 1, i32 5>
2596 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle test, mask <2, 5>: lane 0 takes element 2 of %vec0 and lane 1 takes
; element 1 of %vec1. Both inputs are opaque <4 x i16> values produced by
; inline asm in VGPRs; the <2 x i16> result is stored to %ptr.
2600 define void @v_shuffle_v2i16_v4i16__2_5(ptr addrspace(1) inreg %ptr) {
2601 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__2_5:
2603 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2604 ; GFX900-NEXT: ;;#ASMSTART
2605 ; GFX900-NEXT: ; def v[0:1]
2606 ; GFX900-NEXT: ;;#ASMEND
2607 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
2608 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
2609 ; GFX900-NEXT: ;;#ASMSTART
2610 ; GFX900-NEXT: ; def v[2:3]
2611 ; GFX900-NEXT: ;;#ASMEND
2612 ; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v2
2613 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
2614 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2615 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2617 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__2_5:
2619 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2620 ; GFX90A-NEXT: ;;#ASMSTART
2621 ; GFX90A-NEXT: ; def v[0:1]
2622 ; GFX90A-NEXT: ;;#ASMEND
2623 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
2624 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2625 ; GFX90A-NEXT: ;;#ASMSTART
2626 ; GFX90A-NEXT: ; def v[2:3]
2627 ; GFX90A-NEXT: ;;#ASMEND
2628 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v2
2629 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
2630 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2631 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2633 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_5:
2635 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2636 ; GFX940-NEXT: ;;#ASMSTART
2637 ; GFX940-NEXT: ; def v[0:1]
2638 ; GFX940-NEXT: ;;#ASMEND
2639 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
2640 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2641 ; GFX940-NEXT: ;;#ASMSTART
2642 ; GFX940-NEXT: ; def v[2:3]
2643 ; GFX940-NEXT: ;;#ASMEND
2644 ; GFX940-NEXT: s_nop 0
2645 ; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2
2646 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
2647 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2648 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2649 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2650 %vec1 = call <4 x i16> asm "; def $0", "=v"()
; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
2651 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 2, i32 5>
2652 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <3, 5> takes result element 0 from %vec0[3] and result element 1
; from %vec1[1] (mask indices 4-7 address the second operand). The expected
; code combines the two source dwords with v_perm_b32 using the byte selector
; 0x7060302 and stores the packed dword to %ptr.
2656 define void @v_shuffle_v2i16_v4i16__3_5(ptr addrspace(1) inreg %ptr) {
2657 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__3_5:
2659 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2660 ; GFX900-NEXT: ;;#ASMSTART
2661 ; GFX900-NEXT: ; def v[0:1]
2662 ; GFX900-NEXT: ;;#ASMEND
2663 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2664 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
2665 ; GFX900-NEXT: ;;#ASMSTART
2666 ; GFX900-NEXT: ; def v[2:3]
2667 ; GFX900-NEXT: ;;#ASMEND
2668 ; GFX900-NEXT: v_perm_b32 v0, v2, v1, s4
2669 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
2670 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2671 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2673 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__3_5:
2675 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2676 ; GFX90A-NEXT: ;;#ASMSTART
2677 ; GFX90A-NEXT: ; def v[0:1]
2678 ; GFX90A-NEXT: ;;#ASMEND
2679 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2680 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2681 ; GFX90A-NEXT: ;;#ASMSTART
2682 ; GFX90A-NEXT: ; def v[2:3]
2683 ; GFX90A-NEXT: ;;#ASMEND
2684 ; GFX90A-NEXT: v_perm_b32 v0, v2, v1, s4
2685 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
2686 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2687 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2689 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_5:
2691 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2692 ; GFX940-NEXT: ;;#ASMSTART
2693 ; GFX940-NEXT: ; def v[0:1]
2694 ; GFX940-NEXT: ;;#ASMEND
2695 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2696 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2697 ; GFX940-NEXT: ;;#ASMSTART
2698 ; GFX940-NEXT: ; def v[2:3]
2699 ; GFX940-NEXT: ;;#ASMEND
2700 ; GFX940-NEXT: s_nop 0
2701 ; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2
2702 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
2703 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2704 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2705 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2706 %vec1 = call <4 x i16> asm "; def $0", "=v"()
2707 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 3, i32 5>
2708 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <4, 5> selects %vec1[0] and %vec1[1], i.e. the first two
; elements of the second operand in their original order; %vec0 is not
; referenced by the mask. The expected code contains a single inline-asm def
; and stores v0 directly with no permute instruction.
2712 define void @v_shuffle_v2i16_v4i16__4_5(ptr addrspace(1) inreg %ptr) {
2713 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__4_5:
2715 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2716 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2717 ; GFX900-NEXT: ;;#ASMSTART
2718 ; GFX900-NEXT: ; def v[0:1]
2719 ; GFX900-NEXT: ;;#ASMEND
2720 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2721 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2722 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2724 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__4_5:
2726 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2727 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2728 ; GFX90A-NEXT: ;;#ASMSTART
2729 ; GFX90A-NEXT: ; def v[0:1]
2730 ; GFX90A-NEXT: ;;#ASMEND
2731 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2732 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2733 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2735 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__4_5:
2737 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2738 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2739 ; GFX940-NEXT: ;;#ASMSTART
2740 ; GFX940-NEXT: ; def v[0:1]
2741 ; GFX940-NEXT: ;;#ASMEND
2742 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2743 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2744 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2745 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2746 %vec1 = call <4 x i16> asm "; def $0", "=v"()
2747 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 4, i32 5>
2748 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <5, 5> replicates %vec1[1] into both result elements; %vec0 is
; not referenced by the mask. The expected code contains a single inline-asm
; def and builds the result with v_perm_b32 v0, v0, v0 using the byte selector
; 0x7060302.
2752 define void @v_shuffle_v2i16_v4i16__5_5(ptr addrspace(1) inreg %ptr) {
2753 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__5_5:
2755 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2756 ; GFX900-NEXT: ;;#ASMSTART
2757 ; GFX900-NEXT: ; def v[0:1]
2758 ; GFX900-NEXT: ;;#ASMEND
2759 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
2760 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2761 ; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
2762 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2763 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2764 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2766 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__5_5:
2768 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2769 ; GFX90A-NEXT: ;;#ASMSTART
2770 ; GFX90A-NEXT: ; def v[0:1]
2771 ; GFX90A-NEXT: ;;#ASMEND
2772 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
2773 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2774 ; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
2775 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2776 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2777 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2779 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_5:
2781 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2782 ; GFX940-NEXT: ;;#ASMSTART
2783 ; GFX940-NEXT: ; def v[0:1]
2784 ; GFX940-NEXT: ;;#ASMEND
2785 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
2786 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2787 ; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
2788 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2789 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2790 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2791 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2792 %vec1 = call <4 x i16> asm "; def $0", "=v"()
2793 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 5>
2794 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <6, 5> takes result element 0 from %vec1[2] and result element 1
; from %vec1[1]; %vec0 is not referenced by the mask. The expected code
; contains a single inline-asm def and merges its two dwords with v_bfi_b32
; under a 0xffff mask.
2798 define void @v_shuffle_v2i16_v4i16__6_5(ptr addrspace(1) inreg %ptr) {
2799 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__6_5:
2801 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2802 ; GFX900-NEXT: ;;#ASMSTART
2803 ; GFX900-NEXT: ; def v[0:1]
2804 ; GFX900-NEXT: ;;#ASMEND
2805 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
2806 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2807 ; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v0
2808 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2809 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2810 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2812 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__6_5:
2814 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2815 ; GFX90A-NEXT: ;;#ASMSTART
2816 ; GFX90A-NEXT: ; def v[0:1]
2817 ; GFX90A-NEXT: ;;#ASMEND
2818 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
2819 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2820 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v0
2821 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2822 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2823 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2825 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_5:
2827 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2828 ; GFX940-NEXT: ;;#ASMSTART
2829 ; GFX940-NEXT: ; def v[0:1]
2830 ; GFX940-NEXT: ;;#ASMEND
2831 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
2832 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2833 ; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0
2834 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2835 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2836 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2837 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2838 %vec1 = call <4 x i16> asm "; def $0", "=v"()
2839 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 5>
2840 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <poison, 6> leaves result element 0 unspecified and takes result
; element 1 from %vec1[2]; %vec0 is not referenced by the mask. The expected
; code contains a single inline-asm def and moves the wanted half into place
; with a 16-bit left shift (v_lshlrev_b32) of v1.
2844 define void @v_shuffle_v2i16_v4i16__u_6(ptr addrspace(1) inreg %ptr) {
2845 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__u_6:
2847 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2848 ; GFX900-NEXT: ;;#ASMSTART
2849 ; GFX900-NEXT: ; def v[0:1]
2850 ; GFX900-NEXT: ;;#ASMEND
2851 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
2852 ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1
2853 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
2854 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2855 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2857 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__u_6:
2859 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2860 ; GFX90A-NEXT: ;;#ASMSTART
2861 ; GFX90A-NEXT: ; def v[0:1]
2862 ; GFX90A-NEXT: ;;#ASMEND
2863 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
2864 ; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1
2865 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
2866 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2867 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2869 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__u_6:
2871 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2872 ; GFX940-NEXT: ;;#ASMSTART
2873 ; GFX940-NEXT: ; def v[0:1]
2874 ; GFX940-NEXT: ;;#ASMEND
2875 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
2876 ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1
2877 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
2878 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2879 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2880 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2881 %vec1 = call <4 x i16> asm "; def $0", "=v"()
2882 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 poison, i32 6>
2883 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <0, 6> takes result element 0 from %vec0[0] and result element 1
; from %vec1[2] (mask indices 4-7 address the second operand). The expected
; code combines the two source dwords with v_perm_b32 using the byte selector
; 0x5040100. The gfx900 checks place the second def in v[1:2], the other two
; targets in v[2:3].
2887 define void @v_shuffle_v2i16_v4i16__0_6(ptr addrspace(1) inreg %ptr) {
2888 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__0_6:
2890 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2891 ; GFX900-NEXT: ;;#ASMSTART
2892 ; GFX900-NEXT: ; def v[0:1]
2893 ; GFX900-NEXT: ;;#ASMEND
2894 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
2895 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
2896 ; GFX900-NEXT: ;;#ASMSTART
2897 ; GFX900-NEXT: ; def v[1:2]
2898 ; GFX900-NEXT: ;;#ASMEND
2899 ; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4
2900 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
2901 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2902 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2904 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__0_6:
2906 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2907 ; GFX90A-NEXT: ;;#ASMSTART
2908 ; GFX90A-NEXT: ; def v[0:1]
2909 ; GFX90A-NEXT: ;;#ASMEND
2910 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
2911 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2912 ; GFX90A-NEXT: ;;#ASMSTART
2913 ; GFX90A-NEXT: ; def v[2:3]
2914 ; GFX90A-NEXT: ;;#ASMEND
2915 ; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4
2916 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
2917 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2918 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2920 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_6:
2922 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2923 ; GFX940-NEXT: ;;#ASMSTART
2924 ; GFX940-NEXT: ; def v[0:1]
2925 ; GFX940-NEXT: ;;#ASMEND
2926 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
2927 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2928 ; GFX940-NEXT: ;;#ASMSTART
2929 ; GFX940-NEXT: ; def v[2:3]
2930 ; GFX940-NEXT: ;;#ASMEND
2931 ; GFX940-NEXT: s_nop 0
2932 ; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2
2933 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
2934 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2935 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2936 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2937 %vec1 = call <4 x i16> asm "; def $0", "=v"()
2938 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 0, i32 6>
2939 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <1, 6> takes result element 0 from %vec0[1] and result element 1
; from %vec1[2] (mask indices 4-7 address the second operand). The expected
; code needs no selector constant; it extracts the result with
; v_alignbit_b32 and a shift of 16. The gfx900 checks place the second def in
; v[1:2], the other two targets in v[2:3].
2943 define void @v_shuffle_v2i16_v4i16__1_6(ptr addrspace(1) inreg %ptr) {
2944 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__1_6:
2946 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2947 ; GFX900-NEXT: ;;#ASMSTART
2948 ; GFX900-NEXT: ; def v[0:1]
2949 ; GFX900-NEXT: ;;#ASMEND
2950 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
2951 ; GFX900-NEXT: ;;#ASMSTART
2952 ; GFX900-NEXT: ; def v[1:2]
2953 ; GFX900-NEXT: ;;#ASMEND
2954 ; GFX900-NEXT: v_alignbit_b32 v0, v2, v0, 16
2955 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
2956 ; GFX900-NEXT: s_waitcnt vmcnt(0)
2957 ; GFX900-NEXT: s_setpc_b64 s[30:31]
2959 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__1_6:
2961 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2962 ; GFX90A-NEXT: ;;#ASMSTART
2963 ; GFX90A-NEXT: ; def v[0:1]
2964 ; GFX90A-NEXT: ;;#ASMEND
2965 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
2966 ; GFX90A-NEXT: ;;#ASMSTART
2967 ; GFX90A-NEXT: ; def v[2:3]
2968 ; GFX90A-NEXT: ;;#ASMEND
2969 ; GFX90A-NEXT: v_alignbit_b32 v0, v3, v0, 16
2970 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
2971 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
2972 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
2974 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_6:
2976 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2977 ; GFX940-NEXT: ;;#ASMSTART
2978 ; GFX940-NEXT: ; def v[0:1]
2979 ; GFX940-NEXT: ;;#ASMEND
2980 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
2981 ; GFX940-NEXT: ;;#ASMSTART
2982 ; GFX940-NEXT: ; def v[2:3]
2983 ; GFX940-NEXT: ;;#ASMEND
2984 ; GFX940-NEXT: s_nop 0
2985 ; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16
2986 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
2987 ; GFX940-NEXT: s_waitcnt vmcnt(0)
2988 ; GFX940-NEXT: s_setpc_b64 s[30:31]
2989 %vec0 = call <4 x i16> asm "; def $0", "=v"()
2990 %vec1 = call <4 x i16> asm "; def $0", "=v"()
2991 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 1, i32 6>
2992 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <2, 6> takes result element 0 from %vec0[2] and result element 1
; from %vec1[2] (mask indices 4-7 address the second operand). The expected
; code combines the second dword of each source pair with v_perm_b32 using the
; byte selector 0x5040100.
2996 define void @v_shuffle_v2i16_v4i16__2_6(ptr addrspace(1) inreg %ptr) {
2997 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__2_6:
2999 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3000 ; GFX900-NEXT: ;;#ASMSTART
3001 ; GFX900-NEXT: ; def v[0:1]
3002 ; GFX900-NEXT: ;;#ASMEND
3003 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
3004 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3005 ; GFX900-NEXT: ;;#ASMSTART
3006 ; GFX900-NEXT: ; def v[2:3]
3007 ; GFX900-NEXT: ;;#ASMEND
3008 ; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4
3009 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3010 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3011 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3013 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__2_6:
3015 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3016 ; GFX90A-NEXT: ;;#ASMSTART
3017 ; GFX90A-NEXT: ; def v[0:1]
3018 ; GFX90A-NEXT: ;;#ASMEND
3019 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
3020 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3021 ; GFX90A-NEXT: ;;#ASMSTART
3022 ; GFX90A-NEXT: ; def v[2:3]
3023 ; GFX90A-NEXT: ;;#ASMEND
3024 ; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4
3025 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3026 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3027 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3029 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_6:
3031 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3032 ; GFX940-NEXT: ;;#ASMSTART
3033 ; GFX940-NEXT: ; def v[0:1]
3034 ; GFX940-NEXT: ;;#ASMEND
3035 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
3036 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3037 ; GFX940-NEXT: ;;#ASMSTART
3038 ; GFX940-NEXT: ; def v[2:3]
3039 ; GFX940-NEXT: ;;#ASMEND
3040 ; GFX940-NEXT: s_nop 0
3041 ; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2
3042 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3043 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3044 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3045 %vec0 = call <4 x i16> asm "; def $0", "=v"()
3046 %vec1 = call <4 x i16> asm "; def $0", "=v"()
3047 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 2, i32 6>
3048 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <3, 6> takes result element 0 from %vec0[3] and result element 1
; from %vec1[2] (mask indices 4-7 address the second operand). The expected
; code needs no selector constant; it extracts the result from the second
; dword of each source pair with v_alignbit_b32 and a shift of 16.
3052 define void @v_shuffle_v2i16_v4i16__3_6(ptr addrspace(1) inreg %ptr) {
3053 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__3_6:
3055 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3056 ; GFX900-NEXT: ;;#ASMSTART
3057 ; GFX900-NEXT: ; def v[0:1]
3058 ; GFX900-NEXT: ;;#ASMEND
3059 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3060 ; GFX900-NEXT: ;;#ASMSTART
3061 ; GFX900-NEXT: ; def v[2:3]
3062 ; GFX900-NEXT: ;;#ASMEND
3063 ; GFX900-NEXT: v_alignbit_b32 v0, v3, v1, 16
3064 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3065 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3066 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3068 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__3_6:
3070 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3071 ; GFX90A-NEXT: ;;#ASMSTART
3072 ; GFX90A-NEXT: ; def v[0:1]
3073 ; GFX90A-NEXT: ;;#ASMEND
3074 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3075 ; GFX90A-NEXT: ;;#ASMSTART
3076 ; GFX90A-NEXT: ; def v[2:3]
3077 ; GFX90A-NEXT: ;;#ASMEND
3078 ; GFX90A-NEXT: v_alignbit_b32 v0, v3, v1, 16
3079 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3080 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3081 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3083 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_6:
3085 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3086 ; GFX940-NEXT: ;;#ASMSTART
3087 ; GFX940-NEXT: ; def v[0:1]
3088 ; GFX940-NEXT: ;;#ASMEND
3089 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3090 ; GFX940-NEXT: ;;#ASMSTART
3091 ; GFX940-NEXT: ; def v[2:3]
3092 ; GFX940-NEXT: ;;#ASMEND
3093 ; GFX940-NEXT: s_nop 0
3094 ; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16
3095 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3096 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3097 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3098 %vec0 = call <4 x i16> asm "; def $0", "=v"()
3099 %vec1 = call <4 x i16> asm "; def $0", "=v"()
3100 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 3, i32 6>
3101 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <4, 6> takes result element 0 from %vec1[0] and result element 1
; from %vec1[2]; %vec0 is not referenced by the mask. The expected code
; contains a single inline-asm def and combines its two dwords with v_perm_b32
; using the byte selector 0x5040100.
3105 define void @v_shuffle_v2i16_v4i16__4_6(ptr addrspace(1) inreg %ptr) {
3106 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__4_6:
3108 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3109 ; GFX900-NEXT: ;;#ASMSTART
3110 ; GFX900-NEXT: ; def v[0:1]
3111 ; GFX900-NEXT: ;;#ASMEND
3112 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
3113 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3114 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
3115 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
3116 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3117 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3119 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__4_6:
3121 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3122 ; GFX90A-NEXT: ;;#ASMSTART
3123 ; GFX90A-NEXT: ; def v[0:1]
3124 ; GFX90A-NEXT: ;;#ASMEND
3125 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
3126 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3127 ; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4
3128 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
3129 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3130 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3132 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__4_6:
3134 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3135 ; GFX940-NEXT: ;;#ASMSTART
3136 ; GFX940-NEXT: ; def v[0:1]
3137 ; GFX940-NEXT: ;;#ASMEND
3138 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
3139 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3140 ; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2
3141 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
3142 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3143 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3144 %vec0 = call <4 x i16> asm "; def $0", "=v"()
3145 %vec1 = call <4 x i16> asm "; def $0", "=v"()
3146 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 4, i32 6>
3147 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <5, 6> takes result element 0 from %vec1[1] and result element 1
; from %vec1[2]; %vec0 is not referenced by the mask. The expected code
; contains a single inline-asm def and extracts the two adjacent elements with
; v_alignbit_b32 v0, v1, v0 and a shift of 16.
3151 define void @v_shuffle_v2i16_v4i16__5_6(ptr addrspace(1) inreg %ptr) {
3152 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__5_6:
3154 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3155 ; GFX900-NEXT: ;;#ASMSTART
3156 ; GFX900-NEXT: ; def v[0:1]
3157 ; GFX900-NEXT: ;;#ASMEND
3158 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3159 ; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16
3160 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
3161 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3162 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3164 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__5_6:
3166 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3167 ; GFX90A-NEXT: ;;#ASMSTART
3168 ; GFX90A-NEXT: ; def v[0:1]
3169 ; GFX90A-NEXT: ;;#ASMEND
3170 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3171 ; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16
3172 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
3173 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3174 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3176 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_6:
3178 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3179 ; GFX940-NEXT: ;;#ASMSTART
3180 ; GFX940-NEXT: ; def v[0:1]
3181 ; GFX940-NEXT: ;;#ASMEND
3182 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3183 ; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16
3184 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
3185 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3186 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3187 %vec0 = call <4 x i16> asm "; def $0", "=v"()
3188 %vec1 = call <4 x i16> asm "; def $0", "=v"()
3189 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 6>
3190 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <6, 6> replicates %vec1[2] into both result elements; %vec0 is
; not referenced by the mask. The expected code contains a single inline-asm
; def and builds the result with v_perm_b32 v0, v1, v1 using the byte selector
; 0x5040100.
3194 define void @v_shuffle_v2i16_v4i16__6_6(ptr addrspace(1) inreg %ptr) {
3195 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__6_6:
3197 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3198 ; GFX900-NEXT: ;;#ASMSTART
3199 ; GFX900-NEXT: ; def v[0:1]
3200 ; GFX900-NEXT: ;;#ASMEND
3201 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100
3202 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3203 ; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4
3204 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
3205 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3206 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3208 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__6_6:
3210 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3211 ; GFX90A-NEXT: ;;#ASMSTART
3212 ; GFX90A-NEXT: ; def v[0:1]
3213 ; GFX90A-NEXT: ;;#ASMEND
3214 ; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
3215 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3216 ; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4
3217 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
3218 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3219 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3221 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_6:
3223 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3224 ; GFX940-NEXT: ;;#ASMSTART
3225 ; GFX940-NEXT: ; def v[0:1]
3226 ; GFX940-NEXT: ;;#ASMEND
3227 ; GFX940-NEXT: s_mov_b32 s2, 0x5040100
3228 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3229 ; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2
3230 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
3231 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3232 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3233 %vec0 = call <4 x i16> asm "; def $0", "=v"()
3234 %vec1 = call <4 x i16> asm "; def $0", "=v"()
3235 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 6>
3236 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <poison, 7> leaves result element 0 unspecified and takes result
; element 1 from %vec1[3]; %vec0 is not referenced by the mask. The expected
; code contains a single inline-asm def and stores v1 directly with no shift
; or permute.
3240 define void @v_shuffle_v2i16_v4i16__u_7(ptr addrspace(1) inreg %ptr) {
3241 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__u_7:
3243 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3244 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3245 ; GFX900-NEXT: ;;#ASMSTART
3246 ; GFX900-NEXT: ; def v[0:1]
3247 ; GFX900-NEXT: ;;#ASMEND
3248 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
3249 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3250 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3252 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__u_7:
3254 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3255 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3256 ; GFX90A-NEXT: ;;#ASMSTART
3257 ; GFX90A-NEXT: ; def v[0:1]
3258 ; GFX90A-NEXT: ;;#ASMEND
3259 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
3260 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3261 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3263 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__u_7:
3265 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3266 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3267 ; GFX940-NEXT: ;;#ASMSTART
3268 ; GFX940-NEXT: ; def v[0:1]
3269 ; GFX940-NEXT: ;;#ASMEND
3270 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
3271 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3272 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3273 %vec0 = call <4 x i16> asm "; def $0", "=v"()
3274 %vec1 = call <4 x i16> asm "; def $0", "=v"()
3275 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 poison, i32 7>
3276 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <0, 7> takes result element 0 from %vec0[0] and result element 1
; from %vec1[3] (mask indices 4-7 address the second operand). The expected
; code merges the two source dwords with v_bfi_b32 under a 0xffff mask. The
; gfx900 checks place the second def in v[1:2], the other two targets in
; v[2:3].
3280 define void @v_shuffle_v2i16_v4i16__0_7(ptr addrspace(1) inreg %ptr) {
3281 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__0_7:
3283 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3284 ; GFX900-NEXT: ;;#ASMSTART
3285 ; GFX900-NEXT: ; def v[0:1]
3286 ; GFX900-NEXT: ;;#ASMEND
3287 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
3288 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
3289 ; GFX900-NEXT: ;;#ASMSTART
3290 ; GFX900-NEXT: ; def v[1:2]
3291 ; GFX900-NEXT: ;;#ASMEND
3292 ; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v2
3293 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
3294 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3295 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3297 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__0_7:
3299 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3300 ; GFX90A-NEXT: ;;#ASMSTART
3301 ; GFX90A-NEXT: ; def v[0:1]
3302 ; GFX90A-NEXT: ;;#ASMEND
3303 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
3304 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3305 ; GFX90A-NEXT: ;;#ASMSTART
3306 ; GFX90A-NEXT: ; def v[2:3]
3307 ; GFX90A-NEXT: ;;#ASMEND
3308 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v3
3309 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3310 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3311 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3313 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_7:
3315 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3316 ; GFX940-NEXT: ;;#ASMSTART
3317 ; GFX940-NEXT: ; def v[0:1]
3318 ; GFX940-NEXT: ;;#ASMEND
3319 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
3320 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3321 ; GFX940-NEXT: ;;#ASMSTART
3322 ; GFX940-NEXT: ; def v[2:3]
3323 ; GFX940-NEXT: ;;#ASMEND
3324 ; GFX940-NEXT: s_nop 0
3325 ; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3
3326 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3327 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3328 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3329 %vec0 = call <4 x i16> asm "; def $0", "=v"()
3330 %vec1 = call <4 x i16> asm "; def $0", "=v"()
3331 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 0, i32 7>
3332 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <1, 7> takes result element 0 from %vec0[1] and result element 1
; from %vec1[3] (mask indices 4-7 address the second operand). The expected
; code combines the two source dwords with v_perm_b32 using the byte selector
; 0x7060302. The gfx900 checks place the second def in v[1:2], the other two
; targets in v[2:3].
3336 define void @v_shuffle_v2i16_v4i16__1_7(ptr addrspace(1) inreg %ptr) {
3337 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__1_7:
3339 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3340 ; GFX900-NEXT: ;;#ASMSTART
3341 ; GFX900-NEXT: ; def v[0:1]
3342 ; GFX900-NEXT: ;;#ASMEND
3343 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3344 ; GFX900-NEXT: v_mov_b32_e32 v3, 0
3345 ; GFX900-NEXT: ;;#ASMSTART
3346 ; GFX900-NEXT: ; def v[1:2]
3347 ; GFX900-NEXT: ;;#ASMEND
3348 ; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4
3349 ; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
3350 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3351 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3353 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__1_7:
3355 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3356 ; GFX90A-NEXT: ;;#ASMSTART
3357 ; GFX90A-NEXT: ; def v[0:1]
3358 ; GFX90A-NEXT: ;;#ASMEND
3359 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3360 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3361 ; GFX90A-NEXT: ;;#ASMSTART
3362 ; GFX90A-NEXT: ; def v[2:3]
3363 ; GFX90A-NEXT: ;;#ASMEND
3364 ; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4
3365 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3366 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3367 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3369 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_7:
3371 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3372 ; GFX940-NEXT: ;;#ASMSTART
3373 ; GFX940-NEXT: ; def v[0:1]
3374 ; GFX940-NEXT: ;;#ASMEND
3375 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3376 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3377 ; GFX940-NEXT: ;;#ASMSTART
3378 ; GFX940-NEXT: ; def v[2:3]
3379 ; GFX940-NEXT: ;;#ASMEND
3380 ; GFX940-NEXT: s_nop 0
3381 ; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2
3382 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3383 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3384 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3385 %vec0 = call <4 x i16> asm "; def $0", "=v"()
3386 %vec1 = call <4 x i16> asm "; def $0", "=v"()
3387 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 1, i32 7>
3388 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <2, 7> takes result element 0 from %vec0[2] and result element 1
; from %vec1[3] (mask indices 4-7 address the second operand). The expected
; code merges the second dword of each source pair with v_bfi_b32 under a
; 0xffff mask.
3392 define void @v_shuffle_v2i16_v4i16__2_7(ptr addrspace(1) inreg %ptr) {
3393 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__2_7:
3395 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3396 ; GFX900-NEXT: ;;#ASMSTART
3397 ; GFX900-NEXT: ; def v[0:1]
3398 ; GFX900-NEXT: ;;#ASMEND
3399 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
3400 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3401 ; GFX900-NEXT: ;;#ASMSTART
3402 ; GFX900-NEXT: ; def v[2:3]
3403 ; GFX900-NEXT: ;;#ASMEND
3404 ; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v3
3405 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3406 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3407 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3409 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__2_7:
3411 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3412 ; GFX90A-NEXT: ;;#ASMSTART
3413 ; GFX90A-NEXT: ; def v[0:1]
3414 ; GFX90A-NEXT: ;;#ASMEND
3415 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
3416 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3417 ; GFX90A-NEXT: ;;#ASMSTART
3418 ; GFX90A-NEXT: ; def v[2:3]
3419 ; GFX90A-NEXT: ;;#ASMEND
3420 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v3
3421 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3422 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3423 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3425 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_7:
3427 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3428 ; GFX940-NEXT: ;;#ASMSTART
3429 ; GFX940-NEXT: ; def v[0:1]
3430 ; GFX940-NEXT: ;;#ASMEND
3431 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
3432 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3433 ; GFX940-NEXT: ;;#ASMSTART
3434 ; GFX940-NEXT: ; def v[2:3]
3435 ; GFX940-NEXT: ;;#ASMEND
3436 ; GFX940-NEXT: s_nop 0
3437 ; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3
3438 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3439 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3440 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3441 %vec0 = call <4 x i16> asm "; def $0", "=v"()
3442 %vec1 = call <4 x i16> asm "; def $0", "=v"()
3443 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 2, i32 7>
3444 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; Shuffle mask <3, 7> takes result element 0 from %vec0[3] and result element 1
; from %vec1[3] (mask indices 4-7 address the second operand). The expected
; code combines the second dword of each source pair with v_perm_b32 using the
; byte selector 0x7060302.
3448 define void @v_shuffle_v2i16_v4i16__3_7(ptr addrspace(1) inreg %ptr) {
3449 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__3_7:
3451 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3452 ; GFX900-NEXT: ;;#ASMSTART
3453 ; GFX900-NEXT: ; def v[0:1]
3454 ; GFX900-NEXT: ;;#ASMEND
3455 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3456 ; GFX900-NEXT: v_mov_b32_e32 v4, 0
3457 ; GFX900-NEXT: ;;#ASMSTART
3458 ; GFX900-NEXT: ; def v[2:3]
3459 ; GFX900-NEXT: ;;#ASMEND
3460 ; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4
3461 ; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
3462 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3463 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3465 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__3_7:
3467 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3468 ; GFX90A-NEXT: ;;#ASMSTART
3469 ; GFX90A-NEXT: ; def v[0:1]
3470 ; GFX90A-NEXT: ;;#ASMEND
3471 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3472 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
3473 ; GFX90A-NEXT: ;;#ASMSTART
3474 ; GFX90A-NEXT: ; def v[2:3]
3475 ; GFX90A-NEXT: ;;#ASMEND
3476 ; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4
3477 ; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
3478 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3479 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3481 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_7:
3483 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3484 ; GFX940-NEXT: ;;#ASMSTART
3485 ; GFX940-NEXT: ; def v[0:1]
3486 ; GFX940-NEXT: ;;#ASMEND
3487 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3488 ; GFX940-NEXT: v_mov_b32_e32 v4, 0
3489 ; GFX940-NEXT: ;;#ASMSTART
3490 ; GFX940-NEXT: ; def v[2:3]
3491 ; GFX940-NEXT: ;;#ASMEND
3492 ; GFX940-NEXT: s_nop 0
3493 ; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2
3494 ; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
3495 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3496 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3497 %vec0 = call <4 x i16> asm "; def $0", "=v"()
3498 %vec1 = call <4 x i16> asm "; def $0", "=v"()
3499 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 3, i32 7>
3500 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; VGPR test, mask <4, 7>: lanes 0 and 3 of %vec1; %vec0 is not referenced by
; the mask. The <2 x i16> result is stored to %ptr.
; The checks expect a single inline-asm def and one v_bfi_b32 that combines
; v0 and v1 under the 0xffff mask held in an SGPR.
3504 define void @v_shuffle_v2i16_v4i16__4_7(ptr addrspace(1) inreg %ptr) {
3505 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__4_7:
3507 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3508 ; GFX900-NEXT: ;;#ASMSTART
3509 ; GFX900-NEXT: ; def v[0:1]
3510 ; GFX900-NEXT: ;;#ASMEND
3511 ; GFX900-NEXT: s_mov_b32 s4, 0xffff
3512 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3513 ; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1
3514 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
3515 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3516 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3518 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__4_7:
3520 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3521 ; GFX90A-NEXT: ;;#ASMSTART
3522 ; GFX90A-NEXT: ; def v[0:1]
3523 ; GFX90A-NEXT: ;;#ASMEND
3524 ; GFX90A-NEXT: s_mov_b32 s4, 0xffff
3525 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3526 ; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1
3527 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
3528 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3529 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3531 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__4_7:
3533 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3534 ; GFX940-NEXT: ;;#ASMSTART
3535 ; GFX940-NEXT: ; def v[0:1]
3536 ; GFX940-NEXT: ;;#ASMEND
3537 ; GFX940-NEXT: s_mov_b32 s2, 0xffff
3538 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3539 ; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1
3540 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
3541 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3542 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3543 %vec0 = call <4 x i16> asm "; def $0", "=v"()
3544 %vec1 = call <4 x i16> asm "; def $0", "=v"()
3545 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 4, i32 7>
3546 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; VGPR test, mask <5, 7>: lanes 1 and 3 of %vec1; %vec0 is not referenced by
; the mask. The <2 x i16> result is stored to %ptr.
; The checks expect a single inline-asm def and one v_perm_b32 with selector
; 0x7060302 on v1 and v0.
3550 define void @v_shuffle_v2i16_v4i16__5_7(ptr addrspace(1) inreg %ptr) {
3551 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__5_7:
3553 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3554 ; GFX900-NEXT: ;;#ASMSTART
3555 ; GFX900-NEXT: ; def v[0:1]
3556 ; GFX900-NEXT: ;;#ASMEND
3557 ; GFX900-NEXT: s_mov_b32 s4, 0x7060302
3558 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3559 ; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4
3560 ; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
3561 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3562 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3564 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__5_7:
3566 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3567 ; GFX90A-NEXT: ;;#ASMSTART
3568 ; GFX90A-NEXT: ; def v[0:1]
3569 ; GFX90A-NEXT: ;;#ASMEND
3570 ; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
3571 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3572 ; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4
3573 ; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
3574 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3575 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3577 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_7:
3579 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3580 ; GFX940-NEXT: ;;#ASMSTART
3581 ; GFX940-NEXT: ; def v[0:1]
3582 ; GFX940-NEXT: ;;#ASMEND
3583 ; GFX940-NEXT: s_mov_b32 s2, 0x7060302
3584 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3585 ; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2
3586 ; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
3587 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3588 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3589 %vec0 = call <4 x i16> asm "; def $0", "=v"()
3590 %vec1 = call <4 x i16> asm "; def $0", "=v"()
3591 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 7>
3592 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; VGPR test, mask <6, 7>: lanes 2 and 3 of %vec1; %vec0 is not referenced by
; the mask. The <2 x i16> result is stored to %ptr.
; The checks expect no shuffle instruction at all: after the single inline-asm
; def, v1 is stored directly.
3596 define void @v_shuffle_v2i16_v4i16__6_7(ptr addrspace(1) inreg %ptr) {
3597 ; GFX900-LABEL: v_shuffle_v2i16_v4i16__6_7:
3599 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3600 ; GFX900-NEXT: v_mov_b32_e32 v2, 0
3601 ; GFX900-NEXT: ;;#ASMSTART
3602 ; GFX900-NEXT: ; def v[0:1]
3603 ; GFX900-NEXT: ;;#ASMEND
3604 ; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
3605 ; GFX900-NEXT: s_waitcnt vmcnt(0)
3606 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3608 ; GFX90A-LABEL: v_shuffle_v2i16_v4i16__6_7:
3610 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3611 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
3612 ; GFX90A-NEXT: ;;#ASMSTART
3613 ; GFX90A-NEXT: ; def v[0:1]
3614 ; GFX90A-NEXT: ;;#ASMEND
3615 ; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
3616 ; GFX90A-NEXT: s_waitcnt vmcnt(0)
3617 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3619 ; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_7:
3621 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3622 ; GFX940-NEXT: v_mov_b32_e32 v2, 0
3623 ; GFX940-NEXT: ;;#ASMSTART
3624 ; GFX940-NEXT: ; def v[0:1]
3625 ; GFX940-NEXT: ;;#ASMEND
3626 ; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
3627 ; GFX940-NEXT: s_waitcnt vmcnt(0)
3628 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3629 %vec0 = call <4 x i16> asm "; def $0", "=v"()
3630 %vec1 = call <4 x i16> asm "; def $0", "=v"()
3631 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 7>
3632 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
; SGPR test with a fully poison mask: no lane of %vec0 is selected. The result
; is handed to a side-effecting inline asm whose operand is pinned to s8.
; The checks (one prefix shared by all three run lines) expect only that use,
; with no def and no shuffle code before it.
3636 define void @s_shuffle_v2i16_v4i16__u_u() {
3637 ; GFX9-LABEL: s_shuffle_v2i16_v4i16__u_u:
3639 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3640 ; GFX9-NEXT: ;;#ASMSTART
3641 ; GFX9-NEXT: ; use s8
3642 ; GFX9-NEXT: ;;#ASMEND
3643 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3644 %vec0 = call <4 x i16> asm "; def $0", "=s"()
3645 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> poison
3646 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <0, poison>: lane 0 of %vec0, second result lane unspecified.
; The result is consumed by an inline asm whose operand is pinned to s8.
; The checks expect the def to land in s[8:9] so that s8 is used as-is with no
; shuffle instruction; the gfx940 run has an s_nop 0 between def and use.
3650 define void @s_shuffle_v2i16_v4i16__0_u() {
3651 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__0_u:
3653 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3654 ; GFX900-NEXT: ;;#ASMSTART
3655 ; GFX900-NEXT: ; def s[8:9]
3656 ; GFX900-NEXT: ;;#ASMEND
3657 ; GFX900-NEXT: ;;#ASMSTART
3658 ; GFX900-NEXT: ; use s8
3659 ; GFX900-NEXT: ;;#ASMEND
3660 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3662 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__0_u:
3664 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3665 ; GFX90A-NEXT: ;;#ASMSTART
3666 ; GFX90A-NEXT: ; def s[8:9]
3667 ; GFX90A-NEXT: ;;#ASMEND
3668 ; GFX90A-NEXT: ;;#ASMSTART
3669 ; GFX90A-NEXT: ; use s8
3670 ; GFX90A-NEXT: ;;#ASMEND
3671 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3673 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_u:
3675 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3676 ; GFX940-NEXT: ;;#ASMSTART
3677 ; GFX940-NEXT: ; def s[8:9]
3678 ; GFX940-NEXT: ;;#ASMEND
3679 ; GFX940-NEXT: s_nop 0
3680 ; GFX940-NEXT: ;;#ASMSTART
3681 ; GFX940-NEXT: ; use s8
3682 ; GFX940-NEXT: ;;#ASMEND
3683 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3684 %vec0 = call <4 x i16> asm "; def $0", "=s"()
3685 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 0, i32 poison>
3686 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <1, poison>: lane 1 of %vec0, second result lane unspecified.
; The result is consumed by an inline asm whose operand is pinned to s8.
; The checks expect one s_lshr_b32 by 16 of the first def register into s8.
3690 define void @s_shuffle_v2i16_v4i16__1_u() {
3691 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__1_u:
3693 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3694 ; GFX900-NEXT: ;;#ASMSTART
3695 ; GFX900-NEXT: ; def s[4:5]
3696 ; GFX900-NEXT: ;;#ASMEND
3697 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
3698 ; GFX900-NEXT: ;;#ASMSTART
3699 ; GFX900-NEXT: ; use s8
3700 ; GFX900-NEXT: ;;#ASMEND
3701 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3703 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__1_u:
3705 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3706 ; GFX90A-NEXT: ;;#ASMSTART
3707 ; GFX90A-NEXT: ; def s[4:5]
3708 ; GFX90A-NEXT: ;;#ASMEND
3709 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
3710 ; GFX90A-NEXT: ;;#ASMSTART
3711 ; GFX90A-NEXT: ; use s8
3712 ; GFX90A-NEXT: ;;#ASMEND
3713 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3715 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_u:
3717 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3718 ; GFX940-NEXT: ;;#ASMSTART
3719 ; GFX940-NEXT: ; def s[0:1]
3720 ; GFX940-NEXT: ;;#ASMEND
3721 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
3722 ; GFX940-NEXT: ;;#ASMSTART
3723 ; GFX940-NEXT: ; use s8
3724 ; GFX940-NEXT: ;;#ASMEND
3725 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3726 %vec0 = call <4 x i16> asm "; def $0", "=s"()
3727 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 1, i32 poison>
3728 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <2, poison>: lane 2 of %vec0, second result lane unspecified.
; The result is consumed by an inline asm whose operand is pinned to s8.
; The checks expect one s_mov_b32 of the second def register into s8.
3732 define void @s_shuffle_v2i16_v4i16__2_u() {
3733 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__2_u:
3735 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3736 ; GFX900-NEXT: ;;#ASMSTART
3737 ; GFX900-NEXT: ; def s[4:5]
3738 ; GFX900-NEXT: ;;#ASMEND
3739 ; GFX900-NEXT: s_mov_b32 s8, s5
3740 ; GFX900-NEXT: ;;#ASMSTART
3741 ; GFX900-NEXT: ; use s8
3742 ; GFX900-NEXT: ;;#ASMEND
3743 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3745 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__2_u:
3747 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3748 ; GFX90A-NEXT: ;;#ASMSTART
3749 ; GFX90A-NEXT: ; def s[4:5]
3750 ; GFX90A-NEXT: ;;#ASMEND
3751 ; GFX90A-NEXT: s_mov_b32 s8, s5
3752 ; GFX90A-NEXT: ;;#ASMSTART
3753 ; GFX90A-NEXT: ; use s8
3754 ; GFX90A-NEXT: ;;#ASMEND
3755 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3757 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_u:
3759 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3760 ; GFX940-NEXT: ;;#ASMSTART
3761 ; GFX940-NEXT: ; def s[0:1]
3762 ; GFX940-NEXT: ;;#ASMEND
3763 ; GFX940-NEXT: s_mov_b32 s8, s1
3764 ; GFX940-NEXT: ;;#ASMSTART
3765 ; GFX940-NEXT: ; use s8
3766 ; GFX940-NEXT: ;;#ASMEND
3767 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3768 %vec0 = call <4 x i16> asm "; def $0", "=s"()
3769 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 2, i32 poison>
3770 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <3, poison>: lane 3 of %vec0, second result lane unspecified.
; The result is consumed by an inline asm whose operand is pinned to s8.
; The checks expect one s_lshr_b32 by 16 of the second def register into s8.
3774 define void @s_shuffle_v2i16_v4i16__3_u() {
3775 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__3_u:
3777 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3778 ; GFX900-NEXT: ;;#ASMSTART
3779 ; GFX900-NEXT: ; def s[4:5]
3780 ; GFX900-NEXT: ;;#ASMEND
3781 ; GFX900-NEXT: s_lshr_b32 s8, s5, 16
3782 ; GFX900-NEXT: ;;#ASMSTART
3783 ; GFX900-NEXT: ; use s8
3784 ; GFX900-NEXT: ;;#ASMEND
3785 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3787 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__3_u:
3789 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3790 ; GFX90A-NEXT: ;;#ASMSTART
3791 ; GFX90A-NEXT: ; def s[4:5]
3792 ; GFX90A-NEXT: ;;#ASMEND
3793 ; GFX90A-NEXT: s_lshr_b32 s8, s5, 16
3794 ; GFX90A-NEXT: ;;#ASMSTART
3795 ; GFX90A-NEXT: ; use s8
3796 ; GFX90A-NEXT: ;;#ASMEND
3797 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3799 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_u:
3801 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3802 ; GFX940-NEXT: ;;#ASMSTART
3803 ; GFX940-NEXT: ; def s[0:1]
3804 ; GFX940-NEXT: ;;#ASMEND
3805 ; GFX940-NEXT: s_lshr_b32 s8, s1, 16
3806 ; GFX940-NEXT: ;;#ASMSTART
3807 ; GFX940-NEXT: ; use s8
3808 ; GFX940-NEXT: ;;#ASMEND
3809 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3810 %vec0 = call <4 x i16> asm "; def $0", "=s"()
3811 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 3, i32 poison>
3812 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <4, poison>: index 4 addresses lane 0 of the second operand,
; which is poison here, so no lane of %vec0 is selected.
; The checks (one prefix shared by all three run lines) expect only the s8
; use, with no def and no shuffle code before it.
; NOTE(review): the __5_u, __6_u and __7_u tests pass %vec1 as the second
; operand; confirm whether poison is intended here, since as written this test
; never reads a defined lane. Changing it requires regenerating the checks.
3816 define void @s_shuffle_v2i16_v4i16__4_u() {
3817 ; GFX9-LABEL: s_shuffle_v2i16_v4i16__4_u:
3819 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3820 ; GFX9-NEXT: ;;#ASMSTART
3821 ; GFX9-NEXT: ; use s8
3822 ; GFX9-NEXT: ;;#ASMEND
3823 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3824 %vec0 = call <4 x i16> asm "; def $0", "=s"()
3825 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 4, i32 poison>
3826 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <5, poison>: lane 1 of %vec1; %vec0 is not referenced by the
; mask. The result is consumed by an inline asm whose operand is pinned to s8.
; The checks expect a single def and one s_lshr_b32 by 16 of its first
; register into s8.
3830 define void @s_shuffle_v2i16_v4i16__5_u() {
3831 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__5_u:
3833 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3834 ; GFX900-NEXT: ;;#ASMSTART
3835 ; GFX900-NEXT: ; def s[4:5]
3836 ; GFX900-NEXT: ;;#ASMEND
3837 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
3838 ; GFX900-NEXT: ;;#ASMSTART
3839 ; GFX900-NEXT: ; use s8
3840 ; GFX900-NEXT: ;;#ASMEND
3841 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3843 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__5_u:
3845 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3846 ; GFX90A-NEXT: ;;#ASMSTART
3847 ; GFX90A-NEXT: ; def s[4:5]
3848 ; GFX90A-NEXT: ;;#ASMEND
3849 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
3850 ; GFX90A-NEXT: ;;#ASMSTART
3851 ; GFX90A-NEXT: ; use s8
3852 ; GFX90A-NEXT: ;;#ASMEND
3853 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3855 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_u:
3857 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3858 ; GFX940-NEXT: ;;#ASMSTART
3859 ; GFX940-NEXT: ; def s[0:1]
3860 ; GFX940-NEXT: ;;#ASMEND
3861 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
3862 ; GFX940-NEXT: ;;#ASMSTART
3863 ; GFX940-NEXT: ; use s8
3864 ; GFX940-NEXT: ;;#ASMEND
3865 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3866 %vec0 = call <4 x i16> asm "; def $0", "=s"()
3867 %vec1 = call <4 x i16> asm "; def $0", "=s"()
3868 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 poison>
3869 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <6, poison>: lane 2 of %vec1; %vec0 is not referenced by the
; mask. The result is consumed by an inline asm whose operand is pinned to s8.
; The checks expect a single def and one s_mov_b32 of its second register
; into s8.
3873 define void @s_shuffle_v2i16_v4i16__6_u() {
3874 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__6_u:
3876 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3877 ; GFX900-NEXT: ;;#ASMSTART
3878 ; GFX900-NEXT: ; def s[4:5]
3879 ; GFX900-NEXT: ;;#ASMEND
3880 ; GFX900-NEXT: s_mov_b32 s8, s5
3881 ; GFX900-NEXT: ;;#ASMSTART
3882 ; GFX900-NEXT: ; use s8
3883 ; GFX900-NEXT: ;;#ASMEND
3884 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3886 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__6_u:
3888 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3889 ; GFX90A-NEXT: ;;#ASMSTART
3890 ; GFX90A-NEXT: ; def s[4:5]
3891 ; GFX90A-NEXT: ;;#ASMEND
3892 ; GFX90A-NEXT: s_mov_b32 s8, s5
3893 ; GFX90A-NEXT: ;;#ASMSTART
3894 ; GFX90A-NEXT: ; use s8
3895 ; GFX90A-NEXT: ;;#ASMEND
3896 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3898 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_u:
3900 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3901 ; GFX940-NEXT: ;;#ASMSTART
3902 ; GFX940-NEXT: ; def s[0:1]
3903 ; GFX940-NEXT: ;;#ASMEND
3904 ; GFX940-NEXT: s_mov_b32 s8, s1
3905 ; GFX940-NEXT: ;;#ASMSTART
3906 ; GFX940-NEXT: ; use s8
3907 ; GFX940-NEXT: ;;#ASMEND
3908 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3909 %vec0 = call <4 x i16> asm "; def $0", "=s"()
3910 %vec1 = call <4 x i16> asm "; def $0", "=s"()
3911 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 poison>
3912 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <7, poison>: lane 3 of %vec1; %vec0 is not referenced by the
; mask. The result is consumed by an inline asm whose operand is pinned to s8.
; The checks expect a single def and one s_lshr_b32 by 16 of its second
; register into s8.
3916 define void @s_shuffle_v2i16_v4i16__7_u() {
3917 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__7_u:
3919 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3920 ; GFX900-NEXT: ;;#ASMSTART
3921 ; GFX900-NEXT: ; def s[4:5]
3922 ; GFX900-NEXT: ;;#ASMEND
3923 ; GFX900-NEXT: s_lshr_b32 s8, s5, 16
3924 ; GFX900-NEXT: ;;#ASMSTART
3925 ; GFX900-NEXT: ; use s8
3926 ; GFX900-NEXT: ;;#ASMEND
3927 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3929 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__7_u:
3931 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3932 ; GFX90A-NEXT: ;;#ASMSTART
3933 ; GFX90A-NEXT: ; def s[4:5]
3934 ; GFX90A-NEXT: ;;#ASMEND
3935 ; GFX90A-NEXT: s_lshr_b32 s8, s5, 16
3936 ; GFX90A-NEXT: ;;#ASMSTART
3937 ; GFX90A-NEXT: ; use s8
3938 ; GFX90A-NEXT: ;;#ASMEND
3939 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3941 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_u:
3943 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3944 ; GFX940-NEXT: ;;#ASMSTART
3945 ; GFX940-NEXT: ; def s[0:1]
3946 ; GFX940-NEXT: ;;#ASMEND
3947 ; GFX940-NEXT: s_lshr_b32 s8, s1, 16
3948 ; GFX940-NEXT: ;;#ASMSTART
3949 ; GFX940-NEXT: ; use s8
3950 ; GFX940-NEXT: ;;#ASMEND
3951 ; GFX940-NEXT: s_setpc_b64 s[30:31]
3952 %vec0 = call <4 x i16> asm "; def $0", "=s"()
3953 %vec1 = call <4 x i16> asm "; def $0", "=s"()
3954 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 poison>
3955 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <7, 0>: lane 3 of %vec1 followed by lane 0 of %vec0. The
; result is consumed by an inline asm whose operand is pinned to s8.
; The checks expect both defs, an s_lshr_b32 by 16 of the second def's upper
; register, then an s_pack_ll_b32_b16 with the first def's lower register.
3959 define void @s_shuffle_v2i16_v4i16__7_0() {
3960 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__7_0:
3962 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3963 ; GFX900-NEXT: ;;#ASMSTART
3964 ; GFX900-NEXT: ; def s[4:5]
3965 ; GFX900-NEXT: ;;#ASMEND
3966 ; GFX900-NEXT: ;;#ASMSTART
3967 ; GFX900-NEXT: ; def s[6:7]
3968 ; GFX900-NEXT: ;;#ASMEND
3969 ; GFX900-NEXT: s_lshr_b32 s5, s7, 16
3970 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3971 ; GFX900-NEXT: ;;#ASMSTART
3972 ; GFX900-NEXT: ; use s8
3973 ; GFX900-NEXT: ;;#ASMEND
3974 ; GFX900-NEXT: s_setpc_b64 s[30:31]
3976 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__7_0:
3978 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3979 ; GFX90A-NEXT: ;;#ASMSTART
3980 ; GFX90A-NEXT: ; def s[4:5]
3981 ; GFX90A-NEXT: ;;#ASMEND
3982 ; GFX90A-NEXT: ;;#ASMSTART
3983 ; GFX90A-NEXT: ; def s[6:7]
3984 ; GFX90A-NEXT: ;;#ASMEND
3985 ; GFX90A-NEXT: s_lshr_b32 s5, s7, 16
3986 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
3987 ; GFX90A-NEXT: ;;#ASMSTART
3988 ; GFX90A-NEXT: ; use s8
3989 ; GFX90A-NEXT: ;;#ASMEND
3990 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
3992 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_0:
3994 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3995 ; GFX940-NEXT: ;;#ASMSTART
3996 ; GFX940-NEXT: ; def s[0:1]
3997 ; GFX940-NEXT: ;;#ASMEND
3998 ; GFX940-NEXT: ;;#ASMSTART
3999 ; GFX940-NEXT: ; def s[2:3]
4000 ; GFX940-NEXT: ;;#ASMEND
4001 ; GFX940-NEXT: s_lshr_b32 s1, s3, 16
4002 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
4003 ; GFX940-NEXT: ;;#ASMSTART
4004 ; GFX940-NEXT: ; use s8
4005 ; GFX940-NEXT: ;;#ASMEND
4006 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4007 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4008 %vec1 = call <4 x i16> asm "; def $0", "=s"()
4009 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 0>
4010 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <7, 1>: lane 3 of %vec1 followed by lane 1 of %vec0. The
; result is consumed by an inline asm whose operand is pinned to s8.
; The checks expect both defs and a single s_pack_hh_b32_b16 on the second
; def's upper register and the first def's lower register.
4014 define void @s_shuffle_v2i16_v4i16__7_1() {
4015 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__7_1:
4017 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4018 ; GFX900-NEXT: ;;#ASMSTART
4019 ; GFX900-NEXT: ; def s[4:5]
4020 ; GFX900-NEXT: ;;#ASMEND
4021 ; GFX900-NEXT: ;;#ASMSTART
4022 ; GFX900-NEXT: ; def s[6:7]
4023 ; GFX900-NEXT: ;;#ASMEND
4024 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4
4025 ; GFX900-NEXT: ;;#ASMSTART
4026 ; GFX900-NEXT: ; use s8
4027 ; GFX900-NEXT: ;;#ASMEND
4028 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4030 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__7_1:
4032 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4033 ; GFX90A-NEXT: ;;#ASMSTART
4034 ; GFX90A-NEXT: ; def s[4:5]
4035 ; GFX90A-NEXT: ;;#ASMEND
4036 ; GFX90A-NEXT: ;;#ASMSTART
4037 ; GFX90A-NEXT: ; def s[6:7]
4038 ; GFX90A-NEXT: ;;#ASMEND
4039 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4
4040 ; GFX90A-NEXT: ;;#ASMSTART
4041 ; GFX90A-NEXT: ; use s8
4042 ; GFX90A-NEXT: ;;#ASMEND
4043 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4045 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_1:
4047 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4048 ; GFX940-NEXT: ;;#ASMSTART
4049 ; GFX940-NEXT: ; def s[0:1]
4050 ; GFX940-NEXT: ;;#ASMEND
4051 ; GFX940-NEXT: ;;#ASMSTART
4052 ; GFX940-NEXT: ; def s[2:3]
4053 ; GFX940-NEXT: ;;#ASMEND
4054 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0
4055 ; GFX940-NEXT: ;;#ASMSTART
4056 ; GFX940-NEXT: ; use s8
4057 ; GFX940-NEXT: ;;#ASMEND
4058 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4059 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4060 %vec1 = call <4 x i16> asm "; def $0", "=s"()
4061 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 1>
4062 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <7, 2>: lane 3 of %vec1 followed by lane 2 of %vec0. The
; result is consumed by an inline asm whose operand is pinned to s8.
; The checks expect both defs, an s_lshr_b32 by 16 of the second def's upper
; register, then an s_pack_ll_b32_b16 with the first def's upper register.
4066 define void @s_shuffle_v2i16_v4i16__7_2() {
4067 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__7_2:
4069 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4070 ; GFX900-NEXT: ;;#ASMSTART
4071 ; GFX900-NEXT: ; def s[4:5]
4072 ; GFX900-NEXT: ;;#ASMEND
4073 ; GFX900-NEXT: ;;#ASMSTART
4074 ; GFX900-NEXT: ; def s[6:7]
4075 ; GFX900-NEXT: ;;#ASMEND
4076 ; GFX900-NEXT: s_lshr_b32 s4, s7, 16
4077 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
4078 ; GFX900-NEXT: ;;#ASMSTART
4079 ; GFX900-NEXT: ; use s8
4080 ; GFX900-NEXT: ;;#ASMEND
4081 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4083 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__7_2:
4085 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4086 ; GFX90A-NEXT: ;;#ASMSTART
4087 ; GFX90A-NEXT: ; def s[4:5]
4088 ; GFX90A-NEXT: ;;#ASMEND
4089 ; GFX90A-NEXT: ;;#ASMSTART
4090 ; GFX90A-NEXT: ; def s[6:7]
4091 ; GFX90A-NEXT: ;;#ASMEND
4092 ; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
4093 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
4094 ; GFX90A-NEXT: ;;#ASMSTART
4095 ; GFX90A-NEXT: ; use s8
4096 ; GFX90A-NEXT: ;;#ASMEND
4097 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4099 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_2:
4101 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4102 ; GFX940-NEXT: ;;#ASMSTART
4103 ; GFX940-NEXT: ; def s[0:1]
4104 ; GFX940-NEXT: ;;#ASMEND
4105 ; GFX940-NEXT: ;;#ASMSTART
4106 ; GFX940-NEXT: ; def s[2:3]
4107 ; GFX940-NEXT: ;;#ASMEND
4108 ; GFX940-NEXT: s_lshr_b32 s0, s3, 16
4109 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
4110 ; GFX940-NEXT: ;;#ASMSTART
4111 ; GFX940-NEXT: ; use s8
4112 ; GFX940-NEXT: ;;#ASMEND
4113 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4114 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4115 %vec1 = call <4 x i16> asm "; def $0", "=s"()
4116 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 2>
4117 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <7, 3>: lane 3 of %vec1 followed by lane 3 of %vec0. The
; result is consumed by an inline asm whose operand is pinned to s8.
; The checks expect both defs and a single s_pack_hh_b32_b16 on the upper
; registers of the second and first def, in that order.
4121 define void @s_shuffle_v2i16_v4i16__7_3() {
4122 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__7_3:
4124 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4125 ; GFX900-NEXT: ;;#ASMSTART
4126 ; GFX900-NEXT: ; def s[4:5]
4127 ; GFX900-NEXT: ;;#ASMEND
4128 ; GFX900-NEXT: ;;#ASMSTART
4129 ; GFX900-NEXT: ; def s[6:7]
4130 ; GFX900-NEXT: ;;#ASMEND
4131 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5
4132 ; GFX900-NEXT: ;;#ASMSTART
4133 ; GFX900-NEXT: ; use s8
4134 ; GFX900-NEXT: ;;#ASMEND
4135 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4137 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__7_3:
4139 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4140 ; GFX90A-NEXT: ;;#ASMSTART
4141 ; GFX90A-NEXT: ; def s[4:5]
4142 ; GFX90A-NEXT: ;;#ASMEND
4143 ; GFX90A-NEXT: ;;#ASMSTART
4144 ; GFX90A-NEXT: ; def s[6:7]
4145 ; GFX90A-NEXT: ;;#ASMEND
4146 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5
4147 ; GFX90A-NEXT: ;;#ASMSTART
4148 ; GFX90A-NEXT: ; use s8
4149 ; GFX90A-NEXT: ;;#ASMEND
4150 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4152 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_3:
4154 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4155 ; GFX940-NEXT: ;;#ASMSTART
4156 ; GFX940-NEXT: ; def s[0:1]
4157 ; GFX940-NEXT: ;;#ASMEND
4158 ; GFX940-NEXT: ;;#ASMSTART
4159 ; GFX940-NEXT: ; def s[2:3]
4160 ; GFX940-NEXT: ;;#ASMEND
4161 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1
4162 ; GFX940-NEXT: ;;#ASMSTART
4163 ; GFX940-NEXT: ; use s8
4164 ; GFX940-NEXT: ;;#ASMEND
4165 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4166 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4167 %vec1 = call <4 x i16> asm "; def $0", "=s"()
4168 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 3>
4169 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <7, 4>: lanes 3 and 0 of %vec1; %vec0 is not referenced by
; the mask. The result is consumed by an inline asm pinned to s8.
; The checks expect a single def, an s_lshr_b32 by 16 of its upper register,
; then an s_pack_ll_b32_b16 with its lower register.
4173 define void @s_shuffle_v2i16_v4i16__7_4() {
4174 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__7_4:
4176 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4177 ; GFX900-NEXT: ;;#ASMSTART
4178 ; GFX900-NEXT: ; def s[4:5]
4179 ; GFX900-NEXT: ;;#ASMEND
4180 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
4181 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4182 ; GFX900-NEXT: ;;#ASMSTART
4183 ; GFX900-NEXT: ; use s8
4184 ; GFX900-NEXT: ;;#ASMEND
4185 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4187 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__7_4:
4189 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4190 ; GFX90A-NEXT: ;;#ASMSTART
4191 ; GFX90A-NEXT: ; def s[4:5]
4192 ; GFX90A-NEXT: ;;#ASMEND
4193 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
4194 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4195 ; GFX90A-NEXT: ;;#ASMSTART
4196 ; GFX90A-NEXT: ; use s8
4197 ; GFX90A-NEXT: ;;#ASMEND
4198 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4200 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_4:
4202 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4203 ; GFX940-NEXT: ;;#ASMSTART
4204 ; GFX940-NEXT: ; def s[0:1]
4205 ; GFX940-NEXT: ;;#ASMEND
4206 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
4207 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
4208 ; GFX940-NEXT: ;;#ASMSTART
4209 ; GFX940-NEXT: ; use s8
4210 ; GFX940-NEXT: ;;#ASMEND
4211 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4212 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4213 %vec1 = call <4 x i16> asm "; def $0", "=s"()
4214 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 4>
4215 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <7, 5>: lanes 3 and 1 of %vec1; %vec0 is not referenced by
; the mask. The result is consumed by an inline asm pinned to s8.
; The checks expect a single def and one s_pack_hh_b32_b16 on its upper and
; lower registers, in that order.
4219 define void @s_shuffle_v2i16_v4i16__7_5() {
4220 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__7_5:
4222 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4223 ; GFX900-NEXT: ;;#ASMSTART
4224 ; GFX900-NEXT: ; def s[4:5]
4225 ; GFX900-NEXT: ;;#ASMEND
4226 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4
4227 ; GFX900-NEXT: ;;#ASMSTART
4228 ; GFX900-NEXT: ; use s8
4229 ; GFX900-NEXT: ;;#ASMEND
4230 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4232 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__7_5:
4234 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4235 ; GFX90A-NEXT: ;;#ASMSTART
4236 ; GFX90A-NEXT: ; def s[4:5]
4237 ; GFX90A-NEXT: ;;#ASMEND
4238 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4
4239 ; GFX90A-NEXT: ;;#ASMSTART
4240 ; GFX90A-NEXT: ; use s8
4241 ; GFX90A-NEXT: ;;#ASMEND
4242 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4244 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_5:
4246 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4247 ; GFX940-NEXT: ;;#ASMSTART
4248 ; GFX940-NEXT: ; def s[0:1]
4249 ; GFX940-NEXT: ;;#ASMEND
4250 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0
4251 ; GFX940-NEXT: ;;#ASMSTART
4252 ; GFX940-NEXT: ; use s8
4253 ; GFX940-NEXT: ;;#ASMEND
4254 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4255 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4256 %vec1 = call <4 x i16> asm "; def $0", "=s"()
4257 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 5>
4258 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <7, 6>: lanes 3 and 2 of %vec1 (swapped order); %vec0 is not
; referenced by the mask. The result is consumed by an inline asm pinned to s8.
; The checks expect a single def, an s_lshr_b32 by 16 of its upper register,
; then an s_pack_ll_b32_b16 of that shifted value with the same upper register.
4262 define void @s_shuffle_v2i16_v4i16__7_6() {
4263 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__7_6:
4265 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4266 ; GFX900-NEXT: ;;#ASMSTART
4267 ; GFX900-NEXT: ; def s[4:5]
4268 ; GFX900-NEXT: ;;#ASMEND
4269 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
4270 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
4271 ; GFX900-NEXT: ;;#ASMSTART
4272 ; GFX900-NEXT: ; use s8
4273 ; GFX900-NEXT: ;;#ASMEND
4274 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4276 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__7_6:
4278 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4279 ; GFX90A-NEXT: ;;#ASMSTART
4280 ; GFX90A-NEXT: ; def s[4:5]
4281 ; GFX90A-NEXT: ;;#ASMEND
4282 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
4283 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
4284 ; GFX90A-NEXT: ;;#ASMSTART
4285 ; GFX90A-NEXT: ; use s8
4286 ; GFX90A-NEXT: ;;#ASMEND
4287 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4289 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_6:
4291 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4292 ; GFX940-NEXT: ;;#ASMSTART
4293 ; GFX940-NEXT: ; def s[0:1]
4294 ; GFX940-NEXT: ;;#ASMEND
4295 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
4296 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
4297 ; GFX940-NEXT: ;;#ASMSTART
4298 ; GFX940-NEXT: ; use s8
4299 ; GFX940-NEXT: ;;#ASMEND
4300 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4301 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4302 %vec1 = call <4 x i16> asm "; def $0", "=s"()
4303 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 6>
4304 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <7, 7>: lane 3 of %vec1 in both result lanes; %vec0 is not
; referenced by the mask. The result is consumed by an inline asm pinned to s8.
; The checks expect a single def and one s_pack_hh_b32_b16 using its upper
; register for both source operands.
4308 define void @s_shuffle_v2i16_v4i16__7_7() {
4309 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__7_7:
4311 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4312 ; GFX900-NEXT: ;;#ASMSTART
4313 ; GFX900-NEXT: ; def s[4:5]
4314 ; GFX900-NEXT: ;;#ASMEND
4315 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5
4316 ; GFX900-NEXT: ;;#ASMSTART
4317 ; GFX900-NEXT: ; use s8
4318 ; GFX900-NEXT: ;;#ASMEND
4319 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4321 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__7_7:
4323 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4324 ; GFX90A-NEXT: ;;#ASMSTART
4325 ; GFX90A-NEXT: ; def s[4:5]
4326 ; GFX90A-NEXT: ;;#ASMEND
4327 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5
4328 ; GFX90A-NEXT: ;;#ASMSTART
4329 ; GFX90A-NEXT: ; use s8
4330 ; GFX90A-NEXT: ;;#ASMEND
4331 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4333 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_7:
4335 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4336 ; GFX940-NEXT: ;;#ASMSTART
4337 ; GFX940-NEXT: ; def s[0:1]
4338 ; GFX940-NEXT: ;;#ASMEND
4339 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1
4340 ; GFX940-NEXT: ;;#ASMSTART
4341 ; GFX940-NEXT: ; use s8
4342 ; GFX940-NEXT: ;;#ASMEND
4343 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4344 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4345 %vec1 = call <4 x i16> asm "; def $0", "=s"()
4346 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 7, i32 7>
4347 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test, mask <poison, 0>: first result lane unspecified, second is lane 0
; of %vec0. The result is consumed by an inline asm pinned to s8.
; The checks expect one s_lshl_b32 by 16 of the first def register into s8.
4351 define void @s_shuffle_v2i16_v4i16__u_0() {
4352 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__u_0:
4354 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4355 ; GFX900-NEXT: ;;#ASMSTART
4356 ; GFX900-NEXT: ; def s[4:5]
4357 ; GFX900-NEXT: ;;#ASMEND
4358 ; GFX900-NEXT: s_lshl_b32 s8, s4, 16
4359 ; GFX900-NEXT: ;;#ASMSTART
4360 ; GFX900-NEXT: ; use s8
4361 ; GFX900-NEXT: ;;#ASMEND
4362 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4364 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__u_0:
4366 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4367 ; GFX90A-NEXT: ;;#ASMSTART
4368 ; GFX90A-NEXT: ; def s[4:5]
4369 ; GFX90A-NEXT: ;;#ASMEND
4370 ; GFX90A-NEXT: s_lshl_b32 s8, s4, 16
4371 ; GFX90A-NEXT: ;;#ASMSTART
4372 ; GFX90A-NEXT: ; use s8
4373 ; GFX90A-NEXT: ;;#ASMEND
4374 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4376 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__u_0:
4378 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4379 ; GFX940-NEXT: ;;#ASMSTART
4380 ; GFX940-NEXT: ; def s[0:1]
4381 ; GFX940-NEXT: ;;#ASMEND
4382 ; GFX940-NEXT: s_lshl_b32 s8, s0, 16
4383 ; GFX940-NEXT: ;;#ASMSTART
4384 ; GFX940-NEXT: ; use s8
4385 ; GFX940-NEXT: ;;#ASMEND
4386 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4387 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4388 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 poison, i32 0>
4389 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; SGPR test with a zeroinitializer mask, i.e. <0, 0>: lane 0 of %vec0 in both
; result lanes. The result is consumed by an inline asm pinned to s8.
; The checks expect one s_pack_ll_b32_b16 using the first def register for
; both source operands.
4393 define void @s_shuffle_v2i16_v4i16__0_0() {
4394 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__0_0:
4396 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4397 ; GFX900-NEXT: ;;#ASMSTART
4398 ; GFX900-NEXT: ; def s[4:5]
4399 ; GFX900-NEXT: ;;#ASMEND
4400 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
4401 ; GFX900-NEXT: ;;#ASMSTART
4402 ; GFX900-NEXT: ; use s8
4403 ; GFX900-NEXT: ;;#ASMEND
4404 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4406 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__0_0:
4408 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4409 ; GFX90A-NEXT: ;;#ASMSTART
4410 ; GFX90A-NEXT: ; def s[4:5]
4411 ; GFX90A-NEXT: ;;#ASMEND
4412 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
4413 ; GFX90A-NEXT: ;;#ASMSTART
4414 ; GFX90A-NEXT: ; use s8
4415 ; GFX90A-NEXT: ;;#ASMEND
4416 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4418 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_0:
4420 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4421 ; GFX940-NEXT: ;;#ASMSTART
4422 ; GFX940-NEXT: ; def s[0:1]
4423 ; GFX940-NEXT: ;;#ASMEND
4424 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
4425 ; GFX940-NEXT: ;;#ASMSTART
4426 ; GFX940-NEXT: ; use s8
4427 ; GFX940-NEXT: ;;#ASMEND
4428 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4429 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4430 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> zeroinitializer
4431 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <1, 0>: result element 0 is element 1 of %vec0 and
; result element 1 is element 0 of %vec0 (the low two elements swapped).
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code on every target: s_lshr_b32 by 16 of the first def register,
; followed by s_pack_ll_b32_b16 into s8.
4435 define void @s_shuffle_v2i16_v4i16__1_0() {
4436 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__1_0:
4438 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4439 ; GFX900-NEXT: ;;#ASMSTART
4440 ; GFX900-NEXT: ; def s[4:5]
4441 ; GFX900-NEXT: ;;#ASMEND
4442 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
4443 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4444 ; GFX900-NEXT: ;;#ASMSTART
4445 ; GFX900-NEXT: ; use s8
4446 ; GFX900-NEXT: ;;#ASMEND
4447 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4449 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__1_0:
4451 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4452 ; GFX90A-NEXT: ;;#ASMSTART
4453 ; GFX90A-NEXT: ; def s[4:5]
4454 ; GFX90A-NEXT: ;;#ASMEND
4455 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
4456 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4457 ; GFX90A-NEXT: ;;#ASMSTART
4458 ; GFX90A-NEXT: ; use s8
4459 ; GFX90A-NEXT: ;;#ASMEND
4460 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4462 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_0:
4464 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4465 ; GFX940-NEXT: ;;#ASMSTART
4466 ; GFX940-NEXT: ; def s[0:1]
4467 ; GFX940-NEXT: ;;#ASMEND
4468 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
4469 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
4470 ; GFX940-NEXT: ;;#ASMSTART
4471 ; GFX940-NEXT: ; use s8
4472 ; GFX940-NEXT: ;;#ASMEND
4473 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4474 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4475 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 1, i32 0>
4476 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <2, 0>: result element 0 is element 2 of %vec0 and
; result element 1 is element 0 of %vec0.
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code on every target: one s_pack_ll_b32_b16 into s8 whose sources
; are the second and then the first register of the def pair.
4480 define void @s_shuffle_v2i16_v4i16__2_0() {
4481 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__2_0:
4483 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4484 ; GFX900-NEXT: ;;#ASMSTART
4485 ; GFX900-NEXT: ; def s[4:5]
4486 ; GFX900-NEXT: ;;#ASMEND
4487 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4488 ; GFX900-NEXT: ;;#ASMSTART
4489 ; GFX900-NEXT: ; use s8
4490 ; GFX900-NEXT: ;;#ASMEND
4491 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4493 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__2_0:
4495 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4496 ; GFX90A-NEXT: ;;#ASMSTART
4497 ; GFX90A-NEXT: ; def s[4:5]
4498 ; GFX90A-NEXT: ;;#ASMEND
4499 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4500 ; GFX90A-NEXT: ;;#ASMSTART
4501 ; GFX90A-NEXT: ; use s8
4502 ; GFX90A-NEXT: ;;#ASMEND
4503 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4505 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_0:
4507 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4508 ; GFX940-NEXT: ;;#ASMSTART
4509 ; GFX940-NEXT: ; def s[0:1]
4510 ; GFX940-NEXT: ;;#ASMEND
4511 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
4512 ; GFX940-NEXT: ;;#ASMSTART
4513 ; GFX940-NEXT: ; use s8
4514 ; GFX940-NEXT: ;;#ASMEND
4515 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4516 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4517 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 2, i32 0>
4518 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <3, 0>: result element 0 is element 3 of %vec0 and
; result element 1 is element 0 of %vec0.
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code on every target: s_lshr_b32 by 16 of the second def register
; (in place), followed by s_pack_ll_b32_b16 into s8.
4522 define void @s_shuffle_v2i16_v4i16__3_0() {
4523 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__3_0:
4525 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4526 ; GFX900-NEXT: ;;#ASMSTART
4527 ; GFX900-NEXT: ; def s[4:5]
4528 ; GFX900-NEXT: ;;#ASMEND
4529 ; GFX900-NEXT: s_lshr_b32 s5, s5, 16
4530 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4531 ; GFX900-NEXT: ;;#ASMSTART
4532 ; GFX900-NEXT: ; use s8
4533 ; GFX900-NEXT: ;;#ASMEND
4534 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4536 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__3_0:
4538 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4539 ; GFX90A-NEXT: ;;#ASMSTART
4540 ; GFX90A-NEXT: ; def s[4:5]
4541 ; GFX90A-NEXT: ;;#ASMEND
4542 ; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
4543 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4544 ; GFX90A-NEXT: ;;#ASMSTART
4545 ; GFX90A-NEXT: ; use s8
4546 ; GFX90A-NEXT: ;;#ASMEND
4547 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4549 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_0:
4551 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4552 ; GFX940-NEXT: ;;#ASMSTART
4553 ; GFX940-NEXT: ; def s[0:1]
4554 ; GFX940-NEXT: ;;#ASMEND
4555 ; GFX940-NEXT: s_lshr_b32 s1, s1, 16
4556 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
4557 ; GFX940-NEXT: ;;#ASMSTART
4558 ; GFX940-NEXT: ; use s8
4559 ; GFX940-NEXT: ;;#ASMEND
4560 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4561 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4562 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 3, i32 0>
4563 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <4, 0>: index 4 addresses element 0 of the second
; shufflevector operand, which is poison in this function, so only result
; element 1 (element 0 of %vec0) carries a defined value.
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code on every target: a single s_lshl_b32 by 16 of the first def
; register into s8, with no pack instruction.
4567 define void @s_shuffle_v2i16_v4i16__4_0() {
4568 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__4_0:
4570 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4571 ; GFX900-NEXT: ;;#ASMSTART
4572 ; GFX900-NEXT: ; def s[4:5]
4573 ; GFX900-NEXT: ;;#ASMEND
4574 ; GFX900-NEXT: s_lshl_b32 s8, s4, 16
4575 ; GFX900-NEXT: ;;#ASMSTART
4576 ; GFX900-NEXT: ; use s8
4577 ; GFX900-NEXT: ;;#ASMEND
4578 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4580 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__4_0:
4582 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4583 ; GFX90A-NEXT: ;;#ASMSTART
4584 ; GFX90A-NEXT: ; def s[4:5]
4585 ; GFX90A-NEXT: ;;#ASMEND
4586 ; GFX90A-NEXT: s_lshl_b32 s8, s4, 16
4587 ; GFX90A-NEXT: ;;#ASMSTART
4588 ; GFX90A-NEXT: ; use s8
4589 ; GFX90A-NEXT: ;;#ASMEND
4590 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4592 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__4_0:
4594 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4595 ; GFX940-NEXT: ;;#ASMSTART
4596 ; GFX940-NEXT: ; def s[0:1]
4597 ; GFX940-NEXT: ;;#ASMEND
4598 ; GFX940-NEXT: s_lshl_b32 s8, s0, 16
4599 ; GFX940-NEXT: ;;#ASMSTART
4600 ; GFX940-NEXT: ; use s8
4601 ; GFX940-NEXT: ;;#ASMEND
4602 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4603 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4604 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 4, i32 0>
4605 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar two-input shuffle test, mask <5, 0>: result element 0 is element 1 of
; %vec1 (mask indices 4-7 address the second operand) and result element 1 is
; element 0 of %vec0. Both inputs come from "=s" inline-asm defs, so two def
; register pairs are expected; the result is consumed in s8.
; Expected code on every target: s_lshr_b32 by 16 of the first register of the
; second def pair, followed by s_pack_ll_b32_b16 into s8.
4609 define void @s_shuffle_v2i16_v4i16__5_0() {
4610 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__5_0:
4612 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4613 ; GFX900-NEXT: ;;#ASMSTART
4614 ; GFX900-NEXT: ; def s[4:5]
4615 ; GFX900-NEXT: ;;#ASMEND
4616 ; GFX900-NEXT: ;;#ASMSTART
4617 ; GFX900-NEXT: ; def s[6:7]
4618 ; GFX900-NEXT: ;;#ASMEND
4619 ; GFX900-NEXT: s_lshr_b32 s5, s6, 16
4620 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4621 ; GFX900-NEXT: ;;#ASMSTART
4622 ; GFX900-NEXT: ; use s8
4623 ; GFX900-NEXT: ;;#ASMEND
4624 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4626 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__5_0:
4628 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4629 ; GFX90A-NEXT: ;;#ASMSTART
4630 ; GFX90A-NEXT: ; def s[4:5]
4631 ; GFX90A-NEXT: ;;#ASMEND
4632 ; GFX90A-NEXT: ;;#ASMSTART
4633 ; GFX90A-NEXT: ; def s[6:7]
4634 ; GFX90A-NEXT: ;;#ASMEND
4635 ; GFX90A-NEXT: s_lshr_b32 s5, s6, 16
4636 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
4637 ; GFX90A-NEXT: ;;#ASMSTART
4638 ; GFX90A-NEXT: ; use s8
4639 ; GFX90A-NEXT: ;;#ASMEND
4640 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4642 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_0:
4644 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4645 ; GFX940-NEXT: ;;#ASMSTART
4646 ; GFX940-NEXT: ; def s[0:1]
4647 ; GFX940-NEXT: ;;#ASMEND
4648 ; GFX940-NEXT: ;;#ASMSTART
4649 ; GFX940-NEXT: ; def s[2:3]
4650 ; GFX940-NEXT: ;;#ASMEND
4651 ; GFX940-NEXT: s_lshr_b32 s1, s2, 16
4652 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
4653 ; GFX940-NEXT: ;;#ASMSTART
4654 ; GFX940-NEXT: ; use s8
4655 ; GFX940-NEXT: ;;#ASMEND
4656 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4657 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4658 %vec1 = call <4 x i16> asm "; def $0", "=s"()
4659 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 0>
4660 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar two-input shuffle test, mask <6, 0>: result element 0 is element 2 of
; %vec1 (mask indices 4-7 address the second operand) and result element 1 is
; element 0 of %vec0. Both inputs come from "=s" inline-asm defs, so two def
; register pairs are expected; the result is consumed in s8.
; Expected code on every target: one s_pack_ll_b32_b16 into s8 whose sources
; are the second register of the second def pair and the first register of the
; first def pair.
4664 define void @s_shuffle_v2i16_v4i16__6_0() {
4665 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__6_0:
4667 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4668 ; GFX900-NEXT: ;;#ASMSTART
4669 ; GFX900-NEXT: ; def s[4:5]
4670 ; GFX900-NEXT: ;;#ASMEND
4671 ; GFX900-NEXT: ;;#ASMSTART
4672 ; GFX900-NEXT: ; def s[6:7]
4673 ; GFX900-NEXT: ;;#ASMEND
4674 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4
4675 ; GFX900-NEXT: ;;#ASMSTART
4676 ; GFX900-NEXT: ; use s8
4677 ; GFX900-NEXT: ;;#ASMEND
4678 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4680 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__6_0:
4682 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4683 ; GFX90A-NEXT: ;;#ASMSTART
4684 ; GFX90A-NEXT: ; def s[4:5]
4685 ; GFX90A-NEXT: ;;#ASMEND
4686 ; GFX90A-NEXT: ;;#ASMSTART
4687 ; GFX90A-NEXT: ; def s[6:7]
4688 ; GFX90A-NEXT: ;;#ASMEND
4689 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4
4690 ; GFX90A-NEXT: ;;#ASMSTART
4691 ; GFX90A-NEXT: ; use s8
4692 ; GFX90A-NEXT: ;;#ASMEND
4693 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4695 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_0:
4697 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4698 ; GFX940-NEXT: ;;#ASMSTART
4699 ; GFX940-NEXT: ; def s[0:1]
4700 ; GFX940-NEXT: ;;#ASMEND
4701 ; GFX940-NEXT: ;;#ASMSTART
4702 ; GFX940-NEXT: ; def s[2:3]
4703 ; GFX940-NEXT: ;;#ASMEND
4704 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0
4705 ; GFX940-NEXT: ;;#ASMSTART
4706 ; GFX940-NEXT: ; use s8
4707 ; GFX940-NEXT: ;;#ASMEND
4708 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4709 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4710 %vec1 = call <4 x i16> asm "; def $0", "=s"()
4711 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 0>
4712 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <poison, 1>: result element 0 is unconstrained and
; result element 1 is element 1 of %vec0.
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code: the def is placed directly in s[8:9] and no shuffle
; instruction appears between the def and the use. The GFX940 checks
; additionally expect an s_nop 0 between the two asm blocks.
4716 define void @s_shuffle_v2i16_v4i16__u_1() {
4717 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__u_1:
4719 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4720 ; GFX900-NEXT: ;;#ASMSTART
4721 ; GFX900-NEXT: ; def s[8:9]
4722 ; GFX900-NEXT: ;;#ASMEND
4723 ; GFX900-NEXT: ;;#ASMSTART
4724 ; GFX900-NEXT: ; use s8
4725 ; GFX900-NEXT: ;;#ASMEND
4726 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4728 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__u_1:
4730 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4731 ; GFX90A-NEXT: ;;#ASMSTART
4732 ; GFX90A-NEXT: ; def s[8:9]
4733 ; GFX90A-NEXT: ;;#ASMEND
4734 ; GFX90A-NEXT: ;;#ASMSTART
4735 ; GFX90A-NEXT: ; use s8
4736 ; GFX90A-NEXT: ;;#ASMEND
4737 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4739 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__u_1:
4741 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4742 ; GFX940-NEXT: ;;#ASMSTART
4743 ; GFX940-NEXT: ; def s[8:9]
4744 ; GFX940-NEXT: ;;#ASMEND
4745 ; GFX940-NEXT: s_nop 0
4746 ; GFX940-NEXT: ;;#ASMSTART
4747 ; GFX940-NEXT: ; use s8
4748 ; GFX940-NEXT: ;;#ASMEND
4749 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4750 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4751 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 poison, i32 1>
4752 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <0, 1>: the result is elements 0 and 1 of %vec0 in
; their original order (the low half of the input vector).
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code: the def is placed directly in s[8:9] and no shuffle
; instruction appears between the def and the use. The GFX940 checks
; additionally expect an s_nop 0 between the two asm blocks.
4756 define void @s_shuffle_v2i16_v4i16__0_1() {
4757 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__0_1:
4759 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4760 ; GFX900-NEXT: ;;#ASMSTART
4761 ; GFX900-NEXT: ; def s[8:9]
4762 ; GFX900-NEXT: ;;#ASMEND
4763 ; GFX900-NEXT: ;;#ASMSTART
4764 ; GFX900-NEXT: ; use s8
4765 ; GFX900-NEXT: ;;#ASMEND
4766 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4768 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__0_1:
4770 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4771 ; GFX90A-NEXT: ;;#ASMSTART
4772 ; GFX90A-NEXT: ; def s[8:9]
4773 ; GFX90A-NEXT: ;;#ASMEND
4774 ; GFX90A-NEXT: ;;#ASMSTART
4775 ; GFX90A-NEXT: ; use s8
4776 ; GFX90A-NEXT: ;;#ASMEND
4777 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4779 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_1:
4781 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4782 ; GFX940-NEXT: ;;#ASMSTART
4783 ; GFX940-NEXT: ; def s[8:9]
4784 ; GFX940-NEXT: ;;#ASMEND
4785 ; GFX940-NEXT: s_nop 0
4786 ; GFX940-NEXT: ;;#ASMSTART
4787 ; GFX940-NEXT: ; use s8
4788 ; GFX940-NEXT: ;;#ASMEND
4789 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4790 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4791 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 0, i32 1>
4792 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <1, 1>: both result elements are element 1 of
; %vec0.
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code on every target: one s_pack_hh_b32_b16 into s8 that uses the
; first register of the def pair for both sources.
4796 define void @s_shuffle_v2i16_v4i16__1_1() {
4797 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__1_1:
4799 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4800 ; GFX900-NEXT: ;;#ASMSTART
4801 ; GFX900-NEXT: ; def s[4:5]
4802 ; GFX900-NEXT: ;;#ASMEND
4803 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s4
4804 ; GFX900-NEXT: ;;#ASMSTART
4805 ; GFX900-NEXT: ; use s8
4806 ; GFX900-NEXT: ;;#ASMEND
4807 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4809 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__1_1:
4811 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4812 ; GFX90A-NEXT: ;;#ASMSTART
4813 ; GFX90A-NEXT: ; def s[4:5]
4814 ; GFX90A-NEXT: ;;#ASMEND
4815 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s4
4816 ; GFX90A-NEXT: ;;#ASMSTART
4817 ; GFX90A-NEXT: ; use s8
4818 ; GFX90A-NEXT: ;;#ASMEND
4819 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4821 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_1:
4823 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4824 ; GFX940-NEXT: ;;#ASMSTART
4825 ; GFX940-NEXT: ; def s[0:1]
4826 ; GFX940-NEXT: ;;#ASMEND
4827 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0
4828 ; GFX940-NEXT: ;;#ASMSTART
4829 ; GFX940-NEXT: ; use s8
4830 ; GFX940-NEXT: ;;#ASMEND
4831 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4832 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4833 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 1, i32 1>
4834 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <2, 1>: result element 0 is element 2 of %vec0 and
; result element 1 is element 1 of %vec0.
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code on every target: one s_pack_lh_b32_b16 into s8 whose sources
; are the second and then the first register of the def pair.
4838 define void @s_shuffle_v2i16_v4i16__2_1() {
4839 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__2_1:
4841 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4842 ; GFX900-NEXT: ;;#ASMSTART
4843 ; GFX900-NEXT: ; def s[4:5]
4844 ; GFX900-NEXT: ;;#ASMEND
4845 ; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4
4846 ; GFX900-NEXT: ;;#ASMSTART
4847 ; GFX900-NEXT: ; use s8
4848 ; GFX900-NEXT: ;;#ASMEND
4849 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4851 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__2_1:
4853 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4854 ; GFX90A-NEXT: ;;#ASMSTART
4855 ; GFX90A-NEXT: ; def s[4:5]
4856 ; GFX90A-NEXT: ;;#ASMEND
4857 ; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4
4858 ; GFX90A-NEXT: ;;#ASMSTART
4859 ; GFX90A-NEXT: ; use s8
4860 ; GFX90A-NEXT: ;;#ASMEND
4861 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4863 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_1:
4865 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4866 ; GFX940-NEXT: ;;#ASMSTART
4867 ; GFX940-NEXT: ; def s[0:1]
4868 ; GFX940-NEXT: ;;#ASMEND
4869 ; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0
4870 ; GFX940-NEXT: ;;#ASMSTART
4871 ; GFX940-NEXT: ; use s8
4872 ; GFX940-NEXT: ;;#ASMEND
4873 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4874 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4875 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 2, i32 1>
4876 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <3, 1>: result element 0 is element 3 of %vec0 and
; result element 1 is element 1 of %vec0.
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code on every target: one s_pack_hh_b32_b16 into s8 whose sources
; are the second and then the first register of the def pair.
4880 define void @s_shuffle_v2i16_v4i16__3_1() {
4881 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__3_1:
4883 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4884 ; GFX900-NEXT: ;;#ASMSTART
4885 ; GFX900-NEXT: ; def s[4:5]
4886 ; GFX900-NEXT: ;;#ASMEND
4887 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4
4888 ; GFX900-NEXT: ;;#ASMSTART
4889 ; GFX900-NEXT: ; use s8
4890 ; GFX900-NEXT: ;;#ASMEND
4891 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4893 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__3_1:
4895 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4896 ; GFX90A-NEXT: ;;#ASMSTART
4897 ; GFX90A-NEXT: ; def s[4:5]
4898 ; GFX90A-NEXT: ;;#ASMEND
4899 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4
4900 ; GFX90A-NEXT: ;;#ASMSTART
4901 ; GFX90A-NEXT: ; use s8
4902 ; GFX90A-NEXT: ;;#ASMEND
4903 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4905 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_1:
4907 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4908 ; GFX940-NEXT: ;;#ASMSTART
4909 ; GFX940-NEXT: ; def s[0:1]
4910 ; GFX940-NEXT: ;;#ASMEND
4911 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0
4912 ; GFX940-NEXT: ;;#ASMSTART
4913 ; GFX940-NEXT: ; use s8
4914 ; GFX940-NEXT: ;;#ASMEND
4915 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4916 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4917 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 3, i32 1>
4918 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <4, 1>: index 4 addresses element 0 of the second
; shufflevector operand, which is poison in this function, so only result
; element 1 (element 1 of %vec0) carries a defined value.
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code: the def is placed directly in s[8:9] and no shuffle
; instruction appears between the def and the use. The GFX940 checks
; additionally expect an s_nop 0 between the two asm blocks.
4922 define void @s_shuffle_v2i16_v4i16__4_1() {
4923 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__4_1:
4925 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4926 ; GFX900-NEXT: ;;#ASMSTART
4927 ; GFX900-NEXT: ; def s[8:9]
4928 ; GFX900-NEXT: ;;#ASMEND
4929 ; GFX900-NEXT: ;;#ASMSTART
4930 ; GFX900-NEXT: ; use s8
4931 ; GFX900-NEXT: ;;#ASMEND
4932 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4934 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__4_1:
4936 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4937 ; GFX90A-NEXT: ;;#ASMSTART
4938 ; GFX90A-NEXT: ; def s[8:9]
4939 ; GFX90A-NEXT: ;;#ASMEND
4940 ; GFX90A-NEXT: ;;#ASMSTART
4941 ; GFX90A-NEXT: ; use s8
4942 ; GFX90A-NEXT: ;;#ASMEND
4943 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4945 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__4_1:
4947 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4948 ; GFX940-NEXT: ;;#ASMSTART
4949 ; GFX940-NEXT: ; def s[8:9]
4950 ; GFX940-NEXT: ;;#ASMEND
4951 ; GFX940-NEXT: s_nop 0
4952 ; GFX940-NEXT: ;;#ASMSTART
4953 ; GFX940-NEXT: ; use s8
4954 ; GFX940-NEXT: ;;#ASMEND
4955 ; GFX940-NEXT: s_setpc_b64 s[30:31]
4956 %vec0 = call <4 x i16> asm "; def $0", "=s"()
4957 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 4, i32 1>
4958 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar two-input shuffle test, mask <5, 1>: result element 0 is element 1 of
; %vec1 (mask indices 4-7 address the second operand) and result element 1 is
; element 1 of %vec0. Both inputs come from "=s" inline-asm defs, so two def
; register pairs are expected; the result is consumed in s8.
; Expected code on every target: one s_pack_hh_b32_b16 into s8 whose sources
; are the first register of the second def pair and the first register of the
; first def pair.
4962 define void @s_shuffle_v2i16_v4i16__5_1() {
4963 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__5_1:
4965 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4966 ; GFX900-NEXT: ;;#ASMSTART
4967 ; GFX900-NEXT: ; def s[4:5]
4968 ; GFX900-NEXT: ;;#ASMEND
4969 ; GFX900-NEXT: ;;#ASMSTART
4970 ; GFX900-NEXT: ; def s[6:7]
4971 ; GFX900-NEXT: ;;#ASMEND
4972 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s4
4973 ; GFX900-NEXT: ;;#ASMSTART
4974 ; GFX900-NEXT: ; use s8
4975 ; GFX900-NEXT: ;;#ASMEND
4976 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4978 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__5_1:
4980 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4981 ; GFX90A-NEXT: ;;#ASMSTART
4982 ; GFX90A-NEXT: ; def s[4:5]
4983 ; GFX90A-NEXT: ;;#ASMEND
4984 ; GFX90A-NEXT: ;;#ASMSTART
4985 ; GFX90A-NEXT: ; def s[6:7]
4986 ; GFX90A-NEXT: ;;#ASMEND
4987 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s4
4988 ; GFX90A-NEXT: ;;#ASMSTART
4989 ; GFX90A-NEXT: ; use s8
4990 ; GFX90A-NEXT: ;;#ASMEND
4991 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
4993 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_1:
4995 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4996 ; GFX940-NEXT: ;;#ASMSTART
4997 ; GFX940-NEXT: ; def s[0:1]
4998 ; GFX940-NEXT: ;;#ASMEND
4999 ; GFX940-NEXT: ;;#ASMSTART
5000 ; GFX940-NEXT: ; def s[2:3]
5001 ; GFX940-NEXT: ;;#ASMEND
5002 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0
5003 ; GFX940-NEXT: ;;#ASMSTART
5004 ; GFX940-NEXT: ; use s8
5005 ; GFX940-NEXT: ;;#ASMEND
5006 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5007 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5008 %vec1 = call <4 x i16> asm "; def $0", "=s"()
5009 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 1>
5010 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar two-input shuffle test, mask <6, 1>: result element 0 is element 2 of
; %vec1 (mask indices 4-7 address the second operand) and result element 1 is
; element 1 of %vec0. Both inputs come from "=s" inline-asm defs, so two def
; register pairs are expected; the result is consumed in s8.
; Expected code on every target: one s_pack_lh_b32_b16 into s8 whose sources
; are the second register of the second def pair and the first register of the
; first def pair.
5014 define void @s_shuffle_v2i16_v4i16__6_1() {
5015 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__6_1:
5017 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5018 ; GFX900-NEXT: ;;#ASMSTART
5019 ; GFX900-NEXT: ; def s[4:5]
5020 ; GFX900-NEXT: ;;#ASMEND
5021 ; GFX900-NEXT: ;;#ASMSTART
5022 ; GFX900-NEXT: ; def s[6:7]
5023 ; GFX900-NEXT: ;;#ASMEND
5024 ; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4
5025 ; GFX900-NEXT: ;;#ASMSTART
5026 ; GFX900-NEXT: ; use s8
5027 ; GFX900-NEXT: ;;#ASMEND
5028 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5030 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__6_1:
5032 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5033 ; GFX90A-NEXT: ;;#ASMSTART
5034 ; GFX90A-NEXT: ; def s[4:5]
5035 ; GFX90A-NEXT: ;;#ASMEND
5036 ; GFX90A-NEXT: ;;#ASMSTART
5037 ; GFX90A-NEXT: ; def s[6:7]
5038 ; GFX90A-NEXT: ;;#ASMEND
5039 ; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4
5040 ; GFX90A-NEXT: ;;#ASMSTART
5041 ; GFX90A-NEXT: ; use s8
5042 ; GFX90A-NEXT: ;;#ASMEND
5043 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5045 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_1:
5047 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5048 ; GFX940-NEXT: ;;#ASMSTART
5049 ; GFX940-NEXT: ; def s[0:1]
5050 ; GFX940-NEXT: ;;#ASMEND
5051 ; GFX940-NEXT: ;;#ASMSTART
5052 ; GFX940-NEXT: ; def s[2:3]
5053 ; GFX940-NEXT: ;;#ASMEND
5054 ; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0
5055 ; GFX940-NEXT: ;;#ASMSTART
5056 ; GFX940-NEXT: ; use s8
5057 ; GFX940-NEXT: ;;#ASMEND
5058 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5059 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5060 %vec1 = call <4 x i16> asm "; def $0", "=s"()
5061 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 1>
5062 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <poison, 2>: result element 0 is unconstrained and
; result element 1 is element 2 of %vec0.
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code on every target: a single s_lshl_b32 by 16 of the second def
; register into s8, with no pack instruction.
5066 define void @s_shuffle_v2i16_v4i16__u_2() {
5067 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__u_2:
5069 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5070 ; GFX900-NEXT: ;;#ASMSTART
5071 ; GFX900-NEXT: ; def s[4:5]
5072 ; GFX900-NEXT: ;;#ASMEND
5073 ; GFX900-NEXT: s_lshl_b32 s8, s5, 16
5074 ; GFX900-NEXT: ;;#ASMSTART
5075 ; GFX900-NEXT: ; use s8
5076 ; GFX900-NEXT: ;;#ASMEND
5077 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5079 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__u_2:
5081 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5082 ; GFX90A-NEXT: ;;#ASMSTART
5083 ; GFX90A-NEXT: ; def s[4:5]
5084 ; GFX90A-NEXT: ;;#ASMEND
5085 ; GFX90A-NEXT: s_lshl_b32 s8, s5, 16
5086 ; GFX90A-NEXT: ;;#ASMSTART
5087 ; GFX90A-NEXT: ; use s8
5088 ; GFX90A-NEXT: ;;#ASMEND
5089 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5091 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__u_2:
5093 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5094 ; GFX940-NEXT: ;;#ASMSTART
5095 ; GFX940-NEXT: ; def s[0:1]
5096 ; GFX940-NEXT: ;;#ASMEND
5097 ; GFX940-NEXT: s_lshl_b32 s8, s1, 16
5098 ; GFX940-NEXT: ;;#ASMSTART
5099 ; GFX940-NEXT: ; use s8
5100 ; GFX940-NEXT: ;;#ASMEND
5101 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5102 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5103 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 poison, i32 2>
5104 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <0, 2>: result element 0 is element 0 of %vec0 and
; result element 1 is element 2 of %vec0.
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code on every target: one s_pack_ll_b32_b16 into s8 whose sources
; are the first and then the second register of the def pair.
5108 define void @s_shuffle_v2i16_v4i16__0_2() {
5109 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__0_2:
5111 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5112 ; GFX900-NEXT: ;;#ASMSTART
5113 ; GFX900-NEXT: ; def s[4:5]
5114 ; GFX900-NEXT: ;;#ASMEND
5115 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
5116 ; GFX900-NEXT: ;;#ASMSTART
5117 ; GFX900-NEXT: ; use s8
5118 ; GFX900-NEXT: ;;#ASMEND
5119 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5121 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__0_2:
5123 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5124 ; GFX90A-NEXT: ;;#ASMSTART
5125 ; GFX90A-NEXT: ; def s[4:5]
5126 ; GFX90A-NEXT: ;;#ASMEND
5127 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
5128 ; GFX90A-NEXT: ;;#ASMSTART
5129 ; GFX90A-NEXT: ; use s8
5130 ; GFX90A-NEXT: ;;#ASMEND
5131 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5133 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_2:
5135 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5136 ; GFX940-NEXT: ;;#ASMSTART
5137 ; GFX940-NEXT: ; def s[0:1]
5138 ; GFX940-NEXT: ;;#ASMEND
5139 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
5140 ; GFX940-NEXT: ;;#ASMSTART
5141 ; GFX940-NEXT: ; use s8
5142 ; GFX940-NEXT: ;;#ASMEND
5143 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5144 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5145 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 0, i32 2>
5146 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <1, 2>: result element 0 is element 1 of %vec0 and
; result element 1 is element 2 of %vec0.
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code on every target: s_lshr_b32 by 16 of the first def register
; (in place), followed by s_pack_ll_b32_b16 into s8 with the second register.
5150 define void @s_shuffle_v2i16_v4i16__1_2() {
5151 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__1_2:
5153 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5154 ; GFX900-NEXT: ;;#ASMSTART
5155 ; GFX900-NEXT: ; def s[4:5]
5156 ; GFX900-NEXT: ;;#ASMEND
5157 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
5158 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
5159 ; GFX900-NEXT: ;;#ASMSTART
5160 ; GFX900-NEXT: ; use s8
5161 ; GFX900-NEXT: ;;#ASMEND
5162 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5164 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__1_2:
5166 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5167 ; GFX90A-NEXT: ;;#ASMSTART
5168 ; GFX90A-NEXT: ; def s[4:5]
5169 ; GFX90A-NEXT: ;;#ASMEND
5170 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
5171 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
5172 ; GFX90A-NEXT: ;;#ASMSTART
5173 ; GFX90A-NEXT: ; use s8
5174 ; GFX90A-NEXT: ;;#ASMEND
5175 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5177 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_2:
5179 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5180 ; GFX940-NEXT: ;;#ASMSTART
5181 ; GFX940-NEXT: ; def s[0:1]
5182 ; GFX940-NEXT: ;;#ASMEND
5183 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
5184 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
5185 ; GFX940-NEXT: ;;#ASMSTART
5186 ; GFX940-NEXT: ; use s8
5187 ; GFX940-NEXT: ;;#ASMEND
5188 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5189 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5190 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 1, i32 2>
5191 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <2, 2>: both result elements are element 2 of
; %vec0.
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code on every target: one s_pack_ll_b32_b16 into s8 that uses the
; second register of the def pair for both sources.
5195 define void @s_shuffle_v2i16_v4i16__2_2() {
5196 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__2_2:
5198 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5199 ; GFX900-NEXT: ;;#ASMSTART
5200 ; GFX900-NEXT: ; def s[4:5]
5201 ; GFX900-NEXT: ;;#ASMEND
5202 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
5203 ; GFX900-NEXT: ;;#ASMSTART
5204 ; GFX900-NEXT: ; use s8
5205 ; GFX900-NEXT: ;;#ASMEND
5206 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5208 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__2_2:
5210 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5211 ; GFX90A-NEXT: ;;#ASMSTART
5212 ; GFX90A-NEXT: ; def s[4:5]
5213 ; GFX90A-NEXT: ;;#ASMEND
5214 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
5215 ; GFX90A-NEXT: ;;#ASMSTART
5216 ; GFX90A-NEXT: ; use s8
5217 ; GFX90A-NEXT: ;;#ASMEND
5218 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5220 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_2:
5222 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5223 ; GFX940-NEXT: ;;#ASMSTART
5224 ; GFX940-NEXT: ; def s[0:1]
5225 ; GFX940-NEXT: ;;#ASMEND
5226 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
5227 ; GFX940-NEXT: ;;#ASMSTART
5228 ; GFX940-NEXT: ; use s8
5229 ; GFX940-NEXT: ;;#ASMEND
5230 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5231 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5232 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 2, i32 2>
5233 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <3, 2>: result element 0 is element 3 of %vec0 and
; result element 1 is element 2 of %vec0 (the high two elements swapped).
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code on every target: s_lshr_b32 by 16 of the second def register
; into the first one, followed by s_pack_ll_b32_b16 into s8.
5237 define void @s_shuffle_v2i16_v4i16__3_2() {
5238 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__3_2:
5240 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5241 ; GFX900-NEXT: ;;#ASMSTART
5242 ; GFX900-NEXT: ; def s[4:5]
5243 ; GFX900-NEXT: ;;#ASMEND
5244 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
5245 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
5246 ; GFX900-NEXT: ;;#ASMSTART
5247 ; GFX900-NEXT: ; use s8
5248 ; GFX900-NEXT: ;;#ASMEND
5249 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5251 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__3_2:
5253 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5254 ; GFX90A-NEXT: ;;#ASMSTART
5255 ; GFX90A-NEXT: ; def s[4:5]
5256 ; GFX90A-NEXT: ;;#ASMEND
5257 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
5258 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
5259 ; GFX90A-NEXT: ;;#ASMSTART
5260 ; GFX90A-NEXT: ; use s8
5261 ; GFX90A-NEXT: ;;#ASMEND
5262 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5264 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_2:
5266 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5267 ; GFX940-NEXT: ;;#ASMSTART
5268 ; GFX940-NEXT: ; def s[0:1]
5269 ; GFX940-NEXT: ;;#ASMEND
5270 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
5271 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
5272 ; GFX940-NEXT: ;;#ASMSTART
5273 ; GFX940-NEXT: ; use s8
5274 ; GFX940-NEXT: ;;#ASMEND
5275 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5276 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5277 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 3, i32 2>
5278 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle test, mask <4, 2>: index 4 addresses element 0 of the second
; shufflevector operand, which is poison in this function, so only result
; element 1 (element 2 of %vec0) carries a defined value.
; %vec0 comes from an "=s" inline-asm def; the result is consumed in s8.
; Expected code on every target: a single s_lshl_b32 by 16 of the second def
; register into s8, with no pack instruction.
5282 define void @s_shuffle_v2i16_v4i16__4_2() {
5283 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__4_2:
5285 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5286 ; GFX900-NEXT: ;;#ASMSTART
5287 ; GFX900-NEXT: ; def s[4:5]
5288 ; GFX900-NEXT: ;;#ASMEND
5289 ; GFX900-NEXT: s_lshl_b32 s8, s5, 16
5290 ; GFX900-NEXT: ;;#ASMSTART
5291 ; GFX900-NEXT: ; use s8
5292 ; GFX900-NEXT: ;;#ASMEND
5293 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5295 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__4_2:
5297 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5298 ; GFX90A-NEXT: ;;#ASMSTART
5299 ; GFX90A-NEXT: ; def s[4:5]
5300 ; GFX90A-NEXT: ;;#ASMEND
5301 ; GFX90A-NEXT: s_lshl_b32 s8, s5, 16
5302 ; GFX90A-NEXT: ;;#ASMSTART
5303 ; GFX90A-NEXT: ; use s8
5304 ; GFX90A-NEXT: ;;#ASMEND
5305 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5307 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__4_2:
5309 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5310 ; GFX940-NEXT: ;;#ASMSTART
5311 ; GFX940-NEXT: ; def s[0:1]
5312 ; GFX940-NEXT: ;;#ASMEND
5313 ; GFX940-NEXT: s_lshl_b32 s8, s1, 16
5314 ; GFX940-NEXT: ;;#ASMSTART
5315 ; GFX940-NEXT: ; use s8
5316 ; GFX940-NEXT: ;;#ASMEND
5317 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5318 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5319 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 4, i32 2>
5320 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar two-input shuffle test, mask <5, 2>: result element 0 is element 1 of
; %vec1 (mask indices 4-7 address the second operand) and result element 1 is
; element 2 of %vec0. Both inputs come from "=s" inline-asm defs, so two def
; register pairs are expected; the result is consumed in s8.
; Expected code on every target: s_lshr_b32 by 16 of the first register of the
; second def pair, followed by s_pack_ll_b32_b16 into s8 with the second
; register of the first def pair.
5324 define void @s_shuffle_v2i16_v4i16__5_2() {
5325 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__5_2:
5327 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5328 ; GFX900-NEXT: ;;#ASMSTART
5329 ; GFX900-NEXT: ; def s[4:5]
5330 ; GFX900-NEXT: ;;#ASMEND
5331 ; GFX900-NEXT: ;;#ASMSTART
5332 ; GFX900-NEXT: ; def s[6:7]
5333 ; GFX900-NEXT: ;;#ASMEND
5334 ; GFX900-NEXT: s_lshr_b32 s4, s6, 16
5335 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
5336 ; GFX900-NEXT: ;;#ASMSTART
5337 ; GFX900-NEXT: ; use s8
5338 ; GFX900-NEXT: ;;#ASMEND
5339 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5341 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__5_2:
5343 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5344 ; GFX90A-NEXT: ;;#ASMSTART
5345 ; GFX90A-NEXT: ; def s[4:5]
5346 ; GFX90A-NEXT: ;;#ASMEND
5347 ; GFX90A-NEXT: ;;#ASMSTART
5348 ; GFX90A-NEXT: ; def s[6:7]
5349 ; GFX90A-NEXT: ;;#ASMEND
5350 ; GFX90A-NEXT: s_lshr_b32 s4, s6, 16
5351 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
5352 ; GFX90A-NEXT: ;;#ASMSTART
5353 ; GFX90A-NEXT: ; use s8
5354 ; GFX90A-NEXT: ;;#ASMEND
5355 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5357 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_2:
5359 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5360 ; GFX940-NEXT: ;;#ASMSTART
5361 ; GFX940-NEXT: ; def s[0:1]
5362 ; GFX940-NEXT: ;;#ASMEND
5363 ; GFX940-NEXT: ;;#ASMSTART
5364 ; GFX940-NEXT: ; def s[2:3]
5365 ; GFX940-NEXT: ;;#ASMEND
5366 ; GFX940-NEXT: s_lshr_b32 s0, s2, 16
5367 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
5368 ; GFX940-NEXT: ;;#ASMSTART
5369 ; GFX940-NEXT: ; use s8
5370 ; GFX940-NEXT: ;;#ASMEND
5371 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5372 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5373 %vec1 = call <4 x i16> asm "; def $0", "=s"()
5374 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 2>
5375 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <6, 2>: lane 0 is element 2 of %vec1 and lane 1 is element 2 of %vec0.
; The checks expect a single s_pack_ll_b32_b16 combining the upper dword of
; each input pair into s8.
5379 define void @s_shuffle_v2i16_v4i16__6_2() {
5380 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__6_2:
5382 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5383 ; GFX900-NEXT: ;;#ASMSTART
5384 ; GFX900-NEXT: ; def s[4:5]
5385 ; GFX900-NEXT: ;;#ASMEND
5386 ; GFX900-NEXT: ;;#ASMSTART
5387 ; GFX900-NEXT: ; def s[6:7]
5388 ; GFX900-NEXT: ;;#ASMEND
5389 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5
5390 ; GFX900-NEXT: ;;#ASMSTART
5391 ; GFX900-NEXT: ; use s8
5392 ; GFX900-NEXT: ;;#ASMEND
5393 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5395 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__6_2:
5397 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5398 ; GFX90A-NEXT: ;;#ASMSTART
5399 ; GFX90A-NEXT: ; def s[4:5]
5400 ; GFX90A-NEXT: ;;#ASMEND
5401 ; GFX90A-NEXT: ;;#ASMSTART
5402 ; GFX90A-NEXT: ; def s[6:7]
5403 ; GFX90A-NEXT: ;;#ASMEND
5404 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5
5405 ; GFX90A-NEXT: ;;#ASMSTART
5406 ; GFX90A-NEXT: ; use s8
5407 ; GFX90A-NEXT: ;;#ASMEND
5408 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5410 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_2:
5412 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5413 ; GFX940-NEXT: ;;#ASMSTART
5414 ; GFX940-NEXT: ; def s[0:1]
5415 ; GFX940-NEXT: ;;#ASMEND
5416 ; GFX940-NEXT: ;;#ASMSTART
5417 ; GFX940-NEXT: ; def s[2:3]
5418 ; GFX940-NEXT: ;;#ASMEND
5419 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1
5420 ; GFX940-NEXT: ;;#ASMSTART
5421 ; GFX940-NEXT: ; use s8
5422 ; GFX940-NEXT: ;;#ASMEND
5423 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5424 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5425 %vec1 = call <4 x i16> asm "; def $0", "=s"()
5426 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 2>
5427 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <poison, 3>: lane 0 is undefined and lane 1 is element 3 of %vec0.
; The checks expect the upper dword of the input pair to be copied to s8
; with a plain s_mov_b32.
5431 define void @s_shuffle_v2i16_v4i16__u_3() {
5432 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__u_3:
5434 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5435 ; GFX900-NEXT: ;;#ASMSTART
5436 ; GFX900-NEXT: ; def s[4:5]
5437 ; GFX900-NEXT: ;;#ASMEND
5438 ; GFX900-NEXT: s_mov_b32 s8, s5
5439 ; GFX900-NEXT: ;;#ASMSTART
5440 ; GFX900-NEXT: ; use s8
5441 ; GFX900-NEXT: ;;#ASMEND
5442 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5444 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__u_3:
5446 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5447 ; GFX90A-NEXT: ;;#ASMSTART
5448 ; GFX90A-NEXT: ; def s[4:5]
5449 ; GFX90A-NEXT: ;;#ASMEND
5450 ; GFX90A-NEXT: s_mov_b32 s8, s5
5451 ; GFX90A-NEXT: ;;#ASMSTART
5452 ; GFX90A-NEXT: ; use s8
5453 ; GFX90A-NEXT: ;;#ASMEND
5454 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5456 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__u_3:
5458 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5459 ; GFX940-NEXT: ;;#ASMSTART
5460 ; GFX940-NEXT: ; def s[0:1]
5461 ; GFX940-NEXT: ;;#ASMEND
5462 ; GFX940-NEXT: s_mov_b32 s8, s1
5463 ; GFX940-NEXT: ;;#ASMSTART
5464 ; GFX940-NEXT: ; use s8
5465 ; GFX940-NEXT: ;;#ASMEND
5466 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5467 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5468 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 poison, i32 3>
5469 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <0, 3>: lane 0 is element 0 and lane 1 is element 3 of %vec0.
; The checks expect one s_pack_lh_b32_b16 taking the low half of the first
; dword and the high half of the second dword into s8.
5473 define void @s_shuffle_v2i16_v4i16__0_3() {
5474 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__0_3:
5476 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5477 ; GFX900-NEXT: ;;#ASMSTART
5478 ; GFX900-NEXT: ; def s[4:5]
5479 ; GFX900-NEXT: ;;#ASMEND
5480 ; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s5
5481 ; GFX900-NEXT: ;;#ASMSTART
5482 ; GFX900-NEXT: ; use s8
5483 ; GFX900-NEXT: ;;#ASMEND
5484 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5486 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__0_3:
5488 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5489 ; GFX90A-NEXT: ;;#ASMSTART
5490 ; GFX90A-NEXT: ; def s[4:5]
5491 ; GFX90A-NEXT: ;;#ASMEND
5492 ; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s5
5493 ; GFX90A-NEXT: ;;#ASMSTART
5494 ; GFX90A-NEXT: ; use s8
5495 ; GFX90A-NEXT: ;;#ASMEND
5496 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5498 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_3:
5500 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5501 ; GFX940-NEXT: ;;#ASMSTART
5502 ; GFX940-NEXT: ; def s[0:1]
5503 ; GFX940-NEXT: ;;#ASMEND
5504 ; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1
5505 ; GFX940-NEXT: ;;#ASMSTART
5506 ; GFX940-NEXT: ; use s8
5507 ; GFX940-NEXT: ;;#ASMEND
5508 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5509 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5510 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 0, i32 3>
5511 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <1, 3>: lane 0 is element 1 and lane 1 is element 3 of %vec0, i.e.
; both odd elements. The checks expect one s_pack_hh_b32_b16 over the two
; dwords of the input pair, written to s8.
5515 define void @s_shuffle_v2i16_v4i16__1_3() {
5516 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__1_3:
5518 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5519 ; GFX900-NEXT: ;;#ASMSTART
5520 ; GFX900-NEXT: ; def s[4:5]
5521 ; GFX900-NEXT: ;;#ASMEND
5522 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s5
5523 ; GFX900-NEXT: ;;#ASMSTART
5524 ; GFX900-NEXT: ; use s8
5525 ; GFX900-NEXT: ;;#ASMEND
5526 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5528 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__1_3:
5530 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5531 ; GFX90A-NEXT: ;;#ASMSTART
5532 ; GFX90A-NEXT: ; def s[4:5]
5533 ; GFX90A-NEXT: ;;#ASMEND
5534 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s5
5535 ; GFX90A-NEXT: ;;#ASMSTART
5536 ; GFX90A-NEXT: ; use s8
5537 ; GFX90A-NEXT: ;;#ASMEND
5538 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5540 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_3:
5542 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5543 ; GFX940-NEXT: ;;#ASMSTART
5544 ; GFX940-NEXT: ; def s[0:1]
5545 ; GFX940-NEXT: ;;#ASMEND
5546 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1
5547 ; GFX940-NEXT: ;;#ASMSTART
5548 ; GFX940-NEXT: ; use s8
5549 ; GFX940-NEXT: ;;#ASMEND
5550 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5551 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5552 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 1, i32 3>
5553 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <2, 3>: the result is the upper two elements of %vec0 in their
; original order. The checks expect the upper dword of the input pair to be
; copied to s8 with a plain s_mov_b32.
5557 define void @s_shuffle_v2i16_v4i16__2_3() {
5558 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__2_3:
5560 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5561 ; GFX900-NEXT: ;;#ASMSTART
5562 ; GFX900-NEXT: ; def s[4:5]
5563 ; GFX900-NEXT: ;;#ASMEND
5564 ; GFX900-NEXT: s_mov_b32 s8, s5
5565 ; GFX900-NEXT: ;;#ASMSTART
5566 ; GFX900-NEXT: ; use s8
5567 ; GFX900-NEXT: ;;#ASMEND
5568 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5570 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__2_3:
5572 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5573 ; GFX90A-NEXT: ;;#ASMSTART
5574 ; GFX90A-NEXT: ; def s[4:5]
5575 ; GFX90A-NEXT: ;;#ASMEND
5576 ; GFX90A-NEXT: s_mov_b32 s8, s5
5577 ; GFX90A-NEXT: ;;#ASMSTART
5578 ; GFX90A-NEXT: ; use s8
5579 ; GFX90A-NEXT: ;;#ASMEND
5580 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5582 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_3:
5584 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5585 ; GFX940-NEXT: ;;#ASMSTART
5586 ; GFX940-NEXT: ; def s[0:1]
5587 ; GFX940-NEXT: ;;#ASMEND
5588 ; GFX940-NEXT: s_mov_b32 s8, s1
5589 ; GFX940-NEXT: ;;#ASMSTART
5590 ; GFX940-NEXT: ; use s8
5591 ; GFX940-NEXT: ;;#ASMEND
5592 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5593 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5594 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 2, i32 3>
5595 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <3, 3>: both result lanes are element 3 of %vec0 (a splat of the last
; element). The checks expect s_pack_hh_b32_b16 with the upper dword of the
; input pair used for both source operands.
5599 define void @s_shuffle_v2i16_v4i16__3_3() {
5600 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__3_3:
5602 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5603 ; GFX900-NEXT: ;;#ASMSTART
5604 ; GFX900-NEXT: ; def s[4:5]
5605 ; GFX900-NEXT: ;;#ASMEND
5606 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5
5607 ; GFX900-NEXT: ;;#ASMSTART
5608 ; GFX900-NEXT: ; use s8
5609 ; GFX900-NEXT: ;;#ASMEND
5610 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5612 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__3_3:
5614 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5615 ; GFX90A-NEXT: ;;#ASMSTART
5616 ; GFX90A-NEXT: ; def s[4:5]
5617 ; GFX90A-NEXT: ;;#ASMEND
5618 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5
5619 ; GFX90A-NEXT: ;;#ASMSTART
5620 ; GFX90A-NEXT: ; use s8
5621 ; GFX90A-NEXT: ;;#ASMEND
5622 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5624 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_3:
5626 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5627 ; GFX940-NEXT: ;;#ASMSTART
5628 ; GFX940-NEXT: ; def s[0:1]
5629 ; GFX940-NEXT: ;;#ASMEND
5630 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1
5631 ; GFX940-NEXT: ;;#ASMSTART
5632 ; GFX940-NEXT: ; use s8
5633 ; GFX940-NEXT: ;;#ASMEND
5634 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5635 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5636 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 3, i32 3>
5637 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <4, 3>: index 4 addresses element 0 of the second operand, which is
; poison here, so only lane 1 (element 3 of %vec0) is defined. The checks
; expect the same plain s_mov_b32 of the upper dword as a <poison, 3> mask.
5641 define void @s_shuffle_v2i16_v4i16__4_3() {
5642 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__4_3:
5644 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5645 ; GFX900-NEXT: ;;#ASMSTART
5646 ; GFX900-NEXT: ; def s[4:5]
5647 ; GFX900-NEXT: ;;#ASMEND
5648 ; GFX900-NEXT: s_mov_b32 s8, s5
5649 ; GFX900-NEXT: ;;#ASMSTART
5650 ; GFX900-NEXT: ; use s8
5651 ; GFX900-NEXT: ;;#ASMEND
5652 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5654 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__4_3:
5656 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5657 ; GFX90A-NEXT: ;;#ASMSTART
5658 ; GFX90A-NEXT: ; def s[4:5]
5659 ; GFX90A-NEXT: ;;#ASMEND
5660 ; GFX90A-NEXT: s_mov_b32 s8, s5
5661 ; GFX90A-NEXT: ;;#ASMSTART
5662 ; GFX90A-NEXT: ; use s8
5663 ; GFX90A-NEXT: ;;#ASMEND
5664 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5666 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__4_3:
5668 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5669 ; GFX940-NEXT: ;;#ASMSTART
5670 ; GFX940-NEXT: ; def s[0:1]
5671 ; GFX940-NEXT: ;;#ASMEND
5672 ; GFX940-NEXT: s_mov_b32 s8, s1
5673 ; GFX940-NEXT: ;;#ASMSTART
5674 ; GFX940-NEXT: ; use s8
5675 ; GFX940-NEXT: ;;#ASMEND
5676 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5677 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5678 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 4, i32 3>
5679 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <5, 3>: lane 0 is element 1 of %vec1 and lane 1 is element 3 of %vec0.
; The checks expect one s_pack_hh_b32_b16 combining the high halves of one
; dword from each input pair into s8.
5683 define void @s_shuffle_v2i16_v4i16__5_3() {
5684 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__5_3:
5686 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5687 ; GFX900-NEXT: ;;#ASMSTART
5688 ; GFX900-NEXT: ; def s[4:5]
5689 ; GFX900-NEXT: ;;#ASMEND
5690 ; GFX900-NEXT: ;;#ASMSTART
5691 ; GFX900-NEXT: ; def s[6:7]
5692 ; GFX900-NEXT: ;;#ASMEND
5693 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s5
5694 ; GFX900-NEXT: ;;#ASMSTART
5695 ; GFX900-NEXT: ; use s8
5696 ; GFX900-NEXT: ;;#ASMEND
5697 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5699 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__5_3:
5701 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5702 ; GFX90A-NEXT: ;;#ASMSTART
5703 ; GFX90A-NEXT: ; def s[4:5]
5704 ; GFX90A-NEXT: ;;#ASMEND
5705 ; GFX90A-NEXT: ;;#ASMSTART
5706 ; GFX90A-NEXT: ; def s[6:7]
5707 ; GFX90A-NEXT: ;;#ASMEND
5708 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s5
5709 ; GFX90A-NEXT: ;;#ASMSTART
5710 ; GFX90A-NEXT: ; use s8
5711 ; GFX90A-NEXT: ;;#ASMEND
5712 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5714 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_3:
5716 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5717 ; GFX940-NEXT: ;;#ASMSTART
5718 ; GFX940-NEXT: ; def s[0:1]
5719 ; GFX940-NEXT: ;;#ASMEND
5720 ; GFX940-NEXT: ;;#ASMSTART
5721 ; GFX940-NEXT: ; def s[2:3]
5722 ; GFX940-NEXT: ;;#ASMEND
5723 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s1
5724 ; GFX940-NEXT: ;;#ASMSTART
5725 ; GFX940-NEXT: ; use s8
5726 ; GFX940-NEXT: ;;#ASMEND
5727 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5728 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5729 %vec1 = call <4 x i16> asm "; def $0", "=s"()
5730 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 3>
5731 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <6, 3>: lane 0 is element 2 of %vec1 and lane 1 is element 3 of %vec0.
; The checks expect one s_pack_lh_b32_b16 over the upper dword of each input
; pair, written to s8.
5735 define void @s_shuffle_v2i16_v4i16__6_3() {
5736 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__6_3:
5738 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5739 ; GFX900-NEXT: ;;#ASMSTART
5740 ; GFX900-NEXT: ; def s[4:5]
5741 ; GFX900-NEXT: ;;#ASMEND
5742 ; GFX900-NEXT: ;;#ASMSTART
5743 ; GFX900-NEXT: ; def s[6:7]
5744 ; GFX900-NEXT: ;;#ASMEND
5745 ; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s5
5746 ; GFX900-NEXT: ;;#ASMSTART
5747 ; GFX900-NEXT: ; use s8
5748 ; GFX900-NEXT: ;;#ASMEND
5749 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5751 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__6_3:
5753 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5754 ; GFX90A-NEXT: ;;#ASMSTART
5755 ; GFX90A-NEXT: ; def s[4:5]
5756 ; GFX90A-NEXT: ;;#ASMEND
5757 ; GFX90A-NEXT: ;;#ASMSTART
5758 ; GFX90A-NEXT: ; def s[6:7]
5759 ; GFX90A-NEXT: ;;#ASMEND
5760 ; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s5
5761 ; GFX90A-NEXT: ;;#ASMSTART
5762 ; GFX90A-NEXT: ; use s8
5763 ; GFX90A-NEXT: ;;#ASMEND
5764 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5766 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_3:
5768 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5769 ; GFX940-NEXT: ;;#ASMSTART
5770 ; GFX940-NEXT: ; def s[0:1]
5771 ; GFX940-NEXT: ;;#ASMEND
5772 ; GFX940-NEXT: ;;#ASMSTART
5773 ; GFX940-NEXT: ; def s[2:3]
5774 ; GFX940-NEXT: ;;#ASMEND
5775 ; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s1
5776 ; GFX940-NEXT: ;;#ASMSTART
5777 ; GFX940-NEXT: ; use s8
5778 ; GFX940-NEXT: ;;#ASMEND
5779 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5780 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5781 %vec1 = call <4 x i16> asm "; def $0", "=s"()
5782 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 3>
5783 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <poison, 4>: lane 0 is undefined and index 4 addresses element 0 of
; the poison second operand, so no lane carries a defined value. All three
; targets share one check body, which contains only the use of s8 and no def.
5787 define void @s_shuffle_v2i16_v4i16__u_4() {
5788 ; GFX9-LABEL: s_shuffle_v2i16_v4i16__u_4:
5790 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5791 ; GFX9-NEXT: ;;#ASMSTART
5792 ; GFX9-NEXT: ; use s8
5793 ; GFX9-NEXT: ;;#ASMEND
5794 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5795 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5796 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 poison, i32 4>
5797 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <0, 4>: lane 0 is element 0 of %vec0; index 4 addresses the poison
; second operand, so lane 1 is undefined. The checks expect the input to be
; defined directly in s[8:9] with no shuffle instruction before the use
; (the gfx940 output additionally contains an s_nop 0).
5801 define void @s_shuffle_v2i16_v4i16__0_4() {
5802 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__0_4:
5804 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5805 ; GFX900-NEXT: ;;#ASMSTART
5806 ; GFX900-NEXT: ; def s[8:9]
5807 ; GFX900-NEXT: ;;#ASMEND
5808 ; GFX900-NEXT: ;;#ASMSTART
5809 ; GFX900-NEXT: ; use s8
5810 ; GFX900-NEXT: ;;#ASMEND
5811 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5813 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__0_4:
5815 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5816 ; GFX90A-NEXT: ;;#ASMSTART
5817 ; GFX90A-NEXT: ; def s[8:9]
5818 ; GFX90A-NEXT: ;;#ASMEND
5819 ; GFX90A-NEXT: ;;#ASMSTART
5820 ; GFX90A-NEXT: ; use s8
5821 ; GFX90A-NEXT: ;;#ASMEND
5822 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5824 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_4:
5826 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5827 ; GFX940-NEXT: ;;#ASMSTART
5828 ; GFX940-NEXT: ; def s[8:9]
5829 ; GFX940-NEXT: ;;#ASMEND
5830 ; GFX940-NEXT: s_nop 0
5831 ; GFX940-NEXT: ;;#ASMSTART
5832 ; GFX940-NEXT: ; use s8
5833 ; GFX940-NEXT: ;;#ASMEND
5834 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5835 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5836 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 0, i32 4>
5837 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <1, 4>: lane 0 is element 1 of %vec0; index 4 addresses the poison
; second operand, so lane 1 is undefined. The checks expect a single
; s_lshr_b32 by 16 of the first input dword into s8.
5841 define void @s_shuffle_v2i16_v4i16__1_4() {
5842 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__1_4:
5844 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5845 ; GFX900-NEXT: ;;#ASMSTART
5846 ; GFX900-NEXT: ; def s[4:5]
5847 ; GFX900-NEXT: ;;#ASMEND
5848 ; GFX900-NEXT: s_lshr_b32 s8, s4, 16
5849 ; GFX900-NEXT: ;;#ASMSTART
5850 ; GFX900-NEXT: ; use s8
5851 ; GFX900-NEXT: ;;#ASMEND
5852 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5854 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__1_4:
5856 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5857 ; GFX90A-NEXT: ;;#ASMSTART
5858 ; GFX90A-NEXT: ; def s[4:5]
5859 ; GFX90A-NEXT: ;;#ASMEND
5860 ; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
5861 ; GFX90A-NEXT: ;;#ASMSTART
5862 ; GFX90A-NEXT: ; use s8
5863 ; GFX90A-NEXT: ;;#ASMEND
5864 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5866 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_4:
5868 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5869 ; GFX940-NEXT: ;;#ASMSTART
5870 ; GFX940-NEXT: ; def s[0:1]
5871 ; GFX940-NEXT: ;;#ASMEND
5872 ; GFX940-NEXT: s_lshr_b32 s8, s0, 16
5873 ; GFX940-NEXT: ;;#ASMSTART
5874 ; GFX940-NEXT: ; use s8
5875 ; GFX940-NEXT: ;;#ASMEND
5876 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5877 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5878 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 1, i32 4>
5879 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <2, 4>: lane 0 is element 2 of %vec0; index 4 addresses the poison
; second operand, so lane 1 is undefined. The checks expect the upper dword
; of the input pair to be copied to s8 with a plain s_mov_b32.
5883 define void @s_shuffle_v2i16_v4i16__2_4() {
5884 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__2_4:
5886 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5887 ; GFX900-NEXT: ;;#ASMSTART
5888 ; GFX900-NEXT: ; def s[4:5]
5889 ; GFX900-NEXT: ;;#ASMEND
5890 ; GFX900-NEXT: s_mov_b32 s8, s5
5891 ; GFX900-NEXT: ;;#ASMSTART
5892 ; GFX900-NEXT: ; use s8
5893 ; GFX900-NEXT: ;;#ASMEND
5894 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5896 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__2_4:
5898 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5899 ; GFX90A-NEXT: ;;#ASMSTART
5900 ; GFX90A-NEXT: ; def s[4:5]
5901 ; GFX90A-NEXT: ;;#ASMEND
5902 ; GFX90A-NEXT: s_mov_b32 s8, s5
5903 ; GFX90A-NEXT: ;;#ASMSTART
5904 ; GFX90A-NEXT: ; use s8
5905 ; GFX90A-NEXT: ;;#ASMEND
5906 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5908 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_4:
5910 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5911 ; GFX940-NEXT: ;;#ASMSTART
5912 ; GFX940-NEXT: ; def s[0:1]
5913 ; GFX940-NEXT: ;;#ASMEND
5914 ; GFX940-NEXT: s_mov_b32 s8, s1
5915 ; GFX940-NEXT: ;;#ASMSTART
5916 ; GFX940-NEXT: ; use s8
5917 ; GFX940-NEXT: ;;#ASMEND
5918 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5919 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5920 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 2, i32 4>
5921 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <3, 4>: lane 0 is element 3 of %vec0; index 4 addresses the poison
; second operand, so lane 1 is undefined. The checks expect a single
; s_lshr_b32 by 16 of the upper input dword into s8.
5925 define void @s_shuffle_v2i16_v4i16__3_4() {
5926 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__3_4:
5928 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5929 ; GFX900-NEXT: ;;#ASMSTART
5930 ; GFX900-NEXT: ; def s[4:5]
5931 ; GFX900-NEXT: ;;#ASMEND
5932 ; GFX900-NEXT: s_lshr_b32 s8, s5, 16
5933 ; GFX900-NEXT: ;;#ASMSTART
5934 ; GFX900-NEXT: ; use s8
5935 ; GFX900-NEXT: ;;#ASMEND
5936 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5938 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__3_4:
5940 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5941 ; GFX90A-NEXT: ;;#ASMSTART
5942 ; GFX90A-NEXT: ; def s[4:5]
5943 ; GFX90A-NEXT: ;;#ASMEND
5944 ; GFX90A-NEXT: s_lshr_b32 s8, s5, 16
5945 ; GFX90A-NEXT: ;;#ASMSTART
5946 ; GFX90A-NEXT: ; use s8
5947 ; GFX90A-NEXT: ;;#ASMEND
5948 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
5950 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_4:
5952 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5953 ; GFX940-NEXT: ;;#ASMSTART
5954 ; GFX940-NEXT: ; def s[0:1]
5955 ; GFX940-NEXT: ;;#ASMEND
5956 ; GFX940-NEXT: s_lshr_b32 s8, s1, 16
5957 ; GFX940-NEXT: ;;#ASMSTART
5958 ; GFX940-NEXT: ; use s8
5959 ; GFX940-NEXT: ;;#ASMEND
5960 ; GFX940-NEXT: s_setpc_b64 s[30:31]
5961 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5962 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 3, i32 4>
5963 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <4, 4>: both lanes address element 0 of the second operand, which is
; poison here, so the whole result is undefined. All three targets share one
; check body, which contains only the use of s8 and no def.
5967 define void @s_shuffle_v2i16_v4i16__4_4() {
5968 ; GFX9-LABEL: s_shuffle_v2i16_v4i16__4_4:
5970 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5971 ; GFX9-NEXT: ;;#ASMSTART
5972 ; GFX9-NEXT: ; use s8
5973 ; GFX9-NEXT: ;;#ASMEND
5974 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5975 %vec0 = call <4 x i16> asm "; def $0", "=s"()
5976 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> <i32 4, i32 4>
5977 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <5, 4>: lane 0 is element 1 and lane 1 is element 0 of %vec1, i.e. the
; two lowest elements of %vec1 swapped. No mask index selects from %vec0, and
; the checks contain only one def. The swap is expected as s_lshr_b32 by 16
; followed by s_pack_ll_b32_b16.
5981 define void @s_shuffle_v2i16_v4i16__5_4() {
5982 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__5_4:
5984 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5985 ; GFX900-NEXT: ;;#ASMSTART
5986 ; GFX900-NEXT: ; def s[4:5]
5987 ; GFX900-NEXT: ;;#ASMEND
5988 ; GFX900-NEXT: s_lshr_b32 s5, s4, 16
5989 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
5990 ; GFX900-NEXT: ;;#ASMSTART
5991 ; GFX900-NEXT: ; use s8
5992 ; GFX900-NEXT: ;;#ASMEND
5993 ; GFX900-NEXT: s_setpc_b64 s[30:31]
5995 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__5_4:
5997 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5998 ; GFX90A-NEXT: ;;#ASMSTART
5999 ; GFX90A-NEXT: ; def s[4:5]
6000 ; GFX90A-NEXT: ;;#ASMEND
6001 ; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
6002 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
6003 ; GFX90A-NEXT: ;;#ASMSTART
6004 ; GFX90A-NEXT: ; use s8
6005 ; GFX90A-NEXT: ;;#ASMEND
6006 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6008 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_4:
6010 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6011 ; GFX940-NEXT: ;;#ASMSTART
6012 ; GFX940-NEXT: ; def s[0:1]
6013 ; GFX940-NEXT: ;;#ASMEND
6014 ; GFX940-NEXT: s_lshr_b32 s1, s0, 16
6015 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
6016 ; GFX940-NEXT: ;;#ASMSTART
6017 ; GFX940-NEXT: ; use s8
6018 ; GFX940-NEXT: ;;#ASMEND
6019 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6020 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6021 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6022 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 4>
6023 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <6, 4>: lane 0 is element 2 and lane 1 is element 0 of %vec1. No mask
; index selects from %vec0, and the checks contain only one def. The result
; is expected from a single s_pack_ll_b32_b16 with the upper dword first.
6027 define void @s_shuffle_v2i16_v4i16__6_4() {
6028 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__6_4:
6030 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6031 ; GFX900-NEXT: ;;#ASMSTART
6032 ; GFX900-NEXT: ; def s[4:5]
6033 ; GFX900-NEXT: ;;#ASMEND
6034 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
6035 ; GFX900-NEXT: ;;#ASMSTART
6036 ; GFX900-NEXT: ; use s8
6037 ; GFX900-NEXT: ;;#ASMEND
6038 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6040 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__6_4:
6042 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6043 ; GFX90A-NEXT: ;;#ASMSTART
6044 ; GFX90A-NEXT: ; def s[4:5]
6045 ; GFX90A-NEXT: ;;#ASMEND
6046 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
6047 ; GFX90A-NEXT: ;;#ASMSTART
6048 ; GFX90A-NEXT: ; use s8
6049 ; GFX90A-NEXT: ;;#ASMEND
6050 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6052 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_4:
6054 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6055 ; GFX940-NEXT: ;;#ASMSTART
6056 ; GFX940-NEXT: ; def s[0:1]
6057 ; GFX940-NEXT: ;;#ASMEND
6058 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
6059 ; GFX940-NEXT: ;;#ASMSTART
6060 ; GFX940-NEXT: ; use s8
6061 ; GFX940-NEXT: ;;#ASMEND
6062 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6063 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6064 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6065 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 4>
6066 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <poison, 5>: lane 0 is undefined and lane 1 is element 1 of %vec1.
; No mask index selects from %vec0, and the checks contain only one def.
; That def lands directly in s[8:9] with no shuffle instruction before the
; use (the gfx940 output additionally contains an s_nop 0).
6070 define void @s_shuffle_v2i16_v4i16__u_5() {
6071 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__u_5:
6073 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6074 ; GFX900-NEXT: ;;#ASMSTART
6075 ; GFX900-NEXT: ; def s[8:9]
6076 ; GFX900-NEXT: ;;#ASMEND
6077 ; GFX900-NEXT: ;;#ASMSTART
6078 ; GFX900-NEXT: ; use s8
6079 ; GFX900-NEXT: ;;#ASMEND
6080 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6082 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__u_5:
6084 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6085 ; GFX90A-NEXT: ;;#ASMSTART
6086 ; GFX90A-NEXT: ; def s[8:9]
6087 ; GFX90A-NEXT: ;;#ASMEND
6088 ; GFX90A-NEXT: ;;#ASMSTART
6089 ; GFX90A-NEXT: ; use s8
6090 ; GFX90A-NEXT: ;;#ASMEND
6091 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6093 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__u_5:
6095 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6096 ; GFX940-NEXT: ;;#ASMSTART
6097 ; GFX940-NEXT: ; def s[8:9]
6098 ; GFX940-NEXT: ;;#ASMEND
6099 ; GFX940-NEXT: s_nop 0
6100 ; GFX940-NEXT: ;;#ASMSTART
6101 ; GFX940-NEXT: ; use s8
6102 ; GFX940-NEXT: ;;#ASMEND
6103 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6104 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6105 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6106 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 poison, i32 5>
6107 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <0, 5>: lane 0 is element 0 of %vec0 and lane 1 is element 1 of %vec1.
; The checks expect one s_pack_lh_b32_b16 over the first dword of each input
; pair, written to s8.
6111 define void @s_shuffle_v2i16_v4i16__0_5() {
6112 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__0_5:
6114 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6115 ; GFX900-NEXT: ;;#ASMSTART
6116 ; GFX900-NEXT: ; def s[4:5]
6117 ; GFX900-NEXT: ;;#ASMEND
6118 ; GFX900-NEXT: ;;#ASMSTART
6119 ; GFX900-NEXT: ; def s[6:7]
6120 ; GFX900-NEXT: ;;#ASMEND
6121 ; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s6
6122 ; GFX900-NEXT: ;;#ASMSTART
6123 ; GFX900-NEXT: ; use s8
6124 ; GFX900-NEXT: ;;#ASMEND
6125 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6127 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__0_5:
6129 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6130 ; GFX90A-NEXT: ;;#ASMSTART
6131 ; GFX90A-NEXT: ; def s[4:5]
6132 ; GFX90A-NEXT: ;;#ASMEND
6133 ; GFX90A-NEXT: ;;#ASMSTART
6134 ; GFX90A-NEXT: ; def s[6:7]
6135 ; GFX90A-NEXT: ;;#ASMEND
6136 ; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s6
6137 ; GFX90A-NEXT: ;;#ASMSTART
6138 ; GFX90A-NEXT: ; use s8
6139 ; GFX90A-NEXT: ;;#ASMEND
6140 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6142 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_5:
6144 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6145 ; GFX940-NEXT: ;;#ASMSTART
6146 ; GFX940-NEXT: ; def s[0:1]
6147 ; GFX940-NEXT: ;;#ASMEND
6148 ; GFX940-NEXT: ;;#ASMSTART
6149 ; GFX940-NEXT: ; def s[2:3]
6150 ; GFX940-NEXT: ;;#ASMEND
6151 ; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2
6152 ; GFX940-NEXT: ;;#ASMSTART
6153 ; GFX940-NEXT: ; use s8
6154 ; GFX940-NEXT: ;;#ASMEND
6155 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6156 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6157 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6158 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 0, i32 5>
6159 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <1, 5>: lane 0 is element 1 of %vec0 and lane 1 is element 1 of %vec1.
; The checks expect one s_pack_hh_b32_b16 over the first dword of each input
; pair, written to s8.
6163 define void @s_shuffle_v2i16_v4i16__1_5() {
6164 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__1_5:
6166 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6167 ; GFX900-NEXT: ;;#ASMSTART
6168 ; GFX900-NEXT: ; def s[4:5]
6169 ; GFX900-NEXT: ;;#ASMEND
6170 ; GFX900-NEXT: ;;#ASMSTART
6171 ; GFX900-NEXT: ; def s[6:7]
6172 ; GFX900-NEXT: ;;#ASMEND
6173 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s6
6174 ; GFX900-NEXT: ;;#ASMSTART
6175 ; GFX900-NEXT: ; use s8
6176 ; GFX900-NEXT: ;;#ASMEND
6177 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6179 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__1_5:
6181 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6182 ; GFX90A-NEXT: ;;#ASMSTART
6183 ; GFX90A-NEXT: ; def s[4:5]
6184 ; GFX90A-NEXT: ;;#ASMEND
6185 ; GFX90A-NEXT: ;;#ASMSTART
6186 ; GFX90A-NEXT: ; def s[6:7]
6187 ; GFX90A-NEXT: ;;#ASMEND
6188 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s6
6189 ; GFX90A-NEXT: ;;#ASMSTART
6190 ; GFX90A-NEXT: ; use s8
6191 ; GFX90A-NEXT: ;;#ASMEND
6192 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6194 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_5:
6196 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6197 ; GFX940-NEXT: ;;#ASMSTART
6198 ; GFX940-NEXT: ; def s[0:1]
6199 ; GFX940-NEXT: ;;#ASMEND
6200 ; GFX940-NEXT: ;;#ASMSTART
6201 ; GFX940-NEXT: ; def s[2:3]
6202 ; GFX940-NEXT: ;;#ASMEND
6203 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2
6204 ; GFX940-NEXT: ;;#ASMSTART
6205 ; GFX940-NEXT: ; use s8
6206 ; GFX940-NEXT: ;;#ASMEND
6207 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6208 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6209 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6210 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 1, i32 5>
6211 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <2, 5>: lane 0 is element 2 of %vec0 and lane 1 is element 1 of %vec1.
; The checks expect one s_pack_lh_b32_b16 taking the second dword of the
; first input pair and the first dword of the second pair, written to s8.
6215 define void @s_shuffle_v2i16_v4i16__2_5() {
6216 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__2_5:
6218 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6219 ; GFX900-NEXT: ;;#ASMSTART
6220 ; GFX900-NEXT: ; def s[4:5]
6221 ; GFX900-NEXT: ;;#ASMEND
6222 ; GFX900-NEXT: ;;#ASMSTART
6223 ; GFX900-NEXT: ; def s[6:7]
6224 ; GFX900-NEXT: ;;#ASMEND
6225 ; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s6
6226 ; GFX900-NEXT: ;;#ASMSTART
6227 ; GFX900-NEXT: ; use s8
6228 ; GFX900-NEXT: ;;#ASMEND
6229 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6231 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__2_5:
6233 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6234 ; GFX90A-NEXT: ;;#ASMSTART
6235 ; GFX90A-NEXT: ; def s[4:5]
6236 ; GFX90A-NEXT: ;;#ASMEND
6237 ; GFX90A-NEXT: ;;#ASMSTART
6238 ; GFX90A-NEXT: ; def s[6:7]
6239 ; GFX90A-NEXT: ;;#ASMEND
6240 ; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s6
6241 ; GFX90A-NEXT: ;;#ASMSTART
6242 ; GFX90A-NEXT: ; use s8
6243 ; GFX90A-NEXT: ;;#ASMEND
6244 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6246 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_5:
6248 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6249 ; GFX940-NEXT: ;;#ASMSTART
6250 ; GFX940-NEXT: ; def s[0:1]
6251 ; GFX940-NEXT: ;;#ASMEND
6252 ; GFX940-NEXT: ;;#ASMSTART
6253 ; GFX940-NEXT: ; def s[2:3]
6254 ; GFX940-NEXT: ;;#ASMEND
6255 ; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2
6256 ; GFX940-NEXT: ;;#ASMSTART
6257 ; GFX940-NEXT: ; use s8
6258 ; GFX940-NEXT: ;;#ASMEND
6259 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6260 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6261 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6262 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 2, i32 5>
6263 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <3, 5>: lane 0 is element 3 of %vec0 and lane 1 is element 1 of %vec1.
; The checks expect one s_pack_hh_b32_b16 taking the second dword of the
; first input pair and the first dword of the second pair, written to s8.
6267 define void @s_shuffle_v2i16_v4i16__3_5() {
6268 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__3_5:
6270 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6271 ; GFX900-NEXT: ;;#ASMSTART
6272 ; GFX900-NEXT: ; def s[4:5]
6273 ; GFX900-NEXT: ;;#ASMEND
6274 ; GFX900-NEXT: ;;#ASMSTART
6275 ; GFX900-NEXT: ; def s[6:7]
6276 ; GFX900-NEXT: ;;#ASMEND
6277 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s6
6278 ; GFX900-NEXT: ;;#ASMSTART
6279 ; GFX900-NEXT: ; use s8
6280 ; GFX900-NEXT: ;;#ASMEND
6281 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6283 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__3_5:
6285 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6286 ; GFX90A-NEXT: ;;#ASMSTART
6287 ; GFX90A-NEXT: ; def s[4:5]
6288 ; GFX90A-NEXT: ;;#ASMEND
6289 ; GFX90A-NEXT: ;;#ASMSTART
6290 ; GFX90A-NEXT: ; def s[6:7]
6291 ; GFX90A-NEXT: ;;#ASMEND
6292 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s6
6293 ; GFX90A-NEXT: ;;#ASMSTART
6294 ; GFX90A-NEXT: ; use s8
6295 ; GFX90A-NEXT: ;;#ASMEND
6296 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6298 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_5:
6300 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6301 ; GFX940-NEXT: ;;#ASMSTART
6302 ; GFX940-NEXT: ; def s[0:1]
6303 ; GFX940-NEXT: ;;#ASMEND
6304 ; GFX940-NEXT: ;;#ASMSTART
6305 ; GFX940-NEXT: ; def s[2:3]
6306 ; GFX940-NEXT: ;;#ASMEND
6307 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s2
6308 ; GFX940-NEXT: ;;#ASMSTART
6309 ; GFX940-NEXT: ; use s8
6310 ; GFX940-NEXT: ;;#ASMEND
6311 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6312 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6313 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6314 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 3, i32 5>
6315 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Mask <4, 5>: the result is the two lowest elements of %vec1 in their
; original order. No mask index selects from %vec0, and the checks contain
; only one def. That def lands directly in s[8:9] with no shuffle instruction
; before the use (the gfx940 output additionally contains an s_nop 0).
6319 define void @s_shuffle_v2i16_v4i16__4_5() {
6320 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__4_5:
6322 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6323 ; GFX900-NEXT: ;;#ASMSTART
6324 ; GFX900-NEXT: ; def s[8:9]
6325 ; GFX900-NEXT: ;;#ASMEND
6326 ; GFX900-NEXT: ;;#ASMSTART
6327 ; GFX900-NEXT: ; use s8
6328 ; GFX900-NEXT: ;;#ASMEND
6329 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6331 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__4_5:
6333 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6334 ; GFX90A-NEXT: ;;#ASMSTART
6335 ; GFX90A-NEXT: ; def s[8:9]
6336 ; GFX90A-NEXT: ;;#ASMEND
6337 ; GFX90A-NEXT: ;;#ASMSTART
6338 ; GFX90A-NEXT: ; use s8
6339 ; GFX90A-NEXT: ;;#ASMEND
6340 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6342 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__4_5:
6344 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6345 ; GFX940-NEXT: ;;#ASMSTART
6346 ; GFX940-NEXT: ; def s[8:9]
6347 ; GFX940-NEXT: ;;#ASMEND
6348 ; GFX940-NEXT: s_nop 0
6349 ; GFX940-NEXT: ;;#ASMSTART
6350 ; GFX940-NEXT: ; use s8
6351 ; GFX940-NEXT: ;;#ASMEND
6352 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6353 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6354 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6355 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 4, i32 5>
6356 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <5, 5>: element 1 of %vec1 is placed in both result
; lanes (mask indices 4-7 address the second operand). %vec0 is not referenced
; by the mask and the checks show a single asm def. The expected lowering is
; one s_pack_hh_b32_b16 into s8 that reads the first register of the
; asm-defined pair for both sources.
6360 define void @s_shuffle_v2i16_v4i16__5_5() {
6361 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__5_5:
6363 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6364 ; GFX900-NEXT: ;;#ASMSTART
6365 ; GFX900-NEXT: ; def s[4:5]
6366 ; GFX900-NEXT: ;;#ASMEND
6367 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s4
6368 ; GFX900-NEXT: ;;#ASMSTART
6369 ; GFX900-NEXT: ; use s8
6370 ; GFX900-NEXT: ;;#ASMEND
6371 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6373 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__5_5:
6375 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6376 ; GFX90A-NEXT: ;;#ASMSTART
6377 ; GFX90A-NEXT: ; def s[4:5]
6378 ; GFX90A-NEXT: ;;#ASMEND
6379 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s4
6380 ; GFX90A-NEXT: ;;#ASMSTART
6381 ; GFX90A-NEXT: ; use s8
6382 ; GFX90A-NEXT: ;;#ASMEND
6383 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6385 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_5:
6387 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6388 ; GFX940-NEXT: ;;#ASMSTART
6389 ; GFX940-NEXT: ; def s[0:1]
6390 ; GFX940-NEXT: ;;#ASMEND
6391 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0
6392 ; GFX940-NEXT: ;;#ASMSTART
6393 ; GFX940-NEXT: ; use s8
6394 ; GFX940-NEXT: ;;#ASMEND
6395 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6396 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6397 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6398 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 5>
6399 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <6, 5>: result lane 0 is element 2 of %vec1 and
; lane 1 is element 1 of %vec1 (mask indices 4-7 address the second operand).
; %vec0 is not referenced by the mask and the checks show a single asm def.
; The expected lowering is one s_pack_lh_b32_b16 into s8 whose first source is
; the second register of the asm-defined pair and whose second source is the
; first register.
6403 define void @s_shuffle_v2i16_v4i16__6_5() {
6404 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__6_5:
6406 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6407 ; GFX900-NEXT: ;;#ASMSTART
6408 ; GFX900-NEXT: ; def s[4:5]
6409 ; GFX900-NEXT: ;;#ASMEND
6410 ; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4
6411 ; GFX900-NEXT: ;;#ASMSTART
6412 ; GFX900-NEXT: ; use s8
6413 ; GFX900-NEXT: ;;#ASMEND
6414 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6416 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__6_5:
6418 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6419 ; GFX90A-NEXT: ;;#ASMSTART
6420 ; GFX90A-NEXT: ; def s[4:5]
6421 ; GFX90A-NEXT: ;;#ASMEND
6422 ; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4
6423 ; GFX90A-NEXT: ;;#ASMSTART
6424 ; GFX90A-NEXT: ; use s8
6425 ; GFX90A-NEXT: ;;#ASMEND
6426 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6428 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_5:
6430 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6431 ; GFX940-NEXT: ;;#ASMSTART
6432 ; GFX940-NEXT: ; def s[0:1]
6433 ; GFX940-NEXT: ;;#ASMEND
6434 ; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0
6435 ; GFX940-NEXT: ;;#ASMSTART
6436 ; GFX940-NEXT: ; use s8
6437 ; GFX940-NEXT: ;;#ASMEND
6438 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6439 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6440 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6441 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 5>
6442 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <poison, 6>: only result lane 1 is defined, taking
; element 2 of %vec1 (mask indices 4-7 address the second operand). %vec0 is
; not referenced by the mask and the checks show a single asm def. The
; expected lowering is one s_lshl_b32 by 16 of the second register of the
; asm-defined pair into s8.
6446 define void @s_shuffle_v2i16_v4i16__u_6() {
6447 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__u_6:
6449 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6450 ; GFX900-NEXT: ;;#ASMSTART
6451 ; GFX900-NEXT: ; def s[4:5]
6452 ; GFX900-NEXT: ;;#ASMEND
6453 ; GFX900-NEXT: s_lshl_b32 s8, s5, 16
6454 ; GFX900-NEXT: ;;#ASMSTART
6455 ; GFX900-NEXT: ; use s8
6456 ; GFX900-NEXT: ;;#ASMEND
6457 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6459 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__u_6:
6461 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6462 ; GFX90A-NEXT: ;;#ASMSTART
6463 ; GFX90A-NEXT: ; def s[4:5]
6464 ; GFX90A-NEXT: ;;#ASMEND
6465 ; GFX90A-NEXT: s_lshl_b32 s8, s5, 16
6466 ; GFX90A-NEXT: ;;#ASMSTART
6467 ; GFX90A-NEXT: ; use s8
6468 ; GFX90A-NEXT: ;;#ASMEND
6469 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6471 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__u_6:
6473 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6474 ; GFX940-NEXT: ;;#ASMSTART
6475 ; GFX940-NEXT: ; def s[0:1]
6476 ; GFX940-NEXT: ;;#ASMEND
6477 ; GFX940-NEXT: s_lshl_b32 s8, s1, 16
6478 ; GFX940-NEXT: ;;#ASMSTART
6479 ; GFX940-NEXT: ; use s8
6480 ; GFX940-NEXT: ;;#ASMEND
6481 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6482 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6483 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6484 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 poison, i32 6>
6485 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <0, 6>: result lane 0 is element 0 of %vec0 and
; lane 1 is element 2 of %vec1 (mask indices 0-3 address the first operand,
; 4-7 the second). Both asm defs are kept. The expected lowering is one
; s_pack_ll_b32_b16 into s8 combining the first register of the first pair
; with the second register of the second pair.
6489 define void @s_shuffle_v2i16_v4i16__0_6() {
6490 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__0_6:
6492 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6493 ; GFX900-NEXT: ;;#ASMSTART
6494 ; GFX900-NEXT: ; def s[4:5]
6495 ; GFX900-NEXT: ;;#ASMEND
6496 ; GFX900-NEXT: ;;#ASMSTART
6497 ; GFX900-NEXT: ; def s[6:7]
6498 ; GFX900-NEXT: ;;#ASMEND
6499 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7
6500 ; GFX900-NEXT: ;;#ASMSTART
6501 ; GFX900-NEXT: ; use s8
6502 ; GFX900-NEXT: ;;#ASMEND
6503 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6505 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__0_6:
6507 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6508 ; GFX90A-NEXT: ;;#ASMSTART
6509 ; GFX90A-NEXT: ; def s[4:5]
6510 ; GFX90A-NEXT: ;;#ASMEND
6511 ; GFX90A-NEXT: ;;#ASMSTART
6512 ; GFX90A-NEXT: ; def s[6:7]
6513 ; GFX90A-NEXT: ;;#ASMEND
6514 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7
6515 ; GFX90A-NEXT: ;;#ASMSTART
6516 ; GFX90A-NEXT: ; use s8
6517 ; GFX90A-NEXT: ;;#ASMEND
6518 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6520 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_6:
6522 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6523 ; GFX940-NEXT: ;;#ASMSTART
6524 ; GFX940-NEXT: ; def s[0:1]
6525 ; GFX940-NEXT: ;;#ASMEND
6526 ; GFX940-NEXT: ;;#ASMSTART
6527 ; GFX940-NEXT: ; def s[2:3]
6528 ; GFX940-NEXT: ;;#ASMEND
6529 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3
6530 ; GFX940-NEXT: ;;#ASMSTART
6531 ; GFX940-NEXT: ; use s8
6532 ; GFX940-NEXT: ;;#ASMEND
6533 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6534 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6535 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6536 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 0, i32 6>
6537 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <1, 6>: result lane 0 is element 1 of %vec0 and
; lane 1 is element 2 of %vec1 (mask indices 0-3 address the first operand,
; 4-7 the second). Both asm defs are kept. The expected lowering is an
; s_lshr_b32 by 16 of the first register of the first pair, emitted between
; the two def blocks, followed by one s_pack_ll_b32_b16 into s8.
6541 define void @s_shuffle_v2i16_v4i16__1_6() {
6542 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__1_6:
6544 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6545 ; GFX900-NEXT: ;;#ASMSTART
6546 ; GFX900-NEXT: ; def s[4:5]
6547 ; GFX900-NEXT: ;;#ASMEND
6548 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
6549 ; GFX900-NEXT: ;;#ASMSTART
6550 ; GFX900-NEXT: ; def s[6:7]
6551 ; GFX900-NEXT: ;;#ASMEND
6552 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7
6553 ; GFX900-NEXT: ;;#ASMSTART
6554 ; GFX900-NEXT: ; use s8
6555 ; GFX900-NEXT: ;;#ASMEND
6556 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6558 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__1_6:
6560 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6561 ; GFX90A-NEXT: ;;#ASMSTART
6562 ; GFX90A-NEXT: ; def s[4:5]
6563 ; GFX90A-NEXT: ;;#ASMEND
6564 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
6565 ; GFX90A-NEXT: ;;#ASMSTART
6566 ; GFX90A-NEXT: ; def s[6:7]
6567 ; GFX90A-NEXT: ;;#ASMEND
6568 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7
6569 ; GFX90A-NEXT: ;;#ASMSTART
6570 ; GFX90A-NEXT: ; use s8
6571 ; GFX90A-NEXT: ;;#ASMEND
6572 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6574 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_6:
6576 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6577 ; GFX940-NEXT: ;;#ASMSTART
6578 ; GFX940-NEXT: ; def s[0:1]
6579 ; GFX940-NEXT: ;;#ASMEND
6580 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
6581 ; GFX940-NEXT: ;;#ASMSTART
6582 ; GFX940-NEXT: ; def s[2:3]
6583 ; GFX940-NEXT: ;;#ASMEND
6584 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3
6585 ; GFX940-NEXT: ;;#ASMSTART
6586 ; GFX940-NEXT: ; use s8
6587 ; GFX940-NEXT: ;;#ASMEND
6588 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6589 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6590 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6591 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 1, i32 6>
6592 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <2, 6>: result lane 0 is element 2 of %vec0 and
; lane 1 is element 2 of %vec1 (mask indices 0-3 address the first operand,
; 4-7 the second). Both asm defs are kept. The expected lowering is one
; s_pack_ll_b32_b16 into s8 combining the second register of each pair.
6596 define void @s_shuffle_v2i16_v4i16__2_6() {
6597 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__2_6:
6599 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6600 ; GFX900-NEXT: ;;#ASMSTART
6601 ; GFX900-NEXT: ; def s[4:5]
6602 ; GFX900-NEXT: ;;#ASMEND
6603 ; GFX900-NEXT: ;;#ASMSTART
6604 ; GFX900-NEXT: ; def s[6:7]
6605 ; GFX900-NEXT: ;;#ASMEND
6606 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7
6607 ; GFX900-NEXT: ;;#ASMSTART
6608 ; GFX900-NEXT: ; use s8
6609 ; GFX900-NEXT: ;;#ASMEND
6610 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6612 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__2_6:
6614 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6615 ; GFX90A-NEXT: ;;#ASMSTART
6616 ; GFX90A-NEXT: ; def s[4:5]
6617 ; GFX90A-NEXT: ;;#ASMEND
6618 ; GFX90A-NEXT: ;;#ASMSTART
6619 ; GFX90A-NEXT: ; def s[6:7]
6620 ; GFX90A-NEXT: ;;#ASMEND
6621 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7
6622 ; GFX90A-NEXT: ;;#ASMSTART
6623 ; GFX90A-NEXT: ; use s8
6624 ; GFX90A-NEXT: ;;#ASMEND
6625 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6627 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_6:
6629 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6630 ; GFX940-NEXT: ;;#ASMSTART
6631 ; GFX940-NEXT: ; def s[0:1]
6632 ; GFX940-NEXT: ;;#ASMEND
6633 ; GFX940-NEXT: ;;#ASMSTART
6634 ; GFX940-NEXT: ; def s[2:3]
6635 ; GFX940-NEXT: ;;#ASMEND
6636 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3
6637 ; GFX940-NEXT: ;;#ASMSTART
6638 ; GFX940-NEXT: ; use s8
6639 ; GFX940-NEXT: ;;#ASMEND
6640 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6641 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6642 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6643 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 2, i32 6>
6644 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <3, 6>: result lane 0 is element 3 of %vec0 and
; lane 1 is element 2 of %vec1 (mask indices 0-3 address the first operand,
; 4-7 the second). Both asm defs are kept. The expected lowering is an
; s_lshr_b32 by 16 of the second register of the first pair, emitted between
; the two def blocks, followed by one s_pack_ll_b32_b16 into s8.
6648 define void @s_shuffle_v2i16_v4i16__3_6() {
6649 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__3_6:
6651 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6652 ; GFX900-NEXT: ;;#ASMSTART
6653 ; GFX900-NEXT: ; def s[4:5]
6654 ; GFX900-NEXT: ;;#ASMEND
6655 ; GFX900-NEXT: s_lshr_b32 s4, s5, 16
6656 ; GFX900-NEXT: ;;#ASMSTART
6657 ; GFX900-NEXT: ; def s[6:7]
6658 ; GFX900-NEXT: ;;#ASMEND
6659 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7
6660 ; GFX900-NEXT: ;;#ASMSTART
6661 ; GFX900-NEXT: ; use s8
6662 ; GFX900-NEXT: ;;#ASMEND
6663 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6665 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__3_6:
6667 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6668 ; GFX90A-NEXT: ;;#ASMSTART
6669 ; GFX90A-NEXT: ; def s[4:5]
6670 ; GFX90A-NEXT: ;;#ASMEND
6671 ; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
6672 ; GFX90A-NEXT: ;;#ASMSTART
6673 ; GFX90A-NEXT: ; def s[6:7]
6674 ; GFX90A-NEXT: ;;#ASMEND
6675 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7
6676 ; GFX90A-NEXT: ;;#ASMSTART
6677 ; GFX90A-NEXT: ; use s8
6678 ; GFX90A-NEXT: ;;#ASMEND
6679 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6681 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_6:
6683 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6684 ; GFX940-NEXT: ;;#ASMSTART
6685 ; GFX940-NEXT: ; def s[0:1]
6686 ; GFX940-NEXT: ;;#ASMEND
6687 ; GFX940-NEXT: s_lshr_b32 s0, s1, 16
6688 ; GFX940-NEXT: ;;#ASMSTART
6689 ; GFX940-NEXT: ; def s[2:3]
6690 ; GFX940-NEXT: ;;#ASMEND
6691 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3
6692 ; GFX940-NEXT: ;;#ASMSTART
6693 ; GFX940-NEXT: ; use s8
6694 ; GFX940-NEXT: ;;#ASMEND
6695 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6696 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6697 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6698 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 3, i32 6>
6699 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <4, 6>: result lanes are elements 0 and 2 of %vec1
; (mask indices 4-7 address the second operand). %vec0 is not referenced by
; the mask and the checks show a single asm def. The expected lowering is one
; s_pack_ll_b32_b16 into s8 combining the two registers of the asm-defined
; pair in order.
6703 define void @s_shuffle_v2i16_v4i16__4_6() {
6704 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__4_6:
6706 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6707 ; GFX900-NEXT: ;;#ASMSTART
6708 ; GFX900-NEXT: ; def s[4:5]
6709 ; GFX900-NEXT: ;;#ASMEND
6710 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
6711 ; GFX900-NEXT: ;;#ASMSTART
6712 ; GFX900-NEXT: ; use s8
6713 ; GFX900-NEXT: ;;#ASMEND
6714 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6716 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__4_6:
6718 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6719 ; GFX90A-NEXT: ;;#ASMSTART
6720 ; GFX90A-NEXT: ; def s[4:5]
6721 ; GFX90A-NEXT: ;;#ASMEND
6722 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
6723 ; GFX90A-NEXT: ;;#ASMSTART
6724 ; GFX90A-NEXT: ; use s8
6725 ; GFX90A-NEXT: ;;#ASMEND
6726 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6728 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__4_6:
6730 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6731 ; GFX940-NEXT: ;;#ASMSTART
6732 ; GFX940-NEXT: ; def s[0:1]
6733 ; GFX940-NEXT: ;;#ASMEND
6734 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
6735 ; GFX940-NEXT: ;;#ASMSTART
6736 ; GFX940-NEXT: ; use s8
6737 ; GFX940-NEXT: ;;#ASMEND
6738 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6739 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6740 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6741 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 4, i32 6>
6742 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <5, 6>: result lanes are elements 1 and 2 of %vec1
; (mask indices 4-7 address the second operand). %vec0 is not referenced by
; the mask and the checks show a single asm def. The expected lowering is an
; s_lshr_b32 by 16 of the first register of the asm-defined pair followed by
; one s_pack_ll_b32_b16 into s8 with the second register.
6746 define void @s_shuffle_v2i16_v4i16__5_6() {
6747 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__5_6:
6749 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6750 ; GFX900-NEXT: ;;#ASMSTART
6751 ; GFX900-NEXT: ; def s[4:5]
6752 ; GFX900-NEXT: ;;#ASMEND
6753 ; GFX900-NEXT: s_lshr_b32 s4, s4, 16
6754 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
6755 ; GFX900-NEXT: ;;#ASMSTART
6756 ; GFX900-NEXT: ; use s8
6757 ; GFX900-NEXT: ;;#ASMEND
6758 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6760 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__5_6:
6762 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6763 ; GFX90A-NEXT: ;;#ASMSTART
6764 ; GFX90A-NEXT: ; def s[4:5]
6765 ; GFX90A-NEXT: ;;#ASMEND
6766 ; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
6767 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
6768 ; GFX90A-NEXT: ;;#ASMSTART
6769 ; GFX90A-NEXT: ; use s8
6770 ; GFX90A-NEXT: ;;#ASMEND
6771 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6773 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_6:
6775 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6776 ; GFX940-NEXT: ;;#ASMSTART
6777 ; GFX940-NEXT: ; def s[0:1]
6778 ; GFX940-NEXT: ;;#ASMEND
6779 ; GFX940-NEXT: s_lshr_b32 s0, s0, 16
6780 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
6781 ; GFX940-NEXT: ;;#ASMSTART
6782 ; GFX940-NEXT: ; use s8
6783 ; GFX940-NEXT: ;;#ASMEND
6784 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6785 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6786 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6787 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 6>
6788 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <6, 6>: element 2 of %vec1 is placed in both result
; lanes (mask indices 4-7 address the second operand). %vec0 is not referenced
; by the mask and the checks show a single asm def. The expected lowering is
; one s_pack_ll_b32_b16 into s8 that reads the second register of the
; asm-defined pair for both sources.
6792 define void @s_shuffle_v2i16_v4i16__6_6() {
6793 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__6_6:
6795 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6796 ; GFX900-NEXT: ;;#ASMSTART
6797 ; GFX900-NEXT: ; def s[4:5]
6798 ; GFX900-NEXT: ;;#ASMEND
6799 ; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
6800 ; GFX900-NEXT: ;;#ASMSTART
6801 ; GFX900-NEXT: ; use s8
6802 ; GFX900-NEXT: ;;#ASMEND
6803 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6805 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__6_6:
6807 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6808 ; GFX90A-NEXT: ;;#ASMSTART
6809 ; GFX90A-NEXT: ; def s[4:5]
6810 ; GFX90A-NEXT: ;;#ASMEND
6811 ; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
6812 ; GFX90A-NEXT: ;;#ASMSTART
6813 ; GFX90A-NEXT: ; use s8
6814 ; GFX90A-NEXT: ;;#ASMEND
6815 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6817 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_6:
6819 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6820 ; GFX940-NEXT: ;;#ASMSTART
6821 ; GFX940-NEXT: ; def s[0:1]
6822 ; GFX940-NEXT: ;;#ASMEND
6823 ; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
6824 ; GFX940-NEXT: ;;#ASMSTART
6825 ; GFX940-NEXT: ; use s8
6826 ; GFX940-NEXT: ;;#ASMEND
6827 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6828 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6829 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6830 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 6>
6831 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <poison, 7>: only result lane 1 is defined, taking
; element 3 of %vec1 (mask indices 4-7 address the second operand). %vec0 is
; not referenced by the mask and the checks show a single asm def. The
; expected lowering is a plain s_mov_b32 of the second register of the
; asm-defined pair into s8, with no shift or pack.
6835 define void @s_shuffle_v2i16_v4i16__u_7() {
6836 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__u_7:
6838 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6839 ; GFX900-NEXT: ;;#ASMSTART
6840 ; GFX900-NEXT: ; def s[4:5]
6841 ; GFX900-NEXT: ;;#ASMEND
6842 ; GFX900-NEXT: s_mov_b32 s8, s5
6843 ; GFX900-NEXT: ;;#ASMSTART
6844 ; GFX900-NEXT: ; use s8
6845 ; GFX900-NEXT: ;;#ASMEND
6846 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6848 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__u_7:
6850 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6851 ; GFX90A-NEXT: ;;#ASMSTART
6852 ; GFX90A-NEXT: ; def s[4:5]
6853 ; GFX90A-NEXT: ;;#ASMEND
6854 ; GFX90A-NEXT: s_mov_b32 s8, s5
6855 ; GFX90A-NEXT: ;;#ASMSTART
6856 ; GFX90A-NEXT: ; use s8
6857 ; GFX90A-NEXT: ;;#ASMEND
6858 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6860 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__u_7:
6862 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6863 ; GFX940-NEXT: ;;#ASMSTART
6864 ; GFX940-NEXT: ; def s[0:1]
6865 ; GFX940-NEXT: ;;#ASMEND
6866 ; GFX940-NEXT: s_mov_b32 s8, s1
6867 ; GFX940-NEXT: ;;#ASMSTART
6868 ; GFX940-NEXT: ; use s8
6869 ; GFX940-NEXT: ;;#ASMEND
6870 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6871 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6872 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6873 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 poison, i32 7>
6874 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <0, 7>: result lane 0 is element 0 of %vec0 and
; lane 1 is element 3 of %vec1 (mask indices 0-3 address the first operand,
; 4-7 the second). Both asm defs are kept. The expected lowering is one
; s_pack_lh_b32_b16 into s8 combining the first register of the first pair
; with the second register of the second pair.
6878 define void @s_shuffle_v2i16_v4i16__0_7() {
6879 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__0_7:
6881 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6882 ; GFX900-NEXT: ;;#ASMSTART
6883 ; GFX900-NEXT: ; def s[4:5]
6884 ; GFX900-NEXT: ;;#ASMEND
6885 ; GFX900-NEXT: ;;#ASMSTART
6886 ; GFX900-NEXT: ; def s[6:7]
6887 ; GFX900-NEXT: ;;#ASMEND
6888 ; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s7
6889 ; GFX900-NEXT: ;;#ASMSTART
6890 ; GFX900-NEXT: ; use s8
6891 ; GFX900-NEXT: ;;#ASMEND
6892 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6894 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__0_7:
6896 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6897 ; GFX90A-NEXT: ;;#ASMSTART
6898 ; GFX90A-NEXT: ; def s[4:5]
6899 ; GFX90A-NEXT: ;;#ASMEND
6900 ; GFX90A-NEXT: ;;#ASMSTART
6901 ; GFX90A-NEXT: ; def s[6:7]
6902 ; GFX90A-NEXT: ;;#ASMEND
6903 ; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s7
6904 ; GFX90A-NEXT: ;;#ASMSTART
6905 ; GFX90A-NEXT: ; use s8
6906 ; GFX90A-NEXT: ;;#ASMEND
6907 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6909 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_7:
6911 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6912 ; GFX940-NEXT: ;;#ASMSTART
6913 ; GFX940-NEXT: ; def s[0:1]
6914 ; GFX940-NEXT: ;;#ASMEND
6915 ; GFX940-NEXT: ;;#ASMSTART
6916 ; GFX940-NEXT: ; def s[2:3]
6917 ; GFX940-NEXT: ;;#ASMEND
6918 ; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s3
6919 ; GFX940-NEXT: ;;#ASMSTART
6920 ; GFX940-NEXT: ; use s8
6921 ; GFX940-NEXT: ;;#ASMEND
6922 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6923 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6924 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6925 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 0, i32 7>
6926 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <1, 7>: result lane 0 is element 1 of %vec0 and
; lane 1 is element 3 of %vec1 (mask indices 0-3 address the first operand,
; 4-7 the second). Both asm defs are kept. The expected lowering is one
; s_pack_hh_b32_b16 into s8 combining the first register of the first pair
; with the second register of the second pair.
6930 define void @s_shuffle_v2i16_v4i16__1_7() {
6931 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__1_7:
6933 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6934 ; GFX900-NEXT: ;;#ASMSTART
6935 ; GFX900-NEXT: ; def s[4:5]
6936 ; GFX900-NEXT: ;;#ASMEND
6937 ; GFX900-NEXT: ;;#ASMSTART
6938 ; GFX900-NEXT: ; def s[6:7]
6939 ; GFX900-NEXT: ;;#ASMEND
6940 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s7
6941 ; GFX900-NEXT: ;;#ASMSTART
6942 ; GFX900-NEXT: ; use s8
6943 ; GFX900-NEXT: ;;#ASMEND
6944 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6946 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__1_7:
6948 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6949 ; GFX90A-NEXT: ;;#ASMSTART
6950 ; GFX90A-NEXT: ; def s[4:5]
6951 ; GFX90A-NEXT: ;;#ASMEND
6952 ; GFX90A-NEXT: ;;#ASMSTART
6953 ; GFX90A-NEXT: ; def s[6:7]
6954 ; GFX90A-NEXT: ;;#ASMEND
6955 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s7
6956 ; GFX90A-NEXT: ;;#ASMSTART
6957 ; GFX90A-NEXT: ; use s8
6958 ; GFX90A-NEXT: ;;#ASMEND
6959 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
6961 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_7:
6963 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6964 ; GFX940-NEXT: ;;#ASMSTART
6965 ; GFX940-NEXT: ; def s[0:1]
6966 ; GFX940-NEXT: ;;#ASMEND
6967 ; GFX940-NEXT: ;;#ASMSTART
6968 ; GFX940-NEXT: ; def s[2:3]
6969 ; GFX940-NEXT: ;;#ASMEND
6970 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s3
6971 ; GFX940-NEXT: ;;#ASMSTART
6972 ; GFX940-NEXT: ; use s8
6973 ; GFX940-NEXT: ;;#ASMEND
6974 ; GFX940-NEXT: s_setpc_b64 s[30:31]
6975 %vec0 = call <4 x i16> asm "; def $0", "=s"()
6976 %vec1 = call <4 x i16> asm "; def $0", "=s"()
6977 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 1, i32 7>
6978 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <2, 7>: result lane 0 is element 2 of %vec0 and
; lane 1 is element 3 of %vec1 (mask indices 0-3 address the first operand,
; 4-7 the second). Both asm defs are kept. The expected lowering is one
; s_pack_lh_b32_b16 into s8 combining the second register of each pair.
6982 define void @s_shuffle_v2i16_v4i16__2_7() {
6983 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__2_7:
6985 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6986 ; GFX900-NEXT: ;;#ASMSTART
6987 ; GFX900-NEXT: ; def s[4:5]
6988 ; GFX900-NEXT: ;;#ASMEND
6989 ; GFX900-NEXT: ;;#ASMSTART
6990 ; GFX900-NEXT: ; def s[6:7]
6991 ; GFX900-NEXT: ;;#ASMEND
6992 ; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s7
6993 ; GFX900-NEXT: ;;#ASMSTART
6994 ; GFX900-NEXT: ; use s8
6995 ; GFX900-NEXT: ;;#ASMEND
6996 ; GFX900-NEXT: s_setpc_b64 s[30:31]
6998 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__2_7:
7000 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7001 ; GFX90A-NEXT: ;;#ASMSTART
7002 ; GFX90A-NEXT: ; def s[4:5]
7003 ; GFX90A-NEXT: ;;#ASMEND
7004 ; GFX90A-NEXT: ;;#ASMSTART
7005 ; GFX90A-NEXT: ; def s[6:7]
7006 ; GFX90A-NEXT: ;;#ASMEND
7007 ; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s7
7008 ; GFX90A-NEXT: ;;#ASMSTART
7009 ; GFX90A-NEXT: ; use s8
7010 ; GFX90A-NEXT: ;;#ASMEND
7011 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7013 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_7:
7015 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7016 ; GFX940-NEXT: ;;#ASMSTART
7017 ; GFX940-NEXT: ; def s[0:1]
7018 ; GFX940-NEXT: ;;#ASMEND
7019 ; GFX940-NEXT: ;;#ASMSTART
7020 ; GFX940-NEXT: ; def s[2:3]
7021 ; GFX940-NEXT: ;;#ASMEND
7022 ; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s3
7023 ; GFX940-NEXT: ;;#ASMSTART
7024 ; GFX940-NEXT: ; use s8
7025 ; GFX940-NEXT: ;;#ASMEND
7026 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7027 %vec0 = call <4 x i16> asm "; def $0", "=s"()
7028 %vec1 = call <4 x i16> asm "; def $0", "=s"()
7029 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 2, i32 7>
7030 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <3, 7>: result lane 0 is element 3 of %vec0 and
; lane 1 is element 3 of %vec1 (mask indices 0-3 address the first operand,
; 4-7 the second). Both asm defs are kept. The expected lowering is one
; s_pack_hh_b32_b16 into s8 combining the second register of each pair.
7034 define void @s_shuffle_v2i16_v4i16__3_7() {
7035 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__3_7:
7037 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7038 ; GFX900-NEXT: ;;#ASMSTART
7039 ; GFX900-NEXT: ; def s[4:5]
7040 ; GFX900-NEXT: ;;#ASMEND
7041 ; GFX900-NEXT: ;;#ASMSTART
7042 ; GFX900-NEXT: ; def s[6:7]
7043 ; GFX900-NEXT: ;;#ASMEND
7044 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s7
7045 ; GFX900-NEXT: ;;#ASMSTART
7046 ; GFX900-NEXT: ; use s8
7047 ; GFX900-NEXT: ;;#ASMEND
7048 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7050 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__3_7:
7052 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7053 ; GFX90A-NEXT: ;;#ASMSTART
7054 ; GFX90A-NEXT: ; def s[4:5]
7055 ; GFX90A-NEXT: ;;#ASMEND
7056 ; GFX90A-NEXT: ;;#ASMSTART
7057 ; GFX90A-NEXT: ; def s[6:7]
7058 ; GFX90A-NEXT: ;;#ASMEND
7059 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s7
7060 ; GFX90A-NEXT: ;;#ASMSTART
7061 ; GFX90A-NEXT: ; use s8
7062 ; GFX90A-NEXT: ;;#ASMEND
7063 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7065 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_7:
7067 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7068 ; GFX940-NEXT: ;;#ASMSTART
7069 ; GFX940-NEXT: ; def s[0:1]
7070 ; GFX940-NEXT: ;;#ASMEND
7071 ; GFX940-NEXT: ;;#ASMSTART
7072 ; GFX940-NEXT: ; def s[2:3]
7073 ; GFX940-NEXT: ;;#ASMEND
7074 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s3
7075 ; GFX940-NEXT: ;;#ASMSTART
7076 ; GFX940-NEXT: ; use s8
7077 ; GFX940-NEXT: ;;#ASMEND
7078 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7079 %vec0 = call <4 x i16> asm "; def $0", "=s"()
7080 %vec1 = call <4 x i16> asm "; def $0", "=s"()
7081 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 3, i32 7>
7082 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <4, 7>: result lanes are elements 0 and 3 of %vec1
; (mask indices 4-7 address the second operand). %vec0 is not referenced by
; the mask and the checks show a single asm def. The expected lowering is one
; s_pack_lh_b32_b16 into s8 combining the two registers of the asm-defined
; pair in order.
7086 define void @s_shuffle_v2i16_v4i16__4_7() {
7087 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__4_7:
7089 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7090 ; GFX900-NEXT: ;;#ASMSTART
7091 ; GFX900-NEXT: ; def s[4:5]
7092 ; GFX900-NEXT: ;;#ASMEND
7093 ; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s5
7094 ; GFX900-NEXT: ;;#ASMSTART
7095 ; GFX900-NEXT: ; use s8
7096 ; GFX900-NEXT: ;;#ASMEND
7097 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7099 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__4_7:
7101 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7102 ; GFX90A-NEXT: ;;#ASMSTART
7103 ; GFX90A-NEXT: ; def s[4:5]
7104 ; GFX90A-NEXT: ;;#ASMEND
7105 ; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s5
7106 ; GFX90A-NEXT: ;;#ASMSTART
7107 ; GFX90A-NEXT: ; use s8
7108 ; GFX90A-NEXT: ;;#ASMEND
7109 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7111 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__4_7:
7113 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7114 ; GFX940-NEXT: ;;#ASMSTART
7115 ; GFX940-NEXT: ; def s[0:1]
7116 ; GFX940-NEXT: ;;#ASMEND
7117 ; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1
7118 ; GFX940-NEXT: ;;#ASMSTART
7119 ; GFX940-NEXT: ; use s8
7120 ; GFX940-NEXT: ;;#ASMEND
7121 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7122 %vec0 = call <4 x i16> asm "; def $0", "=s"()
7123 %vec1 = call <4 x i16> asm "; def $0", "=s"()
7124 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 4, i32 7>
7125 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <5, 7>: result lanes are elements 1 and 3 of %vec1
; (mask indices 4-7 address the second operand). %vec0 is not referenced by
; the mask and the checks show a single asm def. The expected lowering is one
; s_pack_hh_b32_b16 into s8 combining the two registers of the asm-defined
; pair in order.
7129 define void @s_shuffle_v2i16_v4i16__5_7() {
7130 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__5_7:
7132 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7133 ; GFX900-NEXT: ;;#ASMSTART
7134 ; GFX900-NEXT: ; def s[4:5]
7135 ; GFX900-NEXT: ;;#ASMEND
7136 ; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s5
7137 ; GFX900-NEXT: ;;#ASMSTART
7138 ; GFX900-NEXT: ; use s8
7139 ; GFX900-NEXT: ;;#ASMEND
7140 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7142 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__5_7:
7144 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7145 ; GFX90A-NEXT: ;;#ASMSTART
7146 ; GFX90A-NEXT: ; def s[4:5]
7147 ; GFX90A-NEXT: ;;#ASMEND
7148 ; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s5
7149 ; GFX90A-NEXT: ;;#ASMSTART
7150 ; GFX90A-NEXT: ; use s8
7151 ; GFX90A-NEXT: ;;#ASMEND
7152 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7154 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_7:
7156 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7157 ; GFX940-NEXT: ;;#ASMSTART
7158 ; GFX940-NEXT: ; def s[0:1]
7159 ; GFX940-NEXT: ;;#ASMEND
7160 ; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1
7161 ; GFX940-NEXT: ;;#ASMSTART
7162 ; GFX940-NEXT: ; use s8
7163 ; GFX940-NEXT: ;;#ASMEND
7164 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7165 %vec0 = call <4 x i16> asm "; def $0", "=s"()
7166 %vec1 = call <4 x i16> asm "; def $0", "=s"()
7167 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 5, i32 7>
7168 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
; Scalar shuffle with mask <6, 7>: result lanes are elements 2 and 3 of %vec1
; (mask indices 4-7 address the second operand). %vec0 is not referenced by
; the mask and the checks show a single asm def. The expected lowering is a
; plain s_mov_b32 of the second register of the asm-defined pair into s8,
; with no shift or pack.
7172 define void @s_shuffle_v2i16_v4i16__6_7() {
7173 ; GFX900-LABEL: s_shuffle_v2i16_v4i16__6_7:
7175 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7176 ; GFX900-NEXT: ;;#ASMSTART
7177 ; GFX900-NEXT: ; def s[4:5]
7178 ; GFX900-NEXT: ;;#ASMEND
7179 ; GFX900-NEXT: s_mov_b32 s8, s5
7180 ; GFX900-NEXT: ;;#ASMSTART
7181 ; GFX900-NEXT: ; use s8
7182 ; GFX900-NEXT: ;;#ASMEND
7183 ; GFX900-NEXT: s_setpc_b64 s[30:31]
7185 ; GFX90A-LABEL: s_shuffle_v2i16_v4i16__6_7:
7187 ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7188 ; GFX90A-NEXT: ;;#ASMSTART
7189 ; GFX90A-NEXT: ; def s[4:5]
7190 ; GFX90A-NEXT: ;;#ASMEND
7191 ; GFX90A-NEXT: s_mov_b32 s8, s5
7192 ; GFX90A-NEXT: ;;#ASMSTART
7193 ; GFX90A-NEXT: ; use s8
7194 ; GFX90A-NEXT: ;;#ASMEND
7195 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
7197 ; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_7:
7199 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7200 ; GFX940-NEXT: ;;#ASMSTART
7201 ; GFX940-NEXT: ; def s[0:1]
7202 ; GFX940-NEXT: ;;#ASMEND
7203 ; GFX940-NEXT: s_mov_b32 s8, s1
7204 ; GFX940-NEXT: ;;#ASMSTART
7205 ; GFX940-NEXT: ; use s8
7206 ; GFX940-NEXT: ;;#ASMEND
7207 ; GFX940-NEXT: s_setpc_b64 s[30:31]
7208 %vec0 = call <4 x i16> asm "; def $0", "=s"()
7209 %vec1 = call <4 x i16> asm "; def $0", "=s"()
7210 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> <i32 6, i32 7>
7211 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
7214 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
7215 ; GFX90APLUS: {{.*}}